Copy the index script from the Tom directory into the Portfolio directory
#make sure you are in the Portfolio directory; the trailing "." copies the script into the current directory
cp /share/bitcpt/Fall2022/lscapozi/Tom/Tom.starindex.sh .
Rename the file to reflect the Tom-Heinz1706 data
#rename the local copy in the current directory
mv Tom.starindex.sh Hz.starindex.sh
Edit the script (for example with vi Hz.starindex.sh) so that it contains the following
#!/bin/tcsh
#BSUB -J starindices_Hz_CherryTom #job name
#BSUB -n 10 #number of cores (matches STAR --runThreadN below)
#BSUB -R span[hosts=1] #keep all cores on one node; STAR is multithreaded and cannot use MPI across nodes
#BSUB -W 2:0 #wall-clock limit for the job (hours:minutes)
#BSUB -o starindices.out.%J #output file (%J is the job ID)
#BSUB -e starindices.err.%J #error file (%J is the job ID)
# Generates a STAR genome index from the Tom-Heinz1706 assembly and annotation.
# Run in working directory /share/bitcpt/Fall2022/lscapozi/Portfolio
# The index is written to the starindices subdirectory of the working directory.
module load conda
conda activate /usr/local/usrapps/bitcpt/star

# Directory holding the reference FASTA and the GTF annotation
set IN=/share/bitcpt/Fall2022/referenceGenomes/Solanum_lycopersicum/Portfolio/Tom-Heinz1706

# Index output directory, relative to the working directory.
# Created here (no-op if it already exists) so the job does not depend on a manual setup step.
set index=starindices
mkdir -p ${index}

# NOTE(review): --sjdbOverhang 58 and --genomeSAindexNbases 13 are carried over unchanged;
# confirm they suit this read length and genome size (see the STAR manual).
STAR --runThreadN 10 \
     --runMode genomeGenerate \
     --genomeSAindexNbases 13 \
     --genomeDir ${index} \
     --genomeFastaFiles ${IN}/Tom-Heinz_assembly.fasta \
     --sjdbGTFfile ${IN}/Tom-Heinz.agat.gtf \
     --sjdbOverhang 58
Submit the Tom-Heinz1706 indexing job
#the "<" feeds the script to bsub on standard input so that its #BSUB lines are read as job options
bsub <Hz.starindex.sh
Check the output
#long listing of the index directory (ll is typically a shell alias for "ls -l"; use that if ll is not defined)
ll starindices
Figure 1: Heinz 1706 Tomato indexing output
Copy the alignment script from the Tom directory to the Portfolio directory
#make sure you are in the Portfolio directory; the trailing "." copies the script into the current directory
cp /share/bitcpt/Fall2022/lscapozi/Tom/Tom.staralign.sh .
Rename the file
#rename the local copy in the current directory
mv Tom.staralign.sh Hz.staralign.sh
Edit the script
vi Hz.staralign.sh
#the file opens in the vi editor; edit it so that it contains the following
#!/bin/tcsh
#BSUB -J Hz_staralign #job name
#BSUB -n 12 #number of cores (matches STAR --runThreadN below)
#BSUB -W 10:0 #wall-clock limit for the job (hours:minutes)
#BSUB -R span[hosts=1] #to keep tasks on one node
#BSUB -R "rusage[mem=20000]" #memory request; 20000 is about 20GB (not 20MB) if the LSF memory unit is MB - confirm site setting
#BSUB -o Tom_staralign_%J.out #output file (%J is the job ID; file name still carries the Tom prefix)
#BSUB -e Tom_staralign_%J.err #error file (%J is the job ID; file name still carries the Tom prefix)
#to align RNA-seq reads to indexed genome using STAR
#STAR cannot make use of HPC MPI, must have -R options to set 1 node & memory
#NOTE(review): the original note said to set threads under 12 on Henry2, yet this job requests 12 - confirm the limit
#input of indexed genome path is /share/bitcpt/Fall2022/lscapozi/Portfolio/starindices
#input of sequence reads path is /share/bitcpt/Fall2022/CleanData/Solanum_lycopersicum/
#output of aligned reads will go into AlignedToTranscriptome subdirectory in working directory
module load conda
conda activate /usr/local/usrapps/bitcpt/star

# SET IN VARIABLES
# IN    = directory holding the cleaned paired-end reads
# index = STAR genome index directory (relative to the working directory)
# out   = directory that receives the alignment output
# EN    = file-name suffix shared by every read file
set IN=/share/bitcpt/Fall2022/CleanData/Solanum_lycopersicum
set index=starindices
set out=AlignedToTranscriptome
set EN=fp.fq.gz

# Create the output directory up front (no-op if it already exists) so the
# job does not depend on a manual setup step.
mkdir -p ${out}

# Samples to align, in processing order: three leaf replicates, then four SAM replicates.
# RNA-seq data are in format <sample>_1.fp.fq.gz / <sample>_2.fp.fq.gz,
# e.g. Sl_Leaf_Rep1_3X_1.fp.fq.gz
set samples = (Sl_Leaf_Rep1_3X Sl_Leaf_Rep2_3X Sl_Leaf_Rep3_3X)
set samples = ($samples Sl_SAM_Rep1_3X Sl_SAM_Rep2_3X Sl_SAM_Rep3_3X Sl_SAM_Rep4_3X)

# Every sample is aligned with identical STAR settings; only the sample name changes.
# Output files are written as ${out}/<sample>_*.
foreach S ($samples)
    ################################
    ## ${S}
    ################################
    # Print the file name to make sure it is right
    echo ${IN}/${S}_1.${EN}
    STAR --runThreadN 12 \
         --runMode alignReads \
         --genomeDir ${index} \
         --outFileNamePrefix ${out}/${S}_ \
         --readFilesIn ${IN}/${S}_1.${EN} ${IN}/${S}_2.${EN} \
         --readFilesCommand zcat \
         --outSAMtype BAM Unsorted \
         --twopassMode Basic \
         --quantMode TranscriptomeSAM
end
Submit the alignment job
#the "<" feeds the script to bsub on standard input so that its #BSUB lines are read as job options
bsub <Hz.staralign.sh