Log in to the HPC
mcescalo@login.hpc.ncsu.edu
Change the working directory to the Portafolio directory under my user name in the bitcpt group space
>>>: cd /share/bitcpt/Fall2022/mcescalo/Portafolio
Create "Sl.staralign.sh" shell script using vi command and write STAR align script for Sl
>>>: vi Sl.staralign.sh
#!/bin/tcsh
#BSUB -J staralign_Sl_Caro #job name
#BSUB -n 10 #number of cores requested (must match --runThreadN below)
#BSUB -R "span[hosts=1]" #keep all cores on one host; STAR is multi-threaded, not MPI
#BSUB -W 10:0 #wall-clock limit for the job (hh:mm)
#BSUB -o starindices.out.%J #output file (name kept from the indexing job)
#BSUB -e starindices.err.%J #error file (name kept from the indexing job)

# Aligns the paired-end Solanum lycopersicum (Sl) RNA-seq reads to a
# pre-built STAR genome index, one STAR run per sample.
#
# Run from the working directory that contains the starindices/ subdirectory
# (e.g. /share/bitcpt/Fall2022/UNITYID/Portafolio).
#
# Inputs:
#   indexed genome : ./starindices
#   sequence reads : /share/bitcpt/Fall2022/CleanData/Solanum_lycopersicum/
#                    named <sample>_1.fp.fq.gz and <sample>_2.fp.fq.gz
# Output:
#   aligned reads go into the AlignedToTranscriptome/ subdirectory of the
#   working directory, each file prefixed with "<sample>_"

module load conda
conda activate /usr/local/usrapps/bitcpt/star

# SET IN VARIABLES
set IN=/share/bitcpt/Fall2022/CleanData/Solanum_lycopersicum
set index=starindices
set out=AlignedToTranscriptome
# Read-file suffix shared by every sample
set EN=fp.fq.gz
# Thread count handed to STAR; keep equal to the "#BSUB -n" request so the
# job does not run more threads than the cores it reserved.
set threads=10

# STAR does not create the parent directory of --outFileNamePrefix when the
# prefix does not end in "/", so make sure it exists before the first run.
mkdir -p ${out}

################################
## Leaf Rep 1, Leaf Rep 3, SAM Rep 1-4
################################
# RNA-seq data are in format Sl_Leaf_Rep1_2X_1.fp.fq.gz
foreach S ( Sl_Leaf_Rep1_2X Sl_Leaf_Rep3_2X Sl_SAM_Rep1_2X Sl_SAM_Rep2_2X Sl_SAM_Rep3_2X Sl_SAM_Rep4_2X )
    # Print the file name to make sure it is right
    echo ${IN}/${S}_1.${EN}
    STAR --runThreadN ${threads} --runMode alignReads --genomeDir ${index} --outFileNamePrefix ${out}/${S}_ --readFilesIn ${IN}/${S}_1.${EN} ${IN}/${S}_2.${EN} --readFilesCommand zcat --outSAMtype BAM Unsorted --twopassMode Basic --quantMode TranscriptomeSAM
end
Submit the job to run the star alignment
>>>: bsub < Sl.staralign.sh
After successful completion of the STAR alignment job, change into AlignedToTranscriptome (the output directory set by the `out` variable in the script)
>>>: cd AlignedToTranscriptome
List the files inside the AlignedToTranscriptome directory using the tree command
>>>: tree
.
├── Sl_Leaf_Rep1_2X_Aligned.out.bam
├── Sl_Leaf_Rep1_2X_Aligned.toTranscriptome.out.bam
├── Sl_Leaf_Rep1_2X_Log.final.out
├── Sl_Leaf_Rep1_2X_Log.out
├── Sl_Leaf_Rep1_2X_Log.progress.out
├── Sl_Leaf_Rep1_2X_SJ.out.tab
├── Sl_Leaf_Rep1_2X__STARgenome
│ ├── sjdbInfo.txt
│ └── sjdbList.out.tab
├── Sl_Leaf_Rep1_2X__STARpass1
│ ├── Log.final.out
│ └── SJ.out.tab
├── Sl_Leaf_Rep3_2X_Aligned.out.bam
├── Sl_Leaf_Rep3_2X_Aligned.toTranscriptome.out.bam
├── Sl_Leaf_Rep3_2X_Log.final.out
├── Sl_Leaf_Rep3_2X_Log.out
├── Sl_Leaf_Rep3_2X_Log.progress.out
├── Sl_Leaf_Rep3_2X_SJ.out.tab
├── Sl_Leaf_Rep3_2X__STARgenome
│ ├── sjdbInfo.txt
│ └── sjdbList.out.tab
├── Sl_Leaf_Rep3_2X__STARpass1
│ ├── Log.final.out
│ └── SJ.out.tab
├── Sl_SAM_Rep1_2X_Aligned.out.bam
├── Sl_SAM_Rep1_2X_Aligned.toTranscriptome.out.bam
├── Sl_SAM_Rep1_2X_Log.final.out
├── Sl_SAM_Rep1_2X_Log.out
├── Sl_SAM_Rep1_2X_Log.progress.out
├── Sl_SAM_Rep1_2X_SJ.out.tab
├── Sl_SAM_Rep1_2X__STARgenome
│ ├── sjdbInfo.txt
│ └── sjdbList.out.tab
├── Sl_SAM_Rep1_2X__STARpass1
│ ├── Log.final.out
│ └── SJ.out.tab
├── Sl_SAM_Rep2_2X_Aligned.out.bam
├── Sl_SAM_Rep2_2X_Aligned.toTranscriptome.out.bam
├── Sl_SAM_Rep2_2X_Log.final.out
├── Sl_SAM_Rep2_2X_Log.out
├── Sl_SAM_Rep2_2X_Log.progress.out
├── Sl_SAM_Rep2_2X_SJ.out.tab
├── Sl_SAM_Rep2_2X__STARgenome
│ ├── sjdbInfo.txt
│ └── sjdbList.out.tab
├── Sl_SAM_Rep2_2X__STARpass1
│ ├── Log.final.out
│ └── SJ.out.tab
├── Sl_SAM_Rep3_2X_Aligned.out.bam
├── Sl_SAM_Rep3_2X_Aligned.toTranscriptome.out.bam
├── Sl_SAM_Rep3_2X_Log.final.out
├── Sl_SAM_Rep3_2X_Log.out
├── Sl_SAM_Rep3_2X_Log.progress.out
├── Sl_SAM_Rep3_2X_SJ.out.tab
├── Sl_SAM_Rep3_2X__STARgenome
│ ├── sjdbInfo.txt
│ └── sjdbList.out.tab
├── Sl_SAM_Rep3_2X__STARpass1
│ ├── Log.final.out
│ └── SJ.out.tab
├── Sl_SAM_Rep4_2X_Aligned.out.bam
├── Sl_SAM_Rep4_2X_Aligned.toTranscriptome.out.bam
├── Sl_SAM_Rep4_2X_Log.final.out
├── Sl_SAM_Rep4_2X_Log.out
├── Sl_SAM_Rep4_2X_Log.progress.out
├── Sl_SAM_Rep4_2X_SJ.out.tab
├── Sl_SAM_Rep4_2X__STARgenome
│ ├── sjdbInfo.txt
│ └── sjdbList.out.tab
└── Sl_SAM_Rep4_2X__STARpass1
    ├── Log.final.out
    └── SJ.out.tab
--runThreadN 10 ###sets the number of threads STAR uses for alignment; it should match the number of cores requested with #BSUB -n (10)
--runMode alignReads ###command to set STAR mode; alignReads
--genomeDir starindices ###path to the genome directory where the genome indices are stored; starindices is the name of the directory that contains the generated genome indices
--outFileNamePrefix ${out}/${S}_ ###specifies the output subdirectory and the prefix of the output files; the output directory is AlignedToTranscriptome
--readFilesIn ${IN}/${S}_1.${EN} ${IN}/${S}_2.${EN} ###paths to the input files containing the sequences to be mapped; here read 1 and read 2 of the paired-end reads
--readFilesCommand zcat ###command STAR uses to read the input files; zcat streams the decompressed contents of the .gz files without unpacking them on disk
--outSAMtype BAM Unsorted ###STAR outputs alignments in unsorted bam files format
--twopassMode Basic ###command to run STAR 2-pass mapping for individual samples
--quantMode TranscriptomeSAM ###commands STAR to output alignments translated to transcript coordinates