An indexed reference genome will be built for the Heinz 1706 assembly, and the Heinz 1706 sequences will then be aligned to that reference genome.
Code:
cp /share/bitcpt/Fall2022/scripts/At.starindex.sh /share/bitcpt/Fall2022/UnityID/Heinz/Heinz.starindex.sh
ls ../Heinz
AlignedToTranscriptome fastqc.out.380458 salmon_align_quant transcriptome
fastqc Heinz.fastqc.sh starindices
fastqc.err.380458 Heinz.starindex.sh starOutputfiles
Edit script:
vi Heinz.starindex.sh
Script:
#!/bin/tcsh
#BSUB -J starindices_Heinz_cpjohns #job name
#BSUB -n 10 #number of cores (one per STAR thread), not nodes
#BSUB -W 8:0 #time for job to complete
#BSUB -R span[hosts=1] #keep all cores on one node; STAR is multithreaded, not MPI
#BSUB -o starindices.out.%J #output file
#BSUB -e starindices.err.%J #error file (written next to the output file in the working directory)
# Builds a STAR genome index for the Heinz 1706 tomato assembly.
# Run in working directory /share/bitcpt/Fall2022/UnityID/Heinz
# The index is written to the starindices subdirectory of the working directory.
module load conda
conda activate /usr/local/usrapps/bitcpt/star

# Directory holding the reference assembly (FASTA) and its annotation (GTF)
set IN=/share/bitcpt/Fall2022/referenceGenomes/Solanum_lycopersicum/Portfolio/Tom-Heinz1706
# Directory that receives the generated index
set index=starindices
# Keep in sync with the "#BSUB -n" request above
set threads=10

# Make sure the index directory exists before STAR writes into it
mkdir -p ${index}

# --genomeSAindexNbases and --sjdbOverhang are carried over unchanged.
# NOTE(review): --sjdbOverhang is normally (read length - 1); confirm 58 matches these reads.
STAR --runThreadN ${threads} \
     --runMode genomeGenerate \
     --genomeSAindexNbases 13 \
     --genomeDir ${index} \
     --genomeFastaFiles ${IN}/Tom-Heinz_assembly.fasta \
     --sjdbGTFfile ${IN}/Tom-Heinz.agat.gtf \
     --sjdbOverhang 58
~
~
~
Code:
bsub < Heinz.starindex.sh
Code:
cp /share/bitcpt/Fall2022/scripts/At.staralign.sh ../Heinz/Heinz.staralign.sh
ls
AlignedToTranscriptome Heinz.fastqc.sh starindices transcriptome
fastqc Heinz.staralign.sh starindices.out.380562
fastqc.err.380458 Heinz.starindex.sh starOutputfiles
fastqc.out.380458 salmon_align_quant _STARtmp
Edit script:
vi Heinz.staralign.sh
Script:
#!/bin/tcsh
#BSUB -J Heinz_staralign #job name
#BSUB -n 12 #number of cores (one per STAR thread)
#BSUB -W 10:0 #time for job to complete
#BSUB -R span[hosts=1] #to keep tasks on one node
#BSUB -R "rusage[mem=20000]" #to request a node with 20000 MB (~20 GB) of memory, assuming the LSF default unit of MB
#BSUB -o Heinz_staralign_%J.out #output file
#BSUB -e Heinz_staralign_%J.err #error file
# Aligns paired-end RNA-seq reads to the indexed genome using STAR.
# STAR cannot make use of HPC MPI, must have -R options to set 1 node & memory
# Keep the thread count at 12 or fewer on Henry2
# NOTE(review): the original note said "under 12" while the job uses 12 - confirm 12 is allowed
# input of indexed genome is the starindices subdirectory of the working directory
# input of sequence reads path is /share/bitcpt/Fall2022/CleanData/Solanum_lycopersicum/
# output of aligned reads will go into the AlignedToTranscriptome subdirectory in working directory
module load conda
conda activate /usr/local/usrapps/bitcpt/star

# SET IN VARIABLES
set IN=/share/bitcpt/Fall2022/CleanData/Solanum_lycopersicum
set index=starindices
set out=AlignedToTranscriptome
# Read files are named <sample>_1.fp.fq.gz and <sample>_2.fp.fq.gz,
# e.g. Sl_Leaf_Rep1_3X_1.fp.fq.gz
set EN=fp.fq.gz
# Keep in sync with the "#BSUB -n" request above
set threads=12

# Samples to align: three leaf replicates and four SAM replicates.
# Leaf Rep 1 was previously written as Sl_Leaf_Rep1_1X, which does not match
# the documented file name format (..._3X_...) used by every other sample.
set leaf_samples = ( Sl_Leaf_Rep1_3X Sl_Leaf_Rep2_3X Sl_Leaf_Rep3_3X )
set sam_samples = ( Sl_SAM_Rep1_3X Sl_SAM_Rep2_3X Sl_SAM_Rep3_3X Sl_SAM_Rep4_3X )

# STAR writes its output files using ${out}/<sample>_ as the prefix
mkdir -p ${out}

foreach S ( $leaf_samples $sam_samples )
    set R1=${IN}/${S}_1.${EN}
    set R2=${IN}/${S}_2.${EN}

    # Print the file name to make sure it is right
    echo ${R1}

    # Report a missing/misnamed sample clearly and move on to the next one
    if ( ! -e ${R1} || ! -e ${R2} ) then
        echo "Read file(s) not found for sample ${S}; skipping"
        continue
    endif

    STAR --runThreadN ${threads} \
         --runMode alignReads \
         --genomeDir ${index} \
         --outFileNamePrefix ${out}/${S}_ \
         --readFilesIn ${R1} ${R2} \
         --readFilesCommand zcat \
         --outSAMtype BAM Unsorted \
         --twopassMode Basic \
         --quantMode TranscriptomeSAM
end
Code:
bsub < Heinz.staralign.sh