Post date: Oct 22, 2019 7:55:53 PM
Raw data and alignments/scripts for the start of this are in /uufs/chpc.utah.edu/common/home/gompert-group1/data/timema/moe_radiation_wgrs/
Details from README in /uufs/chpc.utah.edu/common/home/gompert-group2/data/timema_sp_wgs/
cd /uufs/chpc.utah.edu/common/home/gompert-group1/data/timema/moe_radiation_wgrs/sequences/
bwa mem -t 8 -k 20 -w 100 -r 1.3 -T 30 -R '@RG\tID:moe09C2-135840\tPL:ILLUMINA\tLB:moe09C2\tSM:moe09C2' /uufs/chpc.utah.edu/common/home/u6000989/data/timema/tcrDovetail/version3/map_timema_06Jun2016_RvNkF702.fasta WTCHG_135840_296_1.fastq.gz WTCHG_135840_296_2.fastq.gz > /uufs/chpc.utah.edu/common/home/gompert-group2/data/timema_sp_wgs/aln_135840_moe09C2.sam 2> /uufs/chpc.utah.edu/common/home/gompert-group2/data/timema_sp_wgs/error_135840_moe09C2.log
## fixed sams, specifically made it so contig/scaffold names don't have =
sbatch RunFix.sh
This creates the mod_*sam files.
## convert sam 2 bam
batch SubSam2Bam.sh
which runs Sam2BamFork.pl
this generates the mod*sorted.bam files (and related bai files)
i cleaned up the original sam/bam files
## merge files (4) for each individuals
sbatch SubMerge.sh
runs MergeFork.pl
output is merge*sorted.bam
## remove pcr duplicates---see https://sites.google.com/site/gompertlabnotes/home/researcher-pages/zach-gompert/timema/timema_misc/variantcallingforadditionalwholegenomesthatwerepartofthespermdonorrun
i am using rmdup from samtools for this rather than picardtools, there are some discussions about which is better, but the answer is they probably don't differ much and rmdup is probably a bit more agressive
i ran
sbatch SubRmdup.sh
which runs
RmdupFork.pl with the -S option to also work with SE reads.
## generate g.vcf files for genotypying with GATK haplotyp caller
#
sbatch RunGatk.sh
which runs the GatkFork.pl perl script
use Parallel::ForkManager;
my $max = 30;
my $pm = Parallel::ForkManager->new($max);
my $idir = '/uufs/chpc.utah.edu/common/home/gompert-group2/data/timema_sp_wgs';
my $odir = '/uufs/chpc.utah.edu/common/home/gompert-group2/data/timema_sp_wgs/variants';
my $tdir = '/scratch/general/lustre/tcrGatk';
my $genome = "/uufs/chpc.utah.edu/common/home/u6000989/data/timema/tcrDovetail/version3/map_timema_06Jun2016_RvNkF702.fasta";
FILES:
foreach $bam (@ARGV){
$pm->start and next FILES; ## fork
$out = $bam;
$out =~ s/bam/g.vcf/ or die "failed here: $out\n";
system "java -Xmx48g -jar ~/bin/GenomeAnalysisTK.jar -T HaplotypeCaller -R $genome -I $idir/$bam -o $tdir/$out -gt_mode DISCOVERY -hets 0.001 -mbq 20 -out_mode EMIT_VARIANTS_ONLY -ploidy 2 -stand_call_conf 50 -pcrModel AGGRESSIVE --emitRefConfidence GVCF --variant_index_type LINEAR --variant_index_parameter 128000\n";
system "cp $tdir/$out $odir/$out\n";
$pm->finish;
}
$pm->wait_all_children;