Post date: Nov 21, 2019 5:52:28 PM
MuTect2 can use a panel (40+ recommended) of non-"tumor" samples to refine the somatic mutation detection algorithm. These are especially useful for distinguishing true somatic mutations from sequencing artifacts. See the description here.
Rozenn chose 100 trees from sites in or near Utah to generate a panel of normals. The list is in
/uufs/chpc.utah.edu/common/home/u6000989/data/aspen/gbs_pando_plus//Alignments_mem/ponBamList.txt
Working in /uufs/chpc.utah.edu/common/home/u6000989/data/aspen/gbs_pando_plus/PoN we:
1. Ran normal-only calling for the creation of the panel of normals
sbatch RunPoNGatk.sh
which runs PoNGatkFork.pl
which looks like this:
#!/usr/bin/perl
#
# make vcf files for PoNs
#
# Usage: perl PoNGatkFork.pl <bam_list>
#
# <bam_list> holds one BAM file name per line (relative to $idir). For each
# name a child process runs GATK MuTect2 in artifact-detection mode, writes
# the result to $tdir as pon_<name>.vcf.gz, and then copies it to $odir.
# At most $max children run at the same time. The script exits non-zero if
# any sample could not be processed.
#
use strict;
use warnings;
use Parallel::ForkManager;

my $max = 30;
my $pm = Parallel::ForkManager->new($max);

my $idir = '/uufs/chpc.utah.edu/common/home/u6000989/data/aspen/gbs_pando_plus/Alignments_mem';
my $odir = '/uufs/chpc.utah.edu/common/home/u6000989/data/aspen/gbs_pando_plus/PoN';
my $tdir = '/scratch/general/lustre/pandoPoN';
my $genome = "/uufs/chpc.utah.edu/common/home/u6000989/data/aspen/genome/Potrs01-genome.fa";

# The commands below bypass the shell, so "~" has to be expanded here.
my $home = $ENV{HOME} // (getpwuid($<))[7];
my $jar = "$home/bin/GenomeAnalysisTK.jar";

my $in = shift(@ARGV);
defined $in or die "usage: $0 <bam_list>\n";
open(my $list_fh, '<', $in) or die "cannot open $in: $!\n";

# Fail before forking anything if an output location is missing.
for my $dir ($odir, $tdir) {
    -d $dir or die "output directory does not exist: $dir\n";
}

# Count children that reported a failure so the parent can exit non-zero.
my $failed = 0;
$pm->run_on_finish(sub {
    my ($pid, $exit_code) = @_;
    $failed++ if $exit_code;
});

FILES:
while (my $bam = <$list_fh>) {
    $bam =~ s/\s+\z//;            # drop the newline (and any stray CR/blanks)
    next FILES if $bam eq '';     # ignore blank lines in the list

    # Only rewrite the extension; an unanchored match would also hit a
    # "bam" that appears earlier in the sample name.
    my $out = $bam;
    unless ($out =~ s/\.bam\z/.vcf.gz/) {
        warn "failed here: $out\n";
        $failed++;
        next FILES;
    }
    $out = "pon_$out";

    $pm->start and next FILES; ## fork

    # List-form system: no shell, so file names are passed through verbatim.
    my @mutect = ('java', '-Xmx48g', '-jar', $jar, '-T', 'MuTect2',
                  '-R', $genome, '-I:tumor', "$idir/$bam",
                  '--artifact_detection_mode', '-o', "$tdir/$out");
    my @copy = ('cp', "$tdir/$out", "$odir/$out");

    # Stop at the first failing step so a broken VCF is never copied.
    for my $cmd (\@mutect, \@copy) {
        if (system(@$cmd) != 0) {
            warn "command failed (status $?): @$cmd\n";
            $pm->finish(1);
        }
    }
    $pm->finish(0);
}
$pm->wait_all_children;
close($list_fh);

die "$failed sample(s) failed\n" if $failed;
This generated 100 pon*vcf files.
2. Used CombineVariants to output only sites where a variant was seen in at least two samples
sbatch CombinePoN.sh
which looks like this:
#!/bin/sh
# Slurm batch script: resource requests, a short job banner, then move to the
# PoN working directory.
#SBATCH --time=240:00:00
#SBATCH --nodes=1
#SBATCH --ntasks=24
#SBATCH --account=gompert-kp
#SBATCH --partition=gompert-kp
#SBATCH --job-name=gatk-pon
#SBATCH --mail-type=FAIL
#SBATCH --mail-user=zach.gompert@usu.edu
echo ------------------------------------------------------
# SLURM_JOB_NODELIST holds the node name(s) themselves, not a path to a file,
# so it is printed directly rather than passed to cat.
echo "Job is running on node $SLURM_JOB_NODELIST"
echo ------------------------------------------------------
echo SLURM: job identifier is $SLURM_JOBID
echo SLURM: job name is $SLURM_JOB_NAME
echo ------------------------------------------------------
module load gatk
# The commands that follow use relative file names; stop if the directory is missing.
cd /uufs/chpc.utah.edu/common/home/u6000989/data/aspen/gbs_pando_plus/PoN || exit 1
# Print a warning banner when the job is interrupted before the merge finishes.
early()
{
echo ' '
echo ' ############ WARNING: EARLY TERMINATION ############# '
echo ' '
}
# Register the handler before the long-running command; defined afterwards and
# never trapped, it could not run. (Presumably the scheduler delivers SIGTERM
# on cancel/timeout -- confirm against the cluster's Slurm configuration.)
trap 'early; exit 1' INT TERM

# Merge the 100 per-sample PoN call sets, keeping only sites where a variant
# was seen in at least two samples (-minN 2).
# NOTE(review): inputs are named *.sorted.vcf while PoNGatkFork.pl writes
# *.vcf.gz -- presumably the files were decompressed in between; confirm.
java -Xmx128g -jar ~/bin/GenomeAnalysisTK.jar -T CombineVariants \
 -R /uufs/chpc.utah.edu/common/home/u6000989/data/aspen/genome/Potrs01-genome.fa \
 -V pon_aln_mem_COC-1605.sorted.vcf \
 -V pon_aln_mem_COC-1607.sorted.vcf \
 -V pon_aln_mem_COC-1609.sorted.vcf \
 -V pon_aln_mem_COC-1610.sorted.vcf \
 -V pon_aln_mem_COC-1612.sorted.vcf \
 -V pon_aln_mem_COC-1622.sorted.vcf \
 -V pon_aln_mem_COC-1626.sorted.vcf \
 -V pon_aln_mem_COC-1631.sorted.vcf \
 -V pon_aln_mem_COC-1645.sorted.vcf \
 -V pon_aln_mem_COC-1646.sorted.vcf \
 -V pon_aln_mem_COD-1613.sorted.vcf \
 -V pon_aln_mem_COD-1617.sorted.vcf \
 -V pon_aln_mem_COD-1621.sorted.vcf \
 -V pon_aln_mem_COH-1626.sorted.vcf \
 -V pon_aln_mem_COH-1627.sorted.vcf \
 -V pon_aln_mem_COH-1643.sorted.vcf \
 -V pon_aln_mem_COH-16CW.sorted.vcf \
 -V pon_aln_mem_COM-1601.sorted.vcf \
 -V pon_aln_mem_COM-1610.sorted.vcf \
 -V pon_aln_mem_COM-1616.sorted.vcf \
 -V pon_aln_mem_COM-1621.sorted.vcf \
 -V pon_aln_mem_COM-1623.sorted.vcf \
 -V pon_aln_mem_COM-1625.sorted.vcf \
 -V pon_aln_mem_COM-1631.sorted.vcf \
 -V pon_aln_mem_COM-1636.sorted.vcf \
 -V pon_aln_mem_COM-1640-RpA132.sorted.vcf \
 -V pon_aln_mem_COP-1608.sorted.vcf \
 -V pon_aln_mem_COP-1614.sorted.vcf \
 -V pon_aln_mem_COP-1618.sorted.vcf \
 -V pon_aln_mem_COP-1622.sorted.vcf \
 -V pon_aln_mem_COP-1629.sorted.vcf \
 -V pon_aln_mem_COP-1649.sorted.vcf \
 -V pon_aln_mem_COS-1607.sorted.vcf \
 -V pon_aln_mem_COS-1608.sorted.vcf \
 -V pon_aln_mem_COS-1628.sorted.vcf \
 -V pon_aln_mem_COS-1635.sorted.vcf \
 -V pon_aln_mem_COS-1649.sorted.vcf \
 -V pon_aln_mem_COS-16CW.sorted.vcf \
 -V pon_aln_mem_COT-1605.sorted.vcf \
 -V pon_aln_mem_COT-1647-RpA134.sorted.vcf \
 -V pon_aln_mem_COU-1602.sorted.vcf \
 -V pon_aln_mem_COU-1608.sorted.vcf \
 -V pon_aln_mem_COU-1621.sorted.vcf \
 -V pon_aln_mem_COU-1636.sorted.vcf \
 -V pon_aln_mem_COU-1638.sorted.vcf \
 -V pon_aln_mem_COU-1648.sorted.vcf \
 -V pon_aln_mem_COU-1650.sorted.vcf \
 -V pon_aln_mem_COV-1609.sorted.vcf \
 -V pon_aln_mem_COV-1615.sorted.vcf \
 -V pon_aln_mem_COV-1628.sorted.vcf \
 -V pon_aln_mem_COV-1630.sorted.vcf \
 -V pon_aln_mem_NVO-1613.sorted.vcf \
 -V pon_aln_mem_NVO-1619.sorted.vcf \
 -V pon_aln_mem_NVO-1628.sorted.vcf \
 -V pon_aln_mem_NVO-1631.sorted.vcf \
 -V pon_aln_mem_NVP-1614.sorted.vcf \
 -V pon_aln_mem_NVP-1615.sorted.vcf \
 -V pon_aln_mem_NVP-1619.sorted.vcf \
 -V pon_aln_mem_NVP-1631.sorted.vcf \
 -V pon_aln_mem_UTB-1610.sorted.vcf \
 -V pon_aln_mem_UTB-1613.sorted.vcf \
 -V pon_aln_mem_UTB-1617.sorted.vcf \
 -V pon_aln_mem_UTB-1622.sorted.vcf \
 -V pon_aln_mem_UTB-1624.sorted.vcf \
 -V pon_aln_mem_UTB-1628.sorted.vcf \
 -V pon_aln_mem_UTB-1640.sorted.vcf \
 -V pon_aln_mem_UTB-1646.sorted.vcf \
 -V pon_aln_mem_UTK-1610.sorted.vcf \
 -V pon_aln_mem_UTK-1631.sorted.vcf \
 -V pon_aln_mem_UTM-1603.sorted.vcf \
 -V pon_aln_mem_UTM-1608-RpA141.sorted.vcf \
 -V pon_aln_mem_UTM-1610.sorted.vcf \
 -V pon_aln_mem_UTM-1616.sorted.vcf \
 -V pon_aln_mem_UTM-1624-RpA141.sorted.vcf \
 -V pon_aln_mem_UTM-1625.sorted.vcf \
 -V pon_aln_mem_UTM-1650.sorted.vcf \
 -V pon_aln_mem_UTU-1607.sorted.vcf \
 -V pon_aln_mem_UTU-1611.sorted.vcf \
 -V pon_aln_mem_UTU-1613.sorted.vcf \
 -V pon_aln_mem_UTU-1619.sorted.vcf \
 -V pon_aln_mem_UTU-1631.sorted.vcf \
 -V pon_aln_mem_UTU-1632.sorted.vcf \
 -V pon_aln_mem_UTU-1650.sorted.vcf \
 -V pon_aln_mem_UTW-1601.sorted.vcf \
 -V pon_aln_mem_UTW-1602-RpA142.sorted.vcf \
 -V pon_aln_mem_UTW-1613.sorted.vcf \
 -V pon_aln_mem_UTW-1614.sorted.vcf \
 -V pon_aln_mem_UTW-1648.sorted.vcf \
 -V pon_aln_mem_WYB-1613.sorted.vcf \
 -V pon_aln_mem_WYB-1620.sorted.vcf \
 -V pon_aln_mem_WYB-1639.sorted.vcf \
 -V pon_aln_mem_WYB-1649.sorted.vcf \
 -V pon_aln_mem_WYW-1605.sorted.vcf \
 -V pon_aln_mem_WYW-1617.sorted.vcf \
 -V pon_aln_mem_WYW-1620.sorted.vcf \
 -V pon_aln_mem_WYW-1622.sorted.vcf \
 -V pon_aln_mem_WYW-1627.sorted.vcf \
 -V pon_aln_mem_WYW-1634.sorted.vcf \
 -V pon_aln_mem_WYW-1645.sorted.vcf \
 -V pon_aln_mem_WYW-1649.sorted.vcf \
 -minN 2 --setKey "null" --filteredAreUncalled --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED \
 -o potr_PON.vcf
# Capture java's status immediately so the job reports failure when the merge fails.
status=$?
exit $status