Here are the details of variant calling with the consensus references. I followed all the steps described on this page: https://sites.google.com/site/gompertlabnotes/home/researcher-pages/samridhi-chaturvedi/timema/sequencealignment/variantcalling
#details from the unfiltered variants file
##### FILTERING ################
1. usage: perl vcfFilter_af.pl variantsTimemaBart.vcf coverage missing
#2X
perl vcfFilter_af.pl variantsTimemaBart.vcf 390 39
perl vcfFilter_af.pl variantsTimemaCali.vcf 77 15.4
perl vcfFilter_af.pl variantsTimemaChum.vcf 358 71.6
perl vcfFilter_af.pl variantsTimemaCris.vcf 205 41
perl vcfFilter_af.pl variantsTimemaKnul.vcf 89 17.8
perl vcfFilter_af.pl variantsTimemaLand.vcf 125 25
perl vcfFilter_af.pl variantsTimemaPodu.vcf 255 51
perl vcfFilter_af.pl variantsTimemaPopp.vcf 116 23.2
#3X
perl vcfFilter_af.pl variantsTimemaBart.vcf 390 58.5
perl vcfFilter_af.pl variantsTimemaCali.vcf 77 23.1
perl vcfFilter_af.pl variantsTimemaChum.vcf 358 107.4
perl vcfFilter_af.pl variantsTimemaCris.vcf 205 61.5
perl vcfFilter_af.pl variantsTimemaKnul.vcf 89 26.7
perl vcfFilter_af.pl variantsTimemaLand.vcf 125 37.5
perl vcfFilter_af.pl variantsTimemaPodu.vcf 255 76.5
perl vcfFilter_af.pl variantsTimemaPopp.vcf 116 34.8
#create scaffold position files for next step of filtering
grep ^Sc filtered3x_variantsTimemaBart.vcf | cut -d ':' -f 3 > scafBart3x.txt
grep ^Sc filtered3x_variantsTimemaCali.vcf | cut -d ':' -f 3 > scafCali3x.txt
grep ^Sc filtered3x_variantsTimemaChum.vcf | cut -d ':' -f 3 > scafChum3x.txt
grep ^Sc filtered3x_variantsTimemaCris.vcf | cut -d ':' -f 3 > scafCris3x.txt
grep ^Sc filtered3x_variantsTimemaKnul.vcf | cut -d ':' -f 3 > scafKnul3x.txt
grep ^Sc filtered3x_variantsTimemaLand.vcf | cut -d ':' -f 3 > scafLand3x.txt
grep ^Sc filtered3x_variantsTimemaPodu.vcf | cut -d ':' -f 3 > scafPodu3x.txt
grep ^Sc filtered3x_variantsTimemaPopp.vcf | cut -d ':' -f 3 > scafPopp3x.txt
grep ^Sc filtered3x_variantsTimemaBart.vcf | cut -f 2 > posBart3x.txt
grep ^Sc filtered3x_variantsTimemaCali.vcf | cut -f 2 > posCali3x.txt
grep ^Sc filtered3x_variantsTimemaChum.vcf | cut -f 2 > posChum3x.txt
grep ^Sc filtered3x_variantsTimemaCris.vcf | cut -f 2 > posCris3x.txt
grep ^Sc filtered3x_variantsTimemaKnul.vcf | cut -f 2 > posKnul3x.txt
grep ^Sc filtered3x_variantsTimemaLand.vcf | cut -f 2 > posLand3x.txt
grep ^Sc filtered3x_variantsTimemaPodu.vcf | cut -f 2 > posPodu3x.txt
grep ^Sc filtered3x_variantsTimemaPopp.vcf | cut -f 2 > posPopp3x.txt
#create final file
paste scafBart3x.txt posBart3x.txt > snpsBart3x.txt
paste scafCali3x.txt posCali3x.txt > snpsCali3x.txt
paste scafChum3x.txt posChum3x.txt > snpsChum3x.txt
paste scafCris3x.txt posCris3x.txt > snpsCris3x.txt
paste scafKnul3x.txt posKnul3x.txt > snpsKnul3x.txt
paste scafLand3x.txt posLand3x.txt > snpsLand3x.txt
paste scafPodu3x.txt posPodu3x.txt > snpsPodu3x.txt
paste scafPopp3x.txt posPopp3x.txt > snpsPopp3x.txt
2. Next round of filtering
#get depth for next step
grep ^Sc filtered3x_variantsTimemaBart.vcf | cut -f 8 | cut -d ';' -f 1 | grep -o '[0-9]*' > filt3Xbart_depth.txt
grep ^Sc filtered3x_variantsTimemaCali.vcf | cut -f 8 | cut -d ';' -f 1 | grep -o '[0-9]*' > filt3Xcali_depth.txt
grep ^Sc filtered3x_variantsTimemaChum.vcf | cut -f 8 | cut -d ';' -f 1 | grep -o '[0-9]*' > filt3Xchum_depth.txt
grep ^Sc filtered3x_variantsTimemaCris.vcf | cut -f 8 | cut -d ';' -f 1 | grep -o '[0-9]*' > filt3Xcris_depth.txt
grep ^Sc filtered3x_variantsTimemaKnul.vcf | cut -f 8 | cut -d ';' -f 1 | grep -o '[0-9]*' > filt3Xknul_depth.txt
grep ^Sc filtered3x_variantsTimemaLand.vcf | cut -f 8 | cut -d ';' -f 1 | grep -o '[0-9]*' > filt3Xland_depth.txt
grep ^Sc filtered3x_variantsTimemaPodu.vcf | cut -f 8 | cut -d ';' -f 1 | grep -o '[0-9]*' > filt3Xpodu_depth.txt
grep ^Sc filtered3x_variantsTimemaPopp.vcf | cut -f 8 | cut -d ';' -f 1 | grep -o '[0-9]*' > filt3Xpopp_depth.txt
#calculate coverage in R
bart = read.table("filt3Xbart_depth.txt", header=FALSE)
cali = read.table("filt3Xcali_depth.txt", header=FALSE)
chum = read.table("filt3Xchum_depth.txt", header=FALSE)
cris = read.table("filt3Xcris_depth.txt", header=FALSE)
knul = read.table("filt3Xknul_depth.txt", header=FALSE)
land = read.table("filt3Xland_depth.txt", header=FALSE)
podu = read.table("filt3Xpodu_depth.txt", header=FALSE)
popp = read.table("filt3Xpopp_depth.txt", header=FALSE)
bartcov = mean(bart$V1) + 2*(sd(bart$V1))
calicov = mean(cali$V1) + 2*(sd(cali$V1))
chumcov = mean(chum$V1) + 2*(sd(chum$V1))
criscov = mean(cris$V1) + 2*(sd(cris$V1))
knulcov = mean(knul$V1) + 2*(sd(knul$V1))
landcov = mean(land$V1) + 2*(sd(land$V1))
poducov = mean(podu$V1) + 2*(sd(podu$V1))
poppcov = mean(popp$V1) + 2*(sd(popp$V1))
usage: perl filterSomemore.pl infile.vcf snpsfile maxcoverage
perl filterSomeMore.pl filtered3x_variantsTimemaBart.vcf snpsBart3x.txt 3877
perl filterSomeMore.pl filtered3x_variantsTimemaCali.vcf snpsCali3x.txt 1785
perl filterSomeMore.pl filtered3x_variantsTimemaChum.vcf snpsChum3x.txt 7366
perl filterSomeMore.pl filtered3x_variantsTimemaCris.vcf snpsCris3x.txt 1758
perl filterSomeMore.pl filtered3x_variantsTimemaKnul.vcf snpsKnul3x.txt 1382
perl filterSomeMore.pl filtered3x_variantsTimemaLand.vcf snpsLand3x.txt 2020
perl filterSomeMore.pl filtered3x_variantsTimemaPodu.vcf snpsPodu3x.txt 3846
perl filterSomeMore.pl filtered3x_variantsTimemaPopp.vcf snpsPopp3x.txt 2315
#bart 3877.336 roundoff to 3877
#cali 1785.011 roundoff to 1785
#chum 7366.142 roundoff to 7366
#cris 1757.885 roundoff to 1758
#knul 1382.433 roundoff to 1382
#land 2019.856 roundoff to 2020
#podu 3846.46 roundoff to 3846
#popp 2314.925 roundoff to 2315
## get snpids to match with the whole genome
grep ^Sc morefilter3X_filtered3x_variantsTimemaBart.vcf | cut -f 1 > snpidBart3x.txt
grep ^Sc morefilter3X_filtered3x_variantsTimemaCali.vcf | cut -f 1 > snpidCali3x.txt
grep ^Sc morefilter3X_filtered3x_variantsTimemaChum.vcf | cut -f 1 > snpidChum3x.txt
grep ^Sc morefilter3X_filtered3x_variantsTimemaCris.vcf | cut -f 1 > snpidCris3x.txt
grep ^Sc morefilter3X_filtered3x_variantsTimemaKnul.vcf | cut -f 1 > snpidKnul3x.txt
grep ^Sc morefilter3X_filtered3x_variantsTimemaLand.vcf | cut -f 1 > snpidLand3x.txt
grep ^Sc morefilter3X_filtered3x_variantsTimemaPodu.vcf | cut -f 1 > snpidPodu3x.txt
grep ^Sc morefilter3X_filtered3x_variantsTimemaPopp.vcf | cut -f 1 > snpidPopp3x.txt