Post date: Jun 14, 2015 3:14:14 AM
cp wrap_qsub_rc_theta.pl wrap_qsub_rc_theta_CW.pl
#changed pops to "W" and "C", running 30 iterations (2 . . 30), Zach made it compatible with SLURM
cd /labs/evolution/projects/sosorum/
samtools mpileup -C 50 -d 50 -I -g -q 10 -Q 15 -f Esosorum_40mil_qc_mmp84.fasta alnE-BS-C*sorted.bam > Csites.bcf
bcftools view -cGP cond2 Csites.bcf > /dev/null 2> Csites.1.afs
bcftools view -cGP Csites.1.afs Csites.bcf > /dev/null 2> Csites.2.afs
bcftools view -cGP Csites.2.afs Csites.bcf > /dev/null 2> Csites.3.afs
bcftools view -cGP Csites.3.afs Csites.bcf > /dev/null 2> Csites.4.afs
bcftools view -cGP Csites.4.afs Csites.bcf > /dev/null 2> Csites.5.afs
bcftools view -cGP Csites.5.afs Csites.bcf > /dev/null 2> Csites.6.afs
bcftools view -cGP Csites.6.afs Csites.bcf > /dev/null 2> Csites.7.afs
bcftools view -cGP Csites.7.afs Csites.bcf > /dev/null 2> Csites.8.afs
bcftools view -cGP Csites.8.afs Csites.bcf > /dev/null 2> Csites.9.afs
bcftools view -cGP Csites.9.afs Csites.bcf > /dev/null 2> Csites.10.afs
bcftools view -cGP Csites.10.afs Csites.bcf > /dev/null 2> Csites.11.afs
bcftools view -cGP Csites.11.afs Csites.bcf > /dev/null 2> Csites.12.afs
bcftools view -cGP Csites.12.afs Csites.bcf > /dev/null 2> Csites.13.afs
bcftools view -cGP Csites.13.afs Csites.bcf > /dev/null 2> Csites.14.afs
bcftools view -cGP Csites.14.afs Csites.bcf > /dev/null 2> Csites.15.afs
bcftools view -cGP Csites.15.afs Csites.bcf > /dev/null 2> Csites.16.afs
bcftools view -cGP Csites.16.afs Csites.bcf > /dev/null 2> Csites.17.afs
bcftools view -cGP Csites.17.afs Csites.bcf > /dev/null 2> Csites.18.afs
bcftools view -cGP Csites.18.afs Csites.bcf > /dev/null 2> Csites.19.afs
bcftools view -cGP Csites.19.afs Csites.bcf > /dev/null 2> Csites.20.afs
bcftools view -cGP Csites.20.afs Csites.bcf > /dev/null 2> Csites.21.afs
bcftools view -cGP Csites.21.afs Csites.bcf > /dev/null 2> Csites.22.afs
bcftools view -cGP Csites.22.afs Csites.bcf > /dev/null 2> Csites.23.afs
bcftools view -cGP Csites.23.afs Csites.bcf > /dev/null 2> Csites.24.afs
bcftools view -cGP Csites.24.afs Csites.bcf > /dev/null 2> Csites.25.afs
bcftools view -cGP Csites.25.afs Csites.bcf > /dev/null 2> Csites.26.afs
bcftools view -cGP Csites.26.afs Csites.bcf > /dev/null 2> Csites.27.afs
bcftools view -cGP Csites.27.afs Csites.bcf > /dev/null 2> Csites.28.afs
bcftools view -cGP Csites.28.afs Csites.bcf > /dev/null 2> Csites.29.afs
bcftools view -cGP Csites.29.afs Csites.bcf > /dev/null 2> Csites.30.afs
cd /labs/evolution/projects/sosorum/
samtools mpileup -C 50 -d 50 -I -g -q 10 -Q 15 -f Esosorum_40mil_qc_mmp84.fasta alnE-BS-W*sorted.bam > Wsites.bcf
bcftools view -cGP cond2 Wsites.bcf > /dev/null 2> Wsites.1.afs
bcftools view -cGP Wsites.1.afs Wsites.bcf > /dev/null 2> Wsites.2.afs
bcftools view -cGP Wsites.2.afs Wsites.bcf > /dev/null 2> Wsites.3.afs
bcftools view -cGP Wsites.3.afs Wsites.bcf > /dev/null 2> Wsites.4.afs
bcftools view -cGP Wsites.4.afs Wsites.bcf > /dev/null 2> Wsites.5.afs
bcftools view -cGP Wsites.5.afs Wsites.bcf > /dev/null 2> Wsites.6.afs
bcftools view -cGP Wsites.6.afs Wsites.bcf > /dev/null 2> Wsites.7.afs
bcftools view -cGP Wsites.7.afs Wsites.bcf > /dev/null 2> Wsites.8.afs
bcftools view -cGP Wsites.8.afs Wsites.bcf > /dev/null 2> Wsites.9.afs
bcftools view -cGP Wsites.9.afs Wsites.bcf > /dev/null 2> Wsites.10.afs
bcftools view -cGP Wsites.10.afs Wsites.bcf > /dev/null 2> Wsites.11.afs
bcftools view -cGP Wsites.11.afs Wsites.bcf > /dev/null 2> Wsites.12.afs
bcftools view -cGP Wsites.12.afs Wsites.bcf > /dev/null 2> Wsites.13.afs
bcftools view -cGP Wsites.13.afs Wsites.bcf > /dev/null 2> Wsites.14.afs
bcftools view -cGP Wsites.14.afs Wsites.bcf > /dev/null 2> Wsites.15.afs
bcftools view -cGP Wsites.15.afs Wsites.bcf > /dev/null 2> Wsites.16.afs
bcftools view -cGP Wsites.16.afs Wsites.bcf > /dev/null 2> Wsites.17.afs
bcftools view -cGP Wsites.17.afs Wsites.bcf > /dev/null 2> Wsites.18.afs
bcftools view -cGP Wsites.18.afs Wsites.bcf > /dev/null 2> Wsites.19.afs
bcftools view -cGP Wsites.19.afs Wsites.bcf > /dev/null 2> Wsites.20.afs
bcftools view -cGP Wsites.20.afs Wsites.bcf > /dev/null 2> Wsites.21.afs
bcftools view -cGP Wsites.21.afs Wsites.bcf > /dev/null 2> Wsites.22.afs
bcftools view -cGP Wsites.22.afs Wsites.bcf > /dev/null 2> Wsites.23.afs
bcftools view -cGP Wsites.23.afs Wsites.bcf > /dev/null 2> Wsites.24.afs
bcftools view -cGP Wsites.24.afs Wsites.bcf > /dev/null 2> Wsites.25.afs
bcftools view -cGP Wsites.25.afs Wsites.bcf > /dev/null 2> Wsites.26.afs
bcftools view -cGP Wsites.26.afs Wsites.bcf > /dev/null 2> Wsites.27.afs
bcftools view -cGP Wsites.27.afs Wsites.bcf > /dev/null 2> Wsites.28.afs
bcftools view -cGP Wsites.28.afs Wsites.bcf > /dev/null 2> Wsites.29.afs
bcftools view -cGP Wsites.29.afs Wsites.bcf > /dev/null 2> Wsites.30.afs
Ready to submit 2 jobs to pbs (y/n): y
Proceeding with pbs submission
428670
428671
#To check for convergence: grep theta *afs
#To get theta and pi (from final runs, #30): grep theta *30.afs
#When done, put all .afs files in new directory called thetas_variantcalling/
---
#16june15: Here's what the numbers are looking like at the end of the runs, for all populations, not just wild pooled:
Wsites.21.afs:[bcf_p1_read_prior] heterozygosity=0.009595, theta=0.005964
Wsites.22.afs:[bcf_p1_read_prior] heterozygosity=0.009282, theta=0.005809
Wsites.23.afs:[bcf_p1_read_prior] heterozygosity=0.009005, theta=0.005672
Wsites.24.afs:[bcf_p1_read_prior] heterozygosity=0.008758, theta=0.005550
Wsites.25.afs:[bcf_p1_read_prior] heterozygosity=0.008536, theta=0.005442
Wsites.26.afs:[bcf_p1_read_prior] heterozygosity=0.008337, theta=0.005344
Wsites.27.afs:[bcf_p1_read_prior] heterozygosity=0.008158, theta=0.005256
Wsites.28.afs:[bcf_p1_read_prior] heterozygosity=0.007997, theta=0.005177
Wsites.29.afs:[bcf_p1_read_prior] heterozygosity=0.007850, theta=0.005106
Wsites.30.afs:[bcf_p1_read_prior] heterozygosity=0.007717, theta=0.005041
You could probably call that convergence...they will not really bounce around, but the changes will just get really small, but it would be nice to run 10-20 more iterations. So I'll start with runs 31-50 for wild pooled and see how much a difference it makes.. then may do the same for captive pooled and all groups.
wrap_qsub_rc_theta_CW31-50.pl
429242
---
#19june15:
Wsites.30.afs:[bcf_p1_read_prior] heterozygosity=0.007717, theta=0.005041
Wsites.31.afs:[bcf_p1_read_prior] heterozygosity=0.007596, theta=0.004982
Wsites.32.afs:[bcf_p1_read_prior] heterozygosity=0.007486, theta=0.004928
Wsites.33.afs:[bcf_p1_read_prior] heterozygosity=0.007385, theta=0.004879
Wsites.34.afs:[bcf_p1_read_prior] heterozygosity=0.007293, theta=0.004834
Wsites.35.afs:[bcf_p1_read_prior] heterozygosity=0.007208, theta=0.004793
Wsites.36.afs:[bcf_p1_read_prior] heterozygosity=0.007130, theta=0.004756
Wsites.37.afs:[bcf_p1_read_prior] heterozygosity=0.007058, theta=0.004721
Wsites.38.afs:[bcf_p1_read_prior] heterozygosity=0.006992, theta=0.004689
Wsites.39.afs:[bcf_p1_read_prior] heterozygosity=0.006931, theta=0.004660
Wsites.40.afs:[bcf_p1_read_prior] heterozygosity=0.006875, theta=0.004633
Wsites.41.afs:[bcf_p1_read_prior] heterozygosity=0.006823, theta=0.004608
Wsites.42.afs:[bcf_p1_read_prior] heterozygosity=0.006774, theta=0.004585
Wsites.43.afs:[bcf_p1_read_prior] heterozygosity=0.006729, theta=0.004563
Wsites.44.afs:[bcf_p1_read_prior] heterozygosity=0.006687, theta=0.004543
Wsites.45.afs:[bcf_p1_read_prior] heterozygosity=0.006649, theta=0.004525
Wsites.46.afs:[bcf_p1_read_prior] heterozygosity=0.006612, theta=0.004507
Wsites.47.afs:[bcf_p1_read_prior] heterozygosity=0.006578, theta=0.004491
Wsites.48.afs:[bcf_p1_read_prior] heterozygosity=0.006547, theta=0.004476
Wsites.49.afs:[bcf_p1_read_prior] heterozygosity=0.006517, theta=0.004462
Wsites.50.afs:[bcf_p1_read_prior] heterozygosity=0.006490, theta=0.004449
It looks like its starting to level off some let's go 10 or 20 more iterations...
wrap_qsub_rc_theta_CW31-50.pl
#kept same name but used vim to change to 51:70 runs
#431723
#Then edited wrap_qsub_rc_theta_CW31-50.pl to run runs 18-50 of captive pooled, "C".
#431724
#Then I started runs 31-50 for all 9 subpops / groups since I've only done 30 runs for them so far and it doesn't look like they've reached convergence.
#wrap_qsub_rc_theta.pl in /thetas_variantcalling/
#431727, 431728, 431729, 431730, 431731, 431732, 431733, 431734, 431735
#22june15: oops, I didn't have the bam files in the same directory as the perl script. Reran these; 432249, 432250, 432251, 432252, 432253, 432254, 432255, 432256, 432257
#j/k the bcf files weren't in the same directory. one more time: 432427, 432428, 432429, 432430, 432431, 432432, 432433, 432434, 432435
#23June15: running wild subpops for 20 more runs (51-70); changed script: wrap_qsub_rc_theta.pl: 432483, 432484, 432485, 432486, 432487
#ran more for C groups to get them up to 70 runs too (at least): 432493, 432494, 432495, 432496
#ran more for C pooled, 44:70: 432835
#24june15: ran 71:100 runs for wild pooled and wild subpops: 433034, 433035, 433036, 433037, 433038, 433039
and captive groups: 433055, 433056, 433057,433058
#to check for convergence:
#in unix:
grep theta W-UP*afs | perl -p -i -e 's/^[A-Za-z\-\.]+//' | perl -p -i -e 's/\.afs\S+//' | perl -p -i -e 's/[a-z=,]+//g' > thetaW-UP.txt
#in R:
theta<-read.table("thetaW-UP.txt",header=F)
plot(theta[,1],theta[,2])
plot(theta[,1],theta[,3])
#convergence happened at ~60 but double check this with Csites (captive pooled) when it finishes.
#note: thetas and pi are pretty much the same for all groups, even the pooled ones, because there is basically no structure. Will recalculate Ne based on theta; remember this assumes a constant pop size over time (no recent bottleneck).
#28june15: ran C pooled 56:70; 434724
#29june15: I'm calling it -- convergence at 60 runs.
grep theta *60.afs
C-ELsites.60.afs:[bcf_p1_read_prior] heterozygosity=0.006165, theta=0.004285
C-F1sites.60.afs:[bcf_p1_read_prior] heterozygosity=0.006112, theta=0.004265
C-F2sites.60.afs:[bcf_p1_read_prior] heterozygosity=0.005932, theta=0.004158
C-F3sites.60.afs:[bcf_p1_read_prior] heterozygosity=0.006497, theta=0.004490
Csites.60.afs:[bcf_p1_read_prior] heterozygosity=0.006033, theta=0.004250
W-CSsites.60.afs:[bcf_p1_read_prior] heterozygosity=0.007164, theta=0.004813
W-ELsites.60.afs:[bcf_p1_read_prior] heterozygosity=0.006159, theta=0.004275
W-POsites.60.afs:[bcf_p1_read_prior] heterozygosity=0.006700, theta=0.004556
W-SGsites.60.afs:[bcf_p1_read_prior] heterozygosity=0.006035, theta=0.004193
Wsites.60.afs:[bcf_p1_read_prior] heterozygosity=0.006291, theta=0.004356
W-UPsites.60.afs:[bcf_p1_read_prior] heterozygosity=0.006241, theta=0.004177
#Updated wild population Ne's using = (theta)/(4)(mutation rate 1.1 x 10^-8 from human genome (wikipedia))
W-CS: 109386.36363636363636
W-EL: 97159.09090909090909
W-PO: 103545.45454545454545
W-SG: 95295.45454545454545
W-UP: 94931.81818181818182
W total: 99000