Post date: Jun 23, 2015 10:10:26 PM
Given the problems I encountered with dadi and the fact that its diffusion approximation assumes low migration (which could explain the convergence issues), I have decided to instead try a WF model with drift and migration with parameter estimation via ABC. The program I wrote for this is wfabc. In this program, ancestral allele frequencies are assumed to equal the mean allele frequencies for a population pair. Evolution then proceeds for T generations following a split where each population experiences drift (binomial sampling as defined by WF) and gene flow (where p' = (1-m) p + m(p_m)). Right now I just have 3 summary stats, multilocus Fst, and private (maf < 5% in one pop. and not the other) alleles in each population.
Here are the current priors for log-uniform (T and N) and uniform (migration) parameters.
param.Nlb = 200; // log unfiorm prior on Nanc, N0, N1
param.Nub = 20000;
param.Tlb = 10; // log uniform prior on split time
param.Tub = 20000;
param.Mlb = 0; // uniform prior on migration rate
param.Mub = 0.2;
Here is the R code I used to extract the allele frequencies for somewhat independent SNPs associated with selection candidates, stripe, and 5000 random SNPs.
## grab different subsets of SNPs
P<-read.table("tcristAlleleFrequencies.txt",header=TRUE)
## randoms subset of 5000 SNPs
x<-sample(1:dim(P)[1],5000,replace=FALSE)
write.table(P[x,c(3,4)],"pRanHVAxHVC.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(P[x,c(5,8)],"pRanLAxPRC.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(P[x,c(6,7)],"pRanMR1AxMR1C.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(P[x,c(9,10)],"pRanR12AxR12C.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
## stripe SNPs
stripe<-c(270,440236)
## within 10 20kb windows + 10kb
x<-which(P[,1] == stripe[1] & abs(P[,2] - stripe[2]) <= (2e+05 + 10000))
## check autocorrelations
library(zoo)
acs<-matrix(NA,nrow=10,ncol=8)
for(j in 1:8){
ts<-as.zoo(P[x,j+2])
for(k in 1:10){
ts_n<-lag(ts,k=-k,na.pad=TRUE)
acs[k,j]<-cor(ts[!is.na(ts_n)], ts_n[!is.na(ts_n)])
}
}
plot(acs[,1],ylim=c(0,0.5),type='l')
for(j in 2:8){lines(acs[,j])}
## ac varies by pop, but biggest decay happens by lag 3, so keep every 4th SNP
Xstr<-x[seq(1,length(x),4)]
write.table(P[Xstr,c(3,4)],"pStrHVAxHVC.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(P[Xstr,c(5,8)],"pStrLAxPRC.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(P[Xstr,c(6,7)],"pStrMR1AxMR1C.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(P[Xstr,c(9,10)],"pStrR12AxR12C.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
## selection candidates
scands<-read.csv("/labs/evolution/projects/timema_speciation/timema_wgexperiment/popgen/selectionCandidates.csv")
x<-NULL
for(l in 1:25){
a<-which(P[,1] == scands[l,1] & abs(P[,2] - scands[l,2]) <= 10000)
x<-c(x,a)
}
## check autocorrelations
library(zoo)
acs<-matrix(NA,nrow=10,ncol=8)
for(j in 1:8){
ts<-as.zoo(P[x,j+2])
for(k in 1:10){
ts_n<-lag(ts,k=-k,na.pad=TRUE)
acs[k,j]<-cor(ts[!is.na(ts_n)], ts_n[!is.na(ts_n)])
}
}
plot(acs[,1],ylim=c(0,0.5),type='l')
for(j in 2:8){lines(acs[,j])}
## ac varies by pop, but biggest decay happens by lag 3, so keep every 4th SNP
Xscan<-x[seq(1,758,4)]
write.table(P[Xscan,c(3,4)],"pScanHVAxHVC.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(P[Xscan,c(5,8)],"pScanLAxPRC.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(P[Xscan,c(6,7)],"pScanMR1AxMR1C.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(P[Xscan,c(9,10)],"pScanR12AxR12C.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
Here are example runs for abc, 500,000 iterations total (everything is in /labs/evolution/projects/timema_speciation/demog):
# selection candidates
cd /local/scratch/
sleep 19
~/bin/wfabc -i /labs/evolution/projects/timema_speciation/demog/pScanR12AxR12C.txt -l 190 -x 25000 -a 1 > abcSim19_pScanR12AxR12C.txt
rsync -avz abcSim19_pScanR12AxR12C.txt /labs/evolution/projects/timema_speciation/demog/sims/
rm abcSim19_pScanR12AxR12C.txt
# random SNPs
cd /local/scratch/
sleep 19
~/bin/wfabc -i /labs/evolution/projects/timema_speciation/demog/pRanR12AxR12C.txt -l 5000 -x 25000 -a 1 > abcSim19_pRanR12AxR12C.txt
rsync -avz abcSim19_pRanR12AxR12C.txt /labs/evolution/projects/timema_speciation/demog/sims/
rm abcSim19_pRanR12AxR12C.txt
# stripe SNPs
cd /local/scratch/
sleep 19
~/bin/wfabc -i /labs/evolution/projects/timema_speciation/demog/pStrR12AxR12C.txt -l 1397 -x 25000 -a 1 > abcSim19_pStrR12AxR12C.txt
rsync -avz abcSim19_pStrR12AxR12C.txt /labs/evolution/projects/timema_speciation/demog/sims/
rm abcSim19_pStrR12AxR12C.txt