Post date: Jun 23, 2015 4:11:27 PM
~/Documents/VarNe/varne -a Ne/in_p_M-14.txt -b Ne/in_p_L1-F100.txt -l 51371 -t 231 -n 2000 -x 1000
Reading input from files: Ne/in_p_M-14.txt and Ne/in_p_L1-F100.txt
Estimating effective population size
hat{Ne} = 454.487
median of posterior = 454.215
50% credible intervals = 451.723, 456.883
90% credible intervals = 447.878, 460.701
95% credible intervals = 446.306, 462.21
~/Documents/VarNe/varne -a Ne/in_p_M-14.txt -b Ne/in_p_L2-F87.txt -l 51371 -t 194 -n 2000 -x 1000
Reading input from files: Ne/in_p_M-14.txt and Ne/in_p_L2-F87.txt
Estimating effective population size
hat{Ne} = 524.813
median of posterior = 524.924
50% credible intervals = 521.525, 528.306
90% credible intervals = 517.086, 532.61
95% credible intervals = 515.33, 534.379
~/Documents/VarNe/varne -a Ne/in_p_M-14.txt -b Ne/in_p_L3-F85.txt -l 51371 -t 189 -n 2000 -x 1000
Reading input from files: Ne/in_p_M-14.txt and Ne/in_p_L3-F85.txt
Estimating effective population size
hat{Ne} = 549.623
median of posterior = 549.266
50% credible intervals = 546.214, 552.761
90% credible intervals = 540.949, 557.65
95% credible intervals = 539.256, 558.727
## solve for ne in L1, L2, and L3, assume mung Ne has been constant, r is prop. of time on Mung
NL = (NM * Ne * r - NM * Ne)/(Ne * r - NM)
NL1 = 253.7
NL2 = 314.8
NL3 = 335.4
I generated plots that show allele frequencies at about 50,000 SNPs in L(1,2,3) and L(1,2,3)R lines (here the reversion lines are at generation 45) (see /labs/evolution/projects/cmaclentil/popgen/SeedBeetleSim/). Gray points show the actual allele frequencies and the solid lines give the null expectations based on our Ne estimates (black lines give the 1st and 99th quantiles of the null distribution, and the red lines give the 0.1th and 99.9th quantiles). In L1 x L1R there are many SNPs with one allele at very high frequency in L1 (i.e. allele frequency near 0 or 1) but with intermediate allele frequencies in L1R, which is quite cool. We don't really see this in L2 x L2R or L3 x L3R. Indeed, in both of these there are fewer instances where the allele frequencies don't match the null expectations. In L3 x L3R there are very few SNPs that really stand out, and in L2 x L3R the biggest signal appears to be with SNPs with intermediate allele frequencies in L2 where one allele was nearly fixed in L2R (i.e. the opposite of L1 x L1R).
The MxL1, MxL2 and MxL3 plots look at the observed and expected differences by drift for M vs. each lentil independently. Given the long time separating these drift can explain alot, but there is still evidence of selection. In particular, we see alleles that were at very high frequency in M that were nearly lost in lentil, and this holds for all three lentil lines. I then ran a separate simulation of a null model with all three lentil lines and the mung line that accounts for the fact that divergence in the mung line would affect all three pairwise comparisons, and that uses all of the correct Ne estimates for the right parts of the evolutionary hisory. I used this the generate null expectations for the average (across the three L lines) allele frequency difference expected by drift between mung and lentil as a function of mung allele frequency (see MxL1L2L3.pdf). Just for fun, I also added the 99.99th and 0.01th quantiles of the null distribution with an orange line. Even at this significance threshold there are multiple SNPs that differ more between mung and all three lentil lines than expected by chance. Again these mostly involve SNPs with one allele that was very common in M that decreased substantially in frequency across the three lentil lines. So, we have pretty convincing evidence of selection contributing to genetic differences between M, LX and LXR lines.
The next step is to look at whether the same alleles that stand out in the M vs. LX comparisons also stand out in the LX vs. LXR comparisons (which I suspect will be the case given the overall allele frequency correlations we looked at previously).
## Plots of observed vs. expected allele frequency correlations and combined allele frequency differentiation (from nullSims.R)
nullSimP<-function(p0=0.5,t=100,ne=100,N=1000){
fst<-1-(1-2/(2*ne))^t
alpha<-p0 * (1-fst)/fst
beta<-(1-p0) * (1-fst)/fst
p<-rbeta(N,alpha,beta)
p
}
## L1 v L1R
pL1<-read.table("p_L1-F100.txt",header=FALSE,sep=",")
pL1R<-read.table("p_L1R-F46.txt",header=FALSE,sep=",")
p0<-c(0.001,seq(0.01,0.99,0.01),0.999)
L<-length(p0)
N<-10000
p<-matrix(NA,nrow=L,ncol=N)
for(i in 1:L){
p[i,]<-nullSimP(p0[i],t=84,ne=455.5,N)
}
pdf("L1xL1R.pdf",width=6,height=6)
par(mar=c(5,5,0.5,0.5))
plot(p0,lb,ylim=c(0,1),type='n',xlab="frequency in L1",ylab="frequency in L1R",cex.lab=1.4)
points(pL1[,1],pL1R[,1],col="gray")
lb<-apply(p,1,quantile,probs=0.01)
ub<-apply(p,1,quantile,probs=0.99)
lines(p0,lb,lwd=1.5)
lines(p0,ub,lwd=1.5)
lb<-apply(p,1,quantile,probs=0.001)
ub<-apply(p,1,quantile,probs=0.999)
lines(p0,lb,lwd=1.5,col="darkred")
lines(p0,ub,lwd=1.5,col="darkred")
dev.off()
## L2 v L2R
pL2<-read.table("p_L2-F87.txt",header=FALSE,sep=",")
pL2R<-read.table("p_L2R-F45.txt",header=FALSE,sep=",")
p0<-c(0.001,seq(0.01,0.99,0.01),0.999)
L<-length(p0)
N<-10000
p<-matrix(NA,nrow=L,ncol=N)
for(i in 1:L){
p[i,]<-nullSimP(p0[i],t=84,ne=437.5,N)
}
pdf("L2xL2R.pdf",width=6,height=6)
par(mar=c(5,5,0.5,0.5))
plot(p0,lb,ylim=c(0,1),type='n',xlab="frequency in L2",ylab="frequency in L2R",cex.lab=1.4)
points(pL2[,1],pL2R[,1],col="gray")
lb<-apply(p,1,quantile,probs=0.01)
ub<-apply(p,1,quantile,probs=0.99)
lines(p0,lb,lwd=1.5)
lines(p0,ub,lwd=1.5)
lb<-apply(p,1,quantile,probs=0.001)
ub<-apply(p,1,quantile,probs=0.999)
lines(p0,lb,lwd=1.5,col="darkred")
lines(p0,ub,lwd=1.5,col="darkred")
dev.off()
## L3 v L3R
pL3<-read.table("p_L3-F85.txt",header=FALSE,sep=",")
pL3R<-read.table("p_L3R-F45.txt",header=FALSE,sep=",")
p0<-c(0.001,seq(0.01,0.99,0.01),0.999)
L<-length(p0)
N<-10000
p<-matrix(NA,nrow=L,ncol=N)
for(i in 1:L){
p[i,]<-nullSimP(p0[i],t=85,ne=937,N)
}
pdf("L3xL3R.pdf",width=6,height=6)
par(mar=c(5,5,0.5,0.5))
plot(p0,lb,ylim=c(0,1),type='n',xlab="frequency in L3",ylab="frequency in L3R",cex.lab=1.4)
points(pL3[,1],pL3R[,1],col="gray")
lb<-apply(p,1,quantile,probs=0.01)
ub<-apply(p,1,quantile,probs=0.99)
lines(p0,lb,lwd=1.5)
lines(p0,ub,lwd=1.5)
lb<-apply(p,1,quantile,probs=0.001)
ub<-apply(p,1,quantile,probs=0.999)
lines(p0,lb,lwd=1.5,col="darkred")
lines(p0,ub,lwd=1.5,col="darkred")
dev.off()
## M vs. L1
pL1<-read.table("p_L1-F100.txt",header=FALSE,sep=",")
pM<-read.table("p_M-14.txt",header=FALSE,sep=",")
p0<-c(0.001,seq(0.01,0.99,0.01),0.999)
L<-length(p0)
N<-10000
p<-matrix(NA,nrow=L,ncol=N)
for(i in 1:L){
p[i,]<-nullSimP(p0[i],t=231,ne=454.5,N)
}
pdf("MxL1.pdf",width=6,height=6)
par(mar=c(5,5,0.5,0.5))
plot(p0,lb,ylim=c(0,1),type='n',xlab="frequency in M",ylab="frequency in L1",cex.lab=1.4)
points(pM[,1],pL1[,1],col="gray")
lb<-apply(p,1,quantile,probs=0.01)
ub<-apply(p,1,quantile,probs=0.99)
lines(p0,lb,lwd=1.5)
lines(p0,ub,lwd=1.5)
lb<-apply(p,1,quantile,probs=0.001)
ub<-apply(p,1,quantile,probs=0.999)
lines(p0,lb,lwd=1.5,col="darkred")
lines(p0,ub,lwd=1.5,col="darkred")
dev.off()
## M vs. L2
pM<-read.table("p_M-14.txt",header=FALSE,sep=",")
p0<-c(0.001,seq(0.01,0.99,0.01),0.999)
L<-length(p0)
N<-10000
p<-matrix(NA,nrow=L,ncol=N)
for(i in 1:L){
p[i,]<-nullSimP(p0[i],t=194,ne=525,N)
}
pdf("MxL2.pdf",width=6,height=6)
par(mar=c(5,5,0.5,0.5))
plot(p0,lb,ylim=c(0,1),type='n',xlab="frequency in M",ylab="frequency in L2",cex.lab=1.4)
points(pM[,1],pL2[,1],col="gray")
lb<-apply(p,1,quantile,probs=0.01)
ub<-apply(p,1,quantile,probs=0.99)
lines(p0,lb,lwd=1.5)
lines(p0,ub,lwd=1.5)
lb<-apply(p,1,quantile,probs=0.001)
ub<-apply(p,1,quantile,probs=0.999)
lines(p0,lb,lwd=1.5,col="darkred")
lines(p0,ub,lwd=1.5,col="darkred")
dev.off()
## M vs. L3
pM<-read.table("p_M-14.txt",header=FALSE,sep=",")
p0<-c(0.001,seq(0.01,0.99,0.01),0.999)
L<-length(p0)
N<-10000
p<-matrix(NA,nrow=L,ncol=N)
for(i in 1:L){
p[i,]<-nullSimP(p0[i],t=189,ne=549.5,N)
}
pdf("MxL3.pdf",width=6,height=6)
par(mar=c(5,5,0.5,0.5))
plot(p0,lb,ylim=c(0,1),type='n',xlab="frequency in M",ylab="frequency in L3",cex.lab=1.4)
points(pM[,1],pL3[,1],col="gray")
lb<-apply(p,1,quantile,probs=0.01)
ub<-apply(p,1,quantile,probs=0.99)
lines(p0,lb,lwd=1.5)
lines(p0,ub,lwd=1.5)
lb<-apply(p,1,quantile,probs=0.001)
ub<-apply(p,1,quantile,probs=0.999)
lines(p0,lb,lwd=1.5,col="darkred")
lines(p0,ub,lwd=1.5,col="darkred")
dev.off()
# ## M vs. L1 + L2 + L3
nullSimP2<-function(p0=0.5,t=100,ne=100,N=1000){
pi<-matrix(NA,nrow=N,ncol=3)
t[1]<-t[1]-t[2]
t[2]<-t[2]-t[3]
fst<-1-(1-2/(2*ne))^t
## back to split of L3
alpha<-p0 * (1-fst[3])/fst[3]
beta<-(1-p0) * (1-fst[3])/fst[3]
pi[,3]<-rbeta(N,alpha,beta)
## further back to L2
alpha<-pi[,3] * (1-fst[2])/fst[2]
beta<-(1-pi[,3]) * (1-fst[2])/fst[2]
pi[,2]<-rbeta(N,alpha,beta)
## all the way to L1
alpha<-pi[,2] * (1-fst[1])/fst[1]
beta<-(1-pi[,2]) * (1-fst[1])/fst[1]
pi[,1]<-rbeta(N,alpha,beta)
p<-matrix(NA,nrow=N,ncol=3)
for(j in 4:6){
alpha<-pi[,j-3] * (1-fst[j])/fst[j]
beta<-(1-pi[,j-3]) * (1-fst[j])/fst[j]
p[,j-3]<-rbeta(N,alpha,beta)
}
dp<-apply(p,1,sum) - (3*p0)
dp
}
p0<-c(0.001,seq(0.01,0.99,0.01),0.999)
L<-length(p0)
N<-10000
dp<-matrix(NA,nrow=L,ncol=N)
for(i in 1:L){
dp[i,]<-nullSimP2(p0[i],t=c(131,107,105,100,87,85),ne=c(1149,1149,1149,253.7,314.8,335.4),N)
}
dp<-dp/3
pdf("MxL1L2L3.pdf",width=6,height=6)
par(mar=c(5,5,0.5,0.5))
plot(p0,lb,ylim=c(-1,1),type='n',xlab="frequency in M",ylab="avg. frequency difference",cex.lab=1.4)
obs<-((pL1[,1] + pL2[,1] + pL3[,1]) - 3*pM[,1])/3
points(pM[,1],obs,col="gray")
lb<-apply(dp,1,quantile,probs=0.01)
ub<-apply(dp,1,quantile,probs=0.99)
lines(p0,lb,lwd=1.5)
lines(p0,ub,lwd=1.5)
lb<-apply(dp,1,quantile,probs=0.001)
ub<-apply(dp,1,quantile,probs=0.999)
lines(p0,lb,lwd=1.5,col="darkred")
lines(p0,ub,lwd=1.5,col="darkred")
lb<-apply(dp,1,quantile,probs=0.0001)
ub<-apply(dp,1,quantile,probs=0.9999)
lines(p0,lb,lwd=1.5,col="orange")
lines(p0,ub,lwd=1.5,col="orange")
dev.off()