Post date: Sep 06, 2017 11:19:46 PM
The selection estimates from before () used mel and stripe, we are now working just with mel (now called Mel-Stripe). Thus, I re-did them for both FHA and the within-generation experiment (which is actually a subset of FHA).
The R scripts are in /uufs/chpc.utah.edu/common/home/u6000989/projects/timema_fluct/genomic_change_dark_morph/sel_fha/, and are fhaAbcMel.R and fhaEcolSmel.R. Both scripts are pasted below as are summaries from the FHA analysis, some of which are in the paper. In the end this changed very little, but I did have to standardize (to 42 the number of green bugs) the individuals used for k-means clustering as differences in sample size were giving wonky cluster ddefinitions. I then used cluster centers from one round of clustering to assign individuals in a second round of clustering.
R code from fhaAbcMel.R:
N0<-500
N1<-602
L<-175918
g0<-matrix(scan("pntest_fha2011.txt",n=N0*L,sep=" "),nrow=L,ncol=N0,byrow=TRUE)
g1<-matrix(scan("pntest_fha2013.txt",n=N1*L,sep=" "),nrow=L,ncol=N1,byrow=TRUE)
ids<-read.table("../popgen_fha/snpids.txt",header=F)
cdat<-read.table("fha2013pheno.txt",header=TRUE)
ph<-cdat$color2
ph[cdat$color2==0]<-2+cdat$stripe2[cdat$color2==0]
## define mel and stripy
mlb<-4139489
mub<-6414835
slb<-6414836
mel<-c(which(ids[,2]==702.1 & ids[,3] < mlb),
which(ids[,2]==128 & ids[,3] < mub))
stripy<-which(ids[,2]==128 & ids[,3] >= slb)
##
g0x<-g0[mel,]
g1x<-g1[mel,]
pcmel<-prcomp(rbind(t(g0x),t(g1x)),center=TRUE,scale=FALSE)
a<-which(ph==1)
b<-which(ph==2)
c<-which(ph==3)
minNum<-length(b)
a<-sample(a,minNum, replace=FALSE)
c<-sample(c,minNum, replace=FALSE)
subdat<-pcmel$x[-c(1:500),]
subdat<-subdat[c(a,b,c),]
omel<-kmeans(subdat[,1:2],centers=6,nstart=100,iter.max=200)
omelAss<-kmeans(pcmel$x[,1:2],centers=omel$centers,n.start=100,iter.max=200)
## to avoid changing code
pcall<-pcmel
oall<-omelAss
## groups based on pca
## 1 = m/g, 2 = s/s, 3 = g/g, 4 = m/m, 5 = m/s, 6 = g/s
## NEW2OLD = 1 = 2; 2 = 6, 3 = 5, 4 = 1, 5 = 4, 6 = 3
## convert new2old
indx<-oall$cluster
new<-c(2,6,5,1,4,3)
for(i in 1:6){
oall$cluster[indx==i]<-new[i]
}
pdf("pca6-2p.pdf",width=10,height=5)
par(mfrow=c(1,2))
par(mar=c(5,5,.5,.5))
plot(pcall$x[,1],pcall$x[,2],pch=19,cex.lab=1.5,xlab="PC1(24.9%)",ylab="PC2(2.9%)",col=c(rep("gray",500),c("brown","green","black")[ph]))
legend(4.5,10,adj=0,c("green-unstriped","green-striped","melanistic","unknown"),pch=19,col=c("green","black","brown","gray"),bty='n')
plot(pcall$x[,1],pcall$x[,2],type="n",cex.lab=1.5,xlab="PC1(24.9%)",ylab="PC2(2.9%)")
text(pcall$x[,1],pcall$x[,2],oall$cluster,col=c(rep("gray",500),c("brown","green","black")[ph]))
dev.off()
gen0<-oall$cluster[1:500]
gen1<-oall$cluster[-c(1:500)]
gfr0<-table(gen0)/500
gfr1<-table(gen1)/602
m0<-(sum(gen0==4) + 0.5 * sum(gen0==1 | gen0==5))/(length(gen0))
m1<-(sum(gen1==4) + 0.5 * sum(gen1==1 | gen1==5))/(length(gen1))
s0<-(sum(gen0==2) + 0.5 * sum(gen0==5 | gen0==6))/(length(gen0))
s1<-(sum(gen1==2) + 0.5 * sum(gen1==5 | gen1==6))/(length(gen1))
r0<-(sum(gen0==3) + 0.5 * sum(gen0==1 | gen0==6))/(length(gen0))
r1<-(sum(gen1==3) + 0.5 * sum(gen1==1 | gen1==6))/(length(gen1))
p0<-c(m0,s0,r0)
p1<-c(m1,s1,r1)
hw0<-c(2*m0*r0,s0^2,r0^2,m0^2,2*m0*s0,2*r0*s0)
hw1<-c(2*m1*r1,s1^2,r1^2,m1^2,2*m1*s1,2*r1*s1)
sum0<-cbind(gfr0,hw0,gfr0-hw0)
rownames(sum0)<-c("m/g","s/s","g/g","m/m","m/s","g/s")
colnames(sum0)<-c("obs","hw","obs-hw")
sum1<-cbind(gfr1,hw1,gfr1-hw1)
rownames(sum1)<-c("m/g","s/s","g/g","m/m","m/s","g/s")
colnames(sum1)<-c("obs","hw","obs-hw")
## abc analysis
onesim<-function(W,Gfreq,Ne){
## m at start of next gen
m<-Gfreq[4] + 0.5 * (Gfreq[1] + Gfreq[5])
s<-Gfreq[2] + 0.5 * (Gfreq[5] + Gfreq[6])
g<-1-(m+s)
## starting genotype frequencies
Gfreq<-c(2*m*g,s^2,g^2,m^2,2*m*s,2*g*s)
## post selection expected gen freqs
Gfreq <-Gfreq * W/wbar
## actual
Gcnts<-rmultinom(n=1,size=Ne,prob=Gfreq)
Gfreq<-Gcnts/Ne
## m at start of final gen
m<-Gfreq[4] + 0.5 * (Gfreq[1] + Gfreq[5])
s<-Gfreq[2] + 0.5 * (Gfreq[5] + Gfreq[6])
g<-1-(m+s)
## starting genotype frequencies
Gfreq<-c(2*m*g,s^2,g^2,m^2,2*m*s,2*g*s)
## post selection expected gen freqs
Gfreq <-Gfreq * W/wbar
## actual
Gcnts<-rmultinom(n=1,size=Ne,prob=Gfreq)
Gfreq<-Gcnts/Ne
return(t(Gfreq))
}
## groups based on pca
## 1 = m/g, 2 = s/s, 3 = g/g, 4 = m/m, 5 = m/s, 6 = g/s
abcloop<-function(N=1000,Ne=200,Gfreq=Gfr0,lb=-0.5,ub=0.5){
simfr<-matrix(NA,nrow=N,ncol=6)
s<-matrix(NA,nrow=N,ncol=4)
for(x in 1:N){
## defines m/m and s/s and offset of g/g relative to s/s
s[x,1:3]<-runif(n=3,min=lb,max=ub)
s[x,4]<-runif(n=1,0,1) ## defines s/g relative to g/g and s/s
W<-c(1+s[x,1]+s[x,3]*s[x,4],1+s[x,2],1+s[x,2]+s[x,3],1+s[x,1],1,1+s[x,2]+s[x,3]*s[x,4])
simfr[x,]<-onesim(W=W,Gfreq=Gfreq,Ne=Ne)
}
out<-list(s,simfr)
return(out)
}
oo<-abcloop(N=1000000,Ne=110.3,Gfreq=gfr0,lb=-0.5,ub=0.5)
aout<-abc(target=as.vector(gfr1),param=as.matrix(oo[[1]]),sumstat=as.matrix(oo[[2]]),tol=0.005,method="ridge")
summary(aout)
wout<-matrix(NA,nrow=5000,ncol=6)
wout[,1]<-1+aout$adj.values[,1] + aout$adj.values[,3] * aout$adj.values[,4]
wout[,2]<-1+aout$adj.values[,2]
wout[,3]<-1+aout$adj.values[,2] + aout$adj.values[,3]
wout[,4]<-1+aout$adj.values[,1]
wout[,5]<-1
wout[,6]<-1+aout$adj.values[,2] + aout$adj.values[,3] * aout$adj.values[,4]
## density plots
cs<-c("green","darkgray","green","brown","black","darkgray")
ll<-c(2,1,1,1,2,2)
pdf("fha_rel_fitness.pdf",width=6,height=6)
par(mar=c(5,5,0.5,0.5))
plot(density(wout[,2]),xlim=c(0,1.8),ylim=c(0,4),col=cs[2],lty=ll[2],ylab="density",xlab="relative fitness",cex.lab=1.4,cex.axis=1.1,main="")
for(i in c(1,3,4,6)){
lines(density(wout[,i]),col=cs[i],lty=ll[i])
}
abline(v=1,col=cs[5],lty=ll[5],lwd=2)
legend(0,4,c("m/u","s/s","u/u","m/m","m/s","u/s"),col=cs,lty=ll)
dev.off()
save(list=ls(),file="fhaAbcMel.rdat")
And here are the summaries:
Haplotype freqs.
2011= 0.316 0.602 0.082
2013 = 0.35963455 0.56976744 0.07059801
Genotype freqs.
2011
obs hw obs-hw
m/g 0.034 0.051824 -0.017824
s/s 0.286 0.362404 -0.076404
g/g 0.006 0.006724 -0.000724
m/m 0.042 0.099856 -0.057856
m/s 0.514 0.380464 0.133536
g/s 0.118 0.098728 0.019272
2013
obs hw obs-hw
m/g 0.038205980 0.050778965 -1.257298e-02
s/s 0.272425249 0.324634938 -5.220969e-02
g/g 0.004983389 0.004984079 -6.898379e-07
m/m 0.089700997 0.129337011 -3.963601e-02
m/s 0.501661130 0.409816117 9.184501e-02
g/s 0.093023256 0.080448891 1.257436e-02
Relative fitness values
apply(wout,2,quantile,probs=c(0.5,0.025,0.975))
[,1] [,2] [,3] [,4] [,5] [,6]
50% 0.7551019 0.7242493 0.7858974 0.7176492 1 0.7504112
2.5% 0.4932732 0.5357255 0.3331572 0.5213196 1 0.5283982
97.5% 1.1548987 1.0090563 1.2459021 1.1291778 1 1.0647425
Here is the code from fhaEcolSmel.R, note that the paper results are for relative fitness on Adenostoma:
load("fhaAbcMel.rdat")
gen0<-oall$cluster[1:500]
gfr0<-table(gen0)/500
m0<-(sum(gen0==4) + 0.5 * sum(gen0==1 | gen0==5))/(length(gen0))
s0<-(sum(gen0==2) + 0.5 * sum(gen0==5 | gen0==6))/(length(gen0))
r0<-(sum(gen0==3) + 0.5 * sum(gen0==1 | gen0==6))/(length(gen0))
p0<-c(m0,s0,r0)
## survival data
surv<-scan("SurvivalGbs.txt")
genS<-gen0[surv==1]
gfrS<-table(genS)/140
gfrS<-c(gfrS[1:2],0,gfrS[3:5])
## host data
host<-read.table("ecolGbsHost.txt",header=F)
genA<-gen0[host[,1]=='A']
genC<-gen0[host[,1]=='C']
genAS<-gen0[host[,1]=='A' & surv==1]
genCS<-gen0[host[,1]=='C' & surv==1]
gfrA<-table(genA)/250
gfrC<-table(genC)/250
gfrAS<-table(genAS)/69
gfrAS<-c(gfrAS[1:2],0,gfrAS[3:5])
gfrCS<-table(genCS)/71
gfrCS<-c(gfrCS[1:2],0,gfrCS[3:5])
sumS<-cbind(gfr0,gfrS,gfrA,gfrAS,gfrC,gfrCS)
rownames(sumS)<-c("m/g","s/s","g/g","m/m","m/s","g/s")
colnames(sumS)<-c("all","surv","A","A-surv","C","C-surv")
## posteriors under binom-beta
cntsAll<-table(gen0)
survAll<-table(genS)
survAll<-c(survAll[1:2],0,survAll[3:5])
probAll<-matrix(NA,nrow=5000,ncol=6)
for(i in 1:6){
a<-0.5 + survAll[i]
b<-0.5 + cntsAll[i]-survAll[i]
probAll[,i]<-rbeta(5000,a,b)
}
wall<-apply(probAll,2,quantile,probs=c(0.5,0.05,0.95,0.025,0.975))
colnames(wall)<-c("m/g","s/s","g/g","m/m","m/s","g/s")
## density plots
cs<-c("green","darkgray","green","brown","black","darkgray")
ll<-c(2,1,1,1,2,2)
pdf("ecol_fitness.pdf",width=6,height=6)
par(mar=c(5,5,0.5,0.5))
plot(density(probAll[,2],from=0,to=1),xlim=c(0,1),ylim=c(0,15),col=cs[2],lty=ll[2],xlab="absolute fitness",cex.lab=1.4,cex.axis=1.1,main="")
for(i in c(1,3,4,5,6)){
lines(density(probAll[,i],from=0,to=1),col=cs[i],lty=ll[i])
}
legend(0.7,15,c("m/g","s/s","g/g","m/m","m/s","g/s"),col=cs,lty=ll)
dev.off()
## posteriors under binom-beta
cntsA<-table(genA)
survA<-table(genAS)
survA<-c(survA[1:2],0,survA[3:5])
probA<-matrix(NA,nrow=5000,ncol=6)
for(i in 1:6){
a<-0.5 + survA[i]
b<-0.5 + cntsA[i]-survA[i]
probA[,i]<-rbeta(5000,a,b)
}
wA<-apply(probA,2,quantile,probs=c(0.5,0.05,0.95,0.025,0.975))
colnames(wA)<-c("m/g","s/s","g/g","m/m","m/s","g/s")
## density plots
cs<-c("green","darkgray","green","brown","black","darkgray")
ll<-c(2,1,1,1,2,2)
pdf("ecolA_fitness.pdf",width=6,height=6)
par(mar=c(5,5,0.5,0.5))
plot(density(probA[,2],from=0,to=1),xlim=c(0,1),ylim=c(0,10),col=cs[2],lty=ll[2],xlab="absolute fitness",cex.lab=1.4,cex.axis=1.1,main="")
for(i in c(1,3,4,5,6)){
lines(density(probA[,i],from=0,to=1),col=cs[i],lty=ll[i])
}
legend(0.7,10,c("m/g","s/s","g/g","m/m","m/s","g/s"),col=cs,lty=ll)
dev.off()
## posteriors under binom-beta
cntsC<-table(genC)
survC<-table(genCS)
survC<-c(survC[1:2],0,survC[3:5])
probC<-matrix(NA,nrow=5000,ncol=6)
for(i in 1:6){
a<-0.5 + survC[i]
b<-0.5 + cntsC[i]-survC[i]
probC[,i]<-rbeta(5000,a,b)
}
wC<-apply(probC,2,quantile,probs=c(0.5,0.05,0.95,0.025,0.975))
colnames(wC)<-c("m/g","s/s","g/g","m/m","m/s","g/s")
## relative to s/s
cs<-c("green","darkgray","green","brown","black","darkgray")
ll<-c(2,1,1,1,2,2)
pdf("ecolA_rel_fitness.pdf",width=6,height=6)
par(mar=c(5,5,0.5,0.5))
plot(density(probA[,1]/probA[,2],from=0,to=2),xlim=c(0,2),ylim=c(0,2.5),col=cs[1],lty=ll[1],ylab="density",xlab="relative fitness",cex.lab=1.4,cex.axis=1.1,main="")
for(i in c(3:6)){
lines(density(probA[,i]/probA[,2],from=0,to=2),col=cs[i],lty=ll[i])
}
abline(v=1,col=cs[2],lty=ll[2],lwd=2)
legend(1.5,2.5,c("m/u","s/s","u/u","m/m","m/s","u/s"),col=cs,lty=ll)
dev.off()
## abs fitness pp matrix
ppmat<-matrix(NA,nrow=6,ncol=6)
for(i in 1:6){for(j in 1:6){
if(i != j){
ppmat[i,j]<-mean(probAll[,i] > probAll[,j])
}
}}
ppmatA<-matrix(NA,nrow=6,ncol=6)
for(i in 1:6){for(j in 1:6){
if(i != j){
ppmatA[i,j]<-mean(probA[,i] > probA[,j])
}
}}
ppmatC<-matrix(NA,nrow=6,ncol=6)
for(i in 1:6){for(j in 1:6){
if(i != j){
ppmatC[i,j]<-mean(probC[,i] > probC[,j])
}
}}
rownames(ppmat)<-c("m/g","s/s","g/g","m/m","m/s","g/s")
colnames(ppmat)<-c("m/g","s/s","g/g","m/m","m/s","g/s")
rownames(ppmatA)<-c("m/g","s/s","g/g","m/m","m/s","g/s")
colnames(ppmatA)<-c("m/g","s/s","g/g","m/m","m/s","g/s")
rownames(ppmatC)<-c("m/g","s/s","g/g","m/m","m/s","g/s")
colnames(ppmatC)<-c("m/g","s/s","g/g","m/m","m/s","g/s")
save(list=ls(),file="fhaAbcEcolSMel.rdat")