Updated selection estimates for FHA with Mel-Stripe

Post date: Sep 06, 2017 11:19:46 PM

The selection estimates from before () used mel and stripe, we are now working just with mel (now called Mel-Stripe). Thus, I re-did them for both FHA and the within-generation experiment (which is actually a subset of FHA).

The R scripts are in /uufs/chpc.utah.edu/common/home/u6000989/projects/timema_fluct/genomic_change_dark_morph/sel_fha/, and are fhaAbcMel.R and fhaEcolSmel.R. Both scripts are pasted below as are summaries from the FHA analysis, some of which are in the paper. In the end this changed very little, but I did have to standardize (to 42 the number of green bugs) the individuals used for k-means clustering as differences in sample size were giving wonky cluster ddefinitions. I then used cluster centers from one round of clustering to assign individuals in a second round of clustering.

R code from fhaAbcMel.R:

N0<-500

N1<-602

L<-175918

g0<-matrix(scan("pntest_fha2011.txt",n=N0*L,sep=" "),nrow=L,ncol=N0,byrow=TRUE)

g1<-matrix(scan("pntest_fha2013.txt",n=N1*L,sep=" "),nrow=L,ncol=N1,byrow=TRUE)

ids<-read.table("../popgen_fha/snpids.txt",header=F)

cdat<-read.table("fha2013pheno.txt",header=TRUE)

ph<-cdat$color2

ph[cdat$color2==0]<-2+cdat$stripe2[cdat$color2==0]

## define mel and stripy

mlb<-4139489

mub<-6414835

slb<-6414836

mel<-c(which(ids[,2]==702.1 & ids[,3] < mlb),

which(ids[,2]==128 & ids[,3] < mub))

stripy<-which(ids[,2]==128 & ids[,3] >= slb)

g0x<-g0[mel,]

g1x<-g1[mel,]

pcmel<-prcomp(rbind(t(g0x),t(g1x)),center=TRUE,scale=FALSE)

a<-which(ph==1)

b<-which(ph==2)

c<-which(ph==3)

minNum<-length(b)

a<-sample(a,minNum, replace=FALSE)

c<-sample(c,minNum, replace=FALSE)

subdat<-pcmel$x[-c(1:500),]

subdat<-subdat[c(a,b,c),]

omel<-kmeans(subdat[,1:2],centers=6,nstart=100,iter.max=200)

omelAss<-kmeans(pcmel$x[,1:2],centers=omel$centers,n.start=100,iter.max=200)

## to avoid changing code

pcall<-pcmel

oall<-omelAss

## groups based on pca

## 1 = m/g, 2 = s/s, 3 = g/g, 4 = m/m, 5 = m/s, 6 = g/s

## NEW2OLD = 1 = 2; 2 = 6, 3 = 5, 4 = 1, 5 = 4, 6 = 3

## convert new2old

indx<-oall$cluster

new<-c(2,6,5,1,4,3)

for(i in 1:6){

oall$cluster[indx==i]<-new[i]

}

pdf("pca6-2p.pdf",width=10,height=5)

par(mfrow=c(1,2))

par(mar=c(5,5,.5,.5))

plot(pcall$x[,1],pcall$x[,2],pch=19,cex.lab=1.5,xlab="PC1(24.9%)",ylab="PC2(2.9%)",col=c(rep("gray",500),c("brown","green","black")[ph]))

legend(4.5,10,adj=0,c("green-unstriped","green-striped","melanistic","unknown"),pch=19,col=c("green","black","brown","gray"),bty='n')

plot(pcall$x[,1],pcall$x[,2],type="n",cex.lab=1.5,xlab="PC1(24.9%)",ylab="PC2(2.9%)")

text(pcall$x[,1],pcall$x[,2],oall$cluster,col=c(rep("gray",500),c("brown","green","black")[ph]))

dev.off()

gen0<-oall$cluster[1:500]

gen1<-oall$cluster[-c(1:500)]

gfr0<-table(gen0)/500

gfr1<-table(gen1)/602

m0<-(sum(gen0==4) + 0.5 * sum(gen0==1 | gen0==5))/(length(gen0))

m1<-(sum(gen1==4) + 0.5 * sum(gen1==1 | gen1==5))/(length(gen1))

s0<-(sum(gen0==2) + 0.5 * sum(gen0==5 | gen0==6))/(length(gen0))

s1<-(sum(gen1==2) + 0.5 * sum(gen1==5 | gen1==6))/(length(gen1))

r0<-(sum(gen0==3) + 0.5 * sum(gen0==1 | gen0==6))/(length(gen0))

r1<-(sum(gen1==3) + 0.5 * sum(gen1==1 | gen1==6))/(length(gen1))

p0<-c(m0,s0,r0)

p1<-c(m1,s1,r1)

hw0<-c(2*m0*r0,s0^2,r0^2,m0^2,2*m0*s0,2*r0*s0)

hw1<-c(2*m1*r1,s1^2,r1^2,m1^2,2*m1*s1,2*r1*s1)

sum0<-cbind(gfr0,hw0,gfr0-hw0)

rownames(sum0)<-c("m/g","s/s","g/g","m/m","m/s","g/s")

colnames(sum0)<-c("obs","hw","obs-hw")

sum1<-cbind(gfr1,hw1,gfr1-hw1)

rownames(sum1)<-c("m/g","s/s","g/g","m/m","m/s","g/s")

colnames(sum1)<-c("obs","hw","obs-hw")

## abc analysis

onesim<-function(W,Gfreq,Ne){

## m at start of next gen

m<-Gfreq[4] + 0.5 * (Gfreq[1] + Gfreq[5])

s<-Gfreq[2] + 0.5 * (Gfreq[5] + Gfreq[6])

g<-1-(m+s)

## starting genotype frequencies

Gfreq<-c(2*m*g,s^2,g^2,m^2,2*m*s,2*g*s)

## post selection expected gen freqs

Gfreq <-Gfreq * W/wbar

## actual

Gcnts<-rmultinom(n=1,size=Ne,prob=Gfreq)

Gfreq<-Gcnts/Ne

## m at start of final gen

m<-Gfreq[4] + 0.5 * (Gfreq[1] + Gfreq[5])

s<-Gfreq[2] + 0.5 * (Gfreq[5] + Gfreq[6])

g<-1-(m+s)

## starting genotype frequencies

Gfreq<-c(2*m*g,s^2,g^2,m^2,2*m*s,2*g*s)

## post selection expected gen freqs

Gfreq <-Gfreq * W/wbar

## actual

Gcnts<-rmultinom(n=1,size=Ne,prob=Gfreq)

Gfreq<-Gcnts/Ne

return(t(Gfreq))

}

## groups based on pca

## 1 = m/g, 2 = s/s, 3 = g/g, 4 = m/m, 5 = m/s, 6 = g/s

abcloop<-function(N=1000,Ne=200,Gfreq=Gfr0,lb=-0.5,ub=0.5){

simfr<-matrix(NA,nrow=N,ncol=6)

s<-matrix(NA,nrow=N,ncol=4)

for(x in 1:N){

## defines m/m and s/s and offset of g/g relative to s/s

s[x,1:3]<-runif(n=3,min=lb,max=ub)

s[x,4]<-runif(n=1,0,1) ## defines s/g relative to g/g and s/s

W<-c(1+s[x,1]+s[x,3]*s[x,4],1+s[x,2],1+s[x,2]+s[x,3],1+s[x,1],1,1+s[x,2]+s[x,3]*s[x,4])

simfr[x,]<-onesim(W=W,Gfreq=Gfreq,Ne=Ne)

}

out<-list(s,simfr)

return(out)

}

oo<-abcloop(N=1000000,Ne=110.3,Gfreq=gfr0,lb=-0.5,ub=0.5)

aout<-abc(target=as.vector(gfr1),param=as.matrix(oo[[1]]),sumstat=as.matrix(oo[[2]]),tol=0.005,method="ridge")

summary(aout)

wout<-matrix(NA,nrow=5000,ncol=6)

wout[,1]<-1+aout$adj.values[,1] + aout$adj.values[,3] * aout$adj.values[,4]

wout[,2]<-1+aout$adj.values[,2]

wout[,3]<-1+aout$adj.values[,2] + aout$adj.values[,3]

wout[,4]<-1+aout$adj.values[,1]

wout[,5]<-1

wout[,6]<-1+aout$adj.values[,2] + aout$adj.values[,3] * aout$adj.values[,4]

## density plots

cs<-c("green","darkgray","green","brown","black","darkgray")

ll<-c(2,1,1,1,2,2)

pdf("fha_rel_fitness.pdf",width=6,height=6)

par(mar=c(5,5,0.5,0.5))

plot(density(wout[,2]),xlim=c(0,1.8),ylim=c(0,4),col=cs[2],lty=ll[2],ylab="density",xlab="relative fitness",cex.lab=1.4,cex.axis=1.1,main="")

for(i in c(1,3,4,6)){

lines(density(wout[,i]),col=cs[i],lty=ll[i])

}

abline(v=1,col=cs[5],lty=ll[5],lwd=2)

legend(0,4,c("m/u","s/s","u/u","m/m","m/s","u/s"),col=cs,lty=ll)

dev.off()

save(list=ls(),file="fhaAbcMel.rdat")

And here are the summaries:

Haplotype freqs.

2011= 0.316 0.602 0.082

2013 = 0.35963455 0.56976744 0.07059801

Genotype freqs.

2011

obs hw obs-hw

m/g 0.034 0.051824 -0.017824

s/s 0.286 0.362404 -0.076404

g/g 0.006 0.006724 -0.000724

m/m 0.042 0.099856 -0.057856

m/s 0.514 0.380464 0.133536

g/s 0.118 0.098728 0.019272

2013

obs hw obs-hw

m/g 0.038205980 0.050778965 -1.257298e-02

s/s 0.272425249 0.324634938 -5.220969e-02

g/g 0.004983389 0.004984079 -6.898379e-07

m/m 0.089700997 0.129337011 -3.963601e-02

m/s 0.501661130 0.409816117 9.184501e-02

g/s 0.093023256 0.080448891 1.257436e-02

Relative fitness values

apply(wout,2,quantile,probs=c(0.5,0.025,0.975))

[,1] [,2] [,3] [,4] [,5] [,6]

50% 0.7551019 0.7242493 0.7858974 0.7176492 1 0.7504112

2.5% 0.4932732 0.5357255 0.3331572 0.5213196 1 0.5283982

97.5% 1.1548987 1.0090563 1.2459021 1.1291778 1 1.0647425

Here is the code from fhaEcolSmel.R, note that the paper results are for relative fitness on Adenostoma:

load("fhaAbcMel.rdat")

gen0<-oall$cluster[1:500]

gfr0<-table(gen0)/500

m0<-(sum(gen0==4) + 0.5 * sum(gen0==1 | gen0==5))/(length(gen0))

s0<-(sum(gen0==2) + 0.5 * sum(gen0==5 | gen0==6))/(length(gen0))

r0<-(sum(gen0==3) + 0.5 * sum(gen0==1 | gen0==6))/(length(gen0))

p0<-c(m0,s0,r0)

## survival data

surv<-scan("SurvivalGbs.txt")

genS<-gen0[surv==1]

gfrS<-table(genS)/140

gfrS<-c(gfrS[1:2],0,gfrS[3:5])

## host data

host<-read.table("ecolGbsHost.txt",header=F)

genA<-gen0[host[,1]=='A']

genC<-gen0[host[,1]=='C']

genAS<-gen0[host[,1]=='A' & surv==1]

genCS<-gen0[host[,1]=='C' & surv==1]

gfrA<-table(genA)/250

gfrC<-table(genC)/250

gfrAS<-table(genAS)/69

gfrAS<-c(gfrAS[1:2],0,gfrAS[3:5])

gfrCS<-table(genCS)/71

gfrCS<-c(gfrCS[1:2],0,gfrCS[3:5])

sumS<-cbind(gfr0,gfrS,gfrA,gfrAS,gfrC,gfrCS)

rownames(sumS)<-c("m/g","s/s","g/g","m/m","m/s","g/s")

colnames(sumS)<-c("all","surv","A","A-surv","C","C-surv")

## posteriors under binom-beta

cntsAll<-table(gen0)

survAll<-table(genS)

survAll<-c(survAll[1:2],0,survAll[3:5])

probAll<-matrix(NA,nrow=5000,ncol=6)

for(i in 1:6){

a<-0.5 + survAll[i]

b<-0.5 + cntsAll[i]-survAll[i]

probAll[,i]<-rbeta(5000,a,b)

}

wall<-apply(probAll,2,quantile,probs=c(0.5,0.05,0.95,0.025,0.975))

colnames(wall)<-c("m/g","s/s","g/g","m/m","m/s","g/s")

## density plots

cs<-c("green","darkgray","green","brown","black","darkgray")

ll<-c(2,1,1,1,2,2)

pdf("ecol_fitness.pdf",width=6,height=6)

par(mar=c(5,5,0.5,0.5))

plot(density(probAll[,2],from=0,to=1),xlim=c(0,1),ylim=c(0,15),col=cs[2],lty=ll[2],xlab="absolute fitness",cex.lab=1.4,cex.axis=1.1,main="")

for(i in c(1,3,4,5,6)){

lines(density(probAll[,i],from=0,to=1),col=cs[i],lty=ll[i])

}

legend(0.7,15,c("m/g","s/s","g/g","m/m","m/s","g/s"),col=cs,lty=ll)

dev.off()

## posteriors under binom-beta

cntsA<-table(genA)

survA<-table(genAS)

survA<-c(survA[1:2],0,survA[3:5])

probA<-matrix(NA,nrow=5000,ncol=6)

for(i in 1:6){

a<-0.5 + survA[i]

b<-0.5 + cntsA[i]-survA[i]

probA[,i]<-rbeta(5000,a,b)

}

wA<-apply(probA,2,quantile,probs=c(0.5,0.05,0.95,0.025,0.975))

colnames(wA)<-c("m/g","s/s","g/g","m/m","m/s","g/s")

## density plots

cs<-c("green","darkgray","green","brown","black","darkgray")

ll<-c(2,1,1,1,2,2)

pdf("ecolA_fitness.pdf",width=6,height=6)

par(mar=c(5,5,0.5,0.5))

plot(density(probA[,2],from=0,to=1),xlim=c(0,1),ylim=c(0,10),col=cs[2],lty=ll[2],xlab="absolute fitness",cex.lab=1.4,cex.axis=1.1,main="")

for(i in c(1,3,4,5,6)){

lines(density(probA[,i],from=0,to=1),col=cs[i],lty=ll[i])

}

legend(0.7,10,c("m/g","s/s","g/g","m/m","m/s","g/s"),col=cs,lty=ll)

dev.off()

## posteriors under binom-beta

cntsC<-table(genC)

survC<-table(genCS)

survC<-c(survC[1:2],0,survC[3:5])

probC<-matrix(NA,nrow=5000,ncol=6)

for(i in 1:6){

a<-0.5 + survC[i]

b<-0.5 + cntsC[i]-survC[i]

probC[,i]<-rbeta(5000,a,b)

}

wC<-apply(probC,2,quantile,probs=c(0.5,0.05,0.95,0.025,0.975))

colnames(wC)<-c("m/g","s/s","g/g","m/m","m/s","g/s")

## relative to s/s

cs<-c("green","darkgray","green","brown","black","darkgray")

ll<-c(2,1,1,1,2,2)

pdf("ecolA_rel_fitness.pdf",width=6,height=6)

par(mar=c(5,5,0.5,0.5))

plot(density(probA[,1]/probA[,2],from=0,to=2),xlim=c(0,2),ylim=c(0,2.5),col=cs[1],lty=ll[1],ylab="density",xlab="relative fitness",cex.lab=1.4,cex.axis=1.1,main="")

for(i in c(3:6)){

lines(density(probA[,i]/probA[,2],from=0,to=2),col=cs[i],lty=ll[i])

}

abline(v=1,col=cs[2],lty=ll[2],lwd=2)

legend(1.5,2.5,c("m/u","s/s","u/u","m/m","m/s","u/s"),col=cs,lty=ll)

dev.off()

## abs fitness pp matrix

ppmat<-matrix(NA,nrow=6,ncol=6)

for(i in 1:6){for(j in 1:6){

if(i != j){

ppmat[i,j]<-mean(probAll[,i] > probAll[,j])

}

}}

ppmatA<-matrix(NA,nrow=6,ncol=6)

for(i in 1:6){for(j in 1:6){

if(i != j){

ppmatA[i,j]<-mean(probA[,i] > probA[,j])

}

}}

ppmatC<-matrix(NA,nrow=6,ncol=6)

for(i in 1:6){for(j in 1:6){

if(i != j){

ppmatC[i,j]<-mean(probC[,i] > probC[,j])

}

}}

rownames(ppmat)<-c("m/g","s/s","g/g","m/m","m/s","g/s")

colnames(ppmat)<-c("m/g","s/s","g/g","m/m","m/s","g/s")

rownames(ppmatA)<-c("m/g","s/s","g/g","m/m","m/s","g/s")

colnames(ppmatA)<-c("m/g","s/s","g/g","m/m","m/s","g/s")

rownames(ppmatC)<-c("m/g","s/s","g/g","m/m","m/s","g/s")

colnames(ppmatC)<-c("m/g","s/s","g/g","m/m","m/s","g/s")

save(list=ls(),file="fhaAbcEcolSMel.rdat")

Page updated

Google Sites

Report abuse