Modeling expected fitness in the T. chumash experiment

Post date: Oct 11, 2019 10:46:27 PM

## from /uufs/chpc.utah.edu/common/home/u6000989/projects/timema_genarch/color_exp_2019, the code is in selectionGradBayes.R

## compute expected fitnesses from color

compExpFit<-function(b=NA,xx=NA,yy=NA){

lgm<-b[6] + b[1]*xx + b[2]*yy + b[3]*xx^2 + b[4]*yy^2 + b[5] * xx * yy

gm<-1/(1+exp(-1 * lgm))

}

## estiamte expected fitness by block

fitAC<-Dac$y

fitMM<-Dmm$y

for(i in 1:3){ ## blocks

betas<-c(est_hcl_ac[(6*(i))+1:6,3])

bids<-which(Dac$bk==(i))

fitAC[bids]<-compExpFit(betas,Dac$x1[bids],Dac$x2[bids])

betas<-c(est_hcl_mm[(6*(i))+1:6,3])

bids<-which(Dmm$bk==(i))

fitMM[bids]<-compExpFit(betas,Dmm$x1[bids],Dmm$x2[bids])

}

o_ac<-cbind(Dac$y,Dac$bk,fitAC)

o_mm<-cbind(Dmm$y,Dmm$bk,fitMM)

write.table(file="fitAC.txt",o_ac,col.names=FALSE,row.names=FALSE)

write.table(file="fitMM.txt",o_mm,col.names=FALSE,row.names=FALSE)

o_combined<-matrix(NA,nrow=454,ncol=4)

o_combined[ac,1:3]<-o_ac

o_combined[mm,1:3]<-o_mm

o_combined[ac,4]<-1

o_combined[mm,4]<-2

write.table(file="fitAll.txt",o_combined,col.names=FALSE,row.names=FALSE)

## now working form /uufs/chpc.utah.edu/common/home/u6000989/projects/timema_genarch/color_exp_2019_gbs/Gemma_mel

## try to explain expected fitness by genotype

## from formatPhFit.R

## line up expected fitness with phenotype/genotype data

N<-454

keep<-scan("../Variants/hiCov2x.txt") ## single column, denotes whether 1 or not 0 mean cov for an individual > 2

## true for 437, going to drop the others

gids<-read.table("../Variants/IdsGen.txt",header=FALSE)

## read phenotype and fitness data

phtab<-read.table("../Variants/2019_Tchumash_transplant_table.csv",sep=",",header=TRUE)

phtab2<-read.table("../../color_exp_2019/2019_Tchumash_transplant_table.csv",sep=",",header=TRUE)

fit<-read.table("fitAll.txt",header=FALSE)

ph<-matrix(NA,nrow=N,ncol=8)

ph<-as.data.frame(ph)

for(i in 1:N){

a<-which(as.character(phtab[,1])==as.character(gids[i,1]))

if(length(a)==1){

ph[i,]<-phtab[a,c(1,3:6,11:13)]

}

colnames(ph)<-colnames(phtab)[c(1,3:6,11:13)]

#number codes

# Geneder: M = 2, F = 1

# Color: G = 1, GB = 2, I = 3, M = 4

# Treatment: AC = 1, MM = 2

## fit

fitt<-matrix(NA,nrow=N,ncol=4)

fitt<-as.data.frame(fitt)

for(i in 1:N){

a<-which(as.character(phtab2[,1])==as.character(gids[i,1]))

if(length(a)==1){

fitt[i,]<-fit[a,]

}

## subset those that have good genetic data

g_hc<-g[,keep==1]

ph_hc<-ph[keep==1,]

fitt_hc<-fitt[keep==1,]

dim(g_hc)

#[1] 11990 437

dim(ph_hc)

#[1] 437 8

dim(fitt_hc)

#[1] 437 4

fitt_hc[,4]<-ph_hc$Treatment

## now by treatment, gender and survival (former is kind of for fun)

AC<-which(fitt_hc[,4]==1)

MM<-which(fitt_hc[,4]==2)

colnames(fitt_hc)<-c("survival","blk","exfit","tretament")

write.table(file="exfit_AC.txt",fitt_hc[AC,1:3],row.names=FALSE,col.names=TRUE,quote=FALSE)

write.table(file="exfit_MM.txt",fitt_hc[MM,1:3],row.names=FALSE,col.names=TRUE,quote=FALSE)

#################################################

fit_AC<-read.table("exfit_AC.txt",header=TRUE)

fit_MM<-read.table("exfit_MM.txt",header=TRUE)

##### version without snp 6 but with PC1

pc<-prcomp(c_AC_pip[,-6],center=TRUE,scale=FALSE)

#Importance of components:

# PC1 PC2 PC3 PC4 PC5

#Standard deviation 0.9763 0.4129 0.37197 0.30890 0.26942

#Proportion of Variance 0.6665 0.1192 0.09676 0.06673 0.05076

#Cumulative Proportion 0.6665 0.7857 0.88251 0.94924 1.00000

pc_AC<-pc$x[,1]

library(BMS)

V<-15*2 ## variables

form<-p_AC[,2] ~ .^2

mod<-model.matrix(form,dat=as.data.frame(c_AC_pip[,-6]))

mod<-cbind(mod[,-1])

pcmod<-mod

for(i in 1:(V/2)){

pcmod[,i]<-pcmod[,i] * pc_AC

}

dfAC<-data.frame(fit_AC$exfit,blk2=as.numeric(fit_AC$blk==2),blk3=as.numeric(fit_AC$blk==3),mod,pcmod)

#lexfit<-log(fit_AC$exfit/(1-fit_AC$exfit))

#dfAC<-data.frame(lexfit,mod,pcmod)

bmsAC<-vector("list",5)

for(i in 1:5){

bmsAC[[i]]<-bms(dfAC,burn=10000,iter=200000,mprior="uniform",g="UIP",user.int=FALSE,fixed.reg=c(1,2))

}

bpipsAC<-matrix(NA,nrow=V,ncol=5)

bmavAC<-matrix(NA,nrow=V,ncol=5)

for(i in 1:5){

oo<-coef(bmsAC[[i]],order.by.pip=FALSE)

bpipsAC[,i]<-oo[-c(1:2),1]

bmavAC[,i]<-oo[-c(1:2),2]

}

paIds<-row.names(oo)[-c(1:2)]

pc<-prcomp(c_MM_pip[,-6],center=TRUE,scale=FALSE)

#Importance of components:

# PC1 PC2 PC3 PC4 PC5

#Standard deviation 0.911 0.4188 0.36091 0.34237 0.24600

#Proportion of Variance 0.632 0.1335 0.09918 0.08925 0.04608

#Cumulative Proportion 0.632 0.7655 0.86467 0.95392 1.00000

pc_MM<-pc$x[,1]

form<-p_MM[,2] ~ .^2

mod<-model.matrix(form,dat=as.data.frame(c_MM_pip[,-6]))

mod<-cbind(mod[,-1])

pcmod<-mod

for(i in 1:(V/2)){

pcmod[,i]<-pcmod[,i] * pc_MM

}

dfMM<-data.frame(fit_MM$exfit,blk2=as.numeric(fit_MM$blk==2),blk3=as.numeric(fit_MM$blk==3),mod,pcmod)

#lexfit<-log(fit_MM$exfit/(1-fit_MM$exfit))

#dfMM<-data.frame(lexfit,mod,pcmod)

bmsMM<-vector("list",5)

for(i in 1:5){

bmsMM[[i]]<-bms(dfMM,burn=10000,iter=200000,mprior="uniform",g="UIP",user.int=FALSE,fixed.reg=c(1,2))

}

bpipsMM<-matrix(NA,nrow=V,ncol=5)

bmavMM<-matrix(NA,nrow=V,ncol=5)

for(i in 1:5){

oo<-coef(bmsMM[[i]],order.by.pip=FALSE)

bpipsMM[,i]<-oo[-c(1:2),1]

bmavMM[,i]<-oo[-c(1:2),2]

}

cs<-rep(c("cornsilk3","coral3","gold1","dodgerblue4"),c(5,10,5,10))

pdf("ColorExFitPips5snpPC.pdf",width=7,height=9)

par(mfrow=c(2,1))

par(mar=c(5.75,4.5,3,0.5))

barplot(apply(bpipsAC,1,mean),ylim=c(0,1),names.arg=paIds,col=cs,xlab="Coefficient",ylab="PIP",cex.lab=1.4,las=2,cex.names=.7)

title(main="(A) Expected fitness on A/C",cex.main=1.5)

barplot(apply(bpipsMM,1,mean),ylim=c(0,1),names.arg=paIds,col=cs,xlab="Coefficient",ylab="PIP",cex.lab=1.4,las=2,cex.names=.7)

title(main="(B) Expected fitness on MM",cex.main=1.5)

dev.off()

pdf("ColorExFitModAvEffs5snpPC.pdf",width=7,height=9)

par(mfrow=c(2,1))

par(mar=c(5.75,4.5,3,0.5))

barplot(apply(bmavAC,1,mean),ylim=c(-.5,.5),names.arg=paIds,col=cs,xlab="Coefficient",ylab="Effect",cex.lab=1.4,las=2,cex.names=.7)

title(main="(A) Expected fitness on A/C",cex.main=1.5)

barplot(apply(bmavMM,1,mean),ylim=c(-.5,.5),names.arg=paIds,col=cs,xlab="Coefficient",ylab="Effect",cex.lab=1.4,las=2,cex.names=.7)

title(main="(B) Expected fitness on MM",cex.main=1.5)

dev.off()

## cross validation from cvScriptExFit.R

library(BMS)

load("models.rdat")

## leave-one-out cross-validation

pc<-prcomp(c_AC_pip[,-6],center=TRUE,scale=FALSE)

pc_AC<-pc$x[,1]

V<-15*2 ## variables

form<-p_AC[,2] ~ .^2

mod<-model.matrix(form,dat=as.data.frame(c_AC_pip[,-6]))

mod<-cbind(mod[,-1])

pcmod<-mod

for(i in 1:(V/2)){

pcmod[,i]<-pcmod[,i] * pc_AC

}

dfAC<-data.frame(fit_AC$exfit,blk2=as.numeric(fit_AC$blk==2),blk3=as.numeric(fit_AC$blk==3),mod,pcmod)

Ni<-dim(dfAC)[1]

kgrp<-sample(1:Ni,Ni,replace=FALSE)

predAC<-rep(NA,Ni)

for(i in 1:200){

dfACx<-dfAC[-c(kgrp==i),]

o<-bms(dfACx,burn=10000,iter=200000,mprior="uniform",g="UIP",user.int=FALSE,fixed.reg=c(1,2))

j<-which(kgrp==i)

predAC[j]<-sum(coef(o,order.by.pip=FALSE)[,2]*dfAC[j,-1])

}

cor.test(dfAC[,1],predAC)

pc<-prcomp(c_MM_pip[,-6],center=TRUE,scale=FALSE)

pc_MM<-pc$x[,1]

form<-p_MM[,2] ~ .^2

mod<-model.matrix(form,dat=as.data.frame(c_MM_pip[,-6]))

mod<-cbind(mod[,-1])

pcmod<-mod

for(i in 1:(V/2)){

pcmod[,i]<-pcmod[,i] * pc_MM

}

dfMM<-data.frame(fit_MM$exfit,blk2=as.numeric(fit_MM$blk==2),blk3=as.numeric(fit_MM$blk==3),mod,pcmod)

Ni<-dim(dfMM)[1]

kgrp<-sample(1:Ni,Ni,replace=FALSE)

predMM<-rep(NA,Ni)

for(i in 1:200){

dfMMx<-dfMM[-c(kgrp==i),]

o<-bms(dfMMx,burn=10000,iter=200000,mprior="uniform",g="UIP",user.int=FALSE,fixed.reg=c(1,2))

j<-which(kgrp==i)

predMM[j]<-sum(coef(o,order.by.pip=FALSE)[,2]*dfMM[j,-1])

}

cor.test(dfMM[,1],predMM)

save(list=ls(),file="models_cv.rdat")

## remove block effect

mnObsAC<-tapply(INDEX=fit_AC$blk,X=dfAC[,1],mean,na.rm=TRUE)

mnPredAC<-tapply(INDEX=fit_AC$blk,X=predAC,mean,na.rm=TRUE)

rObsAC<-dfAC[,1]

rPredAC<-predAC

for(i in 1:3){

a<-which(fit_AC$blk==i)

rObsAC[a]<-rObsAC[a]-mnObsAC[i]

rPredAC[a]<-rPredAC[a]-mnPredAC[i]

}

cor.test(rObsAC,rPredAC)

#t = 9.567, df = 195, p-value < 2.2e-16

#alternative hypothesis: true correlation is not equal to 0

#95 percent confidence interval:

# 0.4618854 0.6533604

#sample estimates:

# cor

#0.5651872

mnObsMM<-tapply(INDEX=fit_MM$blk,X=dfMM[,1],mean,na.rm=TRUE)

mnPredMM<-tapply(INDEX=fit_MM$blk,X=predMM,mean,na.rm=TRUE)

rObsMM<-dfMM[,1]

rPredMM<-predMM

for(i in 1:3){

a<-which(fit_MM$blk==i)

rObsMM[a]<-rObsMM[a]-mnObsMM[i]

rPredMM[a]<-rPredMM[a]-mnPredMM[i]

}

cor.test(rObsMM,rPredMM)

#t = 7.9074, df = 195, p-value = 1.907e-13

#alternative hypothesis: true correlation is not equal to 0

#95 percent confidence interval:

# 0.3790579 0.5917751

#sample estimates:

# cor

#0.4927429

## 3 way epistasis

Page updated

Google Sites

Report abuse