Post date: Oct 11, 2019 10:46:27 PM
## from /uufs/chpc.utah.edu/common/home/u6000989/projects/timema_genarch/color_exp_2019, the code is in selectionGradBayes.R
## compute expected fitnesses from color
compExpFit<-function(b=NA,xx=NA,yy=NA){
lgm<-b[6] + b[1]*xx + b[2]*yy + b[3]*xx^2 + b[4]*yy^2 + b[5] * xx * yy
gm<-1/(1+exp(-1 * lgm))
gm
}
## estiamte expected fitness by block
fitAC<-Dac$y
fitMM<-Dmm$y
for(i in 1:3){ ## blocks
betas<-c(est_hcl_ac[(6*(i))+1:6,3])
bids<-which(Dac$bk==(i))
fitAC[bids]<-compExpFit(betas,Dac$x1[bids],Dac$x2[bids])
betas<-c(est_hcl_mm[(6*(i))+1:6,3])
bids<-which(Dmm$bk==(i))
fitMM[bids]<-compExpFit(betas,Dmm$x1[bids],Dmm$x2[bids])
}
o_ac<-cbind(Dac$y,Dac$bk,fitAC)
o_mm<-cbind(Dmm$y,Dmm$bk,fitMM)
write.table(file="fitAC.txt",o_ac,col.names=FALSE,row.names=FALSE)
write.table(file="fitMM.txt",o_mm,col.names=FALSE,row.names=FALSE)
o_combined<-matrix(NA,nrow=454,ncol=4)
o_combined[ac,1:3]<-o_ac
o_combined[mm,1:3]<-o_mm
o_combined[ac,4]<-1
o_combined[mm,4]<-2
write.table(file="fitAll.txt",o_combined,col.names=FALSE,row.names=FALSE)
## now working form /uufs/chpc.utah.edu/common/home/u6000989/projects/timema_genarch/color_exp_2019_gbs/Gemma_mel
## try to explain expected fitness by genotype
## from formatPhFit.R
## line up expected fitness with phenotype/genotype data
N<-454
keep<-scan("../Variants/hiCov2x.txt") ## single column, denotes whether 1 or not 0 mean cov for an individual > 2
## true for 437, going to drop the others
gids<-read.table("../Variants/IdsGen.txt",header=FALSE)
## read phenotype and fitness data
phtab<-read.table("../Variants/2019_Tchumash_transplant_table.csv",sep=",",header=TRUE)
phtab2<-read.table("../../color_exp_2019/2019_Tchumash_transplant_table.csv",sep=",",header=TRUE)
fit<-read.table("fitAll.txt",header=FALSE)
ph<-matrix(NA,nrow=N,ncol=8)
ph<-as.data.frame(ph)
for(i in 1:N){
a<-which(as.character(phtab[,1])==as.character(gids[i,1]))
if(length(a)==1){
ph[i,]<-phtab[a,c(1,3:6,11:13)]
}
}
colnames(ph)<-colnames(phtab)[c(1,3:6,11:13)]
#number codes
# Geneder: M = 2, F = 1
# Color: G = 1, GB = 2, I = 3, M = 4
# Treatment: AC = 1, MM = 2
## fit
fitt<-matrix(NA,nrow=N,ncol=4)
fitt<-as.data.frame(fitt)
for(i in 1:N){
a<-which(as.character(phtab2[,1])==as.character(gids[i,1]))
if(length(a)==1){
fitt[i,]<-fit[a,]
}
}
## subset those that have good genetic data
g_hc<-g[,keep==1]
ph_hc<-ph[keep==1,]
fitt_hc<-fitt[keep==1,]
dim(g_hc)
#[1] 11990 437
dim(ph_hc)
#[1] 437 8
dim(fitt_hc)
#[1] 437 4
fitt_hc[,4]<-ph_hc$Treatment
## now by treatment, gender and survival (former is kind of for fun)
AC<-which(fitt_hc[,4]==1)
MM<-which(fitt_hc[,4]==2)
colnames(fitt_hc)<-c("survival","blk","exfit","tretament")
write.table(file="exfit_AC.txt",fitt_hc[AC,1:3],row.names=FALSE,col.names=TRUE,quote=FALSE)
write.table(file="exfit_MM.txt",fitt_hc[MM,1:3],row.names=FALSE,col.names=TRUE,quote=FALSE)
#################################################
fit_AC<-read.table("exfit_AC.txt",header=TRUE)
fit_MM<-read.table("exfit_MM.txt",header=TRUE)
##### version without snp 6 but with PC1
pc<-prcomp(c_AC_pip[,-6],center=TRUE,scale=FALSE)
#Importance of components:
# PC1 PC2 PC3 PC4 PC5
#Standard deviation 0.9763 0.4129 0.37197 0.30890 0.26942
#Proportion of Variance 0.6665 0.1192 0.09676 0.06673 0.05076
#Cumulative Proportion 0.6665 0.7857 0.88251 0.94924 1.00000
pc_AC<-pc$x[,1]
library(BMS)
V<-15*2 ## variables
form<-p_AC[,2] ~ .^2
mod<-model.matrix(form,dat=as.data.frame(c_AC_pip[,-6]))
mod<-cbind(mod[,-1])
pcmod<-mod
for(i in 1:(V/2)){
pcmod[,i]<-pcmod[,i] * pc_AC
}
dfAC<-data.frame(fit_AC$exfit,blk2=as.numeric(fit_AC$blk==2),blk3=as.numeric(fit_AC$blk==3),mod,pcmod)
#lexfit<-log(fit_AC$exfit/(1-fit_AC$exfit))
#dfAC<-data.frame(lexfit,mod,pcmod)
bmsAC<-vector("list",5)
for(i in 1:5){
bmsAC[[i]]<-bms(dfAC,burn=10000,iter=200000,mprior="uniform",g="UIP",user.int=FALSE,fixed.reg=c(1,2))
}
bpipsAC<-matrix(NA,nrow=V,ncol=5)
bmavAC<-matrix(NA,nrow=V,ncol=5)
for(i in 1:5){
oo<-coef(bmsAC[[i]],order.by.pip=FALSE)
bpipsAC[,i]<-oo[-c(1:2),1]
bmavAC[,i]<-oo[-c(1:2),2]
}
paIds<-row.names(oo)[-c(1:2)]
pc<-prcomp(c_MM_pip[,-6],center=TRUE,scale=FALSE)
#Importance of components:
# PC1 PC2 PC3 PC4 PC5
#Standard deviation 0.911 0.4188 0.36091 0.34237 0.24600
#Proportion of Variance 0.632 0.1335 0.09918 0.08925 0.04608
#Cumulative Proportion 0.632 0.7655 0.86467 0.95392 1.00000
pc_MM<-pc$x[,1]
form<-p_MM[,2] ~ .^2
mod<-model.matrix(form,dat=as.data.frame(c_MM_pip[,-6]))
mod<-cbind(mod[,-1])
pcmod<-mod
for(i in 1:(V/2)){
pcmod[,i]<-pcmod[,i] * pc_MM
}
dfMM<-data.frame(fit_MM$exfit,blk2=as.numeric(fit_MM$blk==2),blk3=as.numeric(fit_MM$blk==3),mod,pcmod)
#lexfit<-log(fit_MM$exfit/(1-fit_MM$exfit))
#dfMM<-data.frame(lexfit,mod,pcmod)
bmsMM<-vector("list",5)
for(i in 1:5){
bmsMM[[i]]<-bms(dfMM,burn=10000,iter=200000,mprior="uniform",g="UIP",user.int=FALSE,fixed.reg=c(1,2))
}
bpipsMM<-matrix(NA,nrow=V,ncol=5)
bmavMM<-matrix(NA,nrow=V,ncol=5)
for(i in 1:5){
oo<-coef(bmsMM[[i]],order.by.pip=FALSE)
bpipsMM[,i]<-oo[-c(1:2),1]
bmavMM[,i]<-oo[-c(1:2),2]
}
cs<-rep(c("cornsilk3","coral3","gold1","dodgerblue4"),c(5,10,5,10))
pdf("ColorExFitPips5snpPC.pdf",width=7,height=9)
par(mfrow=c(2,1))
par(mar=c(5.75,4.5,3,0.5))
barplot(apply(bpipsAC,1,mean),ylim=c(0,1),names.arg=paIds,col=cs,xlab="Coefficient",ylab="PIP",cex.lab=1.4,las=2,cex.names=.7)
title(main="(A) Expected fitness on A/C",cex.main=1.5)
barplot(apply(bpipsMM,1,mean),ylim=c(0,1),names.arg=paIds,col=cs,xlab="Coefficient",ylab="PIP",cex.lab=1.4,las=2,cex.names=.7)
title(main="(B) Expected fitness on MM",cex.main=1.5)
dev.off()
pdf("ColorExFitModAvEffs5snpPC.pdf",width=7,height=9)
par(mfrow=c(2,1))
par(mar=c(5.75,4.5,3,0.5))
barplot(apply(bmavAC,1,mean),ylim=c(-.5,.5),names.arg=paIds,col=cs,xlab="Coefficient",ylab="Effect",cex.lab=1.4,las=2,cex.names=.7)
title(main="(A) Expected fitness on A/C",cex.main=1.5)
barplot(apply(bmavMM,1,mean),ylim=c(-.5,.5),names.arg=paIds,col=cs,xlab="Coefficient",ylab="Effect",cex.lab=1.4,las=2,cex.names=.7)
title(main="(B) Expected fitness on MM",cex.main=1.5)
dev.off()
## cross validation from cvScriptExFit.R
library(BMS)
load("models.rdat")
## leave-one-out cross-validation
pc<-prcomp(c_AC_pip[,-6],center=TRUE,scale=FALSE)
pc_AC<-pc$x[,1]
V<-15*2 ## variables
form<-p_AC[,2] ~ .^2
mod<-model.matrix(form,dat=as.data.frame(c_AC_pip[,-6]))
mod<-cbind(mod[,-1])
pcmod<-mod
for(i in 1:(V/2)){
pcmod[,i]<-pcmod[,i] * pc_AC
}
dfAC<-data.frame(fit_AC$exfit,blk2=as.numeric(fit_AC$blk==2),blk3=as.numeric(fit_AC$blk==3),mod,pcmod)
Ni<-dim(dfAC)[1]
kgrp<-sample(1:Ni,Ni,replace=FALSE)
predAC<-rep(NA,Ni)
for(i in 1:200){
dfACx<-dfAC[-c(kgrp==i),]
o<-bms(dfACx,burn=10000,iter=200000,mprior="uniform",g="UIP",user.int=FALSE,fixed.reg=c(1,2))
j<-which(kgrp==i)
predAC[j]<-sum(coef(o,order.by.pip=FALSE)[,2]*dfAC[j,-1])
}
cor.test(dfAC[,1],predAC)
pc<-prcomp(c_MM_pip[,-6],center=TRUE,scale=FALSE)
pc_MM<-pc$x[,1]
form<-p_MM[,2] ~ .^2
mod<-model.matrix(form,dat=as.data.frame(c_MM_pip[,-6]))
mod<-cbind(mod[,-1])
pcmod<-mod
for(i in 1:(V/2)){
pcmod[,i]<-pcmod[,i] * pc_MM
}
dfMM<-data.frame(fit_MM$exfit,blk2=as.numeric(fit_MM$blk==2),blk3=as.numeric(fit_MM$blk==3),mod,pcmod)
Ni<-dim(dfMM)[1]
kgrp<-sample(1:Ni,Ni,replace=FALSE)
predMM<-rep(NA,Ni)
for(i in 1:200){
dfMMx<-dfMM[-c(kgrp==i),]
o<-bms(dfMMx,burn=10000,iter=200000,mprior="uniform",g="UIP",user.int=FALSE,fixed.reg=c(1,2))
j<-which(kgrp==i)
predMM[j]<-sum(coef(o,order.by.pip=FALSE)[,2]*dfMM[j,-1])
}
cor.test(dfMM[,1],predMM)
save(list=ls(),file="models_cv.rdat")
## remove block effect
mnObsAC<-tapply(INDEX=fit_AC$blk,X=dfAC[,1],mean,na.rm=TRUE)
mnPredAC<-tapply(INDEX=fit_AC$blk,X=predAC,mean,na.rm=TRUE)
rObsAC<-dfAC[,1]
rPredAC<-predAC
for(i in 1:3){
a<-which(fit_AC$blk==i)
rObsAC[a]<-rObsAC[a]-mnObsAC[i]
rPredAC[a]<-rPredAC[a]-mnPredAC[i]
}
cor.test(rObsAC,rPredAC)
#t = 9.567, df = 195, p-value < 2.2e-16
#alternative hypothesis: true correlation is not equal to 0
#95 percent confidence interval:
# 0.4618854 0.6533604
#sample estimates:
# cor
#0.5651872
mnObsMM<-tapply(INDEX=fit_MM$blk,X=dfMM[,1],mean,na.rm=TRUE)
mnPredMM<-tapply(INDEX=fit_MM$blk,X=predMM,mean,na.rm=TRUE)
rObsMM<-dfMM[,1]
rPredMM<-predMM
for(i in 1:3){
a<-which(fit_MM$blk==i)
rObsMM[a]<-rObsMM[a]-mnObsMM[i]
rPredMM[a]<-rPredMM[a]-mnPredMM[i]
}
cor.test(rObsMM,rPredMM)
#t = 7.9074, df = 195, p-value = 1.907e-13
#alternative hypothesis: true correlation is not equal to 0
#95 percent confidence interval:
# 0.3790579 0.5917751
#sample estimates:
# cor
#0.4927429
## 3 way epistasis