Post date: Nov 08, 2015 4:59:14 PM
Rather than focus on Fst x trait-associated SNPs, we decided to ask whether trait-associated SNPs occurred in high HMM state regions (from 20kb window Fst) more often than expected by chance. This presumably will get rid of some noise and not depend as much on us precisely pinpointing functional variants. Its worth noting that only R12AxR12C (LG8 only) and LAxPRC (LG1 and LG8) have high HMM regions. Such regions are more widespread in the experiment. I tested different combinations of traits and populations. The results are below followed by the code, first for nature, then for the experiment. All necessary files are /labs/evolution/projects/timema_speciation/tcristinae/traits/.
## stripe, 17 unique windows with pip > 0.05
## all 4: obs = 0.14, 8.24X, p = 0.0003
## para: obs = 0.14 5.84X, p = 0.0060
## allopatric: obs = 0.29, 13.43X, p = 0.0003
## all but stripe, 121 unique windows with pip > 0.05
## all 4: obs = 0.048, 2.77X, p = 0.0012
## para: obs = 0.030, 2.12X, p = 0.0297
## allopatric: obs = 0.090, 4.31X, p = 0.0010
## CHCs, 10 unique windows with pip > 0.05; none in high HMM regions
## stripe, 7 unique windows with pip > 0.1
## all 4: obs = 0.2, 11.52X, p = 0.0033
## para: obs = 0.13 58.43X, p = 0.0150
## allopatric: obs = 0.4, 19.23X, p = 0.0065
## all but stripe, 78 unique windows with pip > 0.1
## all 4: obs = 0.054, 3.13X, p = 0.0028
## para: obs = 0.044, 2.26X, p = 0.0441
## allopatric: obs = 0.109, 5.26X, p = 0.0020
## all 13, 134 unique windows, pip > 0.05
## all LGs: obs = 0.053, 3.08X, p = 0.0001
## all but LG8: obs = 0.0, 0X, p = 0.2687
## LG8: obs = 0.32, 1.56X, p = 0.0004
## trait pips
TrBL<-as.matrix(read.table("pip_2013FHA_all_BL_MAF01_sparse.txt",header=F))
TrBW<-as.matrix(read.table("pip_2013FHA_all_residBW_MAF01_sparse.txt",header=F))
TrHW<-as.matrix(read.table("pip_2013FHA_all_residHW_MAF01_sparse.txt",header=F))
TrlatGB<-as.matrix(read.table("pip_2013FHA_all_resid_latGB_MAF01_sparse.txt",header=F))
TrlatL<-as.matrix(read.table("pip_2013FHA_all_resid_latL_MAF01_sparse.txt",header=F))
TrlatRG<-as.matrix(read.table("pip_2013FHA_all_resid_latRG_MAF01_sparse.txt",header=F))
Trfpent<-as.matrix(read.table("pip_2013FHA_females_penta_MAF01_sparse.txt",header=F))
Trfhepta<-as.matrix(read.table("pip_2013FHA_females_hepta_MAF01_sparse.txt",header=F))
Trfnona<-as.matrix(read.table("pip_2013FHA_females_nona_MAF01_sparse.txt",header=F))
Trmpent<-as.matrix(read.table("pip_2013FHA_males_mpenta_MAF01_sparse.txt",header=F))
Trmhepta<-as.matrix(read.table("pip_2013FHA_males_mhepta_MAF01_sparse.txt",header=F))
Trmnona<-as.matrix(read.table("pip_2013FHA_males_mnona_MAF01_sparse.txt",header=F))
Trpatln<-as.matrix(read.table("pip_2013FHA_all_resid_percent_stripe_MAF01_sparse.txt",header=F))
Tlist<-list(TrBL,TrBW,TrHW,TrlatGB,TrlatL,TrlatRG,Trfpent,Trfhepta,Trfnona,Trmpent,Trmhepta,Trmnona,Tr
patln)
## fst
n1<-42687
parname<-c("pi","Dxy","Da","Fst")
parm<-9
d1<-matrix(scan("../fst/nei20k_lgver3_p_tcristHV.txt",n=n1*parm,sep=" "),nrow=n1,ncol=parm,byrow=T)
d2<-matrix(scan("../fst/nei20k_lgver3_p_tcristMR1.txt",n=n1*parm,sep=" "),nrow=n1,ncol=parm,byrow=T)
d3<-matrix(scan("../fst/nei20k_lgver3_p_tcristR12.txt",n=n1*parm,sep=" "),nrow=n1,ncol=parm,byrow=T)
d4<-matrix(scan("../fst/nei20k_lgver3_p_tcristL.txt",n=n1*parm,sep=" "),nrow=n1,ncol=parm,byrow=T)
hmm<-matrix(NA,nrow=n1,ncol=4)
load("../fst/hmm2s_HVpar9.rwsp")
hmm[which(is.na(d1[,1])==F),1]<-fit[[1]]
load("../fst/hmm2s_MR1par9.rwsp")
hmm[which(is.na(d2[,1])==F),2]<-fit[[1]]
load("../fst/hmm2s_R12par9.rwsp")
hmm[which(is.na(d3[,1])==F),3]<-fit[[1]]
load("../fst/hmm2s_Lpar9.rwsp")
hmm[which(is.na(d4[,1])==F),4]<-fit[[1]]
load("../fst/hmm2s_SurvMinInit9.rwsp")
exhmm<-rep(NA,n1)
exhmm[which(is.na(nei[,1])==F)]<-fit[[1]]
#get high pip pattern SNPs
hiPip<-NULL
bnd<-0.1
#bnd<-0.05
hiPip<-Trpatln[which(Trpatln[,4] > bnd),]
uHiPip<-hiPip[which(!duplicated(hiPip[,3])),]
# get high pip SNPs
hiPip<-NULL
#bnd<-0.1
bnd<-0.05
#for(i in 7:12){
for(i in 1:13){
#for(i in 1:12){
x<-Tlist[[i]][which(Tlist[[i]][,4] > bnd),]
if(length(x) > 0){hiPip<-rbind(hiPip,x)}
}
uHiPip<-hiPip
#uHiPip<-hiPip[-duplicated(hiPip[,3]),]
## 20kb window for each hi pip SNPs
n<-dim(uHiPip)[1]
pipWin<-rep(NA,n)
for(i in 1:n){
x<-which(d1[,3]==uHiPip[i,2] & abs(d1[,4]-uHiPip[i,3]) < 20000)
if (length(x) == 1){
pipWin[i]<-x
}
else if (length(x) > 1){
x<-which(d1[,3]==uHiPip[i,2] & abs(d1[,4]-uHiPip[i,3]) < 10000)
if (length(x) == 1){
pipWin[i]<-x
}
else{
cat ("mult snps for ",i,"\n")
}
}
}
pipWin<-pipWin[is.na(pipWin)==FALSE]
pipWin<-unique(pipWin)
mnFst<-apply(hmm==1,1,mean)
mnFst<-apply(hmm[,1:3]==1,1,mean)
mnFst<-hmm[,4]==1
obs<-mean(mnFst[pipWin],na.rm=T)
null<-rep(NA,10000)
for(i in 1:10000){
a<-sample(1:n1,length(pipWin),replace=TRUE)
null[i]<-mean(mnFst[a],na.rm=TRUE)
}
mean(null > obs,na.rm=TRUE)
obs/(mean(null,na.rm=TRUE))
### comparing LGs
mnFst<-apply(hmm==1,1,mean)
lg8<-which(d1[,1]==8)
nlg8<-which(d1[,1]!=8)
x<-1:n1
x<-x[nlg8]
#x<-x[lg8]
obs<-mean(mnFst[pipWin],na.rm=T)
y<-pipWin[pipWin %in% lg8]
y<-pipWin[pipWin %in% nlg8]
obs<-mean(mnFst[y],na.rm=T)
null<-rep(NA,10000)
for(i in 1:10000){
a<-sample(1:n1,length(pipWin),replace=TRUE)
#a<-sample(x,length(y),replace=TRUE)
null[i]<-mean(mnFst[a],na.rm=TRUE)
}
mean(null > obs,na.rm=TRUE)
obs/(mean(null,na.rm=TRUE))
###### code for the experiment ###################
Here our result is simply that more high HMM regions are on LG per size than others
## trait pips
TrBL<-as.matrix(read.table("pip_2013FHA_all_BL_MAF01_sparse.txt",header=F))
TrBW<-as.matrix(read.table("pip_2013FHA_all_residBW_MAF01_sparse.txt",header=F))
TrHW<-as.matrix(read.table("pip_2013FHA_all_residHW_MAF01_sparse.txt",header=F))
TrlatGB<-as.matrix(read.table("pip_2013FHA_all_resid_latGB_MAF01_sparse.txt",header=F))
TrlatL<-as.matrix(read.table("pip_2013FHA_all_resid_latL_MAF01_sparse.txt",header=F))
TrlatRG<-as.matrix(read.table("pip_2013FHA_all_resid_latRG_MAF01_sparse.txt",header=F))
Trfpent<-as.matrix(read.table("pip_2013FHA_females_penta_MAF01_sparse.txt",header=F))
Trfhepta<-as.matrix(read.table("pip_2013FHA_females_hepta_MAF01_sparse.txt",header=F))
Trfnona<-as.matrix(read.table("pip_2013FHA_females_nona_MAF01_sparse.txt",header=F))
Trmpent<-as.matrix(read.table("pip_2013FHA_males_mpenta_MAF01_sparse.txt",header=F))
Trmhepta<-as.matrix(read.table("pip_2013FHA_males_mhepta_MAF01_sparse.txt",header=F))
Trmnona<-as.matrix(read.table("pip_2013FHA_males_mnona_MAF01_sparse.txt",header=F))
Trpatln<-as.matrix(read.table("pip_2013FHA_all_resid_percent_stripe_MAF01_sparse.txt",header=F))
Tlist<-list(TrBL,TrBW,TrHW,TrlatGB,TrlatL,TrlatRG,Trfpent,Trfhepta,Trfnona,Trmpent,Trmhepta,Trmnona
,Trpatln)
## fst
n1<-42687
parname<-c("pi","Dxy","Da","Fst")
parm<-9
d1<-matrix(scan("../fst/nei20k_lgver3_p_tcristHV.txt",n=n1*parm,sep=" "),nrow=n1,ncol=parm,byrow=T)
ex1<-matrix(scan("../fst/nei20k_AvC.txt",n=n1*parm,sep=" "),nrow=n1,ncol=parm,byrow=T)
load("../fst/hmm2s_SurvMinInit9.rwsp")
hmm<-which(is.na(ex1[,1])==F)]<-fit[[1]]
#get high pip pattern SNPs
hiPip<-NULL
bnd<-0.05
hiPip<-Trpatln[which(Trpatln[,4] > bnd),]
uHiPip<-hiPip[which(!duplicated(hiPip[,3])),]
# get high pip SNPs
hiPip<-NULL
bnd<-0.05
for(i in 1:12){
x<-Tlist[[i]][which(Tlist[[i]][,4] > bnd),]
if(length(x) > 0){hiPip<-rbind(hiPip,x)}
}
uHiPip<-hiPip
#uHiPip<-hiPip[-duplicated(hiPip[,3]),]
## 20kb window for each hi pip SNPs
n<-dim(uHiPip)[1]
pipWin<-rep(NA,n)
for(i in 1:n){
x<-which(d1[,3]==uHiPip[i,2] & abs(d1[,4]-uHiPip[i,3]) < 20000)
if (length(x) == 1){
pipWin[i]<-x
}
else if (length(x) > 1){
x<-which(d1[,3]==uHiPip[i,2] & abs(d1[,4]-uHiPip[i,3]) < 10000)
if (length(x) == 1){
pipWin[i]<-x
}
else{
cat ("mult snps for ",i,"\n")
}
}
}
pipWin<-pipWin[is.na(pipWin)==FALSE]
mnFst<-hmm
obs<-mean(mnFst[pipWin],na.rm=T)
null<-rep(NA,10000)
for(i in 1:10000){
a<-sample(1:n1,length(pipWin),replace=TRUE)
null[i]<-mean(mnFst[a],na.rm=TRUE)
}
p<-mean(null > obs,na.rm=TRUE)