Post date: Jan 01, 2016 9:44:46 PM
I generated two additional data sets with h2 = 0.3 and 0.05 but with 1000 functional variants (data sets 5 and 6). Here is the additional code in sims.R (note all results are in sims.Rdata):
## data set 5, h2 = 0.3, nsnps = 1000, normal distribution
nfunc<-1000
her<-0.3
funct5m<-matrix(NA,nrow=nfunc,ncol=nr)
effect5m<-matrix(NA,nrow=nfunc,ncol=nr)
bv5m<-matrix(NA,nrow=n,ncol=nr)
ph5m<-matrix(NA,nrow=n,ncol=nr)
her5<-rep(NA,nr)
for(x in 1:nr){
funct5<-sample(1:nl,nfunc,replace=FALSE)
effect5<-rnorm(nfunc,0,1)
bv5<-rep(NA,n)
for(i in 1:n){
bv5[i]<-sum(Ghat[funct5,i] * effect5)
}
v5<--1 * ((her - 1)*var(bv5))/her
ph5<-bv5 + rnorm(n,0,sqrt(v5))
her5[x]<-var(bv5)/var(ph5)
funct5m[,x]<-funct5
effect5m[,x]<-effect5
bv5m[,x]<-bv5
ph5m[,x]<-ph5
}
## data set 6, h2 = 0.05, nsnps = 1000, normal distribution
nfunc<-1000
her<-0.05
funct6m<-matrix(NA,nrow=nfunc,ncol=nr)
effect6m<-matrix(NA,nrow=nfunc,ncol=nr)
bv6m<-matrix(NA,nrow=n,ncol=nr)
ph6m<-matrix(NA,nrow=n,ncol=nr)
her6<-rep(NA,nr)
for(x in 1:nr){
funct6<-sample(1:nl,nfunc,replace=FALSE)
effect6<-rnorm(nfunc,0,1)
bv6<-rep(NA,n)
for(i in 1:n){
bv6[i]<-sum(Ghat[funct6,i] * effect6)
}
v6<--1 * ((her - 1)*var(bv6))/her
ph6<-bv6 + rnorm(n,0,sqrt(v6))
her6[x]<-var(bv6)/var(ph6)
funct6m[,x]<-funct6
effect6m[,x]<-effect6
bv6m[,x]<-bv6
ph6m[,x]<-ph6
}
These were analyzed with the BSLMM in gemma as described for the other 4 data sets, and the output will be in /pscratch/A01963476/sims/gemmasel/output/.
Next, I created binary phenotypic data sets based on all 6 data sets (the two described above and the four described in this post). I used the quantitative fitness metric as a liability score that was converted into a binary fitness metric by assigning 1 to the 50% of individuals with the highest score and 0 to the 50% with the lowest score (note that the error variance is already in the liability score). Here is the code (from liability2binary.R):
## convert quantitative phenotypes (latent liability variable) to binary pheontypes
## assumes population and sample proportion of successes = 0.5 (i.e. half live half die)
N<-592
sims<-50
K<-0.5 ## population prevalence
bin<-rep(c(0,1),each=N/2)
ph1<-matrix(scan("pheno_sims1.txt",n=N*sims,sep=" "),nrow=N,ncol=sims,byrow=TRUE)
bin1<-ph1
for(i in 1:sims){
bin1[,i]<-bin[rank(ph1[,i])]
}
ph2<-matrix(scan("pheno_sims2.txt",n=N*sims,sep=" "),nrow=N,ncol=sims,byrow=TRUE)
bin2<-ph2
for(i in 1:sims){
bin2[,i]<-bin[rank(ph2[,i])]
}
ph3<-matrix(scan("pheno_sims3.txt",n=N*sims,sep=" "),nrow=N,ncol=sims,byrow=TRUE)
bin3<-ph3
for(i in 1:sims){
bin3[,i]<-bin[rank(ph3[,i])]
}
ph4<-matrix(scan("pheno_sims4.txt",n=N*sims,sep=" "),nrow=N,ncol=sims,byrow=TRUE)
bin4<-ph4
for(i in 1:sims){
bin4[,i]<-bin[rank(ph4[,i])]
}
ph5<-matrix(scan("pheno_sims5.txt",n=N*sims,sep=" "),nrow=N,ncol=sims,byrow=TRUE)
bin5<-ph5
for(i in 1:sims){
bin5[,i]<-bin[rank(ph5[,i])]
}
ph6<-matrix(scan("pheno_sims6.txt",n=N*sims,sep=" "),nrow=N,ncol=sims,byrow=TRUE)
bin6<-ph6
for(i in 1:sims){
bin6[,i]<-bin[rank(ph6[,i])]
}
write.table(bin1,"phenobin_sims1.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(bin2,"phenobin_sims2.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(bin3,"phenobin_sims3.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(bin4,"phenobin_sims4.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(bin5,"phenobin_sims5.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
write.table(bin6,"phenobin_sims6.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
All six data sets with a binary metric of fitness were analyzed with the probit BSLMM in gemma. Note that for this I used a shorter run (1 million steps) and burnin (500,000 additional steps). Here is one example:
cd /pscratch/A01963476/sims/gemmasel
sleep 5
gemma -g /labs/evolution/projects/selection_sims/geno_sims.txt -p /labs/evolution/projects/selection_sims/phenobin_sims4.txt -bslmm 3 -n 50 -o out_sim4binary50rep1 -rpace 100 -maf 0.0 -w 500000 -s 1000000