Post date: Sep 10, 2016 8:17:41 PM
Poorer estimates of PVE and n-gamma were obtained with the Rhagolettis data. I suspect this is because of the low sample size (rather than patterns of LD, or even fewer SNPs). I am conducting simulations to test this by replicating genotypes so that the 149 individuals are reproduced to yield the same number of individuals as with Timema = 592. Note that this project is now on kingspeak at /uufs/chpc.utah.edu/common/home/u6000989/projects/selection_sims/ and that these simulations and will be in the rhag sub-directory (without output initially in /scratch/general/lustre/gemmaGomp/ but then being moved to the gemmasel/output5 sub-directory. Here is what I did.
1. Replicated genotypes (random sampling) and simulated phenotypes for the four focal sets of conditions (see previous posts).
R CMD BATCH sims592r.R
nl<-33723
n<-149 ## actual number
G<-matrix(scan("pntest_rhag.txt",n=n*nl,sep=" "),nrow=nl,ncol=n,byrow=TRUE)
Ghat0<-round(G,0)
nsam<-592
x<-sample(1:n,nsam,replace=TRUE)
Ghat<-G[,x]
nr<-50
n<-nsam
## data set 1, h2 = 0.3, nsnps = 100, normal distribution
nfunc<-100
her<-0.3
funct1m<-matrix(NA,nrow=nfunc,ncol=nr)
effect1m<-matrix(NA,nrow=nfunc,ncol=nr)
bv1m<-matrix(NA,nrow=n,ncol=nr)
ph1m<-matrix(NA,nrow=n,ncol=nr)
her1<-rep(NA,nr)
for(x in 1:nr){
funct1<-sample(1:nl,nfunc,replace=FALSE)
effect1<-rnorm(nfunc,0,1)
bv1<-rep(NA,n)
for(i in 1:n){
bv1[i]<-sum(Ghat[funct1,i] * effect1)
}
v1<--1 * ((her - 1)*var(bv1))/her
ph1<-bv1 + rnorm(n,0,sqrt(v1))
her1[x]<-var(bv1)/var(ph1)
funct1m[,x]<-funct1
effect1m[,x]<-effect1
bv1m[,x]<-bv1
ph1m[,x]<-ph1
}
## data set 2, h2 = 0.3, nsnps = 10, normal distribution
nfunc<-10
her<-0.3
funct2m<-matrix(NA,nrow=nfunc,ncol=nr)
effect2m<-matrix(NA,nrow=nfunc,ncol=nr)
bv2m<-matrix(NA,nrow=n,ncol=nr)
ph2m<-matrix(NA,nrow=n,ncol=nr)
her2<-rep(NA,nr)
for(x in 1:nr){
funct2<-sample(1:nl,nfunc,replace=FALSE)
effect2<-rnorm(nfunc,0,1)
bv2<-rep(NA,n)
for(i in 1:n){
bv2[i]<-sum(Ghat[funct2,i] * effect2)
}
v2<--1 * ((her - 1)*var(bv2))/her
ph2<-bv2 + rnorm(n,0,sqrt(v2))
her2[x]<-var(bv2)/var(ph2)
funct2m[,x]<-funct2
effect2m[,x]<-effect2
bv2m[,x]<-bv2
ph2m[,x]<-ph2
}
## data set 3, h2 = 0.05, nsnps = 100, normal distribution
nfunc<-100
her<-0.05
funct3m<-matrix(NA,nrow=nfunc,ncol=nr)
effect3m<-matrix(NA,nrow=nfunc,ncol=nr)
bv3m<-matrix(NA,nrow=n,ncol=nr)
ph3m<-matrix(NA,nrow=n,ncol=nr)
her3<-rep(NA,nr)
for(x in 1:nr){
funct3<-sample(1:nl,nfunc,replace=FALSE)
effect3<-rnorm(nfunc,0,1)
bv3<-rep(NA,n)
for(i in 1:n){
bv3[i]<-sum(Ghat[funct3,i] * effect3)
}
v3<--1 * ((her - 1)*var(bv3))/her
ph3<-bv3 + rnorm(n,0,sqrt(v3))
her3[x]<-var(bv3)/var(ph3)
funct3m[,x]<-funct3
effect3m[,x]<-effect3
bv3m[,x]<-bv3
ph3m[,x]<-ph3
}
## data set 4, h2 = 0.05, nsnps = 10, normal distribution
nfunc<-10
her<-0.05
funct4m<-matrix(NA,nrow=nfunc,ncol=nr)
effect4m<-matrix(NA,nrow=nfunc,ncol=nr)
bv4m<-matrix(NA,nrow=n,ncol=nr)
ph4m<-matrix(NA,nrow=n,ncol=nr)
her4<-rep(NA,nr)
for(x in 1:nr){
funct4<-sample(1:nl,nfunc,replace=FALSE)
effect4<-rnorm(nfunc,0,1)
bv4<-rep(NA,n)
for(i in 1:n){
bv4[i]<-sum(Ghat[funct4,i] * effect4)
}
v4<--1 * ((her - 1)*var(bv4))/her
ph4<-bv4 + rnorm(n,0,sqrt(v4))
her4[x]<-var(bv4)/var(ph4)
funct4m[,x]<-funct4
effect4m[,x]<-effect4
bv4m[,x]<-bv4
ph4m[,x]<-ph4
}
Gout<-cbind(1:nl,rep("A",nl),rep("T",nl),Ghat)
write.table(Gout,"geno592r_sims.txt",row.names=F,col.names=F,quote=F)
write.table(ph1m,"pheno592r_sims1.txt",row.names=F,col.names=F,quote=F)
write.table(ph2m,"pheno592r_sims2.txt",row.names=F,col.names=F,quote=F)
write.table(ph3m,"pheno592r_sims3.txt",row.names=F,col.names=F,quote=F)
write.table(ph4m,"pheno592r_sims4.txt",row.names=F,col.names=F,quote=F)
write.table(bv1m,"bv592r_sims1.txt",row.names=F,col.names=F,quote=F)
write.table(bv2m,"bv592r_sims2.txt",row.names=F,col.names=F,quote=F)
write.table(bv3m,"bv592r_sims3.txt",row.names=F,col.names=F,quote=F)
write.table(bv4m,"bv592r_sims4.txt",row.names=F,col.names=F,quote=F)
write.table(effect1m,"effect592r_sims1.txt",row.names=F,col.names=F,quote=F)
write.table(effect2m,"effect592r_sims2.txt",row.names=F,col.names=F,quote=F)
write.table(effect3m,"effect592r_sims3.txt",row.names=F,col.names=F,quote=F)
write.table(effect4m,"effect592r_sims4.txt",row.names=F,col.names=F,quote=F)
write.table(funct1m,"functvar592r_sims1.txt",row.names=F,col.names=F,quote=F)
write.table(funct2m,"functvar592r_sims2.txt",row.names=F,col.names=F,quote=F)
write.table(funct3m,"functvar592r_sims3.txt",row.names=F,col.names=F,quote=F)
write.table(funct4m,"functvar592r_sims4.txt",row.names=F,col.names=F,quote=F)
save(list=ls(),file="sims592r.Rdata")
2. MaDe SNP files
perl mkSnpFiles.pl functvar592r_sims*
3. Installed, then ran gemma.
Command (for one set of simulated data sets, 1 of 4)
perl wrap_qsub_slurm_gemma.pl /uufs/chpc.utah.edu/common/home/u6000989/projects/selection_sims/rhag/geno592r_sims.txt /uufs/chpc.utah.edu/common/home/u6000989/projects/selection_sims/rhag/pheno592r_sims1.txt
An example:
cd /scratch/general/lustre/gemmaGomp/
sleep 5
~/bin/gemma -g /uufs/chpc.utah.edu/common/home/u6000989/projects/selection_sims/rhag/geno592r_sims.txt -p /uufs/chpc.utah.edu/common/home/u6000989/projects/selection_sims/rhag/pheno592r_sims1.txt -bslmm 1 -n 50 -o out_rhag592rsim1ph50rep1 -rpace 100 -maf 0.0 -w 1000000 -s 2000000