Post date: Jan 26, 2016 8:25:22 PM
Scott provided genetic data from his Ecology Letters paper on Rhagolettis. The data are in
/labs/evolution/projects/selection_sims/rhag/snps.ScottSelection.05-400-20.filteredHwe0.05.wOutApple30.cleaned.vcf
This includes 149 individuals and 33,723 SNPs and should exhibit increased LD relative to the Timema data set.
I calculated posterior mean genotypes with a HWE prior and the ML allele frequencies from the vcf file using vcf2gl.pl and gl2genest.pl. The results are in pntest_rhag.txt.
I then generated 6 simulated data sets in R with 10, 100, or 1000 SNPs and h2 of 0.05 or 0.3 as with Timema. Here is the script (sims.R).
nl<-33723
n<-149
G<-matrix(scan("pntest_rhag.txt",n=n*nl,sep=" "),nrow=nl,ncol=n,byrow=TRUE)
Ghat<-round(G,0)
nr<-50
## data set 1, h2 = 0.3, nsnps = 100, normal distribution
nfunc<-100
her<-0.3
funct1m<-matrix(NA,nrow=nfunc,ncol=nr)
effect1m<-matrix(NA,nrow=nfunc,ncol=nr)
bv1m<-matrix(NA,nrow=n,ncol=nr)
ph1m<-matrix(NA,nrow=n,ncol=nr)
her1<-rep(NA,nr)
for(x in 1:nr){
funct1<-sample(1:nl,nfunc,replace=FALSE)
effect1<-rnorm(nfunc,0,1)
bv1<-rep(NA,n)
for(i in 1:n){
bv1[i]<-sum(Ghat[funct1,i] * effect1)
}
v1<--1 * ((her - 1)*var(bv1))/her
ph1<-bv1 + rnorm(n,0,sqrt(v1))
her1[x]<-var(bv1)/var(ph1)
funct1m[,x]<-funct1
effect1m[,x]<-effect1
bv1m[,x]<-bv1
ph1m[,x]<-ph1
}
## data set 2, h2 = 0.3, nsnps = 10, normal distribution
nfunc<-10
her<-0.3
funct2m<-matrix(NA,nrow=nfunc,ncol=nr)
effect2m<-matrix(NA,nrow=nfunc,ncol=nr)
bv2m<-matrix(NA,nrow=n,ncol=nr)
ph2m<-matrix(NA,nrow=n,ncol=nr)
her2<-rep(NA,nr)
for(x in 1:nr){
funct2<-sample(1:nl,nfunc,replace=FALSE)
effect2<-rnorm(nfunc,0,1)
bv2<-rep(NA,n)
for(i in 1:n){
bv2[i]<-sum(Ghat[funct2,i] * effect2)
}
v2<--1 * ((her - 1)*var(bv2))/her
ph2<-bv2 + rnorm(n,0,sqrt(v2))
her2[x]<-var(bv2)/var(ph2)
funct2m[,x]<-funct2
effect2m[,x]<-effect2
bv2m[,x]<-bv2
ph2m[,x]<-ph2
}
## data set 3, h2 = 0.05, nsnps = 100, normal distribution
nfunc<-100
her<-0.05
funct3m<-matrix(NA,nrow=nfunc,ncol=nr)
effect3m<-matrix(NA,nrow=nfunc,ncol=nr)
bv3m<-matrix(NA,nrow=n,ncol=nr)
ph3m<-matrix(NA,nrow=n,ncol=nr)
her3<-rep(NA,nr)
for(x in 1:nr){
funct3<-sample(1:nl,nfunc,replace=FALSE)
effect3<-rnorm(nfunc,0,1)
bv3<-rep(NA,n)
for(i in 1:n){
bv3[i]<-sum(Ghat[funct3,i] * effect3)
}
v3<--1 * ((her - 1)*var(bv3))/her
ph3<-bv3 + rnorm(n,0,sqrt(v3))
her3[x]<-var(bv3)/var(ph3)
funct3m[,x]<-funct3
effect3m[,x]<-effect3
bv3m[,x]<-bv3
ph3m[,x]<-ph3
}
## data set 4, h2 = 0.05, nsnps = 10, normal distribution
nfunc<-10
her<-0.05
funct4m<-matrix(NA,nrow=nfunc,ncol=nr)
effect4m<-matrix(NA,nrow=nfunc,ncol=nr)
bv4m<-matrix(NA,nrow=n,ncol=nr)
ph4m<-matrix(NA,nrow=n,ncol=nr)
her4<-rep(NA,nr)
for(x in 1:nr){
funct4<-sample(1:nl,nfunc,replace=FALSE)
effect4<-rnorm(nfunc,0,1)
bv4<-rep(NA,n)
for(i in 1:n){
bv4[i]<-sum(Ghat[funct4,i] * effect4)
}
v4<--1 * ((her - 1)*var(bv4))/her
ph4<-bv4 + rnorm(n,0,sqrt(v4))
her4[x]<-var(bv4)/var(ph4)
funct4m[,x]<-funct4
effect4m[,x]<-effect4
bv4m[,x]<-bv4
ph4m[,x]<-ph4
}
## data set 5, h2 = 0.3, nsnps = 1000, normal distribution
nfunc<-1000
her<-0.3
funct5m<-matrix(NA,nrow=nfunc,ncol=nr)
effect5m<-matrix(NA,nrow=nfunc,ncol=nr)
bv5m<-matrix(NA,nrow=n,ncol=nr)
ph5m<-matrix(NA,nrow=n,ncol=nr)
her5<-rep(NA,nr)
for(x in 1:nr){
funct5<-sample(1:nl,nfunc,replace=FALSE)
effect5<-rnorm(nfunc,0,1)
bv5<-rep(NA,n)
for(i in 1:n){
bv5[i]<-sum(Ghat[funct5,i] * effect5)
}
v5<--1 * ((her - 1)*var(bv5))/her
ph5<-bv5 + rnorm(n,0,sqrt(v5))
her5[x]<-var(bv5)/var(ph5)
funct5m[,x]<-funct5
effect5m[,x]<-effect5
bv5m[,x]<-bv5
ph5m[,x]<-ph5
}
## data set 6, h2 = 0.05, nsnps = 1000, normal distribution
nfunc<-1000
her<-0.05
funct6m<-matrix(NA,nrow=nfunc,ncol=nr)
effect6m<-matrix(NA,nrow=nfunc,ncol=nr)
bv6m<-matrix(NA,nrow=n,ncol=nr)
ph6m<-matrix(NA,nrow=n,ncol=nr)
her6<-rep(NA,nr)
for(x in 1:nr){
funct6<-sample(1:nl,nfunc,replace=FALSE)
effect6<-rnorm(nfunc,0,1)
bv6<-rep(NA,n)
for(i in 1:n){
bv6[i]<-sum(Ghat[funct6,i] * effect6)
}
v6<--1 * ((her - 1)*var(bv6))/her
ph6<-bv6 + rnorm(n,0,sqrt(v6))
her6[x]<-var(bv6)/var(ph6)
funct6m[,x]<-funct6
effect6m[,x]<-effect6
bv6m[,x]<-bv6
ph6m[,x]<-ph6
}
Gout<-cbind(1:nl,rep("A",nl),rep("T",nl),Ghat)
write.table(Gout,"geno_sims.txt",row.names=F,col.names=F,quote=F)
write.table(ph1m,"pheno_sims1.txt",row.names=F,col.names=F,quote=F)
write.table(ph2m,"pheno_sims2.txt",row.names=F,col.names=F,quote=F)
write.table(ph3m,"pheno_sims3.txt",row.names=F,col.names=F,quote=F)
write.table(ph4m,"pheno_sims4.txt",row.names=F,col.names=F,quote=F)
write.table(ph5m,"pheno_sims5.txt",row.names=F,col.names=F,quote=F)
write.table(ph6m,"pheno_sims6.txt",row.names=F,col.names=F,quote=F)
write.table(bv1m,"bv_sims1.txt",row.names=F,col.names=F,quote=F)
write.table(bv2m,"bv_sims2.txt",row.names=F,col.names=F,quote=F)
write.table(bv3m,"bv_sims3.txt",row.names=F,col.names=F,quote=F)
write.table(bv4m,"bv_sims4.txt",row.names=F,col.names=F,quote=F)
write.table(bv5m,"bv_sims5.txt",row.names=F,col.names=F,quote=F)
write.table(bv6m,"bv_sims6.txt",row.names=F,col.names=F,quote=F)
write.table(effect1m,"effect_sims1.txt",row.names=F,col.names=F,quote=F)
write.table(effect2m,"effect_sims2.txt",row.names=F,col.names=F,quote=F)
write.table(effect3m,"effect_sims3.txt",row.names=F,col.names=F,quote=F)
write.table(effect4m,"effect_sims4.txt",row.names=F,col.names=F,quote=F)
write.table(effect5m,"effect_sims5.txt",row.names=F,col.names=F,quote=F)
write.table(effect6m,"effect_sims6.txt",row.names=F,col.names=F,quote=F)
write.table(funct1m,"functvar_sims1.txt",row.names=F,col.names=F,quote=F)
write.table(funct2m,"functvar_sims2.txt",row.names=F,col.names=F,quote=F)
write.table(funct3m,"functvar_sims3.txt",row.names=F,col.names=F,quote=F)
write.table(funct4m,"functvar_sims4.txt",row.names=F,col.names=F,quote=F)
write.table(funct5m,"functvar_sims5.txt",row.names=F,col.names=F,quote=F)
write.table(funct6m,"functvar_sims6.txt",row.names=F,col.names=F,quote=F)
save(list=ls(),file="sims.Rdata")
I then analyzes each data set with gemma, first with functional variants included (here is an example):
cd /pscratch/A01963476/sims/gemmasel
sleep 5
gemma -g /labs/evolution/projects/selection_sims/rhag/geno_sims.txt -p /labs/evolution/projects/selection_sims/rhag/pheno_sims4.txt -bslmm 1 -n 50 -o out_rhagsim4ph50rep1 -rpace 100 -maf 0.0 -w 1000000 -s 2000000
And then without functional variants (after running mkSnpFiles.pl):
cd /pscratch/A01963476/sims/gemmasel
sleep 5
gemma -g /labs/evolution/projects/selection_sims/rhag/geno_sims.txt -p /labs/evolution/projects/selection_sims/rhag/pheno_sims1.txt -bslmm 1 -n 50 -o out_rhasimnofunc1ph50rep1 -snps /labs/evolution/projects/selection_sims/rhag/snps/snplist_file1_phenotype50.txt -rpace 100 -maf 0.0 -w 1000000 -s 2000000