Post date: Sep 03, 2013 8:7:4 PM
I obtained average genotype estimates (allele count) for common, low frequency, and rare variants. The genotype estimates are in projects/lycaeides_admixture/admixprops/results/mngen_*k2-8.txt. I used these as the basis for pca on the genotype covariance matrix. R code for calculating the genotype covariance matrixes and making plots is in the same directory and called pca.R (also include below). I need to spend time examining the pca plots carefully, but a few general points are evident. The first few pc's explain almost most of the variation in coancestry based on common variants but not rare variants (pc pve plot). Consequently, common (and to a lesser extent low frequency variants) recover the general pattern of population structure (including possible evidence for admixture) as I have seen previously (common pc; lowf pc), but rare variant pc's pull out individual populations or sets of populations and do not reveal general structure (rare pc).
## pca of genotype estimates
nloci<-15076
nind<-1521
# common variants
g<-matrix(scan("mngen_commonk2-8.txt",n=nind*nloci,sep=" "),nrow=nloci,ncol=nind,byrow=T)
## calculate N x N genotype covariance matrix
gmn<-apply(g,1,mean)
gmnmat<-matrix(gmn,nrow=nloci,ncol=nind)
gprime<-g-gmnmat
gcovarmat<-matrix(NA,nrow=nind,ncol=nind)
for(i in 1:nind){
for(j in i:nind){
if (i==j){
gcovarmat[i,j]<-cov(gprime[,i],gprime[,j])
}
else{
gcovarmat[i,j]<-cov(gprime[,i],gprime[,j])
gcovarmat[j,i]<-gcovarmat[i,j]
}
}
}
## pca on the genotype covariance matrix
pcgcov<-prcomp(x=gcovarmat,center=TRUE,scale=FALSE)
# lowf variants
g<-matrix(scan("mngen_lowfk2-8.txt",n=nind*nloci,sep=" "),nrow=nloci,ncol=nind,byrow=T)
## calculate N x N genotype covariance matrix
gmn<-apply(g,1,mean)
gmnmat<-matrix(gmn,nrow=nloci,ncol=nind)
gprime<-g-gmnmat
gcovarmat<-matrix(NA,nrow=nind,ncol=nind)
for(i in 1:nind){
for(j in i:nind){
if (i==j){
gcovarmat[i,j]<-cov(gprime[,i],gprime[,j])
}
else{
gcovarmat[i,j]<-cov(gprime[,i],gprime[,j])
gcovarmat[j,i]<-gcovarmat[i,j]
}
}
}
## pca on the genotype covariance matrix
pcgcovlowf<-prcomp(x=gcovarmat,center=TRUE,scale=FALSE)
# rare variants
g<-matrix(scan("mngen_rarek2-8.txt",n=nind*nloci,sep=" "),nrow=nloci,ncol=nind,byrow=T)
## calculate N x N genotype covariance matrix
gmn<-apply(g,1,mean)
gmnmat<-matrix(gmn,nrow=nloci,ncol=nind)
gprime<-g-gmnmat
gcovarmat<-matrix(NA,nrow=nind,ncol=nind)
for(i in 1:nind){
for(j in i:nind){
if (i==j){
gcovarmat[i,j]<-cov(gprime[,i],gprime[,j])
}
else{
gcovarmat[i,j]<-cov(gprime[,i],gprime[,j])
gcovarmat[j,i]<-gcovarmat[i,j]
}
}
}
## pca on the genotype covariance matrix
pcgcovrare<-prcomp(x=gcovarmat,center=TRUE,scale=FALSE)
## plot of pve
pdf("pcpve.pdf",width=5,height=5)
plot(round(100*outc[[6]][2,1:100],1),type="l",lwd=2,cex.lab=1.4,cex.axis=1.2,xlab="principal component",ylab="percent variance explained")
lines(round(100*outl[[6]][2,1:100],1),lwd=2,col="gray")
lines(round(100*outr[[6]][2,1:100],1),lwd=2,lty=2)
dev.off()
## plot pca common
mycolors<-c("orange","orangered","forestgreen","rosybrown","gold","darkblue","lightblue","salmon","black","gray","brown","violet","darkred")
leginfo<-read.table("legend.txt",header=F)
pdf("pcplotgcovCommon.pdf",width=10,height=10)
out<-summary(pcgcov)
pc1<-round(100*out[[6]][2,1],1)
pc2<-round(100*out[[6]][2,2],1)
plot(pcgcov$x[,1],pcgcov$x[,2],pch=20,cex=0.5,type='n',xlab=paste("PC1 (",pc1,"%)"),ylab=paste("PC2 (",pc2,"%)"),cex.lab=1.3)
for(i in 1:13){
A<-which(leginfo[,2]==i)
text(pcgcov$x[A,1],pcgcov$x[A,2],leginfo[A,3],cex=0.6,col=mycolors[i])
}
legend(2.9,1,legend=c("anna","ricei","idas","long","sublv","mel-gb","mel-rm","mel-an","sr-nev","white","warner","jhole","dubs"),fill=mycolors,cex=0.7)
dev.off()
## plot pca lowf
pdf("pcplotgcovLowf.pdf",width=16,height=8)
par(mfrow=c(1,2))
out<-summary(pcgcovlowf)
pc1<-round(100*out[[6]][2,1],1)
pc2<-round(100*out[[6]][2,2],1)
plot(pcgcovlowf$x[,1],pcgcovlowf$x[,2],pch=20,cex=0.5,type='n',xlab=paste("PC1 (",pc1,"%)"),ylab=paste("PC2 (",pc2,"%)"),cex.lab=1.3)
for(i in 1:13){
A<-which(leginfo[,2]==i)
text(pcgcovlowf$x[A,1],pcgcovlowf$x[A,2],leginfo[A,3],cex=0.6,col=mycolors[i])
}
pc1<-round(100*out[[6]][2,1],1)
pc3<-round(100*out[[6]][2,3],1)
plot(pcgcovlowf$x[,1],pcgcovlowf$x[,3],pch=20,cex=0.5,type='n',xlab=paste("PC1 (",pc1,"%)"),ylab=paste("PC3 (",pc3,"%)"),cex.lab=1.3)
for(i in 1:13){
A<-which(leginfo[,2]==i)
text(pcgcovlowf$x[A,1],pcgcovlowf$x[A,3],leginfo[A,3],cex=0.6,col=mycolors[i])
}
legend(0.09,-0.02,legend=c("anna","ricei","idas","long","sublv","mel-gb","mel-rm","mel-an","sr-nev","white","warner","jhole","dubs"),fill=mycolors,cex=0.7)
dev.off()
## plot pca rare
pdf("pcplotgcovRare.pdf",width=21,height=14)
par(mfrow=c(2,3))
out<-summary(pcgcovrare)
for(p in c(1,3,5,7,9,11)){
pc1<-round(100*out[[6]][2,p],1)
pc2<-round(100*out[[6]][2,p+1],1)
plot(pcgcovrare$x[,p],pcgcovrare$x[,p+1],pch=20,cex=0.5,type='n',xlab=paste("PC",p," (",pc1,"%)"),ylab=paste("PC",p+1," (",pc2,"%)"),cex.lab=1.3)
for(i in 1:13){
A<-which(leginfo[,2]==i)
text(pcgcovrare$x[A,p],pcgcovrare$x[A,p+1],leginfo[A,3],cex=0.6,col=mycolors[i])
}
}
#legend(2.9,1,legend=c("anna","ricei","idas","long","sublv","mel-gb","mel-rm","mel-an","sr-nev","white","warner","jhole","dubs"),fill=mycolors,cex=0.7)
dev.off()