Post date: Oct 22, 2013 7:36:3 PM
I am still processing the sequence data for melissa populations that were not part of the admixture project but were added in conjunction with the selection experiment, but I went ahead and examined the melissa populations from the admixture project on their own with PCA just for fun while waiting for this. See the attached plot (pca), orange = alfalfa host, gray = non alfalfa host; the 'groups' from the admixture paper are shown with symbols: 'melissa-west' = circles, 'melissa-east' = squares, and 'melissa-rockies' with triangles. Symbols denote individuals and three letter codes give population means. This is obviously preliminary but definitely argues for multiple colonizations of alfalfa.
The files for this project are in projects/lycaeides_hostplant/, and the R code is here:
## load("melpca.rwsp")
## look at structurin of melissa related to alfalfa feeder based on admixture data only
nloci<-15069
nind<-1521
# common variants
g<-matrix(scan("../lycaeides_admixture/admixprops/results/mngen_commonk2-8.txt",n=nind*nloci,sep=" "),nrow=nloci,ncol=nind,byrow=T)
ids<-read.table("../lycaeides_admixture/admixprops/figures/indinfo.txt",header=F)
Gmel<-g[,ids[,3]==4]
idsMel<-ids[ids[,3]==4,]
write.table(idsMel,"melissa_ids.txt",row.names=F,col.names=F,quote=F)
nind<-dim(idsMel)[1]
## calculate N x N genotype covariance matrix
gmn<-apply(Gmel,1,mean)
gmnmat<-matrix(gmn,nrow=nloci,ncol=nind)
gprime<-Gmel-gmnmat
gcovarmat<-matrix(NA,nrow=nind,ncol=nind)
for(i in 1:nind){
for(j in i:nind){
if (i==j){
gcovarmat[i,j]<-cov(gprime[,i],gprime[,j])
}
else{
gcovarmat[i,j]<-cov(gprime[,i],gprime[,j])
gcovarmat[j,i]<-gcovarmat[i,j]
}
}
}
## pca on genotype covariance matrix
pcgcov<-prcomp(x=gcovarmat,center=TRUE,scale=FALSE)
popinfo<-read.table("popinfo.txt",header=F)
sym<-rep(NA,nind)
mycols<-rep(NA,nind)
## set symbols and colors
for(i in 1:nind){
if(popinfo[i,2] == 'melw'){
sym[i]<-21
}
else if(popinfo[i,2] == 'mele'){
sym[i]<-22
}
else{
sym[i]<-25
}
if(popinfo[i,3]=='N'){
mycols[i]<-"gray"
}
else{
mycols[i]<-"orange"
}
}
pc1mn<-tapply(X=pcgcov$x[,1],INDEX=popinfo[,1],mean)
pc2mn<-tapply(X=pcgcov$x[,2],INDEX=popinfo[,1],mean)
plist<-unique(as.character(popinfo[,1]))
pdf("melPca.pdf",width=8,height=8)
par(mar=c(4.5,4.5,0.5,0.5))
par(pty='s')
plot(pcgcov$x[,1],pcgcov$x[,2],pch=sym,col=mycols,xlab="PC 1 (65%)",ylab="PC 2 (5%)",cex.lab=1.5,cex.axis=1.2)
text(pc1mn,pc2mn,plist)
dev.off()