pca with melissa, admixture project populations only

Post date: Oct 22, 2013 7:36:3 PM

I am still processing the sequence data for melissa populations that were not part of the admixture project but were added in conjunction with the selection experiment, but I went ahead and examined the melissa populations from the admixture project on their own with PCA just for fun while waiting for this. See the attached plot (pca), orange = alfalfa host, gray = non alfalfa host; the 'groups' from the admixture paper are shown with symbols: 'melissa-west' = circles, 'melissa-east' = squares, and 'melissa-rockies' with triangles. Symbols denote individuals and three letter codes give population means. This is obviously preliminary but definitely argues for multiple colonizations of alfalfa.

The files for this project are in projects/lycaeides_hostplant/, and the R code is here:

## load("melpca.rwsp")

## look at structurin of melissa related to alfalfa feeder based on admixture data only

nloci<-15069

nind<-1521

# common variants

g<-matrix(scan("../lycaeides_admixture/admixprops/results/mngen_commonk2-8.txt",n=nind*nloci,sep=" "),nrow=nloci,ncol=nind,byrow=T)

ids<-read.table("../lycaeides_admixture/admixprops/figures/indinfo.txt",header=F)

Gmel<-g[,ids[,3]==4]

idsMel<-ids[ids[,3]==4,]

write.table(idsMel,"melissa_ids.txt",row.names=F,col.names=F,quote=F)

nind<-dim(idsMel)[1]

## calculate N x N genotype covariance matrix

gmn<-apply(Gmel,1,mean)

gmnmat<-matrix(gmn,nrow=nloci,ncol=nind)

gprime<-Gmel-gmnmat

gcovarmat<-matrix(NA,nrow=nind,ncol=nind)

for(i in 1:nind){

for(j in i:nind){

if (i==j){

gcovarmat[i,j]<-cov(gprime[,i],gprime[,j])

}

else{

gcovarmat[i,j]<-cov(gprime[,i],gprime[,j])

gcovarmat[j,i]<-gcovarmat[i,j]

}

## pca on genotype covariance matrix

pcgcov<-prcomp(x=gcovarmat,center=TRUE,scale=FALSE)

popinfo<-read.table("popinfo.txt",header=F)

sym<-rep(NA,nind)

mycols<-rep(NA,nind)

## set symbols and colors

for(i in 1:nind){

if(popinfo[i,2] == 'melw'){

sym[i]<-21

}

else if(popinfo[i,2] == 'mele'){

sym[i]<-22

}

else{

sym[i]<-25

}

if(popinfo[i,3]=='N'){

mycols[i]<-"gray"

}

else{

mycols[i]<-"orange"

}

pc1mn<-tapply(X=pcgcov$x[,1],INDEX=popinfo[,1],mean)

pc2mn<-tapply(X=pcgcov$x[,2],INDEX=popinfo[,1],mean)

plist<-unique(as.character(popinfo[,1]))

pdf("melPca.pdf",width=8,height=8)

par(mar=c(4.5,4.5,0.5,0.5))

par(pty='s')

plot(pcgcov$x[,1],pcgcov$x[,2],pch=sym,col=mycols,xlab="PC 1 (65%)",ylab="PC 2 (5%)",cex.lab=1.5,cex.axis=1.2)

text(pc1mn,pc2mn,plist)

dev.off()

Page updated

Google Sites

Report abuse