Post date: May 29, 2015 2:2:25 PM
#27may15 kinship matrix
sosopopinfo<-read.csv("sosorum_pops.csv",header=F)
sosogendata<-read.table("filtered_varEsosorumNoHdr.genest",header=F)
g<-(as.matrix(sosogendata))
ghat<-round(g,0)
nloci<-13999
nind<-258
## similarity matrix
kinship<-matrix(NA,nrow=nind,ncol=nind)
for(i in 1:nind){
for(j in i:nind){
if (i==j){
kinship[i,j]<-1
}
else{
kinship[i,j]<-1-mean(abs(ghat[,i]-ghat[,j]))/2
kinship[j,i]<-kinship[i,j]
}
}
}
image(kinship,col=c("gray","lightblue","darkblue","black"),breaks=c(0,0.86,0.88,0.92,1.01))
a<-which(sosopopinfo[2:258,1]!=sosopopinfo[1:257,1])
abline(h=(a+0.5)/258)
abline(v=(a+0.5)/258)
hist(kinship[lower.tri(kinship)])
#pca of kinship matrix
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7
Standard deviation 0.1841 0.02672 0.02011 0.01675 0.01369 0.01309 0.01221
Proportion of Variance 0.6411 0.01350 0.00765 0.00530 0.00354 0.00324 0.00282
Cumulative Proportion 0.6411 0.65462 0.66227 0.66757 0.67111 0.67435 0.67717
pckin<-prcomp(kinship,center=T,scale=F)
pops<-as.character(unique(sosopopinfo[,1]))
library(RColorBrewer)
pdf("sosorum_kin_PCA_27may15.pdf",width=6,height=7)
par(mfrow=c(2,1))
par(mar=c(2, 4, 1, 3.5))
plot(pckin$x[,1],pckin$x[,2],type="n",ylab="PC2 (2.5%)",xlab=NA,cex.lab=0.9,cex.axis=0.8)
myc<-brewer.pal(5,"Set2")
for(i in 5:9){
a<-which(as.character(sosopopinfo[,1])==pops[i])
mn1<-mean(pckin$x[a,1])
mn2<-mean(pckin$x[a,2])
for(j in 1:length(a)){
lines(c(mn1,pckin$x[a[j],1]),c(mn2,pckin$x[a[j],2]),col=myc[i-4])
}
}
for(i in 5:9){
a<-which(as.character(sosopopinfo[,1])==pops[i])
mn1<-mean(pckin$x[a,1])
mn2<-mean(pckin$x[a,2])
text(mn1,mn2,pops[i],cex=0.7)
}
legend(0.04,-0.014,pops[5:9],col=myc,lty=1,lwd=1.5,cex=0.8,bty="n")
par(mar=c(4, 4, 1, 3.5))
plot(pckin$x[,1],pckin$x[,2],type="n",ylab="PC2 (64.1%)",xlab="PC1 (1.4%)",cex.lab=0.9, cex.axis=0.8)
myc<-c(brewer.pal(4,"Set2"),rep("gray",5))
for(i in 1:9){
a<-which(as.character(sosopopinfo[,1])==pops[i])
mn1<-mean(pckin$x[a,1])
mn2<-mean(pckin$x[a,2])
for(j in 1:length(a)){
lines(c(mn1,pckin$x[a[j],1]),c(mn2,pckin$x[a[j],2]),col=myc[i])
}
}
for(i in 1:4){## change 9 to 4 to drop wild labels in second plot
a<-which(as.character(sosopopinfo[,1])==pops[i])
mn1<-mean(pckin$x[a,1])
mn2<-mean(pckin$x[a,2])
text(mn1,mn2,pops[i],cex=0.7)
}
legend(0.041,-0.01,c(pops[1:4],"wild"),col=myc[1:5],lty=1,lwd=1.5,cex=0.8,bty="n")
dev.off()
#But now I'm worried there might be coverage differences between captive and wild because the kinship matrix wasn't what I expected (wild more closely related to each other than captive)