Post date: Jun 06, 2016 9:43:31 PM
Below is the R code I used for an initial set of analyses for my Evolution talk. This is in king:/uufs/chpc.utah.edu/common/home/u6000989/projects/lyc_hybanc/ancfreqs/results/commands.R
## read locus information
lgs<-scan("lgs.txt")
lgs<-c(lgs,rep(23,852))
N<-length(lgs)
bnds<-which(lgs[2:N] != lgs[1:(N-1)])
bndsPlus<-c(1,bnds,length(lgs))
mids<-rep(NA,23)
for(i in 1:23){
mids[i]<-(bndsPlus[i]+bndsPlus[i+1])/2
}
## plotting function
plotanc<-function(pop=NA,nm=NA,mids=mids,bnds=bnds){
par(xpd=FALSE)
plot(pop,type='n',ylim=c(0,1),axes=FALSE,cex.lab=1.75,ylab="ancestry block freq.",xlab="chromosome")
axis(2)
for(i in seq(1,21,2)){
j<-i+1
polygon(x=bnds[c(i,j,j,i)],y=c(-1,-1,2,2),border=NA,col="lightblue")
}
par(xpd=TRUE)
#text(mids,rep(-0.15,23),c(1:22,"Z"))
text(mids,rep(-0.15,23),c(1:19,NA,21,NA,"Z"))
lines(pop)
title(main=nm)
abline(h=mean(pop),lty=3,col="darkgray")
}
## read in all of the data
sexMTR<-read.table("qsx_MTR.txt",header=FALSE,sep=",")
autoMTR<-read.table("q_MTR.txt",header=FALSE,sep=",")
MTR<-c(autoMTR[,1],sexMTR[,1])
sexCSP<-read.table("qsx_CSP.txt",header=FALSE,sep=",")
autoCSP<-read.table("q_CSP.txt",header=FALSE,sep=",")
CSP<-c(autoCSP[,1],sexCSP[,1])
sexCRP<-read.table("qsx_CRP.txt",header=FALSE,sep=",")
autoCRP<-read.table("q_CRP.txt",header=FALSE,sep=",")
CRP<-c(autoCRP[,1],sexCRP[,1])
sexSOP<-read.table("qsx_SOP.txt",header=FALSE,sep=",")
autoSOP<-read.table("q_SOP.txt",header=FALSE,sep=",")
SOP<-c(autoSOP[,1],sexSOP[,1])
sexLAE<-read.table("qsx_LAE.txt",header=FALSE,sep=",")
autoLAE<-read.table("q_LAE.txt",header=FALSE,sep=",")
LAE<-c(autoLAE[,1],sexLAE[,1])
sexSWM<-read.table("qsx_SWM.txt",header=FALSE,sep=",")
autoSWM<-read.table("q_SWM.txt",header=FALSE,sep=",")
SWM<-c(autoSWM[,1],sexSWM[,1])
sexTIC<-read.table("qsx_TIC.txt",header=FALSE,sep=",")
autoTIC<-read.table("q_TIC.txt",header=FALSE,sep=",")
TIC<-c(autoTIC[,1],sexTIC[,1])
sexCLH<-read.table("qsx_CLH.txt",header=FALSE,sep=",")
autoCLH<-read.table("q_CLH.txt",header=FALSE,sep=",")
CLH<-c(autoCLH[,1],sexCLH[,1])
sexREF<-read.table("qsx_REF.txt",header=FALSE,sep=",")
autoREF<-read.table("q_REF.txt",header=FALSE,sep=",")
REF<-c(autoREF[,1],sexREF[,1])
sexEGP<-read.table("qsx_EGP.txt",header=FALSE,sep=",")
autoEGP<-read.table("q_EGP.txt",header=FALSE,sep=",")
EGP<-c(autoEGP[,1],sexEGP[,1])
sexBKM<-read.table("qsx_BKM.txt",header=FALSE,sep=",")
autoBKM<-read.table("q_BKM.txt",header=FALSE,sep=",")
BKM<-c(autoBKM[,1],sexBKM[,1])
sexSTM<-read.table("qsx_STM.txt",header=FALSE,sep=",")
autoSTM<-read.table("q_STM.txt",header=FALSE,sep=",")
STM<-c(autoSTM[,1],sexSTM[,1])
anc<-list(MTR,CSP,CRP,SOP,LAE,SWM,TIC,CLH,REF,EGP,BKM,STM)
pnms<-c("MTR","CSP","CRP","SOP","LAE","SWM","TIC","CLH","REF","EGP","BKM","STM")
## population plots
pdf("ancplots.pdf",width=9,height=4)
for(i in 1:12){
plotanc(pop=anc[[i]],nm=pnms[i],mids,bnds)
}
dev.off()
## population plots for talk
pdf("ancplotMTR-CSP.pdf",width=10,height=8)
par(mfrow=c(2,1))
par(mar=c(5.5,5.5,2,0.5))
for(i in c(1,8)){
plotanc(pop=anc[[i]],nm=pnms[i],mids,bnds)
}
dev.off()
## LG means and sds
mns<-vector("list",12)
sds<-vector("list",12)
for(i in 1:12){
mns[[i]]<-tapply(X=anc[[i]],INDEX=lgs,mean)
sds[[i]]<-tapply(X=anc[[i]],INDEX=lgs,sd)
}
names(mns)<-pnms
names(sds)<-pnms
## cormatrixes
lgCors<-matrix(NA,nrow=12,ncol=12)
for(i in 1:12){for(j in 1:12){
if(i > j){
lgCors[i,j]<-cor(sds[[i]],sds[[j]])
}
if(i < j){
lgCors[i,j]<-cor(mns[[i]],mns[[j]])
}
}}
pdf("ancLgCors.pdf",width=7,height=6)
bls<-brewer.pal(8,"Blues")
par(mar=c(4,4,1,6))
par(xpd=TRUE)
image(lgCors,breaks=c(-1,0,seq(0.1,0.7,0.1),1),col=c("gray",bls),axes=FALSE)
axis(1,at=seq(0,11,1)/11,pnms,las=2)
axis(2,at=seq(0,11,1)/11,pnms,las=2)
box()
par(xpd=FALSE)
image.plot(lgCors,breaks=c(-1,0,seq(0.1,0.7,0.1),1),col=c("gray",bls),legend.lab="cor. coef. (r)",legend.only=TRUE,add=TRUE)
dev.off()
## example LG mns
pdf("MTRancLgs.pdf",width=6,height=6)
par(mar=c(4.5,4.5,0.5,0.5))
plot(mns[[1]],mns[[2]],type='n',cex.lab=1.8,cex.axis=1.2,ylim=c(0.13,0.42),xlab="mean anc. freq. (MTR)",ylab="mean anc. freq")
text(mns[[1]],mns[[2]],col=bls[8],c(1:22,"Z"))
text(mns[[1]],mns[[11]],col="gray",c(1:22,"Z"))
legend(0.365,0.42,c("r(MTR,CSP) = 0.76","r(MTR,CLH) = -0.26"),bty='n',fill=c(bls[8],"gray"))
dev.off()
## matrix of excess anna
exAnna<-matrix(NA,nrow=length(MTR),ncol=12)
for(i in 1:12){
ub<-quantile(anc[[i]],probs=0.99)
exAnna[,i]<-as.numeric(anc[[i]] > ub)
}
nullMx<-rep(NA,1000)
null99<-rep(NA,1000)
## perm
for(i in 1:1000){
X<-exAnna
for(j in 1:12){
X[,j]<-X[sample(1:dim(X)[1],dim(X),replace=FALSE),j]
}
cnts<-apply(X,1,sum)
nullMx[i]<-max(cnts)
null99[i]<-quantile(cnts,probs=0.99)
}
plot(apply(exAnna,1,sum),type='l')
abline(h=quantile(nullMx,probs=0.95),col="red")
abline(h=quantile(null99,probs=0.95),col="orange")
## matrix of excess melissa
exMel<-matrix(NA,nrow=length(MTR),ncol=12)
for(i in 1:12){
lb<-quantile(anc[[i]],probs=0.01)
exMel[,i]<-as.numeric(anc[[i]] < lb)
}
nullMx<-rep(NA,1000)
null99<-rep(NA,1000)
## perm
for(i in 1:1000){
X<-exMel
for(j in 1:12){
X[,j]<-X[sample(1:dim(X)[1],dim(X),replace=FALSE),j]
}
cnts<-apply(X,1,sum)
nullMx[i]<-max(cnts)
null99[i]<-quantile(cnts,probs=0.99)
}
plot(apply(exMel,1,sum),type='l')
abline(h=quantile(nullMx,probs=0.95),col="red")
abline(h=quantile(null99,probs=0.95),col="orange")
## fixed
ancM<-matrix(NA,nrow=length(MTR),ncol=12)
for(i in 1:12){
ancM[,i]<-as.numeric(anc[[i]])
}
colnames(ancM)<-pnms
## fixed or high freq. anna
apply(ancM > 0.99,2,sum)
#MTR CSP CRP SOP LAE SWM TIC CLH REF EGP BKM STM
#180 5 12 0 0 0 0 0 31 0 0 21
apply(ancM > 0.95,2,sum)
#MTR CSP CRP SOP LAE SWM TIC CLH REF EGP BKM STM
#304 26 19 5 0 15 3 6 61 3 0 35
## fixed or high freq. melissa
apply(ancM < 0.01,2,sum)
#MTR CSP CRP SOP LAE SWM TIC CLH REF EGP BKM STM
#124 118 176 88 119 97 100 386 648 307 367 340
apply(ancM < 0.05,2,sum)
#MTR CSP CRP SOP LAE SWM TIC CLH REF EGP BKM STM
#354 325 737 334 428 470 534 1026 1748 615 706 951
## next ask whether more fixed or high freq. shared than expected by chance
save(list=ls(),file="lycanc.Rdat")
## for talk, summaries of shared high or fixed
## plotting function
plotfixed<-function(obs=NA,null=NA,nm=NA,mids=mids,bnds=bnds){
par(xpd=FALSE)
ub<-12
plot(obs,type='n',ylim=c(0,ub),axes=FALSE,cex.lab=1.75,ylab="no. populations",xlab="chromosome")
axis(2)
for(i in seq(1,21,2)){
j<-i+1
polygon(x=bnds[c(i,j,j,i)],y=c(0,0,ub,ub),border=NA,col="lightblue")
}
par(xpd=TRUE)
#text(mids,rep(-0.15,23),c(1:22,"Z"))
text(mids,rep(-1,23),c(1:19,NA,21,NA,"Z"))
lines(obs)
title(main=nm,cex.main=1.3)
par(xpd=FALSE)
abline(h=quantile(null,probs=0.95),col="red",lty=2)
}
exAnna<-matrix(NA,nrow=length(MTR),ncol=12)
for(i in 1:12){
ub<-quantile(anc[[i]],probs=0.99)
exAnna[,i]<-as.numeric(anc[[i]] > ub)
}
nullMx<-rep(NA,1000)
null99<-rep(NA,1000)
## perm
for(i in 1:1000){
X<-exAnna
for(j in 1:12){
X[,j]<-X[sample(1:dim(X)[1],dim(X),replace=FALSE),j]
}
cnts<-apply(X,1,sum)
nullMx[i]<-max(cnts)
null99[i]<-quantile(cnts,probs=0.99)
}
## matrix of excess melissa
fixedMel<-matrix(NA,nrow=length(MTR),ncol=12)
for(i in 1:12){
lb<-0.01
fixedMel[,i]<-as.numeric(anc[[i]] < lb)
}
nullMxlm<-rep(NA,1000)
null99lm<-rep(NA,1000)
## perm
for(i in 1:1000){
X<-fixedMel
for(j in 1:12){
X[,j]<-X[sample(1:dim(X)[1],dim(X),replace=FALSE),j]
}
cnts<-apply(X,1,sum)
nullMxlm[i]<-max(cnts)
null99lm[i]<-quantile(cnts,probs=0.99)
}
pdf("coreblks.pdf",width=10,height=8)
par(mfrow=c(2,1))
par(mar=c(5.5,5.5,2,0.5))
plotfixed(obs=apply(exAnna,1,sum),null=nullMx,nm="high anna ancestry",mids=mids,bnds=bnds)
plotfixed(obs=apply(fixedMel,1,sum),null=nullMxlm,nm="fixed melissa ancestry",mids=mids,bnds=bnds)
dev.off()