Post date: Mar 03, 2017 4:55:4 PM
I recalculated genomic change (and a null distribution) for the Ecology Letters genomes with the defined loci mel and stripy (see notes here). Right now the results and code are in gompert:/home/zgompert/Local/timemaGenomicChange as kingspeak is down for maintenance.
For mel, I focused on total change across all bushes, and for stripy on divergence between A and C. Across all bushes, mel has the greatest change and diversity, but not excess change for diversity. In contrast, stripy has the greatest change (divergence) and change for diversity between A and C. There is one other stripy sized region that comes close, but it is brown scaffold 2963 and thus on LG8 right next to 702.1. In other words, this isn't necessarily surprising and stripy still edges it out.
We ended up with fewer scaffolds that could be included in this case, so the p-value for the null with stripy being most extreme is 1/18 = 0.056. Note that I am defining size based on number of SNPs not physical size of the scaffold. I think this is probably the most reasonable thing to do as we are averaging over the same number of loci and these are SNP based metrics (average change). Thus, size will jump around across data sets.
Here is the R code:
## read in allele freq. and id data
p0<-scan("in_p_ecol_all.txt")
p1<-scan("in_p_ecol_surv.txt")
pA0<-scan("in_p_ecol_A.txt")
pA1<-scan("in_p_ecol_Asurv.txt")
pC0<-scan("in_p_ecol_C.txt")
pC1<-scan("in_p_ecol_Csurv.txt")
p0l<-list(p0,pA0,pC0,pA1)
p1l<-list(p1,pA1,pC1,pC1)
ids<-read.table("snpids.txt",header=F)
## define mel and stripy
mlb<-4139489
mub<-6414835
slb<-6414836
mel<-c(which(ids[,2]==702.1 & ids[,3] < mlb),
which(ids[,2]==128 & ids[,3] < mub))
stripy<-which(ids[,2]==128 & ids[,3] >= slb)
nsnpMel<-length(mel)
nsnpStripy<-length(stripy)
## for mel, use total change
slist<-as.numeric(names(which(table(ids[,2]) > nsnpMel)))
drop<-which(slist == 702.1 | slist == 128)
slist<-slist[-drop]
nsc<-length(slist)
bnds<-matrix(NA,nrow=nsc,ncol=2)
for(i in 1:nsc){
n<-sum(ids[,2]==slist[i])
bnds[i,1]<-sample(1:(n-nsnpMel),1)
bnds[i,2]<-bnds[i,1]+nsnpMel
}
scmns<-rep(NA,nsc)
scvar<-rep(NA,nsc)
dp<-abs(p1l[[1]]-p0l[[1]])
div<-2 * p0l[[1]] * (1-p0l[[1]])
for(i in 1:nsc){
Dp<-dp[ids[,2]==slist[i]][bnds[i,1]:bnds[i,2]]
Div<-div[ids[,2]==slist[i]][bnds[i,1]:bnds[i,2]]
scmns[i]<-mean(Dp)
scvar[i]<-mean(Div)
}
o<-lm(scmns ~ scvar)
dpMel<-mean(dp[mel])
divMel<-mean(div[mel])
pdf("ecolDivChangeAll.pdf",width=7,height=7)
plot(scvar,scmns,xlab="mean diversity",ylab="mean change",xlim=c(0.17,0.36),ylim=c(0.02,0.034),pch=19,cex.lab=1.4,cex.axis=1.2)
abline(o$coefficients)
points(divMel,dpMel,col="brown",pch=19)
dev.off()
pdf("ecolDivChangeAllResid.pdf",width=7,height=7)
plot(scvar,o$residuals,xlab="mean diversity",ylab="residual change",xlim=c(0.17,0.36),pch=19,cex.lab=1.4,cex.axis=1.2)
ex<-o$coefficients[1] + o$coefficients[2] * divMel
points(divMel,dpMel-ex,col="brown",pch=19)
abline(h=0,lty=2)
dev.off()
## for stripy, use divergence change
slist<-as.numeric(names(which(table(ids[,2]) > nsnpStripy)))
drop<-which(slist == 702.1 | slist == 128)
slist<-slist[-drop]
nsc<-length(slist)
bnds<-matrix(NA,nrow=nsc,ncol=2)
for(i in 1:nsc){
n<-sum(ids[,2]==slist[i])
bnds[i,1]<-sample(1:(n-nsnpStripy),1)
bnds[i,2]<-bnds[i,1]+nsnpStripy
}
scmns<-rep(NA,nsc)
scvar<-rep(NA,nsc)
dp<-abs(p1l[[4]]-p0l[[4]])
div<-(2 * p0l[[4]] * (1-p0l[[4]]) + 2 * p1l[[4]] * (1-p1l[[4]]))/2
for(i in 1:nsc){
Dp<-dp[ids[,2]==slist[i]][bnds[i,1]:bnds[i,2]]
Div<-div[ids[,2]==slist[i]][bnds[i,1]:bnds[i,2]]
scmns[i]<-mean(Dp)
scvar[i]<-mean(Div)
}
o<-lm(scmns ~ scvar)
dpStripy<-mean(dp[stripy])
divStipy<-mean(div[stripy])
pdf("ecolDivChangeAvC.pdf",width=7,height=7)
plot(scvar,scmns,xlab="mean diversity",ylab="mean change (divergence)",xlim=c(0.165,0.2),ylim=c(0.049,0.057),pch=19,cex.lab=1.4,cex.axis=1.2)
abline(o$coefficients)
points(divStipy,dpStripy,col="forestgreen",pch=19)
dev.off()
pdf("ecolDivChangeAvCResid.pdf",width=7,height=7)
plot(scvar,o$residuals,xlab="mean diversity",ylab="residual change (divergence)",xlim=c(0.164,0.2),pch=19,cex.lab=1.4,cex.axis=1.2)
ex<-o$coefficients[1] + o$coefficients[2] * divStipy
points(divStipy,dpStripy-ex,col="forestgreen",pch=19)
abline(h=0,lty=2)
dev.off()