Post date: Oct 22, 2015 3:50:41 PM
Results and input files for these analyses are in /labs/evolution/projects/timema_speciation/tcristinae/fst/
I used the following R script (fstPlotTcr.R) to plot Fst along scaffolds and LGs (20kb window estimates) for each ecotype pair. Note that I made plots with and without the non-LG scaffolds. Here is the script:
fstHV<-as.matrix(read.table("nei20k_lgver3_p_tcristHV.txt",header=FALSE))
fstMR1<-as.matrix(read.table("nei20k_lgver3_p_tcristMR1.txt",header=FALSE))
fstR12<-as.matrix(read.table("nei20k_lgver3_p_tcristR12.txt",header=FALSE))
fstLPR<-as.matrix(read.table("nei20k_lgver3_p_tcristL.txt",header=FALSE))
fst<-cbind(fstHV[,9],fstMR1[,9],fstR12[,9],fstLPR[,9])
x<-which(is.na(fstHV[,1])==TRUE)
pops<-c("HV","MR1","R12","LxPR")
L<-dim(fstHV)[1]
bnds<-which(fstHV[1:(L-1),1] != fstHV[2:L,1]) + 0.5
bndsx<-bnds-length(x)
## without NA
for(i in 1:4){
onefst<-fst[-x,i]
file<-paste("fstplotTcr",pops[i],".pdf",sep="")
pdf(file=file,width=8,height=4)
par(mar=c(5,5,0.5,0.5))
qs<-quantile(onefst,probs=c(0.5,0.99),na.rm=TRUE)
plot(onefst,pch=19,cex=0.5,col="darkgray",type='n',cex.lab=1.4,xlab="20 kb window",ylab="Fst")
p<-1:(L-length(x))
z<-which(onefst < qs[2])
points(p[z],onefst[z],col="darkgray",pch=19,cex=0.5)
z<-which(onefst >= qs[2])
points(p[z],onefst[z],col="red",pch=19,cex=0.5)
abline(v=bndsx,lty=3,lwd=1.5)
abline(h=qs,lty=2,col="maroon")
dev.off()
}
## with NA LG
for(i in 1:4){
onefst<-fst[,i]
file<-paste("fstplotTcrNonLG",pops[i],".pdf",sep="")
pdf(file=file,width=8,height=4)
par(mar=c(5,5,0.5,0.5))
qs<-quantile(onefst,probs=c(0.5,0.99),na.rm=TRUE)
plot(onefst,pch=19,cex=0.5,col="darkgray",type='n',cex.lab=1.4,xlab="20 kb window",ylab="Fst")
p<-1:L
z<-which(onefst < qs[2])
points(p[z],onefst[z],col="darkgray",pch=19,cex=0.5)
z<-which(onefst >= qs[2])
points(p[z],onefst[z],col="red",pch=19,cex=0.5)
abline(v=c(max(which(is.na(fstHV[,1])==T))+0.5,bnds),lty=3,lwd=1.5)
abline(h=qs,lty=2,col="maroon")
dev.off()
}
Next I fit the two state HMM to the LG scaffolds only. I used a perl script to create the R scripts for this. I only ran the 2-state (fixed mean and variance) HMM on Fst (now parameter 9, hence the file names). The two states uses the empirical sd and the empirical mean and 90th quantile (high state). The summaries and png files that plot states show pretty cool results. Basically, elevated Fst for R12 and LxPR on LG 8. Here is one of the four R scripts that the perl script created (fitHmm2HV.R):
library(HiddenMarkov)
nl<-42687
nei<-matrix(scan("nei20k_lgver3_p_tcristHV.txt",n=nl*9,sep=" "),nrow=nl,ncol=9,byrow=TRUE)
Mstep.normc <- function(x, cond, pm, pn){
nms <- sort(names(pm))
n <- length(x)
m <- ncol(cond$u)
if (all(nms==c("mean", "sd"))){
mean <- pm$mean
#mean <- as.numeric(matrix(x, nrow = 1) %*% cond$u)/apply(cond$u,
#MARGIN = 2, FUN = sum)
sd <- pm$sd
return(list(mean=mean, sd=sd))
}
if (all(nms == "mean")) {
mean <- pm$mean
#mean <- as.numeric(matrix(x, nrow = 1) %*% cond)/apply(cond$u,
#MARGIN = 2, FUN = sum)
return(list(mean = mean))
}
if (all(nms == "sd")) {
sd <- pm$sd
return(list(sd = sd))
}
stop("Invalid specification of parameters")
}
rnormc <- rnorm
dnormc <- dnorm
pnormc <- pnorm
qnormc <- qnorm
lgwin<-which(is.na(nei[,1])==FALSE)
for(parm in 9){
if(parm < 9){lparm<-log(nei[lgwin,parm])
lparm[is.finite(lparm)==FALSE]<-log(0.000001)
}
else{lparm<-log(nei[lgwin,parm]/(1-nei[lgwin,parm]))
lparm[is.finite(lparm)==FALSE]<-log(0.000001/0.999999)
}
nl<-length(lgwin)
delta<-c(0.05,0.95)
PiA<-matrix(c(0.9,0.1,0.1,0.9),nrow=2,byrow=T)
PiB<-matrix(c(0.8,0.2,0.01,0.99),nrow=2,byrow=T)
Pi<-list(PiA,PiB)
mn<-mean(lparm,na.rm=TRUE)
s<-sd(lparm,na.rm=TRUE)
q90<-quantile(lparm,prob=0.9)
params<-vector("list",2)
fit<-vector("list",2)
for(i in 1:2){
init<-dthmm(lparm,Pi[[i]],delta,"normc",list(mean=c(q90,mn),sd=rep(s,2)),discrete=FALSE)
params[[i]]<-BaumWelch(init,bwcontrol(maxiter = 500, tol = 1e-04, prt = TRUE, posdiff = FALSE,
converge = expression(diff < tol)))
fit[[i]]<-Viterbi(params[[i]])
}
save(list=ls(),file=paste("hmm2s_HVpar",parm,".rwsp",sep=""))
mycolors<-c("orange","darkgray")
png(file=paste("hmm2stPlotHVpar",parm,".png",sep=""),width=2400,height=800)
if(parm < 9){eparm<-exp(lparm)}
else{eparm<-1/(1+exp(-1 * lparm))}
plot(eparm,col=mycolors[fit[[1]]],pch=19,xlab="snp number",ylab="Fst",cex.lab=1.5,cex.axis=1.2,main="")
lgs<-nei[lgwin,1]
bnds<-which(lgs[1:(nl-1)] != lgs[2:nl]) + 0.5
abline(v=bnds,lty=2)
dev.off()
if(parm < 9){out<-round(rbind(exp(params[[1]]$pm$mean),table(fit[[1]])/length(fit[[1]]),params[[1]]$Pi),4)}
else{out<-round(rbind(1/(1+ exp(-1*params[[1]]$pm$mean)),table(fit[[1]])/length(fit[[1]]),params[[1]]$Pi),4)}
write.table(out,file=paste("hmm2stSummaryHVpar",parm,".txt",sep=""),row.names=F,col.names=F)
cor.test(fit[[1]],fit[[2]])
}
rm(list=ls())