These are the manhattan plots for the bayesfactors. These will be created with the information for chromosomes/linkage groups, cM position of the scaffold on the linkage group, scaffold and position of SNP and bayesfactors.
The files for this are in the folder : /uufs/chpc.utah.edu/common/home/u6007910/projects/lmelissa_hostAdaptation/bayPass/linkagePlots
Files in this folder are as follows:
bfmeansWithScafPosition_all,east,west : baypass output files for all populations, eastern and western populations
orderedLinkageMap.txt : Linkage map details with linkage group, position of scaffold on the linkage group and scaffold
linkagePos_bf_combine.py : python script to combine bayesfactor files above with linkage map details. Common thing in both these files is the scaffold. This will happen in two steps and will create two files
linkagePosBfmeans_all, east and west : just contains bfmeans and position
linkagePosBfmeansfinal_all, east and west : is the final file with bfmeans, position, and linkage group. THIS IS THE FILE USED FOR CONSTRUCTING MANHATTAN PLOTS
sorted_all, west,east.txt : this is the file with all the details from above sorted in order of linkage groups.
manhattanPlots.pdf,eps : final manhattan plots
all = read.table("linkagePosBfmeansfinal_all", header = TRUE)
east = read.table("linkagePosBfmeansfinal_east", header = TRUE)
west = read.table("linkagePosBfmeansfinal_west", header = TRUE)
x.west = data.frame(west)
x.east = data.frame(east)
x.all = data.frame(all)
sum(is.na(x.all$lg)) #100219
cc = is.na(x.all$lg)
m = which(cc==c("TRUE"))
DF.all = x.all[-m,]
dim(DF.all) #105809 5
#rearrange the columns of the data frame
all = read.table("linkagePosBfmeansfinal_all", header = TRUE)
all.ord = cbind(x.all$lg, x.all$position.1, x.all$scaffold, x.all$position, x.all$bfmeans)
colnames(all.ord) = c("lg", "pos_lg", "scaffold", "position", "bf")
#order the data frame first based on linkage groups, then based on position of scaffold within the linkage groups, then based on scaffold and positon of SNPs
all.ord.slg = all.ord[order(all.ord[,1],all.ord[,2],all.ord[,3], all.ord[,4]),]
gResults = data.frame(all.ord.slg)
#plotting the all bayesfactors with top 0.01% colored
#getting the top 0.01% SNPs
bnd = quantile(gResults$P, probs=0.99) #2061
x = which(gResults$P > bnd)
topcol = rep("gray", length(gResults$P))
topcol[x] <- "slateblue1"
yvalues = gResults$P
plot(gResults$P, col = topcol, ylab= "log10(Bayes Factors)", xlab = "Position on the chromosome", cex.lab = 2, ylim = c(0, max(yvalues)))
#for east and west
x.east = data.frame(east)
e.ord = cbind(x.east$lg, x.east$position.1, x.east$scaffold, x.east$position, x.east$bfmeans)
colnames(e.ord) = c("lg", "pos_lg", "scaffold", "position", "bf")
e.ord.slg = e.ord[order(e.ord[,1],e.ord[,2],e.ord[,3],e.ord[,4]),]
e.ord.slg = data.frame(e.ord.slg)
e.ord.slg$lg[is.na(e.ord.slg$lg)] <- 24
#finding shared SNPs
bndw = quantile(w.ord.slg$bf, probs = 0.99)
bnde = quantile(e.ord.slg$bf, probs = 0.99)
sum(w.ord.slg$bf > bndw & e.ord.slg$bf > bnde)
[1] 58
xshare = which(w.ord.slg$bf > bndw & e.ord.slg$bf > bnde)
#assigning color and shape to shared snps
wcol = rep("gray", length(w.ord.slg$bf))
wcol[xshare] = "red"
wpch = rep(1, length(w.ord.slg$bf))
wpch[xshare] = 19
yvalues = w.ord.slg$bf
plot(w.ord.slg$bf, col = wcol, ylab= "log10(Bayes Factors)", xlab = "Position on the chromosome",> ex.lab = 2, pch = 19, ylim = c(0, max(yvalues)))
all = read.table("linkagePosBfmeansfinal_all", header = TRUE)
#dataframe without NA (not needed)
x.all = data.frame(all)
sum(is.na(x.all$lg)) #100219
cc = is.na(x.all$lg)
m = which(cc==c("TRUE"))
DF.all = x.all[-m,]
dim(DF.all) #105809 5
#rearrange the columns of the data frame
all = read.table("linkagePosBfmeansfinal_all", header = TRUE)
all.ord = cbind(x.all$lg, x.all$position.1, x.all$scaffold, x.all$position, x.all$bfmeans)
colnames(all.ord) = c("lg", "pos_lg", "scaffold", "position", "bf")
#order the data frame first based on linkage groups, then based on position of scaffold within the linkage groups, then based on scaffold and positon of SNPs
all.ord.slg = all.ord[order(all.ord[,1],all.ord[,2],all.ord[,3], all.ord[,4]),]
all.ord.slg = data.frame(all.ord.slg)
all.ord.slg$lg[is.na(all.ord.slg$lg)] <- 24
#plotting the all bayesfactors with top 0.01% colored
#getting the top 0.01% SNPs
bnd = quantile(gResults$P, probs=0.99) #2061
x = which(gResults$P > bnd)
topcol = rep("gray", length(gResults$P))
topcol[x] <- "slateblue1"
yvalues = gResults$P
plot(gResults$P, col = topcol, ylab= "log10(Bayes Factors)", xlab = "Position on the chromosome", cex.lab = 2, ylim = c(0, max(yvalues)))
#for east and west
e.ord = cbind(x.east$lg, x.east$position.1, x.east$scaffold, x.east$position, x.east$bfmeans)
colnames(e.ord) = c("lg", "pos_lg", "scaffold", "position", "bf")
e.ord.slg = e.ord[order(e.ord[,1],e.ord[,2],e.ord[,3],e.ord[,4]),]
e.ord.slg = data.frame(e.ord.slg)
e.ord.slg$lg[is.na(e.ord.slg$lg)] <- 24
#finding shared SNPs
bndw = quantile(w.ord.slg$bf, probs = 0.99)
bnde = quantile(e.ord.slg$bf, probs = 0.99)
sum(w.ord.slg$bf > bndw & e.ord.slg$bf > bnde)
[1] 58
xshare = which(w.ord.slg$bf > bndw & e.ord.slg$bf > bnde)
#assigning color and shape to shared snps
wcol = rep("gray", length(w.ord.slg$bf))
wcol[xshare] = "red"
wpch = rep(1, length(w.ord.slg$bf))
wpch[xshare] = 19
yvalues = w.ord.slg$bf
plot(w.ord.slg$bf, col = wcol, ylab= "log10(Bayes Factors)", xlab = "Position on the chromosome",> ex.lab = 2, pch = 19, ylim = c(0, max(yvalues)))
#plot all three all, west, east
#pdf("manhattanPlots.pdf",width=9,height=9)
setEPS()
postscript("manhattanPlots.eps", width=13, height=8)
par(mfrow=c(3,1))
par(mar=c(4,5.5,3,1.5)
#all
topcol = rep("gray", length(all.ord.slg$bf))
topcol[x] <- "orange"
allpch = rep(1, length(all.ord.slg$bf))
allpch[x] = 19
yvalues = all.ord.slg$bf
plot(all.ord.slg$bf, col = topcol, ylab= "Bayes Factors", xlab = "Position on the chromosome", cex.lab = 1.5,pch = allpch, ylim = c(0, max(yvalues)))
title(main="(a) All populations",adj=0,cex.main=1.6)
#east
ecol = rep("gray", length(e.ord.slg$bf))
ecol[xshare] = "red"
epch = rep(1, length(e.ord.slg$bf))
epch[xshare] = 19
yvalues = e.ord.slg$bf
plot(e.ord.slg$bf, col = ecol, ylab= "Bayes Factors", xlab = "Position on the chromosome",cex.lab = 1.5, pch = epch, ylim = c(0, max(yvalues)))
title(main="(c) melissa-east populations",adj=0,cex.main=1.6)
#west
wcol = rep("gray", length(w.ord.slg$bf))
wcol[xshare] = "red"
wpch = rep(1, length(w.ord.slg$bf))
wpch[xshare] = 19
yvalues = w.ord.slg$bf
plot(w.ord.slg$bf, col = wcol, ylab= "Bayes Factors", xlab = "Position on the chromosome",cex.lab = 1.5, pch = wpch, ylim = c(0, max(yvalues)))
title(main="(b) melissa-west populations",adj=0,cex.main=1.6)
dev.off()
#plotting based on linkage groups
bnd = quantile(all.ord.slg$bf, probs=0.99) #2061
x = which(all.ord.slg$bf > bnd)
lg.all = factor(all.ord.slg$lg)
levels(lg.all)
xtop = all.ord.slg[xshare,]
mticks = c(3940,7675,6722,5892,5744,5828,5545,5184,4748,4184,4755,5026,4210,3594,3538,4043,4124,3442,2873,2585,1906,1128,4793,54428)
#lg.col = c("ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","steelblue2","ivory4")
#lg.col = c("peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","yellowgreen","peachpuff")
lg.col = c("slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","yellowgreen","slategray3")
setEPS()
postscript("manhattanPlots_parallel.eps", width=13, height=9)
par(mfrow=c(2,1))
#plot all
par(mar=c(4,5.5,3,1.5))
a.col = lg.col[all.ord.slg$lg]
plot(all.ord.slg$bf, col = a.col, pch = 20, xaxt = 'n', xlab = " ", ylab = "log10(Bayes Factors)", cex.lab = 1.5,cex=.5)
axis(side = 1, cumsum(mticks), labels = c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","Z","NA"))
xtop = all.ord.slg[xshare,]
#points(x=xshare, y = xtop$bf, col="red2", pch=20)
abline (h = bnd, col = "black", lwd=1, lty = "dashed")
title(main="all populations",adj=0,cex.main=1.6)
dev.off()
#plot east
par(mar=c(4,5.5,3,1.5))
e.col = lg.col[w.ord.slg$lg]
x.e = which(e.ord.slg$bf > bnde)
xtop = e.ord.slg[xshare,]
plot(e.ord.slg$bf, col = e.col, pch = 20, xaxt = 'n', xlab = " ", ylab = "log10(Bayes Factors)", cex.lab = 1.5,cex=.5)
axis(side = 1, cumsum(mticks), labels = c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","Z","NA"))
points(x=xshare, y = xtop$bf, col="red2", pch=20)
abline (h = bnde, col = "black", lwd=1, lty = "dashed")
title(main="(a) melissa-east populations",adj=0,cex.main=1.6)
#plot west
par(mar=c(4,5.5,3,1.5))
w.col = lg.col[w.ord.slg$lg]
x.w = which(w.ord.slg$bf > bndw)
xtop = w.ord.slg[xshare,]
plot(w.ord.slg$bf, col = w.col, pch = 20, xaxt = 'n',xlab = "SNPs on linkage groups", ylab = "log10(Bayes Factors)", cex.lab = 1.5, cex=.5)
axis(side = 1, cumsum(mticks), labels = c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","Z","NA"))
points(x=xshare, y = xtop$bf, col="red2", pch=20)
abline (h = bndw, col = "black", lwd=1, lty = "dashed")
title(main="(b) melissa-west populations",adj=0,cex.main=1.6)
dev.off()
#getting chromosome ranges
chr1 = (subset(w.ord.slg, w.ord.slg$lg == "1")) #7880
chr2 = (subset(w.ord.slg, w.ord.slg$lg == "2")) #7470
chr3 = (subset(w.ord.slg, w.ord.slg$lg == "3")) #5973
chr4 = (subset(w.ord.slg, w.ord.slg$lg == "4")) #5810
chr5 = (subset(w.ord.slg, w.ord.slg$lg == "5")) #5678
chr6 = (subset(w.ord.slg, w.ord.slg$lg == "6")) #5978
chr7 = (subset(w.ord.slg, w.ord.slg$lg == "7")) #5111
chr8 = (subset(w.ord.slg, w.ord.slg$lg == "8")) #5257
chr9 = (subset(w.ord.slg, w.ord.slg$lg == "9")) #4239
chr10 = (subset(w.ord.slg, w.ord.slg$lg == "10")) #4128
chr11 = (subset(w.ord.slg, w.ord.slg$lg == "11")) #5382
chr12 = (subset(w.ord.slg, w.ord.slg$lg == "12")) #4670
chr13 = (subset(w.ord.slg, w.ord.slg$lg == "13")) #3750
chr14 = (subset(w.ord.slg, w.ord.slg$lg == "14")) #3438
chr15 = (subset(w.ord.slg, w.ord.slg$lg == "15")) #3638
chr16 = (subset(w.ord.slg, w.ord.slg$lg == "16")) #4447
chr17 = (subset(w.ord.slg, w.ord.slg$lg == "17")) #3800
chr18 = (subset(w.ord.slg, w.ord.slg$lg == "18")) #3083
chr19 = (subset(w.ord.slg, w.ord.slg$lg == "19")) #2663
chr20 = (subset(w.ord.slg, w.ord.slg$lg == "20")) #2506
chr21 = (subset(w.ord.slg, w.ord.slg$lg == "21")) #1306
chr22 = (subset(w.ord.slg, w.ord.slg$lg == "22")) #950
chr23 = (subset(w.ord.slg, w.ord.slg$lg == "23")) #8636
chr24 = (subset(w.ord.slg, w.ord.slg$lg == "24")) #100219