manhattan plots

These are the manhattan plots for the bayesfactors. These will be created with the information for chromosomes/linkage groups, cM position of the scaffold on the linkage group, scaffold and position of SNP and bayesfactors.

The files for this are in the folder : /uufs/chpc.utah.edu/common/home/u6007910/projects/lmelissa_hostAdaptation/bayPass/linkagePlots

Files in this folder are as follows:

bfmeansWithScafPosition_all,east,west : baypass output files for all populations, eastern and western populations
orderedLinkageMap.txt : Linkage map details with linkage group, position of scaffold on the linkage group and scaffold
linkagePos_bf_combine.py : python script to combine bayesfactor files above with linkage map details. Common thing in both these files is the scaffold. This will happen in two steps and will create two files
linkagePosBfmeans_all, east and west : just contains bfmeans and position
linkagePosBfmeansfinal_all, east and west : is the final file with bfmeans, position, and linkage group. THIS IS THE FILE USED FOR CONSTRUCTING MANHATTAN PLOTS
sorted_all, west,east.txt : this is the file with all the details from above sorted in order of linkage groups.
manhattanPlots.pdf,eps : final manhattan plots

all = read.table("linkagePosBfmeansfinal_all", header = TRUE)

east = read.table("linkagePosBfmeansfinal_east", header = TRUE)

west = read.table("linkagePosBfmeansfinal_west", header = TRUE)

x.west = data.frame(west)

x.east = data.frame(east)

x.all = data.frame(all)

sum(is.na(x.all$lg)) #100219

cc = is.na(x.all$lg)

m = which(cc==c("TRUE"))

DF.all = x.all[-m,]

dim(DF.all) #105809 5

#rearrange the columns of the data frame

all = read.table("linkagePosBfmeansfinal_all", header = TRUE)

all.ord = cbind(x.all$lg, x.all$position.1, x.all$scaffold, x.all$position, x.all$bfmeans)

colnames(all.ord) = c("lg", "pos_lg", "scaffold", "position", "bf")

#order the data frame first based on linkage groups, then based on position of scaffold within the linkage groups, then based on scaffold and positon of SNPs

all.ord.slg = all.ord[order(all.ord[,1],all.ord[,2],all.ord[,3], all.ord[,4]),]

gResults = data.frame(all.ord.slg)

#plotting the all bayesfactors with top 0.01% colored

#getting the top 0.01% SNPs

bnd = quantile(gResults$P, probs=0.99) #2061

x = which(gResults$P > bnd)

topcol = rep("gray", length(gResults$P))

topcol[x] <- "slateblue1"

yvalues = gResults$P

plot(gResults$P, col = topcol, ylab= "log10(Bayes Factors)", xlab = "Position on the chromosome", cex.lab = 2, ylim = c(0, max(yvalues)))

#for east and west

x.east = data.frame(east)

e.ord = cbind(x.east$lg, x.east$position.1, x.east$scaffold, x.east$position, x.east$bfmeans)

colnames(e.ord) = c("lg", "pos_lg", "scaffold", "position", "bf")

e.ord.slg = e.ord[order(e.ord[,1],e.ord[,2],e.ord[,3],e.ord[,4]),]

e.ord.slg = data.frame(e.ord.slg)

e.ord.slg$lg[is.na(e.ord.slg$lg)] <- 24

#finding shared SNPs

bndw = quantile(w.ord.slg$bf, probs = 0.99)

bnde = quantile(e.ord.slg$bf, probs = 0.99)

sum(w.ord.slg$bf > bndw & e.ord.slg$bf > bnde)

[1] 58

xshare = which(w.ord.slg$bf > bndw & e.ord.slg$bf > bnde)

#assigning color and shape to shared snps

wcol = rep("gray", length(w.ord.slg$bf))

wcol[xshare] = "red"

wpch = rep(1, length(w.ord.slg$bf))

wpch[xshare] = 19

yvalues = w.ord.slg$bf

plot(w.ord.slg$bf, col = wcol, ylab= "log10(Bayes Factors)", xlab = "Position on the chromosome",> ex.lab = 2, pch = 19, ylim = c(0, max(yvalues)))

all = read.table("linkagePosBfmeansfinal_all", header = TRUE)

#dataframe without NA (not needed)

x.all = data.frame(all)

sum(is.na(x.all$lg)) #100219

cc = is.na(x.all$lg)

m = which(cc==c("TRUE"))

DF.all = x.all[-m,]

dim(DF.all) #105809 5

#rearrange the columns of the data frame

all = read.table("linkagePosBfmeansfinal_all", header = TRUE)

all.ord = cbind(x.all$lg, x.all$position.1, x.all$scaffold, x.all$position, x.all$bfmeans)

colnames(all.ord) = c("lg", "pos_lg", "scaffold", "position", "bf")

#order the data frame first based on linkage groups, then based on position of scaffold within the linkage groups, then based on scaffold and positon of SNPs

all.ord.slg = all.ord[order(all.ord[,1],all.ord[,2],all.ord[,3], all.ord[,4]),]

all.ord.slg = data.frame(all.ord.slg)

all.ord.slg$lg[is.na(all.ord.slg$lg)] <- 24

#plotting the all bayesfactors with top 0.01% colored

#getting the top 0.01% SNPs

bnd = quantile(gResults$P, probs=0.99) #2061

x = which(gResults$P > bnd)

topcol = rep("gray", length(gResults$P))

topcol[x] <- "slateblue1"

yvalues = gResults$P

plot(gResults$P, col = topcol, ylab= "log10(Bayes Factors)", xlab = "Position on the chromosome", cex.lab = 2, ylim = c(0, max(yvalues)))

#for east and west

e.ord = cbind(x.east$lg, x.east$position.1, x.east$scaffold, x.east$position, x.east$bfmeans)

colnames(e.ord) = c("lg", "pos_lg", "scaffold", "position", "bf")

e.ord.slg = e.ord[order(e.ord[,1],e.ord[,2],e.ord[,3],e.ord[,4]),]

e.ord.slg = data.frame(e.ord.slg)

e.ord.slg$lg[is.na(e.ord.slg$lg)] <- 24

#finding shared SNPs

bndw = quantile(w.ord.slg$bf, probs = 0.99)

bnde = quantile(e.ord.slg$bf, probs = 0.99)

sum(w.ord.slg$bf > bndw & e.ord.slg$bf > bnde)

[1] 58

xshare = which(w.ord.slg$bf > bndw & e.ord.slg$bf > bnde)

#assigning color and shape to shared snps

wcol = rep("gray", length(w.ord.slg$bf))

wcol[xshare] = "red"

wpch = rep(1, length(w.ord.slg$bf))

wpch[xshare] = 19

yvalues = w.ord.slg$bf

plot(w.ord.slg$bf, col = wcol, ylab= "log10(Bayes Factors)", xlab = "Position on the chromosome",> ex.lab = 2, pch = 19, ylim = c(0, max(yvalues)))

#plot all three all, west, east

#pdf("manhattanPlots.pdf",width=9,height=9)

setEPS()

postscript("manhattanPlots.eps", width=13, height=8)

par(mfrow=c(3,1))

par(mar=c(4,5.5,3,1.5)

#all

topcol = rep("gray", length(all.ord.slg$bf))

topcol[x] <- "orange"

allpch = rep(1, length(all.ord.slg$bf))

allpch[x] = 19

yvalues = all.ord.slg$bf

plot(all.ord.slg$bf, col = topcol, ylab= "Bayes Factors", xlab = "Position on the chromosome", cex.lab = 1.5,pch = allpch, ylim = c(0, max(yvalues)))

title(main="(a) All populations",adj=0,cex.main=1.6)

#east

ecol = rep("gray", length(e.ord.slg$bf))

ecol[xshare] = "red"

epch = rep(1, length(e.ord.slg$bf))

epch[xshare] = 19

yvalues = e.ord.slg$bf

plot(e.ord.slg$bf, col = ecol, ylab= "Bayes Factors", xlab = "Position on the chromosome",cex.lab = 1.5, pch = epch, ylim = c(0, max(yvalues)))

title(main="(c) melissa-east populations",adj=0,cex.main=1.6)

#west

wcol = rep("gray", length(w.ord.slg$bf))

wcol[xshare] = "red"

wpch = rep(1, length(w.ord.slg$bf))

wpch[xshare] = 19

yvalues = w.ord.slg$bf

plot(w.ord.slg$bf, col = wcol, ylab= "Bayes Factors", xlab = "Position on the chromosome",cex.lab = 1.5, pch = wpch, ylim = c(0, max(yvalues)))

title(main="(b) melissa-west populations",adj=0,cex.main=1.6)

dev.off()

#plotting based on linkage groups

bnd = quantile(all.ord.slg$bf, probs=0.99) #2061

x = which(all.ord.slg$bf > bnd)

lg.all = factor(all.ord.slg$lg)

levels(lg.all)

xtop = all.ord.slg[xshare,]

mticks = c(3940,7675,6722,5892,5744,5828,5545,5184,4748,4184,4755,5026,4210,3594,3538,4043,4124,3442,2873,2585,1906,1128,4793,54428)

#lg.col = c("ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","ivory4","pink2","steelblue2","ivory4")

#lg.col = c("peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","peachpuff","peru","yellowgreen","peachpuff")

lg.col = c("slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","slategray3","slategray1","yellowgreen","slategray3")

setEPS()

postscript("manhattanPlots_parallel.eps", width=13, height=9)

par(mfrow=c(2,1))

#plot all

par(mar=c(4,5.5,3,1.5))

a.col = lg.col[all.ord.slg$lg]

plot(all.ord.slg$bf, col = a.col, pch = 20, xaxt = 'n', xlab = " ", ylab = "log10(Bayes Factors)", cex.lab = 1.5,cex=.5)

axis(side = 1, cumsum(mticks), labels = c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","Z","NA"))

xtop = all.ord.slg[xshare,]

#points(x=xshare, y = xtop$bf, col="red2", pch=20)

abline (h = bnd, col = "black", lwd=1, lty = "dashed")

title(main="all populations",adj=0,cex.main=1.6)

dev.off()

#plot east

par(mar=c(4,5.5,3,1.5))

e.col = lg.col[w.ord.slg$lg]

x.e = which(e.ord.slg$bf > bnde)

xtop = e.ord.slg[xshare,]

plot(e.ord.slg$bf, col = e.col, pch = 20, xaxt = 'n', xlab = " ", ylab = "log10(Bayes Factors)", cex.lab = 1.5,cex=.5)

axis(side = 1, cumsum(mticks), labels = c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","Z","NA"))

points(x=xshare, y = xtop$bf, col="red2", pch=20)

abline (h = bnde, col = "black", lwd=1, lty = "dashed")

title(main="(a) melissa-east populations",adj=0,cex.main=1.6)

#plot west

par(mar=c(4,5.5,3,1.5))

w.col = lg.col[w.ord.slg$lg]

x.w = which(w.ord.slg$bf > bndw)

xtop = w.ord.slg[xshare,]

plot(w.ord.slg$bf, col = w.col, pch = 20, xaxt = 'n',xlab = "SNPs on linkage groups", ylab = "log10(Bayes Factors)", cex.lab = 1.5, cex=.5)

axis(side = 1, cumsum(mticks), labels = c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","Z","NA"))

points(x=xshare, y = xtop$bf, col="red2", pch=20)

abline (h = bndw, col = "black", lwd=1, lty = "dashed")

title(main="(b) melissa-west populations",adj=0,cex.main=1.6)

dev.off()

#getting chromosome ranges

chr1 = (subset(w.ord.slg, w.ord.slg$lg == "1")) #7880

chr2 = (subset(w.ord.slg, w.ord.slg$lg == "2")) #7470

chr3 = (subset(w.ord.slg, w.ord.slg$lg == "3")) #5973

chr4 = (subset(w.ord.slg, w.ord.slg$lg == "4")) #5810

chr5 = (subset(w.ord.slg, w.ord.slg$lg == "5")) #5678

chr6 = (subset(w.ord.slg, w.ord.slg$lg == "6")) #5978

chr7 = (subset(w.ord.slg, w.ord.slg$lg == "7")) #5111

chr8 = (subset(w.ord.slg, w.ord.slg$lg == "8")) #5257

chr9 = (subset(w.ord.slg, w.ord.slg$lg == "9")) #4239

chr10 = (subset(w.ord.slg, w.ord.slg$lg == "10")) #4128

chr11 = (subset(w.ord.slg, w.ord.slg$lg == "11")) #5382

chr12 = (subset(w.ord.slg, w.ord.slg$lg == "12")) #4670

chr13 = (subset(w.ord.slg, w.ord.slg$lg == "13")) #3750

chr14 = (subset(w.ord.slg, w.ord.slg$lg == "14")) #3438

chr15 = (subset(w.ord.slg, w.ord.slg$lg == "15")) #3638

chr16 = (subset(w.ord.slg, w.ord.slg$lg == "16")) #4447

chr17 = (subset(w.ord.slg, w.ord.slg$lg == "17")) #3800

chr18 = (subset(w.ord.slg, w.ord.slg$lg == "18")) #3083

chr19 = (subset(w.ord.slg, w.ord.slg$lg == "19")) #2663

chr20 = (subset(w.ord.slg, w.ord.slg$lg == "20")) #2506

chr21 = (subset(w.ord.slg, w.ord.slg$lg == "21")) #1306

chr22 = (subset(w.ord.slg, w.ord.slg$lg == "22")) #950

chr23 = (subset(w.ord.slg, w.ord.slg$lg == "23")) #8636

chr24 = (subset(w.ord.slg, w.ord.slg$lg == "24")) #100219

Page updated

Google Sites

Report abuse