FOLDER ON CLUSTER: /uufs/chpc.utah.edu/common/home/gompert-group1/projects/lmelissa_hostAdaptation/bayPass/linkagePlots
To test for the number of SNPs present on Z-chromosome, I first prepared a file with linkage group information. This file is saved in the same folder where files for creating Manhattan plots are saved.
Here is the code in R to figure out the SNPs on a). Z- chromosome b). common between parallel and Z-chromosome:
all = read.table("linkagePosBfmeansfinal_all", header=TRUE)
x.all = data.frame(all)
all.ord = cbind(x.all$lg, x.all$position.1, x.all$scaffold, x.all$position, x.all$bfmeans)
colnames(all.ord) = c("lg", "pos_lg", "scaffold", "position", "bf")
all.ord.slg = all.ord[order(all.ord[,1],all.ord[,2],all.ord[,3], all.ord[,4]),]
df.all = data.frame(all.ord.slg)
#assigning all the SNPs with no linkage group and position info to chromosome 24
df.all$lg[is.na(df.all$lg)] <- 24
#finding the SNPs
sex = df.all[which(df.all$lg == 23),]
8636 of the 206,028 SNPs lie on the sex chromosome or 0.04191663 of the total SNPs on LG 23 (the Z)
#reading in the 58 shared SNPs details
top = read.table("top58snps_scafPosBayesfactors", header=TRUE)
#check the common SNPs between sex linked SNPs and shared 58 SNPs
which(top$position %in% sex$position)
6 out of the 58 SNPs lie on the sex chromosome 0.1034483 are on Z for the top 58. This is a significant enrichment.
#randomization tests for all,eastern and western populations
## all populations
#for 58 shared
null = rep(NA, 10000)
for (i in 1:10000){
n = 58
tnull = sample(1:206028, n, replace=F)
null[i] = sum(all.ord.slg$lg[tnull] == 23)
}
obs = 6
xfold = obs / mean(null) #2.481082
p = mean(null >= obs)
p #0.0329
#for top 0.01% quantiles
for (i in 1:10000){
n = 2061
tnull = sample(1:206028, n, replace=F)
null[i] = sum(all.ord.slg$lg[tnull] == 23)
}
obs_aq = sum(all_top0.01$position %in% sex$position) #195
xfold = obs_aq / mean(null)
xfold #2.257705
p = mean(null >= obs_aq)
p #0
##### western populations
#for 58 shared
null = rep(NA, 10000)
for (i in 1:10000){
n = 58
tnull = sample(1:206028, n, replace=F)
null[i] = sum(w.ord.slg$lg[tnull] == 23)
}
obs = 6
xfold = obs / mean(null) #2.480774
p = 0.0339
#for top 0.01% quantiles
for (i in 1:10000){
n = 2061
tnull = sample(1:206028, n, replace=F)
null[i] = sum(w.ord.slg$lg[tnull] == 23)
}
obs_wq = sum(w_top0.01$position %in% sex$position) #193
xfold = obs_wq / mean(null)
xfold #2.23
p = mean(null >= obs_wq)
p #0
### eastern populations
#shared 58
null = rep(NA, 10000)
for (i in 1:10000){
n = 58
tnull = sample(1:206028, n, replace=F)
null[i] = sum(e.ord.slg$lg[tnull] == 23)
}
obs = 6
xfold = obs / mean(null) #2.46812
p = mean(null >= obs)
p = 0.0359
#west 0.01%
null = rep(NA, 10000)
for (i in 1:10000){
n = 2061
tnull = sample(1:206028, n, replace=F)
null[i] = sum(e.ord.slg$lg[tnull] == 23)
}
obs_eq = sum(e_top0.01$position %in% sex$position) #134
xfold = obs_eq / mean(null)
xfold #1.552944
p = mean(null >= obs_eq)
p #0