File Names and information:
bfmeansWithScafpositions_all, east, west #files with bf and scaffold from baypass #206,028 SNPs
main_structAnnot.txt #structural annot file #206,049
bf_strucAnnot_combine.py #script to combine file above
all_structAnnot_bfmeans, west_structAnnot_bfmeans, east_structAnnot_bfmeans, shared58_structAnnot_bfmeans #output for all, east and west and 58 shared SNPs from above script
top58snps_scafPosBayesfactors : file baypass and scaffold and positions information of 58 shared SNPs
top_all : file with 58 shared snps info and just all bayesfactors. Used to merge bayesfactors with structural annotation info
structAnnot_barplot.pdf: made this plot for ESA meeting. Need to remake this again.
#R code for randomization example
sum(is.na(west$ongene)==FALSE)
sum(is.na(west$ongene[x])==FALSE)
mean(is.na(west$ongene[x])==FALSE)
mean(is.na(west$ongene)==FALSE)
#for top 0.001%
x=c(1:20600)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(all)[1],20600,replace=FALSE)
null[i]=mean(is.na(all$ongene[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(all$ongene[x])==FALSE)
obs
p
xfold
#barplot
counts = (c(0.39,0.37,0.28,0.27, 0.39,0.37,0.28,0.27,0.39,0.37,0.28,0.27))
barplot(counts, col = c("pink","green"), xlab = "On gene (pink) and On codon (green)", ylab = "Observed number of SNVs", cex.lab = 1.5, ylim = c(0,0.5))
#results on May 2nd, 2017 after dropping NA from the files
#getting the top quantiles for the snps
all = read.table("all_structAnnot_bfmeans", header=T)
east = read.table("east_structAnnot_bfmeans", header=T)
west = read.table("west_structAnnot_bfmeans", header=T)
shared = read.table("shared58_structAnnot_bfmeans", header=T)
#data frames without NA
#all populations
x.all = data.frame(all)
sum(is.na(x.all$bfmeans)) #19
cc = is.na(x.all$bfmeans)
m = which(cc==c("TRUE"))
DF.all = x.all[-m,]
dim(DF.all) #206028 15
#western populations
x.west = data.frame(west)
sum(is.na(x.west$bfmeans)) #19
cc = is.na(x.west$bfmeans)
m = which(cc==c("TRUE"))
DF.west = x.west[-m,]
dim(DF.west) #206028 15
#eastern populations
x.east = data.frame(east)
sum(is.na(x.east$bfmeans)) #19
cc = is.na(x.east$bfmeans)
m = which(cc==c("TRUE"))
DF.east = x.east[-m,]
## randomization
bndsw = quantile(DF.west$bfmeans,probs=c(0.9,0.99,0.999,0.9999))
sum(DF.west$bfmeans > bndsw[1]) #20603
sum(DF.west$bfmeans > bndsw[2]) #2061
sum(DF.west$bfmeans > bndsw[3]) #207
sum(DF.west$bfmeans > bndsw[4]) #27
sum(is.na(west$ongene)==FALSE) #58760
mean(is.na(west$ongene)==FALSE) #0.2851
#western
#for top 1%
#ongene
x=c(1:20603)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.west)[1],20603,replace=FALSE)
null[i]=mean(is.na(DF.west$ongene[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.west$ongene[x])==FALSE)
obs
p
xfold
#oncds
x=c(1:20603)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.west)[1],20603,replace=FALSE)
null[i]=mean(is.na(DF.west$oncds[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.west$oncds[x])==FALSE)
obs
p
xfold
#top 0.01%
#ongene
x=c(1:2061)
null<-rep(NA,100)
for(i in 1:100){
y<-sample(1:dim(DF.west)[1],2061,replace=FALSE)
null[i]=mean(is.na(DF.west$ongene[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.west$ongene[x])==FALSE)
obs
p
xfold
#oncds
x=c(1:2061)
null<-rep(NA,100)
for(i in 1:100){
y<-sample(1:dim(DF.west)[1],2061,replace=FALSE)
null[i]=mean(is.na(DF.west$oncds[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.west$oncds[x])==FALSE)
obs
p
xfold
#top 0.001%
#ongene
x=c(1:207)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.west)[1],207,replace=FALSE)
null[i]=mean(is.na(DF.west$ongene[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.west$ongene[x])==FALSE)
obs
p
xfold
#oncds
x=c(1:207)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.west)[1],207,replace=FALSE)
null[i]=mean(is.na(DF.west$oncds[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.west$oncds[x])==FALSE)
obs
p
xfold
############### eastern ####################################
#for top 1%
#ongene
x=c(1:20603)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.east)[1],20603,replace=FALSE)
null[i]=mean(is.na(DF.east$ongene[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.east$ongene[x])==FALSE)
obs
p
xfold
#oncds
x=c(1:20603)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.east)[1],20603,replace=FALSE)
null[i]=mean(is.na(DF.east$oncds[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.east$oncds[x])==FALSE)
obs
p
xfold
#top 0.01%
#ongene
x=c(1:2061)
null<-rep(NA,100)
for(i in 1:100){
y<-sample(1:dim(DF.east)[1],2061,replace=FALSE)
null[i]=mean(is.na(DF.east$ongene[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.east$ongene[x])==FALSE)
obs
p
xfold
#oncds
x=c(1:2061)
null<-rep(NA,100)
for(i in 1:100){
y<-sample(1:dim(DF.east)[1],2061,replace=FALSE)
null[i]=mean(is.na(DF.east$oncds[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.east$oncds[x])==FALSE)
obs
p
xfold
#top 0.001%
#ongene
x=c(1:207)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.east)[1],207,replace=FALSE)
null[i]=mean(is.na(DF.east$ongene[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.east$ongene[x])==FALSE)
obs
p
xfold
#oncds
x=c(1:207)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.east)[1],207,replace=FALSE)
null[i]=mean(is.na(DF.east$oncds[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.east$oncds[x])==FALSE)
obs
p
xfold
################# all ##################################
#for top 1%
#ongene
x=c(1:20603)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.all)[1],20603,replace=FALSE)
null[i]=mean(is.na(DF.all$ongene[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.all$ongene[x])==FALSE)
obs
p
xfold
#oncds
x=c(1:20603)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.all)[1],20603,replace=FALSE)
null[i]=mean(is.na(DF.all$oncds[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.all$oncds[x])==FALSE)
obs
p
xfold
#top 0.01%
#ongene
x=c(1:2061)
null<-rep(NA,100)
for(i in 1:100){
y<-sample(1:dim(DF.all)[1],2061,replace=FALSE)
null[i]=mean(is.na(DF.all$ongene[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.all$ongene[x])==FALSE)
obs
p
xfold
#oncds
x=c(1:2061)
null<-rep(NA,100)
for(i in 1:100){
y<-sample(1:dim(DF.all)[1],2061,replace=FALSE)
null[i]=mean(is.na(DF.all$oncds[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.all$oncds[x])==FALSE)
obs
p
xfold
#top 0.001%
#ongene
x=c(1:207)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.all)[1],207,replace=FALSE)
null[i]=mean(is.na(DF.all$ongene[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.all$ongene[x])==FALSE)
obs
p
xfold
#oncds
x=c(1:207)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.all)[1],207,replace=FALSE)
null[i]=mean(is.na(DF.all$oncds[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.all$oncds[x])==FALSE)
obs
p
xfold
################# shared 58 SNPs ###########################
#ongene
x=c(1:58)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.shared)[1],58,replace=FALSE)
null[i]=mean(is.na(DF.shared$ongene[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.shared$ongene[x])==FALSE)
obs
p
xfold
#oncds
x=c(1:58)
null<-rep(NA,1000)
for(i in 1:1000){
y<-sample(1:dim(DF.shared)[1],58,replace=FALSE)
null[i]=mean(is.na(DF.shared$oncds[y])==FALSE)
}
p = mean(null >= obs)
xfold = obs / mean(null)
obs = mean(is.na(DF.shared$oncds[x])==FALSE)
obs
p
xfold
FINAL RESULTS