################## read in popanc and take means ##############################
#### aims 1 ############
bcr1<-read.table("../../popanc/outfiles/aims/out_aims1_BCR.txt", header=T, sep=",")
bic1<-read.table("../../popanc/outfiles/aims/out_aims1_BIC.txt", header=T, sep=",")
bld1<-read.table("../../popanc/outfiles/aims/out_aims1_BLD.txt", header=T, sep=",")
bnp1<-read.table("../../popanc/outfiles/aims/out_aims1_BNP.txt", header=T, sep=",")
btb1<-read.table("../../popanc/outfiles/aims/out_aims1_BTB.txt", header=T, sep=",")
frc1<-read.table("../../popanc/outfiles/aims/out_aims1_FRC.txt", header=T, sep=",")
pin1<-read.table("../../popanc/outfiles/aims/out_aims1_PIN.txt", header=T, sep=",")
psp1<-read.table("../../popanc/outfiles/aims/out_aims1_PSP.txt", header=T, sep=",")
rdl1<-read.table("../../popanc/outfiles/aims/out_aims1_RDL.txt", header=T, sep=",")
rnv1<-read.table("../../popanc/outfiles/aims/out_aims1_RNV.txt", header=T, sep=",")
#read in mappos file ####
mappos1<-read.table("../../popanc/infiles/aims/mappos1.txt", header=F, sep=":")
## take ancestry means across populations ####
ancmean1<-(bcr1$mean + bic1$mean + bld1$mean + bnp1$mean + btb1$mean + frc1$mean + rdl1$mean + rnv1$mean + pin1$mean + psp1$mean) / 10
## folding the values ####
ancmean_fold1<-ancmean1
ancmean_fold1[ancmean_fold1 < 0.5]<-1-ancmean_fold1[ancmean_fold1 < 0.5]
## combine stuff
anc1<-cbind(mappos1, ancmean1)
colnames(anc1)<-c("scaffold","position","ancmean")
## write to file
write.table(anc1, "aims1_ancestry.txt", sep="\t", quote=FALSE, col.names=TRUE, row.names=FALSE)
#### aims 2 #############
bcr2<-read.table("../../popanc/outfiles/aims/out_aims2_BCR.txt", header=T, sep=",")
bic2<-read.table("../../popanc/outfiles/aims/out_aims2_BIC.txt", header=T, sep=",")
bld2<-read.table("../../popanc/outfiles/aims/out_aims2_BLD.txt", header=T, sep=",")
bnp2<-read.table("../../popanc/outfiles/aims/out_aims2_BNP.txt", header=T, sep=",")
btb2<-read.table("../../popanc/outfiles/aims/out_aims2_BTB.txt", header=T, sep=",")
frc2<-read.table("../../popanc/outfiles/aims/out_aims2_FRC.txt", header=T, sep=",")
pin2<-read.table("../../popanc/outfiles/aims/out_aims2_PIN.txt", header=T, sep=",")
psp2<-read.table("../../popanc/outfiles/aims/out_aims2_PSP.txt", header=T, sep=",")
rdl2<-read.table("../../popanc/outfiles/aims/out_aims2_RDL.txt", header=T, sep=",")
rnv2<-read.table("../../popanc/outfiles/aims/out_aims2_RNV.txt", header=T, sep=",")
#read in mappos file ####
mappos2<-read.table("../../popanc/infiles/aims/mappos2.txt", header=F, sep=":")
## take ancestry means across populations ####
ancmean2<-(bcr2$mean + bic2$mean + bld2$mean + bnp2$mean + btb2$mean + frc2$mean + rdl2$mean + rnv2$mean + pin2$mean + psp2$mean) / 20
## folding the values ####
ancmean_fold2<-ancmean2
ancmean_fold2[ancmean_fold2 < 0.5]<-2-ancmean_fold2[ancmean_fold2 < 0.5]
## combine stuff
anc2<-cbind(mappos2, ancmean2)
colnames(anc2)<-c("scaffold","position","ancmean")
## write to file
write.table(anc2, "aims2_ancestry.txt", sep="\t", quote=FALSE, col.names=TRUE, row.names=FALSE)
#### aims 3 ############
bcr3<-read.table("../../popanc/outfiles/aims/out_aims3_BCR.txt", header=T, sep=",")
bic3<-read.table("../../popanc/outfiles/aims/out_aims3_BIC.txt", header=T, sep=",")
bld3<-read.table("../../popanc/outfiles/aims/out_aims3_BLD.txt", header=T, sep=",")
bnp3<-read.table("../../popanc/outfiles/aims/out_aims3_BNP.txt", header=T, sep=",")
btb3<-read.table("../../popanc/outfiles/aims/out_aims3_BTB.txt", header=T, sep=",")
frc3<-read.table("../../popanc/outfiles/aims/out_aims3_FRC.txt", header=T, sep=",")
pin3<-read.table("../../popanc/outfiles/aims/out_aims3_PIN.txt", header=T, sep=",")
psp3<-read.table("../../popanc/outfiles/aims/out_aims3_PSP.txt", header=T, sep=",")
rdl3<-read.table("../../popanc/outfiles/aims/out_aims3_RDL.txt", header=T, sep=",")
rnv3<-read.table("../../popanc/outfiles/aims/out_aims3_RNV.txt", header=T, sep=",")
#read in mappos file ####
mappos3<-read.table("../../popanc/infiles/aims/mappos3.txt", header=F, sep=":")
## take ancestry means across populations ####
ancmean3<-(bcr3$mean + bic3$mean + bld3$mean + bnp3$mean + btb3$mean + frc3$mean + rdl3$mean + rnv3$mean + pin3$mean + psp3$mean) / 10
## folding the values ####
ancmean_fold3<-ancmean3
ancmean_fold3[ancmean_fold3 < 0.5]<-1-ancmean_fold3[ancmean_fold3 < 0.5]
## combine stuff
anc3<-cbind(mappos3, ancmean3, ancmean_fold3)
colnames(anc3)<-c("scaffold","position","ancmean","ancmeanfold")
## write to file
write.table(anc3, "aims3_ancestry.txt", sep="\t", quote=FALSE, col.names=TRUE, row.names=FALSE)
#################### clines ###############################################################
### pct 1 ###
pct1alpha<-read.table("../../clines/outfiles/bgcout_pct1_0.hdf5_alpha.txt", header = F, sep = ",")
pct1beta<-read.table("../../clines/outfiles/bgcout_pct1_0.hdf5_beta.txt", header = F, sep = ",")
cline1<-cbind(mappos1, pct1alpha[,1], pct1beta[,1])
colnames(cline1)<-c("scaffold","position","alpha","beta")
write.table(cline1, "../clines/clines_pct1.txt", sep="\t", quote=FALSE, col.names=TRUE, row.names=FALSE)
### pct 2 ###
pct2alpha<-read.table("../../clines/outfiles/bgcout_pct2_0.hdf5_alpha.txt", header = F, sep = ",")
pct2beta<-read.table("../../clines/outfiles/bgcout_pct2_0.hdf5_beta.txt", header = F, sep = ",")
cline2<-cbind(mappos2, pct2alpha[,2], pct2beta[,2])
colnames(cline2)<-c("scaffold","position","alpha","beta")
write.table(cline2, "../clines/clines_pct2.txt", sep="\t", quote=FALSE, col.names=TRUE, row.names=FALSE)
### pct 3 ###
pct3alpha<-read.table("../../clines/outfiles/bgcout_pct3_0.hdf5_alpha.txt", header = F, sep = ",")
pct3beta<-read.table("../../clines/outfiles/bgcout_pct3_0.hdf5_beta.txt", header = F, sep = ",")
cline3med<-read.table("../../clines/outfiles/pct3_alphabeta_mcmcmedian.txt", header=TRUE)
cline3<-cbind(mappos3, pct3alpha[,1], pct3beta[,1], cline3med)
colnames(cline3)<-c("scaffold","position","alpha","beta","alpha_med", "beta_med")
write.table(cline3, "../clines/clines_pct3.txt", sep="\t", quote=FALSE, col.names=TRUE, row.names=FALSE)
######## combine the clines and popanc files ##############################################
## clines files #####
pct1<-read.table("clines/clines_pct1.txt", header=T)
pct2<-read.table("clines/clines_pct2.txt", header=T)
pct3<-read.table("clines/clines_pct3.txt", header=T)
### popanc files ####
aims1<-read.table("popanc/aims1_ancestry.txt", header=T)
aims2<-read.table("popanc/aims2_ancestry.txt", header=T)
aims3<-read.table("popanc/aims3_ancestry.txt", header=T)
### combine the files ###
comb1<-cbind(pct1,aims1$ancmean)
colnames(comb1)<-c("scaffold","position","alpha","beta","ancmean")
comb2<-cbind(pct2,aims2$ancmean)
colnames(comb2)<-c("scaffold","position","alpha","beta","ancmean")
comb3<-cbind(pct3$scaffold, pct3$position, pct3$alpha, pct3$beta, aims3$ancmean)
colnames(comb3)<-c("scaffold","position","alpha","beta","ancmean")
## this file is for predictability ###
comb3_fold<-cbind(pct3$scaffold, pct3$position, pct3$alpha_med, pct3$beta_med, aims3$ancmeanfold)
colnames(comb3_fold)<-c("scaffold","position","alphamed","betamed","ancfold")
#write out the files
write.table(comb1, file = "clines_popanc1.txt", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE)
write.table(comb2, file = "clines_popanc2.txt", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE)
write.table(comb3, file = "clines_popanc3.txt", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE)
## this file is for predictability ###
write.table(comb3_fold, file = "clines_popanc3_fold.txt", col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE)
###########################################################################################
#read in the files
pct1<-read.table("comb1_clines_popanc_annot.txt", header=T, na.strings=c("","NA"), sep="\t")
pct2<-read.table("comb2_clines_popanc_annot.txt", header=T, na.strings=c("","NA"), sep="\t")
pct3<-read.table("comb3_clines_popanc_annot.txt", header=T, na.strings=c("","NA"), sep="\t")
pct3_fold<-read.table("comb3_clines_popanc_fold_annot.txt", header=T, na.strings=c("","NA"), sep="\t")
##### functions for randomization ####################################
randomFunc_on<-function(afile=NA, q=NA, column = NA, region = NA){
{
topbnds<-which(afile[[column]] > quantile(afile[[column]], probs = q, na.rm=T))
topsnps<-afile[topbnds,]
null<-rep(NA,10000)
for (i in 1:10000){
n = length(topbnds)
bnull = sample(1:length(afile[,1]), n, replace=F)
null[i] = sum(afile[[region]][bnull] == 1)
}
obs = sum(topsnps[[region]] == 1)
p<-mean(null >= obs)
xfold<-obs / mean(null)
resultdata<-cbind(obs, p, xfold)
}
return(resultdata)
}
randomFunc_near<-function(afile=NA, q=NA, column = NA, region1 = NA, region2 = NA){
{
topbnds<-which(afile[[column]] > quantile(afile[[column]], probs = q, na.rm=T))
topsnps<-afile[topbnds,]
null<-rep(NA,10000)
for (i in 1:10000){
n = length(topbnds)
bnull = sample(1:length(afile[,1]), n, replace=F)
null[i] = sum(afile[[region1]][bnull] == 1 | afile[[region2]][bnull] == 1)
}
obs = sum(topsnps[[region1]] == 1 | topsnps[[region2]] == 1)
p<-mean(null >= obs)
xfold<-obs / mean(null)
resultdata<-cbind(obs, p, xfold)
}
return(resultdata)
}
#doing randomizations across quantiles
quants <- c(0.99,0.98, 0.97, 0.96, 0.95,0.94,0.93,0.92,0.91,0.9)
### pct 1 #####
pct1_alpha_ongene<-vector("list",10)
pct1_alpha_neargene<-vector("list",10)
pct1_alpha_oncds<-vector("list",10)
pct1_alpha_nearcds<-vector("list",10)
pct1_alpha_onmRNA<-vector("list",10)
pct1_alpha_nearmRNA<-vector("list",10)
pct1_alpha_onexon<-vector("list",10)
pct1_alpha_nearexon<-vector("list",10)
pct1_alpha_onte<-vector("list",10)
pct1_alpha_nearte<-vector("list",10)
pct1_alpha_onprotein<-vector("list",10)
pct1_alpha_nearprotein<-vector("list",10)
#alpha
for (i in 1:10){
pct1_alpha_ongene[[i]]<-randomFunc_on(pct1, quants[i], 'alpha', 'ongene')
pct1_alpha_neargene[[i]]<-randomFunc_near(pct1, quants[i], 'alpha', 'neargene', 'ongene')
pct1_alpha_oncds[[i]]<-randomFunc_on(pct1, quants[i], 'alpha', 'oncds')
pct1_alpha_nearcds[[i]]<-randomFunc_near(pct1, quants[i], 'alpha', 'nearcds', 'oncds')
pct1_alpha_onmRNA[[i]]<-randomFunc_on(pct1, quants[i], 'alpha', 'onmRNA')
pct1_alpha_nearmRNA[[i]]<-randomFunc_near(pct1, quants[i], 'alpha', 'nearmRNA', 'onmRNA')
pct1_alpha_onexon[[i]]<-randomFunc_on(pct1, quants[i], 'alpha', 'onexon')
pct1_alpha_nearexon[[i]]<-randomFunc_near(pct1, quants[i], 'alpha', 'nearexon', 'onexon')
pct1_alpha_onte[[i]]<-randomFunc_on(pct1, quants[i], 'alpha', 'onte')
pct1_alpha_nearte[[i]]<-randomFunc_near(pct1, quants[i], 'alpha', 'nearte', 'onte')
pct1_alpha_onprotein[[i]]<-randomFunc_on(pct1, quants[i], 'alpha', 'onprotein')
pct1_alpha_nearprotein[[i]]<-randomFunc_near(pct1, quants[i], 'alpha', 'nearprotein', 'onprotein')
}
pct1_alpha<-cbind(t(as.data.frame(pct1_alpha_ongene)),t(as.data.frame(pct1_alpha_neargene)), t(as.data.frame(pct1_alpha_oncds)), t(as.data.frame(pct1_alpha_nearcds)), t(as.data.frame(pct1_alpha_onmRNA)), t(as.data.frame(pct1_alpha_nearmRNA)), t(as.data.frame(pct1_alpha_onte)), t(as.data.frame(pct1_alpha_nearte)), t(as.data.frame(pct1_alpha_onprotein)), t(as.data.frame(pct1_alpha_nearprotein)))
colnames(pct1_alpha) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct1_alpha, file="randomizations/pct1_alpha_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
#beta
pct1_beta_ongene<-vector("list",10)
pct1_beta_neargene<-vector("list",10)
pct1_beta_oncds<-vector("list",10)
pct1_beta_nearcds<-vector("list",10)
pct1_beta_onmRNA<-vector("list",10)
pct1_beta_nearmRNA<-vector("list",10)
pct1_beta_onexon<-vector("list",10)
pct1_beta_nearexon<-vector("list",10)
pct1_beta_onte<-vector("list",10)
pct1_beta_nearte<-vector("list",10)
pct1_beta_onprotein<-vector("list",10)
pct1_beta_nearprotein<-vector("list",10)
for (i in 1:10){
pct1_beta_ongene[[i]]<-randomFunc_on(pct1, quants[i], 'beta', 'ongene')
pct1_beta_neargene[[i]]<-randomFunc_near(pct1, quants[i], 'beta', 'neargene', 'ongene')
pct1_beta_oncds[[i]]<-randomFunc_on(pct1, quants[i], 'beta', 'oncds')
pct1_beta_nearcds[[i]]<-randomFunc_near(pct1, quants[i], 'beta', 'nearcds', 'oncds')
pct1_beta_onmRNA[[i]]<-randomFunc_on(pct1, quants[i], 'beta', 'onmRNA')
pct1_beta_nearmRNA[[i]]<-randomFunc_near(pct1, quants[i], 'beta', 'nearmRNA', 'onmRNA')
pct1_beta_onexon[[i]]<-randomFunc_on(pct1, quants[i], 'beta', 'onexon')
pct1_beta_nearexon[[i]]<-randomFunc_near(pct1, quants[i], 'beta', 'nearexon', 'onexon')
pct1_beta_onte[[i]]<-randomFunc_on(pct1, quants[i], 'beta', 'onte')
pct1_beta_nearte[[i]]<-randomFunc_near(pct1, quants[i], 'beta', 'nearte', 'onte')
pct1_beta_onprotein[[i]]<-randomFunc_on(pct1, quants[i], 'beta', 'onprotein')
pct1_beta_nearprotein[[i]]<-randomFunc_near(pct1, quants[i], 'beta', 'nearprotein', 'onprotein')
}
pct1_beta<-cbind(t(as.data.frame(pct1_beta_ongene)),t(as.data.frame(pct1_beta_neargene)), t(as.data.frame(pct1_beta_oncds)), t(as.data.frame(pct1_beta_nearcds)), t(as.data.frame(pct1_beta_onmRNA)), t(as.data.frame(pct1_beta_nearmRNA)), t(as.data.frame(pct1_beta_onte)), t(as.data.frame(pct1_beta_nearte)), t(as.data.frame(pct1_beta_onprotein)), t(as.data.frame(pct1_beta_nearprotein)))
colnames(pct1_beta) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct1_beta, file="randomizations/pct1_beta_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
#popanc
pct1_ancestry_ongene<-vector("list",10)
pct1_ancestry_neargene<-vector("list",10)
pct1_ancestry_oncds<-vector("list",10)
pct1_ancestry_nearcds<-vector("list",10)
pct1_ancestry_onmRNA<-vector("list",10)
pct1_ancestry_nearmRNA<-vector("list",10)
pct1_ancestry_onexon<-vector("list",10)
pct1_ancestry_nearexon<-vector("list",10)
pct1_ancestry_onte<-vector("list",10)
pct1_ancestry_nearte<-vector("list",10)
pct1_ancestry_onprotein<-vector("list",10)
pct1_ancestry_nearprotein<-vector("list",10)
for (i in 1:10){
pct1_ancestry_ongene[[i]]<-randomFunc_on(pct1, quants[i], 'ancestry', 'ongene')
pct1_ancestry_neargene[[i]]<-randomFunc_near(pct1, quants[i], 'ancestry', 'neargene', 'ongene')
pct1_ancestry_oncds[[i]]<-randomFunc_on(pct1, quants[i], 'ancestry', 'oncds')
pct1_ancestry_nearcds[[i]]<-randomFunc_near(pct1, quants[i], 'ancestry', 'nearcds', 'oncds')
pct1_ancestry_onmRNA[[i]]<-randomFunc_on(pct1, quants[i], 'ancestry', 'onmRNA')
pct1_ancestry_nearmRNA[[i]]<-randomFunc_near(pct1, quants[i], 'ancestry', 'nearmRNA', 'onmRNA')
pct1_ancestry_onexon[[i]]<-randomFunc_on(pct1, quants[i], 'ancestry', 'onexon')
pct1_ancestry_nearexon[[i]]<-randomFunc_near(pct1, quants[i], 'ancestry', 'nearexon', 'onexon')
pct1_ancestry_onte[[i]]<-randomFunc_on(pct1, quants[i], 'ancestry', 'onte')
pct1_ancestry_nearte[[i]]<-randomFunc_near(pct1, quants[i], 'ancestry', 'nearte', 'onte')
pct1_ancestry_onprotein[[i]]<-randomFunc_on(pct1, quants[i], 'ancestry', 'onprotein')
pct1_ancestry_nearprotein[[i]]<-randomFunc_near(pct1, quants[i], 'ancestry', 'nearprotein', 'onprotein')
}
pct1_ancestry<-cbind(t(as.data.frame(pct1_ancestry_ongene)),t(as.data.frame(pct1_ancestry_neargene)), t(as.data.frame(pct1_ancestry_oncds)), t(as.data.frame(pct1_ancestry_nearcds)), t(as.data.frame(pct1_ancestry_onmRNA)), t(as.data.frame(pct1_ancestry_nearmRNA)), t(as.data.frame(pct1_ancestry_onte)), t(as.data.frame(pct1_ancestry_nearte)), t(as.data.frame(pct1_ancestry_onprotein)), t(as.data.frame(pct1_ancestry_nearprotein)))
colnames(pct1_ancestry) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct1_ancestry, file="randomizations/pct1_ancestry_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
#### pct2 #############
pct2_alpha_ongene<-vector("list",10)
pct2_alpha_neargene<-vector("list",10)
pct2_alpha_oncds<-vector("list",10)
pct2_alpha_nearcds<-vector("list",10)
pct2_alpha_onmRNA<-vector("list",10)
pct2_alpha_nearmRNA<-vector("list",10)
pct2_alpha_onexon<-vector("list",10)
pct2_alpha_nearexon<-vector("list",10)
pct2_alpha_onte<-vector("list",10)
pct2_alpha_nearte<-vector("list",10)
pct2_alpha_onprotein<-vector("list",10)
pct2_alpha_nearprotein<-vector("list",10)
#alpha
for (i in 1:10){
pct2_alpha_ongene[[i]]<-randomFunc_on(pct2, quants[i], 'alpha', 'ongene')
pct2_alpha_neargene[[i]]<-randomFunc_near(pct2, quants[i], 'alpha', 'neargene', 'ongene')
pct2_alpha_oncds[[i]]<-randomFunc_on(pct2, quants[i], 'alpha', 'oncds')
pct2_alpha_nearcds[[i]]<-randomFunc_near(pct2, quants[i], 'alpha', 'nearcds', 'oncds')
pct2_alpha_onmRNA[[i]]<-randomFunc_on(pct2, quants[i], 'alpha', 'onmRNA')
pct2_alpha_nearmRNA[[i]]<-randomFunc_near(pct2, quants[i], 'alpha', 'nearmRNA', 'onmRNA')
pct2_alpha_onexon[[i]]<-randomFunc_on(pct2, quants[i], 'alpha', 'onexon')
pct2_alpha_nearexon[[i]]<-randomFunc_near(pct2, quants[i], 'alpha', 'nearexon', 'onexon')
pct2_alpha_onte[[i]]<-randomFunc_on(pct2, quants[i], 'alpha', 'onte')
pct2_alpha_nearte[[i]]<-randomFunc_near(pct2, quants[i], 'alpha', 'nearte', 'onte')
pct2_alpha_onprotein[[i]]<-randomFunc_on(pct2, quants[i], 'alpha', 'onprotein')
pct2_alpha_nearprotein[[i]]<-randomFunc_near(pct2, quants[i], 'alpha', 'nearprotein', 'onprotein')
}
pct2_alpha<-cbind(t(as.data.frame(pct2_alpha_ongene)),t(as.data.frame(pct2_alpha_neargene)), t(as.data.frame(pct2_alpha_oncds)), t(as.data.frame(pct2_alpha_nearcds)), t(as.data.frame(pct2_alpha_onmRNA)), t(as.data.frame(pct2_alpha_nearmRNA)), t(as.data.frame(pct2_alpha_onte)), t(as.data.frame(pct2_alpha_nearte)), t(as.data.frame(pct2_alpha_onprotein)), t(as.data.frame(pct2_alpha_nearprotein)))
colnames(pct2_alpha) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct2_alpha, file="randomizations/pct2_alpha_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
#beta
pct2_beta_ongene<-vector("list",10)
pct2_beta_neargene<-vector("list",10)
pct2_beta_oncds<-vector("list",10)
pct2_beta_nearcds<-vector("list",10)
pct2_beta_onmRNA<-vector("list",10)
pct2_beta_nearmRNA<-vector("list",10)
pct2_beta_onexon<-vector("list",10)
pct2_beta_nearexon<-vector("list",10)
pct2_beta_onte<-vector("list",10)
pct2_beta_nearte<-vector("list",10)
pct2_beta_onprotein<-vector("list",10)
pct2_beta_nearprotein<-vector("list",10)
for (i in 1:10){
pct2_beta_ongene[[i]]<-randomFunc_on(pct2, quants[i], 'beta', 'ongene')
pct2_beta_neargene[[i]]<-randomFunc_near(pct2, quants[i], 'beta', 'neargene', 'ongene')
pct2_beta_oncds[[i]]<-randomFunc_on(pct2, quants[i], 'beta', 'oncds')
pct2_beta_nearcds[[i]]<-randomFunc_near(pct2, quants[i], 'beta', 'nearcds', 'oncds')
pct2_beta_onmRNA[[i]]<-randomFunc_on(pct2, quants[i], 'beta', 'onmRNA')
pct2_beta_nearmRNA[[i]]<-randomFunc_near(pct2, quants[i], 'beta', 'nearmRNA', 'onmRNA')
pct2_beta_onexon[[i]]<-randomFunc_on(pct2, quants[i], 'beta', 'onexon')
pct2_beta_nearexon[[i]]<-randomFunc_near(pct2, quants[i], 'beta', 'nearexon', 'onexon')
pct2_beta_onte[[i]]<-randomFunc_on(pct2, quants[i], 'beta', 'onte')
pct2_beta_nearte[[i]]<-randomFunc_near(pct2, quants[i], 'beta', 'nearte', 'onte')
pct2_beta_onprotein[[i]]<-randomFunc_on(pct2, quants[i], 'beta', 'onprotein')
pct2_beta_nearprotein[[i]]<-randomFunc_near(pct2, quants[i], 'beta', 'nearprotein', 'onprotein')
}
pct2_beta<-cbind(t(as.data.frame(pct2_beta_ongene)),t(as.data.frame(pct2_beta_neargene)), t(as.data.frame(pct2_beta_oncds)), t(as.data.frame(pct2_beta_nearcds)), t(as.data.frame(pct2_beta_onmRNA)), t(as.data.frame(pct2_beta_nearmRNA)), t(as.data.frame(pct2_beta_onte)), t(as.data.frame(pct2_beta_nearte)), t(as.data.frame(pct2_beta_onprotein)), t(as.data.frame(pct2_beta_nearprotein)))
colnames(pct2_beta) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct2_beta, file="randomizations/pct2_beta_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
#popanc
pct2_ancestry_ongene<-vector("list",10)
pct2_ancestry_neargene<-vector("list",10)
pct2_ancestry_oncds<-vector("list",10)
pct2_ancestry_nearcds<-vector("list",10)
pct2_ancestry_onmRNA<-vector("list",10)
pct2_ancestry_nearmRNA<-vector("list",10)
pct2_ancestry_onexon<-vector("list",10)
pct2_ancestry_nearexon<-vector("list",10)
pct2_ancestry_onte<-vector("list",10)
pct2_ancestry_nearte<-vector("list",10)
pct2_ancestry_onprotein<-vector("list",10)
pct2_ancestry_nearprotein<-vector("list",10)
for (i in 1:10){
pct2_ancestry_ongene[[i]]<-randomFunc_on(pct2, quants[i], 'ancestry', 'ongene')
pct2_ancestry_neargene[[i]]<-randomFunc_near(pct2, quants[i], 'ancestry', 'neargene', 'ongene')
pct2_ancestry_oncds[[i]]<-randomFunc_on(pct2, quants[i], 'ancestry', 'oncds')
pct2_ancestry_nearcds[[i]]<-randomFunc_near(pct2, quants[i], 'ancestry', 'nearcds', 'oncds')
pct2_ancestry_onmRNA[[i]]<-randomFunc_on(pct2, quants[i], 'ancestry', 'onmRNA')
pct2_ancestry_nearmRNA[[i]]<-randomFunc_near(pct2, quants[i], 'ancestry', 'nearmRNA', 'onmRNA')
pct2_ancestry_onexon[[i]]<-randomFunc_on(pct2, quants[i], 'ancestry', 'onexon')
pct2_ancestry_nearexon[[i]]<-randomFunc_near(pct2, quants[i], 'ancestry', 'nearexon', 'onexon')
pct2_ancestry_onte[[i]]<-randomFunc_on(pct2, quants[i], 'ancestry', 'onte')
pct2_ancestry_nearte[[i]]<-randomFunc_near(pct2, quants[i], 'ancestry', 'nearte', 'onte')
pct2_ancestry_onprotein[[i]]<-randomFunc_on(pct2, quants[i], 'ancestry', 'onprotein')
pct2_ancestry_nearprotein[[i]]<-randomFunc_near(pct2, quants[i], 'ancestry', 'nearprotein', 'onprotein')
}
pct2_ancestry<-cbind(t(as.data.frame(pct2_ancestry_ongene)),t(as.data.frame(pct2_ancestry_neargene)), t(as.data.frame(pct2_ancestry_oncds)), t(as.data.frame(pct2_ancestry_nearcds)), t(as.data.frame(pct2_ancestry_onmRNA)), t(as.data.frame(pct2_ancestry_nearmRNA)), t(as.data.frame(pct2_ancestry_onte)), t(as.data.frame(pct2_ancestry_nearte)), t(as.data.frame(pct2_ancestry_onprotein)), t(as.data.frame(pct2_ancestry_nearprotein)))
colnames(pct2_ancestry) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct2_ancestry, file="randomizations/pct2_ancestry_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
#### pct3 #############
pct3_alpha_ongene<-vector("list",10)
pct3_alpha_neargene<-vector("list",10)
pct3_alpha_oncds<-vector("list",10)
pct3_alpha_nearcds<-vector("list",10)
pct3_alpha_onmRNA<-vector("list",10)
pct3_alpha_nearmRNA<-vector("list",10)
pct3_alpha_onexon<-vector("list",10)
pct3_alpha_nearexon<-vector("list",10)
pct3_alpha_onte<-vector("list",10)
pct3_alpha_nearte<-vector("list",10)
pct3_alpha_onprotein<-vector("list",10)
pct3_alpha_nearprotein<-vector("list",10)
#alpha
for (i in 1:10){
pct3_alpha_ongene[[i]]<-randomFunc_on(pct3, quants[i], 'alpha', 'ongene')
pct3_alpha_neargene[[i]]<-randomFunc_near(pct3, quants[i], 'alpha', 'neargene', 'ongene')
pct3_alpha_oncds[[i]]<-randomFunc_on(pct3, quants[i], 'alpha', 'oncds')
pct3_alpha_nearcds[[i]]<-randomFunc_near(pct3, quants[i], 'alpha', 'nearcds', 'oncds')
pct3_alpha_onmRNA[[i]]<-randomFunc_on(pct3, quants[i], 'alpha', 'onmRNA')
pct3_alpha_nearmRNA[[i]]<-randomFunc_near(pct3, quants[i], 'alpha', 'nearmRNA', 'onmRNA')
pct3_alpha_onexon[[i]]<-randomFunc_on(pct3, quants[i], 'alpha', 'onexon')
pct3_alpha_nearexon[[i]]<-randomFunc_near(pct3, quants[i], 'alpha', 'nearexon', 'onexon')
pct3_alpha_onte[[i]]<-randomFunc_on(pct3, quants[i], 'alpha', 'onte')
pct3_alpha_nearte[[i]]<-randomFunc_near(pct3, quants[i], 'alpha', 'nearte', 'onte')
pct3_alpha_onprotein[[i]]<-randomFunc_on(pct3, quants[i], 'alpha', 'onprotein')
pct3_alpha_nearprotein[[i]]<-randomFunc_near(pct3, quants[i], 'alpha', 'nearprotein', 'onprotein')
}
pct3_alpha<-cbind(t(as.data.frame(pct3_alpha_ongene)),t(as.data.frame(pct3_alpha_neargene)), t(as.data.frame(pct3_alpha_oncds)), t(as.data.frame(pct3_alpha_nearcds)), t(as.data.frame(pct3_alpha_onmRNA)), t(as.data.frame(pct3_alpha_nearmRNA)), t(as.data.frame(pct3_alpha_onte)), t(as.data.frame(pct3_alpha_nearte)), t(as.data.frame(pct3_alpha_onprotein)), t(as.data.frame(pct3_alpha_nearprotein)))
colnames(pct3_alpha) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct3_alpha, file="randomizations/pct3_alpha_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
#beta
pct3_beta_ongene<-vector("list",10)
pct3_beta_neargene<-vector("list",10)
pct3_beta_oncds<-vector("list",10)
pct3_beta_nearcds<-vector("list",10)
pct3_beta_onmRNA<-vector("list",10)
pct3_beta_nearmRNA<-vector("list",10)
pct3_beta_onexon<-vector("list",10)
pct3_beta_nearexon<-vector("list",10)
pct3_beta_onte<-vector("list",10)
pct3_beta_nearte<-vector("list",10)
pct3_beta_onprotein<-vector("list",10)
pct3_beta_nearprotein<-vector("list",10)
for (i in 1:10){
pct3_beta_ongene[[i]]<-randomFunc_on(pct3, quants[i], 'beta', 'ongene')
pct3_beta_neargene[[i]]<-randomFunc_near(pct3, quants[i], 'beta', 'neargene', 'ongene')
pct3_beta_oncds[[i]]<-randomFunc_on(pct3, quants[i], 'beta', 'oncds')
pct3_beta_nearcds[[i]]<-randomFunc_near(pct3, quants[i], 'beta', 'nearcds', 'oncds')
pct3_beta_onmRNA[[i]]<-randomFunc_on(pct3, quants[i], 'beta', 'onmRNA')
pct3_beta_nearmRNA[[i]]<-randomFunc_near(pct3, quants[i], 'beta', 'nearmRNA', 'onmRNA')
pct3_beta_onexon[[i]]<-randomFunc_on(pct3, quants[i], 'beta', 'onexon')
pct3_beta_nearexon[[i]]<-randomFunc_near(pct3, quants[i], 'beta', 'nearexon', 'onexon')
pct3_beta_onte[[i]]<-randomFunc_on(pct3, quants[i], 'beta', 'onte')
pct3_beta_nearte[[i]]<-randomFunc_near(pct3, quants[i], 'beta', 'nearte', 'onte')
pct3_beta_onprotein[[i]]<-randomFunc_on(pct3, quants[i], 'beta', 'onprotein')
pct3_beta_nearprotein[[i]]<-randomFunc_near(pct3, quants[i], 'beta', 'nearprotein', 'onprotein')
}
pct3_beta<-cbind(t(as.data.frame(pct3_beta_ongene)),t(as.data.frame(pct3_beta_neargene)), t(as.data.frame(pct3_beta_oncds)), t(as.data.frame(pct3_beta_nearcds)), t(as.data.frame(pct3_beta_onmRNA)), t(as.data.frame(pct3_beta_nearmRNA)), t(as.data.frame(pct3_beta_onte)), t(as.data.frame(pct3_beta_nearte)), t(as.data.frame(pct3_beta_onprotein)), t(as.data.frame(pct3_beta_nearprotein)))
colnames(pct3_beta) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct3_beta, file="randomizations/pct3_beta_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
#popanc
pct3_ancestry_ongene<-vector("list",10)
pct3_ancestry_neargene<-vector("list",10)
pct3_ancestry_oncds<-vector("list",10)
pct3_ancestry_nearcds<-vector("list",10)
pct3_ancestry_onmRNA<-vector("list",10)
pct3_ancestry_nearmRNA<-vector("list",10)
pct3_ancestry_onexon<-vector("list",10)
pct3_ancestry_nearexon<-vector("list",10)
pct3_ancestry_onte<-vector("list",10)
pct3_ancestry_nearte<-vector("list",10)
pct3_ancestry_onprotein<-vector("list",10)
pct3_ancestry_nearprotein<-vector("list",10)
for (i in 1:10){
pct3_ancestry_ongene[[i]]<-randomFunc_on(pct3, quants[i], 'ancestry', 'ongene')
pct3_ancestry_neargene[[i]]<-randomFunc_near(pct3, quants[i], 'ancestry', 'neargene', 'ongene')
pct3_ancestry_oncds[[i]]<-randomFunc_on(pct3, quants[i], 'ancestry', 'oncds')
pct3_ancestry_nearcds[[i]]<-randomFunc_near(pct3, quants[i], 'ancestry', 'nearcds', 'oncds')
pct3_ancestry_onmRNA[[i]]<-randomFunc_on(pct3, quants[i], 'ancestry', 'onmRNA')
pct3_ancestry_nearmRNA[[i]]<-randomFunc_near(pct3, quants[i], 'ancestry', 'nearmRNA', 'onmRNA')
pct3_ancestry_onexon[[i]]<-randomFunc_on(pct3, quants[i], 'ancestry', 'onexon')
pct3_ancestry_nearexon[[i]]<-randomFunc_near(pct3, quants[i], 'ancestry', 'nearexon', 'onexon')
pct3_ancestry_onte[[i]]<-randomFunc_on(pct3, quants[i], 'ancestry', 'onte')
pct3_ancestry_nearte[[i]]<-randomFunc_near(pct3, quants[i], 'ancestry', 'nearte', 'onte')
pct3_ancestry_onprotein[[i]]<-randomFunc_on(pct3, quants[i], 'ancestry', 'onprotein')
pct3_ancestry_nearprotein[[i]]<-randomFunc_near(pct3, quants[i], 'ancestry', 'nearprotein', 'onprotein')
}
pct3_ancestry<-cbind(t(as.data.frame(pct3_ancestry_ongene)),t(as.data.frame(pct3_ancestry_neargene)), t(as.data.frame(pct3_ancestry_oncds)), t(as.data.frame(pct3_ancestry_nearcds)), t(as.data.frame(pct3_ancestry_onmRNA)), t(as.data.frame(pct3_ancestry_nearmRNA)), t(as.data.frame(pct3_ancestry_onte)), t(as.data.frame(pct3_ancestry_nearte)), t(as.data.frame(pct3_ancestry_onprotein)), t(as.data.frame(pct3_ancestry_nearprotein)))
colnames(pct3_ancestry) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct3_ancestry, file="randomizations/pct3_ancestry_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
###########################################################################################
################## Predictability #########################################################
###########################################################################################
randomFuncpredict_on<-function(datfile=NA, q=NA, column1 = NA, column2 = NA, region = NA){
{
anctop<-quantile(datfile[[column1]], probs=q, na.rm=T)
clitop<-quantile(datfile[[column2]], probs=q, na.rm=T)
comsnps<-datfile[which(datfile[[column1]] > anctop & datfile[[column2]] > clitop),]
null<-rep(NA, 10000)
for (i in 1:10000){
x<-round((1-q)*length(datfile[,1]),0)
n = length(comsnps[,1])
cnull = sample(1:length(datfile[,1]), n , replace=F)
null[i] = sum(datfile[[region]][cnull] == 1)
}
obs = sum(comsnps[[region]] == 1)
p<-mean(null >= obs)
xfold<- obs/mean(null)
resultdata<-cbind(obs, p, xfold)
}
return(resultdata)
}
randomFuncpredict_near<-function(datfile=NA, q=NA, column1 = NA, column2 = NA, region1 = NA, region2 = NA){
{
anctop<-quantile(datfile[[column1]], probs=q, na.rm=T)
clitop<-quantile(datfile[[column2]], probs=q, na.rm=T)
comsnps<-datfile[which(datfile[[column1]] > anctop & datfile[[column2]] > clitop),]
null<-rep(NA, 10000)
for (i in 1:10000){
x<-round((1-q)*length(datfile[,1]),0)
n = length(comsnps[,1])
cnull = sample(1:length(datfile[,1]), n , replace=F)
null[i] = sum(datfile[[region1]][cnull] == 1 | datfile[[region2]][cnull] == 1)
}
obs = sum(comsnps[[region1]] == 1 | comsnps[[region2]] == 1)
p<-mean(null >= obs)
xfold<- obs/mean(null)
resultdata<-cbind(obs, p, xfold)
}
return(resultdata)
}
#doing randomizations across quantiles
quants <- c(0.99,0.98, 0.97, 0.96, 0.95,0.94,0.93,0.92,0.91,0.9)
###################### I mainly did randomizations only for pct 3 ###################
## alpha ##
pct3_predict_alpha_ongene<-vector("list",10)
pct3_predict_alpha_neargene<-vector("list",10)
pct3_predict_alpha_oncds<-vector("list",10)
pct3_predict_alpha_nearcds<-vector("list",10)
pct3_predict_alpha_onmRNA<-vector("list",10)
pct3_predict_alpha_nearmRNA<-vector("list",10)
pct3_predict_alpha_onexon<-vector("list",10)
pct3_predict_alpha_nearexon<-vector("list",10)
pct3_predict_alpha_onte<-vector("list",10)
pct3_predict_alpha_nearte<-vector("list",10)
pct3_predict_alpha_onprotein<-vector("list",10)
pct3_predict_alpha_nearprotein<-vector("list",10)
for (i in 1:10){
pct3_predict_alpha_ongene[[i]]<-randomFuncpredict_on(pct3_fold, quants[i], 'ancmean', 'alphamed', 'ongene')
pct3_predict_alpha_neargene[[i]]<-randomFuncpredict_near(pct3_fold, quants[i], 'ancmean', 'alphamed', 'neargene', 'ongene')
pct3_predict_alpha_oncds[[i]]<-randomFuncpredict_on(pct3_fold, quants[i], 'ancmean', 'alphamed', 'oncds')
pct3_predict_alpha_nearcds[[i]]<-randomFuncpredict_near(pct3_fold, quants[i], 'ancmean', 'alphamed', 'nearcds', 'oncds')
pct3_predict_alpha_onmRNA[[i]]<-randomFuncpredict_on(pct3_fold, quants[i], 'ancmean','alphamed', 'onmRNA')
pct3_predict_alpha_nearmRNA[[i]]<-randomFuncpredict_near(pct3_fold, quants[i], 'ancmean','alphamed', 'nearmRNA', 'onmRNA')
pct3_predict_alpha_onexon[[i]]<-randomFuncpredict_on(pct3_fold, quants[i], 'ancmean','alphamed', 'onexon')
pct3_predict_alpha_nearexon[[i]]<-randomFuncpredict_near(pct3_fold, quants[i], 'ancmean','alphamed','nearexon', 'onexon')
pct3_predict_alpha_onte[[i]]<-randomFuncpredict_on(pct3_fold, quants[i], 'ancmean','alphamed','onte')
pct3_predict_alpha_nearte[[i]]<-randomFuncpredict_near(pct3_fold, quants[i], 'ancmean','alphamed', 'nearte', 'onte')
pct3_predict_alpha_onprotein[[i]]<-randomFuncpredict_on(pct3_fold, quants[i], 'ancmean','alphamed', 'onprotein')
pct3_predict_alpha_nearprotein[[i]]<-randomFuncpredict_near(pct3_fold, quants[i], 'ancmean','alphamed', 'nearprotein', 'onprotein')
}
pct3_predict_alpha<-cbind(t(as.data.frame(pct3_predict_alpha_ongene)),t(as.data.frame(pct3_predict_alpha_neargene)), t(as.data.frame(pct3_predict_alpha_oncds)), t(as.data.frame(pct3_predict_alpha_nearcds)), t(as.data.frame(pct3_predict_alpha_onmRNA)), t(as.data.frame(pct3_predict_alpha_nearmRNA)), t(as.data.frame(pct3_predict_alpha_onte)), t(as.data.frame(pct3_predict_alpha_nearte)), t(as.data.frame(pct3_predict_alpha_onprotein)), t(as.data.frame(pct3_predict_alpha_nearprotein)))
colnames(pct3_predict_alpha) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct3_predict_alpha, file="randomizations/pct3_predict_alpha_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
### beta ###
pct3_predict_beta_ongene<-vector("list",10)
pct3_predict_beta_neargene<-vector("list",10)
pct3_predict_beta_oncds<-vector("list",10)
pct3_predict_beta_nearcds<-vector("list",10)
pct3_predict_beta_onmRNA<-vector("list",10)
pct3_predict_beta_nearmRNA<-vector("list",10)
pct3_predict_beta_onexon<-vector("list",10)
pct3_predict_beta_nearexon<-vector("list",10)
pct3_predict_beta_onte<-vector("list",10)
pct3_predict_beta_nearte<-vector("list",10)
pct3_predict_beta_onprotein<-vector("list",10)
pct3_predict_beta_nearprotein<-vector("list",10)
for (i in 1:10){
pct3_predict_beta_ongene[[i]]<-randomFuncpredict_on(pct3_fold, quants[i], 'ancfold', 'betamed', 'ongene')
pct3_predict_beta_neargene[[i]]<-randomFuncpredict_near(pct3_fold, quants[i], 'ancfold', 'betamed', 'neargene', 'ongene')
pct3_predict_beta_oncds[[i]]<-randomFuncpredict_on(pct3_fold, quants[i], 'ancfold', 'betamed', 'oncds')
pct3_predict_beta_nearcds[[i]]<-randomFuncpredict_near(pct3_fold, quants[i], 'ancfold', 'betamed', 'nearcds', 'oncds')
pct3_predict_beta_onmRNA[[i]]<-randomFuncpredict_on(pct3_fold, quants[i], 'ancfold', 'betamed', 'onmRNA')
pct3_predict_beta_nearmRNA[[i]]<-randomFuncpredict_near(pct3_fold, quants[i], 'ancfold', 'betamed', 'nearmRNA', 'onmRNA')
pct3_predict_beta_onexon[[i]]<-randomFuncpredict_on(pct3_fold, quants[i], 'ancfold', 'betamed', 'onexon')
pct3_predict_beta_nearexon[[i]]<-randomFuncpredict_near(pct3_fold, quants[i], 'ancfold', 'betamed','nearexon', 'onexon')
pct3_predict_beta_onte[[i]]<-randomFuncpredict_on(pct3_fold, quants[i], 'ancfold', 'betamed','onte')
pct3_predict_beta_nearte[[i]]<-randomFuncpredict_near(pct3_fold, quants[i], 'ancfold', 'betamed', 'nearte', 'onte')
pct3_predict_beta_onprotein[[i]]<-randomFuncpredict_on(pct3_fold, quants[i], 'ancfold', 'betamed', 'onprotein')
pct3_predict_beta_nearprotein[[i]]<-randomFuncpredict_near(pct3_fold, quants[i], 'ancfold', 'betamed', 'nearprotein', 'onprotein')
}
pct3_predict_beta<-cbind(t(as.data.frame(pct3_predict_beta_ongene)),t(as.data.frame(pct3_predict_beta_neargene)), t(as.data.frame(pct3_predict_beta_oncds)), t(as.data.frame(pct3_predict_beta_nearcds)), t(as.data.frame(pct3_predict_beta_onmRNA)), t(as.data.frame(pct3_predict_beta_nearmRNA)), t(as.data.frame(pct3_predict_beta_onte)), t(as.data.frame(pct3_predict_beta_nearte)), t(as.data.frame(pct3_predict_beta_onprotein)), t(as.data.frame(pct3_predict_beta_nearprotein)))
colnames(pct3_predict_beta) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct3_predict_beta, file="randomizations/pct3_predict_beta_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
### I saved the files for top SNPs to check their annotations #####
#### alpha ######
#top 10%
anctop1<-quantile(pct3_fold$ancmean, probs=0.9, na.rm=T)
clitop1<-quantile(pct3_fold$alphamed, probs=0.9, na.rm=T)
length(which(pct3_fold$ancmean > anctop1 & pct3_fold$alphamed > clitop1))
hialpha_annot0.9<-pct3_fold[which(pct3_fold$ancmean > anctop1 & pct3_fold$alphamed > clitop1),]
write.table(hialpha_annot0.9, "randomizations/hialpha_ancmean_sharedsnps_annot0.9.txt", col.names=TRUE, row.names=TRUE, sep="\t", quote=F)
#top 0.1%
anctop2<-quantile(pct3_fold$ancmean, probs=0.99, na.rm=T)
clitop2<-quantile(pct3_fold$alphamed, probs=0.99, na.rm=T)
length(which(pct3_fold$ancmean > anctop2 & pct3_fold$alphamed > clitop2))
hialpha_annot0.99<-pct3_fold[which(pct3_fold$ancmean > anctop2 & pct3_fold$alphamed > clitop2),]
write.table(hialpha_annot0.99, "randomizations/hialpha_ancmean_sharedsnps_annot0.99.txt", col.names=TRUE, row.names=TRUE, sep="\t", quote=F)
#### beta #######
#top 10%
anctop1<-quantile(pct3_fold$ancfold, probs=0.9, na.rm=T)
clitop1<-quantile(pct3_fold$betamed, probs=0.9, na.rm=T)
length(which(pct3_fold$ancfold > anctop1 & pct3_fold$betamed > clitop1))
hibeta_annot0.9<-pct3_fold[which(pct3_fold$ancfold > anctop1 & pct3_fold$betamed > clitop1),]
write.table(hibeta_annot0.9, "randomizations/hibeta_ancfold_sharedsnps_annot0.9.txt", col.names=TRUE, row.names=TRUE, sep="\t", quote=F)
#top 0.1%
anctop2<-quantile(pct3_fold$ancfold, probs=0.99, na.rm=T)
clitop2<-quantile(pct3_fold$betamed, probs=0.99, na.rm=T)
length(which(pct3_fold$ancfold > anctop2 & pct3_fold$betamed > clitop2))
hibeta_annot0.99<-pct3_fold[which(pct3_fold$ancfold > anctop2 & pct3_fold$betamed > clitop2),]
write.table(hibeta_annot0.99, "randomizations/hibeta_ancfold_sharedsnps_annot0.99.txt", col.names=TRUE, row.names=TRUE, sep="\t", quote=F)
#### extra code for pct1 and pct2 which needs to be done once I create median and folds for this cutoff ########
####### pct1 ###################
## alpha ##
pct1_predict_alpha_ongene<-vector("list",10)
pct1_predict_alpha_neargene<-vector("list",10)
pct1_predict_alpha_oncds<-vector("list",10)
pct1_predict_alpha_nearcds<-vector("list",10)
pct1_predict_alpha_onmRNA<-vector("list",10)
pct1_predict_alpha_nearmRNA<-vector("list",10)
pct1_predict_alpha_onexon<-vector("list",10)
pct1_predict_alpha_nearexon<-vector("list",10)
pct1_predict_alpha_onte<-vector("list",10)
pct1_predict_alpha_nearte<-vector("list",10)
pct1_predict_alpha_onprotein<-vector("list",10)
pct1_predict_alpha_nearprotein<-vector("list",10)
for (i in 1:10){
pct1_predict_alpha_ongene[[i]]<-randomFuncpredict_on(pct1, quants[i], 'ancmean', 'alphamed', 'ongene')
pct1_predict_alpha_neargene[[i]]<-randomFuncpredict_near(pct1, quants[i], 'ancmean', 'alphamed', 'neargene', 'ongene')
pct1_predict_alpha_oncds[[i]]<-randomFuncpredict_on(pct1, quants[i], 'ancmean', 'alphamed', 'oncds')
pct1_predict_alpha_nearcds[[i]]<-randomFuncpredict_near(pct1, quants[i], 'ancmean', 'alphamed', 'nearcds', 'oncds')
pct1_predict_alpha_onmRNA[[i]]<-randomFuncpredict_on(pct1, quants[i], 'ancmean','alphamed', 'onmRNA')
pct1_predict_alpha_nearmRNA[[i]]<-randomFuncpredict_near(pct1, quants[i], 'ancmean','alphamed', 'nearmRNA', 'onmRNA')
pct1_predict_alpha_onexon[[i]]<-randomFuncpredict_on(pct1, quants[i], 'ancmean','alphamed', 'onexon')
pct1_predict_alpha_nearexon[[i]]<-randomFuncpredict_near(pct1, quants[i], 'ancmean','alphamed','nearexon', 'onexon')
pct1_predict_alpha_onte[[i]]<-randomFuncpredict_on(pct1, quants[i], 'ancmean','alphamed','onte')
pct1_predict_alpha_nearte[[i]]<-randomFuncpredict_near(pct1, quants[i], 'ancmean','alphamed', 'nearte', 'onte')
pct1_predict_alpha_onprotein[[i]]<-randomFuncpredict_on(pct1, quants[i], 'ancmean','alphamed', 'onprotein')
pct1_predict_alpha_nearprotein[[i]]<-randomFuncpredict_near(pct1, quants[i], 'ancmean','alphamed', 'nearprotein', 'onprotein')
}
pct1_predict_alpha<-cbind(t(as.data.frame(pct1_predict_alpha_ongene)),t(as.data.frame(pct1_predict_alpha_neargene)), t(as.data.frame(pct1_predict_alpha_oncds)), t(as.data.frame(pct1_predict_alpha_nearcds)), t(as.data.frame(pct1_predict_alpha_onmRNA)), t(as.data.frame(pct1_predict_alpha_nearmRNA)), t(as.data.frame(pct1_predict_alpha_onte)), t(as.data.frame(pct1_predict_alpha_nearte)), t(as.data.frame(pct1_predict_alpha_onprotein)), t(as.data.frame(pct1_predict_alpha_nearprotein)))
colnames(pct1_predict_alpha) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct1_predict_alpha, file="randomizations/pct1_predict_alpha_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
### beta ###
pct1_predict_beta_ongene<-vector("list",10)
pct1_predict_beta_neargene<-vector("list",10)
pct1_predict_beta_oncds<-vector("list",10)
pct1_predict_beta_nearcds<-vector("list",10)
pct1_predict_beta_onmRNA<-vector("list",10)
pct1_predict_beta_nearmRNA<-vector("list",10)
pct1_predict_beta_onexon<-vector("list",10)
pct1_predict_beta_nearexon<-vector("list",10)
pct1_predict_beta_onte<-vector("list",10)
pct1_predict_beta_nearte<-vector("list",10)
pct1_predict_beta_onprotein<-vector("list",10)
pct1_predict_beta_nearprotein<-vector("list",10)
for (i in 1:10){
pct1_predict_beta_ongene[[i]]<-randomFuncpredict_on(pct1, quants[i], 'ancfold', 'betamed', 'ongene')
pct1_predict_beta_neargene[[i]]<-randomFuncpredict_near(pct1, quants[i], 'ancfold', 'betamed', 'neargene', 'ongene')
pct1_predict_beta_oncds[[i]]<-randomFuncpredict_on(pct1, quants[i], 'ancfold', 'betamed', 'oncds')
pct1_predict_beta_nearcds[[i]]<-randomFuncpredict_near(pct1, quants[i], 'ancfold', 'betamed', 'nearcds', 'oncds')
pct1_predict_beta_onmRNA[[i]]<-randomFuncpredict_on(pct1, quants[i], 'ancfold', 'betamed', 'onmRNA')
pct1_predict_beta_nearmRNA[[i]]<-randomFuncpredict_near(pct1, quants[i], 'ancfold', 'betamed', 'nearmRNA', 'onmRNA')
pct1_predict_beta_onexon[[i]]<-randomFuncpredict_on(pct1, quants[i], 'ancfold', 'betamed', 'onexon')
pct1_predict_beta_nearexon[[i]]<-randomFuncpredict_near(pct1, quants[i], 'ancfold', 'betamed','nearexon', 'onexon')
pct1_predict_beta_onte[[i]]<-randomFuncpredict_on(pct1, quants[i], 'ancfold', 'betamed','onte')
pct1_predict_beta_nearte[[i]]<-randomFuncpredict_near(pct1, quants[i], 'ancfold', 'betamed', 'nearte', 'onte')
pct1_predict_beta_onprotein[[i]]<-randomFuncpredict_on(pct1, quants[i], 'ancfold', 'betamed', 'onprotein')
pct1_predict_beta_nearprotein[[i]]<-randomFuncpredict_near(pct1, quants[i], 'ancmean','alphamed', 'nearprotein', 'onprotein')
}
pct1_predict_beta<-cbind(t(as.data.frame(pct1_predict_beta_ongene)),t(as.data.frame(pct1_predict_beta_neargene)), t(as.data.frame(pct1_predict_beta_oncds)), t(as.data.frame(pct1_predict_beta_nearcds)), t(as.data.frame(pct1_predict_beta_onmRNA)), t(as.data.frame(pct1_predict_beta_nearmRNA)), t(as.data.frame(pct1_predict_beta_onte)), t(as.data.frame(pct1_predict_beta_nearte)), t(as.data.frame(pct1_predict_beta_onprotein)), t(as.data.frame(pct1_predict_beta_nearprotein)))
colnames(pct1_predict_beta) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct1_predict_beta, file="randomizations/pct1_predict_beta_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
###################### pct 2 ###################
## alpha ##
pct2_predict_alpha_ongene<-vector("list",10)
pct2_predict_alpha_neargene<-vector("list",10)
pct2_predict_alpha_oncds<-vector("list",10)
pct2_predict_alpha_nearcds<-vector("list",10)
pct2_predict_alpha_onmRNA<-vector("list",10)
pct2_predict_alpha_nearmRNA<-vector("list",10)
pct2_predict_alpha_onexon<-vector("list",10)
pct2_predict_alpha_nearexon<-vector("list",10)
pct2_predict_alpha_onte<-vector("list",10)
pct2_predict_alpha_nearte<-vector("list",10)
pct2_predict_alpha_onprotein<-vector("list",10)
pct2_predict_alpha_nearprotein<-vector("list",10)
for (i in 1:10){
pct2_predict_alpha_ongene[[i]]<-randomFuncpredict_on(pct2, quants[i], 'ancmean', 'alphamed', 'ongene')
pct2_predict_alpha_neargene[[i]]<-randomFuncpredict_near(pct2, quants[i], 'ancmean', 'alphamed', 'neargene', 'ongene')
pct2_predict_alpha_oncds[[i]]<-randomFuncpredict_on(pct2, quants[i], 'ancmean', 'alphamed', 'oncds')
pct2_predict_alpha_nearcds[[i]]<-randomFuncpredict_near(pct2, quants[i], 'ancmean', 'alphamed', 'nearcds', 'oncds')
pct2_predict_alpha_onmRNA[[i]]<-randomFuncpredict_on(pct2, quants[i], 'ancmean','alphamed', 'onmRNA')
pct2_predict_alpha_nearmRNA[[i]]<-randomFuncpredict_near(pct2, quants[i], 'ancmean','alphamed', 'nearmRNA', 'onmRNA')
pct2_predict_alpha_onexon[[i]]<-randomFuncpredict_on(pct2, quants[i], 'ancmean','alphamed', 'onexon')
pct2_predict_alpha_nearexon[[i]]<-randomFuncpredict_near(pct2, quants[i], 'ancmean','alphamed','nearexon', 'onexon')
pct2_predict_alpha_onte[[i]]<-randomFuncpredict_on(pct2, quants[i], 'ancmean','alphamed','onte')
pct2_predict_alpha_nearte[[i]]<-randomFuncpredict_near(pct2, quants[i], 'ancmean','alphamed', 'nearte', 'onte')
pct2_predict_alpha_onprotein[[i]]<-randomFuncpredict_on(pct2, quants[i], 'ancmean','alphamed', 'onprotein')
pct2_predict_alpha_nearprotein[[i]]<-randomFuncpredict_near(pct2, quants[i], 'ancmean','alphamed', 'nearprotein', 'onprotein')
}
pct2_predict_alpha<-cbind(t(as.data.frame(pct2_predict_alpha_ongene)),t(as.data.frame(pct2_predict_alpha_neargene)), t(as.data.frame(pct2_predict_alpha_oncds)), t(as.data.frame(pct2_predict_alpha_nearcds)), t(as.data.frame(pct2_predict_alpha_onmRNA)), t(as.data.frame(pct2_predict_alpha_nearmRNA)), t(as.data.frame(pct2_predict_alpha_onte)), t(as.data.frame(pct2_predict_alpha_nearte)), t(as.data.frame(pct2_predict_alpha_onprotein)), t(as.data.frame(pct2_predict_alpha_nearprotein)))
colnames(pct2_predict_alpha) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct2_predict_alpha, file="randomizations/pct2_predict_alpha_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)
### beta ###
pct2_predict_beta_ongene<-vector("list",10)
pct2_predict_beta_neargene<-vector("list",10)
pct2_predict_beta_oncds<-vector("list",10)
pct2_predict_beta_nearcds<-vector("list",10)
pct2_predict_beta_onmRNA<-vector("list",10)
pct2_predict_beta_nearmRNA<-vector("list",10)
pct2_predict_beta_onexon<-vector("list",10)
pct2_predict_beta_nearexon<-vector("list",10)
pct2_predict_beta_onte<-vector("list",10)
pct2_predict_beta_nearte<-vector("list",10)
pct2_predict_beta_onprotein<-vector("list",10)
pct2_predict_beta_nearprotein<-vector("list",10)
for (i in 1:10){
pct2_predict_beta_ongene[[i]]<-randomFuncpredict_on(pct2, quants[i], 'ancfold', 'betamed', 'ongene')
pct2_predict_beta_neargene[[i]]<-randomFuncpredict_near(pct2, quants[i], 'ancfold', 'betamed', 'neargene', 'ongene')
pct2_predict_beta_oncds[[i]]<-randomFuncpredict_on(pct2, quants[i], 'ancfold', 'betamed', 'oncds')
pct2_predict_beta_nearcds[[i]]<-randomFuncpredict_near(pct2, quants[i], 'ancfold', 'betamed', 'nearcds', 'oncds')
pct2_predict_beta_onmRNA[[i]]<-randomFuncpredict_on(pct2, quants[i], 'ancfold', 'betamed', 'onmRNA')
pct2_predict_beta_nearmRNA[[i]]<-randomFuncpredict_near(pct2, quants[i], 'ancfold', 'betamed', 'nearmRNA', 'onmRNA')
pct2_predict_beta_onexon[[i]]<-randomFuncpredict_on(pct2, quants[i], 'ancfold', 'betamed', 'onexon')
pct2_predict_beta_nearexon[[i]]<-randomFuncpredict_near(pct2, quants[i], 'ancfold', 'betamed','nearexon', 'onexon')
pct2_predict_beta_onte[[i]]<-randomFuncpredict_on(pct2, quants[i], 'ancfold','betamed','onte')
pct2_predict_beta_nearte[[i]]<-randomFuncpredict_near(pct2, quants[i], 'ancfold', 'betamed', 'nearte', 'onte')
pct2_predict_beta_onprotein[[i]]<-randomFuncpredict_on(pct2, quants[i], 'ancfold', 'betamed', 'onprotein')
pct2_predict_beta_nearprotein[[i]]<-randomFuncpredict_near(pct2, quants[i], 'ancfold', 'betamed', 'nearprotein', 'onprotein')
}
pct2_predict_beta<-cbind(t(as.data.frame(pct2_predict_beta_ongene)),t(as.data.frame(pct2_predict_beta_neargene)), t(as.data.frame(pct2_predict_beta_oncds)), t(as.data.frame(pct2_predict_beta_nearcds)), t(as.data.frame(pct2_predict_beta_onmRNA)), t(as.data.frame(pct2_predict_beta_nearmRNA)), t(as.data.frame(pct2_predict_beta_onte)), t(as.data.frame(pct2_predict_beta_nearte)), t(as.data.frame(pct2_predict_beta_onprotein)), t(as.data.frame(pct2_predict_beta_nearprotein)))
colnames(pct2_predict_beta) = c("ongene","neargene","oncds","nearcds","onmRNA","nearmRNA","onte","nearte","onprotein","nearprotein")
write.table(pct2_predict_beta, file="randomizations/pct2_predict_beta_randomizations.txt", col.names=TRUE, row.names=TRUE, sep = " ", quote=FALSE)