#Calculating the allele differences for PSSP between drought and #irrigation quadrats
#There are 7 treatment pairs.
#1/14/17
#updated 2/3/18
#revised 2/7/18
#Completed 2/11/18
#Set up working directory in Zach's frequency sub-directory
setwd("/uufs/chpc.utah.edu/common/home/u6000989/data/grasses/freqs/")
#Read in text files of allele frequencies that includes the mean, median, CI LB, and CI UP as the rows
X1.all<-read.table("p_o_PSSP_X1.txt",sep=",")
X2.all<-read.table("p_o_PSSP_X2.txt",sep=",")
X5.all<-read.table("p_o_PSSP_X5.txt",sep=",")
X6.all<-read.table("p_o_PSSP_X6.txt",sep=",")
X7.all<-read.table("p_o_PSSP_X7.txt",sep=",")
X8.all<-read.table("p_o_PSSP_X8.txt",sep=",")
X11.all<-read.table("p_o_PSSP_X11.txt",sep=",")
X12.all<-read.table("p_o_PSSP_X12.txt",sep=",")
X13.all<-read.table("p_o_PSSP_X13.txt",sep=",")
X14.all<-read.table("p_o_PSSP_X14.txt",sep=",")
X15.all<-read.table("p_o_PSSP_X15.txt",sep=",")
X17.all<-read.table("p_o_PSSP_X17.txt",sep=",")
X18.all<-read.table("p_o_PSSP_X18.txt",sep=",")
X19.all<-read.table("p_o_PSSP_X19.txt",sep=",")
X20.all<-read.table("p_o_PSSP_X20.txt",sep=",")
LE.all<-read.table("p_o_PSSP_LE.txt",sep=",")
HE.all<-read.table("p_o_PSSP_HE.txt",sep=",")
#Get rid of median and confidence interavals
X1<-t(X1.all[,1])
X2<-t(X2.all[,1])
X5<-t(X5.all[,1])
X6<-t(X6.all[,1])
X7<-t(X7.all[,1])
X8<-t(X8.all[,1])
X11<-t(X11.all[,1])
X12<-t(X12.all[,1])
X13<-t(X13.all[,1])
X14<-t(X14.all[,1])
X17<-t(X17.all[,1])
X18<-t(X18.all[,1])
X19<-t(X19.all[,1])
X20<-t(X20.all[,1])
LE<-t(LE.all[,1])
HE<-t(HE.all[,1])
#Combine drought and irrigation treatment pairs where the drought quad is always first. Only the mean allele frequencies are taken.
X2.X1<-rbind(X2,X1)
X5.X6<-rbind(X5,X6)
X8.X7<-rbind(X8,X7)
X11.X12<-rbind(X11,X12)
X13.X14<-rbind(X13,X14)
X17.X18<-rbind(X17,X17)
X20.X19<-rbind(X20,X19)
#Seperate out natural populations
LE.HE<-rbind(LE,HE)
#Take the difference of mean allele frequencies between pairs
DiffX2.X1<-X2-X1
DiffX5.X6<-X5-X6
DiffX8.X7<-X8-X7
DiffX11.X12<-X11-X12
DiffX13.X14<-X13-X14
DiffX17.X18<-X17-X18
DiffX20.X19<-X20-X19
#Seperate out natural populations
natural<-LE-HE
#Combine all pairs into one matrix
All.TxtPair<-rbind(DiffX2.X1,DiffX5.X6,DiffX8.X7,DiffX11.X12,DiffX13.X14,DiffX17.X18,DiffX20.X19)
#count the number of positive integars across SNPs
Sign<-t(as.matrix(apply(All.TxtPair, 2, function(x) sum(x>0))))
Sign2<-t(as.matrix(apply(natural, 2, function(x) sum(x>0))))
#combine the sum of positive integars with the treatment pairs
All<-as.matrix(rbind(All.TxtPair,Sign))
All2<-as.matrix(rbind(natural,Sign2))
#write a text file to Jacque's home directory
write.table(All,file="PSSP_PrecipPairDiff.txt",row.names=F,col.names=F,sep=",")
#ploting the distribution using the hist() function
All.plot<-barplot(table(All[8,]))
All2.plot<-barplot(table(All2[2,]))
#Read in text file that has all the paired treatment differences
txtdiff<-read.table("PSSP_PrecipPairDiff.txt",header=F,sep=",")
#Pull out the last row that sums how many mean allelic differences are positive
txtsign<-txtdiff[8,]
#There are 286 SNPs where all of the pair treatments are posisitve and there are 54 SNPs where all of the pair treatments are zero
all.seven<-as.matrix(which(txtsign == 7))
all.zero<-as.matrix(which(txtsign == 0))
#Read in the pair treatment differences for low and high sites
natural<-read.table("PSSP_NatPairDiff.txt",header=F,sep=",")
#pull out the last row that sums how many mean alleleic differences are positive
all.one<-as.matrix(which(natural == 1))
#SNPs that are all the same sign (all positive) after takingthe difference between paired treatments
#286 SNPs that had all 7 paired treatments differences positive
obs1 = 286
#54 SNPs that had all 7 paired treatmens differences negative
obs2 = 54
#10477 SNPs from the natural population of low and high elvation sites that all had positive values
obs3 = 10477
#438 SNPs that had at least one positive value
obs4 = 438
#1782 SNPs that had at least two positive values
obs5 = 1782
#4277 SNPs that had at least three positive values
obs6 = 4277
#7291 SNPs that had at least four positive values
obs7 = 7291
#5462 SNPs that had at least five positive values
obs8 = 5462
#1991 SNPs that had at least six positive values
obs9 = 1991
#0 or 7
obs10 = 286 + 54
#1 or 6
obs11 = 438 + 1991
#2 or 5
obs12 = 1782 + 5462
#3 or 4
obs13 = 4277 + 7291
#Run a binomial simulation to compare obsereved and expected SNPs that all share
#the same signs are significant or due to chance
#simulation for 286 SNPs that all have positive values
obs1<-286
trials1 <- 21581
null1<-rep(NA,10000)
for(i in 1:10000){
tmp1<-rbinom(n=trials1,size=7,prob=0.5)
null1[i]<-sum(tmp1 == 7)
}
p1=1-sum(obs1 >= null1)/10000
xfold1=obs1/mean(null1)
p1
xfold1
hist(null1,xlim=c(100,300))
abline(v=obs1,col="red")
#simulaiton for 54 SNPs that all have negative values
obs2 = 54
trials2 <- 21581
null2<-rep(NA,10000)
for(i in 1:10000){
tmp2<-rbinom(n=trials2,size=7,prob=0.5)
null2[i]<-sum(tmp2 == 0)
}
p2=1-sum(obs2 >= null2)/10000
xfold2=obs2/mean(null2)
p2
xfold2
hist(null2,xlim=c(50,300))
abline(v=obs2,col="red")
#Simulation for the natural populations where 10477 SNPs are all positive
obs3 = 10477
trials3 <- 21581
null3<-rep(NA,10000)
for(i in 1:10000){
tmp3<-rbinom(n=trials3,size=1,prob=0.5)
null3[i]<-sum(tmp3 == 1)
}
p3=1-sum(obs3 >= null3)/10000
xfold3=obs3/mean(null3)
p3
xfold3
hist(null3,xlim=c(10000,11500))
abline(v=obs3,col="red")
#simulation of 438 SNPs that had at least one positive value
obs4 = 438
trials4 <- 21581
null4<-rep(NA,10000)
for(i in 1:10000){
tmp4<-rbinom(n=trials4,size=7,prob=0.5)
null4[i]<-sum(tmp4 == 1)
}
p4=1-sum(obs4 >= null4)/10000
xfold4=obs4/mean(null4)
p4
xfold4
hist(null4,xlim=c(400,1500))
abline(v=obs4,col="red")
#simulation of 1782 SNPs that had at least two positive values
obs5 = 1782
trials5 <- 21581
null5<-rep(NA,10000)
for(i in 1:10000){
tmp5<-rbinom(n=trials5,size=7,prob=0.5)
null5[i]<-sum(tmp5 == 2)
}
p5=1-sum(obs5 >= null5)/10000
xfold5=obs5/mean(null5)
p5
xfold5
hist(null5,xlim=c(1000,5000))
abline(v=1782,col="red")
#simulation of 4277 SNPs that had at least three positive values
obs6 = 4277
trials6 <- 21581
null6<-rep(NA,10000)
for(i in 1:10000){
tmp6<-rbinom(n=trials6,size=7,prob=0.5)
null6[i]<-sum(tmp6 == 3)
}
p6=1-sum(obs6 >= null6)/10000
xfold6=obs6/mean(null6)
p6
xfold6
hist(null6,xlim=c(100,10000))
abline(v=obs6,col="red")
#simulation of 7291 SNPs that had at least four positive values
obs7 = 7291
trials7 <- 21581
null7<-rep(NA,10000)
for(i in 1:10000){
tmp7<-rbinom(n=trials7,size=7,prob=0.5)
null7[i]<-sum(tmp7 == 4)
}
p7=1-sum(obs7 >= null7)/10000
xfold7=obs7/mean(null7)
p7
xfold7
hist(null7,xlim=c(100,10000))
abline(v=obs7,col="red")
#simulation of 5462 SNPs that had at least five positive values
obs8 = 5462
trials8 <- 21581
null8<-rep(NA,10000)
for(i in 1:10000){
tmp8<-rbinom(n=trials8,size=7,prob=0.5)
null8[i]<-sum(tmp8 == 5)
}
p8=1-sum(obs8 >= null8)/10000
xfold8=obs8/mean(null8)
p8
xfold8
hist(null8,xlim=c(100,10000))
abline(v=obs8,col="red")
#simulation of 1991 SNPs that had at least six positive values
obs9 = 1991
trials9 <- 21581
null9<-rep(NA,10000)
for(i in 1:10000){
tmp9<-rbinom(n=trials9,size=7,prob=0.5)
null9[i]<-sum(tmp9 == 6)
}
p9=1-sum(obs9 >=null9)/10000
xfold9=obs9/mean(null9)
p9
xfold9
hist(null9,xlim=c(100,10000))
abline(v=obs9,col="red")
#simulation of 0 or 7
obs10 = 286 + 54
trials10 <- 21581
null10<-rep(NA,10000)
for(i in 1:10000){
tmp10<-rbinom(n=trials10,size=7,prob=0.5)
null10[i]<-sum(tmp10 == 0|tmp10 == 7)
}
p10<-1-sum(obs10 >= null10)/10000
xfold10<-obs10/mean(null10)
p10
xfold10
hist(null10,xlim=c(200,500))
abline(v=obs10,col="green")
#simulation of 1 or 6
obs11 = 438 + 1991
trials11 <- 21581
null11<-rep(NA,1000)
for(i in 1:10000){
tmp11<-rbinom(n=trials11,size=7,prob=0.5)
null11[i]<-sum(tmp11 == 1|tmp11 == 6)
}
p11=1-sum(obs11 >= null11)/10000
xfold11=obs11/mean(null11)
p11
xfold11
hist(null11,xlim=c(2000,3000))
abline(v=obs11,col="green")
#simulation of 2 or 5
obs12 = 1782 + 5462
trials12<-21581
null12<-rep(NA,10000)
for(i in 1:10000){
tmp12<-rbinom(n=trials12,size=7,prob=0.5)
null12[i]<-sum(tmp12 == 2|tmp12 == 5)
}
p12=1-sum(obs12 >= null12)/10000
xfold12=obs12/mean(null12)
p12
xfold12
hist(null12,xlim=c(6000,8000))
abline(v=obs12,col="green")
#simulation of 3 or 4
obs13 = 4277 + 7291
trials13<-21581
null13<-rep(NA,10000)
for(i in 1:10000){
tmp13<-rbinom(n=trials13,size=7,prob=0.5)
null13[i]<-sum(tmp13 == 3|tmp13 == 4)
}
p13=1-sum(obs13 >= null13)/10000
xfold13=obs13/mean(null13)
p13
xfold13
hist(null13,xlim=c(10000,15000))
abline(v=obs13,col="green")