Drought-Irrigation Differences

#Calculating the allele differences for PSSP between drought and #irrigation quadrats

#There are 7 treatment pairs.

#1/14/17

#updated 2/3/18

#revised 2/7/18

#Completed 2/11/18

#Set up working directory in Zach's frequency sub-directory

setwd("/uufs/chpc.utah.edu/common/home/u6000989/data/grasses/freqs/")

#Read in text files of allele frequencies that includes the mean, median, CI LB, and CI UP as the rows

X1.all<-read.table("p_o_PSSP_X1.txt",sep=",")

X2.all<-read.table("p_o_PSSP_X2.txt",sep=",")

X5.all<-read.table("p_o_PSSP_X5.txt",sep=",")

X6.all<-read.table("p_o_PSSP_X6.txt",sep=",")

X7.all<-read.table("p_o_PSSP_X7.txt",sep=",")

X8.all<-read.table("p_o_PSSP_X8.txt",sep=",")

X11.all<-read.table("p_o_PSSP_X11.txt",sep=",")

X12.all<-read.table("p_o_PSSP_X12.txt",sep=",")

X13.all<-read.table("p_o_PSSP_X13.txt",sep=",")

X14.all<-read.table("p_o_PSSP_X14.txt",sep=",")

X15.all<-read.table("p_o_PSSP_X15.txt",sep=",")

X17.all<-read.table("p_o_PSSP_X17.txt",sep=",")

X18.all<-read.table("p_o_PSSP_X18.txt",sep=",")

X19.all<-read.table("p_o_PSSP_X19.txt",sep=",")

X20.all<-read.table("p_o_PSSP_X20.txt",sep=",")

LE.all<-read.table("p_o_PSSP_LE.txt",sep=",")

HE.all<-read.table("p_o_PSSP_HE.txt",sep=",")

#Get rid of median and confidence interavals

X1<-t(X1.all[,1])

X2<-t(X2.all[,1])

X5<-t(X5.all[,1])

X6<-t(X6.all[,1])

X7<-t(X7.all[,1])

X8<-t(X8.all[,1])

X11<-t(X11.all[,1])

X12<-t(X12.all[,1])

X13<-t(X13.all[,1])

X14<-t(X14.all[,1])

X17<-t(X17.all[,1])

X18<-t(X18.all[,1])

X19<-t(X19.all[,1])

X20<-t(X20.all[,1])

LE<-t(LE.all[,1])

HE<-t(HE.all[,1])

#Combine drought and irrigation treatment pairs where the drought quad is always first. Only the mean allele frequencies are taken.

X2.X1<-rbind(X2,X1)

X5.X6<-rbind(X5,X6)

X8.X7<-rbind(X8,X7)

X11.X12<-rbind(X11,X12)

X13.X14<-rbind(X13,X14)

X17.X18<-rbind(X17,X17)

X20.X19<-rbind(X20,X19)

#Seperate out natural populations

LE.HE<-rbind(LE,HE)

#Take the difference of mean allele frequencies between pairs

DiffX2.X1<-X2-X1

DiffX5.X6<-X5-X6

DiffX8.X7<-X8-X7

DiffX11.X12<-X11-X12

DiffX13.X14<-X13-X14

DiffX17.X18<-X17-X18

DiffX20.X19<-X20-X19

#Seperate out natural populations

natural<-LE-HE

#Combine all pairs into one matrix

All.TxtPair<-rbind(DiffX2.X1,DiffX5.X6,DiffX8.X7,DiffX11.X12,DiffX13.X14,DiffX17.X18,DiffX20.X19)

#count the number of positive integars across SNPs

Sign<-t(as.matrix(apply(All.TxtPair, 2, function(x) sum(x>0))))

Sign2<-t(as.matrix(apply(natural, 2, function(x) sum(x>0))))

#combine the sum of positive integars with the treatment pairs

All<-as.matrix(rbind(All.TxtPair,Sign))

All2<-as.matrix(rbind(natural,Sign2))

#write a text file to Jacque's home directory

write.table(All,file="PSSP_PrecipPairDiff.txt",row.names=F,col.names=F,sep=",")

#ploting the distribution using the hist() function

All.plot<-barplot(table(All[8,]))

All2.plot<-barplot(table(All2[2,]))

#Read in text file that has all the paired treatment differences

txtdiff<-read.table("PSSP_PrecipPairDiff.txt",header=F,sep=",")

#Pull out the last row that sums how many mean allelic differences are positive

txtsign<-txtdiff[8,]

#There are 286 SNPs where all of the pair treatments are posisitve and there are 54 SNPs where all of the pair treatments are zero

all.seven<-as.matrix(which(txtsign == 7))

all.zero<-as.matrix(which(txtsign == 0))

#Read in the pair treatment differences for low and high sites

natural<-read.table("PSSP_NatPairDiff.txt",header=F,sep=",")

#pull out the last row that sums how many mean alleleic differences are positive

all.one<-as.matrix(which(natural == 1))

#SNPs that are all the same sign (all positive) after takingthe difference between paired treatments

#286 SNPs that had all 7 paired treatments differences positive

obs1 = 286

#54 SNPs that had all 7 paired treatmens differences negative

obs2 = 54

#10477 SNPs from the natural population of low and high elvation sites that all had positive values

obs3 = 10477

#438 SNPs that had at least one positive value

obs4 = 438

#1782 SNPs that had at least two positive values

obs5 = 1782

#4277 SNPs that had at least three positive values

obs6 = 4277

#7291 SNPs that had at least four positive values

obs7 = 7291

#5462 SNPs that had at least five positive values

obs8 = 5462

#1991 SNPs that had at least six positive values

obs9 = 1991

#0 or 7

obs10 = 286 + 54

#1 or 6

obs11 = 438 + 1991

#2 or 5

obs12 = 1782 + 5462

#3 or 4

obs13 = 4277 + 7291

#Run a binomial simulation to compare obsereved and expected SNPs that all share

#the same signs are significant or due to chance

#simulation for 286 SNPs that all have positive values

obs1<-286

trials1 <- 21581

null1<-rep(NA,10000)

for(i in 1:10000){

tmp1<-rbinom(n=trials1,size=7,prob=0.5)

null1[i]<-sum(tmp1 == 7)

}

p1=1-sum(obs1 >= null1)/10000

xfold1=obs1/mean(null1)

xfold1

hist(null1,xlim=c(100,300))

abline(v=obs1,col="red")

#simulaiton for 54 SNPs that all have negative values

obs2 = 54

trials2 <- 21581

null2<-rep(NA,10000)

for(i in 1:10000){

tmp2<-rbinom(n=trials2,size=7,prob=0.5)

null2[i]<-sum(tmp2 == 0)

}

p2=1-sum(obs2 >= null2)/10000

xfold2=obs2/mean(null2)

xfold2

hist(null2,xlim=c(50,300))

abline(v=obs2,col="red")

#Simulation for the natural populations where 10477 SNPs are all positive

obs3 = 10477

trials3 <- 21581

null3<-rep(NA,10000)

for(i in 1:10000){

tmp3<-rbinom(n=trials3,size=1,prob=0.5)

null3[i]<-sum(tmp3 == 1)

}

p3=1-sum(obs3 >= null3)/10000

xfold3=obs3/mean(null3)

xfold3

hist(null3,xlim=c(10000,11500))

abline(v=obs3,col="red")

#simulation of 438 SNPs that had at least one positive value

obs4 = 438

trials4 <- 21581

null4<-rep(NA,10000)

for(i in 1:10000){

tmp4<-rbinom(n=trials4,size=7,prob=0.5)

null4[i]<-sum(tmp4 == 1)

}

p4=1-sum(obs4 >= null4)/10000

xfold4=obs4/mean(null4)

xfold4

hist(null4,xlim=c(400,1500))

abline(v=obs4,col="red")

#simulation of 1782 SNPs that had at least two positive values

obs5 = 1782

trials5 <- 21581

null5<-rep(NA,10000)

for(i in 1:10000){

tmp5<-rbinom(n=trials5,size=7,prob=0.5)

null5[i]<-sum(tmp5 == 2)

}

p5=1-sum(obs5 >= null5)/10000

xfold5=obs5/mean(null5)

xfold5

hist(null5,xlim=c(1000,5000))

abline(v=1782,col="red")

#simulation of 4277 SNPs that had at least three positive values

obs6 = 4277

trials6 <- 21581

null6<-rep(NA,10000)

for(i in 1:10000){

tmp6<-rbinom(n=trials6,size=7,prob=0.5)

null6[i]<-sum(tmp6 == 3)

}

p6=1-sum(obs6 >= null6)/10000

xfold6=obs6/mean(null6)

xfold6

hist(null6,xlim=c(100,10000))

abline(v=obs6,col="red")

#simulation of 7291 SNPs that had at least four positive values

obs7 = 7291

trials7 <- 21581

null7<-rep(NA,10000)

for(i in 1:10000){

tmp7<-rbinom(n=trials7,size=7,prob=0.5)

null7[i]<-sum(tmp7 == 4)

}

p7=1-sum(obs7 >= null7)/10000

xfold7=obs7/mean(null7)

xfold7

hist(null7,xlim=c(100,10000))

abline(v=obs7,col="red")

#simulation of 5462 SNPs that had at least five positive values

obs8 = 5462

trials8 <- 21581

null8<-rep(NA,10000)

for(i in 1:10000){

tmp8<-rbinom(n=trials8,size=7,prob=0.5)

null8[i]<-sum(tmp8 == 5)

}

p8=1-sum(obs8 >= null8)/10000

xfold8=obs8/mean(null8)

xfold8

hist(null8,xlim=c(100,10000))

abline(v=obs8,col="red")

#simulation of 1991 SNPs that had at least six positive values

obs9 = 1991

trials9 <- 21581

null9<-rep(NA,10000)

for(i in 1:10000){

tmp9<-rbinom(n=trials9,size=7,prob=0.5)

null9[i]<-sum(tmp9 == 6)

}

p9=1-sum(obs9 >=null9)/10000

xfold9=obs9/mean(null9)

xfold9

hist(null9,xlim=c(100,10000))

abline(v=obs9,col="red")

#simulation of 0 or 7

obs10 = 286 + 54

trials10 <- 21581

null10<-rep(NA,10000)

for(i in 1:10000){

tmp10<-rbinom(n=trials10,size=7,prob=0.5)

null10[i]<-sum(tmp10 == 0|tmp10 == 7)

}

p10<-1-sum(obs10 >= null10)/10000

xfold10<-obs10/mean(null10)

p10

xfold10

hist(null10,xlim=c(200,500))

abline(v=obs10,col="green")

#simulation of 1 or 6

obs11 = 438 + 1991

trials11 <- 21581

null11<-rep(NA,1000)

for(i in 1:10000){

tmp11<-rbinom(n=trials11,size=7,prob=0.5)

null11[i]<-sum(tmp11 == 1|tmp11 == 6)

}

p11=1-sum(obs11 >= null11)/10000

xfold11=obs11/mean(null11)

p11

xfold11

hist(null11,xlim=c(2000,3000))

abline(v=obs11,col="green")

#simulation of 2 or 5

obs12 = 1782 + 5462

trials12<-21581

null12<-rep(NA,10000)

for(i in 1:10000){

tmp12<-rbinom(n=trials12,size=7,prob=0.5)

null12[i]<-sum(tmp12 == 2|tmp12 == 5)

}

p12=1-sum(obs12 >= null12)/10000

xfold12=obs12/mean(null12)

p12

xfold12

hist(null12,xlim=c(6000,8000))

abline(v=obs12,col="green")

#simulation of 3 or 4

obs13 = 4277 + 7291

trials13<-21581

null13<-rep(NA,10000)

for(i in 1:10000){

tmp13<-rbinom(n=trials13,size=7,prob=0.5)

null13[i]<-sum(tmp13 == 3|tmp13 == 4)

}

p13=1-sum(obs13 >= null13)/10000

xfold13=obs13/mean(null13)

p13

xfold13

hist(null13,xlim=c(10000,15000))

abline(v=obs13,col="green")

Page updated

Google Sites

Report abuse