Post date: Jan 30, 2017 8:28:41 PM
## /uufs/chpc.utah.edu/common/home/u0795641/projects/cmac_AR/L14/alt_ne/popgen/AlleleFreq/freq
## for p-values and first test of trade-offs
analyNullSimP<-function(p0=0.5,p1=0.5,t=100,ne=100,lower=TRUE){
fst<-1-(1-1/(2*ne))^t
alpha<-p0 * (1-fst)/fst
beta<-(1-p0) * (1-fst)/fst
pv<-pbeta(p1,alpha+0.001,beta+0.001,lower.tail=lower)
pv
}
bnd<-log(0.99995/0.00005) ## equivalent to p < 0.0001
## showing the null distribution
pdf('null_example_example.pdf')
par(mar=c(5,5,4,2))
p0<-.3 ## initial allele frequency
ne<-100 ## estimated Ne
t<-16
fst<-1-(1-1/(2*ne))^t
alpha <-p0 * (1-fst)/fst
beta <-(1-p0) * (1-fst)/fst
x<-seq(0,1,0.01) # change in allele freq
y<-dbeta(x=x,alpha,beta) ## drift null simulation modeled with beta distribution, y are probabilities
plot(x,y,type='l', ylab='density', xlab='expected allele frequency from drift', main='Null Drift Simulation', cex.lab=1.5)
abline(v=.472, lty=2) ## actual change p1 - p0
dev.off()
# pdf('null_example1.pdf')
# par(mar=c(5,5,4,2))
# p0<-pL14P[4,1]
# ne=68.8381
# t=16
# p1<-pL14F16A[4,1]
# fst<-1-(1-1/(2*ne))^t
# alpha <-p0 * (1-fst)/fst
# beta <-(1-p0) * (1-fst)/fst
# x<-seq(0,1,0.01) # change in allele freq
# y<-dbeta(x=x,alpha,beta) ## drift null simulation modeled with beta distribution, y are probabilities
# plot(x,y,type='l', ylab='Density', xlab='Allele Frequency', main='Null Drift Simulation', cex.lab=1.5, lwd=2, cex.main=2)
# legend("topleft",legend=c('Original Frequency', 'End Frequency'), lty=1:2, cex=.95, bty='n')
# x.cord <- c(0,x,1)
# y.cord <- c(0,dbeta(x=x,alpha,beta),0)
# polygon(x.cord,y.cord,col='grey')
# abline(v=p1,lty=2) ## end values
# abline(v=p0, lty=1) ## beginning allele freq
# dev.off()
#
# pdf('null_example2.pdf')
# par(mar=c(5,5,4,2))
# p0<-pL14P[20174,1]
# ne=68.8381
# t=16
# p1<-pL14F16A[20174,1]
# fst<-1-(1-1/(2*ne))^t
# alpha <-p0 * (1-fst)/fst
# beta <-(1-p0) * (1-fst)/fst
# x<-seq(0,1,0.01) # change in allele freq
# y<-dbeta(x=x,alpha,beta) ## drift null simulation modeled with beta distribution, y are probabilities
# plot(x,y,type='l', ylab='Density', xlab='Allele Frequency', main='Null Drift Simulation', cex.lab=1.5, lwd=2, cex.main=2)
# legend("topleft",legend=c('Original Frequency', 'End Frequency'), lty=1:2, cex=.95, bty='n')
# x.cord <- c(0,x,1)
# y.cord <- c(0,dbeta(x=x,alpha,beta),0)
# polygon(x.cord,y.cord,col='grey')
# abline(v=p1,lty=2) ## end values
# abline(v=p0, lty=1) ## beginning allele freq
# dev.off()
pdf('null_examples.pdf', width=14, height=7)
par(mar=c(5,5,4,2),mfrow=c(1,2))
p0<-pL14P[4,1]
ne=68.8381
t=16
p1<-pL14F16A[4,1]
fst<-1-(1-1/(2*ne))^t
alpha <-p0 * (1-fst)/fst
beta <-(1-p0) * (1-fst)/fst
x<-seq(0,1,0.01) # change in allele freq
y<-dbeta(x=x,alpha,beta) ## drift null simulation modeled with beta distribution, y are probabilities
plot(x,y,type='l', ylab='Density', xlab='Allele Frequency', cex.lab=2, cex.axis=1.7, lwd=2)
title(main="(a)",adj=0,cex.main=2, line=.4)
legend("topleft",legend=c('Original Frequency', 'End Frequency'), lty=1:2, cex=1.5, bty='n')
x.cord <- c(0,x,1)
y.cord <- c(0,dbeta(x=x,alpha,beta),0)
polygon(x.cord,y.cord,col='grey')
abline(v=p1,lty=2) ## end values
abline(v=p0, lty=1) ## beginning allele freq
p0<-pL14P[20174,1]
ne=68.8381
t=16
p1<-pL14F16A[20174,1]
fst<-1-(1-1/(2*ne))^t
alpha <-p0 * (1-fst)/fst
beta <-(1-p0) * (1-fst)/fst
x<-seq(0,1,0.01) # change in allele freq
y<-dbeta(x=x,alpha,beta) ## drift null simulation modeled with beta distribution, y are probabilities
plot(x,y,type='l', ylab='Density', xlab='Allele Frequency', cex.lab=2, cex.axis=1.7, lwd=2)
title(main="(b)",adj=0,cex.main=2, line=.4)
legend("topleft",legend=c('Original Frequency', 'End Frequency'), lty=1:2, cex=1.5, bty='n')
x.cord <- c(0,x,1)
y.cord <- c(0,dbeta(x=x,alpha,beta),0)
polygon(x.cord,y.cord,col='grey')
abline(v=p1,lty=2) ## end values
abline(v=p0, lty=1) ## beginning allele freq
dev.off()
### Allele frequency paired with above plot
files <- list.files(pattern=".+_p.txt")
files <- files[-c(1,10,11,12)]
A <- files[c(3,1,2,9,10,5,6,7,8,4)]
calA <- NULL
for (f in A) {
dat <- read.csv(f, header=FALSE, sep=",")
calA <- rbind(calA, t(dat))
}
# Delete everything but median
r <- 1:nrow(calA)
medians <- seq(2,40,4)
calA <- calA[medians ,]
pdf('Ne_freq1.pdf')
par(mar=c(5,5,4,2))
matplot(calA[,4],type='l', col=1:length(calA), xaxt='n', main='Allele Frequency', ylab='Frequency', xlab='Generation', ylim=c(0,1), cex.lab=1.5, lwd=2, cex.main=2)
axis(1, at=1:10, labels=c('P',1,2,3,4,5,6,7,8,16))
dev.off()
pdf('Ne_freq2.pdf')
par(mar=c(5,5,4,2))
matplot(calA[,20174],type='l', col=1:length(calA), xaxt='n', main='Allele Frequency', ylab='Frequency', xlab='Generation', ylim=c(0,1), cex.lab=1.5, lwd=2, cex.main=2)
axis(1, at=1:10, labels=c('P',1,2,3,4,5,6,7,8,16))
dev.off()
par(ask=TRUE)
for(i in 1:nrow(pL14P)){
p0<-pL14P[i,1]
ne=68.8381
t=16
p1<-pL14F4[i,1]
fst<-1-(1-1/(2*ne))^t
alpha <-p0 * (1-fst)/fst
beta <-(1-p0) * (1-fst)/fst
x<-seq(0,1,0.01) # change in allele freq
y<-dbeta(x=x,alpha,beta) ## drift null simulation modeled with beta distribution, y are probabilities
plot(x,y,type='l', ylab='density', xlab='expected allele frequency from drift', main=i, cex.lab=1.5)
abline(v=p1,lty=2) ## end values
abline(v=p0, lty=1) ## beginning allele freq
}
## All files of interest:
pL14P<-read.table("L14-P_p.txt",header=FALSE,sep=",")
pL14F1<-read.table("L14-F1_p.txt",header=FALSE,sep=",")
pL14F2<-read.table("L14-F2_p.txt",header=FALSE,sep=",")
pL14F3<-read.table("L14P-F3_p.txt",header=FALSE,sep=",")
pL14F4<-read.table("L14P-F4_p.txt",header=FALSE,sep=",")
pL14F5A<-read.table("L14A-F5_p.txt",header=FALSE,sep=",")
pL14F6A<-read.table("L14A-F6_p.txt",header=FALSE,sep=",")
pL14F7A<-read.table("L14A-F7_p.txt",header=FALSE,sep=",")
pL14F8A<-read.table("L14A-F8_p.txt",header=FALSE,sep=",")
pL14F16A<-read.table("L14A-F16_p.txt",header=FALSE,sep=",")
pL14F5B<-read.table("L14B-F5_p.txt",header=FALSE,sep=",")
pL14F8B<-read.table("L14B-F8_p.txt",header=FALSE,sep=",")
pL14F16B<-read.table("L14B-F16_p.txt",header=FALSE,sep=",")
## Ne values for branches separating populations
## L14P v L14F16A, logit quantile
p0<-pL14P[,1]
p1<-pL14F16A[,1]
pv<-analyNullSimP(p0,p1,t=16,ne=28.6953,lower=TRUE)
P_F16A<-log(pv/(1-pv))
## L14P v L14F16B, logit quantile
p0<-pL14P[,1]
p1<-pL14F16B[,1]
pv<-analyNullSimP(p0,p1,t=16,ne=27.2585,lower=TRUE)
P_F16B<-log(pv/(1-pv))
## L14P v L14F4, logit quantile
p0<-pL14P[,1]
p1<-pL14F4[,1]
pv<-analyNullSimP(p0,p1,t=4,ne=8.821,lower=TRUE)
P_F4<-log(pv/(1-pv))
## L14F4 v L14F16A, logit quantile
p0<-pL14F4[,1]
p1<-pL14F16A[,1]
pv<-analyNullSimP(p0,p1,t=12,ne=68.8381,lower=TRUE)
F4_F16A<-log(pv/(1-pv))
## L14F4 v L14F16B, logit quantile
p0<-pL14F4[,1]
p1<-pL14F16B[,1]
pv<-analyNullSimP(p0,p1,t=12,ne=56.7703,lower=TRUE)
F4_F16B<-log(pv/(1-pv))
## manhattan plots
pdf("manhatP_F16.pdf",width=9,height=9)
par(mfrow=c(2,1))
par(mar=c(4,5.5,3,1.5))
plot(abs(P_F16A),pch=19,col="darkgray",cex=0.5,ylab="negative logit quantile",xlab="",cex.lab=1.7,cex.axis=1.3)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="(a) P to F16A",adj=0,cex.main=1.6)
plot(abs(P_F16B),pch=19,col="darkgray",cex=0.5,ylab="negative logit quantile",xlab="",cex.lab=1.7,cex.axis=1.3)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="(b) P to F16B",adj=0,cex.main=1.6)
dev.off()
pdf("manhatP_F4.pdf",width=9,height=9)
par(mfrow=c(1,1))
par(mar=c(4,5.5,3,1.5))
plot(abs(P_F4),pch=19,col="darkgray",cex=0.5,ylab="negative logit quantile",xlab="",cex.lab=1.7,cex.axis=1.3)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="(a) P to F4",adj=0,cex.main=1.6)
dev.off()
pdf("manhatF4_F16.pdf",width=9,height=9)
par(mfrow=c(2,1))
par(mar=c(4,5.5,3,1.5))
plot(abs(F4_F16A),pch=19,col="darkgray",cex=0.5,ylab="negative logit quantile",xlab="",cex.lab=1.7,cex.axis=1.3)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="(a) F4 to F16A",adj=0,cex.main=1.6)
plot(abs(F4_F16B),pch=19,col="darkgray",cex=0.5,ylab="negative logit quantile",xlab="",cex.lab=1.7,cex.axis=1.3)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="(b) F4 to F16B",adj=0,cex.main=1.6)
dev.off()
## Graph for manuscript
pdf("manhatsNull.pdf", width = 7, height=14)
par(mfrow=c(5,1))
par(mar=c(2.5, 1, 2, 1), oma=c(5,5.5,2,0))
plot(abs(P_F16A),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=2,cex.axis=1.7)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="(a) P to F16A",adj=0,cex.main=2, line=.4)
plot(abs(P_F16B),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=2,cex.axis=1.7)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="(b) P to F16B",adj=0,cex.main=2, line=.4)
plot(abs(P_F4),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=2,cex.axis=1.7)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="(c) P to F4",adj=0,cex.main=2, line=.4)
plot(abs(F4_F16A),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=2,cex.axis=1.7)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="(d) F4 to F16A",adj=0,cex.main=2, line=.4)
plot(abs(F4_F16B),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=2,cex.axis=1.7)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="(e) F4 to F16B",adj=0,cex.main=2, line=.4)
mtext("negative logit quantile", side=2, line=2.5, cex=2.5, outer=TRUE)
mtext("locus", side=1,line=1.5, cex=2, outer=TRUE)
dev.off()
## counts and parallelism
sum(abs(P_F16A) > bnd) # 53
sum(abs(P_F16B) > bnd) # 36
sum(abs(P_F4) > bnd) # 185
sum(abs(F4_F16A) > bnd) # 30
sum(abs(F4_F16B) > bnd) # 27
sum((P_F16A > bnd & P_F16B > bnd) | (P_F16A < -bnd & P_F16B < -bnd)) # 27; = 50.9% and 75% shared
sum((F4_F16A > bnd & F4_F16B > bnd) | (F4_F16A < -bnd & F4_F16B < -bnd)) # 0; = 3.3% and 1.8% shared
## write combined allele frequencies for ABC for candidate SNPs
PF16snps<-unique(which(abs(P_F16A) > bnd | abs(P_F16B) > bnd)) #62 snps
PF4snps<-which(abs(P_F4) > bnd) #185 snps
F4F16snps<-unique(which(abs(F4_F16A) > bnd | abs(F4_F16B) > bnd)) #57 snps
snps1<-unique(c(PF4snps,PF16snps)) #141
snps2<-unique(c(PF4snps,F4F16snps)) # snps
snps3<-unique(c(PF4snps,F4F16snps,PF16snps)) # 188
## Creating file for ABC
lids<-read.table("../locusids.txt",header=FALSE)
plsnps<-data.frame(lids[snps3,1],pL14P[snps3,1],pL14F1[snps3,1],pL14F2[snps3,1],pL14F3[snps3,1],
pL14F4[snps3,1],pL14F5A[snps3,1],pL14F6A[snps3,1], pL14F7A[snps3,1],pL14F8A[snps3,1],pL14F16A[snps3,1],
pL14F5B[snps3,1],pL14F8B[snps3,1],pL14F16B[snps3,1])
write.table(plsnps,"../pLentilSnps.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
save(list=ls(),file="null.Rdata")
### some checks
matplot(calA[,PF4snps],type='l', col=1:length(d), xaxt='n', main='Minor Allele Frequency Change for line A', ylab='Allele Frequency', xlab='Generation')
axis(1, at=1:10, labels=c('P',1,2,3,4,5,6,7,8,16))
#legend("bottomright",legend=d, col=1:length(d), lty=1:length(d), cex=.95)
matplot(calB[,PF4snps],type='l', col=1:length(d), xaxt='n', main='Minor Allele Frequency Change for line B', ylab='Allele Frequency', xlab='Generation')
axis(1, at=1:8, labels=c('P',1,2,3,4,5,8,16))
matplot(calA[,F4F16snps],type='l', col=1:length(d), xaxt='n', main='Minor Allele Frequency Change for line A', ylab='Allele Frequency', xlab='Generation')
axis(1, at=1:10, labels=c('P',1,2,3,4,5,6,7,8,16))
matplot(calB[,F4F16snps],type='l', col=1:length(d), xaxt='n', main='Minor Allele Frequency Change for line B', ylab='Allele Frequency', xlab='Generation')
axis(1, at=1:8, labels=c('P',1,2,3,4,5,8,16))
pdf("MAFs.pdf",width=18,height=9)
par(mfrow=c(2,1))
par(mar=c(4,5.5,3,1.5))
matplot(calA[,PF4snps],type='l', col=1:length(d), xaxt='n', ylab='Allele Frequency', xlab='Generation', cex.lab=2)
axis(1, at=1:10, labels=c('P',1,2,3,4,5,6,7,8,16), cex.axis=1.5)
title(main="(b) Sub-Line A MAF",adj=0,cex.main=1.5)
matplot(calA[,F4F16snps],type='l', col=1:length(d), xaxt='n', ylab='Allele Frequency', xlab='Generation', cex.lab=2)
axis(1, at=1:10, labels=c('P',1,2,3,4,5,6,7,8,16), cex.axis=1.5)
title(main="(b) Sub-Line A MAF",adj=0,cex.main=1.5)
dev.off()
### With lower bound of 5:
## Ne values for branches separating populations
## L14P v L14F16A, logit quantile
p0<-pL14P[,1]
p1<-pL14F16A[,1]
pv<-analyNullSimP(p0,p1,t=16,ne=28.6953,lower=TRUE)
P_F16A<-log(pv/(1-pv))
## L14P v L14F16B, logit quantile
p0<-pL14P[,1]
p1<-pL14F16B[,1]
pv<-analyNullSimP(p0,p1,t=16,ne=27.2585,lower=TRUE)
P_F16B<-log(pv/(1-pv))
## L14P v L14F4, logit quantile
p0<-pL14P[,1]
p1<-pL14F4[,1]
pv<-analyNullSimP(p0,p1,t=4,ne=8.821,lower=TRUE)
P_F4<-log(pv/(1-pv))
## L14F4 v L14F16A, logit quantile
p0<-pL14F4[,1]
p1<-pL14F16A[,1]
pv<-analyNullSimP(p0,p1,t=12,ne=68.8381,lower=TRUE)
F4_F16A<-log(pv/(1-pv))
## L14F4 v L14F16B, logit quantile
p0<-pL14F4[,1]
p1<-pL14F16B[,1]
pv<-analyNullSimP(p0,p1,t=12,ne=56.7703,lower=TRUE)
F4_F16B<-log(pv/(1-pv))
sum(abs(P_F16A) > bnd) # 53
sum(abs(P_F16B) > bnd) # 36
sum(abs(P_F4) > bnd) # 128
sum(abs(F4_F16A) > bnd) # 30
sum(abs(F4_F16B) > bnd) # 27
sum((P_F16A > bnd & P_F16B > bnd) | (P_F16A < -bnd & P_F16B < -bnd)) # 27; = 50.9% and 75% shared
sum((F4_F16A > bnd & F4_F16B > bnd) | (F4_F16A < -bnd & F4_F16B < -bnd)) # 0; = 3.3% and 1.8% shared
## write combined allele frequencies for ABC for candidate SNPs
PF16snps<-unique(which(abs(P_F16A) > bnd | abs(P_F16B) > bnd)) #62 snps
PF4snps<-which(abs(P_F4) > bnd) #128 snps
F4F16snps<-unique(which(abs(F4_F16A) > bnd | abs(F4_F16B) > bnd)) #57 snps
snps1<-unique(c(PF4snps,PF16snps)) #141
snps2<-unique(c(PF4snps,F4F16snps)) #185 snps
snps3<-unique(c(PF4snps,F4F16snps,PF16snps)) #198
## write combined allele frequencies for ABC for candidate SNPs
lids<-read.table("../locusids.txt",header=FALSE)
plsnps<-data.frame(lids[snps3,1],pL14P[snps3,1],pL14F1[snps3,1],pL14F2[snps3,1],pL14F3[snps3,1],
pL14F4[snps3,1],pL14F5A[snps3,1],pL14F6A[snps3,1], pL14F7A[snps3,1],pL14F8A[snps3,1],pL14F16A[snps3,1],
pL14F5B[snps3,1],pL14F8B[snps3,1],pL14F16B[snps3,1])
write.table(plsnps,"../pLentilSnps.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
save(list=ls(),file="null.Rdata")