Exceptional Allele Freq Differences

Post date: Jan 30, 2017 8:28:41 PM

## /uufs/chpc.utah.edu/common/home/u0795641/projects/cmac_AR/L14/alt_ne/popgen/AlleleFreq/freq

## for p-values and first test of trade-offs

analyNullSimP<-function(p0=0.5,p1=0.5,t=100,ne=100,lower=TRUE){

fst<-1-(1-1/(2*ne))^t

alpha<-p0 * (1-fst)/fst

beta<-(1-p0) * (1-fst)/fst

pv<-pbeta(p1,alpha+0.001,beta+0.001,lower.tail=lower)

}

bnd<-log(0.99995/0.00005) ## equivalent to p < 0.0001

## showing the null distribution

pdf('null_example_example.pdf')

par(mar=c(5,5,4,2))

p0<-.3 ## initial allele frequency

ne<-100 ## estimated Ne

t<-16

fst<-1-(1-1/(2*ne))^t

alpha <-p0 * (1-fst)/fst

beta <-(1-p0) * (1-fst)/fst

x<-seq(0,1,0.01) # change in allele freq

y<-dbeta(x=x,alpha,beta) ## drift null simulation modeled with beta distribution, y are probabilities

plot(x,y,type='l', ylab='density', xlab='expected allele frequency from drift', main='Null Drift Simulation', cex.lab=1.5)

abline(v=.472, lty=2) ## actual change p1 - p0

dev.off()

# pdf('null_example1.pdf')

# par(mar=c(5,5,4,2))

# p0<-pL14P[4,1]

# ne=68.8381

# t=16

# p1<-pL14F16A[4,1]

# fst<-1-(1-1/(2*ne))^t

# alpha <-p0 * (1-fst)/fst

# beta <-(1-p0) * (1-fst)/fst

# x<-seq(0,1,0.01) # change in allele freq

# y<-dbeta(x=x,alpha,beta) ## drift null simulation modeled with beta distribution, y are probabilities

# plot(x,y,type='l', ylab='Density', xlab='Allele Frequency', main='Null Drift Simulation', cex.lab=1.5, lwd=2, cex.main=2)

# legend("topleft",legend=c('Original Frequency', 'End Frequency'), lty=1:2, cex=.95, bty='n')

# x.cord <- c(0,x,1)

# y.cord <- c(0,dbeta(x=x,alpha,beta),0)

# polygon(x.cord,y.cord,col='grey')

# abline(v=p1,lty=2) ## end values

# abline(v=p0, lty=1) ## beginning allele freq

# dev.off()

# pdf('null_example2.pdf')

# par(mar=c(5,5,4,2))

# p0<-pL14P[20174,1]

# ne=68.8381

# t=16

# p1<-pL14F16A[20174,1]

# fst<-1-(1-1/(2*ne))^t

# alpha <-p0 * (1-fst)/fst

# beta <-(1-p0) * (1-fst)/fst

# x<-seq(0,1,0.01) # change in allele freq

# y<-dbeta(x=x,alpha,beta) ## drift null simulation modeled with beta distribution, y are probabilities

# plot(x,y,type='l', ylab='Density', xlab='Allele Frequency', main='Null Drift Simulation', cex.lab=1.5, lwd=2, cex.main=2)

# legend("topleft",legend=c('Original Frequency', 'End Frequency'), lty=1:2, cex=.95, bty='n')

# x.cord <- c(0,x,1)

# y.cord <- c(0,dbeta(x=x,alpha,beta),0)

# polygon(x.cord,y.cord,col='grey')

# abline(v=p1,lty=2) ## end values

# abline(v=p0, lty=1) ## beginning allele freq

# dev.off()

pdf('null_examples.pdf', width=14, height=7)

par(mar=c(5,5,4,2),mfrow=c(1,2))

p0<-pL14P[4,1]

ne=68.8381

t=16

p1<-pL14F16A[4,1]

fst<-1-(1-1/(2*ne))^t

alpha <-p0 * (1-fst)/fst

beta <-(1-p0) * (1-fst)/fst

x<-seq(0,1,0.01) # change in allele freq

y<-dbeta(x=x,alpha,beta) ## drift null simulation modeled with beta distribution, y are probabilities

plot(x,y,type='l', ylab='Density', xlab='Allele Frequency', cex.lab=2, cex.axis=1.7, lwd=2)

title(main="(a)",adj=0,cex.main=2, line=.4)

legend("topleft",legend=c('Original Frequency', 'End Frequency'), lty=1:2, cex=1.5, bty='n')

x.cord <- c(0,x,1)

y.cord <- c(0,dbeta(x=x,alpha,beta),0)

polygon(x.cord,y.cord,col='grey')

abline(v=p1,lty=2) ## end values

abline(v=p0, lty=1) ## beginning allele freq

p0<-pL14P[20174,1]

ne=68.8381

t=16

p1<-pL14F16A[20174,1]

fst<-1-(1-1/(2*ne))^t

alpha <-p0 * (1-fst)/fst

beta <-(1-p0) * (1-fst)/fst

x<-seq(0,1,0.01) # change in allele freq

y<-dbeta(x=x,alpha,beta) ## drift null simulation modeled with beta distribution, y are probabilities

plot(x,y,type='l', ylab='Density', xlab='Allele Frequency', cex.lab=2, cex.axis=1.7, lwd=2)

title(main="(b)",adj=0,cex.main=2, line=.4)

legend("topleft",legend=c('Original Frequency', 'End Frequency'), lty=1:2, cex=1.5, bty='n')

x.cord <- c(0,x,1)

y.cord <- c(0,dbeta(x=x,alpha,beta),0)

polygon(x.cord,y.cord,col='grey')

abline(v=p1,lty=2) ## end values

abline(v=p0, lty=1) ## beginning allele freq

dev.off()

### Allele frequency paired with above plot

files <- list.files(pattern=".+_p.txt")

files <- files[-c(1,10,11,12)]

A <- files[c(3,1,2,9,10,5,6,7,8,4)]

calA <- NULL

for (f in A) {

dat <- read.csv(f, header=FALSE, sep=",")

calA <- rbind(calA, t(dat))

}

# Delete everything but median

r <- 1:nrow(calA)

medians <- seq(2,40,4)

calA <- calA[medians ,]

pdf('Ne_freq1.pdf')

par(mar=c(5,5,4,2))

matplot(calA[,4],type='l', col=1:length(calA), xaxt='n', main='Allele Frequency', ylab='Frequency', xlab='Generation', ylim=c(0,1), cex.lab=1.5, lwd=2, cex.main=2)

axis(1, at=1:10, labels=c('P',1,2,3,4,5,6,7,8,16))

dev.off()

pdf('Ne_freq2.pdf')

par(mar=c(5,5,4,2))

matplot(calA[,20174],type='l', col=1:length(calA), xaxt='n', main='Allele Frequency', ylab='Frequency', xlab='Generation', ylim=c(0,1), cex.lab=1.5, lwd=2, cex.main=2)

axis(1, at=1:10, labels=c('P',1,2,3,4,5,6,7,8,16))

dev.off()

par(ask=TRUE)

for(i in 1:nrow(pL14P)){

p0<-pL14P[i,1]

ne=68.8381

t=16

p1<-pL14F4[i,1]

fst<-1-(1-1/(2*ne))^t

alpha <-p0 * (1-fst)/fst

beta <-(1-p0) * (1-fst)/fst

x<-seq(0,1,0.01) # change in allele freq

y<-dbeta(x=x,alpha,beta) ## drift null simulation modeled with beta distribution, y are probabilities

plot(x,y,type='l', ylab='density', xlab='expected allele frequency from drift', main=i, cex.lab=1.5)

abline(v=p1,lty=2) ## end values

abline(v=p0, lty=1) ## beginning allele freq

}

## All files of interest:

pL14P<-read.table("L14-P_p.txt",header=FALSE,sep=",")

pL14F1<-read.table("L14-F1_p.txt",header=FALSE,sep=",")

pL14F2<-read.table("L14-F2_p.txt",header=FALSE,sep=",")

pL14F3<-read.table("L14P-F3_p.txt",header=FALSE,sep=",")

pL14F4<-read.table("L14P-F4_p.txt",header=FALSE,sep=",")

pL14F5A<-read.table("L14A-F5_p.txt",header=FALSE,sep=",")

pL14F6A<-read.table("L14A-F6_p.txt",header=FALSE,sep=",")

pL14F7A<-read.table("L14A-F7_p.txt",header=FALSE,sep=",")

pL14F8A<-read.table("L14A-F8_p.txt",header=FALSE,sep=",")

pL14F16A<-read.table("L14A-F16_p.txt",header=FALSE,sep=",")

pL14F5B<-read.table("L14B-F5_p.txt",header=FALSE,sep=",")

pL14F8B<-read.table("L14B-F8_p.txt",header=FALSE,sep=",")

pL14F16B<-read.table("L14B-F16_p.txt",header=FALSE,sep=",")

## Ne values for branches separating populations

## L14P v L14F16A, logit quantile

p0<-pL14P[,1]

p1<-pL14F16A[,1]

pv<-analyNullSimP(p0,p1,t=16,ne=28.6953,lower=TRUE)

P_F16A<-log(pv/(1-pv))

## L14P v L14F16B, logit quantile

p0<-pL14P[,1]

p1<-pL14F16B[,1]

pv<-analyNullSimP(p0,p1,t=16,ne=27.2585,lower=TRUE)

P_F16B<-log(pv/(1-pv))

## L14P v L14F4, logit quantile

p0<-pL14P[,1]

p1<-pL14F4[,1]

pv<-analyNullSimP(p0,p1,t=4,ne=8.821,lower=TRUE)

P_F4<-log(pv/(1-pv))

## L14F4 v L14F16A, logit quantile

p0<-pL14F4[,1]

p1<-pL14F16A[,1]

pv<-analyNullSimP(p0,p1,t=12,ne=68.8381,lower=TRUE)

F4_F16A<-log(pv/(1-pv))

## L14F4 v L14F16B, logit quantile

p0<-pL14F4[,1]

p1<-pL14F16B[,1]

pv<-analyNullSimP(p0,p1,t=12,ne=56.7703,lower=TRUE)

F4_F16B<-log(pv/(1-pv))

## manhattan plots

pdf("manhatP_F16.pdf",width=9,height=9)

par(mfrow=c(2,1))

par(mar=c(4,5.5,3,1.5))

plot(abs(P_F16A),pch=19,col="darkgray",cex=0.5,ylab="negative logit quantile",xlab="",cex.lab=1.7,cex.axis=1.3)

abline(h=bnd,lty=3,col="red",lwd=1.4)

title(main="(a) P to F16A",adj=0,cex.main=1.6)

plot(abs(P_F16B),pch=19,col="darkgray",cex=0.5,ylab="negative logit quantile",xlab="",cex.lab=1.7,cex.axis=1.3)

abline(h=bnd,lty=3,col="red",lwd=1.4)

title(main="(b) P to F16B",adj=0,cex.main=1.6)

dev.off()

pdf("manhatP_F4.pdf",width=9,height=9)

par(mfrow=c(1,1))

par(mar=c(4,5.5,3,1.5))

plot(abs(P_F4),pch=19,col="darkgray",cex=0.5,ylab="negative logit quantile",xlab="",cex.lab=1.7,cex.axis=1.3)

abline(h=bnd,lty=3,col="red",lwd=1.4)

title(main="(a) P to F4",adj=0,cex.main=1.6)

dev.off()

pdf("manhatF4_F16.pdf",width=9,height=9)

par(mfrow=c(2,1))

par(mar=c(4,5.5,3,1.5))

plot(abs(F4_F16A),pch=19,col="darkgray",cex=0.5,ylab="negative logit quantile",xlab="",cex.lab=1.7,cex.axis=1.3)

abline(h=bnd,lty=3,col="red",lwd=1.4)

title(main="(a) F4 to F16A",adj=0,cex.main=1.6)

plot(abs(F4_F16B),pch=19,col="darkgray",cex=0.5,ylab="negative logit quantile",xlab="",cex.lab=1.7,cex.axis=1.3)

abline(h=bnd,lty=3,col="red",lwd=1.4)

title(main="(b) F4 to F16B",adj=0,cex.main=1.6)

dev.off()

## Graph for manuscript

pdf("manhatsNull.pdf", width = 7, height=14)

par(mfrow=c(5,1))

par(mar=c(2.5, 1, 2, 1), oma=c(5,5.5,2,0))

plot(abs(P_F16A),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=2,cex.axis=1.7)

abline(h=bnd,lty=3,col="red",lwd=1.4)

title(main="(a) P to F16A",adj=0,cex.main=2, line=.4)

plot(abs(P_F16B),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=2,cex.axis=1.7)

abline(h=bnd,lty=3,col="red",lwd=1.4)

title(main="(b) P to F16B",adj=0,cex.main=2, line=.4)

plot(abs(P_F4),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=2,cex.axis=1.7)

abline(h=bnd,lty=3,col="red",lwd=1.4)

title(main="(c) P to F4",adj=0,cex.main=2, line=.4)

plot(abs(F4_F16A),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=2,cex.axis=1.7)

abline(h=bnd,lty=3,col="red",lwd=1.4)

title(main="(d) F4 to F16A",adj=0,cex.main=2, line=.4)

plot(abs(F4_F16B),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=2,cex.axis=1.7)

abline(h=bnd,lty=3,col="red",lwd=1.4)

title(main="(e) F4 to F16B",adj=0,cex.main=2, line=.4)

mtext("negative logit quantile", side=2, line=2.5, cex=2.5, outer=TRUE)

mtext("locus", side=1,line=1.5, cex=2, outer=TRUE)

dev.off()

## counts and parallelism

sum(abs(P_F16A) > bnd) # 53

sum(abs(P_F16B) > bnd) # 36

sum(abs(P_F4) > bnd) # 185

sum(abs(F4_F16A) > bnd) # 30

sum(abs(F4_F16B) > bnd) # 27

sum((P_F16A > bnd & P_F16B > bnd) | (P_F16A < -bnd & P_F16B < -bnd)) # 27; = 50.9% and 75% shared

sum((F4_F16A > bnd & F4_F16B > bnd) | (F4_F16A < -bnd & F4_F16B < -bnd)) # 0; = 3.3% and 1.8% shared

## write combined allele frequencies for ABC for candidate SNPs

PF16snps<-unique(which(abs(P_F16A) > bnd | abs(P_F16B) > bnd)) #62 snps

PF4snps<-which(abs(P_F4) > bnd) #185 snps

F4F16snps<-unique(which(abs(F4_F16A) > bnd | abs(F4_F16B) > bnd)) #57 snps

snps1<-unique(c(PF4snps,PF16snps)) #141

snps2<-unique(c(PF4snps,F4F16snps)) # snps

snps3<-unique(c(PF4snps,F4F16snps,PF16snps)) # 188

## Creating file for ABC

lids<-read.table("../locusids.txt",header=FALSE)

plsnps<-data.frame(lids[snps3,1],pL14P[snps3,1],pL14F1[snps3,1],pL14F2[snps3,1],pL14F3[snps3,1],

pL14F4[snps3,1],pL14F5A[snps3,1],pL14F6A[snps3,1], pL14F7A[snps3,1],pL14F8A[snps3,1],pL14F16A[snps3,1],

pL14F5B[snps3,1],pL14F8B[snps3,1],pL14F16B[snps3,1])

write.table(plsnps,"../pLentilSnps.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)

save(list=ls(),file="null.Rdata")

### some checks

matplot(calA[,PF4snps],type='l', col=1:length(d), xaxt='n', main='Minor Allele Frequency Change for line A', ylab='Allele Frequency', xlab='Generation')

axis(1, at=1:10, labels=c('P',1,2,3,4,5,6,7,8,16))

#legend("bottomright",legend=d, col=1:length(d), lty=1:length(d), cex=.95)

matplot(calB[,PF4snps],type='l', col=1:length(d), xaxt='n', main='Minor Allele Frequency Change for line B', ylab='Allele Frequency', xlab='Generation')

axis(1, at=1:8, labels=c('P',1,2,3,4,5,8,16))

matplot(calA[,F4F16snps],type='l', col=1:length(d), xaxt='n', main='Minor Allele Frequency Change for line A', ylab='Allele Frequency', xlab='Generation')

axis(1, at=1:10, labels=c('P',1,2,3,4,5,6,7,8,16))

matplot(calB[,F4F16snps],type='l', col=1:length(d), xaxt='n', main='Minor Allele Frequency Change for line B', ylab='Allele Frequency', xlab='Generation')

axis(1, at=1:8, labels=c('P',1,2,3,4,5,8,16))

pdf("MAFs.pdf",width=18,height=9)

par(mfrow=c(2,1))

par(mar=c(4,5.5,3,1.5))

matplot(calA[,PF4snps],type='l', col=1:length(d), xaxt='n', ylab='Allele Frequency', xlab='Generation', cex.lab=2)

axis(1, at=1:10, labels=c('P',1,2,3,4,5,6,7,8,16), cex.axis=1.5)

title(main="(b) Sub-Line A MAF",adj=0,cex.main=1.5)

matplot(calA[,F4F16snps],type='l', col=1:length(d), xaxt='n', ylab='Allele Frequency', xlab='Generation', cex.lab=2)

axis(1, at=1:10, labels=c('P',1,2,3,4,5,6,7,8,16), cex.axis=1.5)

title(main="(b) Sub-Line A MAF",adj=0,cex.main=1.5)

dev.off()

### With lower bound of 5:

## Ne values for branches separating populations

## L14P v L14F16A, logit quantile

p0<-pL14P[,1]

p1<-pL14F16A[,1]

pv<-analyNullSimP(p0,p1,t=16,ne=28.6953,lower=TRUE)

P_F16A<-log(pv/(1-pv))

## L14P v L14F16B, logit quantile

p0<-pL14P[,1]

p1<-pL14F16B[,1]

pv<-analyNullSimP(p0,p1,t=16,ne=27.2585,lower=TRUE)

P_F16B<-log(pv/(1-pv))

## L14P v L14F4, logit quantile

p0<-pL14P[,1]

p1<-pL14F4[,1]

pv<-analyNullSimP(p0,p1,t=4,ne=8.821,lower=TRUE)

P_F4<-log(pv/(1-pv))

## L14F4 v L14F16A, logit quantile

p0<-pL14F4[,1]

p1<-pL14F16A[,1]

pv<-analyNullSimP(p0,p1,t=12,ne=68.8381,lower=TRUE)

F4_F16A<-log(pv/(1-pv))

## L14F4 v L14F16B, logit quantile

p0<-pL14F4[,1]

p1<-pL14F16B[,1]

pv<-analyNullSimP(p0,p1,t=12,ne=56.7703,lower=TRUE)

F4_F16B<-log(pv/(1-pv))

sum(abs(P_F16A) > bnd) # 53

sum(abs(P_F16B) > bnd) # 36

sum(abs(P_F4) > bnd) # 128

sum(abs(F4_F16A) > bnd) # 30

sum(abs(F4_F16B) > bnd) # 27

sum((P_F16A > bnd & P_F16B > bnd) | (P_F16A < -bnd & P_F16B < -bnd)) # 27; = 50.9% and 75% shared

sum((F4_F16A > bnd & F4_F16B > bnd) | (F4_F16A < -bnd & F4_F16B < -bnd)) # 0; = 3.3% and 1.8% shared

## write combined allele frequencies for ABC for candidate SNPs

PF16snps<-unique(which(abs(P_F16A) > bnd | abs(P_F16B) > bnd)) #62 snps

PF4snps<-which(abs(P_F4) > bnd) #128 snps

F4F16snps<-unique(which(abs(F4_F16A) > bnd | abs(F4_F16B) > bnd)) #57 snps

snps1<-unique(c(PF4snps,PF16snps)) #141

snps2<-unique(c(PF4snps,F4F16snps)) #185 snps

snps3<-unique(c(PF4snps,F4F16snps,PF16snps)) #198

## write combined allele frequencies for ABC for candidate SNPs

lids<-read.table("../locusids.txt",header=FALSE)

plsnps<-data.frame(lids[snps3,1],pL14P[snps3,1],pL14F1[snps3,1],pL14F2[snps3,1],pL14F3[snps3,1],

pL14F4[snps3,1],pL14F5A[snps3,1],pL14F6A[snps3,1], pL14F7A[snps3,1],pL14F8A[snps3,1],pL14F16A[snps3,1],

pL14F5B[snps3,1],pL14F8B[snps3,1],pL14F16B[snps3,1])

write.table(plsnps,"../pLentilSnps.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)

save(list=ls(),file="null.Rdata")

Page updated

Google Sites

Report abuse