Post date: Apr 18, 2018 5:18:54 PM
## Blue rgb(maxColorValue=255,0,136,55)
## Red rgb(maxColorValue=255,202,0,32)
## Green rgb(maxColorValue=255,5,113,176)
#/uufs/chpc.utah.edu/common/home/u0795641/projects/cmac_AR/L14/alt_ne/popgen/AlleleFreq/freq
library(RColorBrewer)
library(scales)
###### Will look at Allele Frequencies across populations splitting to A ######
files <- list.files(pattern=".+_p.txt")
files <- files[-c(9,10,11)]
A <- files[c(3,1,2,9,10,5,6,7,8,4)]
files <- list.files(pattern=".+_p.txt")
files <- files[-c(5,6,7,8,4)]
B <- files[c(3,1,2,7,8,5,6,4)]
calA <- NULL
for (f in A) {
dat <- read.csv(f, header=FALSE, sep=",")
calA <- rbind(calA, t(dat))
}
# Delete everything but median
r <- 1:nrow(calA)
medians <- seq(2,40,4)
calA <- calA[medians ,]
# Change to minor allele frequency
for(loci in 1:ncol(calA)){
if(calA[1,loci] > .5){
calA[,loci] <- 1 - calA[,loci]
}
}
calB <- NULL
for (f in B) {
dat <- read.csv(f, header=FALSE, sep=",")
calB <- rbind(calB, t(dat))
}
# Delete everything but median
r <- 1:nrow(calB)
medians <- seq(2,32,4)
calB <- calB[medians ,]
# Change to minor allele frequency
for(loci in 1:ncol(calB)){
if(calB[1,loci] > .5){
calB[,loci] <- 1 - calB[,loci]
}
}
## summaries of allele freq Change for minor allele
mean(abs(calA[10,]-calA[1,])) # 0.1547061
var(abs(calA[10,]-calA[1,])) # 0.02245264
mean(abs(calB[8,]-calB[1,])) # 0.1592494
var(abs(calB[8,]-calB[1,])) # 0.02414438
## Average heterozygosity over time
#P
hetP <- matrix(NA, nrow=ncol(calA), ncol=1)
for(loci in 1:ncol(calA)){
hetP[loci] <- 2*calA[1,loci]*(1-calA[1,loci])
}
mean(hetP) # 0.2738522
#F4
hetF4 <- matrix(NA, nrow=ncol(calA), ncol=1)
for(loci in 1:ncol(calA)){
hetF4[loci] <- 2*calA[5,loci]*(1-calA[5,loci])
}
mean(hetF4) # 0.2348638
#F16A
hetA <- matrix(NA, nrow=ncol(calA), ncol=1)
for(loci in 1:ncol(calA)){
hetA[loci] <- 2*calA[10,loci]*(1-calA[10,loci])
}
mean(hetA) # 0.2221502
#F16B
hetB <- matrix(NA, nrow=ncol(calB), ncol=1)
for(loci in 1:ncol(calB)){
hetB[loci] <- 2*calB[8,loci]*(1-calB[8,loci])
}
mean(hetB) # 0.219709
## Correlations of allele frequency across all SNPs
cor(calA[1,], calA[5,]) # 0.778315
cor(calA[5,], calA[10,]) # 0.9263824
cor(calB[5,], calB[8,]) # 0.9137599
cor(calA[6,], calB[6,]) # 0.9808455
cor(calA[10,], calB[8,]) # 0.9225139
## Correlations in allele frequency Change
cor(calA[10,]-calA[5,], calB[8,]-calB[5,]) # 0.5050718
cor(calA[10,]-calA[1,], calB[8,]-calB[1,]) # 0.8423544
cor(abs(calA[10,]-calA[5,]), abs(calB[8,]-calB[5,])) # 0.5027098
cor(abs(calA[10,]-calA[1,]), abs(calB[8,]-calB[1,])) # 0.7632899
## Across significant SNPs
snps<-read.table("~/projects/cmac_AR/L14/alt_ne/popgen/AlleleFreq/pLentilSnps.txt",header=FALSE)
snps<- snps[,-1]
cor(snps[,1],snps[,5]) # -0.4234998
cor(snps[,5],snps[,10]) # 0.8051508
cor(snps[,5],snps[,13]) # 0.8262071
cor(snps[,10]-snps[,5],snps[,13]-snps[,5]) # 0.7434413
cor(snps[,10]-snps[,1],snps[,13]-snps[,1]) # 0.9516583
cor(abs(snps[,10]-snps[,5]),abs(snps[,13]-snps[,5])) # 0.6294505
cor(abs(snps[,10]-snps[,1]),abs(snps[,13]-snps[,1])) # 0.8159153
pm <- read.table('../../../alt_lines/wfabc/in_pM14.txt',header=FALSE)
p1 <- read.table('../../../alt_lines/wfabc/L1/in_pM14-L1.txt',header=FALSE,skip=2)
p2 <- read.table('../../../alt_lines/wfabc/L2/in_pM14-L2.txt',header=FALSE,skip=2)
p3 <- read.table('../../../alt_lines/wfabc/L3/in_pM14-L3.txt',header=FALSE,skip=2)
snps<-read.table("~/projects/cmac_AR/popgen/AlleleFreq/pLentilSnps.txt",header=FALSE)
snps[,1] <- sub('(:[[:digit:]]+$)','',snps[,1], perl=TRUE)
ids<-which(as.factor(snps[,1]) %in% as.factor(pm[,1]))
snps <- snps[ids,]
## Exceptional alleles
analyNullSimP<-function(p0=0.5,p1=0.5,t=100,ne=100,lower=TRUE){
fst<-1-(1-1/(2*ne))^t
alpha<-p0 * (1-fst)/fst
beta<-(1-p0) * (1-fst)/fst
pv<-pbeta(p1,alpha+0.001,beta+0.001,lower.tail=lower)
pv
}
bnd<-log(0.99995/0.00005) ## equivalent to p < 0.0001
pL14P<-read.table("L14-P_p.txt",header=FALSE,sep=",")
pL14F1<-read.table("L14-F1_p.txt",header=FALSE,sep=",")
pL14F2<-read.table("L14-F2_p.txt",header=FALSE,sep=",")
pL14F3<-read.table("L14P-F3_p.txt",header=FALSE,sep=",")
pL14F4<-read.table("L14P-F4_p.txt",header=FALSE,sep=",")
pL14F5A<-read.table("L14A-F5_p.txt",header=FALSE,sep=",")
pL14F6A<-read.table("L14A-F6_p.txt",header=FALSE,sep=",")
pL14F7A<-read.table("L14A-F7_p.txt",header=FALSE,sep=",")
pL14F8A<-read.table("L14A-F8_p.txt",header=FALSE,sep=",")
pL14F16A<-read.table("L14A-F16_p.txt",header=FALSE,sep=",")
pL14F5B<-read.table("L14B-F5_p.txt",header=FALSE,sep=",")
pL14F8B<-read.table("L14B-F8_p.txt",header=FALSE,sep=",")
pL14F16B<-read.table("L14B-F16_p.txt",header=FALSE,sep=",")
## Ne values for branches separating populations
## L14P v L14F16A, logit quantile
p0<-pL14P[,1]
p1<-pL14F16A[,1]
pv<-analyNullSimP(p0,p1,t=16,ne=28.6953,lower=TRUE)
P_F16A<-log(pv/(1-pv))
## L14P v L14F16B, logit quantile
p0<-pL14P[,1]
p1<-pL14F16B[,1]
pv<-analyNullSimP(p0,p1,t=16,ne=27.2585,lower=TRUE)
P_F16B<-log(pv/(1-pv))
## L14P v L14F4, logit quantile
p0<-pL14P[,1]
p1<-pL14F4[,1]
pv<-analyNullSimP(p0,p1,t=4,ne=8.821,lower=TRUE)
P_F4<-log(pv/(1-pv))
## L14F4 v L14F16A, logit quantile
p0<-pL14F4[,1]
p1<-pL14F16A[,1]
pv<-analyNullSimP(p0,p1,t=12,ne=68.8381,lower=TRUE)
F4_F16A<-log(pv/(1-pv))
## L14F4 v L14F16B, logit quantile
p0<-pL14F4[,1]
p1<-pL14F16B[,1]
pv<-analyNullSimP(p0,p1,t=12,ne=56.7703,lower=TRUE)
F4_F16B<-log(pv/(1-pv))
## Graph for manuscript
pdf("manhatsNull.pdf", width = 9, height=15)
par(mfrow=c(5,1))
par(mar=c(4,5.5,3,1.5))
plot(abs(P_F16A),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=1.7,cex.axis=1.3)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="P to F16A",adj=0,cex.main=1.6)
plot(abs(P_F16B),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=1.7,cex.axis=1.3)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="P to F16B",adj=0,cex.main=1.6)
plot(abs(P_F4),pch=19,col="darkgray",cex=0.5,ylab='',xlab="",cex.lab=1.7,cex.axis=1.3)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="P to F4",adj=0,cex.main=1.6)
plot(abs(F4_F16A),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=1.7,cex.axis=1.3)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="F4 to F16A",adj=0,cex.main=1.6)
plot(abs(F4_F16B),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=1.7,cex.axis=1.3)
abline(h=bnd,lty=3,col="red",lwd=1.4)
title(main="F4 to F16B",adj=0,cex.main=1.6)
mtext("negative logit quantile", side=2, line=2.8, cex=2)
mtext("locus", side=1, line=3, cex=2)
dev.off()
## counts and parallelism
sum(abs(P_F16A) > bnd) # 53
sum(abs(P_F16B) > bnd) # 36
sum(abs(P_F4) > bnd) # 128
sum(abs(F4_F16A) > bnd) # 30
sum(abs(F4_F16B) > bnd) # 27
sum((P_F16A > bnd & P_F16B > bnd) | (P_F16A < -bnd & P_F16B < -bnd)) # 27; = 50.9% and 75% shared
sum((F4_F16A > bnd & F4_F16B > bnd) | (F4_F16A < -bnd & F4_F16B < -bnd)) # 0; = 3.3% and 1.8% shared
## write combined allele frequencies for ABC for candidate SNPs
PF16snps<-unique(which(abs(P_F16A) > bnd | abs(P_F16B) > bnd)) #62 snps
PF4snps<-which(abs(P_F4) > bnd) #128 snps
F4F16snps<-unique(which(abs(F4_F16A) > bnd | abs(F4_F16B) > bnd)) #57 snps
snps1<-unique(c(PF4snps,PF16snps)) #141
snps2<-unique(c(PF4snps,F4F16snps)) #185 snps
snps3<-unique(c(PF4snps,F4F16snps,PF16snps)) #198
## Creating file for ABC
lids<-read.table("../locusids.txt",header=FALSE)
plsnps<-data.frame(lids[snps3,1],pL14P[snps3,1],pL14F1[snps3,1],pL14F2[snps3,1],pL14F3[snps3,1],
pL14F4[snps3,1],pL14F5A[snps3,1],pL14F6A[snps3,1], pL14F7A[snps3,1],pL14F8A[snps3,1],pL14F16A[snps3,1],
pL14F5B[snps3,1],pL14F8B[snps3,1],pL14F16B[snps3,1])
write.table(plsnps,"../pLentilSnps.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
## Combined MAFs
set.seed(12)
rando<-sample(21342,25)
pdf("MAFs.pdf", width=7, height=7)
par(mfrow=c(3,1))
par(oma = c(3, 3.5, 1, 0)) # make room (i.e. the 4's) for the overall x and y axis titles
par(mar = c(2.5, 2, 1.5, 1)) # make the plots be closer together
matplot(x=c(0:8,11),y=calA[,PF4snps],type='l', lty='solid',col=alpha(rgb(0,0,0),0.3),
xaxt='n',xlim=c(0,11), cex.axis=1, xaxs='i')
axis(1, at=c(0:8,11), labels=c('P',1,2,3,4,5,6,7,8,16), cex.axis=1.5)
title(main="(a) selected for between P-F4",adj=0, cex.main=1.5)
matplot(x=c(0:8,11),y=calA[,F4F16snps],type='l', lty='solid', col=alpha(rgb(0,0,0),0.3),
xaxt='n',xlim=c(0,11), cex.axis=1, xaxs='i')
title(main="(b) selected for between F4-F16",adj=0, cex.main=1.5)
axis(1, at=c(0:8,11), labels=c('P',1,2,3,4,5,6,7,8,16), cex.axis=1.5)
matplot(x=c(0:8,11),y=calA[,rando],type='l', lty='solid', col=alpha(rgb(0,0,0),0.3),
xaxt='n',xlim=c(0,11), cex.axis=1, xaxs='i', ylim=c(0,1))
title(main="(c) random SNPs",adj=0, cex.main=1.5)
axis(1, at=c(0:8,11), labels=c('P',1,2,3,4,5,6,7,8,16), cex.axis=1.5)
mtext("allele frequency", side=2, line=1, cex=2, outer=TRUE)
mtext("generation", side=1,line=1.5, cex=2, outer=TRUE)
dev.off()
# for(i in 1:25){
# set.seed(i)
# rando<-sample(21342,25)
# par(ask=TRUE)
# par(mfrow=c(3,1))
# par(oma = c(3, 3, 1, 0)) # make room (i.e. the 4's) for the overall x and y axis titles
# par(mar = c(2.5, 1, 1.5, 1)) # make the plots be closer together
# matplot(x=c(0:8,11),y=calA[,PF4snps],type='l', lty='solid',col=alpha(rgb(0,0,0),0.3),
# xaxt='n',yaxt='n',xlim=c(0,11), cex.axis=1.5, xaxs='i')
# axis(1, at=c(0:8,11), labels=c('P',1,2,3,4,5,6,7,8,16), cex.axis=1.5)
# title(main="(a) selected for between P-F4",adj=0, cex.main=1.5)
#
# matplot(x=c(0:8,11),y=calA[,F4F16snps],type='l', lty='solid', col=alpha(rgb(0,0,0),0.3),
# xaxt='n', yaxt='n',xlim=c(0,11), cex.axis=1.5, xaxs='i')
# title(main="(b) selected for between F4-F16",adj=0, cex.main=1.5)
# axis(1, at=c(0:8,11), labels=c('P',1,2,3,4,5,6,7,8,16), cex.axis=1.5)
#
# matplot(x=c(0:8,11),y=calA[,rando],type='l', lty='solid', col=alpha(rgb(0,0,0),0.3),
# xaxt='n', yaxt='n',xlim=c(0,11), cex.axis=1.5, xaxs='i')
# title(main="(c) random SNPs",adj=0, cex.main=1.5)
# axis(1, at=c(0:8,11), labels=c('P',1,2,3,4,5,6,7,8,16), cex.axis=1.5)
# mtext("allele frequency", side=2, line=1, cex=2, outer=TRUE)
# mtext(paste(i), side=1,line=1.5, cex=2, outer=TRUE)
# }
## early A
mean(calA[10,PF4snps]-calA[1,PF4snps]) # 0.6816403
var(calA[10,PF4snps]-calA[1,PF4snps]) # 0.02206526
max(calA[10,PF4snps]) #0.997808
min(calA[10,PF4snps]) #0.46405
length(which(calA[10,PF4snps]>0.9))/128 # 0.8828125
## late A
mean(calA[10,F4F16snps]-calA[1,F4F16snps]) # 0.09435044
var(calA[10,F4F16snps]-calA[1,F4F16snps]) # 0.1497865
max(calA[10,F4F16snps]) # 0.992098
min(calA[10,F4F16snps]) # 0.005783
length(which(calA[10,F4F16snps]>0.9))/57 # 0.122807
length(which(calA[10,F4F16snps]<0.1))/57 # 0.3157895
## early B
mean(calB[8,PF4snps]-calB[1,PF4snps]) # 0.679088
var(calB[8,PF4snps]-calB[1,PF4snps]) # 0.02220894
max(calB[8,PF4snps]) #0.994669
min(calB[8,PF4snps]) #0.516201
length(which(calB[8,PF4snps]>0.9))/128 # 0.84375
## late B
mean(calB[8,F4F16snps]-calB[1,F4F16snps]) # 0.05303028
var(calB[8,F4F16snps]-calB[1,F4F16snps]) # 0.1587919
max(calB[8,F4F16snps]) # 0.991129
min(calB[8,F4F16snps]) # 0.007114
length(which(calB[8,F4F16snps]>0.9))/57 # 0.1929825
length(which(calB[8,F4F16snps]<0.1))/57 # 0.4385965
##Next: LD