Allele Frequencies

Post date: Apr 18, 2018 5:18:54 PM

## Blue rgb(maxColorValue=255,0,136,55)

## Red rgb(maxColorValue=255,202,0,32)

## Green rgb(maxColorValue=255,5,113,176)

#/uufs/chpc.utah.edu/common/home/u0795641/projects/cmac_AR/L14/alt_ne/popgen/AlleleFreq/freq

library(RColorBrewer)

library(scales)

###### Will look at Allele Frequencies across populations splitting to A ######

files <- list.files(pattern=".+_p.txt")

files <- files[-c(9,10,11)]

A <- files[c(3,1,2,9,10,5,6,7,8,4)]

files <- list.files(pattern=".+_p.txt")

files <- files[-c(5,6,7,8,4)]

B <- files[c(3,1,2,7,8,5,6,4)]

calA <- NULL

for (f in A) {

dat <- read.csv(f, header=FALSE, sep=",")

calA <- rbind(calA, t(dat))

}

# Delete everything but median

r <- 1:nrow(calA)

medians <- seq(2,40,4)

calA <- calA[medians ,]

# Change to minor allele frequency

for(loci in 1:ncol(calA)){

if(calA[1,loci] > .5){

calA[,loci] <- 1 - calA[,loci]

}

calB <- NULL

for (f in B) {

dat <- read.csv(f, header=FALSE, sep=",")

calB <- rbind(calB, t(dat))

}

# Delete everything but median

r <- 1:nrow(calB)

medians <- seq(2,32,4)

calB <- calB[medians ,]

# Change to minor allele frequency

for(loci in 1:ncol(calB)){

if(calB[1,loci] > .5){

calB[,loci] <- 1 - calB[,loci]

}

## summaries of allele freq Change for minor allele

mean(abs(calA[10,]-calA[1,])) # 0.1547061

var(abs(calA[10,]-calA[1,])) # 0.02245264

mean(abs(calB[8,]-calB[1,])) # 0.1592494

var(abs(calB[8,]-calB[1,])) # 0.02414438

## Average heterozygosity over time

hetP <- matrix(NA, nrow=ncol(calA), ncol=1)

for(loci in 1:ncol(calA)){

hetP[loci] <- 2*calA[1,loci]*(1-calA[1,loci])

}

mean(hetP) # 0.2738522

#F4

hetF4 <- matrix(NA, nrow=ncol(calA), ncol=1)

for(loci in 1:ncol(calA)){

hetF4[loci] <- 2*calA[5,loci]*(1-calA[5,loci])

}

mean(hetF4) # 0.2348638

#F16A

hetA <- matrix(NA, nrow=ncol(calA), ncol=1)

for(loci in 1:ncol(calA)){

hetA[loci] <- 2*calA[10,loci]*(1-calA[10,loci])

}

mean(hetA) # 0.2221502

#F16B

hetB <- matrix(NA, nrow=ncol(calB), ncol=1)

for(loci in 1:ncol(calB)){

hetB[loci] <- 2*calB[8,loci]*(1-calB[8,loci])

}

mean(hetB) # 0.219709

## Correlations of allele frequency across all SNPs

cor(calA[1,], calA[5,]) # 0.778315

cor(calA[5,], calA[10,]) # 0.9263824

cor(calB[5,], calB[8,]) # 0.9137599

cor(calA[6,], calB[6,]) # 0.9808455

cor(calA[10,], calB[8,]) # 0.9225139

## Correlations in allele frequency Change

cor(calA[10,]-calA[5,], calB[8,]-calB[5,]) # 0.5050718

cor(calA[10,]-calA[1,], calB[8,]-calB[1,]) # 0.8423544

cor(abs(calA[10,]-calA[5,]), abs(calB[8,]-calB[5,])) # 0.5027098

cor(abs(calA[10,]-calA[1,]), abs(calB[8,]-calB[1,])) # 0.7632899

## Across significant SNPs

snps<-read.table("~/projects/cmac_AR/L14/alt_ne/popgen/AlleleFreq/pLentilSnps.txt",header=FALSE)

snps<- snps[,-1]

cor(snps[,1],snps[,5]) # -0.4234998

cor(snps[,5],snps[,10]) # 0.8051508

cor(snps[,5],snps[,13]) # 0.8262071

cor(snps[,10]-snps[,5],snps[,13]-snps[,5]) # 0.7434413

cor(snps[,10]-snps[,1],snps[,13]-snps[,1]) # 0.9516583

cor(abs(snps[,10]-snps[,5]),abs(snps[,13]-snps[,5])) # 0.6294505

cor(abs(snps[,10]-snps[,1]),abs(snps[,13]-snps[,1])) # 0.8159153

pm <- read.table('../../../alt_lines/wfabc/in_pM14.txt',header=FALSE)

p1 <- read.table('../../../alt_lines/wfabc/L1/in_pM14-L1.txt',header=FALSE,skip=2)

p2 <- read.table('../../../alt_lines/wfabc/L2/in_pM14-L2.txt',header=FALSE,skip=2)

p3 <- read.table('../../../alt_lines/wfabc/L3/in_pM14-L3.txt',header=FALSE,skip=2)

snps<-read.table("~/projects/cmac_AR/popgen/AlleleFreq/pLentilSnps.txt",header=FALSE)

snps[,1] <- sub('(:[[:digit:]]+$)','',snps[,1], perl=TRUE)

ids<-which(as.factor(snps[,1]) %in% as.factor(pm[,1]))

snps <- snps[ids,]

## Exceptional alleles

analyNullSimP<-function(p0=0.5,p1=0.5,t=100,ne=100,lower=TRUE){

fst<-1-(1-1/(2*ne))^t

alpha<-p0 * (1-fst)/fst

beta<-(1-p0) * (1-fst)/fst

pv<-pbeta(p1,alpha+0.001,beta+0.001,lower.tail=lower)

}

bnd<-log(0.99995/0.00005) ## equivalent to p < 0.0001

pL14P<-read.table("L14-P_p.txt",header=FALSE,sep=",")

pL14F1<-read.table("L14-F1_p.txt",header=FALSE,sep=",")

pL14F2<-read.table("L14-F2_p.txt",header=FALSE,sep=",")

pL14F3<-read.table("L14P-F3_p.txt",header=FALSE,sep=",")

pL14F4<-read.table("L14P-F4_p.txt",header=FALSE,sep=",")

pL14F5A<-read.table("L14A-F5_p.txt",header=FALSE,sep=",")

pL14F6A<-read.table("L14A-F6_p.txt",header=FALSE,sep=",")

pL14F7A<-read.table("L14A-F7_p.txt",header=FALSE,sep=",")

pL14F8A<-read.table("L14A-F8_p.txt",header=FALSE,sep=",")

pL14F16A<-read.table("L14A-F16_p.txt",header=FALSE,sep=",")

pL14F5B<-read.table("L14B-F5_p.txt",header=FALSE,sep=",")

pL14F8B<-read.table("L14B-F8_p.txt",header=FALSE,sep=",")

pL14F16B<-read.table("L14B-F16_p.txt",header=FALSE,sep=",")

## Ne values for branches separating populations

## L14P v L14F16A, logit quantile

p0<-pL14P[,1]

p1<-pL14F16A[,1]

pv<-analyNullSimP(p0,p1,t=16,ne=28.6953,lower=TRUE)

P_F16A<-log(pv/(1-pv))

## L14P v L14F16B, logit quantile

p0<-pL14P[,1]

p1<-pL14F16B[,1]

pv<-analyNullSimP(p0,p1,t=16,ne=27.2585,lower=TRUE)

P_F16B<-log(pv/(1-pv))

## L14P v L14F4, logit quantile

p0<-pL14P[,1]

p1<-pL14F4[,1]

pv<-analyNullSimP(p0,p1,t=4,ne=8.821,lower=TRUE)

P_F4<-log(pv/(1-pv))

## L14F4 v L14F16A, logit quantile

p0<-pL14F4[,1]

p1<-pL14F16A[,1]

pv<-analyNullSimP(p0,p1,t=12,ne=68.8381,lower=TRUE)

F4_F16A<-log(pv/(1-pv))

## L14F4 v L14F16B, logit quantile

p0<-pL14F4[,1]

p1<-pL14F16B[,1]

pv<-analyNullSimP(p0,p1,t=12,ne=56.7703,lower=TRUE)

F4_F16B<-log(pv/(1-pv))

## Graph for manuscript

pdf("manhatsNull.pdf", width = 9, height=15)

par(mfrow=c(5,1))

par(mar=c(4,5.5,3,1.5))

plot(abs(P_F16A),pch=19,col="darkgray",cex=0.5,ylab="",xlab="",cex.lab=1.7,cex.axis=1.3)