L11 Allele Freqs, Ne, LD

Post date: Jan 07, 2019 2:11:35 PM

#/uufs/chpc.utah.edu/common/home/u0795641/projects/cmac_AR/L14/alt_ne/popgen/AlleleFreq/freq

library(RColorBrewer)

library(scales)

files <- list.files(pattern=".+_p.txt")

files<- files[-1]

files <- files[-c(9,10,11)]

A <- files[c(3,1,2,9,10,5,6,7,8,4)]

files <- list.files(pattern=".+_p.txt")

files<- files[-1]

files <- files[-c(5,6,7,8,4)]

B <- files[c(3,1,2,7,8,5,6,4)]

calC <- read.csv("L14-P_p.txt", header=FALSE, sep=",")

calC <- cbind(calC, read.csv("L11-F4_p.txt", header=FALSE, sep=","))

calC <- t(calC)

r <- 1:nrow(calC)

medians <- seq(2,8,4)

calC <- calC[medians,]

for(loci in 1:ncol(calC)){

if(calC[1,loci] > .5){

calC[,loci] <- 1 - calC[,loci]

}

calA <- NULL

for (f in A) {

dat <- read.csv(f, header=FALSE, sep=",")

calA <- rbind(calA, t(dat))

}

# Delete everything but median

r <- 1:nrow(calA)

medians <- seq(2,40,4)

calA <- calA[medians,]

# Change to minor allele frequency

for(loci in 1:ncol(calA)){

if(calA[1,loci] > .5){

calA[,loci] <- 1 - calA[,loci]

}

calB <- NULL

for (f in B) {

dat <- read.csv(f, header=FALSE, sep=",")

calB <- rbind(calB, t(dat))

}

# Delete everything but median

r <- 1:nrow(calB)

medians <- seq(2,32,4)

calB <- calB[medians ,]

# Change to minor allele frequency

for(loci in 1:ncol(calB)){

if(calB[1,loci] > .5){

calB[,loci] <- 1 - calB[,loci]

}

### Histograms of allele frequencies

pdf('p_hists.pdf',height=21, width=7)

par(mfrow=c(3,1))

hist(calC[1,],main='L14 P')

hist(calA[5,],main='L14 F4')

hist(calC[2,],main='L11 F4', xlim=c(0,1))

dev.off()

### Change in allele freq, L11

mean(abs(calC[2,]-calC[1,])) # 0.05250056

var(abs(calC[2,]-calC[1,])) # 0.002419182

## Average heterozygosity over time

# L11 F4

hetF4 <- matrix(NA, nrow=ncol(calC), ncol=1)

for(loci in 1:ncol(calC)){

hetF4[loci] <- 2*calC[2,loci]*(1-calC[2,loci])

}

mean(hetF4) # 0.2762631

## Correlations of allele frequency across all SNPs

cor(calA[1,], calC[2,]) # 0.9612362

cor(calA[5,], calC[2,]) # 0.76463

pdf('L11vL14_all.pdf',width=7,height=14)

par(mfrow=c(2,1))

plot(calA[1,], calC[2,], ylab='p L11 F4',xlab='p L14 P', main='L14 P vs L11 F4')

legend('topleft', legend=paste('cor=',round(cor(calA[1,], calC[2,]),3)),bty='n')

## cor = 961

plot(calA[5,], calC[2,], ylab='p L11 F4',xlab='p L14 F4', main='L14 F4 vs L11 F4')

legend('bottomright', legend=paste('cor=',round(cor(calA[5,], calC[2,]),3)),bty='n')

## cor = 765

dev.off()

## Correlations in allele frequency Change

cor(abs(calA[5,]-calA[1,]), abs(calC[2,]-calC[1,])) # 0.2298629

cor((calA[5,]-calA[1,]), (calC[2,]-calC[1,])) # 0.1132123

## Across significant SNPs

pm <- read.table('../../../../../alt_lines/alt_ne/wfabc/in_pM14.txt',header=FALSE)

p1 <- read.table('../../../../../alt_lines/alt_ne/wfabc/p_L1-F100.txt',header=FALSE,sep=',')

p2 <- read.table('../../../../../alt_lines/alt_ne/wfabc/p_L2-F87.txt',header=FALSE,sep=',')

p3 <- read.table('../../../../../alt_lines/alt_ne/wfabc/p_L3-F85.txt',header=FALSE,',')

#cut -d "," -f 2 L11-F4_p.txt | paste in_pL11.txt - > pL11.txt

#sed -i 's/\t/ /g' pL11.txt

snps<-read.table("~/projects/cmac_AR/L14/alt_ne/popgen/AlleleFreq/pLentilSnps.txt",header=FALSE,skip=1)

snps[,1] <- sub('(:[[:digit:]]+$)','',snps[,1], perl=TRUE)

ids<-which(as.factor(snps[,1]) %in% as.factor(pm[,1]))

snps <- snps[ids,]

### L11 comparisons with significant SNPs from L14

pL11<-read.table('pL11.txt',header=FALSE,sep=' ')

ids<-which(as.factor(pL11[,1]) %in% as.factor(snps[,1]))

snps2<-pL11[ids,]

mean(abs(snps[,2]-snps2[,2])) ## .211

cor(snps[,2], snps2[,2]) # -0.1493546

cor(snps[,6], snps2[,2]) # 0.09405686

pdf('L11vL14_198.pdf',width=7,height=14)

par(mfrow=c(2,1))

plot(snps[,2], snps2[,2], ylab='p L11 F4',xlab='p L14 P', main='L14 P vs L11 F4')

legend('topleft', legend=paste('cor=',round(cor(snps[,2], snps2[,2]),3)),bty='n')

## cor = 961

plot(snps[,6], snps2[,2], ylab='p L11 F4',xlab='p L14 F4', main='L14 F4 vs L11 F4')

legend('bottomright', legend=paste('cor=',round(cor(snps[,6], snps2[,2]),3)),bty='n')

## cor = 765

dev.off()

### L11 comparisons with significant SNPs from L1-L3

pL11<-read.table('pL11.txt',header=FALSE,sep=' ')

ids<-which(as.factor(pL11[,1]) %in% as.factor(pm[,1]))

snps2<-pL11[ids,]

cor(p1[,2], snps2[,2]) # -0.22614

cor(p2[,2], snps2[,2]) # 0.3377458

cor(p3[,2], snps2[,2]) # 0.3153737

pdf('L11vAltL_188.pdf',width=6,height=18)

par(mfrow=c(3,1))

plot(p1[,2], snps2[,2], ylab='p L11 F4',xlab='p L1 F100', main='L1 F100 vs L11 F4')

legend('bottomleft', legend=paste('cor=',round(cor(p1[,2], snps2[,2]),3)),bty='n')

plot(p2[,2], snps2[,2], ylab='p L11 F4',xlab='p L2 F87', main='L2 F87 vs L11 F4')

legend('bottomleft', legend=paste('cor=',round(cor(p2[,2], snps2[,2]),3)),bty='n')

plot(p3[,2], snps2[,2], ylab='p L11 F4',xlab='p L3 F85', main='L3 F85 vs L11 F4')

legend('bottomleft', legend=paste('cor=',round(cor(p3[,2], snps2[,2]),3)),bty='n')

dev.off()

################## Ne

analyNullSimP<-function(p0=0.5,p1=0.5,t=100,ne=100,lower=TRUE){

fst<-1-(1-1/(2*ne))^t

alpha<-p0 * (1-fst)/fst

beta<-(1-p0) * (1-fst)/fst

pv<-pbeta(p1,alpha+0.001,beta+0.001,lower.tail=lower)

}

bnd<-log(0.99995/0.00005) ## equivalent to p < 0.0001

## All files of interest:

pL14P<-read.table("L14-P_p.txt",header=FALSE,sep=",")

pL14F1<-read.table("L14-F1_p.txt",header=FALSE,sep=",")

pL14F2<-read.table("L14-F2_p.txt",header=FALSE,sep=",")

pL14F3<-read.table("L14P-F3_p.txt",header=FALSE,sep=",")

pL14F4<-read.table("L14P-F4_p.txt",header=FALSE,sep=",")

pL14F5A<-read.table("L14A-F5_p.txt",header=FALSE,sep=",")

pL14F6A<-read.table("L14A-F6_p.txt",header=FALSE,sep=",")

pL14F7A<-read.table("L14A-F7_p.txt",header=FALSE,sep=",")

pL14F8A<-read.table("L14A-F8_p.txt",header=FALSE,sep=",")

pL14F16A<-read.table("L14A-F16_p.txt",header=FALSE,sep=",")

pL14F5B<-read.table("L14B-F5_p.txt",header=FALSE,sep=",")

pL14F8B<-read.table("L14B-F8_p.txt",header=FALSE,sep=",")

pL14F16B<-read.table("L14B-F16_p.txt",header=FALSE,sep=",")

pL11F4<-read.table("L11-F4_p.txt",header=FALSE,sep=",")

### With lower bound of 5:

## Ne values for branches separating populations

## L14P v L14F16A, logit quantile

p0<-pL14P[,1]

p1<-pL14F16A[,1]

pv<-analyNullSimP(p0,p1,t=16,ne=28.6953,lower=TRUE)

P_F16A<-log(pv/(1-pv))

## L14P v L14F16B, logit quantile

p0<-pL14P[,1]

p1<-pL14F16B[,1]

pv<-analyNullSimP(p0,p1,t=16,ne=27.2585,lower=TRUE)

P_F16B<-log(pv/(1-pv))

## L14P v L14F4, logit quantile

p0<-pL14P[,1]

p1<-pL14F4[,1]

pv<-analyNullSimP(p0,p1,t=4,ne=8.821,lower=TRUE)

P_F4<-log(pv/(1-pv))

## L14F4 v L14F16A, logit quantile

p0<-pL14F4[,1]

p1<-pL14F16A[,1]

pv<-analyNullSimP(p0,p1,t=12,ne=68.8381,lower=TRUE)

F4_F16A<-log(pv/(1-pv))

## L14F4 v L14F16B, logit quantile

p0<-pL14F4[,1]

p1<-pL14F16B[,1]

pv<-analyNullSimP(p0,p1,t=12,ne=56.7703,lower=TRUE)

F4_F16B<-log(pv/(1-pv))

## L14P v L11F4, logit quantile

p0<-pL14P[,1]

p1<-pL11F4[,1]

pv<-analyNullSimP(p0,p1,t=4,ne=134.209,lower=TRUE)

P_F4_L11<-log(pv/(1-pv))

sum(abs(P_F4_L11) > bnd) # 303

sum(abs(P_F16A) > bnd) # 53

sum(abs(P_F16B) > bnd) # 36

sum(abs(P_F4) > bnd) # 128

sum(abs(F4_F16A) > bnd) # 30

sum(abs(F4_F16B) > bnd) # 27

sum(abs(P_F4_L11) > bnd) #303

sum((P_F16A > bnd & P_F16B > bnd) | (P_F16A < -bnd & P_F16B < -bnd)) # 27; = 50.9% and 75% shared

sum((F4_F16A > bnd & F4_F16B > bnd) | (F4_F16A < -bnd & F4_F16B < -bnd)) # 0; = 3.3% and 1.8% shared

## write combined allele frequencies for ABC for candidate SNPs

PF16snps<-unique(which(abs(P_F16A) > bnd | abs(P_F16B) > bnd)) #62 snps

PF4snps<-which(abs(P_F4) > bnd) #128 snps

F4F16snps<-unique(which(abs(F4_F16A) > bnd | abs(F4_F16B) > bnd)) #57 snps

L11snps<-which(abs(P_F4_L11) > bnd)

shared<-which(L11snps %in% PF4snps) ## 16 shared

snps1<-unique(c(PF4snps,PF16snps)) #141

snps2<-unique(c(PF4snps,F4F16snps)) #185 snps

snps3<-unique(c(PF4snps,F4F16snps,PF16snps)) #198

## write combined allele frequencies for ABC for candidate SNPs

lids<-read.table("../locusids.txt",header=FALSE)

lids[,1]<- sub('(:[[:digit:]]+$)','',lids[,1], perl=TRUE)

plsnps<-data.frame(lids[snps3,1],pL14P[snps3,1],pL14F1[snps3,1],pL14F2[snps3,1],pL14F3[snps3,1],

pL14F4[snps3,1],pL14F5A[snps3,1],pL14F6A[snps3,1], pL14F7A[snps3,1],pL14F8A[snps3,1],pL14F16A[snps3,1],

pL14F5B[snps3,1],pL14F8B[snps3,1],pL14F16B[snps3,1])

plsnps<-data.frame(lids[L11snps,1],pL14P[L11snps,1],pL11F4[L11snps,1])

write.table(plsnps,"L11Snps.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)

mean(abs(plsnps[,2]-plsnps[,3])) ##0.207625

cor(plsnps[,2],plsnps[,3]) ## 0.5834494

cor(pL14F4[L11snps,2],plsnps[,3]) ## 0.4372361

## Shared 16 SNPs

cor(pL14P[shared,2],pL11F4[shared,2]) ## 0.9114769

cor(pL14F4[shared,2],pL11F4[shared,2]) ## 0.7972124

########

#/uufs/chpc.utah.edu/common/home/u0795641/projects/cmac_AR/L14/alt_ne/ld

library(fields)

library(RColorBrewer)

library(dendextend)

library(igraph)

library(WGCNA)

N<-48

L<-21342

snps<-read.table("locusids2.txt",header=FALSE)

cl<-1.6

ca<-1.2

#soi<-read.table("pLentilSnps.txt",header=FALSE,skip=0)

lentil<-read.table("positions.txt",header=FALSE)

aa<-paste(snps[,1],snps[,2],sep="_")

bb<-paste(lentil[,1],lentil[,2],sep="_")

ids<-which(aa %in% bb)

L11ids<-read.table("L11Snps.txt", header=FALSE)

aa<-paste(snps[,1],snps[,2],sep=":")

bb<-paste(L11ids[,1])

ids<-which(aa %in% bb)

GP<-t(matrix(scan("L14-P_g.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))

ldP<-cor(t(GP[ids,]))^2

GF1<-t(matrix(scan("L14-F1_g.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))

ldF1<-cor(t(GF1[ids,]))^2

GF4<-t(matrix(scan("L14P-F4_g.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))

ldF4<-cor(t(GF4[ids,]))^2

GF16A<-t(matrix(scan("L14A-F16_g.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))

ldF16A<-cor(t(GF16A[ids,]))^2

GF16B<-t(matrix(scan("L14B-F16_g.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))

ldF16B<-cor(t(GF16B[ids,]))^2

GL11F4<-t(matrix(scan("L11-F4_g.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))

ldL11F4<-cor(t(GL11F4[ids,]))^2

N<-6

LD<-list(ldP,ldF1,ldF4,ldF16A,ldF16B, ldL11F4)

oHc<-vector("list",6)

for(i in 1:N){

oHc[[i]]<-hclust(d=as.dist(1-LD[[i]]),method="ward")

}

Tree<-vector("list",6)

for(i in 1:N){

Tree[[i]]<-as.dendrogram(oHc[[i]])

}

ht<-c(5.06,4.97,14.7,9.94,9.55,8.31)

Cl<-vector("list",6)

for(i in 1:N){

#Cl[[i]]<-cutreeDynamic(dendro=oHc[[i]],distM=1-LD[[i]])

Cl[[i]]<-cutreeDynamic(dendro=oHc[[i]],distM=1-LD[[i]],method="hybrid",minClusterSize=5)

}

# ..cutHeight not given, setting it to 5.06 ===> 99% of the (truncated) height range in dendro.

# ..done.

# ..cutHeight not given, setting it to 4.97 ===> 99% of the (truncated) height range in dendro.

# ..done.

# ..cutHeight not given, setting it to 14.7 ===> 99% of the (truncated) height range in dendro.

# ..done.

# ..cutHeight not given, setting it to 9.94 ===> 99% of the (truncated) height range in dendro.

# ..done.

# ..cutHeight not given, setting it to 9.55 ===> 99% of the (truncated) height range in dendro.

# ..done.

# ..cutHeight not given, setting it to 8.31 ===> 99% of the (truncated) height range in dendro.

# ..done.

TreeC<-Tree

for(i in 1:N){

#TreeC[[i]]<-color_branches(Tree[[i]],k=max(Cl[[i]]),col=brewer.pal(12,"Set3"))

#TreeC[[i]]<-color_branches(Tree[[i]],clusters=Cl[[1]][oHc[[i]]$order],col=brewer.pal(12,"Set3"))

TreeC[[i]]<-color_branches(Tree[[i]],clusters=Cl[[i]][oHc[[i]]$order],col=brewer.pal(12,"Paired"))

#TreeC[[i]]<-color_branches(Tree[[i]],clusters=Cl[[i]][oHc[[i]]$order])

}

quantile(LD[[1]][upper.tri(LD[[1]])],probs=c(0.9,0.95,0.975,0.99))

# 90% 95% 97.5% 99%

#0.0869748 0.1426121 0.2517529 0.4991171

Graph<-vector("list",6)

amin<-0.25

#amin<-0.1426

for(i in 1:N){

anet<-LD[[i]]

anet[anet<amin]<-0

anet[anet>=amin]<-1

anet<-anet[oHc[[2]]$order,oHc[[2]]$order]

Graph[[i]]<-graph_from_adjacency_matrix(anet,mode="undirected",weighted=NULL,diag=FALSE)

}

LDwithin<-rep(NA,N)

LDbetween<-rep(NA,N)

for(i in 1:N){

wbmat<-matrix(0,nrow=303,ncol=303)

for(j in 1:302){for(k in (j+1):303){

if(Cl[[i]][j]==Cl[[i]][k]){

wbmat[j,k]<-1

wbmat[k,j]<-1

}

else{

wbmat[j,k]<--1

wbmat[k,j]<--1

}

}}

LDwithin[i]<-mean(as.vector(LD[[i]][wbmat==1]))

LDbetween[i]<-mean(as.vector(LD[[i]][wbmat==-1]))

}

# for(i in 1:N){

# wbmat<-matrix(0,nrow=198,ncol=198)

# for(j in 1:197){for(k in (j+1):198){

# if(Cl[[i]][j]==Cl[[i]][k]){

# wbmat[j,k]<-1

# wbmat[k,j]<-1

# }

# else{

# wbmat[j,k]<--1

# wbmat[k,j]<--1

# }

# }}

# LDwithin[i]<-mean(as.vector(LD[[i]][wbmat==1]))

# LDbetween[i]<-mean(as.vector(LD[[i]][wbmat==-1]))

# }

pdf("LDplot_303.pdf",width=7,height=8.5)

layout(matrix(c(1:20),nrow=5,ncol=4,byrow=TRUE),widths=c(1,2,2,2),heights=c(0.5,2,2,2,2,2))

tits<-c("","(a) LD dendrogram","(b) LD heatmap","(c) LD network")

for(i in 1:4){

par(mar=c(0,0,0,0))

plot(c(0,1),c(0,1),type='n',axes=FALSE,xlab="",ylab="")

mtext(tits[i],3,-2,cex=1.25)

}

myc<-get_leaves_branches_col(TreeC[[2]])

gen<-c("L14 P","L14 F1","L14 F4", "L14 F16A","L14 F16B", "L11 F4")

for(i in c(1,2,3,6)){

par(mar=c(0,0,0,0))

plot(c(0,1),c(0,1),type='n',axes=FALSE,xlab="",ylab="")

mtext(gen[i],3,-2,adj=0,cex=1.25)

mtext(paste("K = ",max(Cl[[i]]),sep=""),3,-5,adj=0)

mtext(bquote(r[W]^2 ~ '=' ~ .(round(LDwithin[i],3))),3,-7,adj=0)

mtext(bquote(r[B]^2 ~ '=' ~ .(round(LDbetween[i],3))),3,-9,adj=0)

par(mar=c(2,2,0.5,0.5))

plot(TreeC[[i]],leaflab="none")

image(LD[[i]][oHc[[i]]$order,oHc[[i]]$order],breaks=c(-.1,0.05,0.1,0.25,.5,1),col=brewer.pal(5,"Blues"),axes=FALSE)

#image(LD[[i]][oHc[[i]]$order,oHc[[i]]$order],breaks=c(-.1,0.01,0.05,0.1,0.5,1),col=brewer.pal(5,"Blues"),axes=FALSE)

box()

plot.igraph(Graph[[i]],vertex.size=5,vertex.color=myc,vertex.label=NA,edge.width=0.6)

#plot.igraph(Graph[[i]],vertex.size=5,vertex.color=rep(brewer.pal(12,"Paired")[unique(sort(Cl[[2]]))],2)[Cl[[2]]],vertex.label=NA,edge.width=0.4)

}

dev.off()

Page updated

Google Sites

Report abuse