Post date: Jan 07, 2019 2:11:35 PM
#/uufs/chpc.utah.edu/common/home/u0795641/projects/cmac_AR/L14/alt_ne/popgen/AlleleFreq/freq
library(RColorBrewer)
library(scales)
files <- list.files(pattern=".+_p.txt")
files<- files[-1]
files <- files[-c(9,10,11)]
A <- files[c(3,1,2,9,10,5,6,7,8,4)]
files <- list.files(pattern=".+_p.txt")
files<- files[-1]
files <- files[-c(5,6,7,8,4)]
B <- files[c(3,1,2,7,8,5,6,4)]
calC <- read.csv("L14-P_p.txt", header=FALSE, sep=",")
calC <- cbind(calC, read.csv("L11-F4_p.txt", header=FALSE, sep=","))
calC <- t(calC)
r <- 1:nrow(calC)
medians <- seq(2,8,4)
calC <- calC[medians,]
for(loci in 1:ncol(calC)){
if(calC[1,loci] > .5){
calC[,loci] <- 1 - calC[,loci]
}
}
calA <- NULL
for (f in A) {
dat <- read.csv(f, header=FALSE, sep=",")
calA <- rbind(calA, t(dat))
}
# Delete everything but median
r <- 1:nrow(calA)
medians <- seq(2,40,4)
calA <- calA[medians,]
# Change to minor allele frequency
for(loci in 1:ncol(calA)){
if(calA[1,loci] > .5){
calA[,loci] <- 1 - calA[,loci]
}
}
calB <- NULL
for (f in B) {
dat <- read.csv(f, header=FALSE, sep=",")
calB <- rbind(calB, t(dat))
}
# Delete everything but median
r <- 1:nrow(calB)
medians <- seq(2,32,4)
calB <- calB[medians ,]
# Change to minor allele frequency
for(loci in 1:ncol(calB)){
if(calB[1,loci] > .5){
calB[,loci] <- 1 - calB[,loci]
}
}
### Histograms of allele frequencies
pdf('p_hists.pdf',height=21, width=7)
par(mfrow=c(3,1))
hist(calC[1,],main='L14 P')
hist(calA[5,],main='L14 F4')
hist(calC[2,],main='L11 F4', xlim=c(0,1))
dev.off()
### Change in allele freq, L11
mean(abs(calC[2,]-calC[1,])) # 0.05250056
var(abs(calC[2,]-calC[1,])) # 0.002419182
## Average heterozygosity over time
# L11 F4
hetF4 <- matrix(NA, nrow=ncol(calC), ncol=1)
for(loci in 1:ncol(calC)){
hetF4[loci] <- 2*calC[2,loci]*(1-calC[2,loci])
}
mean(hetF4) # 0.2762631
## Correlations of allele frequency across all SNPs
cor(calA[1,], calC[2,]) # 0.9612362
cor(calA[5,], calC[2,]) # 0.76463
pdf('L11vL14_all.pdf',width=7,height=14)
par(mfrow=c(2,1))
plot(calA[1,], calC[2,], ylab='p L11 F4',xlab='p L14 P', main='L14 P vs L11 F4')
legend('topleft', legend=paste('cor=',round(cor(calA[1,], calC[2,]),3)),bty='n')
## cor = 961
plot(calA[5,], calC[2,], ylab='p L11 F4',xlab='p L14 F4', main='L14 F4 vs L11 F4')
legend('bottomright', legend=paste('cor=',round(cor(calA[5,], calC[2,]),3)),bty='n')
## cor = 765
dev.off()
## Correlations in allele frequency Change
cor(abs(calA[5,]-calA[1,]), abs(calC[2,]-calC[1,])) # 0.2298629
cor((calA[5,]-calA[1,]), (calC[2,]-calC[1,])) # 0.1132123
## Across significant SNPs
pm <- read.table('../../../../../alt_lines/alt_ne/wfabc/in_pM14.txt',header=FALSE)
p1 <- read.table('../../../../../alt_lines/alt_ne/wfabc/p_L1-F100.txt',header=FALSE,sep=',')
p2 <- read.table('../../../../../alt_lines/alt_ne/wfabc/p_L2-F87.txt',header=FALSE,sep=',')
p3 <- read.table('../../../../../alt_lines/alt_ne/wfabc/p_L3-F85.txt',header=FALSE,',')
#cut -d "," -f 2 L11-F4_p.txt | paste in_pL11.txt - > pL11.txt
#sed -i 's/\t/ /g' pL11.txt
snps<-read.table("~/projects/cmac_AR/L14/alt_ne/popgen/AlleleFreq/pLentilSnps.txt",header=FALSE,skip=1)
snps[,1] <- sub('(:[[:digit:]]+$)','',snps[,1], perl=TRUE)
ids<-which(as.factor(snps[,1]) %in% as.factor(pm[,1]))
snps <- snps[ids,]
### L11 comparisons with significant SNPs from L14
pL11<-read.table('pL11.txt',header=FALSE,sep=' ')
ids<-which(as.factor(pL11[,1]) %in% as.factor(snps[,1]))
snps2<-pL11[ids,]
mean(abs(snps[,2]-snps2[,2])) ## .211
cor(snps[,2], snps2[,2]) # -0.1493546
cor(snps[,6], snps2[,2]) # 0.09405686
pdf('L11vL14_198.pdf',width=7,height=14)
par(mfrow=c(2,1))
plot(snps[,2], snps2[,2], ylab='p L11 F4',xlab='p L14 P', main='L14 P vs L11 F4')
legend('topleft', legend=paste('cor=',round(cor(snps[,2], snps2[,2]),3)),bty='n')
## cor = 961
plot(snps[,6], snps2[,2], ylab='p L11 F4',xlab='p L14 F4', main='L14 F4 vs L11 F4')
legend('bottomright', legend=paste('cor=',round(cor(snps[,6], snps2[,2]),3)),bty='n')
## cor = 765
dev.off()
### L11 comparisons with significant SNPs from L1-L3
pL11<-read.table('pL11.txt',header=FALSE,sep=' ')
ids<-which(as.factor(pL11[,1]) %in% as.factor(pm[,1]))
snps2<-pL11[ids,]
cor(p1[,2], snps2[,2]) # -0.22614
cor(p2[,2], snps2[,2]) # 0.3377458
cor(p3[,2], snps2[,2]) # 0.3153737
pdf('L11vAltL_188.pdf',width=6,height=18)
par(mfrow=c(3,1))
plot(p1[,2], snps2[,2], ylab='p L11 F4',xlab='p L1 F100', main='L1 F100 vs L11 F4')
legend('bottomleft', legend=paste('cor=',round(cor(p1[,2], snps2[,2]),3)),bty='n')
plot(p2[,2], snps2[,2], ylab='p L11 F4',xlab='p L2 F87', main='L2 F87 vs L11 F4')
legend('bottomleft', legend=paste('cor=',round(cor(p2[,2], snps2[,2]),3)),bty='n')
plot(p3[,2], snps2[,2], ylab='p L11 F4',xlab='p L3 F85', main='L3 F85 vs L11 F4')
legend('bottomleft', legend=paste('cor=',round(cor(p3[,2], snps2[,2]),3)),bty='n')
dev.off()
################## Ne
analyNullSimP<-function(p0=0.5,p1=0.5,t=100,ne=100,lower=TRUE){
fst<-1-(1-1/(2*ne))^t
alpha<-p0 * (1-fst)/fst
beta<-(1-p0) * (1-fst)/fst
pv<-pbeta(p1,alpha+0.001,beta+0.001,lower.tail=lower)
pv
}
bnd<-log(0.99995/0.00005) ## equivalent to p < 0.0001
## All files of interest:
pL14P<-read.table("L14-P_p.txt",header=FALSE,sep=",")
pL14F1<-read.table("L14-F1_p.txt",header=FALSE,sep=",")
pL14F2<-read.table("L14-F2_p.txt",header=FALSE,sep=",")
pL14F3<-read.table("L14P-F3_p.txt",header=FALSE,sep=",")
pL14F4<-read.table("L14P-F4_p.txt",header=FALSE,sep=",")
pL14F5A<-read.table("L14A-F5_p.txt",header=FALSE,sep=",")
pL14F6A<-read.table("L14A-F6_p.txt",header=FALSE,sep=",")
pL14F7A<-read.table("L14A-F7_p.txt",header=FALSE,sep=",")
pL14F8A<-read.table("L14A-F8_p.txt",header=FALSE,sep=",")
pL14F16A<-read.table("L14A-F16_p.txt",header=FALSE,sep=",")
pL14F5B<-read.table("L14B-F5_p.txt",header=FALSE,sep=",")
pL14F8B<-read.table("L14B-F8_p.txt",header=FALSE,sep=",")
pL14F16B<-read.table("L14B-F16_p.txt",header=FALSE,sep=",")
pL11F4<-read.table("L11-F4_p.txt",header=FALSE,sep=",")
### With lower bound of 5:
## Ne values for branches separating populations
## L14P v L14F16A, logit quantile
p0<-pL14P[,1]
p1<-pL14F16A[,1]
pv<-analyNullSimP(p0,p1,t=16,ne=28.6953,lower=TRUE)
P_F16A<-log(pv/(1-pv))
## L14P v L14F16B, logit quantile
p0<-pL14P[,1]
p1<-pL14F16B[,1]
pv<-analyNullSimP(p0,p1,t=16,ne=27.2585,lower=TRUE)
P_F16B<-log(pv/(1-pv))
## L14P v L14F4, logit quantile
p0<-pL14P[,1]
p1<-pL14F4[,1]
pv<-analyNullSimP(p0,p1,t=4,ne=8.821,lower=TRUE)
P_F4<-log(pv/(1-pv))
## L14F4 v L14F16A, logit quantile
p0<-pL14F4[,1]
p1<-pL14F16A[,1]
pv<-analyNullSimP(p0,p1,t=12,ne=68.8381,lower=TRUE)
F4_F16A<-log(pv/(1-pv))
## L14F4 v L14F16B, logit quantile
p0<-pL14F4[,1]
p1<-pL14F16B[,1]
pv<-analyNullSimP(p0,p1,t=12,ne=56.7703,lower=TRUE)
F4_F16B<-log(pv/(1-pv))
## L14P v L11F4, logit quantile
p0<-pL14P[,1]
p1<-pL11F4[,1]
pv<-analyNullSimP(p0,p1,t=4,ne=134.209,lower=TRUE)
P_F4_L11<-log(pv/(1-pv))
sum(abs(P_F4_L11) > bnd) # 303
sum(abs(P_F16A) > bnd) # 53
sum(abs(P_F16B) > bnd) # 36
sum(abs(P_F4) > bnd) # 128
sum(abs(F4_F16A) > bnd) # 30
sum(abs(F4_F16B) > bnd) # 27
sum(abs(P_F4_L11) > bnd) #303
sum((P_F16A > bnd & P_F16B > bnd) | (P_F16A < -bnd & P_F16B < -bnd)) # 27; = 50.9% and 75% shared
sum((F4_F16A > bnd & F4_F16B > bnd) | (F4_F16A < -bnd & F4_F16B < -bnd)) # 0; = 3.3% and 1.8% shared
## write combined allele frequencies for ABC for candidate SNPs
PF16snps<-unique(which(abs(P_F16A) > bnd | abs(P_F16B) > bnd)) #62 snps
PF4snps<-which(abs(P_F4) > bnd) #128 snps
F4F16snps<-unique(which(abs(F4_F16A) > bnd | abs(F4_F16B) > bnd)) #57 snps
L11snps<-which(abs(P_F4_L11) > bnd)
shared<-which(L11snps %in% PF4snps) ## 16 shared
snps1<-unique(c(PF4snps,PF16snps)) #141
snps2<-unique(c(PF4snps,F4F16snps)) #185 snps
snps3<-unique(c(PF4snps,F4F16snps,PF16snps)) #198
## write combined allele frequencies for ABC for candidate SNPs
lids<-read.table("../locusids.txt",header=FALSE)
lids[,1]<- sub('(:[[:digit:]]+$)','',lids[,1], perl=TRUE)
plsnps<-data.frame(lids[snps3,1],pL14P[snps3,1],pL14F1[snps3,1],pL14F2[snps3,1],pL14F3[snps3,1],
pL14F4[snps3,1],pL14F5A[snps3,1],pL14F6A[snps3,1], pL14F7A[snps3,1],pL14F8A[snps3,1],pL14F16A[snps3,1],
pL14F5B[snps3,1],pL14F8B[snps3,1],pL14F16B[snps3,1])
plsnps<-data.frame(lids[L11snps,1],pL14P[L11snps,1],pL11F4[L11snps,1])
write.table(plsnps,"L11Snps.txt",row.names=FALSE,col.names=FALSE,quote=FALSE)
mean(abs(plsnps[,2]-plsnps[,3])) ##0.207625
cor(plsnps[,2],plsnps[,3]) ## 0.5834494
cor(pL14F4[L11snps,2],plsnps[,3]) ## 0.4372361
## Shared 16 SNPs
cor(pL14P[shared,2],pL11F4[shared,2]) ## 0.9114769
cor(pL14F4[shared,2],pL11F4[shared,2]) ## 0.7972124
########
#/uufs/chpc.utah.edu/common/home/u0795641/projects/cmac_AR/L14/alt_ne/ld
library(fields)
library(RColorBrewer)
library(dendextend)
library(igraph)
library(WGCNA)
N<-48
L<-21342
snps<-read.table("locusids2.txt",header=FALSE)
cl<-1.6
ca<-1.2
#soi<-read.table("pLentilSnps.txt",header=FALSE,skip=0)
lentil<-read.table("positions.txt",header=FALSE)
aa<-paste(snps[,1],snps[,2],sep="_")
bb<-paste(lentil[,1],lentil[,2],sep="_")
ids<-which(aa %in% bb)
L11ids<-read.table("L11Snps.txt", header=FALSE)
aa<-paste(snps[,1],snps[,2],sep=":")
bb<-paste(L11ids[,1])
ids<-which(aa %in% bb)
GP<-t(matrix(scan("L14-P_g.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldP<-cor(t(GP[ids,]))^2
GF1<-t(matrix(scan("L14-F1_g.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldF1<-cor(t(GF1[ids,]))^2
GF4<-t(matrix(scan("L14P-F4_g.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldF4<-cor(t(GF4[ids,]))^2
GF16A<-t(matrix(scan("L14A-F16_g.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldF16A<-cor(t(GF16A[ids,]))^2
GF16B<-t(matrix(scan("L14B-F16_g.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldF16B<-cor(t(GF16B[ids,]))^2
GL11F4<-t(matrix(scan("L11-F4_g.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldL11F4<-cor(t(GL11F4[ids,]))^2
N<-6
LD<-list(ldP,ldF1,ldF4,ldF16A,ldF16B, ldL11F4)
oHc<-vector("list",6)
for(i in 1:N){
oHc[[i]]<-hclust(d=as.dist(1-LD[[i]]),method="ward")
}
Tree<-vector("list",6)
for(i in 1:N){
Tree[[i]]<-as.dendrogram(oHc[[i]])
}
ht<-c(5.06,4.97,14.7,9.94,9.55,8.31)
Cl<-vector("list",6)
for(i in 1:N){
#Cl[[i]]<-cutreeDynamic(dendro=oHc[[i]],distM=1-LD[[i]])
Cl[[i]]<-cutreeDynamic(dendro=oHc[[i]],distM=1-LD[[i]],method="hybrid",minClusterSize=5)
}
# ..cutHeight not given, setting it to 5.06 ===> 99% of the (truncated) height range in dendro.
# ..done.
# ..cutHeight not given, setting it to 4.97 ===> 99% of the (truncated) height range in dendro.
# ..done.
# ..cutHeight not given, setting it to 14.7 ===> 99% of the (truncated) height range in dendro.
# ..done.
# ..cutHeight not given, setting it to 9.94 ===> 99% of the (truncated) height range in dendro.
# ..done.
# ..cutHeight not given, setting it to 9.55 ===> 99% of the (truncated) height range in dendro.
# ..done.
# ..cutHeight not given, setting it to 8.31 ===> 99% of the (truncated) height range in dendro.
# ..done.
TreeC<-Tree
for(i in 1:N){
#TreeC[[i]]<-color_branches(Tree[[i]],k=max(Cl[[i]]),col=brewer.pal(12,"Set3"))
#TreeC[[i]]<-color_branches(Tree[[i]],clusters=Cl[[1]][oHc[[i]]$order],col=brewer.pal(12,"Set3"))
TreeC[[i]]<-color_branches(Tree[[i]],clusters=Cl[[i]][oHc[[i]]$order],col=brewer.pal(12,"Paired"))
#TreeC[[i]]<-color_branches(Tree[[i]],clusters=Cl[[i]][oHc[[i]]$order])
}
quantile(LD[[1]][upper.tri(LD[[1]])],probs=c(0.9,0.95,0.975,0.99))
# 90% 95% 97.5% 99%
#0.0869748 0.1426121 0.2517529 0.4991171
Graph<-vector("list",6)
amin<-0.25
#amin<-0.1426
for(i in 1:N){
anet<-LD[[i]]
anet[anet<amin]<-0
anet[anet>=amin]<-1
anet<-anet[oHc[[2]]$order,oHc[[2]]$order]
Graph[[i]]<-graph_from_adjacency_matrix(anet,mode="undirected",weighted=NULL,diag=FALSE)
}
LDwithin<-rep(NA,N)
LDbetween<-rep(NA,N)
for(i in 1:N){
wbmat<-matrix(0,nrow=303,ncol=303)
for(j in 1:302){for(k in (j+1):303){
if(Cl[[i]][j]==Cl[[i]][k]){
wbmat[j,k]<-1
wbmat[k,j]<-1
}
else{
wbmat[j,k]<--1
wbmat[k,j]<--1
}
}}
LDwithin[i]<-mean(as.vector(LD[[i]][wbmat==1]))
LDbetween[i]<-mean(as.vector(LD[[i]][wbmat==-1]))
}
# for(i in 1:N){
# wbmat<-matrix(0,nrow=198,ncol=198)
# for(j in 1:197){for(k in (j+1):198){
# if(Cl[[i]][j]==Cl[[i]][k]){
# wbmat[j,k]<-1
# wbmat[k,j]<-1
# }
# else{
# wbmat[j,k]<--1
# wbmat[k,j]<--1
# }
# }}
# LDwithin[i]<-mean(as.vector(LD[[i]][wbmat==1]))
# LDbetween[i]<-mean(as.vector(LD[[i]][wbmat==-1]))
# }
pdf("LDplot_303.pdf",width=7,height=8.5)
layout(matrix(c(1:20),nrow=5,ncol=4,byrow=TRUE),widths=c(1,2,2,2),heights=c(0.5,2,2,2,2,2))
tits<-c("","(a) LD dendrogram","(b) LD heatmap","(c) LD network")
for(i in 1:4){
par(mar=c(0,0,0,0))
plot(c(0,1),c(0,1),type='n',axes=FALSE,xlab="",ylab="")
mtext(tits[i],3,-2,cex=1.25)
}
myc<-get_leaves_branches_col(TreeC[[2]])
gen<-c("L14 P","L14 F1","L14 F4", "L14 F16A","L14 F16B", "L11 F4")
for(i in c(1,2,3,6)){
par(mar=c(0,0,0,0))
plot(c(0,1),c(0,1),type='n',axes=FALSE,xlab="",ylab="")
mtext(gen[i],3,-2,adj=0,cex=1.25)
mtext(paste("K = ",max(Cl[[i]]),sep=""),3,-5,adj=0)
mtext(bquote(r[W]^2 ~ '=' ~ .(round(LDwithin[i],3))),3,-7,adj=0)
mtext(bquote(r[B]^2 ~ '=' ~ .(round(LDbetween[i],3))),3,-9,adj=0)
par(mar=c(2,2,0.5,0.5))
plot(TreeC[[i]],leaflab="none")
image(LD[[i]][oHc[[i]]$order,oHc[[i]]$order],breaks=c(-.1,0.05,0.1,0.25,.5,1),col=brewer.pal(5,"Blues"),axes=FALSE)
#image(LD[[i]][oHc[[i]]$order,oHc[[i]]$order],breaks=c(-.1,0.01,0.05,0.1,0.5,1),col=brewer.pal(5,"Blues"),axes=FALSE)
box()
plot.igraph(Graph[[i]],vertex.size=5,vertex.color=myc,vertex.label=NA,edge.width=0.6)
#plot.igraph(Graph[[i]],vertex.size=5,vertex.color=rep(brewer.pal(12,"Paired")[unique(sort(Cl[[2]]))],2)[Cl[[2]]],vertex.label=NA,edge.width=0.4)
}
dev.off()