Post date: Feb 06, 2017 11:28:3 PM
### LINKAGE DISEQUILIBRIUM
#/uufs/chpc.utah.edu/common/home/u0795641/projects/cmac_AR/L14/alt_ne/ld
library(fields)
library(RColorBrewer)
library(gplots)
N<-48
L<-21342
snps<-read.table("locusids.txt",header=FALSE)
cl<-1.6
ca<-1.2
soi<-read.table("pLentilSnps.txt",header=FALSE,skip=0)
ids<-which(as.character(snps[,1]) %in% as.character(soi[,1]))
GP<-t(matrix(scan("L14-P_p.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldP<-cor(t(GP[ids,]))^2
GF1<-t(matrix(scan("L14-F1_p.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldF1<-cor(t(GF1[ids,]))^2
GF4<-t(matrix(scan("L14P-F4_p.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldF4<-cor(t(GF4[ids,]))^2
GF16A<-t(matrix(scan("L14A-F16_p.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldF16A<-cor(t(GF16A[ids,]))^2
GF16B<-t(matrix(scan("L14B-F16_p.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldF16B<-cor(t(GF16B[ids,]))^2
cm<-2.2
cl<-2
library(dendextend)
library(gplots)
dend <- as.dendrogram(hclust(dist(ldP)))
dend1 <- color_branches(dend,k=7)
col_labels <- get_leaves_branches_col(dend1)
# col_labels[col_labels=="#CC476B"] <- as.numeric(1)
# col_labels[col_labels=="#917600"] <- as.numeric(2)
# col_labels[col_labels=="#009232"] <- as.numeric(3)
# col_labels[col_labels=="#008FB7"] <- as.numeric(4)
# col_labels[col_labels=="#A352D1"] <- as.numeric(5)
col_labels <- as.factor(col_labels)
levels(col_labels) <- 1:length(levels(col_labels))
col_labels <- as.numeric(col_labels)
dend2<-color_branches(dend,cluster=col_labels)
pdf("P_Heatmap.pdf")
heatmap.2(ldP, main="P", Rowv=dend2, Colv=rev(dend2), trace='none', key=TRUE,
labRow=FALSE, labCol=FALSE, symm=T)
dev.off()
dend <- as.dendrogram(hclust(dist(ldF4)))
col_labels <- col_labels[order.dendrogram(dend1)]
dend2<-color_branches(dend,cluster=col_labels)
pdf("F4_Heatmap.pdf")
heatmap.2(ldF4, main="F4", Rowv=dend2, Colv=rev(dend2), trace='none', key=TRUE,
labRow=FALSE, labCol=FALSE, symm=T)
dev.off()
dend <- as.dendrogram(hclust(dist(ldF16A)))
col_labels <- col_labels[order.dendrogram(dend1)]
dend2<-color_branches(dend,cluster=col_labels)
pdf("F16A_Heatmap.pdf")
heatmap.2(ldF16A, main="F16A", Rowv=dend2, Colv=rev(dend2), trace='none', key=TRUE,
labRow=FALSE, labCol=FALSE, symm=T)
dev.off()
dend <- as.dendrogram(hclust(dist(ldF16B)))
col_labels <- col_labels[order.dendrogram(dend1)]
dend2<-color_branches(dend,cluster=col_labels)
pdf("F16B_Heatmap.pdf")
heatmap.2(ldF16B, main="F16B", Rowv=dend2, Colv=rev(dend2), trace='none', key=TRUE,
labRow=FALSE, labCol=FALSE, symm=T)
dev.off()
# pdfjam --suffix nup --nup 2x1 --landscape P_Heatmap.pdf F4_Heatmap.pdf
# pdfjam --suffix nup --nup 2x2 --landscape P_Heatmap.pdf F4_Heatmap.pdf F16A_Heatmap.pdf F16B_Heatmap.pdf
## summaries of LD for F1
ld<-ldF1
diag(ld)<-NA
mean(apply(ld,1,mean,na.rm=TRUE)) ## 0.336029
sd(apply(ld,1,mean,na.rm=TRUE)) ## 0.02847544
mean(apply(ld,1,max,na.rm=TRUE)) ## 0.9901822
##LD within clusters
dend <- as.dendrogram(hclust(dist(ldF1)))
dend1 <- color_branches(dend,k=7)
col_labels <- get_leaves_branches_col(dend1)
col_labels <- as.factor(col_labels)
levels(col_labels) <- 1:length(levels(col_labels))
col_labels <- as.numeric(col_labels)
dend2<-color_branches(dend,cluster=col_labels)
test <- ldF1[y$rowInd,y$colInd]
length(col_labels[col_labels==7]) ##15
ld<-test[1:15,1:15]
length(col_labels[col_labels==5]) ##41
ld<-test[16:56,16:56]
length(col_labels[col_labels==4]) ##45
ld<-test[56:101,56:101]
length(col_labels[col_labels==2]) ##21
ld<-test[102:122,102:122]
length(col_labels[col_labels==1]) ##23
ld<-test[122:145,122:145]
length(col_labels[col_labels==3]) ##22
ld<-test[146:167,146:167]
length(col_labels[col_labels==6]) ##31
ld<-test[168:198,168:198]
## Correlation between time periods
cor.test(ldF1[upper.tri(ldF1)],ldF4[upper.tri(ldF1)]) # 0.1990356
cor.test(ldF16A[upper.tri(ldF1)],ldF16B[upper.tri(ldF1)]) # 0.6468124
cor.test(ldF4[upper.tri(ldF1)],ldF16A[upper.tri(ldF1)]) # 0.6048272
cor.test(ldF4[upper.tri(ldF1)],ldF16B[upper.tri(ldF1)]) # 0.5686653
## Determining best clusters: 7
library(NbClust)
upgma <- function(data)
{ set.seed(123)
data_scaled <- scale(data)
d <- (dist(data_scaled, method = "euclidean"))^2
nb <- NbClust(data_scaled, distance=NULL, diss = d, min.nc = 1,
max.nc = 10, method = "average", index ="silhouette")
k <- nb$Best.nc[1]
upgma <- hclust(d, method = "average")
plot <- plot(upgma, cex = 0.6, hang = -1)
if(k > 1){
rect <- rect.hclust(upgma, k = k, border = 2:5)
grp <- cutree(upgma, k = k)
table <- table(grp)
table <- as.data.frame(table)
} else {rect = NULL; table=NULL}
result <- list("dend"=plot, "rect"=rect, "table"=table, "best.nc" = nb$Best.nc)
return(result)
}
d <- (dist(ldP, method = "euclidean"))^2
NbClust(ldP, distance=NULL, diss = d, min.nc = 1,
max.nc = 15, method = "average", index ="silhouette")
data_scaled <- scale(ldP)
d <- (dist(data_scaled, method = "euclidean"))^2
NbClust(data_scaled, distance=NULL, diss = d, min.nc = 1,
max.nc = 15, method = "average", index ="silhouette")
selected <- c( "kl",
"ch", "hartigan", "ccc", "scott", "marriot", "trcovw",
"tracew", "friedman", "rubin", "cindex", "db", "silhouette",
"duda", "pseudot2", "beale", "ratkowsky", "ball",
"ptbiserial", "gap", "frey", "mcclain", "gamma", "gplus",
"tau", "dunn", "hubert", "sdindex", "dindex", "sdbw")
for (i in 1:length(selected)) {
results[[i]] <- try(NbClust(ldP, distance=NULL, diss = d, min.nc = 1,
max.nc = 15, method = "average", index=selected[i]))
}
for (i in 1:length(selected)) {
results[[i]] <- try(NbClust(ldF1, distance=NULL, diss = d, min.nc = 1,
max.nc = 15, method = "average", index=selected[i]))
}
### Change in LD from P to F1:
cm<-2.2
cl<-2
library(dendextend)
library(gplots)
dend <- as.dendrogram(hclust(dist(ldP)))
dend1 <- color_branches(dend,k=7)
col_labels <- get_leaves_branches_col(dend1)
# col_labels[col_labels=="#CC476B"] <- as.numeric(1)
# col_labels[col_labels=="#917600"] <- as.numeric(2)
# col_labels[col_labels=="#009232"] <- as.numeric(3)
# col_labels[col_labels=="#008FB7"] <- as.numeric(4)
# col_labels[col_labels=="#A352D1"] <- as.numeric(5)
col_labels <- as.factor(col_labels)
levels(col_labels) <- 1:length(levels(col_labels))
col_labels <- as.numeric(col_labels)
dend2<-color_branches(dend,cluster=col_labels)
pdf("P_Heatmap.pdf")
heatmap.2(ldP, main="P", Rowv=dend2, Colv=rev(dend2), trace='none', key=TRUE,
labRow=FALSE, labCol=FALSE, symm=T)
dev.off()
dend <- as.dendrogram(hclust(dist(ldF1)))
col_labels <- col_labels[order.dendrogram(dend1)]
dend2<-color_branches(dend,cluster=col_labels)
pdf("F1_Heatmap.pdf")
heatmap.2(ldF1, main="F1", Rowv=dend2, Colv=rev(dend2), trace='none', key=TRUE,
labRow=FALSE, labCol=FALSE, symm=T)
dev.off()
# pdfjam --suffix P_F1 --nup 2x1 --landscape P_Heatmap.pdf F1_Heatmap.pdf
### New LDs based on starting at F1:
cm<-2.2
cl<-2
library(dendextend)
library(gplots)
dend <- as.dendrogram(hclust(dist(ldF1)))
dend1 <- color_branches(dend,k=7)
col_labels <- get_leaves_branches_col(dend1)
# col_labels[col_labels=="#CC476B"] <- as.numeric(1)
# col_labels[col_labels=="#917600"] <- as.numeric(2)
# col_labels[col_labels=="#009232"] <- as.numeric(3)
# col_labels[col_labels=="#008FB7"] <- as.numeric(4)
# col_labels[col_labels=="#A352D1"] <- as.numeric(5)
col_labels <- as.factor(col_labels)
levels(col_labels) <- 1:length(levels(col_labels))
col_labels <- as.numeric(col_labels)
dend2<-color_branches(dend,cluster=col_labels)
pdf("F1_Heatmap.pdf")
heatmap.2(ldF1, main="P", Rowv=dend2, Colv=rev(dend2), trace='none', key=TRUE,
labRow=FALSE, labCol=FALSE, symm=T)
dev.off()
dend <- as.dendrogram(hclust(dist(ldF4)))
col_labels <- col_labels[order.dendrogram(dend1)]
dend2<-color_branches(dend,cluster=col_labels)
pdf("F4_Heatmap.pdf")
heatmap.2(ldF4, main="F4", Rowv=dend2, Colv=rev(dend2), trace='none', key=TRUE,
labRow=FALSE, labCol=FALSE, symm=T)
dev.off()
dend <- as.dendrogram(hclust(dist(ldF16A)))
col_labels <- col_labels[order.dendrogram(dend1)]
dend2<-color_branches(dend,cluster=col_labels)
pdf("F16A_Heatmap.pdf")
heatmap.2(ldF16A, main="F16A", Rowv=dend2, Colv=rev(dend2), trace='none', key=TRUE,
labRow=FALSE, labCol=FALSE, symm=T)
dev.off()
dend <- as.dendrogram(hclust(dist(ldF16B)))
col_labels <- col_labels[order.dendrogram(dend1)]
dend2<-color_branches(dend,cluster=col_labels)
pdf("F16B_Heatmap.pdf")
heatmap.2(ldF16B, main="F16B", Rowv=dend2, Colv=rev(dend2), trace='none', key=TRUE,
labRow=FALSE, labCol=FALSE, symm=T)
dev.off()
# pdfjam --suffix nup --nup 2x1 --landscape P_Heatmap.pdf F4_Heatmap.pdf
# pdfjam --suffix nup --nup 2x2 --landscape P_Heatmap.pdf F4_Heatmap.pdf F16A_Heatmap.pdf F16B_Heatmap.pdf
## Get ordering
tree <- cutree(dend,k=7)
order(tree)
otree <- soi[order(tree),]
dend <- as.dendrogram(hclust(dist(ldF1)))
dend1 <- color_branches(dend,k=7)
col_labels <- get_leaves_branches_col(dend1)
## Next: Selection L14