Correlation Matrix
(email from chc101@health.ucsd.edu - Correlation Matrix for Annotations, July 8th 2020
CH slack, July 9th 2020)
(email from chc101@health.ucsd.edu - Correlation Matrix for Annotations, July 8th 2020
CH slack, July 9th 2020)
We will make multiple plots that look similar to this:
Data given: The .annot file s in the following directories:
DDir="/space/chen-syn01/1/data/chen/gwas_gclust/UKB/LDSC/make_annot/DMR/invar/annot/"
BDir="/space/chen-syn01/1/data/chen/bin/ldsc/1000G_Phase3_baselineLD_v2.2_ldscores/"
We combine 6.annot file for each corr plot made. In total we will make 22 corr plots.
Could you help me with generating correlation matrix plots for all 22 chromosomes from this R code? each chromosome has one heatmap.
You need to write a loop to complete the code.
/space/chen-syn01/1/data/chen/gwas_gclust/UKB/LDSC/make_annot/DMR/invar/corMat/cal_cor.R
You can use this command to run a R script
Rscript cal_cor.R
Some useful information:
Install R packages to store my R library on my user directory on the server:
install.packages("GWASinlps", lib="/home/chc101/R/my_R_library")
Load R package one at a time:
library('GWASinlps', lib.loc="/home/chc101/R/my_R_library/")
More conveniently, add these lines to the scripts to add the path of my R library
path = "/home/chc101/R/my_R_library"
.libPaths(path)
library(GWASinlps)
There is an error when trying to combine u1 ('Unclassified_chr9.annot') into the data because it does not have the same number of rows as the others. (u1 only has 438106 objects where as all the other ones have 510501). I think this is because u1 is from ch9 instead of ch10. I
Fix: only combining files from the same chromosome
FEEDBACK:
CH liked the first one
Size should be about 100 x 100
Generating one averaged correlation plot by averaging over these 22 plots. Please make a circle plot and just the upper triangle.
Could you expand this correlation matrix by incorporating this annotation (so it's even bigger size)? An average one across all chromosomes will be good.
/space/chen-syn01/1/data/chen/gwas_gclust/UKB/LDSC/make_annot/motor/annot/chromatin_motor_chr*.annot
Could you extend the plot even bigger by adding the following annotations in this order?
/space/chen-syn01/1/data/chen/gwas_gclust/UKB/LDSC/make_annot/motor/annot/union_sum/union_sum_chr*.annot
/space/chen-syn01/1/data/cmakowski/GWASgclus_UKB/ldsc/partitioned_h2/Mar2020/motor_annot/chromatin_motor_chr*.annot
/space/chen-syn01/1/data/cmakowski/GWASgclus_UKB/ldsc/partitioned_h2/Mar2020/motor_annot/annot/union_sum/union_sum_chr*.annot
A sample of extended graph was send, it looked similar to this:
/space/chen-syn01/1/data/cinliu/plots/correlation/cal_cor_plot_.R
rm(list=ls())
#setwd("/space/chen-syn01/1/data/chen/gwas_gclust/UKB/LDSC/make_annot/DMR/invar/corMat/")
DDir="/space/chen-syn01/1/data/chen/gwas_gclust/UKB/LDSC/make_annot/DMR/invar/annot/"
#DDir1=/space/chen-syn01/1/data/cmakowski/GWASgclus_UKB/ldsc/partitioned_h2/Mar2020/motor_annot/annot/union_sum/union_sum_chr*.annot
DDir1="/space/chen-syn01/1/data/cmakowski/GWASgclus_UKB/ldsc/partitioned_h2/Mar2020/motor_annot/"
BDir="/space/chen-syn01/1/data/chen/bin/ldsc/1000G_Phase3_baselineLD_v2.2_ldscores/"
#ODir ="/space/chen-syn01/1/data/chen/gwas_gclust/UKB/LDSC/make_annot/DMR/invar/corMat/"
ODir ="/space/chen-syn01/1/data/cinliu/plots/correlation/data/"
# here we use chr10 as an example, please write a loop through 1-22 chromosome
for (i in 1:22){
base=read.table(paste0(BDir,'baselineLD.',i,'.annot'), h=T)
n=read.table(paste0(DDir,'Neanderthal_chr',i,'.annot'),h=T)
d=read.table(paste0(DDir,'Denisovan_chr',i,'.annot'),h=T)
p=read.table(paste0(DDir,'Presentday_Human_chr',i,'.annot'),h=T)
u1=read.table(paste0(DDir,'Unclassified_chr',i,'.annot'),h=T)
u2=read.table(paste0(DDir,'/union/union_chr',i,'.annot'),h=T)
cm1=read.table(paste0('/space/chen-syn01/1/data/chen/gwas_gclust/UKB/LDSC/make_annot/motor/annot/chromatin_motor_chr',i,'.annot'),h=T)
us1=read.table(paste0('/space/chen-syn01/1/data/chen/gwas_gclust/UKB/LDSC/make_annot/motor/annot/union_sum/union_sum_chr',i,'.annot'),h=T)
#/space/chen-syn01/1/data/cmakowski/GWASgclus_UKB/ldsc/partitioned_h2/Mar2020/motor_annot/chromatin_motor_chr*.annot
cm2=read.table(paste0(DDir1,'chromatin_motor_chr',i,'.annot'),h=T)
#space/chen-syn01/1/data/cmakowski/GWASgclus_UKB/ldsc/partitioned_h2/Mar2020/motor_annot/annot/union_sum/union_sum_chr*.annot
us2=read.table(paste0(DDir1,'/annot/union_sum/union_sum_chr',i,'.annot'),h=T)
data=cbind(base,d,n,p,u1,u2,cm1,us1,cm2,us2)
data1=data[,-c(1:5)]
cormat=cor(data1)
write.table(cormat,file=paste0(ODir,'corMat',i,'.txt'),quote=F)
### corrplots for individual chromosomes (22 plots will be generated)
install.packages("corrplot")
library(corrplot)
plot.new()
tiff(paste0(file="/space/chen-syn01/1/data/cinliu/plots/correlation/chr", i, "_testplot.tiff"),
width=2500,height=2000)
corrplot(cormat, method="circle", tl.col="black")
#dev.off()
}
#####average data
rm(list=ls())
first <- read.table("/space/chen-syn01/1/data/cinliu/plots/correlation/data/corMat1.txt", header=TRUE, quote="", comment.char="")
for (i in 2:22){ #22 chromosomes
data1 <- read.table(paste0("/space/chen-syn01/1/data/cinliu/plots/correlation/data/corMat",i,".txt"), header=TRUE, quote="", comment.char="")
for (j in 1:145){ #145 vraiables, update to number of variables
first[j,]<- first[j,] + data1[j,]
}
}
first[1,1]
averagedf <- first/22 #22 for 22 chromosomes
class(averagedf)
averagedf[1,1]
avaeragematirx <- data.matrix(averagedf)
write.table(avaeragematirx,file='corMatAverage.txt',quote=F)
#making average plot
library(corrplot)
plot.new()
tiff(paste0(file="/space/chen-syn01/1/data/cinliu/plots/correlation/avaerage_plot_update3.tiff"),
width=2500,height=2000)
corrplot(avaeragematirx, method="circle", tl.col="black", type='upper')
dev.off()