12 traits line graph
(CH email title: Cin Liu summer internship. May 19th 2019)
(CH email title: Cin Liu summer internship. May 19th 2019)
location on server: /space/chen-syn01/1/data/cinliu/plots/12_traits_line_graph
I am sending some more training materials for you to play with real data in R - for comparison of polygenic scores between Alzheimer's patients and matched controls. I am sharing the files on google drive (https://drive.google.com/drive/folders/1IKhXSXtmOFB9Y26HNq9irmn7bi_ftebg?usp=sharing).
The learning objectives are-
- to get familiar with the concept of polygenic scores in Alzheimer's disease (please read the word doc about polygenic scores in the shared folder)
- to learn R programming for statistical analysis (starting with basic statistics -calculating mean, t-test for comparing means)
- to generate scientific figures by R
I am also sharing an example R code with you (polygenic_score_comparison.R). Please feel free to modify and improve it, especially the part for making figure. Please re-generate a figure like the last slide in the powerpoint (Project Concept). You can improve the figure using ggplot. The code for making the figure needs to be updated because the data format has changed since last time we generated the figure.
## clear the workspace
rm(list=ls())
## Set working directory and data directory
WDir = "~/Desktop/alz/" # UPDATE
setwd(WDir)
DDir = "~/Desktop/alz/data/" # UPDATE
import the two .txt files
## Read and adjust data
score = read.table(paste0(DDir, "PRS_1e5_positive.txt"), header=T) # loading file with p-value < 1e5
score=score[complete.cases(score),] # remove rows with NAs
score_s = score # make a copy of the data
score_s[,18:76] = scale(score[,18:76],center=T,scale=T) # over-write the data copy with standardized PRS (overall mean = 0 and SD = 1)
summary(score_s)
Compare mean of positive ploygenic score between groups
######## select 12 traits ############
##"Total Cholesterol","Type 2 Diabetes","Body Mass Index","Diastolic Blood Pressure",
##"Systolic Blood Pressure","Education","Intelligence","Cognition","Left Hippocampus",
##"Susceptibility Weighted Imaging","Subjective Well-being","C-reactive Protein"
######################################
trait = c("Total Cholesterol","Type 2 Diabetes","Body Mass Index","Diastolic Blood Pressure",
"Systolic Blood Pressure","Education","Intelligence","Cognition","Left Hippocampus",
"Susceptibility Weighted Imaging","Subjective Well-being","C-reactive Protein")
score1=score_s[,c(1:17,20,22,29,33,34,38,41,42,50,52,59,67)] # select columns of subject info and 12 traits
score_mean = matrix(NA,12,3) # create an empty matrix (NA) to store means of PRS in controls and AD and their differences in p value
colnames(score_mean) = c("mean_CON","mean_AD","pvalue")
rownames(score_mean) = trait
Mean score for each triats
for (i in c(1:12)) {
score_mean[i,1:2] = tapply(score1[,i+17],score1$diagAD1,mean) ### obtain mean in cases and controls, skipping the first 17th columns for subj info
score_mean[i,3] = t.test(score1[,i+17]~diagAD1,data=score1)$p.value ### obtain p-value: comparison PRS between cases and controls.
}
score_mean
We want to first separate the data into patient data and control data. Than, within each group we want to compare the male and female.
### Compare mean of positive ploygenic score between female and male
score2=score1[score1$diagAD1==1,] # patients only
score3=score1[score1$diagAD1==0,] # control only
Patients
#creating matrix of PATIENTS with gender difference
score_mean_sex_pai = matrix(NA,12,3) # create an empty matrix (NA)
colnames(score_mean_sex_pai) = c("mean_M","mean_F","pvalue")
rownames(score_mean_sex_pai) = trait
for (i in c(1:12)) {
score_mean_sex_pai[i,1:2] = tapply(score2[,i+17],score2$sexM1F2,mean) ### obtain mean in female and male patients
score_mean_sex_pai[i,3] = t.test(score2[,i+17]~sexM1F2,data=score2)$p.value ### obtain p-value
}
score_mean_sex_pai
Control
#creating matrix of CONTROL with gender difference
score_mean_sex_con = matrix(NA,12,3) # create an empty matrix (NA)
colnames(score_mean_sex_con) = c("mean_M","mean_F","pvalue")
rownames(score_mean_sex_con) = trait
for (i in c(1:12)) {
score_mean_sex_con[i,1:2] = tapply(score3[,i+17],score3$sexM1F2,mean) ### obtain mean in female and male patients
score_mean_sex_con[i,3] = t.test(score3[,i+17]~sexM1F2,data=score3)$p.value ### obtain p-value
}
score_mean_sex_con
#############################################################
###### making figures to plot group mean and p values #######
#############################################################
## the following code requires debugging
tiff(file="/Users/niniliu/Desktop/alz/data/mean_sex_12_line.tiff",width=3000,height=2000) #UPDATE
par(mfrow=c(3,4),mar=c(5,10,7,2),mgp=c(7,3,0))
if (score_mean_sex_pai[,3]<0.05) color=c("hotpink","deeppink3") else color=c("dodgerblue","blue4") # if pvlue <0.05 make pink
#graph 1 (with ylab of "polygenic score")
plot(score_mean_sex_con[1,1:2],type="n",xlab="",ylab="Polygenetic Score",xlim=c(1,3),ylim=c(-0.15,0.15),xaxt="n",
cex.axis=4,cex.lab=4,font.lab=2,tck=0.02,main=trait[1],cex.main=4,font.main=2)
lines(c(1.3,2.3),score_mean_sex_con[1,1:2],type="b",lty=2,lwd=6,pch=1,cex=2.5,col=color[1])
lines(c(1.3,2.3),score_mean_sex_pai[1,1:2],type="b",lty=1,lwd=6,pch=16,cex=2.5,col=color[2])
axis(1,c(1.3,2.3),labels=c("Male","Female"),cex.axis=4,font=2,tck=0.02)
text(2.7,score_mean_sex_con[1,2],paste("P =",signif(score_mean_sex_con[i,3])),font=2,cex=4)
text(2.7,score_mean_sex_pai[1,2],paste("P =",signif(score_mean_sex_pai[i,3])),font=2,cex=4)
legend(x=1.2, y=0.02,c("controls","cases"),lty=c(2,1),pch=c(1,16),lwd=6,col=color,bty="n",cex=4)
# graph 2-12 (no need for ylab)
for (i in 2:12){
if (score_mean_sex_pai[i,3]<0.05) color=c("hotpink","deeppink3") else color=c("dodgerblue","blue4") # if pvlue <0.05 make pink
plot(score_mean_sex_pai[i,1:2],type="n",xlab="",ylab="",xlim=c(1,3),ylim=c(-0.15,0.15),xaxt="n",
cex.axis=4,cex.lab=4,font.lab=2,tck=0.02,main=trait[i],cex.main=4,font.main=2)
lines(c(1.3,2.3),score_mean_sex_con[i,1:2],type="b",lty=2,lwd=6,pch=1,cex=2.5,col=color[1])
lines(c(1.3,2.3),score_mean_sex_pai[i,1:2],type="b",lty=1,lwd=6,pch=16,cex=2.5,col=color[2])
axis(1,c(1.3,2.3),labels=c("Male","Female"),cex.axis=4,font=2,tck=0.02)
text(2.7,score_mean_sex_con[i,2],paste("P =",signif(score_mean_sex_con[i,3])),font=2,cex=4)
if (i==4 | i==8) text(2.7,score_mean_sex_pai[i,2]-0.015,paste("P =",signif(score_mean_sex_pai[i,3])),font=2,cex=4)
else text(2.7,score_mean_sex_pai[i,2],paste("P =",signif(score_mean_sex_pai[i,3])),font=2,cex=4)
#legend("topleft",c("controls","cases"),lty=c(2,1),pch=c(1,16),lwd=2,col=c("dodgerblue","blue4"),title=gsub(".positive","",trait[i]),bty="n")
}
dev.off()
######### nini's edits end ###############
Nini_polygen_graph_project-1.R
### Compare polygenic risk scores of multiple traits in Alzheimer's disease and matched controls
## clear the workspace
rm(list=ls())
## Set working directory and data directory
WDir = "~/Desktop/alz/" # UPDATE
setwd(WDir)
DDir = "~/Desktop/alz/data/" # UPDATE
## Read and adjust data
score = read.table(paste0(DDir, "PRS_1e5_positive.txt"), header=T) # loading file with p-value < 1e5
score=score[complete.cases(score),] # remove rows with NAs
score_s = score # make a copy of the data
score_s[,18:76] = scale(score[,18:76],center=T,scale=T) # over-write the data copy with standardized PRS (overall mean = 0 and SD = 1)
summary(score_s)
### Compare mean of positive ploygenic score between groups ##############################################################
######## select 12 traits ############
##"Total Cholesterol","Type 2 Diabetes","Body Mass Index","Diastolic Blood Pressure",
##"Systolic Blood Pressure","Education","Intelligence","Cognition","Left Hippocampus",
##"Susceptibility Weighted Imaging","Subjective Well-being","C-reactive Protein"
######################################
trait = c("Total Cholesterol","Type 2 Diabetes","Body Mass Index","Diastolic Blood Pressure",
"Systolic Blood Pressure","Education","Intelligence","Cognition","Left Hippocampus",
"Susceptibility Weighted Imaging","Subjective Well-being","C-reactive Protein")
score1=score_s[,c(1:17,20,22,29,33,34,38,41,42,50,52,59,67)] # select columns of subject info and 12 traits
score_mean = matrix(NA,12,3) # create an empty matrix (NA) to store means of PRS in controls and AD and their differences in p value
colnames(score_mean) = c("mean_CON","mean_AD","pvalue")
rownames(score_mean) = trait
for (i in c(1:12)) {
score_mean[i,1:2] = tapply(score1[,i+17],score1$diagAD1,mean) ### obtain mean in cases and controls, skipping the first 17th columns for subj info
score_mean[i,3] = t.test(score1[,i+17]~diagAD1,data=score1)$p.value ### obtain p-value: comparison PRS between cases and controls.
}
score_mean
####### the follwing code has been edited by nini##########
### Compare mean of positive ploygenic score between female and male
score2=score1[score1$diagAD1==1,] # patients only
score3=score1[score1$diagAD1==0,] # control only
#creating matrix of PAITENTS with gender difference
score_mean_sex_pai = matrix(NA,12,3) # create an empty matrix (NA)
colnames(score_mean_sex_pai) = c("mean_M","mean_F","pvalue")
rownames(score_mean_sex_pai) = trait
for (i in c(1:12)) {
score_mean_sex_pai[i,1:2] = tapply(score2[,i+17],score2$sexM1F2,mean) ### obtain mean in female and male patients
score_mean_sex_pai[i,3] = t.test(score2[,i+17]~sexM1F2,data=score2)$p.value ### obtain p-value
}
score_mean_sex_pai
#creating matrix of CONTROL with gender difference
score_mean_sex_con = matrix(NA,12,3) # create an empty matrix (NA)
colnames(score_mean_sex_con) = c("mean_M","mean_F","pvalue")
rownames(score_mean_sex_con) = trait
for (i in c(1:12)) {
score_mean_sex_con[i,1:2] = tapply(score3[,i+17],score3$sexM1F2,mean) ### obtain mean in female and male patients
score_mean_sex_con[i,3] = t.test(score3[,i+17]~sexM1F2,data=score3)$p.value ### obtain p-value
}
score_mean_sex_con
#############################################################
###### making figures to plot group mean and p values #######
#############################################################
## the following code requires debugging
tiff(file="/Users/niniliu/Desktop/alz/data/mean_sex_12_line.tiff",width=3000,height=2000) #UPDATE
par(mfrow=c(3,4),mar=c(5,10,7,2),mgp=c(7,3,0))
if (score_mean_sex_pai[,3]<0.05) color=c("hotpink","deeppink3") else color=c("dodgerblue","blue4") # if pvlue <0.05 make pink
#graph 1 (with ylab of "polygenic score")
plot(score_mean_sex_con[1,1:2],type="n",xlab="",ylab="Polygenetic Score",xlim=c(1,3),ylim=c(-0.15,0.15),xaxt="n",
cex.axis=4,cex.lab=4,font.lab=2,tck=0.02,main=trait[1],cex.main=4,font.main=2)
lines(c(1.3,2.3),score_mean_sex_con[1,1:2],type="b",lty=2,lwd=6,pch=1,cex=2.5,col=color[1])
lines(c(1.3,2.3),score_mean_sex_pai[1,1:2],type="b",lty=1,lwd=6,pch=16,cex=2.5,col=color[2])
axis(1,c(1.3,2.3),labels=c("Male","Female"),cex.axis=4,font=2,tck=0.02)
text(2.7,score_mean_sex_con[1,2],paste("P =",signif(score_mean_sex_con[i,3])),font=2,cex=4)
text(2.7,score_mean_sex_pai[1,2],paste("P =",signif(score_mean_sex_pai[i,3])),font=2,cex=4)
legend(x=1.2, y=0.02,c("controls","cases"),lty=c(2,1),pch=c(1,16),lwd=6,col=color,bty="n",cex=4)
# graph 2-12 (no need for ylab)
for (i in 2:12){
if (score_mean_sex_pai[i,3]<0.05) color=c("hotpink","deeppink3") else color=c("dodgerblue","blue4") # if pvlue <0.05 make pink
plot(score_mean_sex_pai[i,1:2],type="n",xlab="",ylab="",xlim=c(1,3),ylim=c(-0.15,0.15),xaxt="n",
cex.axis=4,cex.lab=4,font.lab=2,tck=0.02,main=trait[i],cex.main=4,font.main=2)
lines(c(1.3,2.3),score_mean_sex_con[i,1:2],type="b",lty=2,lwd=6,pch=1,cex=2.5,col=color[1])
lines(c(1.3,2.3),score_mean_sex_pai[i,1:2],type="b",lty=1,lwd=6,pch=16,cex=2.5,col=color[2])
axis(1,c(1.3,2.3),labels=c("Male","Female"),cex.axis=4,font=2,tck=0.02)
text(2.7,score_mean_sex_con[i,2],paste("P =",signif(score_mean_sex_con[i,3])),font=2,cex=4)
if (i==4 | i==8) text(2.7,score_mean_sex_pai[i,2]-0.015,paste("P =",signif(score_mean_sex_pai[i,3])),font=2,cex=4)
else text(2.7,score_mean_sex_pai[i,2],paste("P =",signif(score_mean_sex_pai[i,3])),font=2,cex=4)
#legend("topleft",c("controls","cases"),lty=c(2,1),pch=c(1,16),lwd=2,col=c("dodgerblue","blue4"),title=gsub(".positive","",trait[i]),bty="n")
}
dev.off()
######### nini's edits end ###############
score_mean_scl_1e5 <- score_mean_scl_1e5
### mean of score line plot
tiff(file ="/Users/niniliu/Desktop/alz/data/mean_sex_line.tiff",width=4000,height=4000)
par(mfrow=c(8,7))
for (i in 1:56){
if (score_mean_scl_1e5[i,10]<0.05) color=c("hotpink","deeppink3") else color=c("dodgerblue","blue4")
plot(c(1.4,2.5,1.4,2.5),score_mean_scl_1e5[i,1:4],type="n",xlab="Sex",ylab="PRS",xlim=c(1,3),ylim=c(-0.15,0.15),xaxt="n",main=gsub("
.positive","",trait[i]))
lines(c(1.4,2.5),score_mean_scl_1e5[i,1:2],type="b",lty=2,pch=1,lwd=4,col=color[1])
lines(c(1.4,2.5),score_mean_scl_1e5[i,3:4],type="b",lty=1,pch=16,lwd=4,col=color[2])
axis(1,c(1.4,2.5),labels=c("male","female"))
text(2.75,score_mean_scl_1e5[i,2],paste("P =",signif(score_mean_scl_1e5[i,9],3)),font=2,cex=2)
text(2.75,score_mean_scl_1e5[i,4],paste("P =",signif(score_mean_scl_1e5[i,10],3)),font=2,cex=2)
legend("topleft",c("controls","cases"),lty=c(2,1),pch=c(1,16),lwd=2,col=c("dodgerblue","blue4"),title=gsub(".positive","",trait[i])
,bty="n")
}
dev.off()
tiff(file="/home/mtlo/ADGC/mean_age_line.tiff",width=4000,height=4000)
par(mfrow=c(8,7))
for (i in 1:56){
if (score_mean_scl_1e5[i,12]<0.05) color=c("orange","orange4") else color=c("chartreuse3","darkgreen")
plot(c(1.4,2.5,1.4,2.5),score_mean_scl_1e5[i,5:8],type="n",xlab="Sex",ylab="PRS",xlim=c(1,3),ylim=c(-0.15,0.2),xaxt="n",main=gsub(".
positive","",trait[i]))
lines(c(1.4,2.5),score_mean_scl_1e5[i,5:6],type="b",lty=2,pch=1,lwd=4,col=color[1])
lines(c(1.4,2.5),score_mean_scl_1e5[i,7:8],type="b",lty=1,pch=16,lwd=4,col=color[2])
axis(1,c(1.4,2.5),labels=c("male","female"))
text(2.75,score_mean_scl_1e5[i,6],paste("P =",signif(score_mean_scl_1e5[i,11],3)),font=2,cex=2)
text(2.75,score_mean_scl_1e5[i,8],paste("P =",signif(score_mean_scl_1e5[i,12],3)),font=2,cex=2)
#legend("topleft",c("controls","cases"),lty=c(2,1),pch=c(1,16),lwd=2,col=c("dodgerblue","blue4"),title=gsub(".positive","",trait[i]),bty="n")
}
dev.off()
trait[c(3,5,14,16,17,22,24,25,33,35,42,49)] = c("Total Cholesterol","Type 2 Diabetes","Body Mass Index","Diastolic Blood Pressure",
"Systolic Blood Pressure","Education","Intelligence","Cognition","Left Hippocampus",
"Susceptibility Weighted Imaging","Subjective Well-being","C-reactive Protein")
tiff(file="/home/mtlo/ADGC/mean_sex_12_line.tiff",width=3000,height=2000)
par(mfrow=c(3,4),mar=c(5,10,7,2),mgp=c(7,3,0))
if (score_mean_sex_scl[3,22]<0.05) color=c("hotpink","deeppink3") else color=c("dodgerblue","blue4")
plot(c(1.3,2.3,1.3,2.3),score_mean_sex_scl[3,c(2,6,10,14)],type="n",xlab="",ylab="Polygenic Scores",,xlim=c(1,3),ylim=c(-0.15,0.15),xaxt="n",
cex.axis=4,cex.lab=4,font.lab=2,tck=0.02,main=trait[3],cex.main=4,font.main=2)
lines(c(1.3,2.3),score_mean_sex_scl[3,c(2,6)],type="b",lty=2,lwd=5,pch=1,cex=2.5,col=color[1])
lines(c(1.3,2.3),score_mean_sex_scl[3,c(10,14)],type="b",lty=1,lwd=5,pch=16,cex=2.5,col=color[2])
axis(1,c(1.3,2.3),labels=c("Male","Female"),cex.axis=4,font=2,tck=0.02)
text(2.7,score_mean_sex_scl[3,6],paste("P =",signif(score_mean_sex_scl[3,18],3)),font=2,cex=4)
text(2.7,score_mean_sex_scl[3,14],paste("P =",signif(score_mean_sex_scl[3,22],3)),font=2,cex=4)
legend("left",c("controls","cases"),lty=c(2,1),pch=c(1,16),lwd=6,col=color,bty="n",cex=4)
for (i in c(5,14,16,17,22,24,25,33,35,42,49)){
if (score_mean_sex_scl[i,22]<0.05) color=c("hotpink","deeppink3") else color=c("dodgerblue","blue4")
plot(c(1.3,2.3,1.3,2.3),score_mean_sex_scl[i,c(2,6,10,14)],type="n",xlab="",ylab="",,xlim=c(1,3),ylim=c(-0.15,0.15),xaxt="n",
cex.axis=4,cex.lab=4,font.lab=2,tck=0.02,main=trait[i],cex.main=4,font.main=2)
lines(c(1.3,2.3),score_mean_sex_scl[i,c(2,6)],type="b",lty=2,lwd=6,pch=1,cex=2.5,col=color[1])
lines(c(1.3,2.3),score_mean_sex_scl[i,c(10,14)],type="b",lty=1,lwd=6,pch=16,cex=2.5,col=color[2])
axis(1,c(1.3,2.3),labels=c("Male","Female"),cex.axis=4,font=2,tck=0.02)
text(2.7,score_mean_sex_scl[i,6],paste("P =",signif(score_mean_sex_scl[i,18],3)),font=2,cex=4)
if (i==14 | i==16) text(2.7,score_mean_sex_scl[i,14]-0.015,paste("P =",signif(score_mean_sex_scl[i,22],3)),font=2,cex=4) else text(2.7,score_mean_sex_scl[i,14],paste("P =",signif(score_mean_sex_scl[i,22],3)),font=2,cex=4)
#legend("topleft",c("controls","cases"),lty=c(2,1),pch=c(1,16),lwd=2,col=c("dodgerblue","blue4"),title=gsub(".positive","",trait[i]),bty="n")
}
dev.off()