River plot & Brainplots
Fw: updated partitioned heritability spreadsheet
Fw: updated partitioned heritability spreadsheet
location on server:/space/chen-syn01/1/data/cinliu/plots/fig3/fig3a
README:
location on server:/space/chen-syn01/1/data/cinliu/plots/fig3/fig3a
#original message date: Jul 16 2020
#email titile: Fw: updated partitioned heritability spreadsheet
#email between chc101@health.ucsd.edu, carolina.mak3@gmail.com, and cinliu@berkeley.edu
#goal plots: 1 river plot (resulting in html file), 1 brain map
#This was a project that underwent many revisions so only the final copy is kept here.
The /space/chen-syn01/1/data/cinliu/plots/fig3/fig3a/figure3A.zip file
contains the same information as /space/chen-syn01/1/data/cinliu/plots/fig3/fig3a/figure3A
#Contents explained:
#/brainmap
Contains two .pngs(brainmaps) with code and data that generated the images
note: the "ggsegChen.R" file is fixing an error in the ggsegChen package, be sure to run the file before you run "chen_brain_map.R"
"chen_brain_map.R" is the R file that will generate the brain maps
"sig_enrichment_3categ_July16.xlsx" is the data file used.
#/riverplot
Contains three .png that are just the color key of the plots, those were manually cropped.
Please read the /riverplot_code_with_README/README_Expalanation_NEEDS_MINOR_UPDATE_but_mostly_find_to_read_the_code_and_data_are_up_tod.docx for details on how to make the river plot.
The README is a little outdated in terms of the labeling and positions changed in regards to the final graph that was made.
(we ended up adding little things like numbers before the some of the names, or changing the order of some of the things).
The code and the data files included are all up to date and good to use though, it's just that the README doc might have some minor differences. For the most part you should be able to replicate the plot perfectly following the readme and using the R files and data provided in the folder.
1_get_color_from_ggplots.R
rm(list=ls())
setwd("~/Desktop/2020lab/ch-email/partitioned heritability /new river/")
library(readxl)
library(xlsx)
library(dplyr)
library(data.table)
library(ggplot2)
#notes about this df: manually changes were made
#1. DGF change to category 3
thre_ldsc_z1_64_enrichmentp_categ <- read_excel("thre_ldsc_z1.64_enrichmentp_categ.xlsx", sheet = "relabel for plotting")
df <- thre_ldsc_z1_64_enrichmentp_categ
#group name update need to add numbers
hits_perregion <- read_excel("hits_perregion.xlsx")
for (i in 1:24) {
hits_perregion$region_with_number[i] <- paste(hits_perregion$Region_number[i],hits_perregion$Region[i], sep = ".")
}
df$number_region <- hits_perregion$region_with_number[match(df$region, hits_perregion$Region)]
head(df)
#add log p
df$logp<- -log10(df$Enrichment_p)
#seperate by group
group1 <- df[df$group ==1,]
group2 <- df[df$group ==2,]
group3 <- df[df$group ==3,]
###manually input posistions to assigen the x column
library(readr)
write.table(group1, "group1.csv" ,sep = '\t', quote = F, row.names = F)
write.table(group2, "group2.csv", sep = '\t', quote = F, row.names = F)
write.table(group3, "group3.csv", sep = '\t', quote = F, row.names = F)
# I took the output of each group and copied and pasted it to an excel called "manual assign x for ggplot.xlsx"
# and then I manually added a column called 'colAssigned' and then manually assigned the x value
###get colors using ggplot ####
#we are going to borrow the colors from ggplot becasue I am not sure how to assign the colors directly in river plot
#plot1
a <- read_excel("manual assign x for ggplot.xlsx", sheet = "group1")
names(a)
(setattr(a, "row.names", a$`Re-labeling for plotting`))
plot1 <- ggplot(a, aes(x=colAssigned,y=region,color=logp, label = rownames(a))) +
geom_point(size=4)
plot1 <- plot1 + geom_text(nudge_x=-0.45,size = 3.5, color ='black', check_overlap = TRUE)+
scale_x_continuous(limits=c(0.1, 3))+
labs( x= NULL, colour = "conserved")+
theme(axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"))+
scale_color_gradient(limits = c(2.5,8.3),low="#e5f5e0", high="#31a354") #optional make 3 sclaes match, will generate a small warning to overwrite the current scale but it's fine
ggsave(filename = "group1.png", plot1, width = 15, height = 5, dpi = 300, units = "in")
####VERY IMPORTANT COLOR CHECK STEP ####
##check the order and color manually, make sure they line up with the order of group 1
ggplot_build(plot1)$data[[1]]
#the results should be in the same order as the df even thought the printed plot is in alphobetical order
group1$color <- ggplot_build(plot1)$data[[1]]$colour
#####################
####plot2####
b <- read_excel("manual assign x for ggplot.xlsx", sheet = "group2")
names(b)
(setattr(b, "row.names", b$`Re-labeling for plotting`))
plot2 <- ggplot(b, aes(x=colAssigned,y=region,color=logp, label = rownames(b))) +
geom_point(size=4)
plot2 <- plot2 + geom_text(nudge_x=-0.45,size = 3.5, color ='black', check_overlap = TRUE)+
scale_x_continuous(limits=c(0.1, 3))+
labs( x= NULL, colour = "developmental")+
theme(axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"))+
scale_color_gradient(limits = c(2.5,8.3),low="#fee8c8", high="#e34a33") #optional make 3 sclaes match
ggsave(filename = "group2.png", plot2, width = 15, height = 5, dpi = 300, units = "in")
####VERY IMPORTANT COLOR CHECK STEP ####
##check the order and color manually, make sure they line up with the order of group 1
ggplot_build(plot2)$data[[1]]
#the results should be in the same order as the df even thought the printed plot is in alphobetical order
group2$color <- ggplot_build(plot2)$data[[1]]$colour
#####plot3####
c <- read_excel("manual assign x for ggplot.xlsx", sheet = "group3")
names(c)
(setattr(c, "row.names", c$`Re-labeling for plotting`))
plot3 <- ggplot(c, aes(x=colAssigned,y=region,color=logp, label = rownames(c))) +
geom_point(size=4)
plot3 <- plot3 + geom_text(nudge_x=-0.45,size = 3.5, color ='black', check_overlap = TRUE)+
scale_x_continuous(limits=c(0.1, 3))+
labs( x= NULL, colour = "regulatory")+
theme(axis.ticks.x = element_blank(),
axis.ticks.y = element_blank(),
panel.border = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"))+
scale_color_gradient(limits = c(2.5,8.3),low="#deebf7", high="#3182bd") #optional make 3 sclaes match
ggsave(filename = "group3.png", plot3, width = 15, height = 5, dpi = 300, units = "in")
####VERY IMPORTANT COLOR CHECK STEP ####
##check the order and color manually, make sure they line up with the order of group 1
ggplot_build(plot3)$data[[1]]
#the results should be in the same order as the df even thought the printed plot is in alphobetical order
group3$color <- ggplot_build(plot3)$data[[1]]$colour
###arrange onto one page
library(gridExtra)
all_plots <- grid.arrange(plot1, plot2, plot3, nrow = 3)
ggsave(filename = "allplots.png", all_plots, width = 12.5, height = 12.5, dpi = 300, units = "in")
#####update the groups with colors
df_with_colors <- rbind(group1, group2, group3)
write.table(df_with_colors, "df_with_colors.csv" ,sep = '\t', quote = F, row.names = F)
## this is the new df we will be working with for river plot
2_new_river_with_number.R
##river plot
rm(list=ls())
setwd("~/Desktop/2020lab/ch-email/partitioned heritability /new river/")
library(readxl)
library(xlsx)
library(readr)
library(networkD3)
library(dplyr)
library(htmlwidgets)
#note about this DF: number_region numebrs were added manually
df_with_colors <- read.delim("~/Desktop/2020lab/ch-email/partitioned heritability /new river/df_with_colors.csv")
df <- df_with_colors
#river plot needs 2 inputs: nodes and linkes
###NODES####
# first input is a list of names of all the nodes and a correspoinding number that starts with 0
number_region <- unique(df$number_region)
label <- unique(df$Re.labeling.for.plotting)
nodes <- as.data.frame(union(number_region, label))
nodes$numbers <- c(0:31) #31 comes from nrow(nodes)-1
names(nodes) = c('names','numbers')
#write.table(nodes, file = "nodes.csv", sep = '\t', quote = F, row.names = F)
#####LINKS######
## match the names to numebrs from the nodes
df$n1 = nodes$numbers[match(df$number_region,nodes$names)]
df$n2 = nodes$numbers[match(df$Re.labeling.for.plotting,nodes$names)]
#assigen them all value of 1 for the same width of river for each node
df$value = c(1)
#pulling our our desired values and renaming them for the purpose of ploting later.
link <- as.data.frame(cbind(df$n1, df$n2, df$value))
class(link)
head(link)
names(link) = c('target','source','value')
nrow(link)
link$group <- as.factor(c(1:39))
nodes$group <- as.factor(c("my_unique_group"))
#######STOP for manual color update
df$color
# manually copy and past df$color into the code below (in order)
#the last color "lightgrey" speccifys the color of the nodes, make sure you keep it
hex_color <- 'd3.scaleOrdinal() .domain(["1","2", "3", "4", "5","6","7", "8", "9", "10",
"11","12", "13", "14", "15","16","17", "18", "19", "20",
"21","22", "23", "24", "25","26","27", "28", "29", "30",
"31","32", "33", "34", "35","36","37", "38", "39", "my_unique_group"]) .range([
"#60B571", "#BEE2BD", "#D7EED4", "#DFF2DA", "#97CF9C", "#CEEACC", "#D0EBCD", "#C5E5C4", "#DFF2DA",
"#DCF1D8", "#DFF2DB", "#C6E6C5", "#E44C34", "#FBAE8B", "#F79A78", "#FEE3C2", "#FEC8A6", "#6F9FCE",
"#8AB0D7", "#7AA6D2", "#4B8DC3", "#B9D0E9", "#D8E7F5", "#DAE8F6", "#D9E7F5", "#7FA9D3", "#B9D0E9",
"#C2D7EC", "#AAC6E3", "#A1C0E0", "#B1CBE6", "#ABC7E4", "#8DB2D8", "#C8DBEF", "#AFC9E5", "#C2D6EC",
"#DAE8F5", "#C2D7EC", "#CFE0F1",
"lightgrey"])'
sn <- sankeyNetwork(Links = link, Nodes = nodes,
Source = 'source',
Target = 'target',
Value = 'value',
NodeID = 'names',
colourScale = hex_color,LinkGroup="group", NodeGroup="group",
iterations = 0, fontSize = 25, nodeWidth = 10,nodePadding=10, sinksRight=FALSE,
margin = list(right = 400), width = 1400, height = 1100)
sn
#moving the text on the left side out side of the rivers
sn <- onRender(
sn,
'
function(el,x){
// select all our node text
d3.select(el)
.selectAll(".node text")
.filter(function(d) { return (d.name.startsWith("A") || d.name.startsWith("B") || d.name.startsWith("D")
|| d.name.startsWith("C")|| d.name.startsWith("F")|| d.name.startsWith("P") || d.name.startsWith("E")
||d.name.startsWith("H")|| d.name.startsWith("G")); })
.attr("x", x.options.nodeWidth - 16)
.attr("text-anchor", "end");
}
'
)
sn
ggsegChen.R
(for correcting an error that occurs in ggsegChen, run this before you try to use ggsegChen in the next script)
library(ggsegChen)
library(ggseg)
library(dplyr)
for (i in 1:37){
chenAr[[6]][[i]]$.subid = 1
}
ggseg(atlas = "chenAr", mapping = aes(fill = region)) +
scale_fill_brain("chenAr", package = "ggsegChen") +
labs(title = "Chen areal (chenAr)") +
theme(legend.position = "bottom",
legend.text = element_text(size = 9)) +
guides(fill = guide_legend(ncol = 3))
for (i in 1:37){
chenTh[[6]][[i]]$.subid = 1
}
ggseg(atlas = chenTh, mapping = aes(fill = region)) +
scale_fill_brain("chenTh", package = "ggsegChen") +
labs(title = "Chen thickness (chenTh)") +
theme(legend.position = "bottom",
legend.text = element_text(size = 9)) +
guides(fill = guide_legend(ncol = 2))
chen_brain_map.R
rm(list=ls())
setwd("~/Desktop/2020lab/ch-email/partitioned heritability /brain_map_ggsegChen")
#brain map
library(ggsegChen)
library(ggseg)
library(dplyr)
library(ggplot2)
library(readxl)
#match/ add a row of region type first data first
sig_enrichment_3categ_July16 <- read_excel("~/Desktop/2020lab/ch-email/partitioned heritability /old river/sig_enrichment_3categ_July16.xlsx")
df2 <- sig_enrichment_3categ_July16
#changes/update
#superiorparietal_area, superiorparietal_thickness, and superiortemporal_area where S1, A1 are located can be sensory_motor regions.
df2$region_type[df2$region=="superiorparietal_area"] <- "sensory_motor"
df2$region_type[df2$region=="superiorparietal_thickness"] <- "sensory_motor"
df2$region_type[df2$region=="superiortemporal_area"] <- "sensory_motor"
grep("_thickness$", df2$region)
grep("_area$", df2$region)
thicnkess <- df2[grep("_thickness$", df2$region),]
area <- df2[grep("_area$", df2$region),]
#thicckness, replace the names manually to matche the atlas names
unique(thicnkess$region)
thicnkess$region[thicnkess$region=="ventralfrontal_thickness"] <- "ventral frontal"
thicnkess$region[thicnkess$region=="superiorparietal_thickness"] <- "superior parietal"
thicnkess$region[thicnkess$region=="temporalpole_thickness"] <- "temporal pole"
thicnkess$region[thicnkess$region=="medialprefrontal_thickness"] <- "medial prefrontal"
thicnkess$region[thicnkess$region=="perisylvian_thickness"] <- "perisylvian"
thicnkess$region[thicnkess$region=="ventromedialoccipital_thickness"] <- "ventromedial occipital"
thicnkess$region[thicnkess$region=="inferiorparietal_thickness"] <- "inferior parietal"
thicnkess$region[thicnkess$region=="middletemporal_thickness"] <- "middle temporal"
thicnkess$region[thicnkess$region=="occipital_thickness"] <- "occipital"
#
additional_region <- c("dorsolateral prefrontal", "medial temporal", "middle temporal", "motor-premotor-supplementary motor area", "occipital")
additional_region_type <- c("association",'paralimbic', "association", "sensory_motor", "sensory_motor" )
thick_add <- tibble(
region= additional_region,
region_type = additional_region_type
)
someData = tibble(
region= thicnkess$region,
region_type = thicnkess$region_type
)
someData <- rbind(thick_add, someData)
someData$region_type <- factor(someData$region_type, levels = c("paralimbic", "sensory_motor", "association"))
#add data to atlas
chenTh %>%
left_join(someData) %>%
head(10)
chenTh %>%
left_join(someData) %>%
arrange(region_type) %>%
head(10)
#plot thickness
for (i in 1:37){
chenTh[[6]][[i]]$.subid = 1
}
plot1 <- ggseg(someData, atlas=chenTh, mapping=aes(fill=region_type), color= 'white')
plot1+ scale_fill_manual(values=c("#182B49", "#00B0DA", "#006A96"))
ggsave("thickness_brainmap.png", width = 10, height = 3)
#######area#####
# replace the names manually to matche the atlas names
unique(area$region)
area$region[area$region=="anteromedialtemporal_area"] <- "anteromedial temporal"
area$region[area$region=="inferiorparietal_area"] <- "inferior parietal"
area$region[area$region=="occipital_area"] <- "occipital"
area$region[area$region=="posterolateraltemporal_area"] <- "posterolateral temporal"
area$region[area$region=="precuneus_area"] <- "precuneus"
area$region[area$region=="superiortemporal_area"] <- "superior temporal"
area$region[area$region=="motorpremotor_area"] <- "motor-premotor"
area$region[area$region=="orbitofrontal_area"] <- "orbitofrontal"
area$region[area$region=="dorsomedialfrontal_area"] <- "dorsomedial frontal"
area$region[area$region=="superiorparietal_area"] <- "superior parietal"
area$region[area$region=="dorsolateralprefrontal_area"] <- "dorsolateral prefrontal"
area$region[area$region=="parsopercularis_area"] <- "pars opercularis,subcentral"
#
area$region_type <- factor(area$region_type, levels = c("paralimbic", "sensory_motor", "association"))
head(chenAr)
someData = tibble(
region= area$region,
region_type = area$region_type
)
someData
chenAr %>%
left_join(someData) %>%
head(10)
chenAr %>%
left_join(someData) %>%
arrange(region_type) %>%
head(10)
#plot thickness
for (i in 1:37){
chenAr[[6]][[i]]$.subid = 1
}
plot2 <- ggseg(someData, atlas=chenAr, mapping=aes(fill=region_type), color = 'white')
plot2+ scale_fill_manual(values=c("#182B49", "#00B0DA", "#006A96"))
ggsave("area_brainmap.png", width = 10, height = 3)