River plot & Brainplots

Fw: updated partitioned heritability spreadsheet

location on server:/space/chen-syn01/1/data/cinliu/plots/fig3/fig3a

Goal plot:

README:

location on server:/space/chen-syn01/1/data/cinliu/plots/fig3/fig3a

#original message date: Jul 16 2020

#email titile: Fw: updated partitioned heritability spreadsheet

#email between chc101@health.ucsd.edu, carolina.mak3@gmail.com, and cinliu@berkeley.edu

#goal plots: 1 river plot (resulting in html file), 1 brain map

#This was a project that underwent many revisions so only the final copy is kept here.

The /space/chen-syn01/1/data/cinliu/plots/fig3/fig3a/figure3A.zip file

contains the same information as /space/chen-syn01/1/data/cinliu/plots/fig3/fig3a/figure3A

#Contents explained:

#/brainmap

Contains two .pngs(brainmaps) with code and data that generated the images

note: the "ggsegChen.R" file is fixing an error in the ggsegChen package, be sure to run the file before you run "chen_brain_map.R"

"chen_brain_map.R" is the R file that will generate the brain maps

"sig_enrichment_3categ_July16.xlsx" is the data file used.

#/riverplot

Contains three .png that are just the color key of the plots, those were manually cropped.

Please read the /riverplot_code_with_README/README_Expalanation_NEEDS_MINOR_UPDATE_but_mostly_find_to_read_the_code_and_data_are_up_tod.docx for details on how to make the river plot.

The README is a little outdated in terms of the labeling and positions changed in regards to the final graph that was made.

(we ended up adding little things like numbers before the some of the names, or changing the order of some of the things).

The code and the data files included are all up to date and good to use though, it's just that the README doc might have some minor differences. For the most part you should be able to replicate the plot perfectly following the readme and using the R files and data provided in the folder.

Full code river plot

1_get_color_from_ggplots.R

rm(list=ls())

setwd("~/Desktop/2020lab/ch-email/partitioned heritability /new river/")

library(readxl)

library(xlsx)

library(dplyr)

library(data.table)

library(ggplot2)

#notes about this df: manually changes were made

#1. DGF change to category 3

thre_ldsc_z1_64_enrichmentp_categ <- read_excel("thre_ldsc_z1.64_enrichmentp_categ.xlsx", sheet = "relabel for plotting")

df <- thre_ldsc_z1_64_enrichmentp_categ

#group name update need to add numbers

hits_perregion <- read_excel("hits_perregion.xlsx")

for (i in 1:24) {

hits_perregion$region_with_number[i] <- paste(hits_perregion$Region_number[i],hits_perregion$Region[i], sep = ".")

}

df$number_region <- hits_perregion$region_with_number[match(df$region, hits_perregion$Region)]

head(df)

#add log p

df$logp<- -log10(df$Enrichment_p)

#seperate by group

group1 <- df[df$group ==1,]

group2 <- df[df$group ==2,]

group3 <- df[df$group ==3,]

###manually input posistions to assigen the x column

library(readr)

write.table(group1, "group1.csv" ,sep = '\t', quote = F, row.names = F)

write.table(group2, "group2.csv", sep = '\t', quote = F, row.names = F)

write.table(group3, "group3.csv", sep = '\t', quote = F, row.names = F)

# I took the output of each group and copied and pasted it to an excel called "manual assign x for ggplot.xlsx"

# and then I manually added a column called 'colAssigned' and then manually assigned the x value

###get colors using ggplot ####

#we are going to borrow the colors from ggplot becasue I am not sure how to assign the colors directly in river plot

#plot1

a <- read_excel("manual assign x for ggplot.xlsx", sheet = "group1")

names(a)

(setattr(a, "row.names", a$`Re-labeling for plotting`))

plot1 <- ggplot(a, aes(x=colAssigned,y=region,color=logp, label = rownames(a))) +

geom_point(size=4)

plot1 <- plot1 + geom_text(nudge_x=-0.45,size = 3.5, color ='black', check_overlap = TRUE)+

scale_x_continuous(limits=c(0.1, 3))+

labs( x= NULL, colour = "conserved")+

theme(axis.ticks.x = element_blank(),

axis.ticks.y = element_blank(),

panel.border = element_blank(),

panel.grid.major = element_blank(),

panel.grid.minor = element_blank(),

panel.background = element_blank(),

axis.line = element_line(colour = "black"))+

scale_color_gradient(limits = c(2.5,8.3),low="#e5f5e0", high="#31a354") #optional make 3 sclaes match, will generate a small warning to overwrite the current scale but it's fine

ggsave(filename = "group1.png", plot1, width = 15, height = 5, dpi = 300, units = "in")

####VERY IMPORTANT COLOR CHECK STEP ####

##check the order and color manually, make sure they line up with the order of group 1

ggplot_build(plot1)$data[[1]]

#the results should be in the same order as the df even thought the printed plot is in alphobetical order

group1$color <- ggplot_build(plot1)$data[[1]]$colour

#####################

####plot2####

b <- read_excel("manual assign x for ggplot.xlsx", sheet = "group2")

names(b)

(setattr(b, "row.names", b$`Re-labeling for plotting`))

plot2 <- ggplot(b, aes(x=colAssigned,y=region,color=logp, label = rownames(b))) +

geom_point(size=4)

plot2 <- plot2 + geom_text(nudge_x=-0.45,size = 3.5, color ='black', check_overlap = TRUE)+

scale_x_continuous(limits=c(0.1, 3))+

labs( x= NULL, colour = "developmental")+

theme(axis.ticks.x = element_blank(),

axis.ticks.y = element_blank(),

panel.border = element_blank(),

panel.grid.major = element_blank(),

panel.grid.minor = element_blank(),

panel.background = element_blank(),

axis.line = element_line(colour = "black"))+

scale_color_gradient(limits = c(2.5,8.3),low="#fee8c8", high="#e34a33") #optional make 3 sclaes match

ggsave(filename = "group2.png", plot2, width = 15, height = 5, dpi = 300, units = "in")

####VERY IMPORTANT COLOR CHECK STEP ####

##check the order and color manually, make sure they line up with the order of group 1

ggplot_build(plot2)$data[[1]]

#the results should be in the same order as the df even thought the printed plot is in alphobetical order

group2$color <- ggplot_build(plot2)$data[[1]]$colour

#####plot3####

c <- read_excel("manual assign x for ggplot.xlsx", sheet = "group3")

names(c)

(setattr(c, "row.names", c$`Re-labeling for plotting`))

plot3 <- ggplot(c, aes(x=colAssigned,y=region,color=logp, label = rownames(c))) +

geom_point(size=4)

plot3 <- plot3 + geom_text(nudge_x=-0.45,size = 3.5, color ='black', check_overlap = TRUE)+

scale_x_continuous(limits=c(0.1, 3))+

labs( x= NULL, colour = "regulatory")+

theme(axis.ticks.x = element_blank(),

axis.ticks.y = element_blank(),

panel.border = element_blank(),

panel.grid.major = element_blank(),

panel.grid.minor = element_blank(),

panel.background = element_blank(),

axis.line = element_line(colour = "black"))+

scale_color_gradient(limits = c(2.5,8.3),low="#deebf7", high="#3182bd") #optional make 3 sclaes match

ggsave(filename = "group3.png", plot3, width = 15, height = 5, dpi = 300, units = "in")

####VERY IMPORTANT COLOR CHECK STEP ####

##check the order and color manually, make sure they line up with the order of group 1

ggplot_build(plot3)$data[[1]]

#the results should be in the same order as the df even thought the printed plot is in alphobetical order

group3$color <- ggplot_build(plot3)$data[[1]]$colour

###arrange onto one page

library(gridExtra)

all_plots <- grid.arrange(plot1, plot2, plot3, nrow = 3)

ggsave(filename = "allplots.png", all_plots, width = 12.5, height = 12.5, dpi = 300, units = "in")

#####update the groups with colors

df_with_colors <- rbind(group1, group2, group3)

write.table(df_with_colors, "df_with_colors.csv" ,sep = '\t', quote = F, row.names = F)

## this is the new df we will be working with for river plot

2_new_river_with_number.R

##river plot

rm(list=ls())

setwd("~/Desktop/2020lab/ch-email/partitioned heritability /new river/")

library(readxl)

library(xlsx)

library(readr)

library(networkD3)

library(dplyr)

library(htmlwidgets)

#note about this DF: number_region numebrs were added manually

df_with_colors <- read.delim("~/Desktop/2020lab/ch-email/partitioned heritability /new river/df_with_colors.csv")

df <- df_with_colors

#river plot needs 2 inputs: nodes and linkes

###NODES####

# first input is a list of names of all the nodes and a correspoinding number that starts with 0

number_region <- unique(df$number_region)

label <- unique(df$Re.labeling.for.plotting)

nodes <- as.data.frame(union(number_region, label))

nodes$numbers <- c(0:31) #31 comes from nrow(nodes)-1

names(nodes) = c('names','numbers')

#write.table(nodes, file = "nodes.csv", sep = '\t', quote = F, row.names = F)

#####LINKS######

## match the names to numebrs from the nodes

df$n1 = nodes$numbers[match(df$number_region,nodes$names)]

df$n2 = nodes$numbers[match(df$Re.labeling.for.plotting,nodes$names)]

#assigen them all value of 1 for the same width of river for each node

df$value = c(1)

#pulling our our desired values and renaming them for the purpose of ploting later.

link <- as.data.frame(cbind(df$n1, df$n2, df$value))

class(link)

head(link)

names(link) = c('target','source','value')

nrow(link)

link$group <- as.factor(c(1:39))

nodes$group <- as.factor(c("my_unique_group"))

#######STOP for manual color update

df$color

# manually copy and past df$color into the code below (in order)

#the last color "lightgrey" speccifys the color of the nodes, make sure you keep it

hex_color <- 'd3.scaleOrdinal() .domain(["1","2", "3", "4", "5","6","7", "8", "9", "10",

"11","12", "13", "14", "15","16","17", "18", "19", "20",

"21","22", "23", "24", "25","26","27", "28", "29", "30",

"31","32", "33", "34", "35","36","37", "38", "39", "my_unique_group"]) .range([

"#60B571", "#BEE2BD", "#D7EED4", "#DFF2DA", "#97CF9C", "#CEEACC", "#D0EBCD", "#C5E5C4", "#DFF2DA",

"#DCF1D8", "#DFF2DB", "#C6E6C5", "#E44C34", "#FBAE8B", "#F79A78", "#FEE3C2", "#FEC8A6", "#6F9FCE",

"#8AB0D7", "#7AA6D2", "#4B8DC3", "#B9D0E9", "#D8E7F5", "#DAE8F6", "#D9E7F5", "#7FA9D3", "#B9D0E9",

"#C2D7EC", "#AAC6E3", "#A1C0E0", "#B1CBE6", "#ABC7E4", "#8DB2D8", "#C8DBEF", "#AFC9E5", "#C2D6EC",

"#DAE8F5", "#C2D7EC", "#CFE0F1",

"lightgrey"])'

sn <- sankeyNetwork(Links = link, Nodes = nodes,

Source = 'source',

Target = 'target',

Value = 'value',

NodeID = 'names',

colourScale = hex_color,LinkGroup="group", NodeGroup="group",

iterations = 0, fontSize = 25, nodeWidth = 10,nodePadding=10, sinksRight=FALSE,

margin = list(right = 400), width = 1400, height = 1100)

#moving the text on the left side out side of the rivers

sn <- onRender(

sn,

function(el,x){

// select all our node text

d3.select(el)

.selectAll(".node text")

.filter(function(d) { return (d.name.startsWith("A") || d.name.startsWith("B") || d.name.startsWith("D")

|| d.name.startsWith("C")|| d.name.startsWith("F")|| d.name.startsWith("P") || d.name.startsWith("E")

||d.name.startsWith("H")|| d.name.startsWith("G")); })

.attr("x", x.options.nodeWidth - 16)

.attr("text-anchor", "end");

}

)

Full code brainmaps

ggsegChen.R

(for correcting an error that occurs in ggsegChen, run this before you try to use ggsegChen in the next script)

library(ggsegChen)

library(ggseg)

library(dplyr)

for (i in 1:37){

chenAr[[6]][[i]]$.subid = 1

}

ggseg(atlas = "chenAr", mapping = aes(fill = region)) +

scale_fill_brain("chenAr", package = "ggsegChen") +

labs(title = "Chen areal (chenAr)") +

theme(legend.position = "bottom",

legend.text = element_text(size = 9)) +

guides(fill = guide_legend(ncol = 3))

for (i in 1:37){

chenTh[[6]][[i]]$.subid = 1

}

ggseg(atlas = chenTh, mapping = aes(fill = region)) +

scale_fill_brain("chenTh", package = "ggsegChen") +

labs(title = "Chen thickness (chenTh)") +

theme(legend.position = "bottom",

legend.text = element_text(size = 9)) +

guides(fill = guide_legend(ncol = 2))

chen_brain_map.R

rm(list=ls())

setwd("~/Desktop/2020lab/ch-email/partitioned heritability /brain_map_ggsegChen")

#brain map

library(ggsegChen)

library(ggseg)

library(dplyr)

library(ggplot2)

library(readxl)

#match/ add a row of region type first data first

sig_enrichment_3categ_July16 <- read_excel("~/Desktop/2020lab/ch-email/partitioned heritability /old river/sig_enrichment_3categ_July16.xlsx")

df2 <- sig_enrichment_3categ_July16

#changes/update

#superiorparietal_area, superiorparietal_thickness, and superiortemporal_area where S1, A1 are located can be sensory_motor regions.

df2$region_type[df2$region=="superiorparietal_area"] <- "sensory_motor"

df2$region_type[df2$region=="superiorparietal_thickness"] <- "sensory_motor"

df2$region_type[df2$region=="superiortemporal_area"] <- "sensory_motor"

grep("_thickness$", df2$region)

grep("_area$", df2$region)

thicnkess <- df2[grep("_thickness$", df2$region),]

area <- df2[grep("_area$", df2$region),]

#thicckness, replace the names manually to matche the atlas names

unique(thicnkess$region)

thicnkess$region[thicnkess$region=="ventralfrontal_thickness"] <- "ventral frontal"

thicnkess$region[thicnkess$region=="superiorparietal_thickness"] <- "superior parietal"

thicnkess$region[thicnkess$region=="temporalpole_thickness"] <- "temporal pole"

thicnkess$region[thicnkess$region=="medialprefrontal_thickness"] <- "medial prefrontal"

thicnkess$region[thicnkess$region=="perisylvian_thickness"] <- "perisylvian"

thicnkess$region[thicnkess$region=="ventromedialoccipital_thickness"] <- "ventromedial occipital"

thicnkess$region[thicnkess$region=="inferiorparietal_thickness"] <- "inferior parietal"

thicnkess$region[thicnkess$region=="middletemporal_thickness"] <- "middle temporal"

thicnkess$region[thicnkess$region=="occipital_thickness"] <- "occipital"

additional_region <- c("dorsolateral prefrontal", "medial temporal", "middle temporal", "motor-premotor-supplementary motor area", "occipital")

additional_region_type <- c("association",'paralimbic', "association", "sensory_motor", "sensory_motor" )

thick_add <- tibble(

region= additional_region,

region_type = additional_region_type

)

someData = tibble(

region= thicnkess$region,

region_type = thicnkess$region_type

)

someData <- rbind(thick_add, someData)

someData$region_type <- factor(someData$region_type, levels = c("paralimbic", "sensory_motor", "association"))

#add data to atlas

chenTh %>%

left_join(someData) %>%

head(10)

chenTh %>%

left_join(someData) %>%

arrange(region_type) %>%

head(10)

#plot thickness

for (i in 1:37){

chenTh[[6]][[i]]$.subid = 1

}

plot1 <- ggseg(someData, atlas=chenTh, mapping=aes(fill=region_type), color= 'white')

plot1+ scale_fill_manual(values=c("#182B49", "#00B0DA", "#006A96"))

ggsave("thickness_brainmap.png", width = 10, height = 3)

#######area#####

# replace the names manually to matche the atlas names

unique(area$region)

area$region[area$region=="anteromedialtemporal_area"] <- "anteromedial temporal"

area$region[area$region=="inferiorparietal_area"] <- "inferior parietal"

area$region[area$region=="occipital_area"] <- "occipital"

area$region[area$region=="posterolateraltemporal_area"] <- "posterolateral temporal"

area$region[area$region=="precuneus_area"] <- "precuneus"

area$region[area$region=="superiortemporal_area"] <- "superior temporal"

area$region[area$region=="motorpremotor_area"] <- "motor-premotor"

area$region[area$region=="orbitofrontal_area"] <- "orbitofrontal"

area$region[area$region=="dorsomedialfrontal_area"] <- "dorsomedial frontal"

area$region[area$region=="superiorparietal_area"] <- "superior parietal"

area$region[area$region=="dorsolateralprefrontal_area"] <- "dorsolateral prefrontal"

area$region[area$region=="parsopercularis_area"] <- "pars opercularis,subcentral"

area$region_type <- factor(area$region_type, levels = c("paralimbic", "sensory_motor", "association"))

head(chenAr)

someData = tibble(

region= area$region,

region_type = area$region_type

)

someData

chenAr %>%

left_join(someData) %>%

head(10)

chenAr %>%

left_join(someData) %>%

arrange(region_type) %>%

head(10)

#plot thickness

for (i in 1:37){

chenAr[[6]][[i]]$.subid = 1

}

plot2 <- ggseg(someData, atlas=chenAr, mapping=aes(fill=region_type), color = 'white')

plot2+ scale_fill_manual(values=c("#182B49", "#00B0DA", "#006A96"))

ggsave("area_brainmap.png", width = 10, height = 3)

Page updated

Report abuse