I am using random forest with breeding values for caterpillar and plant traits.
Some major questions:
Can I use Random forest to predict cat breeding value from plant genotype breeding value?
If I cbind the IR and plant traits together and run random forest does prediction strength of caterpillar traits increases with IR data?
Can we predict plant traits from IR traits?
Relevant output is ranking of top predictors and OOB values
Trait breeding values were calculated and are located at:
/uufs/chpc.utah.edu/common/home/u6000989/projects/mtrunc_mel/gemma/output/prediction
These were transferred into my mtrun file with
scp -r /uufs/chpc.utah.edu/common/home/u6000989/projects/mtrunc_mel/gemma/output/prediction/bv_pred_cat.txt /uufs/chpc.utah.edu/common/home/u6015714/mtrun/data
And onto my computer with
scp -r u6015714@kingspeak.chpc.utah.edu:/uufs/chpc.utah.edu/common/home/u6015714/mtrun/data/pntest_mtrunc_pruned.txt ~/Documents/mtrun
The relevant files are bv_pred_cat.txt, bv_pred_plant.txt and bv_pred_ir.txt.
Each of these files contains 94 rows, which are the individual plants which are in the same order as the genome. The traits are also in the same order as the other files.
Bv_cat has 4 columns (cat traits)
Bv_plant has 9 columns of plant traits
Bv IR has 19 columns of unknown IR spec freq, will refer to these as IR 1-19 for now.
##Set up
setwd("Documents/LAB/mtrun/")
#install.packages("randomForest")
library(randomForest)
library(ggplot2)
library(plotmo)
#load bv matrix
bv_plant<- read.table("bv_pred_plant_headers.txt", header=T,check.names=FALSE)
bv_cat<- read.table("bv_pred_cat_with_headers.txt", header=T, check.names=FALSE)
cat8<-bv_cat$p_cat_wgt_8
cat16<-bv_cat$p_cat_wgt_16
bv_ir<- read.table("bv_pred_ir_with_headers.txt", header=T, check.names=FALSE)
cat_all16<-cbind(cat16, bv_ir)
cat_all16<-cbind(cat_all16,bv_plant)
cat_all8<-cbind(cat8, bv_ir)
cat_all8<-cbind(cat_all8,bv_plant)
eclose<-bv_cat$p_cat_surv_eclose
eclose_all<-cbind(eclose, bv_ir)
eclose_all<-cbind(eclose_all,bv_plant)
pupa<-bv_cat$p_cat_surv_pupa
pupa_all<-cbind(pupa, bv_ir)
pupa_all<-cbind(pupa_all,bv_plant)
#load in csv with MSE for scatter plot
cat<-read.csv("cat_trait_Plant_bv_final_rf.csv", header=T, sep="")
##Run rf for 8 day
#MTRY=18,NODESIZE=2 == 31.93
set.seed(2)
weight8<-randomForest(cat8~., data=cat_all8, importance=TRUE, proximity=TRUE, mtry=18, nodesize=2)
weight8
importance
varImpPlot(weight8)
importance(weight8)
##Run rf for 16 day
# MTRY=12,NODESIZE=9 ==14.43;;
set.seed(7)
weight16<-randomForest(cat16~., data=cat_all16, importance=TRUE, proximity=TRUE, mtry=12, nodesize=9)
weight16
varImpPlot(weight16)
importance(weight16)
##Run rf for ECLOSION
#mtry=3, nodesize=15==5.27
set.seed(7)
ecloserf<-randomForest(eclose~., data=eclose_all, importance=TRUE, proximity=TRUE, mtry=3, nodesize=15)
ecloserf
importance(ecloserf)
varImpPlot(ecloserf)
#Scatter plot 8 day
cat$Trait <- factor(cat$Trait, levels = cat$Trait[order(cat$day_8_IncMSE)])
ggplot(cat, aes(x=day_8_IncMSE, y= Trait)) +
geom_point(aes(colour=Color, size=0.25)) +
scale_color_manual(values=c("#E31A1C","#33A02C"))+
xlab("% IncMSE") + ylab("Plant trait")+
theme(legend.position="none")+
theme(axis.text.x = element_text(size=8),
axis.text.y = element_text(size=8),
axis.title.x=element_text(size=10),
axis.title.y=element_text(size=10))
#Scatter plot 16 day
cat$Trait <-factor(cat$Trait, levels = cat$Trait[order(cat$Day_16_IncMSE)])
ggplot(cat, aes(x=Day_16_IncMSE, y= Trait)) +
geom_point(aes(colour=Color, size=.25)) +
scale_color_manual(values=c("#E31A1C","#33A02C"))+
xlab("% IncMSE") + ylab("Plant trait")+
theme(legend.position="none")+
theme(axis.text.x = element_text(size=8),
axis.text.y = element_text(size=8),
axis.title.x=element_text(size=10),
axis.title.y=element_text(size=10))
#Scatter plot eclosion
cat$Trait <- factor(cat$Trait, levels = cat$Trait[order(cat$eclose_MSE)])
ggplot(cat, aes(x=eclose_MSE, y= Trait)) +
geom_point(aes(colour=Color, size=0.25)) +
scale_color_manual(values=c("#E31A1C","#33A02C"))+
xlab("% IncMSE") + ylab("Plant trait")+
theme(legend.position="none")+
theme(axis.text.x = element_text(size=8),
axis.text.y = element_text(size=8),
axis.title.x=element_text(size=10),
axis.title.y=element_text(size=10))
#Surface plot 8 day
attach(weight8)
plotmo(weight8, degree1=FALSE, caption="", main="", degree2=c("IR_892.38", "IR_985.1"), ylab="8 day weight")
## Surface plot 16 day
attach(weight16)
plotmo(weight16, degree1=FALSE,caption="", main="", degree2=c("Leaf_tough", "IR_1104.64"), ylab="16 day weight")
#Surface plot 8 day
attach(ecloserf)
plotmo(ecloserf, degree1=FALSE, caption="", main="", degree2=c("Leaf_tough", "IR_830.13"), ylab="Eclosion")
##Supplementary figure
#8day
attach(weight8)
plotmo(weight8, degree1=FALSE, caption="", main="", degree2=c("IR_892.38", "IR_985.1"), ylab="8 day weight")
plotmo(weight8, degree1=FALSE, caption="", main="", degree2=c("IR_892.38", "Plant_height"), ylab="8 day weight")
plotmo(weight8, degree1=FALSE, caption="", main="", degree2=c("IR_892.38", "IR_1024.17"), ylab="8 day weight")
plotmo(weight8, degree1=FALSE, caption="", main="", degree2=c("IR_985.1", "Plant_height"), ylab="8 day weight")
plotmo(weight8, degree1=FALSE, caption="", main="", degree2=c("IR_985.1", "IR_1024.17"), ylab="8 day weight")
plotmo(weight8, degree1=FALSE, caption="", main="", degree2=c("IR_1024.17", "Plant_height"), ylab="8 day weight")
#Supplementary Figure
#16 day
plotmo(weight16, degree1=FALSE, caption="", main="", degree2=c("Leaf_tough", "IR_1104.64","IR_830.13","IR_818.21"), ylab="16_day_weight")