L. melissa/M. truncatula, Gemma runs

Post date: Oct 05, 2017 7:51:1 PM

Getting things ready for gemma.

1. I put all of the phenotype data together, which included some manual curation of the caterpillar data (the original version is on google drive). Here is the code. We end up with 13 phenotypes (9 plant and 4 caterpillar) for 94 plant lines (3 didn't germinate and 3 lacked genetic data).

Run combine_dat.R, in /uufs/chpc.utah.edu/common/home/u6000989/projects/mtrunc_mel/data

## trichome and leaf data

dat<-read.csv("Trichome-density_leaf-area.csv")

p_leaf_length<-tapply(X=dat$Leaf.length_.mm.,INDEX=dat$Line,mean,na.rm=TRUE)

p_leaf_width<-tapply(X=dat$Leaf.width_.mm.,INDEX=dat$Line,mean,na.rm=TRUE)

p_trichome<-tapply(X=dat$Trichome.number,INDEX=dat$Line,mean,na.rm=TRUE)

p_leaf_area<-tapply(X=dat$Leaf.length_.mm. * dat$Leaf.width_.mm.,INDEX=dat$Line,mean,na.rm=TRUE)

p_leaf_shape<-tapply(X=dat$Leaf.length_.mm. / dat$Leaf.width_.mm.,INDEX=dat$Line,mean,na.rm=TRUE)

#### height

h<-read.csv("height.csv")

p_height<-apply(h[,seq(3,15,3)],1,mean,na.rm=TRUE)

## leaf toughness

pen<-read.csv("penetrometer.csv")

pmn<-apply(pen[,3:5],1,mean,na.rm=TRUE)

p_leaf_tough<-tapply(X=pmn,INDEX=pen$Line,mean,na.rm=TRUE)

## leaf weigth and SLA

wgt<-read.csv("leaf_weight.csv")

p_leaf_weight<-tapply(INDEX=wgt$Line,X=wgt$Weight_of_dry_middle_leaflet_.g.,mean,na.rm= TRUE)

p_leaf_sla<-tapply(X=(dat$Leaf.length_.mm. * dat$Leaf.width_.mm.)/wgt$Weight_of_dry_middle_leaflet_.g.,INDEX=dat$Line,mean,na.rm=TRUE)

leafDat<-cbind(p_leaf_length,p_leaf_width,p_leaf_area,p_leaf_shape,p_leaf_weight,p_leaf_sla,p_trichome,p_leaf_tough,p_height)

lmat<-matrix(1,nrow=9,ncol=9)

for(i in 1:8){

for(j in (i+1):9){

lmat[i,j]<-cor(leafDat[,i],leafDat[,j])

lmat[j,i]<-lmat[i,j]

}

colnames(lmat)<-colnames(leafDat)

rownames(lmat)<-colnames(lmat)

write.table(data.frame(lines=names(p_leaf_sla),leafDat),"pheno_plant.txt",quote=FALSE,row.names=FALSE,col.names=TRUE)

## subset with genetic data

gids<-scan("indIdsNum.txt")

x<-which(as.numeric(names(p_leaf_sla)) %in% gids)

subLeafDat<-leafDat[x,]

write.table(data.frame(lines=names(p_leaf_sla[x]),subLeafDat),"sub_pheno_plant.txt",quote=FALSE,row.names=FALSE,col.names=TRUE)

## add caterpillar data

dat<-read.table("caterpillars_matrix.csv",header=TRUE,sep=",")

p_cat_wgt_8<-tapply(X=dat$X8_day_weight_.g.,INDEX=dat$Plant_line,mean,na.rm=TRUE)

p_cat_wgt_16<-tapply(X=dat$X16_day_weight_.g.,INDEX=dat$Plant_line,mean,na.rm=TRUE)

p_cat_surv_pupa<-tapply(X=is.na(dat$Pupal_weight_.g.)==FALSE,INDEX=dat$Plant_line,mean,na.rm=TRUE)

p_cat_surv_eclose<-tapply(X=is.na(dat$Eclosion_date)==FALSE,INDEX=dat$Plant_line,mean,na.rm=TRUE)

## big pheno table

pheno<-cbind(p_leaf_length,p_leaf_width,p_leaf_area,p_leaf_shape,p_leaf_weight,p_leaf_sla,p_trichome,p_leaf_tough,p_height,p_cat_wgt_8,p_cat_wgt_16,p_cat_surv_pupa,p_cat_surv_eclose)

phmat<-matrix(1,nrow=13,ncol=13)

for(i in 1:12){

for(j in (i+1):13){

phmat[i,j]<-cor(pheno[,i],pheno[,j])

phmat[j,i]<-phmat[i,j]

}

colnames(phmat)<-colnames(pheno)

rownames(phmat)<-colnames(phmat)

library(RColorBrewer)

cs<-brewer.pal(n=10,"BrBG")

br<-seq(-1,1,0.2)

image(phmat,breaks=br,col=cs)

write.table(data.frame(lines=names(p_leaf_sla),pheno),"pheno_plant_cat.txt",quote=FALSE,row.names=FALSE,col.names=TRUE)

## subset with genetic data

gids<-scan("indIdsNum.txt")

x<-which(as.numeric(names(p_leaf_sla)) %in% gids)

subpheno<-pheno[x,]

write.table(data.frame(lines=names(p_leaf_sla[x]),subpheno),"sub_pheno_plant_cat.txt",quote=FALSE,row.names=FALSE,col.names=TRUE)

2. For the genotype data, I just needed to format the file.

First get SNP ids,

tail -n +3 ../mtruncSnps/mtrunc_pruned.gl | cut -f 1 -d " " | perl -p -i -e 's/(\S+)/snp\1/' > snpCoords.txt

Then make the genotype file,

perl scripts/mkGeno.pl pntest_mtrunc_pruned.txt

3. I put everything in the gemma sub-directory (/uufs/chpc.utah.edu/common/home/u6000989/projects/mtrunc_mel/gemma) which contains its own scripts sub-directory (copied from the wing mapping project and modified). I ran the BSLMM on all 13 traits, each with 16 chains, 2 million sampling steps, 500,000 burnin, 20 thinning interval, and a MAF cutoff of 1%. Note, I am using gemma version 0.94.1. I ran,

perl scripts/wrap_qsub_slurm_gemma.pl

This just calls a fork script for each phenotype,