Post date: Sep 05, 2019 4:32:34 PM
We re-organized the various manuscripts we are working on for the Timema results. We now have a manuscript focused on the indel (large-scale mutations versus supergenes). I am re-working the core figures based on this. I have current versions of these in a cvs project = ms_timema_indel. Here are descriptions of any relevant code, etc. for each.
Figure 1. This is a conceptual figure. The files needed for it are all in the cvs directory; this includes the Timema images and Patrik's schematics.
Figure 2. This is the old coverage figure minus Timema podura. I simply tweaked it in inkscape, but the original code is in /uufs/chpc.utah.edu/common/home/u6000989/projects/timema_genarch/indel/.
Figure 3. This includes single SNP GWA mapping, PCA and structure haplotypes. The code is in /uufs/chpc.utah.edu/common/home/u6000989/projects/timema_genarch/mapping/mk_indel_figure3.R
library(scales)
library(latex2exp)
## read files or load workspace
bartRG<-read.table("TbartJLin_GenABEL_Zach_inputs/latRG/Tbart_JL_latRG.GenABEL_sparse_without_correction_for_pop_structure.txt",header=TRUE)
chumashRG<-read.table("Tchumash_GR8.06_GenABEL_v1.3b2/latRG/Tchum_GR8.06.latRG.GenABEL_sparse_without_correction_for_pop_structure.txt",header=TRUE)
bartGB<-read.table("TbartJLin_GenABEL_Zach_inputs/latGB/Tbart_JL_latGB.GenABEL_sparse_without_correction_for_pop_structure.txt",header=TRUE)
chumashGB<-read.table("Tchumash_GR8.06_GenABEL_v1.3b2/latGB/Tchum_GR8.06.latGB.GenABEL_sparse_without_correction_for_pop_structure.txt",header=TRUE)
## figure with pca, structure haps and trees
gTbart<-read.table("../pcaStruct/pcafiles4Zach/Tbartmani_JL/Tbartmani_JL.allgreen.bin.bial.noindel.qs20.cov50.mdp204Mdp4080.maf0.01_scaf128_4.97Mbp-6.19Mbp.geno",header=FALSE)
gTchum<-read.table("../pcaStruct/pcafiles4Zach/Tchumash_GR8.06/Tchumash_GR8.06.color.bin.bial.noindel.qs20.cov50.mdp265Mdp5310.maf0.01_scaf128.geno_4.97Mbp-6.19Mbp.geno",header=FALSE)
## 5-6mb
gTbart<-t(as.matrix(gTbart[1:126,-c(1:3)]))
gTchum<-t(as.matrix(gTchum[1:126,-c(1:3)]))## yes these both just happen to be 1:126
## KL classifications
klBart<-read.table("../pcaStruct/pcafiles4Zach/Tbartmani_JL/bartKL.txt",header=TRUE)
idsBart<-read.table("../pcaStruct/pcafiles4Zach/Tbartmani_JL/Tbartmani_JL_PCA_pheno_morphs.dsv",header=TRUE)
keep<-which(as.character(idsBart$id) %in% as.character(klBart$id))
idsBart<-idsBart[keep,]
gTbart<-gTbart[keep,]
colBart<-rep(NA,dim(idsBart)[1])
N<-length(colBart)
for(i in 1:N){
a<-which(klBart$id==as.character(idsBart[i,1]))
colBart[i]<-klBart$bartKL[a]
}
klChum<-read.table("../pcaStruct/pcafiles4Zach/Tchumash_GR8.06/chumKL.txt",header=TRUE)
idsChum<-read.table("../pcaStruct/pcafiles4Zach/Tchumash_GR8.06/Tchumash_GR8.06_PCA_color.bin.dsv",header=TRUE)
keep<-which(klChum$id %in% idsChum$id)
colChum<-rep(NA,dim(idsChum)[1])
N<-length(colChum)
for(i in 1:N){
a<-which(klChum$id==as.character(idsChum[i,1]))
colChum[i]<-klChum$chumKL[a]
}
## pcas
pcTbart<-prcomp(gTbart,center=TRUE,scale=FALSE)
pcTchum<-prcomp(gTchum,center=TRUE,scale=FALSE)
## struct
bart1_g<-read.table("../pcaStruct/in_tbart_nogaps.str",skip=2)
bart1<-read.table("../pcaStruct/o_tbart_gaps_ch1_ss",header=F)
bart_ph<-bart1_g[seq(1,816,2),2]
chum1_g<-read.table("../pcaStruct/in_tchum_nogaps.str",skip=2)
chum1<-read.table("../pcaStruct/o_tchum_gaps_ch1_ss",header=F)
chum_ph<-chum1_g[seq(1,1062,2),2]
strPlot<-function(X=NA,mval=0.5){
N<-max(X[,1])
L<-max(X[,2])
X_q11<-matrix(X[,3]*X[,5],nrow=N,ncol=L,byrow=TRUE)
X_q22<-matrix(X[,4]*X[,6],nrow=N,ncol=L,byrow=TRUE)
X_q12<-matrix((X[,3]*X[,6])+(X[,4]*X[,5]),nrow=N,ncol=L,byrow=TRUE)
X_est<-matrix(NA,nrow=N,ncol=L)
X_est[X_q11 > mval]<-0
X_est[X_q12 > mval]<-1
X_est[X_q22 > mval]<-2
return(X_est)
}
## make plots
pdf("Fig3_indel_mapping.pdf",width=22,height=8)
layout(matrix(c(1,2,5,6,3,4,7,8),nrow=2,ncol=4,byrow=TRUE),widths=c(5,5,4,7),heights=c(3,3))
cs<-c("darkgray","indianred2","cornflowerblue")
par(mar=c(4.5,5.5,3,.5))
mkplotLg8<-function(X=NA,c1=NA,hdr=NA){
a<-which(X$scaf==702.1)
X$rel.pos[a]<-rev(X$rel.pos[a])
lg8<-which((X$scaf == 702.1 & X$pos < 6000000) | (X$scaf == 128 & X$pos < 8000000))
#lg8<-which(X$scaf == 702.1 | X$scaf == 128)
#lg8<-which(X$lg == 8)
np<-X$neg.log.p[lg8]
xx<-X$rel.pos[lg8]
a<-which(np > .5)
cx<-1.6
ca<-1.2
cm<-1.6
plot(xx[a],np[a],col=c1,pch=19,axes=FALSE,cex.lab=cx,xlab="LG 8 SNPs",ylab="Neg. log p",type='n')
indel<-which(X$scaf==128 & X$pos >= 5000000 & X$pos <= 6000000)
lb<-min(X$rel.pos[indel])
ub<-max(X$rel.pos[indel])
ylb<-0.5
yub<-max(np)
polygon(c(lb,lb,ub,ub),c(ylb,yub,yub,ylb),col=alpha("goldenrod2",.5),border=NA,lty=2)
mel<-c(which(X$scaf==702.1 & X$pos<=4139489),which(X$scaf==128 & X$pos<5000000))
lb<-min(X$rel.pos[mel])
ub<-max(X$rel.pos[mel])
polygon(c(lb,lb,ub,ub),c(ylb,yub,yub,ylb),col=alpha("darkgray",.5),border=NA,lty=2)
points(xx[a],np[a],col=c1,pch=19)
axis(2,cex.axis=ca)
title(main=hdr,cex.main=cm)
box()
}
mkplotGw<-function(X=NA,c2=NA,hdr=NA){
a<-which(X$scaf==702.1)
X$rel.pos[a]<-rev(X$rel.pos[a])
lg<-which(X$lg != 14)
np<-X$neg.log.p[lg]
xx<-X$rel.pos[lg]
a<-which(np > 1)
L<-length(lg)
ps<-1:L
mids<-tapply(X=ps,INDEX=X$lg[lg],mean)[-14]
cx<-1.6
ca<-1.2
cm<-1.6
mycs<-rep(c2[1],L)
mycs[X$lg==8]<-c2[2]
plot(xx[a],np[a],col=mycs[a],pch=19,axes=FALSE,cex.lab=cx,xlab="Linkage group",ylab="Neg. log p")
axis(1,at=mids,1:13,cex.axis=ca)
axis(2,cex.axis=ca)
title(main=hdr,cex.main=cm)
box()
}
## GWA
mkplotGw(X=bartRG,c2=cs[1:2],hdr=expression(paste("(a) ",italic("T. bartmani")," Genome-wide GWA, RG",sep="")))
mkplotLg8(X=bartRG,c1=cs[2],hdr=expression(paste("(b) ",italic("T. bartmani")," LG 8 GWA, RG",sep="")))
mkplotGw(X=chumashRG,c2=cs[1:2],hdr=expression(paste("(e) ",italic("T. chumash")," Genome-wide GWA, RG",sep="")))
mkplotLg8(X=chumashRG,c1=cs[2],hdr=expression(paste("(f) ",italic("T. chumash")," LG 8 GWA, RG",sep="")))
cl<-1.6
cm<-1.6
# bartmani
par(pty='s')
par(mar=c(5,5.5,3.5,0.5))
plot(pcTbart$x[,1],pcTbart$x[,2],col=c("darkgreen","brown")[colBart+1],pch=20,cex.lab=cl,xlab="PC1",ylab="PC2",cex=cx)
title(main=expression(paste("(c) PCA, ",italic("T. bartmani"),sep="")),cex.main=cm)
par(pty='m')
par(mar=c(5,5.5,3.5,0.5))
snps<-read.table("../pcaStruct/bart_snps.txt",header=FALSE)
bart_est<-strPlot(X=bart1,mval=0.5)
scafs<-which((snps[,1] == 128 & snps[,2] < 10000000) | (snps[,1] == 702.1 & snps[,2] > 4000000))
image(t(bart_est[order(bart_ph),scafs]),breaks=c(-0.5,0.5,1.5,2.5),col=c("darkgreen","lightgray","brown"),axes=FALSE,xlab="LG 8",ylab="Individual",cex.lab=cl)
oo<-tapply(INDEX=snps[scafs,1],X=1:length(scafs),mean)
bnd<-mean(bart_ph)
abline(h=1-bnd,lwd=2)
loc<-which(snps[scafs,1]==128 & (snps[scafs,2] > 5000000 & snps[scafs,2] < 6000000))/length(scafs)
lb<-min(loc)
ub<-max(loc)
par(xpd=NA)
ylb<--0.07;yub<--0.03
polygon(c(lb,lb,ub,ub,lb),c(ylb,yub,yub,ylb,ylb),col="black")
par(xpd=FALSE)
box()
title(main=expression(paste("(d) Haplotype blocks, ",italic("T. bartmani"),sep="")),cex.main=cm)
## chumash
par(pty='s')
par(mar=c(5,5.5,3.5,0.5))
plot(pcTchum$x[,1],pcTchum$x[,2],col=c("darkgreen","brown")[colChum+1],pch=20,cex.lab=cl,xlab="PC1",ylab="PC2",cex=cx)
title(main=expression(paste("(g) PCA, ",italic("T. chumash"),sep="")),cex.main=cm)
par(pty='m')
par(mar=c(5,5.5,3.5,0.5))
snps<-read.table("../pcaStruct/chum_snps.txt",header=FALSE)
chum_est<-strPlot(X=chum1,mval=0.5)
scafs<-which((snps[,1] == 128 & snps[,2] < 10000000) | (snps[,1] == 702.1 & snps[,2] > 4000000))
image(t(chum_est[order(chum_ph),scafs]),breaks=c(-0.5,0.5,1.5,2.5),col=c("brown","lightgray","darkgreen"),axes=FALSE,xlab="LG 8",ylab="Individual",cex.lab=cl)
oo<-tapply(INDEX=snps[,1],X=1:length(scafs),mean)
bnd<-mean(chum_ph)
abline(h=1-bnd,lwd=2)
loc<-which(snps[scafs,1]==128 & (snps[scafs,2] > 5000000 & snps[scafs,2] < 6000000))/length(scafs)
lb<-min(loc)
ub<-max(loc)
par(xpd=NA)
ylb<--0.07;yub<--0.03
polygon(c(lb,lb,ub,ub,lb),c(ylb,yub,yub,ylb,ylb),col="black")
par(xpd=FALSE)
box()
title(main=expression(paste("(h) Haplotype blocks, ",italic("T. chumash"),sep="")),cex.main=cm)
dev.off()
Figure 4. This figure includes most of the polygenic mapping T. chumash results. Most of this is the same as before, but it now includes QTL estimates for the indel region and for the rest of Mel-Stripe. Relevant input files are in /uufs/chpc.utah.edu/common/home/u6000989/projects/timema_genarch/mapping/ and the script to make the figure is
mk_indel_figure4.R
library(scales)
library(latex2exp)
## read files or load workspace
#RG<-read.table("/uufs/chpc.utah.edu/common/home/u6000989/projects/timema_genarch/mapping/Tchumash_GR8.06_gemma_lgNA_excluded_v1.3b2/latRG/output/Tchumash_GR8.06.latRG.bial.noindel.qs20.cov50.mdp265Mdp5310.maf0.01.lgNA.excluded_sparse_effects_all_SNPs_except_MAFcutoff_formatted.txt",header=TRUE)
#GB<-read.table("/uufs/chpc.utah.edu/common/home/u6000989/projects/timema_genarch/mapping/Tchumash_GR8.06_gemma_lgNA_excluded_v1.3b2/latGB/output/Tchumash_GR8.06.latGB.bial.noindel.qs20.cov50.mdp265Mdp5310.maf0.01.lgNA.excluded_sparse_effects_all_SNPs_except_MAFcutoff_formatted.txt",header=TRUE)
#load("fig2x.rdat")
## make plots
pdf("Fig4_indel_chumash.pdf",width=10,height=12)
layout(matrix(c(1:8),nrow=4,ncol=2,byrow=TRUE),widths=c(5,5),heights=c(3,3,3,3))
cs<-c("darkgray","indianred2","cornflowerblue")
mkplotInDel<-function(X=NA,c1=NA,hdr=NA){
indel<-which(X$scaffold == 128 & X$PS >= 5000000 & X$PS <= 6000000)
cx<-1.6
ca<-1.2
cm<-1.6
plot(X$PS[indel],X$gamma[indel],col=c1,pch=19,axes=FALSE,ylim=c(0,1),cex=1.1,cex.lab=cx,xlab="Scaffold 128 pos. (bp)",ylab="PIP")
#points(X$PS[indel],X$gamma[indel],pch=21,cex=1.2,lwd=.3)
axis(1,cex.axis=ca)
axis(2,at=c(0,.5,1),cex.axis=ca)
title(main=hdr,cex.main=cm)
box()
}
mkplotPips<-function(X=NA,c1=NA,hdr=NA){
indel<-which(X$scaffold == 128 & X$PS >= 5000000 & X$PS <= 6000000)
mel<-c(which(X$scaffold == 128 & X$PS < 5000000),which(X$scffold==702.1 & X$PS <= 4139489))
Nindel<-rep(NA,5000)
Nmel<-Nindel
for(i in 1:5000){
Nindel[i]<-sum(rbinom(n=length(indel),size=1,prob=X$gamma[indel]))
Nmel[i]<-sum(rbinom(n=length(mel),size=1,prob=X$gamma[mel]))
}
mnIndel<-mean(Nindel)
mnMel<-mean(Nmel)
sdIndel<-sd(Nindel)
sdMel<-sd(Nmel)
cx<-1.6
ca<-1.2
cm<-1.6
xx<-barplot(c(mnIndel,mnMel),width=.25,space=c(.6,.7),xlim=c(0,1),beside=TRUE,col=c1,ylim=c(0,6),names.arg=c("indel","mel-stripe"),cex.lab=cx,cex.axis=ca,
ylab="No. of QTN",cex.names=cx)
ubs<-c(mnIndel+sdIndel,mnMel+sdMel)
lbs<-c(mnIndel-sdIndel,mnMel-sdMel)
lbs[1]<-max(lbs[1],0)
lbs[2]<-max(lbs[2],0)
segments(xx,ubs,xx,lbs,lwd=2)
box()
title(main=hdr,cex.main=cm)
box()
o<-rbind(c(mnIndel,lbs[1],ubs[1]),c(mnMel,lbs[2],ubs[2]))
return(o)
}
mkplotGw<-function(X=NA,c2=NA,hdr=NA){
L<-dim(X)[1]
minp<-which(X$gamma>0.0005)
ps<-1:L
mids<-tapply(X=ps,INDEX=X$CHR,mean)
cx<-1.6
ca<-1.2
cm<-1.6
mycs<-rep(c2[1],L)
mycs[X$CHR==8]<-c2[2]
plot(ps[minp],X$gamma[minp],col=mycs[minp],pch=19,axes=FALSE,ylim=c(0,1),cex.lab=cx,xlab="Linkage group",ylab="PIP")
axis(1,at=mids,1:13,cex.axis=ca)
axis(2,at=c(0,.5,1),cex.axis=ca)
title(main=hdr,cex.main=cm)
box()
}
## blank fo images
par(mar=c(4.5,5.5,3,.5))
## pip plots
mkplotGw(X=RG,c2=cs[1:2],hdr=expression(paste("(a) Genome-wide PIPs, RG")))
mkplotInDel(X=RG,c1=cs[2],hdr=expression(paste("(b) Scaf. 128 PIPs, RG")))
mkplotGw(X=GB,c2=cs[c(1,3)],hdr=expression(paste("(c) Genome-wide PIPs, GB")))
mkplotInDel(X=GB,c1=cs[c(3)],hdr=expression(paste("(d) Scaf. 128 PIPs, GB")))
estRG<-mkplotPips(X=RG,c1=cs[2],hdr=expression(paste("(e) Number of QTN, RG")))
estGB<-mkplotPips(X=GB,c1=cs[3],hdr=expression(paste("(f) Number of QTN, GB")))
## LD plot
gen<-read.table("Tchumash_GR8.06.latGB.bial.noindel.qs20.cov50.mdp265Mdp5310.maf0.01.scaf128_5-6Mbp.geno")
mapGB<-read.table("/uufs/chpc.utah.edu/common/home/u6000989/projects/timema_genarch/mapping/Tchumash_GR8.06_gemma_scaf128_5-6Mbp_v1.3b2/latGB/output/Tchumash_GR8.06.latGB.bial.noindel.qs20.cov50.mdp265Mdp5310.maf0.01.scaf128_5-6Mbp_sparse_effects_all_SNPs_except_MAFcutoff_formatted.txt",header=TRUE)
mapRG<-read.table("/uufs/chpc.utah.edu/common/home/u6000989/projects/timema_genarch/mapping/Tchumash_GR8.06_gemma_scaf128_5-6Mbp_v1.3b2/latRG/output/Tchumash_GR8.06.latRG.bial.noindel.qs20.cov50.mdp265Mdp5310.maf0.01.scaf128_5-6Mbp_sparse_effects_all_SNPs_except_MAFcutoff_formatted.txt",header=TRUE)
G<-as.matrix(gen[,-c(1:3)])
r2Gb<-matrix(NA,nrow=126,ncol=126)
for(i in 1:126){for(j in 1:126){
r2Gb[i,j]<-cor(G[i,],G[j,])^2
}}
distGb<-matrix(NA,nrow=126,ncol=126)
for(i in 1:126){for(j in 1:126){
distGb[i,j]<-abs(mapGB$PS[i]-mapGB$PS[j])
}}
i<-0.4
plot(distGb[lower.tri(distGb)],r2Gb[lower.tri(r2Gb)],col="darkgray",pch=20,xlab="Distance (bp)",ylab="LD (r2)",cex.lab=1.6,cex.axis=1.2)
hi<-which(mapGB$gamma > i | mapRG$gamma > i)
hi_distGb<-distGb[hi,hi]
hi_r2Gb<-r2Gb[hi,hi]
x<-hi_distGb[lower.tri(hi_distGb)]
y<-hi_r2Gb[lower.tri(hi_r2Gb)]
points(hi_distGb[lower.tri(hi_distGb)],hi_r2Gb[lower.tri(hi_r2Gb)],col="orange",pch=19)
title(main=expression(paste("(g) LD, PIP > 0.4")),cex.main=1.6)
## epistasis
# function to convert rgb to hex
rgb2hex <-
function(col) {
rgb(col[1],col[2],col[3], maxColorValue=255)
}
# information about T. chumash individuals, including colour (coded in hex)
# (provided by R. Villoutreix)
inds.info<-read.csv("2015Timema_sp_complete.csv", header=T)
# 10 SNPs from scaffold 128 (LG8) most associated with latRG in T. chummash
geno<-read.table("10snps_lg8scaf128_latRG.dsv", header=T)
pheno<-scan("Tchumash_GR8.06.latRG.pheno")
names(pheno)<-colnames(geno)[-c(1:3)]
# melanic allele dosage (sum of allele dosages for all SNPs)
meldosage<-apply(geno[,-c(1:3)], 2, sum)
# data frame with melanic allele dosage and phenotype scores
mel.df<-data.frame(meldosage,pheno)
# colour of bugs
colours<-as.character(inds.info$lat.s)
names(colours)<-paste("X",inds.info$id, sep="")
colours<-colours[names(pheno)]
fit1 <-lm(pheno~meldosage, data= mel.df)
# mean color for each category
phenosi<-list()
for (i in 0:20){
idsi<-names(meldosage[meldosage==i])
if (length(idsi)>0){
phenosi[[i+1]]<-as.numeric(pheno[idsi])
}
else{
phenosi[[i+1]]<-NA
}
}
# discretize to integers to be used in boxplots
meldosage.discrete<-round(apply(geno[,-c(1:3)], 2, sum))
mel.df2<-data.frame(meldosage.discrete, pheno)
# mean colour for each discrete category (0-20)
mean.colours<-vector()
for (md in sort(unique(meldosage.discrete))){
ids<-rownames(mel.df2[mel.df2$meldosage.discrete==md,])
ids.cols<-colours[ids]
mean.col<-rgb2hex(as.numeric(apply(col2rgb(ids.cols), 1, mean)))
mean.colours<-c(mean.colours, mean.col)
}
boxplot(pheno~meldosage.discrete,col=mean.colours,xlab="Melanic allele dosage",ylab="Phenotypic score (RG)",cex.lab=1.6,cex.axis=1.2)
title(main=expression(paste("(h) Phenotypic effects, RG")),cex.main=1.6)
dev.off()