Post date: Sep 22, 2015 10:39:42 PM
All results regarding LD are in /labs/evolution/projects/cmaclentil/popgen/ld.
I calculated LD between all pairs of antagonistic pleiotropy SNPs in each line (I used r^2). The R script I used to make plots and calculate summaries is mkApLdPlots.R. Here it is:
N<-40
Nm<-48
L<-51371
snps<-read.table("../locusids.txt",header=FALSE)
cl<-1.6
ca<-1.2
## L1 AP SNPs
l1Snps<-read.table("l1APsnps.txt",header=FALSE)
l1ids<-which(as.character(snps[,1]) %in% as.character(l1Snps[,1]))
Gl1<-t(matrix(scan("g_L1-F100.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldl1<-cor(t(Gl1[l1ids,]))^2
Gr1<-t(matrix(scan("g_L1R-F46.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldr1<-cor(t(Gr1[l1ids,]))^2
ld1<-ldl1
ld1[upper.tri(ld1)]<-ldr1[upper.tri(ldr1)]
pdf("ldl1.pdf",width=5,height=10)
layout(matrix(1:2,nrow=2),heights=c(5,5),widths=5)
par(mar=c(4,5.5,2,0.5))
dl<-density(as.vector(ldl1[upper.tri(ldl1)]),from=0,to=1,adj=1.5)
dr<-density(as.vector(ldr1[upper.tri(ldl1)]),from=0,to=1,adj=1.5)
plot(dl,col="orange",lwd=1.5,xlab=expression(paste("linkage disequilibrium (",r^2,")")),main="",cex.lab=cl,cex.axis=ca)
lines(dr,lwd=1.5,col="blue")
legend(0.53,16.2,c("lentil line","reversion line"),col=c("orange","blue"),lwd=1.5)
title(main="(a) distribution of pairwise LD",adj=0)
image.plot(ld1,col= brewer.pal(n=5,"Blues"),breaks=c(-1,0.01,0.1,0.2,0.5,1.1),horizontal=TRUE,axes=FALSE,xlab="",ylab="locus (SNP)",legend.shrink=0.8,legend.mar=4,cex.lab=cl,legend.lab=expression(paste("LD (",r^2,")")),legend.line=-1.8)
box()
title(main="(b) matrix of pairwise LD",adj=0)
dev.off()
## L2 AP SNPs
l2Snps<-read.table("l2APsnps.txt",header=FALSE)
l2ids<-which(as.character(snps[,1]) %in% as.character(l2Snps[,1]))
Gl2<-t(matrix(scan("g_L2-F87.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldl2<-cor(t(Gl2[l2ids,]))^2
Gr2<-t(matrix(scan("g_L2R-F45.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldr2<-cor(t(Gr2[l2ids,]))^2
ld2<-ldl2
ld2[upper.tri(ld2)]<-ldr2[upper.tri(ldr2)]
pdf("ldl2.pdf",width=5,height=10)
layout(matrix(1:2,nrow=2),heights=c(5,5),widths=5)
par(mar=c(4,5.5,2,0.5))
dl<-density(as.vector(ldl2[upper.tri(ldl2)]),from=0,to=1,adj=1.5)
dr<-density(as.vector(ldr2[upper.tri(ldl2)]),from=0,to=1,adj=1.5)
plot(dl,col="orange",lwd=1.5,xlab=expression(paste("linkage disequilibrium (",r^2,")")),main="",ylim=c(0,10),cex.lab=cl,cex.axis=ca)
lines(dr,lwd=1.5,col="blue")
legend(0.53,10,c("lentil line","reversion line"),col=c("orange","blue"),lwd=1.5)
title(main="(a) distribution of pairwise LD",adj=0)
image.plot(ld2,col= brewer.pal(n=5,"Blues"),breaks=c(-1,0.01,0.1,0.2,0.5,1.1),horizontal=TRUE,axes=FALSE,xlab="",ylab="locus (SNP)",legend.shrink=0.8,legend.mar=4,cex.lab=cl,legend.lab=expression(paste("LD (",r^2,")")),legend.line=-1.8)
box()
title(main="(b) matrix of pairwise LD",adj=0)
dev.off()
## L3 AP SNPs
l3Snps<-read.table("l3APsnps.txt",header=FALSE)
l3ids<-which(as.character(snps[,1]) %in% as.character(l3Snps[,1]))
Gl3<-t(matrix(scan("g_L3-F85.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldl3<-cor(t(Gl3[l3ids,]))^2
Gr3<-t(matrix(scan("g_L3R-F45.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldr3<-cor(t(Gr3[l3ids,]))^2
ld3<-ldl3
ld3[upper.tri(ld3)]<-ldr3[upper.tri(ldr3)]
pdf("ldl3.pdf",width=5,height=10)
layout(matrix(1:2,nrow=2),heights=c(5,5),widths=5)
par(mar=c(4,5.5,2,0.5))
dl<-density(as.vector(ldl3[upper.tri(ldl3)]),from=0,to=1,adj=1.5)
dr<-density(as.vector(ldr3[upper.tri(ldl3)]),from=0,to=1,adj=1.5)
plot(dl,col="orange",lwd=1.5,xlab=expression(paste("linkage disequilibrium (",r^2,")")),main="",cex.lab=cl,cex.axis=ca)
lines(dr,lwd=1.5,col="blue")
legend(0.53,31,c("lentil line","reversion line"),col=c("orange","blue"),lwd=1.5)
title(main="(a) distribution of pairwise LD",adj=0)
image.plot(ld3,col= brewer.pal(n=5,"Blues"),breaks=c(-1,0.01,0.1,0.2,0.5,1.1),horizontal=TRUE,axes=FALSE,xlab="",ylab="locus (SNP)",legend.shrink=0.8,legend.mar=4,cex.lab=cl,legend.lab=expression(paste("LD (",r^2,")")),legend.line=-1.8)
box()
title(main="(b) matrix of pairwise LD",adj=0)
dev.off()
#### all pairwise matrixes, multiple time points ############
Gr1a<-t(matrix(scan("g_L1R-F35.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldr1a<-cor(t(Gr1a[l1ids,]))^2
Gr2a<-t(matrix(scan("g_L2R-F35.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldr2a<-cor(t(Gr2a[l2ids,]))^2
Gr3a<-t(matrix(scan("g_L3R-F35.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldr3a<-cor(t(Gr3a[l3ids,]))^2
Gl1a<-t(matrix(scan("g_L1-F91.txt",n=N*L,sep=","),nrow=N,ncol=L,byrow=T))
ldl1a<-cor(t(Gl1a[l1ids,]))^2
cm<-2.2
cl<-2
pdf("ldmats.pdf",width=10,height=15)
layout(matrix(1:6,nrow=3,byrow=T),heights=c(5,5,5),widths=c(5,5))
par(mar=c(3,5,5,3))
par(oma=c(0,0,0,4.5))
ld<-ldl1
ld[upper.tri(ld)]<-ldl1a[upper.tri(ldl1a)]
image(ld,col= brewer.pal(n=5,"Blues"),breaks=c(-1,0.01,0.1,0.2,0.5,1.1),axes=FALSE,xlab="",ylab="locus (SNP)",cex.lab=cl)
box()
title(main="(a) L1-F91 and L1-F100",adj=0,cex.main=cm)
ld<-ldl2
ld[upper.tri(ld)]<-NA
image(ld,col= brewer.pal(n=5,"Blues"),breaks=c(-1,0.01,0.1,0.2,0.5,1.1),axes=FALSE,xlab="",ylab="")
box()
title(main="(b) L2-F87",adj=0,cex.main=cm)
ld<-ldl3
ld[upper.tri(ld)]<-NA
image(ld,col= brewer.pal(n=5,"Blues"),breaks=c(-1,0.01,0.1,0.2,0.5,1.1),axes=FALSE,xlab="",ylab="locus (SNP)",cex.lab=cl)
box()
title(main="(c) L3-F84",adj=0,cex.main=cm)
ld<-ldr1
ld[upper.tri(ld)]<-ldr1a[upper.tri(ldr1a)]
image(ld,col= brewer.pal(n=5,"Blues"),breaks=c(-1,0.01,0.1,0.2,0.5,1.1),axes=FALSE,xlab="",ylab="")
box()
title(main="(d) L1R-F35 and L1R-F46",adj=0,cex.main=cm)
ld<-ldr2
ld[upper.tri(ld)]<-ldr2a[upper.tri(ldr2a)]
image(ld,col= brewer.pal(n=5,"Blues"),breaks=c(-1,0.01,0.1,0.2,0.5,1.1),axes=FALSE,xlab="",ylab="locus (SNP)",cex.lab=cl)
box()
title(main="(e) L2R-F35 and L2R-F45",adj=0,cex.main=cm)
ld<-ldr3
ld[upper.tri(ld)]<-ldr3a[upper.tri(ldr3a)]
image(ld,col= brewer.pal(n=5,"Blues"),breaks=c(-1,0.01,0.1,0.2,0.5,1.1),axes=FALSE,xlab="",ylab="")
box()
title(main="(f) L3R-F35 and L3R-F45",adj=0,cex.main=cm)
par(oma=c(0,0,0,1))
image.plot(ld,col= brewer.pal(n=5,"Blues"),breaks=c(-1,0.01,0.1,0.2,0.5,1.1),horizontal=FALSE,axes=FALSE,xlab="",ylab="",legend.shrink=1,legend.mar=4,cex.lab=cl,legend.lab=expression(paste("LD (",r^2,")")),legend.line=-1.8,legend.only=TRUE,reset.graphics=TRUE)
dev.off()
## summaries of LD
ld<-ldl3 ## set to different ones
diag(ld)<-NA
mean(apply(ld,1,mean,na.rm=TRUE))
sd(apply(ld,1,mean,na.rm=TRUE))
mean(apply(ld,1,max,na.rm=TRUE))
sd(apply(ld,1,max,na.rm=TRUE))
I then calculated LD between all pairs of SNPs on the same scaffold and with an overall MAF of > 5%. First from the cmac variants directory, I generated a list of the SNPs with MAF > 5% with:
perl getCommonSnps.pl filtered_cmacvariants.vcf
I then used a second script to generate a list (ld.list) of all of the SNPs on the same scaffold or contig:
perl ldlist.pl
Next I used bcftools to calculate pairwise LD (r2) for each pair of SNPs, here is an example (generated with perl wrap_qsub_rc_calcld.pl /labs/evolution/data/callosobruchus/gbs/Assemblies/gbs2ref/var*bcf):
cd /labs/evolution/projects/cmaclentil/popgen/ld/
bcftools index /labs/evolution/data/callosobruchus/gbs/Assemblies/gbs2ref/varM-14.bcf
bcftools ldpair /labs/evolution/data/callosobruchus/gbs/Assemblies/gbs2ref/varM-14.bcf /labs/evolution/data/callosobruchus/gbs/Assemblies/gbs2ref/ld.list > ldM-14.out