Post date: Jun 29, 2018 9:50:11 PM
Hre are the basic population genetic analyses of the SVs themselves. This was all done from /uufs/chpc.utah.edu/common/home/u6000989/projects/timema_SV/SVs/.
1. Extract/fitler the delly sv files based on the results from intansv.
perl filterSvVcf.pl sv_inversions.txt delly/Delly_filtering_out_overlapping_reads/vcfs/all_inv.vcf
Finished processing sv_inversions.txt and delly/Delly_filtering_out_overlapping_reads/vcfs/all_inv.vcf
Retained 492 SVs
perl filterSvVcf.pl sv_deletions.txt delly/Delly_filtering_out_overlapping_reads/vcfs/all_del.vcf
Finished processing sv_deletions.txt and delly/Delly_filtering_out_overlapping_reads/vcfs/all_del.vcf
Retained 194 SVs
perl filterSvVcf.pl sv_dups.txt delly/Delly_filtering_out_overlapping_reads/vcfs/all_dup.vcf
Finished processing sv_dups.txt and delly/Delly_filtering_out_overlapping_reads/vcfs/all_dup.vcf
Retained 223 SVs
2. Convert to gl, this makes them standard phred-scaled gls.
perl vcf2gl.pl sv_dups.vcf
Number of loci: 221; number of individuals 20
perl vcf2gl.pl sv_deletions.vcf
Number of loci: 192; number of individuals 20
perl vcf2gl.pl sv_inversions.vcf
Number of loci: 483; number of individuals 20
3. Obtain E-M ML estimates of allele frequencies.
perl runEstpEM.pl timema_SVs_*gl
#!/usr/bin/perl
#
foreach $gl (@ARGV){
$out = "p_"."$gl";
$out =~ s/gl/txt/;
system "estpEM -i $gl -o $out -e 0.001 -m 50 -h 1\n";
}
Split populations and then repeat.
perl splitPops.pl timema_SVs_deletions.gl
perl splitPops.pl timema_SVs_dups.gl
perl splitPops.pl timema_SVs_inversions.gl
4. Population genetics and basic summaries were all done with R, see svSummary.R (also below).
## minor allele freqs. etc for SVs
inv<-read.table("p_timema_SVs_inversions.txt",header=FALSE)
dup<-read.table("p_timema_SVs_dups.txt",header=FALSE)
del<-read.table("p_timema_SVs_deletions.txt",header=FALSE)
P<-list(inv,dup,del)
for(i in 1:3){
a<-which(P[[i]][,3] > 0.5)
P[[i]][a,3]<-1-P[[i]][a,3]
}
## pop allele frequences for fst
DelFiles<-c("p_timema_SVs_deletions_HVA.txt","p_timema_SVs_deletions_R12C.txt","p_timema_SVs_deletions_HVC.txt","p_timema_SVs_deletions_R23A.txt")
DupFiles<-c("p_timema_SVs_dups_HVA.txt","p_timema_SVs_dups_R12C.txt","p_timema_SVs_dups_HVC.txt","p_timema_SVs_dups_R23A.txt")
InvFiles<-c("p_timema_SVs_inversions_HVA.txt","p_timema_SVs_inversions_R12C.txt","p_timema_SVs_inversions_HVC.txt","p_timema_SVs_inversions_R23A.txt")
Pdel<-vector("list",4)
Pdup<-vector("list",4)
Pinv<-vector("list",4)
for(i in 1:4){
Pdel[[i]]<-read.table(DelFiles[i],header=FALSE)
Pdup[[i]]<-read.table(DupFiles[i],header=FALSE)
Pinv[[i]]<-read.table(InvFiles[i],header=FALSE)
}
fstFunc<-function(p1=NA,p2=NA){
hw<-(2 * p1 * (1-p1) + 2 * p2 * (1-p2))/2
pbar<-(p1+p2)/2
ht<-2 * pbar * (1-pbar)
num<-ht-hw
den<-ht
fst<-mean(num)/mean(den)
L<-length(p1)
fstb<-rep(NA,L)
for(j in 1:1000){
x<-sample(1:L,L,replace=TRUE)
fstb[j]<-mean(num[x])/mean(den[x])
}
fst95<-quantile(fstb,probs=c(0.025,0.975))
o<-c(fst,fst95)
return(o)
}
plotArrowsBox<-function(PP=NA){
c1<-"orange"
c2<-"cyan"
l<-0.17
plot(c(0,1),c(0,1),type='n',axes=FALSE,xlab="",ylab="")
mtext("Host plant",1,2,cex=1.2)
mtext("Site",2,2,las=3,cex=1.2)
lb<-.35
ub<-.65
arrows(lb,1,0.05,1,length=l)
arrows(ub,1,0.95,1,length=l)
arrows(lb,0,0.05,0,length=l)
arrows(ub,0,0.95,0,length=l)
arrows(1,lb,1,0.05,length=l)
arrows(1,ub,1,0.95,length=l)
arrows(0,lb,0,0.05,length=l)
arrows(0,ub,0,0.95,length=l)
arrows(.25,.75,0.05,0.95,length=l)
arrows(.75,.75,0.95,0.95,length=l)
arrows(.4,.6,0.95,0.05,length=l)
arrows(.6,.6,0.05,0.05,length=l)
points(c(0,0,1,1),c(0,1,0,1),pch=c(16,17,16,17),col=c(c1,c1,c2,c2),cex=2.5)
## order HVA, HVC, R12A, R12C
## 1, 3, 4, 2
a<--1.64
b<--0.75
cx<-0.7
## top
fst<-round(fstFunc(PP[[1]][,3],PP[[3]][,3]),3)
#mtext(paste("Fst=",fst[1],"\n(",fst[2],"-",fst[3],")",sep=""),3,a,cex=cx)
mtext(paste(fst[1],"\n(",fst[2],"-",fst[3],")",sep=""),3,a,cex=cx)
## bottom
fst<-round(fstFunc(PP[[2]][,3],PP[[4]][,3]),3)
mtext(paste(fst[1],"\n(",fst[2],"-",fst[3],")",sep=""),1,b,cex=cx)
#mtext(paste("Fst=",fst[1],"\n(",fst[2],"-",fst[3],")",sep=""),1,b,cex=cx)
## left
fst<-round(fstFunc(PP[[1]][,3],PP[[4]][,3]),3)
mtext(paste(fst[1],"\n(",fst[2],"-",fst[3],")",sep=""),2,a,cex=cx)
#mtext(paste("Fst=",fst[1],"\n(",fst[2],"-",fst[3],")",sep=""),2,a,cex=cx)
## right
fst<-round(fstFunc(PP[[3]][,3],PP[[2]][,3]),3)
mtext(paste(fst[1],"\n(",fst[2],"-",fst[3],")",sep=""),4,b,cex=cx)
#mtext(paste("Fst=",fst[1],"\n(",fst[2],"-",fst[3],")",sep=""),4,b,cex=cx)
## ldiag
fst<-round(fstFunc(PP[[1]][,3],PP[[2]][,3]),3)
text(0.31,0.68,paste(fst[1],"\n(",fst[2],"-",fst[3],")",sep=""))
#text(0.31,0.68,paste("Fst=",fst[1],"\n(",fst[2],"-",fst[3],")",sep=""))
## rdiag
fst<-round(fstFunc(PP[[3]][,3],PP[[4]][,3]),3)
text(0.68,0.68,paste(fst[1],"\n(",fst[2],"-",fst[3],")",sep=""))
#text(0.68,0.68,paste("Fst=",fst[1],"\n(",fst[2],"-",fst[3],")",sep=""))
}
tits<-c("(a) Inversion MAFs","(b) Duplication MAF","(c) Deletion MAF",
expression(paste("(d) Inversion ",F[ST],sep="")),
expression(paste("(e) Duplication ",F[ST],sep="")),
expression(paste("(f) Deletion ",F[ST],sep="")))
#tits<-c("(a) Inversions","(b) Duplications","(c) Deletions")
c1<-"orange"
c2<-"cyan"
Px<-list(Pinv,Pdup,Pdel)
pdf("svPopGen.pdf",width=10,height=6)
layout(matrix(c(1:3,0,4:6,7),nrow=2,ncol=4,byrow=TRUE),widths=c(3,3,3,1),heights=c(3,3))
par(mar=c(4,4.5,3,.5))
for(i in 1:3){
hist(P[[i]][,3],xlab="Minor allele frequency",ylab="No. of SVs",cex.lab=1.7,cex.axis=1.2,col="lightgray",main=tits[i],cex.main=1.5)
}
for(i in 1:3){
plotArrowsBox(Px[[i]])
title(main=tits[i+3],cex.main=1.5)
}
par(mar=c(0,0,0,0))
plot(c(0,1),c(0,1),type='n',axes=FALSE,xlab="",ylab="")
legend(0.15,0.925,c("HVA","HVC","R23A","R12C"),pch=c(17,17,16,16),col=c(c1,c2,c1,c2),bty='n',cex=1.3,pt.cex=2.2)
dev.off()
## size distributions, etc
sv_inv<-read.table("sv_inversions.txt",header=TRUE)
sv_dup<-read.table("sv_dups.txt",header=TRUE)
sv_del<-read.table("sv_deletions.txt",header=TRUE)