Summary of popanc AIM results

Post date: May 09, 2017 11:7:20 PM

I have parameter estimates from 1388 autosomal and 115 sex-linked AIMs with window sizes of 1, 3, 5 or 7 in /uufs/chpc.utah.edu/common/home/u6000989/projects/lyc_hybanc/ancfreqs/params/aims/ and linked to the results sub-directory. Here is the final summary of these, starting with the 3 bp window. The code below comes from summarizePopAncAims.R, but is interspersed with results and comments.

1. Data are read in and organized.

# read in all of the data

autoMTR<-read.table("q_out_aims_w3MTR.txt",header=FALSE,sep=",")

autoCSP<-read.table("q_out_aims_w3CSP.txt",header=FALSE,sep=",")

autoCRP<-read.table("q_out_aims_w3CRP.txt",header=FALSE,sep=",")

autoSOP<-read.table("q_out_aims_w3SOP.txt",header=FALSE,sep=",")

autoLAE<-read.table("q_out_aims_w3LAE.txt",header=FALSE,sep=",")

autoSWM<-read.table("q_out_aims_w3SWM.txt",header=FALSE,sep=",")

autoTIC<-read.table("q_out_aims_w3TIC.txt",header=FALSE,sep=",")

autoSOF<-read.table("q_out_aims_w3SOF.txt",header=FALSE,sep=",")

autoCLH<-read.table("q_out_aims_w3CLH.txt",header=FALSE,sep=",")

autoREF<-read.table("q_out_aims_w3REF.txt",header=FALSE,sep=",")

autoEGP<-read.table("q_out_aims_w3EGP.txt",header=FALSE,sep=",")

autoBKM<-read.table("q_out_aims_w3BKM.txt",header=FALSE,sep=",")

autoSTM<-read.table("q_out_aims_w3STM.txt",header=FALSE,sep=",")

sexMTR<-read.table("q_out_aims_sex_w3MTR.txt",header=FALSE,sep=",")

sexCSP<-read.table("q_out_aims_sex_w3CSP.txt",header=FALSE,sep=",")

sexCRP<-read.table("q_out_aims_sex_w3CRP.txt",header=FALSE,sep=",")

sexSOP<-read.table("q_out_aims_sex_w3SOP.txt",header=FALSE,sep=",")

sexLAE<-read.table("q_out_aims_sex_w3LAE.txt",header=FALSE,sep=",")

sexSWM<-read.table("q_out_aims_sex_w3SWM.txt",header=FALSE,sep=",")

sexTIC<-read.table("q_out_aims_sex_w3TIC.txt",header=FALSE,sep=",")

sexSOF<-read.table("q_out_aims_sex_w3SOF.txt",header=FALSE,sep=",")

sexCLH<-read.table("q_out_aims_sex_w3CLH.txt",header=FALSE,sep=",")

sexREF<-read.table("q_out_aims_sex_w3REF.txt",header=FALSE,sep=",")

sexEGP<-read.table("q_out_aims_sex_w3EGP.txt",header=FALSE,sep=",")

sexBKM<-read.table("q_out_aims_sex_w3BKM.txt",header=FALSE,sep=",")

sexSTM<-read.table("q_out_aims_sex_w3STM.txt",header=FALSE,sep=",")

anc<-list(rbind(autoMTR,sexMTR),rbind(autoCSP,sexCSP),rbind(autoCRP,sexCRP),rbind(autoSOP,sexSOP),rbind(autoLAE,sexLAE),rbind(autoSWM,sexSWM),rbind(autoTIC,sexTIC),rbind(autoSOF,sexSOF),rbind(autoCLH,sexCLH),rbind(autoREF,sexREF),rbind(autoEGP,sexEGP),rbind(autoBKM,sexBKM),rbind(autoSTM,sexSTM))

pnms<-c("MTR","CSP","CRP","SOP","LAE","SWM","TIC","SOF","CLH","REF","EGP","BKM","STM")

## get d

anna<-read.table("../infiles/comb_lychybAutos_Anna.txt",skip=1,header=F)

anna_m<-as.matrix(anna[,-1])

anna_p<-apply(anna_m,1,mean)/2

anna_sex<-read.table("../infiles/comb_lychybSex_Anna.txt",skip=1,header=F)

anna_m<-as.matrix(anna_sex[,-1])

anna_p<-c(anna_p,apply(anna_m,1,mean)/2)

mel<-read.table("../infiles/comb_lychybAutos_MelWest.txt",skip=1,header=F)

mel_m<-as.matrix(mel[,-1])

mel_p<-apply(mel_m,1,mean)/2

mel_sex<-read.table("../infiles/comb_lychybSex_MelWest.txt",skip=1,header=F)

mel_m<-as.matrix(mel_sex[,-1])

mel_p<-c(mel_p,apply(mel_m,1,mean)/2)

## define aims as those with |p_anna - p_mel| >= 2

aims<-which(abs(anna_p-mel_p) > 0.2)

d<-(abs(anna_p-mel_p))

aims<-which(d>0.2)

2. Linkage group level summaries

## read locus information and subset

lgs<-scan("lgs.txt")

lgs<-c(lgs,rep(23,852))[aims]

## LG meanms

lgm<-matrix(NA,nrow=13,ncol=23)

for(i in 1:13){

lgm[i,]<-tapply(X=anc[[i]][,1],INDEX=lgs,mean)

}

## sds by lg

lgsd<-apply(lgm,2,sd)

autoS<-lgs

autoS[lgs<23]<-1

autoS[lgs==23]<-2

lgSa<-matrix(NA,nrow=13,ncol=2)

for(i in 1:13){lgSa[i,]<-tapply(X=anc[[i]][,1],INDEX=autoS,mean)}

## plot of autosomal vs Z ancestry

plot(lgSa[,1],lgSa[,2])

abline(a=0,b=1)

devs<-lgm

mns<-rep(NA,13)

for(i in 1:13){mns[i]<-mean(anc[[i]][,1])}

for(i in 1:13){devs[i,]<-lgm[i,]-mns[i]}

boxplot(devs,col="gray")

abline(h=0)

Chromosome Z (23) has less L. melissa ancestry than the rest of the genome (mean across populations, 40.6% melissa for Z vs. 68.3% for the 22 autosomes). This holds for all populations individually as well. Additionally Z is less variable in mean ancestry across populations (s.d. Z = 0.040, mean for others = 0.122, 32.3% of the mean and it is the least variable overall). Thus, all populations show a reduction in L. melissa ancestry, but the reduction is greater in more L. melissa-like populations reducing the overall variability across populations.

2b. Overall correlations across LGs

mns<-vector("list",13)

sds<-vector("list",13)

for(i in 1:13){

mns[[i]]<-tapply(X=anc[[i]][,1],INDEX=lgs,mean)

sds[[i]]<-tapply(X=anc[[i]][,1],INDEX=lgs,sd)

}

names(mns)<-pnms

names(sds)<-pnms

## cormatrixes

lgCors<-matrix(NA,nrow=13,ncol=13)

for(i in 1:13){for(j in 1:13){

if(i > j){

lgCors[i,j]<-cor(sds[[i]],sds[[j]])

}

if(i < j){

lgCors[i,j]<-cor(mns[[i]],mns[[j]])

}

}}

library(fields)

pdf("ancLgCors.pdf",width=7,height=6)

bls<-brewer.pal(8,"Blues")

par(mar=c(4,4,1,6))

par(xpd=TRUE)

image(lgCors,breaks=c(-1,0,seq(0.1,0.7,0.1),1),col=c("gray",bls),axes=FALSE)

axis(1,at=seq(0,12,1)/12,pnms,las=2)

axis(2,at=seq(0,12,1)/12,pnms,las=2)

box()

par(xpd=FALSE)

image.plot(lgCors,breaks=c(-1,0,seq(0.1,0.7,0.1),1),col=c("gray",bls),legend.lab="cor. coef. (r)",legend.only=TRUE,add=TRUE)

dev.off()

## correlations for means

summary(lgCors[upper.tri(lgCors)])

Min. 1st Qu. Median Mean 3rd Qu. Max.

-0.01006 0.57760 0.75810 0.68680 0.89540 0.97560

## correlations for sds

summary(lgCors[lower.tri(lgCors)])

Min. 1st Qu. Median Mean 3rd Qu. Max.

-0.2453 0.3578 0.4514 0.4482 0.5926 0.9210

Overall, LG means and s.d. show positive correlations across populations, with a higher average correlation for means (0.69) than s.d. (0.45). Thus, there is greater consistency in LG level differences than in variability across LGs, though both are non-trivial. MTR and CSP are the greatest outliers, particularly with means, while SOP, LAE, SWM and TIC clearly hold together.

3. Plots/analyses of ancestry along chromosomes

## population level-plots

N<-length(lgs)

bnds<-which(lgs[2:N] != lgs[1:(N-1)])

bndsPlus<-c(1,bnds,length(lgs))

mids<-rep(NA,23)

for(i in 1:23){

mids[i]<-(bndsPlus[i]+bndsPlus[i+1])/2

}

## population plots

for(i in 1:13){

plotanc(pop=anc[[i]][,1],nm=pnms[i],mids,bnds)

}

## population plots for talk

pdf("aims_ancplotMTR-CSP.pdf",width=10,height=8)

par(mfrow=c(2,1))

par(mar=c(5.5,5.5,2,0.5))

for(i in c(1,8)){

plotanc(pop=anc[[i]][,1],nm=pnms[i],mids,bnds)

}

dev.off()

Here are summaries of the number of regions with (nearly) fixed (point estimates of 0.01 or 0.99 and CIs of 0.05 or 0.95) or high frequency ancestry regions (point estimates of 0.05 or 0.95 and CIs of 0.1 or 0.9). These suggest most of the genome is segregating for ancestry.

fixedHi<-matrix(NA,nrow=13,ncol=4)

for(i in 1:13){

fixedHi[i,1]<-sum(anc[[i]][,1] > 0.99 & anc[[1]][,3] > 0.95)

fixedHi[i,2]<-sum(anc[[i]][,1] <0.01 & anc[[1]][,4] < 0.05)

fixedHi[i,3]<-sum(anc[[i]][,1] > 0.95 & anc[[1]][,3] > 0.9)

fixedHi[i,4]<-sum(anc[[i]][,1] <0.05 & anc[[1]][,4] < 0.1)

}

colnames(fixedHi)<-c("fixMel","fixAnna","hiMel","hiAnna")

rownames(fixedHi)<-pnms

fixMel fixAnna hiMel hiAnna

MTR 3 27 6 67

CSP 0 2 0 3

CRP 0 0 9 2

SOP 0 1 3 1

LAE 0 0 10 2

SWM 0 0 1 3

TIC 0 1 3 1

SOF 0 0 10 2

CLH 0 1 3 1

REF 0 0 10 2

EGP 0 2 3 2

BKM 0 2 10 2

STM 0 0 3 2

And lastly (for now), I defined excess ancestry as the top 1% or 5% of SNPs (most extreme melissa or anna ancestry) and tested for overlap across populations. There is a clear signal on the Z.

## excess ancestry, here top (and bottom) 1%

bnds<-c(0.01,0.99)

#bnds<-c(0.05,0.95)

## matrix of excess anna

exAnna<-matrix(NA,nrow=dim(anc[[1]])[1],ncol=13)

for(i in 1:13){

ub<-quantile(anc[[i]][,1],probs=bnds[1])

exAnna[,i]<-as.numeric(anc[[i]][,1] < ub)

}

nullMx<-rep(NA,1000)

null99<-rep(NA,1000)

## perm

for(i in 1:1000){

X<-exAnna

for(j in 1:13){

X[,j]<-X[sample(1:dim(X)[1],dim(X),replace=FALSE),j]

}

cnts<-apply(X,1,sum)

nullMx[i]<-max(cnts)

null99[i]<-quantile(cnts,probs=0.99)

}

## matrix of excess melissa

exMel<-matrix(NA,nrow=dim(anc[[1]])[1],ncol=13)

for(i in 1:13){

ub<-quantile(anc[[i]][,1],probs=bnds[2])

exMel[,i]<-as.numeric(anc[[i]][,1] > ub)

}

nullMxM<-rep(NA,1000)

null99M<-rep(NA,1000)

## perm

for(i in 1:1000){

X<-exMel

for(j in 1:13){

X[,j]<-X[sample(1:dim(X)[1],dim(X),replace=FALSE),j]

}

cnts<-apply(X,1,sum)

nullMxM[i]<-max(cnts)

null99M[i]<-quantile(cnts,probs=0.99)

}

pdf("excessAncestryAims.pdf",width=8,heigh=5)

plot(apply(exAnna,1,sum),type='l',ylim=c(0,13))

abline(h=quantile(nullMx,probs=0.95),col="red")

abline(h=quantile(null99,probs=0.95),col="orange")

plot(apply(exMel,1,sum),type='l',ylim=c(0,13))

abline(h=quantile(nullMxM,probs=0.95),col="red")

abline(h=quantile(null99M,probs=0.95),col="orange")

dev.off()

save(list=ls(),file="aimAnc.Rdat")

Page updated

Google Sites

Report abuse