Post date: May 22, 2014 9:25:51 PM
#We will use sample allele frequencies, not population allele frequencies, since getting h for each locus plus uncertainty in allele freq will be too messy. Used R:
## read genotypes
G<-as.matrix(read.table("filtered_varEsosorumNoHdr.genest",header=F))
## read population ids
pops<-read.table("popIds.txt",header=F)
## data dimensions
nind<-dim(G)[2]
nloc<-dim(G)[1]
npop<-length(unique(pops[,1]))
## calculate sample allele frequencies
p<-matrix(NA,nrow=npop,ncol=nloc)
for(i in 1:nloc){
p[,i]<-tapply(INDEX=pops[,1],G[i,],mean)/2
}
row.names(p)<-unique(pops[,1])
pdf("alleleFreq.pdf",width=10,height=6)
vioplot(p[1,],p[2,],p[3,],p[4,],p[5,],p[6,],p[7,],p[8,],p[9,],names=row.names(h),col="lightblue")
dev.off()
## calculate expected heterozygosity
h<- 2 * p * (1-p)
pdf("expectedHet.pdf",width=10,height=6)
vioplot(h[1,],h[2,],h[3,],h[4,],h[5,],h[6,],h[7,],h[8,],h[9,],names=row.names(h),col="lightblue")
dev.off()
## rare alleles
rare<-which(p[1,] < 0.05) ## rare alleles in captive parents
## proportion at p > 0.01 in captive
mean(p[2,rare] > 0.01)
#F1 1
mean(p[3,rare] > 0.01)
#F2 0.9986799
mean(p[4,rare] > 0.01)
#F3 0.8910891
## proportion of alleles that are rare, but there is a potential for ascertainment bias, there are more EL individuals and thus you have more power to detect rare alleles that are only present in EL than those that are only present in other pops
apply(p < 0.05,1,mean)
# C_EL C_F1 C_F2 C_F3 W_CS W_EL W_PO
#0.10822202 0.08857776 0.08929209 0.23730266 0.23887421 0.12772341 0.17251232
# W_SG W_UP
#0.33923852 0.37802700