##convert vcf to gl file
perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaBart.vcf timemaBart.gl #Number of loci: 3074; number of individuals 195
perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaCali.vcf timemaCali.gl #Number of loci: 7858; number of individuals 77
perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaChum.vcf timemaChum.gl #Number of loci: 4172; number of individuals 358
perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaCris.vcf timemaCris.gl #Number of loci: 196252; number of individuals 205
perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaKnul.vcf timemaKnul.gl #Number of loci: 11139; number of individuals 89
perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaLand.vcf timemaLand.gl #Number of loci: 8548; number of individuals 125
perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaPodu.vcf timemaPodu.gl #Number of loci: 6000; number of individuals 255
perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaPopp.vcf timemaPopp.gl #Number of loci: 7157; number of individuals 116
#modify population ids of each gl file using modifySampleIDglfile.py
195 3074
77 7858
358 4172
205 196252
89 11139
125 8548
255 6000
116 7157
#create individuals file for each species using this command
head -n1 -q Lmpop_*.gl | cut -d '' -f1 > individuals.txt
#estimate population allele frequencies using estPM
/uufs/chpc.utah.edu/common/home/u6000989/bin/estpEM -i Lmpop_BME.gl -o p_BME.txt -h 2
#use for loop for each species
for f in Lmpop_*.gl; do /uufs/chpc.utah.edu/common/home/u6000989/bin/estpEM -i $f -o "p_$f.txt" -h 2; done
#use R to calculate allele counts using the calcPopCounts.R code saved in the glfiles directory
#create the final genotype count file
paste popcounts_BMCG popcounts_BMP popcounts_BMPCT popcounts_JL popcounts_PCTCR | column -s $'\t' -t > bart_genofile.txt
paste popcounts_LICK.txt popcounts_LP.txt popcounts_SM.txt | column -s $'\t' -t > cali_genofile.txt
paste popcounts_BALD.txt popcounts_BMT.txt popcounts_BS.txt popcounts_DZ.txt popcounts_GR104.txt popcounts_GR603.txt popcounts_GR806.txt popcounts_HF4.txt popcounts_HF6.txt popcounts_HFRBP.txt popcounts_HFRS.txt popcounts_HFTP.txt popcounts_PF.txt | column -s $'\t' -t > chum_genofile.txt
paste popcounts_BY popcounts_EC popcounts_OUT popcounts_R2 popcounts_R9 popcounts_VP | column -s $'\t' -t > cris_genofile.txt
paste popcounts_BCE popcounts_BCTUR popcounts_BCWP popcounts_H1M37 popcounts_HB | column -s $'\t' -t > knul_genofile.txt
paste popcounts_BCBOG popcounts_BCHC popcounts_BCOG popcounts_BCSUM | column -s $'\t' -t > land_genofile.txt
paste popcounts_BMCG3 popcounts_BME popcounts_BMLC popcounts_BMOKC popcounts_BMPCT popcounts_BMT popcounts_BS popcounts_DZ243 popcounts_PCT8000 popcounts_PF243 popcounts_SRHWY | column -s $'\t' -t > podu_genofile.txt
paste popcounts_FROCK popcounts_LP popcounts_MM popcounts_SM popcounts_TBARN | column -s $'\t' -t > popp_genofile.txt
###preparing environment file for latitude and longitude (got lat long from the timema site info file from sampleinfo folder)
extension<- "txt"
filenames<-Sys.glob(paste("*.", extension, sep = ""))
filenums<-seq(filenames)
for (filenum in filenums){
newfilename1<-paste("lat-", sub(paste("\\.", extension, sep = ""), "", filenames[filenum]),
".", extension, sep = "")
newfilename2<-paste("long-", sub(paste("\\.", extension, sep = ""), "", filenames[filenum]),
".", extension, sep = "")
sample<-read.csv(filenames[filenum], header=TRUE, sep=",")
lat<-sample[,2]
lon<-sample[,3]
write.table(t(lat), newfilename1, append=FALSE, sep ="\t", row.names=FALSE, col.names=FALSE)
write.table(t(lon), newfilename2, append=FALSE, sep ="\t", row.names=FALSE, col.names=FALSE)
}