baypass

##convert vcf to gl file

perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaBart.vcf timemaBart.gl #Number of loci: 3074; number of individuals 195

perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaCali.vcf timemaCali.gl #Number of loci: 7858; number of individuals 77

perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaChum.vcf timemaChum.gl #Number of loci: 4172; number of individuals 358

perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaCris.vcf timemaCris.gl #Number of loci: 196252; number of individuals 205

perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaKnul.vcf timemaKnul.gl #Number of loci: 11139; number of individuals 89

perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaLand.vcf timemaLand.gl #Number of loci: 8548; number of individuals 125

perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaPodu.vcf timemaPodu.gl #Number of loci: 6000; number of individuals 255

perl vcf2gl.pl morefilter3X_filtered3x_variantsTimemaPopp.vcf timemaPopp.gl #Number of loci: 7157; number of individuals 116

#modify population ids of each gl file using modifySampleIDglfile.py

195 3074

77 7858

358 4172

205 196252

89 11139

125 8548

255 6000

116 7157

#create individuals file for each species using this command

head -n1 -q Lmpop_*.gl | cut -d '' -f1 > individuals.txt

#estimate population allele frequencies using estPM

/uufs/chpc.utah.edu/common/home/u6000989/bin/estpEM -i Lmpop_BME.gl -o p_BME.txt -h 2

#use for loop for each species

for f in Lmpop_*.gl; do /uufs/chpc.utah.edu/common/home/u6000989/bin/estpEM -i $f -o "p_$f.txt" -h 2; done

#use R to calculate allele counts using the calcPopCounts.R code saved in the glfiles directory

#create the final genotype count file

paste popcounts_BMCG popcounts_BMP popcounts_BMPCT popcounts_JL popcounts_PCTCR | column -s $'\t' -t > bart_genofile.txt

paste popcounts_LICK.txt popcounts_LP.txt popcounts_SM.txt | column -s $'\t' -t > cali_genofile.txt

paste popcounts_BALD.txt popcounts_BMT.txt popcounts_BS.txt popcounts_DZ.txt popcounts_GR104.txt popcounts_GR603.txt popcounts_GR806.txt popcounts_HF4.txt popcounts_HF6.txt popcounts_HFRBP.txt popcounts_HFRS.txt popcounts_HFTP.txt popcounts_PF.txt | column -s $'\t' -t > chum_genofile.txt

paste popcounts_BY popcounts_EC popcounts_OUT popcounts_R2 popcounts_R9 popcounts_VP | column -s $'\t' -t > cris_genofile.txt

paste popcounts_BCE popcounts_BCTUR popcounts_BCWP popcounts_H1M37 popcounts_HB | column -s $'\t' -t > knul_genofile.txt

paste popcounts_BCBOG popcounts_BCHC popcounts_BCOG popcounts_BCSUM | column -s $'\t' -t > land_genofile.txt

paste popcounts_BMCG3 popcounts_BME popcounts_BMLC popcounts_BMOKC popcounts_BMPCT popcounts_BMT popcounts_BS popcounts_DZ243 popcounts_PCT8000 popcounts_PF243 popcounts_SRHWY | column -s $'\t' -t > podu_genofile.txt

paste popcounts_FROCK popcounts_LP popcounts_MM popcounts_SM popcounts_TBARN | column -s $'\t' -t > popp_genofile.txt

###preparing environment file for latitude and longitude (got lat long from the timema site info file from sampleinfo folder)

extension<- "txt"

filenames<-Sys.glob(paste("*.", extension, sep = ""))

filenums<-seq(filenames)

for (filenum in filenums){

newfilename1<-paste("lat-", sub(paste("\\.", extension, sep = ""), "", filenames[filenum]),

".", extension, sep = "")

newfilename2<-paste("long-", sub(paste("\\.", extension, sep = ""), "", filenames[filenum]),

".", extension, sep = "")

sample<-read.csv(filenames[filenum], header=TRUE, sep=",")

lat<-sample[,2]

lon<-sample[,3]

write.table(t(lat), newfilename1, append=FALSE, sep ="\t", row.names=FALSE, col.names=FALSE)

write.table(t(lon), newfilename2, append=FALSE, sep ="\t", row.names=FALSE, col.names=FALSE)

}

Page updated

Google Sites

Report abuse