File saved on Desktop: Documents/dubois_ms/plink.sh
######### preparing the ped file for each population in R #############
## read in the genEst matrix files from entropy folder
bcr<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_BCR.txt", header=F)
bic<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_BIC.txt", header=F)
bld<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_BLD.txt", header=F)
bnp<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_BNP.txt", header=F)
bst<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_BST.txt", header=F)
btb<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_BTB.txt", header=F)
cdy<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_CDY.txt", header=F)
ckv<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_CKV.txt", header=F)
dbs<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_DBS.txt", header=F)
frc<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_FRC.txt", header=F)
gnp<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_GNP.txt", header=F)
khl<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_KHL.txt", header=F)
lan<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_LAN.txt", header=F)
mon<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_MON.txt", header=F)
pin<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_PIN.txt", header=F)
psp<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_PSP.txt", header=F)
rdl<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_RDL.txt", header=F)
rnv<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_RNV.txt", header=F)
sdc<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_SDC.txt", header=F)
sin<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_SIN.txt", header=F)
syc<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_SYC.txt", header=F)
vic<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_VIC.txt", header=F)
ywp<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_YWP.txt", header=F)
#transpose the matrices
bcr<-t(bcr)
bic<-t(bic)
bld<-t(bld)
bnp<-t(bnp)
bst<-t(bst)
btb<-t(btb)
cdy<-t(cdy)
ckv<-t(ckv)
dbs<-t(dbs)
frc<-t(frc)
gnp<-t(gnp)
khl<-t(khl)
#for loop to create the ped file
files= list.files(path="/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc", pattern = "genEstMatrix_", full.names = TRUE)
makeped = function(input) {
#read in the file
pop<-read.table(input, header=F)
#transpose the file
pop<-t(pop)
#replace snps
p<-pop[,-1]
p[,][p[,] == 2] <- "2 2"
p[,][p[,] == 1] <- "1 2"
p[,][p[,] == 0] <- "1 1"
p[,][is.na(p[,])] <- "0 0"
#create seq for snp ids
snpids<-1:nrow(pop)
newpop<-cbind(snpids, p)
index = which(files == input)
outname = paste('mod_', index, '.ped', sep="")
write.table(newpop, file=outname, sep=" ", row.names=FALSE, col.names=FALSE, quote=FALSE)
}
for (i in 1:length(files)){
makeped(files[i])
}
## rename files with population names on command line
mv mod_1.ped bcr.ped
mv mod_12.ped bic.ped
mv bic.ped khl.ped
mv mod_2.ped bic.ped
mv mod_3.ped bld.ped
mv mod_4.ped bnp.ped
mv mod_5.ped bst.ped
mv mod_6.ped btb.ped
mv mod_7.ped cdy.ped
mv mod_8.ped ckv.ped
mv mod_9.ped dbs.ped
mv mod_10.ped frc.ped
mv mod_11.ped gnp.ped
mv mod_13.ped lan.ped
mv mod_14.ped mon.ped
mv mod_15.ped pin.ped
mv mod_16.ped psp.ped
mv mod_17.ped rdl.ped
mv mod_18.ped rnv.ped
mv mod_19.ped sdc.ped
mv mod_20.ped sin.ped
mv mod_21.ped syc.ped
mv mod_22.ped vic.ped
mv mod_23.ped ywp.ped
############ create map files #######################################
#rename chromosomes in mappos file in R and save it as dbs_mod.map
mp<-read.table("mappos.txt", header=F, sep=":")
chrom<-mp[,1]
#rename chromosomes
chrom[chrom == 11] <- 2
chrom[chrom == 4] <- 11
chrom[chrom == 1648] <- 4
chrom[chrom == 1647] <- 18
chrom[chrom == 1646] <- 3
chrom[chrom == 1645] <- 8
chrom[chrom == 1644] <- 14
chrom[chrom == 1642] <- 7
chrom[chrom == 1641] <- 9
chrom[chrom == 1640] <- 15
chrom[chrom == 1639] <- 10
chrom[chrom == 1638] <- 19
chrom[chrom == 1636] <- 5
chrom[chrom == 1632] <- 13
chrom[chrom == 1633] <- 21
chrom[chrom == 1631] <- 23
chrom[chrom == 1628] <- 1
chrom[chrom == 1627] <- 22
chrom[chrom == 1095] <- 21
chrom[chrom == 833] <- 12
chrom[chrom == 588] <- 20
chrom[chrom == 833] <- 12
chrom[chrom == 503] <- 16
chrom[chrom == 309] <- 17
chrom[chrom == 228] <- 6
chrom[chrom == 29] <- 24
chrom[chrom == 30] <- 25
chrom[chrom == 40] <- 26
chrom[chrom == 46] <- 27
chrom[chrom == 82] <- 28
chrom[chrom == 84] <- 29
chrom[chrom == 96] <- 30
chrom[chrom == 97] <- 31
chrom[chrom == 103] <- 32
chrom[chrom == 118] <- 33
chrom[chrom == 130] <- 34
chrom[chrom == 137] <- 35
chrom[chrom == 158] <- 36
chrom[chrom == 166] <- 37
chrom[chrom == 185] <- 38
chrom[chrom == 200] <- 39
chrom[chrom == 217] <- 40
chrom[chrom == 218] <- 41
chrom[chrom == 220] <- 42
chrom[chrom == 222] <- 43
chrom[chrom == 225] <- 44
chrom[chrom == 233] <- 45
chrom[chrom == 237] <- 46
chrom[chrom == 253] <- 47
chrom[chrom == 255] <- 48
chrom[chrom == 257] <- 49
chrom[chrom == 270] <- 50
chrom[chrom == 305] <- 51
chrom[chrom == 369] <- 52
chrom[chrom == 385] <- 53
chrom[chrom == 390] <- 54
chrom[chrom == 391] <- 55
chrom[chrom == 395] <- 56
chrom[chrom == 409] <- 57
chrom[chrom == 435] <- 58
chrom[chrom == 448] <- 59
chrom[chrom == 482] <- 60
chrom[chrom == 497] <- 61
chrom[chrom == 513] <- 62
chrom[chrom == 519] <- 63
chrom[chrom == 596] <- 64
chrom[chrom == 624] <- 65
chrom[chrom == 665] <- 66
chrom[chrom == 696] <- 67
chrom[chrom == 753] <- 68
chrom[chrom == 758] <- 69
chrom[chrom == 764] <- 70
chrom[chrom == 806] <- 71
chrom[chrom == 874] <- 72
chrom[chrom == 930] <- 73
chrom[chrom == 1012] <- 74
chrom[chrom == 1019] <- 75
chrom[chrom == 1073] <- 76
chrom[chrom == 1136] <- 77
chrom[chrom == 1141] <- 78
chrom[chrom == 1143] <- 79
chrom[chrom == 1147] <- 80
chrom[chrom == 1170] <- 81
chrom[chrom == 1260] <- 82
chrom[chrom == 1274] <- 83
chrom[chrom == 1307] <- 84
chrom[chrom == 1468] <- 85
chrom[chrom == 1558] <- 86
chrom[chrom == 1626] <- 87
chrom[chrom == 1629] <- 88
chrom[chrom == 1630] <- 89
chrom[chrom == 1635] <- 90
chrom[chrom == 1643] <- 91
chrom[chrom == 1649] <- 92
chrom[chrom == 1650] <- 93
#col2 in map file is snp ids
snpid<-seq(1:39193)
#col3 in map file is Position in morgans or centimorgans (optional; also safe to use dummy value of '0')
pos<-rep(0,39193)
#col4 is Base-pair coordinate
snppos<-mp[,2]
#create final matrix
map<-cbind(chrom,snpid,pos,snppos)
write.table(map,"dbs_mod.map", quote=FALSE, col.names=FALSE, row.names=FALSE, sep=" ")
## create map files for each population in command line
cp dbs_mod.map bcr.map
cp dbs_mod.map bic.map
cp dbs_mod.map khl.map
cp dbs_mod.map bic.map
cp dbs_mod.map bld.map
cp dbs_mod.map bnp.map
cp dbs_mod.map bst.map
cp dbs_mod.map btb.map
cp dbs_mod.map cdy.map
cp dbs_mod.map ckv.map
cp dbs_mod.map dbs.map
cp dbs_mod.map frc.map
cp dbs_mod.map gnp.map
cp dbs_mod.map lan.map
cp dbs_mod.map mon.map
cp dbs_mod.map pin.map
cp dbs_mod.map psp.map
cp dbs_mod.map rdl.map
cp dbs_mod.map rnv.map
cp dbs_mod.map sdc.map
cp dbs_mod.map sin.map
cp dbs_mod.map syc.map
cp dbs_mod.map vic.map
cp dbs_mod.map ywp.map
## PLINK ##############
#r2 threshold 0.0, ld window 10 kb
#bcr
plink --file bcr --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bcr
plink --file bcr --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bcr --r2 --ld-window-r2 0.0 --ld-window-kb 10
#bic
plink --file bic --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bic
plink --file bic --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bic --r2 --ld-window-r2 0.0 --ld-window-kb 10
#bld
plink --file bld --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bld
plink --file bld --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bld --r2 --ld-window-r2 0.0 --ld-window-kb 10
#bnp
plink --file bnp --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bnp
plink --file bnp --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bnp --r2 --ld-window-r2 0.0 --ld-window-kb 10
#bst
plink --file bst --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bst
plink --file bst --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bst --r2 --ld-window-r2 0.0 --ld-window-kb 10
#btb
plink --file btb --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out btb
plink --file btb --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out btb --r2 --ld-window-r2 0.0 --ld-window-kb 10
#cdy
plink --file cdy --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out cdy
plink --file cdy --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out cdy --r2 --ld-window-r2 0.0 --ld-window-kb 10
#ckv
plink --file ckv --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out ckv
plink --file ckv --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out ckv --r2 --ld-window-r2 0.0 --ld-window-kb 10
#dbs
plink --file dbs --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out dbs
plink --file dbs --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out dbs --r2 --ld-window-r2 0.0 --ld-window-kb 10
#frc
plink --file frc --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out frc
plink --file frc --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out frc --r2 --ld-window-r2 0.0 --ld-window-kb 10
#gnp
plink --file gnp --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out gnp
plink --file gnp --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out gnp --r2 --ld-window-r2 0.0 --ld-window-kb 10
#khl
plink --file khl --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out khl
plink --file khl --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out khl --r2 --ld-window-r2 0.0 --ld-window-kb 10
#lan
plink --file lan --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out lan
plink --file lan --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out lan --r2 --ld-window-r2 0.0 --ld-window-kb 10
#mon
plink --file mon --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out mon
plink --file mon --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out mon --r2 --ld-window-r2 0.0 --ld-window-kb 10
#pin
plink --file pin --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out pin
plink --file pin --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out pin --r2 --ld-window-r2 0.0 --ld-window-kb 10
#psp
plink --file psp --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out psp
plink --file psp --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out psp --r2 --ld-window-r2 0.0 --ld-window-kb 10
#rdl
plink --file rdl --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out rdl
plink --file rdl --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out rdl --r2 --ld-window-r2 0.0 --ld-window-kb 10
#rnv
plink --file rnv --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out rnv
plink --file rnv --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out rnv --r2 --ld-window-r2 0.0 --ld-window-kb 10
#sdc
plink --file sdc --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out sdc
plink --file sdc --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out sdc --r2 --ld-window-r2 0.0 --ld-window-kb 10
#sin
plink --file sin --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out sin
plink --file sin --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out sin --r2 --ld-window-r2 0.0 --ld-window-kb 10
#syc
plink --file syc --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out syc
plink --file syc --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out syc --r2 --ld-window-r2 0.0 --ld-window-kb 10
#vic
plink --file vic --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out vic
plink --file vic --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out vic --r2 --ld-window-r2 0.0 --ld-window-kb 10
#ywp
plink --file ywp --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out ywp
plink --file ywp --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out ywp --r2 --ld-window-r2 0.0 --ld-window-kb 10
## number of LD values for each population
42075 bcr.ld
17427 bic.ld
57003 bld.ld
21987 bnp.ld
12748 bst.ld
48010 btb.ld
19154 cdy.ld
5704 ckv.ld
87504 dbs.ld
16335 frc.ld
55188 gnp.ld
18174 khl.ld
20887 lan.ld
14631 mon.ld
24034 pin.ld
21949 psp.ld
28435 rdl.ld
30101 rnv.ld
14547 sdc.ld
46653 sin.ld
14443 syc.ld
18059 ywp.ld
## subsetting samples and rerunning plink
#bld 74
bld.p<-read.table("bld.ped", header=F)
bld_sub<-bld.p[sample(nrow(bld.p), 40),] #40
write.table(bld_sub, "bld_s.ped", quote=FALSE, col.names=FALSE, row.names=FALSE, sep=" ")
#dbs
dbs.p<-read.table("dbs.ped", header=F)
dbs_sub<-dbs.p[sample(nrow(dbs.p), 55),] #40
write.table(dbs_sub, "dbs_s.ped", quote=FALSE, col.names=FALSE, row.names=FALSE, sep=" ")
#gnp
gnp.p<-read.table("gnp.ped", header=F)
gnp_sub<-gnp.p[sample(nrow(gnp.p), 55),] #40
write.table(gnp_sub, "gnp_s.ped", quote=FALSE, col.names=FALSE, row.names=FALSE, sep=" ")
#sin
sin.p<-read.table("sin.ped", header=F)
sin_sub<-sin.p[sample(nrow(sin.p), 55),] #40
write.table(sin_sub, "sin_s.ped", quote=FALSE, col.names=FALSE, row.names=FALSE, sep=" ")
#also creat map files
cp bld.map bld_s.map
cp dbs.map dbs_s.map
cp gnp.map gnp_s.map
cp sin.map sin_s.map
#run plink
plink --file bld_s --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bld_s
plink --file bld_s --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bld_s --r2 --ld-window-r2 0.0 --ld-window-kb 10
plink --file dbs_s --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out dbs_s
plink --file dbs_s --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out dbs_s --r2 --ld-window-r2 0.0 --ld-window-kb 10
plink --file gnp_s --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out gnp_s
plink --file gnp_s --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out gnp_s --r2 --ld-window-r2 0.0 --ld-window-kb 10
plink --file sin_s --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out sin_s
plink --file sin_s --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out sin_s --r2 --ld-window-r2 0.0 --ld-window-kb 10
## plots in r
## trying to see what plots look like ###
getQuants<-function(input){
pop<-read.table(input, header=T)
pop<-pop[order(pop$BP_B-pop$BP_A),]
dsn<-pop$BP_B-pop$BP_A
ldn<-pop$R2
#get quantiles
s1<-which(dsn > 0 & dsn < 100)
sq1<-quantile(ldn[s1],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s2<-which(dsn > 100 & dsn < 200)
sq2<-quantile(ldn[s2],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s3<-which(dsn > 200 & dsn < 500)
sq3<-quantile(ldn[s3],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s4<-which(dsn > 500 & dsn < 1000)
sq4<-quantile(ldn[s4],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s5<-which(dsn > 1000 & dsn < 2000)
sq5<-quantile(ldn[s5],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s6<-which(dsn > 2000 & dsn < 5000)
sq6<-quantile(ldn[s6],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s7<-which(dsn > 5000 & dsn < 10000)
sq7<-quantile(ldn[s7],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s8<-which(dsn > 10000 & dsn < 50000)
sq8<-quantile(ldn[s8],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s9<-which(dsn > 50000 & dsn < 100000)
sq9<-quantile(ldn[s9],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s10<-which(dsn > 100000 & dsn < 1000000)
sq10<-quantile(ldn[s10],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s11<-which(dsn > 1000000 & dsn < 5000000)
sq11<-quantile(ldn[s11],probs=c(.75,.9,.95,.99),na.rm=TRUE)
index = which(files == input)
outname = paste('quants', index, '.txt', sep = "")
quant_mat<-rbind(sq1,sq2,sq3,sq4,sq5,sq6,sq7,sq8,sq9,sq10,sq11)
write.table(quant_mat, file=outname, sep=" ", row.names=FALSE, quote=FALSE)
}
files= list.files(path="./ldfiles", pattern = "*.ld", full.names = TRUE)
files= list.files(path="./ldfiles", pattern = "_s.ld", full.names = TRUE)
for (i in 1:length(files)){
getQuants(files[i])
}
###plotting
qfiles<-list.files(path=".", pattern = "quants", full.names=TRUE)
##function to read in file and plot quantiles
plotLD<-function(l){
pop<-read.table(l, header=T)
col = c("red", "green","blue")
plot(pop[,1], type="b", pch=19, col=col[1],ylim=c(0.005,1.5),xlab="Distance (Kb)",ylab="LD(r^2)"
, main = l, cex.lab=1.5, cex.main=1.5, cex.axis=1)
lines(pop[,2], type="b", pch=19, col=col[2], lty =1)
lines(pop[,3], type="b", pch=19, col=col[3], lty =1)
#legend("topright", legend=c("75%", "90%","95%"),col=col, lty = 1, cex=1.5)
}
pdf("hybrids_dbs.pdf",width=15,height=10, bg="white")
par(mfrow=c(5,2))
for (i in hybrids){
plotLD(i)
}
dev.off()
## bld, dbs, gnp, sin
bld<-read.table("quants_bld.txt", header=T) #74
bld<-bld[1:7,]
dbs<-read.table("quants_dbs.txt", header=T) #115
dbs<-dbs[1:7,]
gnp<-read.table("quants_gnp.txt", header=T) #98
gnp<-gnp[1:7,]
sin<-read.table("quants_sin.txt", header=T) #97
sin<-sin[1:7,]
blds<-read.table("quants_bld_s.txt", header=T) #40
blds<-blds[1:7,]
dbss<-read.table("quants_dbs_s.txt", header=T) #55
dbss<-dbss[1:7,]
gnps<-read.table("quants_gnp_s.txt", header=T) #55
gnps<-gnps[1:7,]
sins<-read.table("quants_sin_s.txt", header=T) #55
sins<-sins[1:7,]
############### main LD plot ###################################################################
col = rainbow(3)
pdf("plot_ld_samplesize.pdf", width=8, height=10, bg="white")
par(mfrow=c(4,2))
par(mar=c(6,6,4,2))
plot(bld[,1], type="b", pch=19, col=col[1],ylim=c(0.005,0.5),xlab=" ",ylab="LD(r^2)"
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(bld[,2], type="b", pch=19, col=col[2], lty =1)
lines(bld[,3], type="b", pch=19, col=col[3], lty =1)
title(main="A). BLD, N = 74",cex.main = 1.6, adj=0)
plot(blds[,1], type="b", pch=19, col=col[1],ylim=c(0.005,1),xlab="",ylab=""
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(blds[,2], type="b", pch=19, col=col[2], lty =1)
lines(blds[,3], type="b", pch=19, col=col[3], lty =1)
legend("topright", legend=c("75%", "90%","95%"),col=col, lty = 1, cex=1.2)
title(main="BLD, N = 40",cex.main = 1.6, adj=0)
plot(dbs[,1], type="b", pch=19, col=col[1],ylim=c(0.005,0.5),xlab="",ylab="LD(r^2)"
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(dbs[,2], type="b", pch=19, col=col[2], lty =1)
lines(dbs[,3], type="b", pch=19, col=col[3], lty =1)
title(main="B). DBS, N = 115",cex.main = 1.6, adj=0)
plot(dbss[,1], type="b", pch=19, col=col[1],ylim=c(0.005,0.5),xlab="",ylab=""
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(dbss[,2], type="b", pch=19, col=col[2], lty =1)
lines(dbss[,3], type="b", pch=19, col=col[3], lty =1)
title(main="DBS, N = 55",cex.main = 1.6, adj=0)
plot(gnp[,1], type="b", pch=19, col=col[1],ylim=c(0.005,0.5),xlab="",ylab="LD(r^2)"
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(gnp[,2], type="b", pch=19, col=col[2], lty =1)
lines(gnp[,3], type="b", pch=19, col=col[3], lty =1)
title(main="C). GNP, N = 98",cex.main = 1.6, adj=0)
plot(gnps[,1], type="b", pch=19, col=col[1],ylim=c(0.005,0.5),xlab="",ylab=""
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(gnps[,2], type="b", pch=19, col=col[2], lty =1)
lines(gnps[,3], type="b", pch=19, col=col[3], lty =1)
title(main="GNP, N = 55",cex.main = 1.6, adj=0)
plot(sin[,1], type="b", pch=19, col=col[1],ylim=c(0.005,0.5),xlab="Distance (Kb)",ylab="LD(r^2)"
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(sin[,2], type="b", pch=19, col=col[2], lty =1)
lines(sin[,3], type="b", pch=19, col=col[3], lty =1)
title(main="D). SIN, N = 97",cex.main = 1.6, adj=0)
plot(sins[,1], type="b", pch=19, col=col[1],ylim=c(0.005,0.5),xlab="Distance (Kb)",ylab=""
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(sins[,2], type="b", pch=19, col=col[2], lty =1)
lines(sins[,3], type="b", pch=19, col=col[3], lty =1)
title(main="SIN, N = 55",cex.main = 1.6, adj=0)
dev.off()
############################################ Extra stuff #########################################
ld10<-read.table("dbs_10kb.ld", header=T)
ld10<-ld10[order(ld10$BP_B-ld10$BP_A),]
dsn<-ld10$BP_B-ld10$BP_A
ldn<-ld10$R2
s1<-which(dsn > 0 & dsn < 100)
sq1<-quantile(ldn[s1],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s2<-which(dsn > 100 & dsn < 200)
sq2<-quantile(ldn[s2],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s3<-which(dsn > 200 & dsn < 500)
sq3<-quantile(ldn[s3],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s4<-which(dsn > 500 & dsn < 1000)
sq4<-quantile(ldn[s4],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s5<-which(dsn > 1000 & dsn < 2000)
sq5<-quantile(ldn[s5],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s6<-which(dsn > 2000 & dsn < 5000)
sq6<-quantile(ldn[s6],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s7<-which(dsn > 5000 & dsn < 10000)
sq7<-quantile(ldn[s7],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s8<-which(dsn > 10000 & dsn < 50000)
sq8<-quantile(ldn[s8],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s9<-which(dsn > 50000 & dsn < 100000)
sq9<-quantile(ldn[s9],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s10<-which(dsn > 100000 & dsn < 1000000)
sq10<-quantile(ldn[s10],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s11<-which(dsn > 1000000 & dsn < 5000000)
sq11<-quantile(ldn[s11],probs=c(.75,.9,.95,.99),na.rm=TRUE)
quant_mat<-rbind(sq1,sq2,sq3,sq4,sq5,sq6,sq7,sq8,sq9,sq10,sq11)
plot(pop[,1], type="b", pch=19, col="red",ylim=c(0.005,1.5),xlab="Distance (Kb)",ylab="LD(r^2)", cex.lab=1.5, cex.main=1.5, cex.axis=1)
lines(pop[,2], type="b", pch=19, col="green", lty =1)
lines(pop[,3], type="b", pch=19, col="blue", lty =1)
####################################
########## AIMS ################
bcr<-read.table("/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc/genEstMatrix_BCR.txt", header=T)
map<-read.table("temp/mappos.txt", header=F, sep=":")
bcr.a<-cbind(map,bcr)
write.table(bcr.a, "bcr.txt", col.names=F, row.names=F, quote=F, sep=" ")
files<-list.files(path="/uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/entropy/complete/mcmc", pattern = "genEstMatrix_", full.names=TRUE)
transMat<-function(l){
pop<-read.table(l, header=T)
pop.a<-cbind(map,pop)
index = which(files == l)
outname<-paste('aims', index, '.txt', sep = "")
write.table(pop.a, outname, col.names=F, row.names=F, quote=F, sep=" ")
}
for (i in 1:length(files)){
transMat(files[i])
}
python subset_aims_genoEst.py aims1.txt aims_bcr.ped
python subset_aims_genoEst.py aims2.txt aims_bic.ped
python subset_aims_genoEst.py aims3.txt aims_bld.ped
python subset_aims_genoEst.py aims4.txt aims_bnp.ped
python subset_aims_genoEst.py aims5.txt aims_bst.ped
python subset_aims_genoEst.py aims6.txt aims_btb.ped
python subset_aims_genoEst.py aims7.txt aims_cdy.ped
python subset_aims_genoEst.py aims8.txt aims_ckv.ped
python subset_aims_genoEst.py aims9.txt aims_dbs.ped
python subset_aims_genoEst.py aims10.txt aims_frc.ped
python subset_aims_genoEst.py aims11.txt aims_gnp.ped
python subset_aims_genoEst.py aims12.txt aims_khl.ped
python subset_aims_genoEst.py aims13.txt aims_lan.ped
python subset_aims_genoEst.py aims14.txt aims_mon.ped
python subset_aims_genoEst.py aims15.txt aims_pin.ped
python subset_aims_genoEst.py aims16.txt aims_psp.ped
python subset_aims_genoEst.py aims17.txt aims_rdl.ped
python subset_aims_genoEst.py aims18.txt aims_rnv.ped
python subset_aims_genoEst.py aims19.txt aims_sdc.ped
python subset_aims_genoEst.py aims20.txt aims_sin.ped
python subset_aims_genoEst.py aims21.txt aims_syc.ped
python subset_aims_genoEst.py aims21.txt aims_syc.ped
python subset_aims_genoEst.py aims22.txt aims_vic.ped
python subset_aims_genoEst.py aims23.txt aims_ywp.ped
##get final files, I am editing the same file in R and resaving it
#transpose the matrices
bcr<-t(bcr)
bic<-t(bic)
bld<-t(bld)
bnp<-t(bnp)
bst<-t(bst)
btb<-t(btb)
cdy<-t(cdy)
ckv<-t(ckv)
dbs<-t(dbs)
frc<-t(frc)
gnp<-t(gnp)
khl<-t(khl)
#for loop to create the ped file
files= list.files(path=".", pattern = "aims_", full.names = TRUE)
files = files[-24]
makeped = function(input) {
#read in the file
pop<-read.table(input, header=F)
pop<-pop[,-c(1,2)]
#transpose the file
pop<-t(pop)
#replace snps
p<-pop
p[,][p[,] == 2] <- "2 2"
p[,][p[,] == 1] <- "1 2"
p[,][p[,] == 0] <- "1 1"
p[,][is.na(p[,])] <- "0 0"
#create seq for snp ids
snpids<-1:nrow(pop)
newpop<-cbind(snpids, p)
index = which(files == input)
outname = paste('mod_', index, '.ped', sep="")
write.table(newpop, file=outname, sep=" ", row.names=FALSE, col.names=FALSE, quote=FALSE)
}
for (i in 1:length(files)){
makeped(files[i])
}
## rename files with population names on command line
mv mod_1.ped bcr.ped
mv mod_2.ped bic.ped
mv mod_3.ped bld.ped
mv mod_4.ped bnp.ped
mv mod_5.ped bst.ped
mv mod_6.ped btb.ped
mv mod_7.ped cdy.ped
mv mod_8.ped ckv.ped
mv mod_9.ped dbs.ped
mv mod_10.ped frc.ped
mv mod_11.ped gnp.ped
mv mod_12.ped khl.ped
mv mod_13.ped lan.ped
mv mod_14.ped mon.ped
mv mod_15.ped pin.ped
mv mod_16.ped psp.ped
mv mod_17.ped rdl.ped
mv mod_18.ped rnv.ped
mv mod_19.ped sdc.ped
mv mod_20.ped sin.ped
mv mod_21.ped syc.ped
mv mod_22.ped vic.ped
mv mod_23.ped ywp.ped
##map file
map<-read.table("mappos3.txt", header=F)
> chrom<-map[,1]
> chrom[chrom == 11] <- 2
> chrom[chrom == 4] <- 11
> chrom[chrom == 1648] <- 4
> chrom[chrom == 1647] <- 18
> chrom[chrom == 1646] <- 3
> chrom[chrom == 1645] <- 8
> chrom[chrom == 1644] <- 14
> chrom[chrom == 1642] <- 7
> chrom[chrom == 1641] <- 9
> chrom[chrom == 1640] <- 15
> chrom[chrom == 1639] <- 10
> chrom[chrom == 1638] <- 19
> chrom[chrom == 1636] <- 5
> chrom[chrom == 1632] <- 13
> chrom[chrom == 1633] <- 21
> chrom[chrom == 1631] <- 23
> chrom[chrom == 1628] <- 1
> chrom[chrom == 1627] <- 22
> chrom[chrom == 1095] <- 21
> chrom[chrom == 833] <- 12
> chrom[chrom == 588] <- 20
> chrom[chrom == 833] <- 12
> chrom[chrom == 503] <- 16
> chrom[chrom == 309] <- 17
> chrom[chrom == 228] <- 6
> chrom[chrom == 255] <- 48
> chrom[chrom == 270] <- 50
> chrom[chrom == 385] <- 53
> chrom[chrom == 758] <- 69
> chrom[chrom == 1558] <- 86
> chrom[chrom == 1635] <- 90
> chrom[chrom == 1643] <- 91
> chrom[chrom == 1650] <- 93
snpid<-seq(1:1164)
pos<-rep(0,1164)
snppos<-map[,2]
map<-cbind(chrom,snpid,pos,snppos)
write.table(map,"aims3.map", quote=FALSE, col.names=FALSE, row.names=FALSE, sep=" ")
cp aims3.map bcr.map
cp aims3.map bic.map
cp aims3.map khl.map
cp aims3.map bic.map
cp aims3.map bld.map
cp aims3.map bnp.map
cp aims3.map bst.map
cp aims3.map btb.map
cp aims3.map cdy.map
cp aims3.map ckv.map
cp aims3.map dbs.map
cp aims3.map frc.map
cp aims3.map gnp.map
cp aims3.map lan.map
cp aims3.map mon.map
cp aims3.map pin.map
cp aims3.map psp.map
cp aims3.map rdl.map
cp aims3.map rnv.map
cp aims3.map sdc.map
cp aims3.map sin.map
cp aims3.map syc.map
cp aims3.map vic.map
cp aims3.map ywp.map
#run plink
plink --file bld --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bld
plink --file bld --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out bld --r2 --ld-window-r2 0.0 --ld-window-kb 10
plink --file dbs --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out dbs
plink --file dbs --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out dbs --r2 --ld-window-r2 0.0 --ld-window-kb 10
plink --file gnp --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out gnp
plink --file gnp --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out gnp --r2 --ld-window-r2 0.0 --ld-window-kb 10
plink --file sin --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out sin
plink --file sin --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --out sin --r2 --ld-window-r2 0.0 --ld-window-kb 10
## plots in r
## trying to see what plots look like ###
getQuants<-function(input){
pop<-read.table(input, header=T)
pop<-pop[order(pop$BP_B-pop$BP_A),]
dsn<-pop$BP_B-pop$BP_A
ldn<-pop$R2
#get quantiles
s1<-which(dsn > 0 & dsn < 100)
sq1<-quantile(ldn[s1],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s2<-which(dsn > 100 & dsn < 200)
sq2<-quantile(ldn[s2],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s3<-which(dsn > 200 & dsn < 500)
sq3<-quantile(ldn[s3],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s4<-which(dsn > 500 & dsn < 1000)
sq4<-quantile(ldn[s4],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s5<-which(dsn > 1000 & dsn < 2000)
sq5<-quantile(ldn[s5],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s6<-which(dsn > 2000 & dsn < 5000)
sq6<-quantile(ldn[s6],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s7<-which(dsn > 5000 & dsn < 10000)
sq7<-quantile(ldn[s7],probs=c(.75,.9,.95,.99),na.rm=TRUE)
index = which(files == input)
outname = paste('quants', index, '.txt', sep = "")
quant_mat<-rbind(sq1,sq2,sq3,sq4,sq5,sq6,sq7)
write.table(quant_mat, file=outname, sep=" ", row.names=FALSE, quote=FALSE)
}
files= list.files(path="./ldfiles", pattern = ".ld", full.names = TRUE)
for (i in 1:length(files)){
getQuants(files[i])
}
bld<-read.table("quants1.txt", header=T)
dbs<-read.table("quants2.txt", header=T)
gnp<-read.table("quants3.txt", header=T)
sin<-read.table("quants4.txt", header=T)
col = rainbow(3)
pdf("ld_aims3.pdf", width=8, height=10, bg="white")
par(mfrow=c(4,1))
par(mar=c(6,6,4,2))
plot(bld[,1], type="b", pch=19, col=col[1],ylim=c(0.005,1),xlab=" ",ylab="LD(r^2)"
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(bld[,2], type="b", pch=19, col=col[2], lty =1)
lines(bld[,3], type="b", pch=19, col=col[3], lty =1)
title(main="A). BLD, N = 74",cex.main = 1.6, adj=0)
plot(dbs[,1], type="b", pch=19, col=col[1],ylim=c(0.005,1),xlab="",ylab="LD(r^2)"
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(dbs[,2], type="b", pch=19, col=col[2], lty =1)
lines(dbs[,3], type="b", pch=19, col=col[3], lty =1)
title(main="B). DBS, N = 115",cex.main = 1.6, adj=0)
plot(gnp[,1], type="b", pch=19, col=col[1],ylim=c(0.005,1),xlab="",ylab="LD(r^2)"
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(gnp[,2], type="b", pch=19, col=col[2], lty =1)
lines(gnp[,3], type="b", pch=19, col=col[3], lty =1)
title(main="C). GNP, N = 98",cex.main = 1.6, adj=0)
plot(sin[,1], type="b", pch=19, col=col[1],ylim=c(0.005,1),xlab="Distance (Kb)",ylab="LD(r^2)"
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(sin[,2], type="b", pch=19, col=col[2], lty =1)
lines(sin[,3], type="b", pch=19, col=col[3], lty =1)
title(main="D). SIN, N = 97",cex.main = 1.6, adj=0)
dev.off()
### redoing plink with maf filter for all of the SNPs
plink --file bld --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --maf 0.05 --out bld_maf --r2 --ld-window-r2 0.0 --ld-window-kb 10
plink --file dbs --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --maf 0.05 --out dbs_maf --r2 --ld-window-r2 0.0 --ld-window-kb 10
plink --file gnp --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --maf 0.05 --out gnp_maf --r2 --ld-window-r2 0.0 --ld-window-kb 10
plink --file sin --chr-set 93 --no-fid --no-parents --no-sex --no-pheno --maf 0.05 --out sin_maf --r2 --ld-window-r2 0.0 --ld-window-kb 10
## plots in r
## trying to see what plots look like ###
getQuants<-function(input){
pop<-read.table(input, header=T)
pop<-pop[order(pop$BP_B-pop$BP_A),]
dsn<-pop$BP_B-pop$BP_A
ldn<-pop$R2
#get quantiles
s1<-which(dsn > 0 & dsn < 100)
sq1<-quantile(ldn[s1],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s2<-which(dsn > 100 & dsn < 200)
sq2<-quantile(ldn[s2],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s3<-which(dsn > 200 & dsn < 500)
sq3<-quantile(ldn[s3],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s4<-which(dsn > 500 & dsn < 1000)
sq4<-quantile(ldn[s4],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s5<-which(dsn > 1000 & dsn < 2000)
sq5<-quantile(ldn[s5],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s6<-which(dsn > 2000 & dsn < 5000)
sq6<-quantile(ldn[s6],probs=c(.75,.9,.95,.99),na.rm=TRUE)
s7<-which(dsn > 5000 & dsn < 10000)
sq7<-quantile(ldn[s7],probs=c(.75,.9,.95,.99),na.rm=TRUE)
index = which(files == input)
outname = paste('maf_quants', index, '.txt', sep = "")
quant_mat<-rbind(sq1,sq2,sq3,sq4,sq5,sq6,sq7)
write.table(quant_mat, file=outname, sep=" ", row.names=FALSE, quote=FALSE)
}
files= list.files(path="./ldfiles", pattern = "maf", full.names = TRUE)
for (i in 1:length(files)){
getQuants(files[i])
}
bld<-read.table("maf_quants1.txt", header=T)
dbs<-read.table("maf_quants2.txt", header=T)
gnp<-read.table("maf_quants3.txt", header=T)
sin<-read.table("maf_quants4.txt", header=T)
col = rainbow(3)
pdf("ld_maf.pdf", width=8, height=10, bg="white")
par(mfrow=c(4,1))
par(mar=c(6,6,4,2))
plot(bld[,1], type="b", pch=19, col=col[1],ylim=c(0.005,1),xlab=" ",ylab="LD(r^2)"
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(bld[,2], type="b", pch=19, col=col[2], lty =1)
lines(bld[,3], type="b", pch=19, col=col[3], lty =1)
title(main="A). BLD, N = 74",cex.main = 1.6, adj=0)
plot(dbs[,1], type="b", pch=19, col=col[1],ylim=c(0.005,1),xlab="",ylab="LD(r^2)"
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(dbs[,2], type="b", pch=19, col=col[2], lty =1)
lines(dbs[,3], type="b", pch=19, col=col[3], lty =1)
title(main="B). DBS, N = 115",cex.main = 1.6, adj=0)
plot(gnp[,1], type="b", pch=19, col=col[1],ylim=c(0.005,1),xlab="",ylab="LD(r^2)"
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(gnp[,2], type="b", pch=19, col=col[2], lty =1)
lines(gnp[,3], type="b", pch=19, col=col[3], lty =1)
title(main="C). GNP, N = 98",cex.main = 1.6, adj=0)
plot(sin[,1], type="b", pch=19, col=col[1],ylim=c(0.005,1),xlab="Distance (Kb)",ylab="LD(r^2)"
, main = "", cex.lab=2, cex.main=1.5, cex.axis=1)
lines(sin[,2], type="b", pch=19, col=col[2], lty =1)
lines(sin[,3], type="b", pch=19, col=col[3], lty =1)
title(main="D). SIN, N = 97",cex.main = 1.6, adj=0)
dev.off()