Code to figure out weird individuals and drop them from PCA
cols<-c("yellow", "honeydew3" , "pink", "olivedrab", "blue", "magenta", "brown", "tomato", "black", "black","darkgray", "lightblue","orange", "gold" , "seagreen1" , "plum" , "darkslateblue" , "darkgreen" , "maroon","gray" ,"tan","darkred" ,"salmon","mediumorchid4")
palette(cols)
plot(pc_drop_dbs[,3], pc_drop_dbs[,4], col=family, pch=18, cex=3)
legend("topright", levels(family), fill=cols)
#[1] "BCR" "BIC" "BLD-" "BNP" "BST-" "BTB" "CDY" "CKV" "DBS" "DBS-"
#[11] "FRC-" "GNP" "KHL" "LAN" "MON" "PIN" "PSP" "RDL" "RNV" "SDC"
#[21] "SIN" "SYC" "VIC" "YWP"
#combine data frame
pc_drop<-cbind(pops,g.pca$x[,1], g.pca$x[,2])
pc.bld<-pc_drop[which(pc_drop[,1] == "BLD-"),]
pc.pin<-pc_drop[which(pc_drop[,1] == "PIN"),]
pc.dbs<-pc_drop[which(pc_drop[,1] == "DBS"),]
pc.dbs2<-pc_drop[which(pc_drop[,1] == "DBS-"),]
pc.dbs<-rbind(pc.dbs,pc.dbs2)
pc.sdc<-pc_drop[which(pc_drop[,1] == "SDC"),]
pc.bcr<-pc_drop[which(pc_drop[,1] == "BCR"),]
pc.ywp<-pc_drop[which(pc_drop[,1] == "YWP"),]
pc.rdl<-pc_drop[which(pc_drop[,1] == "RDL"),]
pc.gnp<-pc_drop[which(pc_drop[,1] == "GNP"),]
#find the individuals with weird scores
pc.bld[which(pc.bld[,4] > 15 & pc.gnp[,4] < 20),]
pc.dbs[which(pc.dbs[,4] > 18 & pc.dbs[,4] < 20),]
pc.ywp[which(pc.ywp[,4] > 15 & pc.ywp[,4] < 20),]
pc.bcr[which(pc.bcr[,4] > 15 & pc.bcr[,4] < 20),]
pc.pin[which(pc.pin[,4] > 15 & pc.pin[,4] < 20),]
pc.sdc[which(pc.sdc[,4] > 15 & pc.sdc[,4] < 20),]
pc.rdl[which(pc.rdl[,4] > 15 & pc.rdl[,4] < 20),]
pc.gnp[which(pc.gnp[,3] > 0 & pc.gnp[,3] < 15),]
#drop these rows to plot PCA
pc_drop_p<-pc_drop[-c(44,115,323,406,566,605,624,675,830),]
plot(pc_drop_p[,3], pc_drop_p[,4], col=family, pch=18, cex=3)
legend("topright", levels(family), fill=cols)
#samples with low scores and I will drop them from original PCA
V1 samids g.pca$x[, 1] g.pca$x[, 2]
V44 BCR BCR12-37F.fastq 10.82537 16.04421
V115 BLD- BLD-14-25.fastq 12.40999 15.78816
V323 DBS- DBS-13-21.fastq 12.147474 15.52694
##V373 DBS- DBS-16-25.fastq -7.613613 16.95704
V406 GNP GNP08-10M.fastq 12.47793 15.75869
V566 PIN PIN12-10M.fastq 9.752396 16.72925
V605 RDL RDL12-09M.fastq 12.43513 15.62324
V624 RDL RDL12-28F.fastq 12.60747 15.67070
V675 SDC SDC09-02F.fastq 12.54173 15.21183
V830 YWP YWP12-23M.fastq 8.843105 16.44815
# Go to R in: /uufs/chpc.utah.edu/common/home/u6007910/projects/lyc_dubois/lycaeides_dubois/Alignments/bamfiles/pca_coverage
Script saved as: plots_cov.R in this folder
##coverage
bcr<-read.table("memBCR12-37F.fastq.sorted.bam.ids", header=F)
bld<-read.table("memBLD-14-25F.fastq.sorted.bam.ids", header=F)
dbs1<-read.table("memDBS-13-21F.fastq.sorted.bam.ids", header=F)
dbs2<-read.table("memDBS-16-25F.fastq.sorted.bam.ids", header=F)
gnp<-read.table("memGNP08-10M.fastq.sorted.bam.ids", header=F)
rdl1<-read.table("memRDL12-09M.fastq.sorted.bam.ids", header=F)
rdl2<-read.table("memRDL12-28F.fastq.sorted.bam.ids", header=F)
sdc<-read.table("memSDC09-02F.fastq.sorted.bam.ids", header=F)
ywp<-read.table("memYWP12-23M.fastq.sorted.bam.ids", header=F)
bcr.scafs = ddply(bcr, "bcr[,1]", numcolwise(mean))
bld.scafs = ddply(bld, "bld[,1]", numcolwise(mean))
dbs1.scafs = ddply(dbs1, "dbs1[,1]", numcolwise(mean))
dbs2.scafs = ddply(dbs2, "dbs2[,1]", numcolwise(mean))
gnp.scafs = ddply(gnp, "gnp[,1]", numcolwise(mean))
rdl1.scafs = ddply(rdl1, "rdl1[,1]", numcolwise(mean))
rdl2.scafs = ddply(rdl2, "rdl2[,1]", numcolwise(mean))
sdc.scafs = ddply(sdc, "sdc[,1]", numcolwise(mean))
ywp.scafs = ddply(ywp, "ywp[,1]", numcolwise(mean))
par(mfrow=c(3,3))
plot(bcr.scafs$V3, main="BCR", ylab="# mapped reads", xlab="Scaffold")
plot(bld.scafs$V3, main="BLD", ylab="# mapped reads", xlab="Scaffold")
plot(dbs1.scafs$V3,main="DBS", ylab="# mapped reads", xlab="Scaffold")
plot(gnp.scafs$V3, main="GNP", ylab="# mapped reads", xlab="Scaffold")
plot(rdl1.scafs$V3, main="RDL1", ylab="# mapped reads", xlab="Scaffold")
plot(rdl2.scafs$V3, main="RDL2", ylab="# mapped reads", xlab="Scaffold")
plot(sdc.scafs$V3, main="SDC", ylab="# mapped reads", xlab="Scaffold")
dev.copy(pdf, "pca_lowcov_inds.pdf")
dev.off()