Plot PC1 spatially

Post date: Nov 18, 2019 2:59:36 AM

I kept the PC1 score for every sample for which we have a coordinate (274 out of 296, check with Karen), and map them according to their PC1 value (no filtering).

The map, rather ugly but informative, can be foundhere.

It looks like the Pando clone clusters on positive values of PC1, and separates pretty well from the other genotypes.

Updated post: also look when read depth is constrained to be > 4.

###############################################################

# R code used

# Draw the genetic variance on the spatial map.

# The objective of this script is to keep the PC score for each tree and to map it in space.

# What the PCs tell you is where the largest variance in the dataset lies.

# PCA to visualize the data

#install.packages('FactoMineR')

library(jpeg)

library(Cairo)

library(FactoMineR)

library(factoextra)

library(dplyr)

# Work with FactoMineR

#http://factominer.free.fr/factomethods/principal-components-analysis.html

######## Prepare the data ##############

pntest <- as.matrix(read.table("pntest_filtered2xHiCov_pando_variants.txt"))

names <- as.matrix(read.table("sampleID.txt"))

type <- as.matrix(read.table("type_clean.txt"))

# Specify which individuals are Pando

track_type = rep("aroundPando",each=length(type))

for (i in c(which(type == "potr-017-B"):(which(type == "potr-277-B"))))

{

track_type[i] <- "Pando"

}

for (i in c(which(type == "potr-603-B"):(which(type == "potr-659-B"))))

{

track_type[i] <- "nearbyClone"

}

dp <- as.matrix(read.table("depth_matrix.txt"))

dp <- t(dp)

indMean <- rowMeans(dp)

size <- as.matrix(read.table("size.txt"))

size <- as.factor(size)

pairs <- as.matrix(read.table("pairs.txt"))

pairs <- as.factor(pairs)

data <- rbind(indMean,size,pairs,names,track_type,pntest)

data <- as.data.frame(t(data))

data[1:10,1:10]

#data <- rbind(type,pntest)

# Transpose matrix to have it with individuals as rows and variables as columns

#data[,c(1,2)] <- as.character(data[,c(1,2)])

for (i in c(6:dim(data)[2]))

{

data[,i] <- as.numeric(as.character(unlist(data[[i]])))

}

###################################################

# Get the pairs that should be deleted because there is not enough reads

##### read depth

rdp <- 4

ind <- which(data$indMean>rdp)

data_1 <- data[ind,]

##################################################

# Do the pca now

res.pca <- prcomp(data[,-c(1:5)], center = TRUE)

# Store PC1 coordinates

PC1coor <- res.pca$x[,1]

###############################################

# Mapping

samps <- read.csv("PandoSamples.csv")

samps[1,]

samps <- samps[order(samps[,1]),]

# Associate the samples PC1 with their coordinates

common <- intersect(samps[,1],data$V4)

# Keep the samples that pairs up

idx_coor <- c()

idx_PC1 <- c()

pairvec <- common

for (i in c(1:length(pairvec)))

{

tempA <- which(data$V4==pairvec[i])

tempB <- which(samps[,1]==pairvec[i])

idx_coor[i] <- tempB

idx_PC1[i] <- tempA

tempA <- c()

tempB <- c()

}

# There is 274 individuals with coordinate data (why not 296?)

data_2 <- data[idx_PC1,]

PC1_map <- PC1coor[idx_PC1]

coor <- samps[idx_coor,]

# Create a unique dataset containing coordinates, PC1

# They should be the same order, they are the same length

data_3 <- cbind(coor,PC1_map)

# Assign a color according to the value of the coordinate

#Create a function to generate a continuous color palette

rbPal <- colorRampPalette(c('purple4','deeppink'))

rbPal <- colorRampPalette(c('yellow','red'))

#This adds a column of color values

# based on the y values

data_3$color <- rbPal(100)[as.numeric(cut(data_3[,4],breaks = 100))]

#h <- ggmap(base)

h <- ggplot(data_3,aes(x=data_3[,2], y=data_3[,3]))

h <- h + geom_point(aes(colour = data_3[,4]), cex = 3.5) + scale_colour_gradient2(guide="colourbar", low = 'lightblue', high = 'darkblue')

h <- h + theme_dark() + theme(legend.position="bottom", axis.text = element_text(size = rel(0.75)))# # tweak the plot's appearance and legend position

h <- h + labs(x="Longitude", y="Latitude", title="Collection sites")

#####################################################

Code used to filter read depth

###################################################

# Get the pairs that should be deleted because there is not enough reads

##### read depth

rdp <- 4

data_1 <- data[indMean>rdp,]

dim(data_1)

data_1[1:10,1:10]

##################################################

PCA was done after filtering.

Page updated

Google Sites

Report abuse