Post date: Nov 26, 2019 7:6:50 PM
k-means for k=4 did not cluster the data as observed for PC2.
I use an euclidean distance matrix to make clusters for the PC2 axis. I label each cluster on the PCA plot and on the map using the same legend.
Plots can be foundhere.
The data is processed in main.R script, and calls the two following scripts:
1- Make the clusters and the PCA plot:
######################################################
res.pca <- prcomp(data_1[,-c(1:5)], center = TRUE)
# ----------------------------------------------------------------------------#
######## Make the clusters ##############
# Calculate distance
dist <- res.pca$x[,c(1,2)], method = "euclidean")
# Make clusters using Euclidean distance
fit <- hclust(d, method="ward.D")
# Cluster labels in a vector
clusID <- cutree(fit, k=4)
# Plot the PCA and color according to clusID
pdf("PCA4clusters.pdf")
fviz_pca_ind(res.pca,
axes=c(1,2),
label = "none", # individual labels
habillage = clusID,
palette = c("#5e3c99", "#737373", "#d01c8b", "#2b8cbe"),
point.size = 5,
title = "Color by cluster"
)
dev.off()
# Export the data with the cluster information
data_3 <- cbind(clusID, data_1)
#############################################################
2- Make the map:
############################################################
PCcoor <- res.pca$x[,2]
# Associate the samples PC2 with their coordinates
common <- intersect(coor[,1],data_1$V4)
length(common)
# 217 samples
# Keep the samples that pairs up
idx_coor <- c()
idx_PC <- c()
pairvec <- common
for (i in c(1:length(pairvec)))
{
tempA <- which(data_1$V4==pairvec[i])
tempB <- which(coor[,1]==pairvec[i])
idx_coor[i] <- tempB
idx_PC[i] <- tempA
tempA <- c()
tempB <- c()
}
data_2 <- data_1[idx_PC,]
PC_map <- PCcoor[idx_PC]
coor_1 <- coor[idx_coor,]
clusID <- clusID[idx_PC]
# Create a unique dataset containing coordinates, PC1
# They should be the same order, they are the same length
data_3 <- cbind(coor_1,clusID,PC_map)
data_3$clusID <- as.factor(data_3$clusID)
# Make a map and colour by cluster
pdf(file="PC2ClusterMap.pdf")
base = get_map(location=c(-111.77,38.51,-111.738,38.5350), zoom=15, maptype="terrain-background")
myMap <- ggmap(base)
myMap <- myMap + geom_point(data=data_3, aes(x=long, y=lat, colour=clusID), cex = 2.5) + scale_color_manual(values=c("#5e3c99", "#737373", "#d01c8b", "#2b8cbe"))
myMap <- myMap + labs(x="Longitude", y="Latitude", title="PC2 score", colour="Cluster")
myMap <- myMap + theme_bw() + theme(legend.position="bottom", axis.text = element_text(size = rel(0.75)))
myMap
dev.off()
#############################################################