comparison cloud
Comparison Wordclouds
The other type of graphic in the wordcloud package is the comparison wordcloud, which is really cool.
Example: mobile carriers
Step 1: collect tweets from mobile companies
# Request 1000 tweets (n = 1000) from the timeline of each carrier account.
# AT&T timeline
att_tweets = userTimeline("ATT", n = 1000)
# Verizon timeline
ver_tweets = userTimeline("Verizon", n = 1000)
# T-Mobile timeline
mob_tweets = userTimeline("TMobile", n = 1000)
# MetroPCS timeline
pcs_tweets = userTimeline("MetroPCS", n = 1000)
Step 2: extract text
# Pull the text out of every tweet object, one string per tweet.
# vapply() is used instead of sapply() so the result is always a character
# vector: sapply() would hand back an empty list for an empty timeline, and
# vapply() also fails loudly if getText() ever returns anything other than
# a single string.
att_txt = vapply(att_tweets, function(x) x$getText(), character(1))
ver_txt = vapply(ver_tweets, function(x) x$getText(), character(1))
mob_txt = vapply(mob_tweets, function(x) x$getText(), character(1))
pcs_txt = vapply(pcs_tweets, function(x) x$getText(), character(1))
Step 3: clean text
# Normalise raw tweet text so that only lowercase words separated by single
# spaces remain.
#
# Args:
#   x: character vector of tweet texts (one element per tweet).
#
# Returns:
#   A character vector of the same length as x with retweet markers,
#   @mentions, punctuation, digits and links removed, whitespace collapsed
#   to single spaces, and no leading or trailing space.
clean.text = function(x)
{
  # lower-case first so every later pattern only has to match lowercase
  x = tolower(x)
  # remove the retweet marker, but only as a whole word: a bare "rt" pattern
  # would also delete the letters inside words such as "party" or "smart"
  x = gsub("\\brt\\b", "", x)
  # remove @mentions
  x = gsub("@\\w+", "", x)
  # remove punctuation
  x = gsub("[[:punct:]]", "", x)
  # remove numbers
  x = gsub("[[:digit:]]", "", x)
  # remove links: with punctuation and digits already stripped, a URL has
  # collapsed into a single token that starts with "http"
  x = gsub("http\\w+", "", x)
  # collapse every run of whitespace (spaces, tabs, newlines) into ONE space;
  # replacing the run with "" would glue the neighbouring words together
  x = gsub("[[:space:]]+", " ", x)
  # after collapsing, at most one space is left at either end: trim it
  x = gsub("^ | $", "", x)
  x
}
Step 4: Apply function clean.text
# Run each carrier's raw tweet text through clean.text.
att_clean = clean.text(x = att_txt)
ver_clean = clean.text(x = ver_txt)
mob_clean = clean.text(x = mob_txt)
pcs_clean = clean.text(x = pcs_txt)
Step 5: Join texts in a vector for each company
# Collapse each carrier's cleaned tweets into one space-separated string,
# so that every carrier becomes a single "document".
att = paste(att_clean, collapse = " ")
ver = paste(ver_clean, collapse = " ")
mob = paste(mob_clean, collapse = " ")
pcs = paste(pcs_clean, collapse = " ")
# Gather the four documents in one vector (order: ATT, Verizon, TMobile,
# MetroPCS). NOTE: the name `all` masks base::all() for the rest of the script.
all = c(
  att,
  ver,
  mob,
  pcs
)
Step 6: Let's remove stopwords
# Drop English stopwords together with the carriers' own names, written the
# way they appear after cleaning (lowercase, no punctuation).
all = removeWords(
  all,
  c(
    stopwords("english"),
    "att", "verizon", "tmobile", "metropcs"
  )
)
Step 7: Corpus and term-document matrix
# Wrap the four carrier documents in a corpus.
corpus = Corpus(VectorSource(all))
# Build the term-document matrix and convert it straight to a plain matrix.
tdm = as.matrix(TermDocumentMatrix(corpus))
# Label the document columns with the carrier names, in the same order the
# documents were put into `all`.
colnames(tdm) = c(
  "ATT",
  "Verizon",
  "T-Mobile",
  "MetroPCS"
)
Step 8: Plot comparison wordcloud
# Draw the comparison cloud: one colour per carrier column of tdm,
# limited to 500 words.
comparison.cloud(
  tdm,
  max.words = 500,
  random.order = FALSE,
  title.size = 1.5,
  colors = c("#00B2FF", "red", "#FF0099", "#6600CC")
)
Step 9: Plot commonality cloud
# Draw the commonality cloud from the same term matrix.
# title.size is deliberately not passed here: it is an argument of
# comparison.cloud only. commonality.cloud forwards unknown arguments on to
# the low-level text() call, where title.size merely triggers
# "not a graphical parameter" warnings.
commonality.cloud(
  tdm,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)
If you want to save the images in a nice PDF format:
# Save both clouds as 8 x 8 inch PDF files in the working directory.
# Each plot call is wrapped in tryCatch(..., finally = dev.off()) so the PDF
# device is always closed, even when plotting fails; otherwise a failed plot
# would leave the device open and the file unfinished.

# comparison cloud
pdf("CarriersCompCloud.pdf", width = 8, height = 8)
tryCatch(
  comparison.cloud(tdm, random.order = FALSE,
                   colors = c("#00B2FF", "red", "#FF0099", "#6600CC"),
                   title.size = 1.5, max.words = 500),
  finally = dev.off()
)

# commonality cloud (no title.size: that argument belongs to comparison.cloud
# only and would just be forwarded to text(), producing warnings)
pdf("CarriersCommCloud.pdf", width = 8, height = 8)
tryCatch(
  commonality.cloud(tdm, random.order = FALSE,
                    colors = brewer.pal(8, "Dark2")),
  finally = dev.off()
)
© Gaston Sanchez