Comparison Wordcloud    wordclouds

The other type of graphic in the wordcloud package is the comparison wordcloud, which is really cool.


Example: mobile carriers

Step 1: collect tweets from mobile companies
# AT&T tweets
att_tweets <- userTimeline("ATT", n = 1000)

# Verizon tweets
ver_tweets <- userTimeline("Verizon", n = 1000)

# T-Mobile tweets
mob_tweets <- userTimeline("TMobile", n = 1000)

# MetroPCS tweets
pcs_tweets <- userTimeline("MetroPCS", n = 1000)


Step 2: extract text
# Pull the text out of every tweet object.
# vapply() is used instead of sapply() so the result is always a character
# vector (character(0) for an empty timeline) rather than a type that
# depends on the input.
att_txt <- vapply(att_tweets, function(x) x$getText(), character(1))
ver_txt <- vapply(ver_tweets, function(x) x$getText(), character(1))
mob_txt <- vapply(mob_tweets, function(x) x$getText(), character(1))
pcs_txt <- vapply(pcs_tweets, function(x) x$getText(), character(1))


Step 3: clean text
# Normalise raw tweet text for word counting.
#
# Argument:
#   x  character vector of tweet texts (may be empty).
# Returns:
#   A character vector of the same length as `x`, lower-cased, with the
#   retweet marker, @mentions, punctuation, digits and links removed, runs of
#   blanks/tabs collapsed to one space, and no leading or trailing blanks.
clean.text <- function(x) {
  # lower-case everything so later matching is case-insensitive
  x <- tolower(x)
  # drop the retweet marker only when it is a word of its own; a bare "rt"
  # pattern would also eat the middle of words such as "party" or "smart"
  x <- gsub("\\brt\\b", "", x)
  # drop @mentions
  x <- gsub("@\\w+", "", x)
  # drop punctuation; this also squeezes a URL into one "httpxxxx" token
  x <- gsub("[[:punct:]]", "", x)
  # drop digits
  x <- gsub("[[:digit:]]", "", x)
  # drop links (already reduced to a single token starting with "http")
  x <- gsub("http\\w+", "", x)
  # collapse runs of blanks/tabs into ONE space; replacing them with nothing
  # would glue the neighbouring words together
  x <- gsub("[ \t]+", " ", x)
  # trim blanks at both ends
  x <- gsub("^ +| +$", "", x)
  x
}


Step 4: Apply function clean.text
# run each carrier's raw tweet text through clean.text()
att_clean <- clean.text(att_txt)
ver_clean <- clean.text(ver_txt)
mob_clean <- clean.text(mob_txt)
pcs_clean <- clean.text(pcs_txt)


Step 5: Join the texts of each company into a single string
# one long string per carrier: its cleaned tweets joined by single spaces
att <- paste(att_clean, collapse = " ")
ver <- paste(ver_clean, collapse = " ")
mob <- paste(mob_clean, collapse = " ")
pcs <- paste(pcs_clean, collapse = " ")

# combine the four strings into a single character vector
all <- c(att, ver, mob, pcs)


Step 6: Let's remove stopwords
# strip English stop-words and the carriers' own names from every document
all <- removeWords(
  all,
  c(stopwords("english"), "att", "verizon", "tmobile", "metropcs")
)


Step 7: Corpus and term-document matrix
# build a corpus from the vector of carrier documents
corpus <- Corpus(VectorSource(all))

# term-document matrix, converted straight to a plain matrix
tdm <- as.matrix(TermDocumentMatrix(corpus))

# label the document columns with the carrier names
colnames(tdm) <- c("ATT", "Verizon", "T-Mobile", "MetroPCS")



Step 8: Plot comparison wordcloud 
# draw the comparison cloud, one colour per column of tdm
comparison.cloud(
  tdm,
  random.order = FALSE,
  colors = c("#00B2FF", "red", "#FF0099", "#6600CC"),
  title.size = 1.5,
  max.words = 500
)



Step 9: Plot commonality cloud
# draw the commonality cloud
# Note: no title.size here. It is not an argument of commonality.cloud();
# it would be forwarded through `...` to wordcloud()/text(), where it is not
# a recognised parameter.
commonality.cloud(
  tdm,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)



If you want to save the images in a nice PDF format:
# write the comparison cloud to an 8 x 8 PDF, then close the device
pdf(file = "CarriersCompCloud.pdf", width = 8, height = 8)
comparison.cloud(
  tdm,
  random.order = FALSE,
  colors = c("#00B2FF", "red", "#FF0099", "#6600CC"),
  title.size = 1.5,
  max.words = 500
)
dev.off()

# write the commonality cloud to an 8 x 8 PDF, then close the device
# Note: no title.size here. It is not an argument of commonality.cloud();
# it would be forwarded through `...` to wordcloud()/text(), where it is not
# a recognised parameter.
pdf("CarriersCommCloud.pdf", width = 8, height = 8)
commonality.cloud(
  tdm,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)
dev.off()


Comments