comparison cloud

Comparison wordclouds

The other type of graphic in the wordcloud package is the comparison wordcloud, which is really cool.

Example: mobile carriers

Step 1: collect tweets from mobile companies

# Request n = 1000 tweets from each carrier's timeline via userTimeline().

# AT&T tweets
att_tweets <- userTimeline("ATT", n = 1000)

# Verizon tweets
ver_tweets <- userTimeline("Verizon", n = 1000)

# T-Mobile tweets
mob_tweets <- userTimeline("TMobile", n = 1000)

# MetroPCS tweets
pcs_tweets <- userTimeline("MetroPCS", n = 1000)

Step 2: extract text

# Extract the text of every tweet, one character vector per carrier.
# vapply() with a character(1) template is used instead of sapply() so the
# result is always a character vector -- sapply() would silently return an
# empty list() if a timeline came back with no tweets.
att_txt <- vapply(att_tweets, function(x) x$getText(), character(1))

ver_txt <- vapply(ver_tweets, function(x) x$getText(), character(1))

mob_txt <- vapply(mob_tweets, function(x) x$getText(), character(1))

pcs_txt <- vapply(pcs_tweets, function(x) x$getText(), character(1))

Step 3: clean text

# Normalise raw tweet text for word counting.
#
# Args:
#   x: character vector of tweet texts (one element per tweet).
#
# Returns:
#   A character vector of the same length as `x`, lower-cased, with retweet
#   markers, @mentions, punctuation, digits and links removed, runs of
#   whitespace collapsed to a single space, and leading/trailing whitespace
#   trimmed.
clean.text <- function(x) {
  # lower-case everything so "RT"/"rt" and "Http"/"http" are treated alike
  x <- tolower(x)

  # remove the retweet marker only when it is a standalone word; an
  # unanchored "rt" would also eat the middle of words such as "party"
  x <- gsub("\\brt\\b", "", x)

  # remove @mentions
  x <- gsub("@\\w+", "", x)

  # remove punctuation
  x <- gsub("[[:punct:]]", "", x)

  # remove numbers
  x <- gsub("[[:digit:]]", "", x)

  # remove links: with punctuation already stripped, each URL has been
  # squashed into a single "http..." word, which is dropped here
  x <- gsub("http\\w+", "", x)

  # collapse every run of whitespace into ONE space; replacing it with ""
  # would glue the neighbouring words together
  x <- gsub("[[:space:]]+", " ", x)

  # drop leading and trailing whitespace
  trimws(x)
}

Step 4: Apply function clean.text

# Run every carrier's raw tweet text through clean.text().
att_clean <- clean.text(att_txt)
ver_clean <- clean.text(ver_txt)
mob_clean <- clean.text(mob_txt)
pcs_clean <- clean.text(pcs_txt)

Step 5: Join texts in a vector for each company

# Collapse each carrier's cleaned tweets into one space-separated string,
# so that every carrier becomes a single "document".
att <- paste(att_clean, collapse = " ")
ver <- paste(ver_clean, collapse = " ")
mob <- paste(mob_clean, collapse = " ")
pcs <- paste(pcs_clean, collapse = " ")

# Combine the four documents into one character vector.
# NOTE: the name `all` masks base::all() as a variable; it is kept because
# the following steps refer to it.
all <- c(att, ver, mob, pcs)

Step 6: Let's remove stopwords

# Strip English stop-words, plus the carriers' own names, from each document.
all <- removeWords(
  all,
  c(stopwords("english"), "att", "verizon", "tmobile", "metropcs")
)

Step 7: Corpus and term-document matrix

# Build a corpus from the character vector `all` (one document per element).
corpus <- Corpus(VectorSource(all))

# Build the term-document matrix and convert it straight to a plain matrix.
tdm <- as.matrix(TermDocumentMatrix(corpus))

# Label the columns; the order must follow the order of the documents in `all`.
colnames(tdm) <- c("ATT", "Verizon", "T-Mobile", "MetroPCS")

Step 8: Plot comparison wordcloud

# Draw the comparison cloud from the term-document matrix, one colour per
# column, limited to 500 words.
comparison.cloud(
  tdm,
  max.words = 500,
  title.size = 1.5,
  colors = c("#00B2FF", "red", "#FF0099", "#6600CC"),
  random.order = FALSE
)

Step 9: Plot commonality cloud

# Draw the commonality cloud from the term-document matrix.
# `title.size` is deliberately not passed: it is an argument of
# comparison.cloud(), not commonality.cloud(), so here it would only be
# forwarded through `...` to the text-drawing calls, which do not use it.
commonality.cloud(
  tdm,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)

If you want to save the images in PDF format:

# Write the comparison cloud to an 8 x 8 PDF file.
pdf("CarriersCompCloud.pdf", width = 8, height = 8)
comparison.cloud(
  tdm,
  max.words = 500,
  title.size = 1.5,
  colors = c("#00B2FF", "red", "#FF0099", "#6600CC"),
  random.order = FALSE
)
# Close the PDF device so the file is flushed to disk.
dev.off()

# Write the commonality cloud to an 8 x 8 PDF file.
pdf("CarriersCommCloud.pdf", width = 8, height = 8)
# `title.size` is deliberately not passed: it is an argument of
# comparison.cloud(), not commonality.cloud(), so here it would only be
# forwarded through `...` to the text-drawing calls, which do not use it.
commonality.cloud(
  tdm,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)
# Close the PDF device so the file is flushed to disk.
dev.off()

© Gaston Sanchez