Comparison Wordcloud wordclouds
# Example: mobile carriers
#
# Builds a comparison cloud and a commonality cloud from the recent tweets of
# four mobile carriers (AT&T, Verizon, T-Mobile, MetroPCS).

# Packages providing the functions called below: userTimeline (twitteR),
# stopwords/removeWords/Corpus/VectorSource/TermDocumentMatrix (tm),
# comparison.cloud/commonality.cloud (wordcloud), brewer.pal (RColorBrewer).
library(twitteR)
library(tm)
library(wordcloud)
library(RColorBrewer)

# Step 1: collect tweets from mobile companies ----
# NOTE(review): userTimeline() presumably requires an authenticated Twitter
# session to have been set up beforehand -- confirm against the setup code.

# att tweets
att_tweets <- userTimeline("ATT", n = 1000)
# verizon tweets
ver_tweets <- userTimeline("Verizon", n = 1000)
# t-mobile tweets
mob_tweets <- userTimeline("TMobile", n = 1000)
# metro pcs tweets
pcs_tweets <- userTimeline("MetroPCS", n = 1000)

# Step 2: extract text ----
# vapply() guarantees a character vector even when a timeline comes back
# empty (sapply() would silently return an empty list instead).
att_txt <- vapply(att_tweets, function(x) x$getText(), character(1))
ver_txt <- vapply(ver_tweets, function(x) x$getText(), character(1))
mob_txt <- vapply(mob_tweets, function(x) x$getText(), character(1))
pcs_txt <- vapply(pcs_tweets, function(x) x$getText(), character(1))

# Step 3: clean text ----

#' Normalise raw tweet text for word counting
#'
#' Lower-cases the text, then strips links, the standalone retweet marker
#' "rt", @mentions, punctuation and digits, collapses whitespace runs to a
#' single space and trims both ends.
#'
#' @param x A character vector of tweet texts.
#' @return A character vector of the same length as `x` with cleaned text.
clean.text <- function(x) {
  # tolower
  x <- tolower(x)
  # remove links while "://", "." and "/" are still present to delimit them;
  # stripping punctuation first would fuse the URL with adjacent words
  x <- gsub("http\\S+", "", x)
  # remove the retweet marker only as a whole token, so that words merely
  # containing "rt" (e.g. "smart", "support") are left intact
  x <- gsub("\\brt\\b", "", x)
  # remove @mentions
  x <- gsub("@\\w+", "", x)
  # remove punctuation
  x <- gsub("[[:punct:]]", "", x)
  # remove numbers
  x <- gsub("[[:digit:]]", "", x)
  # collapse whitespace runs to ONE space; replacing them with "" would glue
  # neighbouring words together
  x <- gsub("[[:space:]]+", " ", x)
  # remove blank spaces at the beginning and at the end
  trimws(x)
}

# Step 4: apply function clean.text ----
att_clean <- clean.text(att_txt)
ver_clean <- clean.text(ver_txt)
mob_clean <- clean.text(mob_txt)
pcs_clean <- clean.text(pcs_txt)

# Step 5: join texts in a vector for each company ----
att <- paste(att_clean, collapse = " ")
ver <- paste(ver_clean, collapse = " ")
mob <- paste(mob_clean, collapse = " ")
pcs <- paste(pcs_clean, collapse = " ")

# put everything in a single vector, one element per carrier
# (named carriers_txt rather than `all` to avoid masking base::all)
carriers_txt <- c(att, ver, mob, pcs)

# Step 6: let's remove stopwords ----
# the carriers' own names are dropped too
carriers_txt <- removeWords(
  carriers_txt,
  c(stopwords("english"), "att", "verizon", "tmobile", "metropcs")
)

# Step 7: corpus and term-document matrix ----
# create corpus
corpus <- Corpus(VectorSource(carriers_txt))
# create term-document matrix
tdm <- TermDocumentMatrix(corpus)
# convert as matrix
tdm <- as.matrix(tdm)
# add column names (same order as the elements of carriers_txt)
colnames(tdm) <- c("ATT", "Verizon", "T-Mobile", "MetroPCS")

# Colours are defined once so the on-screen and PDF plots stay in sync.
comparison_colors <- c("#00B2FF", "red", "#FF0099", "#6600CC")
commonality_colors <- brewer.pal(8, "Dark2")

# Step 8: plot comparison wordcloud ----
comparison.cloud(tdm, random.order = FALSE, colors = comparison_colors,
                 title.size = 1.5, max.words = 500)

# Step 9: plot commonality cloud ----
commonality.cloud(tdm, random.order = FALSE, colors = commonality_colors,
                  title.size = 1.5)

# If you want to save the images in nice pdf format ----
pdf("CarriersCompCloud.pdf", width = 8, height = 8)
comparison.cloud(tdm, random.order = FALSE, colors = comparison_colors,
                 title.size = 1.5, max.words = 500)
dev.off()

pdf("CarriersCommCloud.pdf", width = 8, height = 8)
commonality.cloud(tdm, random.order = FALSE, colors = commonality_colors,
                  title.size = 1.5)
dev.off()
© Gaston Sanchez