23. Word Counter

This example shows how to count words in a text file using std::map. The example is a bit complicated because it also strips HTML and sorts the words by the number of occurrences.

Maps are a kind of associative container that stores elements formed by the combination of a key value and a mapped value.

#include "testApp.h"

// Frequency table type: maps a token to the number of times it was seen.
typedef std::map<std::string, int> Words;

// Token counts accumulated while the input file is read.
Words words;

// Lines read since the last progress dot was printed.
int lineCounter = 0;

// One entry of the frequency list: a token together with its occurrence count.
// Used to copy the (key, value) pairs of the word map into a vector so they
// can be sorted by count.
struct Word {
    // t: the token text, c: how many times it occurred.
    // The members are set in the initializer list and the string is taken by
    // const reference, so `text` is constructed exactly once instead of being
    // default-constructed and then assigned from a by-value copy.
    Word(const std::string& t, int c) : text(t), count(c) {}

    std::string text;  // the token itself
    int count;         // number of occurrences of `text`
};

// Ordering predicate for std::sort: returns true when `a` occurred fewer times
// than `b`, so sorting with it arranges words from least to most frequent.
bool sortByWordCount(const Word& a, const Word& b) {
    const int lhsCount = a.count;
    const int rhsCount = b.count;
    return rhsCount > lhsCount;
}

//--------------------------------------------------------------
void testApp::setup(){
    string filename = ofToDataPath("water20k.txt");
    ifstream f(filename.c_str(),ios::in);
    string line;
    while (getline(f,line)) {
        //lines.push_back(ofxTrimStringRight(line));
        vector<string> items = ofSplitString(line, " >");
        for (int i=0; i<items.size(); i++) {
            words[items[i]]++;
        }
        
        if (lineCounter++>1000) {
            lineCounter=0;
            cout << ".";
        }
    }
    cout << endl;
    f.close();


    vector<Word> sortedWords;
    
    for (Words::iterator it=words.begin(); it!=words.end(); ++it) {
        //string &word = it->second;
        //cout << it->first << ":" << it->second << endl;
        sortedWords.push_back(Word(it->first,it->second));
    }
    
    cout << "numWords=" << sortedWords.size() << endl;
    
    sort(sortedWords.begin(), sortedWords.end(), sortByWordCount);

    for (int i=0; i<sortedWords.size(); i++) {
        if (sortedWords[i].count>50 && 
            sortedWords[i].text[1]!='/' && 
            sortedWords[i].text[0]=='<') 
                cout << sortedWords[i].text 
                     << " --------- " 
                     << sortedWords[i].count << endl;
    }
    
    std::exit(0);
}
Comments