產生字詞頻度表 (NLTK)

程式列表

import os
import sys

import nltk


def WordFreqFromFile(textFileName):
    """Build a token frequency distribution from a text file.

    The whole file is read into memory, split into tokens with
    ``nltk.word_tokenize`` and the tokens are counted with
    ``nltk.FreqDist``.

    Args:
        textFileName: Path of the text file to read.

    Returns:
        The ``nltk.FreqDist`` built from the file's tokens.
    """
    # Plain 'r' replaces the original 'rU': the 'U' flag is rejected by
    # Python 3.11+, and Python 3 text mode already translates newlines.
    # The ``with`` block closes the file even if reading raises.
    with open(textFileName, 'r') as textFileHandle:
        textFile = textFileHandle.read()

    textTokens = nltk.word_tokenize(textFile)
    return nltk.FreqDist(textTokens)


def OutputWordFreq(wordFrequency):
    """Print one ``"word",count`` line per entry to standard output.

    Args:
        wordFrequency: Mapping from word to integer count, such as the
            value returned by ``WordFreqFromFile``. Entries are printed
            in the mapping's own iteration order.
    """
    for word in wordFrequency:
        # A parenthesised single-argument print behaves the same on
        # Python 2 and Python 3; the bare print statement is a syntax
        # error on Python 3.
        print('"%s",%d' % (word, wordFrequency[word]))


if __name__ == "__main__":
    # sys.argv[0] is the script name, so a missing file argument shows up
    # as a length below 2. The original "== 0" test could never be true,
    # and the script died with IndexError on sys.argv[1] instead.
    if len(sys.argv) < 2:
        # sys.exit with a string writes it to stderr and exits with
        # status 1, so callers can detect the usage error.
        sys.exit('Text File Required.')

    textFileName = sys.argv[1]
    wordFrequency = WordFreqFromFile(textFileName)
    OutputWordFreq(wordFrequency)