英文稿分析呈現-1

分析呈現程式碼

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Visualise text-mining results as a term-frequency chart or a term graph."""

import sys
import codecs
import string
import random

import nltk
import numpy
from matplotlib import pyplot as plt
from matplotlib.font_manager import FontProperties
import igraph

from TextFileProcessing import TextFileProcessing


class TextMining(TextFileProcessing):
    """Plotting front-end built on top of ``TextFileProcessing``.

    The file-loading methods used by the script entry point (``LoadFile`` and
    ``LoadFile2Graph``) are not defined here; they are inherited.
    """

    # Palette cycled through when colouring graph vertices.
    chartColors = ['lightgrey', 'lightgreen', 'lightblue', 'tan',
                   'lightcyan', 'yellow', 'pink', 'lightskyblue', 'plum']

    def __init__(self):
        super().__init__()

    def PlotGraph(self, g):
        """Colour the vertices of *g* and draw it with a "kk" layout.

        Args:
            g: an igraph graph whose vertices carry a ``name`` attribute,
                which is used as the vertex label.
        """
        palette = self.chartColors
        for component in g.components():
            # The colour index restarts at 0 for every connected component and
            # advances per vertex, wrapping around at the end of the palette.
            # NOTE(review): this gives vertices *within* one component
            # different colours; confirm that per-vertex (rather than
            # per-component) colouring is the intent.
            for offset, vidx in enumerate(component):
                g.vs[vidx]["color"] = palette[offset % len(palette)]

        igraph.plot(g,
                    layout=g.layout("kk"),
                    vertex_size=120,
                    bbox=(1024, 800),
                    margin=120,
                    dpi=600,
                    vertex_label=[vertex['name'] for vertex in g.vs])

    def PlotChart(self, termFreqList, minimal=None):
        """Plot the terms whose frequency is strictly greater than *minimal*.

        Terms are ordered by descending frequency and drawn both as a red
        line and as bars, with the term itself as the x tick label.

        Args:
            termFreqList: mapping of term -> frequency.
            minimal: frequency threshold; when ``None`` it defaults to the
                mean plus one standard deviation of all frequencies.
        """
        if not termFreqList:
            # Nothing to plot; also avoids a NaN threshold from the mean of
            # an empty array.
            return

        rankedTerms = sorted(termFreqList, key=termFreqList.get, reverse=True)

        if minimal is None:
            freq = numpy.array([termFreqList[term] for term in rankedTerms])
            minimal = freq.mean() + freq.std()

        # Keep the labels and the values in lockstep so the number of tick
        # labels always equals the number of plotted points.
        shownTerms = [term for term in rankedTerms
                      if termFreqList[term] > minimal]
        X = list(range(1, 1 + len(shownTerms)))
        Y = numpy.array([termFreqList[term] for term in shownTerms])

        plt.plot(X, Y, color='r')
        plt.bar(X, Y, color=plt.cm.Blues(Y * 10))
        plt.xlabel("Term")
        plt.xticks(X, shownTerms, size='small', rotation='vertical')
        plt.ylabel('Frequency')
        # ``bottom`` replaces the ``ymin`` keyword, which matplotlib removed.
        plt.ylim(bottom=minimal - 1)
        plt.show()


if __name__ == '__main__':
    fileName = 'data/1.txt'
    if len(sys.argv) > 1:
        fileName = sys.argv[1]

    worker = TextMining()

    # Task switches: [0] term-frequency bar chart, [1] term graph.
    taskControl = [False, True]

    if taskControl[0]:
        content, termFreqList = worker.LoadFile(fileName)
        worker.PlotChart(termFreqList)

    if taskControl[1]:
        g = worker.LoadFile2Graph(fileName)
        worker.PlotGraph(g)

程式輸出