統計分析(三)

目的

    • 以網路圖展現多變數間相關係數
    • 可過濾可接受相關係數範圍
    • 網路圖節點大小與樣本數有關
    • 網路圖連接線粗細與相關係數有關

輸出結果

 IDX_1: (15)
    - IDX_2:  -0.2935
    - IDX_3:   0.6648
    - IDX_4:  -0.2124
    - IDX_5:  -0.5205
    - IDX_6:  -0.6905
    - IDX_7:   0.4310
    - IDX_8:  -0.0954
    - IDX_9:  -0.6381
    -IDX_10:  -0.1770
    -IDX_11:  -0.5761
    -IDX_12:  -0.1328
    -IDX_13:  -0.5700
    -IDX_14:  -0.1015
 IDX_2: (61)
    - IDX_3:  -0.3787
    - IDX_4:   0.8630
    - IDX_5:  -0.3263
    - IDX_6:  -0.3173
    - IDX_7:   0.0248
    - IDX_8:  -0.0960
    - IDX_9:   0.4017
    -IDX_10:   0.3883
    -IDX_11:   0.4058
    -IDX_12:   0.3747
    -IDX_13:   0.4548
    -IDX_14:   0.4135

程式碼

# -*- coding: utf-8 -*-
import csv
import math
import sys

import numpy as np

# The plotting stack is only needed by PlotNetwork(). Guarding the import lets
# the loading / correlation / listing code run on machines without it.
try:
    import networkx as nx
    import matplotlib.pyplot as plt
    import matplotlib.font_manager as font_manager
except ImportError:
    nx = plt = font_manager = None


class LABOR_INDICES_CORREL:
    """Pairwise correlation between the index columns of a CSV file.

    Each data row is expected to hold one value per entry of ``nameOfFields``
    (column 0 is the year, columns 1.. are the indices).  The literal string
    ``'NA'`` marks a missing value.

    Attributes:
        IDX: list of raw CSV rows (lists of strings), header excluded.
        IDX_Node: maps index name -> number of non-'NA' samples.
        IDX_Edge: maps (name_i, name_j) with i < j -> correlation coefficient
            formatted as a string with four decimals.
    """

    nameOfFields = ['YYYY', 'IDX_1', 'IDX_2', 'IDX_3', 'IDX_4', 'IDX_5',
                    'IDX_6', 'IDX_7', 'IDX_8', 'IDX_9', 'IDX_10', 'IDX_11',
                    'IDX_12', 'IDX_13', 'IDX_14']

    def __init__(self):
        # Per-instance state, so two instances never share containers.
        self.IDX = []
        self.IDX_Node = {}
        self.IDX_Edge = {}

    def LoadDataFromFile(self, dataFile='調整後增率.csv'):
        """Read ``dataFile`` and recompute node counts and edge correlations.

        The first row is treated as a header and discarded.  Blank rows are
        skipped.
        """
        self.IDX = []
        # The ``with`` block closes the file; no explicit close() is needed.
        with open(dataFile) as csvF:
            reader = csv.reader(csvF, delimiter=',')
            next(reader, None)  # drop the header row
            for dbRow in reader:
                if dbRow:  # csv yields [] for blank lines
                    self.IDX.append(dbRow)
        self.CalcNodeCount()
        self.CalcEdgeCorRel()

    def CalcNodeCount(self):
        """Count, for every index column, the rows whose value is not 'NA'."""
        fields = self.nameOfFields
        self.IDX_Node = {name: 0 for name in fields[1:]}
        for rowDB in self.IDX:
            for i in range(1, len(fields)):
                if rowDB[i] != 'NA':
                    self.IDX_Node[fields[i]] += 1

    def CalcEdgeCorRel(self):
        """Fill ``IDX_Edge`` with the correlation of every column pair i < j."""
        fields = self.nameOfFields
        self.IDX_Edge = {}
        for i in range(1, len(fields) - 1):
            for j in range(i + 1, len(fields)):
                CorRel, _count = self.CalcCorRel(i, j)
                self.IDX_Edge[(fields[i], fields[j])] = '%.4f' % (CorRel)

    def CalcCorRel(self, fieldID1, fieldID2):
        """Return ``(correlation, count)`` for two columns.

        Only rows where both columns are not 'NA' are used; ``count`` is the
        number of such rows.  With fewer than two paired samples the
        correlation is undefined and NaN is returned instead of raising.
        """
        pairs = [
            (float(rowDB[fieldID1]), float(rowDB[fieldID2]))
            for rowDB in self.IDX
            if rowDB[fieldID1] != 'NA' and rowDB[fieldID2] != 'NA'
        ]
        count = len(pairs)
        if count < 2:
            # An empty array cannot be sliced as 2-D, and a single point has
            # no variance, so there is no coefficient to compute.
            return (float('nan'), count)
        data = np.array(pairs)
        _CORREL = np.corrcoef(data[:, 0], data[:, 1])
        return (_CORREL[0][1], count)

    def List(self):
        """Print each index with its sample count and its correlations."""
        fields = self.nameOfFields
        for i in range(1, len(fields) - 1):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("%6s: (%d)" % (fields[i], int(self.IDX_Node[fields[i]])))
            for j in range(i + 1, len(fields)):
                print("\t-%6s: %8.4f" % (
                    fields[j],
                    float(self.IDX_Edge[(fields[i], fields[j])])))

    def getFieldNames(self):
        """Return the list of column names."""
        return self.nameOfFields

    def PlotNetwork(self, correlValve=0.25):
        """Draw the correlation network.

        An edge is drawn for every pair whose absolute correlation exceeds
        ``correlValve``.  Edge width grows with the absolute correlation and
        node size grows with the node's sample count.

        Raises:
            ImportError: if networkx / matplotlib are not installed.
        """
        if nx is None or plt is None:
            raise ImportError('PlotNetwork requires networkx and matplotlib')

        G = nx.Graph()
        _Edges = {}
        for key, text in self.IDX_Edge.items():
            strength = abs(float(text))
            # NaN compares False here, so undefined correlations are dropped.
            if strength > correlValve:
                _Edges[key] = text
                G.add_edge(key[0], key[1], weight=strength)

        graph_pos = nx.shell_layout(G)

        # Widths and sizes are derived by iterating the graph itself, so the
        # i-th value always belongs to the i-th edge / node that gets drawn.
        # abs() keeps strong negative correlations from collapsing to width 0.
        edge_widths = [1 + int(2 * data['weight'])
                       for _u, _v, data in G.edges(data=True)]
        node_sizes = [500 + self.IDX_Node[v] * 50 for v in G.nodes()]

        nx.draw_networkx_labels(G, graph_pos, font_size=10,
                                font_family='sans-serif')
        nx.draw_networkx_edges(G, graph_pos, width=edge_widths, alpha=0.3,
                               edge_color='blue')
        nx.draw_networkx_nodes(G, graph_pos, node_size=node_sizes, alpha=0.3,
                               node_color='red')
        nx.draw_networkx_edge_labels(G, graph_pos, edge_labels=_Edges,
                                     label_pos=0.3)
        plt.show()


if __name__ == '__main__':
    jobTasks = [True, True]
    emprogria = LABOR_INDICES_CORREL()
    emprogria.LoadDataFromFile('調整後增率.csv')
    if jobTasks[0]:
        emprogria.List()
    if jobTasks[1]:
        emprogria.PlotNetwork()