統計分析(二)

分析

    • 探討失業率與 GDP 能指引公共政策方向。
    • 兩項年度指標資料皆儲存於 NoSQL 資料庫 (Cassandra)。
    • 兩指標年度範圍並不一致,GDP 資料多於失業率。
    • 失業率與 GDP 走勢呈反向關係。
    • 兩指標振幅比例不同,放在同一圖表中,容易解讀失真。
    • 為便於比較,各指標依其自身振幅(最大值 - 最小值)換算為 0 至 1 之間的振幅比例;失業率取「1 - 比例」以對應其與 GDP 的反向關係。

分析輸出

    失業率與 GDP 相關係數=-0.1948
    當 GDP 增長為正 (7) 時,失業率卻提高 (2) 之機率=28.57%
    當 GDP 增長為負 (8) 時,失業率卻降低 (4) 之機率=50.00%
    GDP 與失業率增長發生乖離 (GDP 貢獻與勞動力增長相背離) 之機率=40.00%

分析圖

未調整圖:

分析圖

指數成長率圖:

指數成長率累積圖:

指數振幅比例圖:

程式碼

# -*- coding: utf-8 -*-
"""Compare yearly unemployment-rate and GDP indices stored in Cassandra.

The script loads two yearly series, keeps the years present in both, and
either prints statistics or saves comparison charts as PNG files.

Every ``print`` call passes a single argument, so the output is the same
under Python 2 (where it parses as a print statement with a parenthesised
expression) and Python 3.
"""
from cassandra.cluster import Cluster
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.font_manager as font_manager


class LABOR_INDICES_IDX_02:
    """Load, summarise and plot the unemployment-rate / GDP series.

    ``XYZ`` is a 2-D array with one row per year: column 0 is the key read
    from the tables (used as the year), column 1 the value from
    ``LABOR_INDICES_IDX_01`` (plotted as the unemployment rate) and
    column 2 the value from ``LABOR_INDICES_IDX_02`` (plotted as GDP).
    """

    nameOfKeyspace = 'emprogria'
    # Kept so Close() can release the cluster, not only the session.
    dbCluster = None
    dbSession = None
    XYZ = None

    def __init__(self):
        pass

    def Open(self):
        """Connect to the local Cassandra node and open the keyspace."""
        self.dbCluster = Cluster(contact_points=['127.0.0.1'])
        self.dbSession = self.dbCluster.connect(self.nameOfKeyspace)

    def Close(self):
        """Shut down the session and the cluster; safe to call repeatedly."""
        if self.dbSession is not None:
            self.dbSession.shutdown()
            self.dbSession = None
        if self.dbCluster is not None:
            self.dbCluster.shutdown()
            self.dbCluster = None

    def LoadTable(self, SQL):
        """Run ``SQL`` and return ``{row[0]: row[1]}`` inserted in key order.

        NOTE(review): relies on the column order returned by ``SELECT *``
        (first column = key, second = value) -- confirm against the schema.
        """
        table = {}
        for row in self.dbSession.execute(SQL):
            table[row[0]] = row[1]
        return {key: table[key] for key in sorted(table)}

    def LoadData(self):
        """Populate ``XYZ`` with the years that have both indicators.

        Returns without touching ``XYZ`` when no session is open.
        """
        if self.dbSession is None:
            return
        first = self.LoadTable("SELECT * FROM LABOR_INDICES_IDX_01")
        second = self.LoadTable("SELECT * FROM LABOR_INDICES_IDX_02")

        # Only years present in both tables with non-null values are kept;
        # the two series do not cover the same range of years.
        rows = []
        for key in sorted(set(first) & set(second)):
            if first[key] is not None and second[key] is not None:
                rows.append([key, first[key], second[key]])
        self.XYZ = np.array(rows)

    def _EnsureLoaded(self):
        """Load the data on first use and fail clearly if none is available.

        Raises:
            RuntimeError: if ``XYZ`` is still unset after ``LoadData()``,
                i.e. no session was open.
        """
        if self.XYZ is None:
            self.LoadData()
        if self.XYZ is None:
            raise RuntimeError(
                "no data loaded: call Open() before requesting data")

    def _Deltas(self):
        """Return (years, d_col1, d_col2): year-over-year differences.

        ``years`` are the end years of each interval, so all three arrays
        have one element fewer than ``XYZ`` has rows.
        """
        return (self.XYZ[1:, 0],
                np.diff(self.XYZ[:, 1]),
                np.diff(self.XYZ[:, 2]))

    def _Percent(self, part, whole):
        """Return ``part / whole`` as a percentage using float division."""
        return (float(part) / whole) * 100.0

    def _Rescale(self, values, invert=False):
        """Map ``values`` onto [0, 1] by their own min/max amplitude.

        With ``invert=True`` the result is ``1 - scaled``, which flips a
        series so it can be compared with one it moves opposite to.
        A constant series has zero amplitude and is not handled specially.
        """
        values = np.asarray(values)
        lowest = values.min()
        scaled = (values - lowest) / (values.max() - lowest)
        if invert:
            return 1 - scaled
        return scaled

    def _PlotScaled(self, years, col1, col2, ylabel, filename):
        """Rescale both series, plot them together and save to ``filename``.

        The first series is inverted before plotting, and the correlation
        coefficient of the two rescaled series is written on the chart.
        """
        inverted = self._Rescale(col1, invert=True)
        scaled = self._Rescale(col2)

        plt.rcParams["font.family"] = 'STHeiti'
        corr = np.corrcoef(inverted, scaled)
        caption = u"相關係數 = %.4f" % (corr[0][1])
        plt.text(np.asarray(years).mean(), 0.1, caption,
                 ha='center', va='center')
        plt.plot(years, inverted, linewidth=2.0, color='red',
                 label=u'1 - 失業率')
        plt.plot(years, scaled, linewidth=2.0, label='GDP')
        plt.legend(loc='upper left')
        plt.xlabel(u'年份')
        plt.ylabel(ylabel)
        plt.title(u'失業率 - GDP')
        plt.savefig(filename)
        plt.close()

    def List(self, YYYY=None):
        """Print the row for year ``YYYY``, or every row.

        All rows are printed when ``YYYY`` is ``None`` and also when no
        row matches ``YYYY`` (the loop's ``else`` branch).
        """
        self._EnsureLoaded()
        for row in self.XYZ:
            if row[0] == YYYY:
                print("%d: %.2f %.2f" % (row[0], row[1], row[2]))
                break
        else:
            for row in self.XYZ:
                print("%d: %.2f %.2f" % (row[0], row[1], row[2]))

    def Stat(self):
        """Print the correlation coefficient, then the Stat2() figures."""
        self._EnsureLoaded()
        corr = np.corrcoef(self.XYZ[:, 1], self.XYZ[:, 2])
        print(u"失業率與 GDP 相關係數=%.4f" % (corr[0][1]))
        self.Stat2()

    def Stat2(self):
        """Print how often the two series move in the same direction.

        Three figures are printed, each skipped when its denominator is 0:
        GDP rose and unemployment also rose; GDP fell and unemployment also
        fell; and the share of all intervals where both changes have the
        same sign.
        """
        self._EnsureLoaded()
        _, uep_delta, gdp_delta = self._Deltas()

        gdp_up = gdp_delta > 0
        total = int(np.count_nonzero(gdp_up))
        hits = int(np.count_nonzero(gdp_up & (uep_delta > 0)))
        if total > 0:
            print('當 GDP 增長為正 (%d) 時,失業率卻提高 (%d) 之機率=%.2f%%'
                  % (total, hits, self._Percent(hits, total)))

        gdp_down = gdp_delta < 0
        total = int(np.count_nonzero(gdp_down))
        hits = int(np.count_nonzero(gdp_down & (uep_delta < 0)))
        if total > 0:
            print('當 GDP 增長為負 (%d) 時,失業率卻降低 (%d) 之機率=%.2f%%'
                  % (total, hits, self._Percent(hits, total)))

        # With fewer than two rows there are no intervals to compare.
        intervals = len(uep_delta)
        if intervals > 0:
            same_sign = int(np.count_nonzero(uep_delta * gdp_delta > 0))
            print('GDP 與失業率增長發生乖離 (GDP 貢獻與勞動力增長相背離) 之機率=%.2f%%'
                  % (self._Percent(same_sign, intervals)))

    def PlotLines(self):
        """Save the unscaled chart of both series."""
        self._EnsureLoaded()
        years = self.XYZ[:, 0]
        plt.rcParams["font.family"] = 'STHeiti'
        plt.plot(years, self.XYZ[:, 1], linewidth=2.0, color='red',
                 label=u'失業率')
        plt.plot(years, self.XYZ[:, 2], linewidth=2.0, label='GDP')
        plt.legend(loc='upper left')
        plt.xlabel(u'年份')
        plt.ylabel(u'指數')
        plt.title(u'失業率 - GDP')
        plt.savefig(u'失業率-GDP 指數圖.png')
        plt.close()

    def PlotLines2(self):
        """Save the chart of both series rescaled by amplitude."""
        self._EnsureLoaded()
        self._PlotScaled(self.XYZ[:, 0], self.XYZ[:, 1], self.XYZ[:, 2],
                         u'指數振幅比', u'失業率-GDP 指數振幅比圖.png')

    def PlotLines3(self):
        """Save the chart of rescaled year-over-year changes."""
        self._EnsureLoaded()
        years, uep_delta, gdp_delta = self._Deltas()
        self._PlotScaled(years, uep_delta, gdp_delta,
                         u'指數成長率振幅比', u'失業率-GDP 指數成長率振幅比圖.png')

    def PlotLines4(self):
        """Save the chart of rescaled cumulative year-over-year changes."""
        self._EnsureLoaded()
        years, uep_delta, gdp_delta = self._Deltas()
        self._PlotScaled(years, np.cumsum(uep_delta), np.cumsum(gdp_delta),
                         u'指數成長率累積振幅比',
                         u'失業率-GDP 指數成長率累積振幅比圖.png')

    def ListFonts(self):
        """Print the name of every font matplotlib has registered."""
        for font in font_manager.fontManager.ttflist:
            print(font.name)


if __name__ == '__main__':
    # One switch per task, in order: list all, list 2010, statistics,
    # raw plot, amplitude plot, change plot, cumulative-change plot.
    jobTasks = [False, False, True, False, False, False, False]
    emprogria = LABOR_INDICES_IDX_02()
    try:
        emprogria.Open()
        if jobTasks[0]:
            emprogria.List()
        if jobTasks[1]:
            emprogria.List(2010)
        if jobTasks[2]:
            emprogria.Stat()
        if jobTasks[3]:
            emprogria.PlotLines()
        if jobTasks[4]:
            emprogria.PlotLines2()
        if jobTasks[5]:
            emprogria.PlotLines3()
        if jobTasks[6]:
            emprogria.PlotLines4()
    finally:
        # Release the connection even when a task fails.
        emprogria.Close()