統計分析-使用 MongoDB

資料源

  • 範例資料
    • 讀取 CSV 檔案,運用 NumPy 計算平均值與標準差。
    • 調用 Python 元件繪圖範例
    • 調用 R 元件繪圖範例

程式碼

# -*- coding: utf-8 -*-
"""Import S&P 500 quotes from a CSV file into MongoDB and report statistics.

The script can (optionally) reload a collection from a CSV file, list the
documents matching a query, and print the mean / standard deviation of every
numeric column using NumPy.

All ``print`` calls use the single-argument parenthesised form so the module
runs unchanged on Python 2 and Python 3.  The collection calls
(``delete_many`` / ``insert_one``) require PyMongo 3.0 or newer.
"""
import csv
import sys

import numpy as np
from pymongo import MongoClient
from pymongo.errors import PyMongoError


class MongoDB_INF:
    """Thin convenience wrapper around one MongoDB database of stock quotes."""

    dbHost = 'localhost'
    dbPort = 27017
    dbClient = None
    dbName = 'emprogria'
    dbConn = None

    # Column names applied to the CSV rows, in file order.
    CSV_FIELDS = ("StockDate", "OpenIndex", "HighIndex", "LowIndex",
                  "CloseIndex", "StockVol", "AdjIndex")

    # (document key, display label) pairs in the order getStats reports them.
    STAT_FIELDS = (
        ('OpenIndex', u'開盤'),
        ('CloseIndex', u"收盤"),
        ('AdjIndex', u"盤後"),
        ('HighIndex', u"最高"),
        ('LowIndex', u"最低"),
        ('StockVol', u"交易量"),
    )

    def __init__(self, dbHost='localhost', dbPort=27017):
        """Remember the server address; no connection is made here."""
        self.dbHost = dbHost
        self.dbPort = dbPort

    def openDB(self, dbName=None):
        """Connect to the server and select a database.

        Args:
            dbName: database to open.  When omitted, the current
                ``self.dbName`` (``'emprogria'`` unless reassigned) is used,
                which matches what the method did before it honoured this
                argument.

        Returns:
            True when the server answered and the database handle is ready,
            False otherwise (``dbClient`` and ``dbConn`` are then None).
        """
        if dbName is not None:
            self.dbName = dbName
        try:
            self.dbClient = MongoClient(
                'mongodb://%s:%d/' % (self.dbHost, self.dbPort))
            # MongoClient connects lazily in PyMongo 3+, so force one round
            # trip; otherwise an unreachable server would still report OK.
            self.dbClient.admin.command('ping')
            self.dbConn = self.dbClient[self.dbName]
        except (PyMongoError, TypeError, ValueError):
            # TypeError/ValueError cover a malformed host or port value.
            self.closeDB()
            return False
        return True

    def closeDB(self):
        """Close the client, if any, and drop the cached handles."""
        if self.dbClient is not None:
            self.dbClient.close()
        self.dbClient = None
        self.dbConn = None

    def removeDocs(self, criteria=None, nameCollection='SP500'):
        """Delete the documents matching ``criteria`` (all of them by default)."""
        if criteria is None:
            criteria = {}
        self.dbConn[nameCollection].delete_many(criteria)

    def listDocs(self, criteria=None, nameCollection='SP500'):
        """Print every document matching ``criteria`` (all of them by default).

        The date is printed as three dash-separated slices of ``StockDate``
        (characters 0-4, 4-6 and 6-8), followed by one tab-indented line per
        price field and the volume.
        """
        if criteria is None:
            criteria = {}
        for doc in self.dbConn[nameCollection].find(criteria):
            stockDate = doc['StockDate']
            print('%s-%s-%s' % (stockDate[0:4], stockDate[4:6], stockDate[6:8]))
            print(u'\t開盤=%.2f' % (doc['OpenIndex']))
            print(u'\t收盤=%.2f' % (doc['CloseIndex']))
            print(u'\t盤後=%.2f' % (doc['AdjIndex']))
            print(u'\t最高=%.2f' % (doc['HighIndex']))
            print(u'\t最低=%.2f' % (doc['LowIndex']))
            print(u'\t交易量=%d (M)' % (doc['StockVol']))

    def importFromCSV(self, csvFile, nameCollection='SP500'):
        """Load a quotes CSV file into a collection.

        The first row is treated as a header and skipped.  Prices are stored
        as floats and the volume is divided by 1,000,000 before storing.

        Args:
            csvFile: path of the CSV file to read.
            nameCollection: target collection name.

        Returns:
            The number of documents inserted.
        """
        collection = self.dbConn[nameCollection]
        recCount = 0
        # The context manager guarantees the file is closed even if a row
        # fails to parse or an insert raises.
        with open(csvFile, 'r') as csvF:
            reader = csv.DictReader(csvF, self.CSV_FIELDS)
            next(reader, None)  # skip the header row
            for rowDB in reader:
                stockData = {
                    'StockDate': rowDB['StockDate'],
                    'OpenIndex': float(rowDB['OpenIndex']),
                    'HighIndex': float(rowDB['HighIndex']),
                    'LowIndex': float(rowDB['LowIndex']),
                    'CloseIndex': float(rowDB['CloseIndex']),
                    'StockVol': float(rowDB['StockVol']) / 1000000.0,
                    'AdjIndex': float(rowDB['AdjIndex'])
                }
                # Write the record to the database.
                collection.insert_one(stockData)
                recCount += 1
        return recCount

    def getStats(self, criteria=None, nameCollection='SP500'):
        """Print mean and standard deviation of each numeric field.

        One line is printed per entry of ``STAT_FIELDS`` using the documents
        matching ``criteria`` (all of them by default).  Nothing is printed
        when no document matches, which avoids NumPy's NaN results and
        warnings on empty arrays.
        """
        if criteria is None:
            criteria = {}
        docs = list(self.dbConn[nameCollection].find(criteria))
        if not docs:
            return
        for key, label in self.STAT_FIELDS:
            values = np.array([doc[key] for doc in docs])
            print(u"%s\t: %8.2f\t%8.2f" % (label, values.mean(), values.std()))


def main():
    """Run the tasks enabled in ``jobTask`` against the local server."""
    csvFile = sys.argv[1] if len(sys.argv) > 1 else 'SP500.csv'

    # Task switches: [reload collection from CSV, list documents, statistics].
    jobTask = [False, True, True]
    # Selects which of the queries below the listing task uses.
    queryCat = 2
    queries = {
        # List every document.
        0: {},
        # List documents whose volume is greater than 4000.
        1: {'StockVol': {'$gt': 4000}},
        # List documents with 2040 < adjusted close < 2050.
        2: {
            '$and': [
                {'AdjIndex': {'$gt': 2040}},
                {'AdjIndex': {'$lt': 2050}}
            ]
        },
    }

    mongoDB_Inf = MongoDB_INF()
    if not mongoDB_Inf.openDB():
        print(u'資料庫錯誤')
        return

    try:
        if jobTask[0]:
            mongoDB_Inf.removeDocs()
            print(u'筆數: %d' % (mongoDB_Inf.importFromCSV(csvFile)))

        if jobTask[1]:
            # Unknown categories fall back to listing everything instead of
            # failing on an unbound variable.
            mongoDB_Inf.listDocs(queries.get(queryCat, {}))

        if jobTask[2]:
            mongoDB_Inf.getStats({})
    finally:
        # Release the connection even if one of the tasks raised.
        mongoDB_Inf.closeDB()


if __name__ == '__main__':
    main()

2015-03-11 開盤=2044.69 收盤=2040.24 盤後=2040.24 最高=2050.08 最低=2039.69 交易量=3406 (M) 2015-03-10 開盤=2076.14 收盤=2044.16 盤後=2044.16 最高=2076.14 最低=2044.16 交易量=3668 (M) 2015-02-09 開盤=2053.47 收盤=2046.74 盤後=2046.74 最高=2056.16 最低=2041.88 交易量=3549 (M) 2015-02-04 開盤=2048.86 收盤=2041.51 盤後=2041.51 最高=2054.74 最低=2036.72 交易量=4141 (M) 2015-02-02 開盤=1996.67 收盤=2020.85 盤後=2020.85 最高=2021.66 最低=1980.90 交易量=4008 (M) 2015-01-30 開盤=2019.35 收盤=1994.99 盤後=1994.99 最高=2023.32 最低=1993.38 交易量=4538 (M) 2015-01-29 開盤=2002.45 收盤=2021.25 盤後=2021.25 最高=2024.64 最低=1989.18 交易量=4127 (M) 2015-01-28 開盤=2032.34 收盤=2002.16 盤後=2002.16 最高=2042.49 最低=2001.49 交易量=4067 (M) 2015-01-27 開盤=2047.86 收盤=2029.55 盤後=2029.55 最高=2047.86 最低=2019.91 交易量=3329 (M) 2015-01-21 開盤=2020.19 收盤=2032.12 盤後=2032.12 最高=2038.29 最低=2012.04 交易量=3730 (M) 2015-01-20 開盤=2020.76 收盤=2022.55 盤後=2022.55 最高=2028.94 最低=2004.49 交易量=3944 (M) 2015-01-16 開盤=1992.25 收盤=2019.42 盤後=2019.42 最高=2020.46 最低=1988.12 交易量=4056 (M) 2015-01-15 開盤=2013.75 收盤=1992.67 盤後=1992.67 最高=2021.35 最低=1991.47 交易量=4276 (M) 2015-01-14 開盤=2018.40 收盤=2011.27 盤後=2011.27 最高=2018.40 最低=1988.44 交易量=4378 (M) 2015-01-13 開盤=2031.58 收盤=2023.03 盤後=2023.03 最高=2056.93 最低=2008.25 交易量=4107 (M) 2015-01-12 開盤=2046.13 收盤=2028.26 盤後=2028.26 最高=2049.30 最低=2022.58 交易量=3456 (M) 2015-01-09 開盤=2063.45 收盤=2044.81 盤後=2044.81 最高=2064.43 最低=2038.33 交易量=3364 (M) 2015-01-07 開盤=2005.55 收盤=2025.90 盤後=2025.90 最高=2029.61 最低=2005.55 交易量=3805 (M) 2015-01-06 開盤=2022.15 收盤=2002.61 盤後=2002.61 最高=2030.25 最低=1992.44 交易量=4460 (M) 2015-01-05 開盤=2054.44 收盤=2020.58 盤後=2020.58 最高=2054.44 最低=2017.34 交易量=3799 (M) 開盤 : 2063.59 33.21 收盤 : 2063.94 33.69 盤後 : 2063.94 33.69 最高 : 2073.87 29.59 最低 : 2051.49 35.99 交易量 : 3635.84 478.01