修正證交所交易歷史資料下載檔

資料來源

    • 證交所交易歷史資料檔存放目錄:./data

    • 證交所交易歷史資料檔案格式:CSV

    • 證交所交易歷史資料檔名規則:《類股代碼-年月日.csv》

系統說明

    • 證交所交易歷史資料檔存有非結構化文字,必須加以移除。

    • 證交所交易歷史資料檔內容無【交易日期】欄位,必須在每筆資料前補上。

    • 修正後證交所交易歷史資料檔名規則:《TWN-類股代碼-年月日.csv》

    • 運用 Bash Script 修正資料檔存放目錄下的所有交易歷史資料檔。

程式碼

# -*- coding: utf-8 -*-
"""Clean up one downloaded stock-exchange daily quote CSV file.

The input file ``<folder>/<category>-<yyyymmdd>.csv`` is filtered down to its
quote rows, every field is re-quoted, the trade date is prepended as a new
first column, and the result is written to
``<folder>/TWN-<category>-<yyyymmdd>.csv``.

Command line::

    python FixStockQuote.py <data folder> <category>-<yyyymmdd>.csv

The script targets Python 2.7.  ``print`` is always called with a single
parenthesised argument so the same source also parses on Python 3.
"""
import csv
import os
import sys


class FixStockQuote:
    """Filter, normalise and date-stamp the rows of one quote file."""

    # Only lines holding exactly this many double quotes (16 quoted fields)
    # are treated as table rows; everything else is free text and is dropped.
    QUOTES_PER_ROW = 32

    # The first matching lines are skipped -- presumably the column-header
    # rows of the download (TODO confirm against a real file).
    HEADER_ROWS = 2

    def LoadFile(self, nameOfFile):
        """Return all lines of ``nameOfFile`` (line endings included)."""
        with open(nameOfFile, 'r') as csvFile:
            return csvFile.readlines()

    def FixField(self, line):
        """Return ``line`` with every field trimmed and wrapped in quotes.

        The line is parsed with the ``csv`` module so that a comma inside a
        quoted field (e.g. ``"6,552,153"``) stays part of that field instead
        of splitting it into extra columns.
        """
        try:
            fieldList = next(csv.reader([line], skipinitialspace=True), [])
        except csv.Error:
            fieldList = []
        if not fieldList:
            # Empty or unparsable input: fall back to a plain comma split.
            fieldList = line.split(',')
        # Drop any quote left inside a field so the output stays well formed.
        return ','.join(
            '"%s"' % field.replace('"', '').strip() for field in fieldList
        )

    def FixFile(self, csvFileContent):
        """Return the normalised quote rows found in ``csvFileContent``.

        Lines that do not carry ``QUOTES_PER_ROW`` double quotes are
        discarded, as are the first ``HEADER_ROWS`` lines that do.
        """
        fixedRows = []
        rowsToSkip = self.HEADER_ROWS
        for line in csvFileContent:
            if line.count('"') != self.QUOTES_PER_ROW:
                continue
            if rowsToSkip > 0:
                rowsToSkip -= 1
                continue
            fixedRows.append(self.FixField(line.strip()))
        return fixedRows

    def WriteFile(self, nameOfFile, csvFileContent, tradeDate):
        """Write the rows to ``nameOfFile``, each prefixed with ``tradeDate``."""
        with open(nameOfFile, 'w') as csvFile:
            for line in csvFileContent:
                csvFile.write('"%s",%s\n' % (tradeDate, line))

    def Fix(self, fileFolder, catStock, tradeY, tradeM, tradeD):
        """Fix the quote file of one category and trade date.

        Reads ``<fileFolder>/<catStock>-<yyyymmdd>.csv`` and writes
        ``<fileFolder>/TWN-<catStock>-<yyyymmdd>.csv``.  When the input file
        does not exist a message is printed and nothing is written.
        """
        stamp = '%04d%02d%02d' % (tradeY, tradeM, tradeD)
        nameOfFile = '%s/%s-%s.csv' % (fileFolder, catStock, stamp)
        _nameOfFile = '%s/TWN-%s-%s.csv' % (fileFolder, catStock, stamp)
        if not os.path.exists(nameOfFile):
            print(u'%s 不存在' % (nameOfFile))
            return
        print(u'讀取 %s\n\t寫入 %s' % (nameOfFile, _nameOfFile))
        rows = self.FixFile(self.LoadFile(nameOfFile))
        self.WriteFile(_nameOfFile, rows,
                       '%04d-%02d-%02d' % (tradeY, tradeM, tradeD))


def main(argv):
    """Run the fixer for ``argv`` (``sys.argv`` layout); return the exit status."""
    # Both the data folder and the file name are required.
    if len(argv) < 3:
        print(u'用法: 資料目錄 類股代碼 年月日')
        return 1
    fileFolder = argv[1]
    try:
        # The file name looks like <category>-<yyyymmdd>.csv
        nameParts = argv[2].split('-')
        catStock = nameParts[0]
        tradeDate = nameParts[1].split('.')[0]
        print('%s %s' % (catStock, tradeDate))
        tradeY = int(tradeDate[0:4])
        tradeM = int(tradeDate[4:6])
        tradeD = int(tradeDate[6:8])
    except (IndexError, ValueError):
        print(u'引數錯誤')
        return 1
    FixStockQuote().Fix(fileFolder, catStock, tradeY, tradeM, tradeD)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))

修正所有檔

# Run FixStockQuote.py on every downloaded quote file of one stock category.
# Each $STOCK_DATA/<category>-<yyyymmdd>.csv is passed to the script by its
# base name together with the data folder.
PATH=/Python27:$PATH
STOCK_DATA=data
STOCK_CAT=25

# The glob is expanded directly in the loop (not via an unquoted variable) so
# file names containing spaces stay intact.
for F in "$STOCK_DATA/$STOCK_CAT"-*.csv
do
    # When nothing matches, the pattern is left unexpanded; skip it instead
    # of handing a literal "25-*.csv" to the Python script.
    [ -e "$F" ] || continue
    F2=$(basename "$F")
    python FixStockQuote.py "$STOCK_DATA" "$F2"
done

合併修正檔

# Merge every fixed quote file (TWN-<category>-<yyyymmdd>.csv) of one stock
# category into $STOCK_DATA/TWN.csv, preceded by a single header row.
PATH=/Python27:$PATH
STOCK_DATA=data
STOCK_CAT=25
MERGED=$STOCK_DATA/TWN.csv

# Single quotes keep the double quotes in the output, so the header is quoted
# like the data rows and contains no stray spaces between column names.
HEADER='"StockDate","StockID","StockName","TradeQty","TransQty","TradeAmount",'
HEADER=$HEADER'"OpenPrice","HighPrice","LowPrice","ClosePrice","Gain","DeltaPrice",'
HEADER=$HEADER'"BuyPrice","BuyQty","SellPrice","SellQty","CR"'
printf '%s\n' "$HEADER" > "$MERGED"

# Append the files one by one in glob (sorted) order.  The pattern cannot
# match TWN.csv itself because of the "-<category>-" part.
for F in "$STOCK_DATA/TWN-$STOCK_CAT"-*.csv
do
    # Skip the unexpanded pattern when no file matches; a bare "cat" with no
    # operands would otherwise block reading standard input.
    [ -e "$F" ] || continue
    cat "$F" >> "$MERGED"
done