巨量系統日誌檔解析 (Big Log File Parsing)

日誌檔格式

2012-09-30 23:59:59 W3SVC1547060318 10.0.1.55 GET /footer.aspx - 80 - 64.33.244.70 Mozilla/5.0+(Windows+NT+6.1;+WOW64)+AppleWebKit/537.4+(KHTML,+like+Gecko)+Chrome/22.0.1229.79+Safari/537.4 200 0 0
2012-09-30 23:59:59 W3SVC1547060318 10.0.1.55 GET /images/helper/helper.swf - 80 - 1.167.244.130 Mozilla/4.0+(compatible;+MSIE+6.0;+Windows+NT+5.0;+ezPeer++v1.0+Beta+(0.4.1.98);+.NET+CLR+2.0.50727) 200 0 64

主程式:(BigLogAnalyzer.py)

# -*- coding: utf-8 -*-
"""Read a log file line by line and print the fields LogSpliter extracts.

Usage: BigLogAnalyzer.py [LOG_FILE]
When LOG_FILE is omitted, 'Data/Sample.log' is read.
"""
import sys
import re

from LogSpliter import LogSpliter

if __name__ == '__main__':
    # Default input; the first command-line argument overrides it.
    # (The original stored the default under one spelling and read it back
    # under another, so running without an argument raised NameError.)
    logFileName = 'Data/Sample.log'
    if len(sys.argv) > 1:
        logFileName = sys.argv[1]

    # Lines that start with '#' are skipped rather than parsed.
    commentLine = re.compile('^#')

    logSpliter = LogSpliter()
    # 'with' closes the file even if parsing or printing raises.
    with open(logFileName, 'r') as logFile:
        # Iterate the file object directly so only one line is held at a time.
        for line in logFile:
            # Blank lines carry no record; skipping them avoids parsing an
            # empty line or re-printing the previous record.
            if not line.strip() or commentLine.match(line):
                continue
            logSpliter.parse(line)
            logSpliter.printOut()

處理程式:(LogSpliter.py)

# -*- coding: utf-8 -*-


class LogSpliter:
    """Pick selected fields out of one space-separated log line at a time.

    After parse(), the extracted values are available as attributes
    (logDate, logTime, sysName, hostIP, userIP, userEnv, sysStatus,
    userBrower, userOS). Attributes the line did not supply are None.
    """

    # (attribute name, zero-based position in the space-split line).
    _FIELD_POSITIONS = (
        ('logDate', 0),
        ('logTime', 1),
        ('sysName', 2),
        ('hostIP', 3),
        ('userIP', 9),
        ('userEnv', 10),
        ('sysStatus', 11),
    )

    # (substring looked for, OS name reported). Checked in this order and the
    # first match wins, so 'Android' is tested before 'Linux'.
    _OS_MARKERS = (
        ('Android', 'Android'),
        ('Safari', 'iOS'),
        ('Windows', 'Windows'),
        ('Linux', 'Linux'),
    )

    def __init__(self):
        self._reset()

    def _reset(self):
        """Set every extracted attribute back to None."""
        self.logDate = None
        self.logTime = None
        self.sysName = None
        self.hostIP = None
        self.userIP = None
        self.userEnv = None
        self.sysStatus = None
        self.userBrower = None
        self.userOS = None

    def parse(self, line):
        """Extract the known fields from one log line.

        line: the raw line; a trailing line break is ignored. None or an
            empty line leaves the current attribute values untouched.

        Lines with fewer fields than expected are accepted: the missing
        attributes are left as None instead of raising IndexError.
        """
        if line is None:
            return
        # Drop the line break so it cannot end up glued to the last field.
        line = line.rstrip('\r\n')
        if line == '':
            return

        # Start from a clean record so values from the previous line never
        # leak into this one.
        self._reset()

        fieldList = line.split(' ')
        fieldCount = len(fieldList)
        for attrName, position in self._FIELD_POSITIONS:
            # A position is valid only when it is strictly below the count.
            if position < fieldCount:
                setattr(self, attrName, fieldList[position])

        # parse2 ignores None/'' itself, so no extra guard is needed here.
        self.parse2(self.userEnv)

    def printOut(self):
        """Print the current record to standard output, tab-separated.

        The browser and OS columns are appended only when a user-agent
        field (userEnv) was extracted.
        """
        outString = "%s %s\t%s:%s:%s\t%s\t" % \
            (self.logDate, self.logTime, self.sysName,
             self.sysStatus, self.hostIP, self.userIP)
        if self.userEnv is not None:
            outString = outString + "%s\t%s" % (self.userBrower, self.userOS)
        # Parenthesized single-argument form prints identically on Python 2
        # and Python 3.
        print(outString)

    def parse2(self, line):
        """Derive userBrower and userOS from a user-agent string.

        line: the user-agent text; None or '' is ignored.

        The text before the first '/' becomes userBrower. The text between
        the first and second '/' is searched for the known OS markers; when
        none matches, that text itself is stored as userOS. If the string
        contains no '/', userOS is not modified.
        """
        if line == '' or line is None:
            return

        fieldList = line.split('/')
        self.userBrower = fieldList[0]
        if len(fieldList) <= 1:
            return

        detail = fieldList[1]
        for marker, osName in self._OS_MARKERS:
            if marker in detail:
                self.userOS = osName
                return
        self.userOS = detail

執行結果

2012-09-30 23:59:59 W3SVC1547060318:200:10.0.1.55 64.33.244.70 Mozilla Windows
2012-09-30 23:59:59 W3SVC1547060318:200:10.0.1.55 1.167.244.130 Mozilla Windows
2012-10-01 00:00:00 W3SVC1547060318:200:10.0.1.55 1.169.238.199 Dalvik Android
2012-10-01 00:00:00 W3SVC1547060318:200:10.0.1.55 64.33.244.70 Mozilla Windows
2012-10-01 00:00:00 W3SVC1547060318:200:10.0.1.55 61.64.102.130 Dalvik Android
2012-10-01 00:00:01 W3SVC1547060318:200:10.0.1.55 64.33.244.70 Mozilla Windows