# Bintray usage logs as csv
# NOTE(review): this line preceded the shebang and was bare prose (a syntax
# error); it is now a comment. Ideally move it below the shebang so the
# interpreter line is first when executing the script directly.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
this script downloads usage/download logs from Bintray, turns them into 1 large CSV file for Splunk processing, also removes empty/unnecessary columns
'''
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import json
import os.path
import gzip
import csv
# suppress warning: every request below uses verify=False, which would
# otherwise emit an InsecureRequestWarning to stderr on each call
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Bintray API credentials and target package -- fill these in before running.
bintray_org = "your org"
bintray_repo = "your repo"
bintray_pkg = "your pkg"
username = 'your username'
apikey = 'your api key'
# Endpoint that lists the available download-log files for the package.
url = 'https://api.bintray.com/packages/'+bintray_org+'/'+bintray_repo+'/'+bintray_pkg+'/logs'
# Working directory and intermediate/output file names.
csvdir = "csv"
tempfile = csvdir+'/temp.csv'    # decompressed copy of one downloaded log
tempfile2 = csvdir+'/temp2.csv'  # combined logs, one shared header row
outfile = csvdir+'/outfile.csv'  # final trimmed CSV handed to Splunk
#---------------------------------------------------------------------------
def download_file(url):
    '''
    Download one gzipped Bintray log, decompress it, and append its data
    rows to the combined file `tempfile2`.

    url -- full URL of a single .csv.gz log file on the Bintray API.

    Side effects: creates `csvdir` if missing; appends to `tempfile2`
    (the log's own header row is skipped, since the caller writes a single
    shared header); deletes the downloaded .gz and the intermediate
    `tempfile` on success.

    Raises requests.HTTPError if the download fails.
    '''
    # make sure the working directory exists before writing into it
    if not os.path.exists(csvdir):
        os.makedirs(csvdir)
    local_filename = url.split('/')[-1]
    # NOTE the stream=True parameter: logs can be large, so fetch in chunks
    r = requests.get(url, auth=(username, apikey), verify=False, stream=True)
    # fail loudly on a bad status instead of gunzipping an HTML error page
    r.raise_for_status()
    with open(local_filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
    # decompress into the intermediate temp file; 'wb' (not 'ab') so a stale
    # temp file left behind by a crashed earlier run cannot duplicate rows
    with gzip.open(local_filename, 'rb') as inf:
        with open(tempfile, 'wb') as out:
            out.write(inf.read())
    # append the data rows to the combined file, skipping this log's header
    with open(tempfile, 'r') as f:
        with open(tempfile2, 'ab') as f1:
            next(f)  # skip header line
            for line in f:
                f1.write(line)
    # clean up the per-file artifacts
    os.remove(local_filename)
    os.remove(tempfile)
#---------------------------------------------------------------------------
req = requests.get(url, auth=(username, apikey), verify=False)
jsonfile = req.json()
if os.path.exists(tempfile2):
os.remove(tempfile2)
# insert header row
header = [ 'ip_address', 'hostname', 'organization', 'country', 'region', 'zipcode', 'username', 'callback_id', 'date', 'http_method', 'path_information', 'http_status', 'length', 'referrer', 'user_agent' ]
with open(tempfile2, 'wb') as outcsv:
writer = csv.writer(outcsv)
writer.writerow(header)
for item in jsonfile:
name = item.get("name").encode('utf-8')
# get file extension
ext = os.path.splitext(os.path.splitext(name)[0])[1]
# download CSV logs
if ext == ".csv":
download_file(url+'/'+name)
# remove empty columns from csv
data = [] #Buffer list
with open(tempfile2, "rb") as the_file:
reader = csv.reader(the_file, delimiter=",")
for row in reader:
try:
# column headers that will be kept in final csv output
new_row = [row[0], row[3], row[6], row[7], row[8], row[9], row[10] ]
data.append(new_row)
except IndexError as e:
print e
pass
with open(outfile, "w+") as to_file:
writer = csv.writer(to_file, delimiter=",")
for new_row in data:
writer.writerow(new_row)
os.remove(tempfile2)