Bintray usage logs as CSV

#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Downloads the usage/download logs for a Bintray package, concatenates them
into one large CSV file for Splunk processing, and strips the empty or
unnecessary columns from the result.
'''
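# Written for Python 2 (the csv module is fed bytes-mode file handles and
# names are encoded to UTF-8 byte strings). Assumes the Bintray log archives
# are named like <something>.csv.gz, which is what the extension check in
# the main loop relies on.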

import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import os
import gzip
import csv

# Suppress the warning about unverified HTTPS requests (verify=False below).
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

# Bintray coordinates and credentials: edit these before running.
bintray_org = "your org"
bintray_repo = "your repo"
bintray_pkg = "your pkg"
username = 'your username'
apikey = 'your api key'

url = 'https://api.bintray.com/packages/' + bintray_org + '/' + bintray_repo + '/' + bintray_pkg + '/logs'

# Working files: per-archive temp files plus the final combined CSV.
csvdir = "csv"
tempfile = csvdir + '/temp.csv'
tempfile2 = csvdir + '/temp2.csv'
outfile = csvdir + '/outfile.csv'
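# Optional: override the hardcoded credentials from the environment so the
# script can be shared without editing it. This is a sketch; BINTRAY_USER
# and BINTRAY_API_KEY are hypothetical variable names, not anything the
# Bintray API prescribes.
username = os.environ.get('BINTRAY_USER', username)
apikey = os.environ.get('BINTRAY_API_KEY', apikey)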

#---------------------------------------------------------------------------
def download_file(url):
    # Make sure the output directory exists.
    if not os.path.exists(csvdir):
        os.makedirs(csvdir)
    local_filename = url.split('/')[-1]
    # NOTE the stream=True parameter: the archive is written to disk in
    # chunks rather than held in memory all at once.
    r = requests.get(url, auth=(username, apikey), verify=False, stream=True)
    r.raise_for_status()  # fail early instead of gunzipping an error page
    with open(local_filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:  # filter out keep-alive new chunks
                f.write(chunk)
    # Decompress the downloaded archive into a temporary CSV.
    inf = gzip.open(local_filename, 'rb')
    out = open(tempfile, 'wb')
    out.write(inf.read())
    inf.close()
    out.close()
    # Append the rows to the combined file, skipping this archive's own
    # header row (the combined file already has one).
    with open(tempfile, 'r') as f:
        with open(tempfile2, 'ab') as f1:
            next(f)  # skip header line
            for line in f:
                f1.write(line)
    # Delete the .gz archive and the per-archive temp file.
    os.remove(local_filename)
    os.remove(tempfile)

#---------------------------------------------------------------------------
# Fetch the list of available log files for the package.
req = requests.get(url, auth=(username, apikey), verify=False)
jsonfile = req.json()
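# The logs endpoint returns a JSON array describing the available log files;
# based on how it is consumed below, each entry is assumed to carry at least
# a "name" field, roughly (shape not verified against the Bintray docs):
#
#   [{"name": "downloads-2016-01-01.csv.gz", ...}, ...]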

# Start from a clean slate: make sure the output directory exists (the header
# is written before download_file ever runs) and remove any combined file
# left over from a previous run.
if not os.path.exists(csvdir):
    os.makedirs(csvdir)
if os.path.exists(tempfile2):
    os.remove(tempfile2)

# Write the header row once, before any log data is appended.
header = ['ip_address', 'hostname', 'organization', 'country', 'region',
          'zipcode', 'username', 'callback_id', 'date', 'http_method',
          'path_information', 'http_status', 'length', 'referrer',
          'user_agent']
with open(tempfile2, 'wb') as outcsv:
    writer = csv.writer(outcsv)
    writer.writerow(header)

for item in jsonfile:
    name = item.get("name").encode('utf-8')
    # The archives are named like <base>.csv.gz: peel off .gz first, then
    # check the inner extension.
    ext = os.path.splitext(os.path.splitext(name)[0])[1]
    # download the CSV logs
    if ext == ".csv":
        download_file(url + '/' + name)

# Remove the empty/unnecessary columns from the combined CSV.
data = []  # buffer list
with open(tempfile2, "rb") as the_file:
    reader = csv.reader(the_file, delimiter=",")
    for row in reader:
        try:
            # Columns that will be kept in the final CSV output (see the
            # header above): ip_address, country, username, callback_id,
            # date, http_method, path_information.
            new_row = [row[0], row[3], row[6], row[7], row[8], row[9], row[10]]
            data.append(new_row)
        except IndexError as e:
            # Skip short/malformed rows rather than aborting the whole run.
            print(e)

with open(outfile, "wb") as to_file:
    writer = csv.writer(to_file, delimiter=",")
    for new_row in data:
        writer.writerow(new_row)

os.remove(tempfile2)
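# Optional sanity check (an addition, not part of the original workflow):
# report how many log rows ended up in the final file, excluding the
# filtered header row that passes through the column filter above.
with open(outfile, 'r') as f:
    row_count = sum(1 for _ in f) - 1
print("%d log rows written to %s" % (row_count, outfile))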