# Fetch observation records with headers, pagination and retries.
def get_obs_data_csv(query, url='http://observations.metoceanapi.com/api/records',
                     headers={'apikey': 'xxx'}, page_size=500):
    """Fetch observation records as CSV pages and return a single DataFrame.

    Pages through the API ``page_size`` rows at a time, retrying each page
    (up to 5 retries) when the response lacks the expected ``time`` column.

    Parameters
    ----------
    query : dict
        Query parameters. ``details``, ``format``, ``offset`` and ``limit``
        are set internally; the caller's dict is not modified.
    url : str
        API endpoint.
    headers : dict
        Request headers (e.g. the API key). When falsy, a plain URL is
        built with ``urlencode`` and handed directly to ``pandas.read_csv``.
    page_size : int
        Rows requested per page.

    Returns
    -------
    pandas.DataFrame
        All pages concatenated, sorted and indexed by ``time``. An empty
        first page is returned as-is (unindexed).

    Raises
    ------
    Exception
        When a page still has no ``time`` column after all retries.
    """
    import os
    from io import StringIO
    from urllib.parse import urlencode  # Python 3 home of urlencode

    import pandas as pd
    import requests

    # Work on a copy so the caller's query dict is not mutated.
    query = dict(query)
    query['details'] = True
    query['format'] = 'csv'

    df = pd.DataFrame()
    offset = 0
    first_time = True
    max_retries = 5

    while True:
        query['offset'] = offset
        query['limit'] = page_size

        # Retry the page until the CSV actually contains a 'time' column
        # (the API occasionally returns a malformed/partial response).
        for _attempt in range(max_retries + 1):
            if headers:
                r = requests.get(url, params=query, headers=headers)
                source = StringIO(r.text)
            else:
                # No headers: build a plain URL and let pandas fetch it.
                source = os.path.join(url, '?' + urlencode(query))
            dfi = pd.read_csv(source)
            if 'time' in dfi.columns:
                break
            print('repeating')
        else:
            raise Exception('Retries exceeded')

        if dfi.shape[0] == 0:
            print('no data')
            return dfi

        df = pd.concat((df, dfi), axis=0)

        if dfi.shape[0] < page_size:
            # A short page means we reached the end. If it happened on the
            # very first page, pagination never kicked in at all.
            if first_time:
                print('WARNING: query is not paginated, might give different results.')
                print('Try smaller page_size (< %i) to paginate at least once' % dfi.shape[0])
            break

        offset += page_size
        first_time = False

    df['time'] = pd.to_datetime(df['time'])
    df.sort_values(by='time', inplace=True)
    df.set_index('time', inplace=True)
    return df
# Example: list databases and load site/time/wind-speed observations from
# MySQL. {user}, {pass}, {host}, {database} and {table} are placeholders
# for the real connection details.
import pandas as pd
from sqlalchemy import create_engine

engine = create_engine('mysql://{user}:{pass}@{host}/{database}')
# Context manager guarantees the connection is closed (original leaked it).
with engine.connect() as conn:
    q = conn.execute("show databases")
    print(q.fetchall())
    # Keep the result instead of discarding it.
    df = pd.read_sql('select site,time,wsp from {table}', conn, index_col='time')