Accessing Google Sheets via API

There are two ways to access google sheets via API

Google Service Account

A service account is similar to google account, it can receive a document share.

An ordinary Google user account can share his/her document to this service account.

A web developer can create a service account at this page https://console.developers.google.com/iam-admin/serviceaccounts/. That web page will automatically download a google application credentials for you.

Set an environment variable named GOOGLE_APPLICATION_CREDENTIALS to the path of that downloaded credentials file.

Write your main Python script like this:

from apiclient import discovery service = discovery.build('sheets', 'v4') spreadsheetId = 'the-spreadsheet-id' rangeName = 'The spreadsheet tab name!B4:B100' result = service.spreadsheets().values().get(spreadsheetId=spreadsheetId, range=rangeName).execute() values = result.get('values', [])  if not values:     print('No data found.') else:     for row in values:         print('%s' % (row[0],)) 

A spreadsheet id is the string you can find at the end of a Google sheet URL:

https://docs.google.com/spreadsheets/d/the-spreadsheet-id/

OAuth2

OAuth2 requires interaction with a human user. There are so many documentations about OAuth2. I won't write another article about OAuth2. I will write about how to remove user interactions, so you can access Google sheets via cron job as an ordinary Google user account, not as a service account.

You will need PhantomJS installed in your system, and write a JavaScript to emulate user interaction like this:

/** * This script will write oauth2 authorization code to the standard output * call this script like this: * phantomjs authorize_google.js authorize_url username password */ var system = require('system'); var fs = require('fs'); var page = require('webpage').create(); page.onError = function (msg, trace) { var msgStack = ['ERROR: ' + msg]; if (trace && trace.length) { msgStack.push('TRACE:'); trace.forEach(function (t) { msgStack.push(' -> ' + t.file + ': ' + t.line + (t.function ? ' (in function "' + t.function + '")' : '')); }); } console.error(msgStack.join('\n')); }; page.viewportSize = {width: 1024, height: 768}; page.open(system.args[1], function (status) { if (status !== 'success') { phantom.exit(); } var jqueryJs = "https://ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js"; setTimeout(function () { page.includeJs(jqueryJs, function () { page.evaluate(function (username) { $('#Email').val(username); $('#next').click(); }, system.args[2]); }); }, 0); setTimeout(function () { page.includeJs(jqueryJs, function () { page.evaluate(function (password) { $('#Passwd').val(password); $('#signIn').click(); }, system.args[3]); }); }, 20000); setTimeout(function () { page.includeJs(jqueryJs, function () { page.evaluate(function () { $('#submit_approve_access').click(); }); }); }, 40000); setTimeout(function () { page.includeJs(jqueryJs, function () { var code = page.evaluate(function () { return $('#code').val(); }); fs.write("/dev/stdout", code, "w"); phantom.exit(); }); }, 80000); });

Name that PhantomJS script to authorize_google.js or anything else you like.

Install the Google API client:

pip install --upgrade google-api-python-client

You will have this Python module installed:

oauth2client/tools.py

Make a copy of that module, name it to something like google_tools.py.

Modify the run_flow function like this:

@_helpers.positional(3) def run_flow(flow, storage, flags=None, http=None, username=None, password=None): if flags is None: flags = argparser.parse_args() logging.getLogger().setLevel(getattr(logging, flags.logging_level)) if not flags.noauth_local_webserver: success = False port_number = 0 for port in flags.auth_host_port: port_number = port try: httpd = ClientRedirectServer((flags.auth_host_name, port), ClientRedirectHandler) except socket.error: pass else: success = True break flags.noauth_local_webserver = not success if not success: print(_FAILED_START_MESSAGE) if not flags.noauth_local_webserver: oauth_callback = 'http://{host}:{port}/'.format( host=flags.auth_host_name, port=port_number) else: oauth_callback = client.OOB_CALLBACK_URN flow.redirect_uri = oauth_callback authorize_url = flow.step1_get_authorize_url() if not flags.noauth_local_webserver: import webbrowser webbrowser.open(authorize_url, new=1, autoraise=True) print(_BROWSER_OPENED_MESSAGE.format(address=authorize_url)) else: if not flags.phantomjs: print(_GO_TO_LINK_MESSAGE.format(address=authorize_url)) code = None if not flags.noauth_local_webserver: httpd.handle_request() if 'error' in httpd.query_params: sys.exit('Authentication request was rejected.') if 'code' in httpd.query_params: code = httpd.query_params['code'] else: print('Failed to find "code" in the query parameters ' 'of the redirect.') sys.exit('Try running with --noauth_local_webserver.') else: if flags.phantomjs: print(authorize_url) process_out = subprocess.run( [ 'phantomjs', 'authorize_google.js', authorize_url, username, password ], stdout=subprocess.PIPE ) code = str(process_out.stdout.decode('UTF-8')) else: code = input('Enter verification code: ').strip() try: credential = flow.step2_exchange(code, http=http) except client.FlowExchangeError as e: sys.exit('Authentication has failed: {0}'.format(e)) storage.put(credential) credential.set_store(storage) print('Authentication successful.') return credential

You can see that the custom run_flow function creates a subprocess to execute our authorize_google.js PhantomJS script. In your main Python script, instead of importing the default tools module, you will import your custom google_tools module.

Write your main Python script like this:

from __future__ import print_function import httplib2 import os from apiclient import discovery from oauth2client import client #from oauth2client import tools from core import google_tools as tools from oauth2client.file import Storage try: import argparse flags = argparse.ArgumentParser(parents=[tools.argparser]).parse_args() except ImportError: flags = None # If modifying these scopes, delete your previously saved credentials # at ~/.credentials/sheets.googleapis.com-python-quickstart.json SCOPES = 'https://www.googleapis.com/auth/spreadsheets.readonly' CLIENT_SECRET_FILE = 'google_client_secret.json' APPLICATION_NAME = 'Labster Image Manager' def get_credentials(): """Gets valid user credentials from storage. If nothing has been stored, or if the stored credentials are invalid, the OAuth2 flow is completed to obtain the new credentials. Returns: Credentials, the obtained credential. """ home_dir = os.path.expanduser('~') credential_dir = os.path.join(home_dir, '.credentials') if not os.path.exists(credential_dir): os.makedirs(credential_dir) credential_path = os.path.join(credential_dir, 'sheets.googleapis.com-image-manager.json') store = Storage(credential_path) credentials = store.get() if not credentials or credentials.invalid: flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES) flow.user_agent = APPLICATION_NAME if flags: credentials = tools.run_flow(flow, store, flags, username='a-google-email', password='the-password-of-that-google-account') else: # Needed only for compatibility with Python 2.6 credentials = tools.run(flow, store) print('Storing credentials to ' + credential_path) return credentials def main(): """Shows basic usage of the Sheets API. Creates a Sheets API service object and prints the names and majors of students in a sample spreadsheet: https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms/edit """ credentials = get_credentials() http = credentials.authorize(httplib2.Http()) discoveryUrl = ('https://sheets.googleapis.com/$discovery/rest?' 'version=v4') service = discovery.build('sheets', 'v4', http=http, discoveryServiceUrl=discoveryUrl) spreadsheetId = '1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms' rangeName = 'Class Data!A2:E' result = service.spreadsheets().values().get( spreadsheetId=spreadsheetId, range=rangeName).execute() values = result.get('values', []) if not values: print('No data found.') else: #print('Name, Major:') print('Finished:') for row in values: # Print columns A and E, which correspond to indices 0 and 4. print('%s, %s' % (row[0], row[4])) if __name__ == '__main__': main()

Actually, that main Python script is almost an exact copy of the example provided by Google, except we replace the import tools with our import google_tools.