"""Load a local CSV file into a BigQuery table via a multipart upload job.

(Originally posted as: "Python script to load csv data to bigquery failed.
Error 404" -- the 404/NameError came from POSTing to an undefined upload URL;
see the fix in main().)
"""
# Standard library imports.
import argparse
import json
import os
import sys
import time

# Third-party imports (Google API client and oauth2client).
import httplib2
from apiclient import discovery
from apiclient.discovery import build
from apiclient.errors import HttpError
from oauth2client import client
from oauth2client import file
from oauth2client import tools
from oauth2client.client import AccessTokenRefreshError
from oauth2client.client import OAuth2WebServerFlow
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
from oauth2client.tools import run
# Parser for command-line arguments.
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter,
parents=[tools.argparser])
# CLIENT_SECRETS is name of a file containing the OAuth 2.0 information for this
# application, including client_id and client_secret. You can see the Client ID
# and Client secret on the APIs page in the Cloud Console:
CLIENT_SECRETS = os.path.join(os.path.dirname(__file__), 'client_secrets.json')
# Set up a Flow object to be used for authentication.
# Add one or more of the following scopes. PLEASE ONLY ADD THE SCOPES YOU
# NEED. For more information on using scopes please see
FLOW = client.flow_from_clientsecrets(CLIENT_SECRETS,
scope=[
],
message=tools.message_if_missing(CLIENT_SECRETS))
def main(argv):
# Parse the command-line flags.
flags = parser.parse_args(argv[1:])
# If the credentials don't exist or are invalid run through the native client
# flow. The Storage object will ensure that if successful the good
# credentials will get written back to the file.
storage = file.Storage('sample.dat')
credentials = storage.get()
if credentials is None or credentials.invalid:
credentials = tools.run_flow(FLOW, storage, flags)
# Create an httplib2.Http object to handle our HTTP requests and authorize it
# with our good Credentials.
http = httplib2.Http()
http = credentials.authorize(http)
# Construct the service object for the interacting with the BigQuery API.
service = discovery.build('bigquery', 'v2', http=http)
projectId = "synthetic-trail-459"
datasetId = "GIE"
tableId = "csbq"
newSchemaFile = "schema.json"
schema = open(newSchemaFile, 'r')
# Create the body of the request, separated by a boundary of xxx
newresource = ('--xxx\n' +
'Content-Type: application/json; charset=UTF-8\n' + '\n' +
'{\n' +
' "configuration": {\n' +
' "load": {\n' +
' "schema": {\n'
' "fields": ' + schema.read() + '\n' +
' },\n' +
' "destinationTable": {\n' +
' "projectId": "' + projectId + '",\n' +
' "datasetId": "' + datasetId + '",\n' +
' "tableId": "' + tableId + '"\n' +
' }\n' +
' }\n' +
' }\n' +
'}\n' +
'--xxx\n' +
'Content-Type: application/octet-stream\n' +
'\n')
newDataFile = "chr.csv"
# Append data from the specified file to the request body
f = open(newDataFile, 'r')
newresource += f.read()
# Signify the end of the body
newresource += ('--xxx--\n')
headers = {'Content-Type': 'multipart/related; boundary=xxx'}
resp, content = http.request(url, method="POST", body=newresource, headers=headers)
print resp.status
print resp
if resp.status == 200:
jsonResponse = json.loads(content)
jobReference = jsonResponse['jobReference']['jobId']
import time
while True:
getJob = jobCollection.get(projectId=projectId, jobId=jobReference).execute()
currentStatus = getJob['status']['state']
if 'DONE' == currentStatus:
print "Done Loading!"
return
else:
print 'Waiting to load...'
print 'Current status: ' + currentStatus
print time.ctime()
time.sleep(10)
if __name__ == '__main__':
main(sys.argv)