Python, Import csv to Bigquery Load job failed - Error 404

275 views
Skip to first unread message

Vikram Singh Saggu

unread,
Jul 3, 2014, 6:05:52 AM7/3/14
to gce-dis...@googlegroups.com
Hi,

My Python script to load CSV data into BigQuery fails; the subject line says Error 404, but the response below actually shows HTTP status 400:

{'status': '400', 'alternate-protocol': '443:quic', 'content-length': '39', 'server': 'HTTP Upload Server Built on Jun 12 2014 14:56:53 (1402610213)', 'date': 'Thu, 03 Jul 2014 09:00:57 GMT', 'content-type': 'text/html; charset=UTF-8'}


Below is the script :-

from apiclient.discovery import build
from apiclient.errors import HttpError

from oauth2client.client import AccessTokenRefreshError
from oauth2client.client import OAuth2WebServerFlow
from oauth2client.client import flow_from_clientsecrets
from oauth2client.file import Storage
from oauth2client.tools import run

import argparse
import httplib2
import os
import sys

from apiclient import discovery
from oauth2client import file
from oauth2client import client
from oauth2client import tools

# Parser for command-line arguments.
parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    parents=[tools.argparser])


# CLIENT_SECRETS is the name of a file containing the OAuth 2.0 information
# for this application, including client_id and client_secret. You can see
# the Client ID and Client secret on the APIs page in the Cloud Console:
CLIENT_SECRETS = os.path.join(os.path.dirname(__file__), 'client_secrets.json')

# Set up a Flow object to be used for authentication.
# BUG FIX: the scope list was empty, so the issued credentials carried no
# BigQuery permission and API calls were rejected (the HTTP 400 seen in the
# thread). Loading data requires the BigQuery scope.
FLOW = client.flow_from_clientsecrets(
    CLIENT_SECRETS,
    scope=['https://www.googleapis.com/auth/bigquery'],
    message=tools.message_if_missing(CLIENT_SECRETS))

def main(argv):
  """Authorize with OAuth 2.0 and load a local CSV file into BigQuery.

  Builds a multipart/related upload body (load-job configuration JSON plus
  the raw CSV bytes), POSTs it to the BigQuery media-upload jobs endpoint,
  then polls the resulting load job every 10 seconds until it is DONE.

  Args:
    argv: full command-line argument list (argv[0] is skipped).
  """
  import json
  import time

  # Parse the command-line flags.
  flags = parser.parse_args(argv[1:])

  # If the credentials don't exist or are invalid run through the native
  # client flow. The Storage object will ensure that if successful the good
  # credentials will get written back to the file.
  storage = file.Storage('sample.dat')
  credentials = storage.get()
  if credentials is None or credentials.invalid:
    credentials = tools.run_flow(FLOW, storage, flags)

  # Create an httplib2.Http object to handle our HTTP requests and
  # authorize it with our good Credentials.
  http = credentials.authorize(httplib2.Http())

  # Construct the service object for interacting with the BigQuery API
  # (used below to poll the load job's status).
  service = discovery.build('bigquery', 'v2', http=http)

  projectId = "synthetic-trail-459"
  datasetId = "GIE"
  tableId = "csbq"

  # BUG FIX: the original request referenced an undefined name `url`.
  # The multipart body must be POSTed to the BigQuery media-upload
  # jobs endpoint for this project.
  url = ('https://www.googleapis.com/upload/bigquery/v2/projects/'
         + projectId + '/jobs')

  # Read the table schema (a JSON array of field definitions).
  with open("schema.json", 'r') as schema_file:
    schema = schema_file.read()

  # Create the body of the request, separated by a boundary of xxx.
  # Part 1: the load-job configuration as JSON.
  newresource = ('--xxx\n' +
            'Content-Type: application/json; charset=UTF-8\n' + '\n' +
            '{\n' +
            '   "configuration": {\n' +
            '     "load": {\n' +
            '       "schema": {\n'
            '         "fields": ' + schema + '\n' +
            '      },\n' +
            '      "destinationTable": {\n' +
            '        "projectId": "' + projectId + '",\n' +
            '        "datasetId": "' + datasetId + '",\n' +
            '        "tableId": "' + tableId + '"\n' +
            '      }\n' +
            '    }\n' +
            '  }\n' +
            '}\n' +
            '--xxx\n' +
            'Content-Type: application/octet-stream\n' +
            '\n')

  # Part 2: append the raw CSV data from the specified file.
  with open("chr.csv", 'r') as data_file:
    newresource += data_file.read()

  # Signify the end of the body.
  newresource += '--xxx--\n'

  headers = {'Content-Type': 'multipart/related; boundary=xxx'}
  resp, content = http.request(url, method="POST", body=newresource,
                               headers=headers)
  print(resp.status)
  print(resp)

  if resp.status != 200:
    # BUG FIX: the original silently fell off the end on failure;
    # surface the response body so the error is visible.
    print('Upload failed:')
    print(content)
    return

  jsonResponse = json.loads(content)
  jobReference = jsonResponse['jobReference']['jobId']
  # Poll the job until BigQuery reports the terminal DONE state.
  while True:
    getJob = service.jobs().get(projectId=projectId,
                                jobId=jobReference).execute()
    currentStatus = getJob['status']['state']
    if currentStatus == 'DONE':
      print("Done Loading!")
      return
    print('Waiting to load...')
    print('Current status: ' + currentStatus)
    print(time.ctime())
    time.sleep(10)

# Script entry point: pass the full argv; flag parsing skips argv[0].
if __name__ == '__main__':
  main(sys.argv)

Marilu

unread,
Jul 3, 2014, 11:21:29 AM7/3/14
to gce-dis...@googlegroups.com
Hi Vikram,

The error seems to be related to a mismatch between the content type you're sending and the one being received:
headers = {'Content-Type': 'multipart/related... and 'content-type': 'text/html; charset=UTF-8'

Marilu

Marilu

unread,
Jul 7, 2014, 2:20:07 PM7/7/14
to gce-dis...@googlegroups.com
Hi Vikram 

I notice that you also have an error status of '400', which seems to be related to your OAuth process.

I have run the script using OAuth2WebServerFlow with flow.step1 and flow.step2; it worked for me — try it:

import sys
import json
import urllib
import httplib2

from oauth2client.client import OAuth2WebServerFlow
from apiclient.discovery import build
from oauth2client.file import Storage
from oauth2client.tools import run
from oauth2client.client import AccessTokenRefreshError
from apiclient.errors import HttpError


# Constants used to build the OAuth flow and the BigQuery upload URL.
SERVICE_ACCOUNT = 'default'
GOOGLE_STORAGE_PROJECT_NUMBER = 'your-project-ID'
# BUG FIX: CLIENT_ID and OAUTH_SCOPE were referenced in oauht_acces() but
# never defined, causing a NameError. Fill in CLIENT_ID from the Cloud
# Console; the BigQuery scope is required for load jobs.
CLIENT_ID = 'your-client-id'
CLIENT_SECRET = 'your-client-secret'
OAUTH_SCOPE = 'https://www.googleapis.com/auth/bigquery'
# Out-of-band redirect: Google displays the verification code in the
# browser for the user to paste back. (The original assigned this twice.)
REDIRECT_URI = 'urn:ietf:wg:oauth:2.0:oob'
API_VERSION = "v1"
PROJECT_ID = "your-project-id"
# Media-upload endpoint for BigQuery load jobs in PROJECT_ID.
URL_PREFIX = ('https://www.googleapis.com/upload/bigquery/v2/projects/'
              + PROJECT_ID + '/jobs')


# Obtain API authorization using OAuth 2.0
def oauht_acces():
  """Return an authorized httplib2.Http via the OAuth 2.0 web-server flow.

  Reuses credentials cached in the 'big_query' Storage file when they are
  still valid; otherwise walks the user through the manual copy/paste
  authorization flow and caches the newly issued credentials.
  """
  # BUG FIX: the original used `storage` before defining it, and its
  # invalid-credentials branch referenced undefined names (tools, FLOW,
  # flags). Check the credential cache first; only run the interactive
  # flow when the cache is missing or invalid.
  storage = Storage('big_query')
  credentials = storage.get()

  if credentials is None or credentials.invalid:
    flow = OAuth2WebServerFlow(CLIENT_ID, CLIENT_SECRET, OAUTH_SCOPE,
                               REDIRECT_URI)
    authorized_url = flow.step1_get_authorize_url()
    print(' ')
    print('Go to the following link in your browser: ' + authorized_url)
    print(' ')
    code = raw_input('Enter verification code : ').strip()
    credentials = flow.step2_exchange(code)
    # Persist the fresh credentials so the next run skips the prompt.
    storage.put(credentials)

  auth_http = credentials.authorize(httplib2.Http())
  return auth_http


def loadTable(http, service):
  """Upload a local CSV file into a BigQuery table via a multipart load job.

  Args:
    http: an authorized httplib2.Http instance.
    service: a built BigQuery v2 service object (used to poll the job).

  Prompts for a schema file (a JSON array of field definitions) and a data
  file, POSTs both as a single multipart/related request to URL_PREFIX,
  then polls the created load job every 10 seconds until it is DONE.
  """
  import time

  datasetId = 'your-datasetId'
  tableId = 'your-table'

  # Read the table schema (a JSON array of fields) from the named file.
  newSchemaFile = raw_input("What is your schema? ")
  with open(newSchemaFile, 'r') as schema_file:
    schema = schema_file.read()

  # Create the body of the request, separated by a boundary of xxx.
  # Part 1: the load-job configuration as JSON.
  newresource = ('--xxx\n' +
            'Content-Type: application/json; charset=UTF-8\n' + '\n' +
            '{\n' +
            '   "configuration": {\n' +
            '     "load": {\n' +
            '       "schema": {\n'
            '         "fields": ' + schema + '\n' +
            '      },\n' +
            '      "destinationTable": {\n' +
            '        "projectId": "' + PROJECT_ID + '",\n' +
            '        "datasetId": "' + datasetId + '",\n' +
            '        "tableId": "' + tableId + '"\n' +
            '      }\n' +
            '    }\n' +
            '  }\n' +
            '}\n' +
            '--xxx\n' +
            'Content-Type: application/octet-stream\n' +
            '\n')

  # Part 2: append the raw CSV data, then close the multipart body.
  newDataFile = raw_input("What is your data? ")
  with open(newDataFile, 'r') as data_file:
    newresource += data_file.read()
  newresource += '--xxx--\n'

  headers = {'Content-Type': 'multipart/related; boundary=xxx'}
  resp, content = http.request(URL_PREFIX, method="POST", body=newresource,
                               headers=headers)

  if resp.status != 200:
    # BUG FIX: the original returned silently on any non-200 response;
    # report the status and body so failures are visible.
    print('Upload failed with HTTP status ' + str(resp.status))
    print(content)
    return

  jobId = json.loads(content)['jobReference']['jobId']
  # Poll the job until BigQuery reports the terminal DONE state.
  while True:
    getJob = service.jobs().get(projectId=PROJECT_ID, jobId=jobId).execute()
    currentStatus = getJob['status']['state']
    if currentStatus == 'DONE':
      print("Done Loading!")
      return
    print('Waiting to load...')
    print('Current status: ' + currentStatus)
    print(time.ctime())
    time.sleep(10)


def main():
  """Authorize, build the BigQuery service object, and run the CSV load."""
  authorized_http = oauht_acces()
  bigquery_service = build('bigquery', 'v2', http=authorized_http)
  loadTable(authorized_http, bigquery_service)

# Script entry point.
if __name__ == '__main__':
  main()
Reply all
Reply to author
Forward
0 new messages