I want to write the results of a BigQuery query into a BigQuery table. The query text is stored in a txt file, and I'm passing it as a variable into the function below, but I get the following error:

error_info=method + ' ' + url) google.cloud.exceptions.BadRequest: 400 Required parameter is missing (POST https://www.googleapis.com/bigquery/v2/projects/myproject/jobs)

What am I missing?

The function:

from google.cloud import bigquery
import time
import uuid

def async_query(query, dataset_id, dest_table, project_Id):


    # configuration json
    query_data = {
        "configuration": {
            "query": {
                "query": query,
                "defaultDataset": dataset_id,
                "allowLargeResults": True,
                "destinationTable": {
                    "projectId": project_Id,
                    "datasetId": dataset_id,
                    "tableId": dest_table
                    },
                "createDisposition": 'CREATE_IF_NEEDED',
                "writeDisposition": 'WRITE_TRUNCATE'
          }
        }
    }

    client = bigquery.Client()
    query_job = client.run_async_query(str(uuid.uuid4()), query_data)
    query_job.use_legacy_sql = False
    query_job.begin()
    wait_for_job(query_job)

    # Drain the query results by requesting a page at a time.
    query_results = query_job.results()
    page_token = None

    while True:
        rows, total_rows, page_token = query_results.fetch_data(
            max_results=10,
            page_token=page_token)

        for row in rows:
            print(row)

        if not page_token:
            break

def wait_for_job(job):
    while True:
        job.reload()  # Refreshes the state via a GET request.
        if job.state == 'DONE':
            if job.error_result:
                raise RuntimeError(job.errors)
            return
        time.sleep(1)

3 Answers

  1. You can fix the defaultDataset field in your configuration as below:

    # configuration json
    query_data = {
        "configuration": {
            "query": {
                "query": query,
                "defaultDataset": {
                    "projectId": project_Id,
                    "datasetId": dataset_id
                    },
                "allowLargeResults": True,
                "destinationTable": {
                    "projectId": project_Id,
                    "datasetId": dataset_id,
                    "tableId": dest_table
                    },
                "createDisposition": 'CREATE_IF_NEEDED',
                "writeDisposition": 'WRITE_TRUNCATE'
          }
        }
    }
    

Note: "projectId": project_Id is optional in defaultDataset

  2. The whole defaultDataset is also optional, and in your case you can just omit it, as in:

    # configuration json
    query_data = {
        "configuration": {
            "query": {
                "query": query,
                "allowLargeResults": True,
                "destinationTable": {
                    "projectId": project_Id,
                    "datasetId": dataset_id,
                    "tableId": dest_table
                    },
                "createDisposition": 'CREATE_IF_NEEDED',
                "writeDisposition": 'WRITE_TRUNCATE'
          }
        }
    }
    

See more in the documentation for configuration.query.defaultDataset.
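
For comparison, newer versions of the google-cloud-bigquery client library let you express the same destination table and dispositions through QueryJobConfig rather than a raw configuration dict. A minimal sketch, assuming a current library version; the project, dataset and table names are placeholders:

    from google.cloud import bigquery

    # Placeholder identifiers; replace with your own project, dataset and table.
    client = bigquery.Client(project="my-project")
    table_ref = bigquery.TableReference.from_string("my-project.my_dataset.my_results")

    job_config = bigquery.QueryJobConfig()
    job_config.destination = table_ref
    job_config.create_disposition = bigquery.CreateDisposition.CREATE_IF_NEEDED
    job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
    job_config.use_legacy_sql = False

    query_job = client.query("SELECT 1 AS x", job_config=job_config)  # starts the job
    for row in query_job.result():  # waits for the job to finish
        print(row)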


I got it to work by adding the destination table as a bigquery.Table on the query job:

from google.cloud import bigquery
import time
import uuid

def async_query(query, dataset_id, dest_table, project_Id):
    client = bigquery.Client()
    query_job = client.run_async_query(str(uuid.uuid4()), query)
    query_job.use_legacy_sql = False
    dataset = bigquery.Dataset(dataset_id, client)
    table = bigquery.Table(dest_table, dataset)
    query_job.destination = table
    query_job.write_disposition = 'WRITE_TRUNCATE'
    query_job.begin()
    wait_for_job(query_job)

    # Drain the query results by requesting a page at a time.
    query_results = query_job.results()
    page_token = None

    while True:
        rows, total_rows, page_token = query_results.fetch_data(
            max_results=10,
            page_token=page_token)

        for row in rows:
            print(row)

        if not page_token:
            break

def wait_for_job(job):
    while True:
        job.reload()  # Refreshes the state via a GET request.
        if job.state == 'DONE':
            if job.error_result:
                raise RuntimeError(job.errors)
            return
        time.sleep(1)
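
For reference, a hypothetical call to this function might look like the following; the file name, project, dataset and table names are placeholders, with the query text read from a txt file as described in the question:

    # Hypothetical invocation; every identifier below is a placeholder.
    with open("query.txt") as f:
        sql = f.read()

    async_query(
        query=sql,
        dataset_id="my_dataset",   # dataset that will hold the results
        dest_table="my_results",   # destination table (created or truncated)
        project_Id="my-project",   # GCP project id
    )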


I think the problem is that defaultDataset is actually an object, so you'll need to set the datasetId field inside it (this is probably what is triggering the error).

Can you try correcting that and see if it helps? You can see the complete options for the query API in the documentation.
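
In other words, defaultDataset needs to be a dict containing a datasetId (and optionally a projectId), not a bare string. A minimal sketch of the corrected field, reusing the variable names from the question:

    query_data = {
        "configuration": {
            "query": {
                "query": query,
                # defaultDataset must be an object, not a plain string:
                "defaultDataset": {
                    "projectId": project_Id,  # optional
                    "datasetId": dataset_id
                }
            }
        }
    }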
