Execute queries in BigQuery in parallel using Python

An example of how to run multiple queries in parallel.

import time
from concurrent.futures import ThreadPoolExecutor
from google.cloud import bigquery
from google.oauth2 import service_account

##########
# Variable set up
##########

# Google Cloud project the queries run against -- replace with your own ID
PROJECT = "project-id"

# Build service-account credentials from a key file on disk
credentials = service_account.Credentials.from_service_account_file(
    "big_query_admin.json",  # Path to your service account key file
)

# BigQuery client used by the rest of the script, bound to PROJECT
BigQuery_client = bigquery.Client(
    project=PROJECT,
    credentials=credentials,
)

## Helper functions


def run_query(query, poll_interval_seconds=180):
    """
    Run a query against BigQuery and block until the job has finished.

    Args:
        query: Query text, passed unchanged to ``BigQuery_client.query``.
        poll_interval_seconds: Seconds to sleep between job status checks.
            Defaults to 180 (3 minutes), the interval this function has
            always used; pass a smaller value for short-running queries so
            they are not held up by a full 3-minute wait.

    Returns:
        None. The outcome is reported by printing a message.
    """
    query_job = BigQuery_client.query(query)

    # Poll the job status until it is done, sleeping between checks to keep
    # the number of status calls low
    while not query_job.done():
        time.sleep(poll_interval_seconds)

    # A truthy error_result is treated as a failed query
    if query_job.error_result:
        # Include the error details so that failures from queries running in
        # parallel can be told apart and diagnosed
        print(f"The query failed: {query_job.error_result}")
        # You might want to handle errors more gracefully here, e.g., log them or raise an exception
    else:
        print("Query complete")
        # You could potentially return the query results here if needed


# Queries to execute; starts out empty and is meant to be filled in below
a_list_of_queries_to_run = []

# ... (Logic to populate the query list would go here) ...

# Fan the queries out over a thread pool: at most 10 run at the same time
with ThreadPoolExecutor(max_workers=10) as executor:
    # map() calls run_query once per query. Unpacking the iterator consumes
    # every result, so this line blocks until all queries have finished.
    # Each entry in results is None because run_query returns nothing.
    results = [*executor.map(run_query, a_list_of_queries_to_run)]

Send a Comment

Your email address will not be published.