An example of how to run multiple BigQuery queries in parallel using Python's ThreadPoolExecutor.
import time
from concurrent.futures import ThreadPoolExecutor
from google.cloud import bigquery
from google.oauth2 import service_account
##########
# Variable set up
##########
# Replace with your actual Google Cloud Project ID
PROJECT = "project-id"
# Set up Google BigQuery Client
# Authenticate using a service account key file
credentials = service_account.Credentials.from_service_account_file(
    "big_query_admin.json"  # Path to your service account key file
)
bigquery_client = bigquery.Client(credentials=credentials, project=PROJECT)  # Specify the project
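# Alternative: if Application Default Credentials are already configured in
# your environment (e.g. via `gcloud auth application-default login`), the
# key file can be skipped entirely:
# bigquery_client = bigquery.Client(project=PROJECT)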
## Helper functions
def run_query(query):
    '''
    Runs a query against BigQuery and waits for it to complete.
    Prints a message indicating success or failure.
    '''
    query_job = bigquery_client.query(query)
    # Poll the query job status until it's done
    while not query_job.done():
        # Sleep for 3 minutes (180 seconds) before checking again
        # This reduces the number of API calls and potential costs
        time.sleep(180)
    # Check if the query encountered an error
    if query_job.error_result:
        print(f"The query failed: {query_job.error_result}")
        # You might want to handle errors more gracefully here, e.g., log them or raise an exception
    else:
        print("Query complete")
    # You could potentially return the query results here if needed
# Create an empty list to store the queries you want to run
queries_to_run = []
# ... (Logic to populate the query list would go here) ...
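# For illustration only, one hypothetical way to populate the list. The
# dataset (`my_dataset`) and tables (`events_0` ... `events_2`) are made-up
# names and would need to be replaced with tables in your own project:
# queries_to_run = [
#     f"SELECT COUNT(*) FROM `{PROJECT}.my_dataset.events_{i}`"
#     for i in range(3)
# ]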
# Run queries in parallel using a ThreadPoolExecutor
# max_workers=10 means up to 10 queries can run concurrently
with ThreadPoolExecutor(max_workers=10) as executor:
    # Use executor.map to apply the run_query function to each query in the list
    # results will contain a list of None values (since run_query doesn't return anything)
    results = list(executor.map(run_query, queries_to_run))
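If you need the rows each query returns rather than just a success message, a small variation works with the same executor pattern. This is a minimal sketch, assuming the bigquery_client and queries_to_run defined above; it uses query_job.result(), which blocks until the job finishes and raises an exception if the query fails, so no manual polling is needed. The function name run_query_with_results is illustrative.
def run_query_with_results(query):
    '''
    Variant of run_query that returns the query rows instead of printing.
    '''
    query_job = bigquery_client.query(query)
    # result() blocks until the job completes and raises on failure,
    # so no manual polling or error_result check is needed
    rows = query_job.result()
    # Materialize each row as a dictionary keyed by column name
    return [dict(row) for row in rows]

with ThreadPoolExecutor(max_workers=10) as executor:
    all_results = list(executor.map(run_query_with_results, queries_to_run))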