Dominik Süß

fighting computers since 1999

BigQuery run query across multiple datasets


I was trying to query for jobs matching a specific prefix across all datasets which are split up into clusters. Wildcard tables would have been nice but they don't work across datasets so I was stuck with this solution:

CREATE TEMP TABLE job_counts (dataset_id STRING, job_count INT64);
FOR record IN (
    SELECT 
      catalog_name as project_id,
      schema_name as dataset_id
    FROM `<project>.INFORMATION_SCHEMA.SCHEMATA`
    WHERE schema_name like 'some_prefix%'
)
DO 
  EXECUTE IMMEDIATE 
    CONCAT("INSERT job_counts (dataset_id, job_count)  SELECT '",record.dataset_id,"' as dataset_id, count(1) from ", record.dataset_id,".jobs WHERE name LIKE 'partner-%'");

END FOR;

SELECT * FROM job_counts;