From asi
Guides BigQuery engineering with bq CLI for queries, table ops, data load/export; GoogleSQL syntax, functions, window funcs; partitioning, clustering, optimization.
How this skill is triggered — by the user, by Claude, or both
Slash command
/asi:bigqueryThe summary Claude sees in its skill listing — used to decide when to auto-load this skill
This skill provides a comprehensive guide for BigQuery development.
This skill provides a comprehensive guide for BigQuery development.
# Execute query with Standard SQL
bq query --use_legacy_sql=false 'SELECT * FROM `project.dataset.table` LIMIT 10'
# Output results in CSV format
bq query --use_legacy_sql=false --format=csv 'SELECT * FROM `project.dataset.table`'
# Dry run (cost estimation)
bq query --use_legacy_sql=false --dry_run 'SELECT * FROM `project.dataset.table`'
# Save results to table
bq query --use_legacy_sql=false --destination_table=project:dataset.result_table 'SELECT * FROM `project.dataset.table`'
# List tables
bq ls project:dataset
# Check table schema
bq show --schema --format=prettyjson project:dataset.table
# Create table (from schema file)
bq mk --table project:dataset.table schema.json
# Create partitioned table
bq mk --table --time_partitioning_field=created_at project:dataset.table schema.json
# Create clustered table
bq mk --table --clustering_fields=user_id,category project:dataset.table schema.json
# Delete table
bq rm -t project:dataset.table
# Load from CSV
bq load --source_format=CSV project:dataset.table gs://bucket/data.csv schema.json
# Load from JSON
bq load --source_format=NEWLINE_DELIMITED_JSON project:dataset.table gs://bucket/data.json
# Load from Parquet (auto-detect schema)
bq load --source_format=PARQUET --autodetect project:dataset.table gs://bucket/data.parquet
# Export to Cloud Storage
bq extract --destination_format=CSV project:dataset.table gs://bucket/export/*.csv
-- Basic SELECT
SELECT
column1,
column2,
COUNT(*) AS count
FROM
`project.dataset.table`
WHERE
date >= '2024-01-01'
GROUP BY
column1, column2
HAVING
COUNT(*) > 10
ORDER BY
count DESC
LIMIT 100
-- String functions
CONCAT(str1, str2)
LOWER(str), UPPER(str)
TRIM(str), LTRIM(str), RTRIM(str)
SUBSTR(str, start, length)
REGEXP_CONTAINS(str, r'pattern')
REGEXP_EXTRACT(str, r'pattern')
SPLIT(str, delimiter)
-- Date/time functions
CURRENT_DATE(), CURRENT_TIMESTAMP()
DATE(timestamp), TIMESTAMP(date)
DATE_ADD(date, INTERVAL 1 DAY)
DATE_DIFF(date1, date2, DAY)
FORMAT_DATE('%Y-%m-%d', date)
PARSE_DATE('%Y%m%d', str)
EXTRACT(YEAR FROM date)
-- Aggregate functions
COUNT(*), COUNT(DISTINCT column)
SUM(column), AVG(column)
MIN(column), MAX(column)
ARRAY_AGG(column)
STRING_AGG(column, ',')
-- Window functions
ROW_NUMBER() OVER (PARTITION BY col ORDER BY col2)
RANK() OVER (ORDER BY col DESC)
LAG(col, 1) OVER (ORDER BY date)
LEAD(col, 1) OVER (ORDER BY date)
SUM(col) OVER (PARTITION BY category)
-- INNER JOIN
SELECT a.*, b.column
FROM `project.dataset.table_a` AS a
INNER JOIN `project.dataset.table_b` AS b
ON a.id = b.id
-- LEFT JOIN
SELECT a.*, b.column
FROM `project.dataset.table_a` AS a
LEFT JOIN `project.dataset.table_b` AS b
ON a.id = b.id
-- CROSS JOIN (commonly used for array expansion)
SELECT *
FROM `project.dataset.table`,
UNNEST(array_column) AS element
WITH
base_data AS (
SELECT *
FROM `project.dataset.table`
WHERE date >= '2024-01-01'
),
aggregated AS (
SELECT
category,
COUNT(*) AS count
FROM base_data
GROUP BY category
)
SELECT *
FROM aggregated
ORDER BY count DESC
Divide data by date to reduce query scan volume.
-- Create date-partitioned table
CREATE TABLE `project.dataset.partitioned_table`
PARTITION BY DATE(created_at)
AS SELECT * FROM `project.dataset.source_table`;
-- Integer partitioning
CREATE TABLE `project.dataset.int_partitioned`
PARTITION BY RANGE_BUCKET(user_id, GENERATE_ARRAY(0, 1000000, 10000))
AS SELECT * FROM source;
-- Require partition filter
CREATE TABLE `project.dataset.table`
PARTITION BY DATE(created_at)
OPTIONS (
require_partition_filter = TRUE
);
Sort and group data by specified columns.
-- Clustering table
CREATE TABLE `project.dataset.clustered_table`
PARTITION BY DATE(created_at)
CLUSTER BY user_id, category
AS SELECT * FROM source;
-- Avoid SELECT *
-- Bad
SELECT * FROM table;
-- Good
SELECT column1, column2 FROM table;
-- Leverage partition pruning
-- Bad (function applied to partition column)
WHERE DATE(created_at) = '2024-01-01'
-- Good
WHERE created_at >= '2024-01-01' AND created_at < '2024-01-02'
-- Use APPROX_ functions for estimates (faster)
SELECT APPROX_COUNT_DISTINCT(user_id) FROM table;
-- Put smaller table on right side (broadcast JOIN)
SELECT *
FROM large_table
JOIN small_table ON large_table.id = small_table.id;
-- JOIN only needed columns
WITH filtered AS (
SELECT id, needed_column FROM large_table WHERE condition
)
SELECT * FROM filtered JOIN other_table ON ...
-- Check job statistics
SELECT
job_id,
total_bytes_processed,
total_slot_ms,
TIMESTAMP_DIFF(end_time, start_time, SECOND) AS duration_sec
FROM `region-us`.INFORMATION_SCHEMA.JOBS
WHERE creation_time > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 DAY)
ORDER BY total_slot_ms DESC
LIMIT 10;
-- Set query byte limit per project
-- Configure in Cloud Console or gcloud
roles/bigquery.admin: Full permissionsroles/bigquery.dataEditor: Read/write dataroles/bigquery.dataViewer: Read-only dataroles/bigquery.jobUser: Execute jobsroles/bigquery.user: List datasets, execute jobs-- Apply policy tag
ALTER TABLE `project.dataset.table`
ALTER COLUMN sensitive_column
SET OPTIONS (policy_tags = ['projects/project/locations/us/taxonomies/123/policyTags/456']);
-- Create row access policy
CREATE ROW ACCESS POLICY region_filter
ON `project.dataset.table`
GRANT TO ('user:[email protected]')
FILTER USING (region = 'APAC');
-- Linear regression model
CREATE OR REPLACE MODEL `project.dataset.model`
OPTIONS (
model_type = 'LINEAR_REG',
input_label_cols = ['target']
) AS
SELECT feature1, feature2, target
FROM `project.dataset.training_data`;
-- Logistic regression model
CREATE OR REPLACE MODEL `project.dataset.classifier`
OPTIONS (
model_type = 'LOGISTIC_REG',
input_label_cols = ['label']
) AS
SELECT * FROM training_data;
-- Model evaluation
SELECT * FROM ML.EVALUATE(MODEL `project.dataset.model`);
-- Prediction
SELECT *
FROM ML.PREDICT(
MODEL `project.dataset.model`,
(SELECT * FROM `project.dataset.new_data`)
);
-- Reference Cloud Storage CSV as external table
CREATE EXTERNAL TABLE `project.dataset.external_table`
OPTIONS (
format = 'CSV',
uris = ['gs://bucket/path/*.csv'],
skip_leading_rows = 1
);
-- Parquet external table
CREATE EXTERNAL TABLE `project.dataset.parquet_table`
OPTIONS (
format = 'PARQUET',
uris = ['gs://bucket/path/*.parquet']
);
-- Connect to Cloud SQL
SELECT * FROM EXTERNAL_QUERY(
'projects/project/locations/us/connections/connection_id',
'SELECT * FROM mysql_table'
);
-- Configure in Cloud Console or bq command
-- Run daily at 2 AM
bq query --use_legacy_sql=false \
--schedule='every 24 hours' \
--display_name='Daily aggregation' \
--destination_table='project:dataset.daily_summary' \
--replace \
'SELECT DATE(created_at) as date, COUNT(*) as count FROM source GROUP BY 1'
npx claudepluginhub plurigrid/asi --plugin asiGenerates optimized BigQuery SQL from natural language descriptions, converts queries from other SQL dialects to BigQuery syntax, and applies best practices like avoiding SELECT * and using ARRAY_AGG for latest records.
Analyzes BigQuery cost and performance: slot reservations, BI Engine, query cost estimation, dataset governance, and partitioning/clustering optimization.
Provides expert guidance for creating, modifying, and optimizing dbt pipelines targeting Google BigQuery. Activates when users work with dbt models, optimize SQL, or set up dbt projects.