Skip to main content
This cookbook contains patterns for optimizing NQL query performance. Apply these techniques to reduce execution time and costs.

Filter early and aggressively

Apply filters before joins

Filter data in subqueries or CTEs before joining to reduce the amount of data processed:
-- Better: Filter before joining
-- Each CTE applies its row filters first, so the join only processes the
-- reduced row sets instead of the full tables.
WITH active_users AS (
  -- Users active within the last 90 days.
  SELECT user_id, email
  FROM company_data."100"
  WHERE
    status = 'active'
    AND created_at >= CURRENT_DATE - INTERVAL '90' DAY
),
recent_orders AS (
  -- Orders placed within the last 30 days.
  SELECT user_id, order_id, amount
  FROM company_data."200"
  WHERE order_date >= CURRENT_DATE - INTERVAL '30' DAY
)
SELECT
  u.user_id,
  u.email,
  o.order_id,
  o.amount
FROM active_users u
INNER JOIN recent_orders o ON u.user_id = o.user_id
-- Avoid: Filtering after joining
-- Same result for this inner join, but the join runs over the unfiltered
-- tables and the WHERE clause prunes rows only afterwards.
SELECT
  u.user_id,
  u.email,
  o.order_id,
  o.amount
FROM company_data."100" u
INNER JOIN company_data."200" o ON u.user_id = o.user_id
WHERE
  u.status = 'active'
  AND u.created_at >= CURRENT_DATE - INTERVAL '90' DAY
  AND o.order_date >= CURRENT_DATE - INTERVAL '30' DAY

Use price filtering early

Add _price_cpm_usd filters to limit data costs:
-- Put the data-cost cap (_price_cpm_usd) alongside the business filters so
-- expensive rows are excluded before any further processing.
SELECT user_id, email, category, event_date
FROM company_data."123"
WHERE
  _price_cpm_usd <= 1.00  -- Apply price filter early
  AND category = 'target'
  AND event_date >= CURRENT_DATE - INTERVAL '30' DAY

Use QUALIFY instead of subqueries

Deduplication with QUALIFY

QUALIFY is more efficient than subquery-based deduplication:
-- Better: QUALIFY
-- Keeps the single most recent row per user without a wrapping subquery.
SELECT user_id, email, last_seen
FROM company_data."123"
QUALIFY ROW_NUMBER() OVER (
  PARTITION BY user_id
  ORDER BY last_seen DESC
) = 1
-- Avoid: Subquery approach
-- Same result, but the window column must be materialized in a derived
-- table and then filtered in an outer query.
SELECT user_id, email, last_seen
FROM (
  SELECT
    user_id,
    email,
    last_seen,
    ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY last_seen DESC) AS rn
  FROM company_data."123"
) sub
WHERE rn = 1

Combined filtering with QUALIFY

Apply multiple window conditions efficiently:
SELECT
  user_id,
  email,
  category
FROM company_data."123"
QUALIFY
  -- Latest row per user...
  ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC) = 1
  -- ...restricted to popular categories. SUM(1) counts rows per partition
  -- (NQL does not support COUNT(*); see the anti-patterns section).
  AND SUM(1) OVER (PARTITION BY category) >= 100  -- Only categories with 100+ rows

Optimize joins

Use fully qualified column names

Explicit column references help the query optimizer:
-- Fully qualifying every column makes its source table unambiguous to both
-- the reader and the query optimizer.
SELECT
  company_data."100".user_id,
  company_data."100".email,
  company_data."200".order_id
FROM company_data."100"
INNER JOIN company_data."200"
  ON company_data."100".user_id = company_data."200".customer_id

Avoid OR in join conditions

OR conditions prevent efficient join algorithms:
-- Avoid: OR in join condition
SELECT t1.id, t1.value, t2.related_value
FROM company_data."100" t1
INNER JOIN company_data."200" t2
  ON t1.id = t2.id OR t1.alt_id = t2.alt_id
-- Better: Use COALESCE
-- NOTE(review): this is NOT strictly equivalent to the OR join above —
-- COALESCE compares only the first non-null key on each side, so a row
-- whose id is present but which only matches on alt_id is missed. Confirm
-- this fits the data before using; otherwise prefer the UNION form below.
SELECT t1.id, t1.value, t2.related_value
FROM company_data."100" t1
INNER JOIN company_data."200" t2
  ON COALESCE(t1.id, t1.alt_id) = COALESCE(t2.id, t2.alt_id)
-- Or: Union separate queries
-- Each branch uses a single equi-join key; UNION (not UNION ALL) removes
-- rows returned by both branches.
SELECT t1.id, t1.value, t2.related_value
FROM company_data."100" t1
INNER JOIN company_data."200" t2 ON t1.id = t2.id
UNION
SELECT t1.id, t1.value, t2.related_value
FROM company_data."100" t1
INNER JOIN company_data."200" t2 ON t1.alt_id = t2.alt_id
WHERE t1.id IS NULL OR t2.id IS NULL  -- alt_id matches only where an id is missing

Aggregate before joining

Pre-aggregate large tables to reduce join complexity:
-- Better: Aggregate first
-- Collapsing orders to one row per customer before joining keeps the join
-- one-to-one instead of one-to-many.
SELECT
  c.customer_id,
  c.name,
  order_stats.total_orders,
  order_stats.total_amount
FROM company_data."100" c
INNER JOIN (
  -- Per-customer order totals.
  SELECT
    customer_id,
    COUNT(1) AS total_orders,
    SUM(amount) AS total_amount
  FROM company_data."200"
  GROUP BY customer_id
) order_stats ON c.customer_id = order_stats.customer_id

Partitioning strategies

Partition by date for time-series data

-- Daily partitions let queries that filter on event_date read only the
-- partitions they need.
CREATE MATERIALIZED VIEW "daily_events"
PARTITIONED_BY event_date DAY
AS (
  SELECT
    user_id,
    event_type,
    DATE_TRUNC('day', event_timestamp) AS event_date  -- partition key
  FROM company_data."123"
  WHERE event_timestamp >= CURRENT_DATE - INTERVAL '90' DAY  -- 90-day window
)
BUDGET 100 USD

Monthly partitions for longer retention

-- Monthly partitions suit longer retention: fewer, larger partitions.
CREATE MATERIALIZED VIEW "monthly_summary"
PARTITIONED_BY report_month MONTH
AS (
  SELECT
    DATE_TRUNC('month', event_date) AS report_month,  -- partition key
    category,
    COUNT(1) AS event_count,
    SUM(amount) AS total_amount
  FROM company_data."123"
  GROUP BY DATE_TRUNC('month', event_date), category
)
BUDGET 50 USD

Budget management

Set appropriate budget limits

Match budget to expected data volume:
-- Daily budget for daily refreshes
-- Refreshes once per day and ingests only the previous day's rows, so the
-- spend cap is per calendar day.
CREATE MATERIALIZED VIEW "daily_data"
REFRESH_SCHEDULE = '@daily'
AS (SELECT user_id, email, event_date FROM company_data."123" WHERE event_date = CURRENT_DATE - INTERVAL '1' DAY)
LIMIT 10 USD PER CALENDAR_DAY

-- Monthly budget for larger datasets
-- Unfiltered refresh once a month, so the larger cap is per calendar month.
CREATE MATERIALIZED VIEW "monthly_data"
REFRESH_SCHEDULE = '@monthly'
AS (SELECT user_id, email, created_at FROM company_data."123")
LIMIT 500 USD PER CALENDAR_MONTH

Combine price and date filters

Maximize value by filtering on both price and recency:
-- Both filters shrink the scan: the price cap bounds cost per row, the
-- date bound limits how much history is read.
SELECT user_id, email, event_timestamp
FROM company_data."123"
WHERE
  _price_cpm_usd <= 0.75
  AND event_timestamp >= CURRENT_DATE - INTERVAL '14' DAY

Efficient aggregations

Use APPROX_COUNT_DISTINCT instead of COUNT(DISTINCT …)

APPROX_COUNT_DISTINCT is significantly faster and cheaper than COUNT(DISTINCT ...). It returns exact results for low-cardinality columns and near-exact results at scale. Use it for any query that doesn’t require a guaranteed exact count.
-- Better: Use APPROX_COUNT_DISTINCT for unique counts
SELECT
  category,
  APPROX_COUNT_DISTINCT(user_id) AS unique_users  -- approximate, cheaper
FROM company_data."123"
GROUP BY category
-- Avoid: COUNT(DISTINCT ...) is slower and more expensive
SELECT
  category,
  COUNT(DISTINCT user_id) AS unique_users  -- exact, but costly at scale
FROM company_data."123"
GROUP BY category
Reserve COUNT(DISTINCT ...) for cases where an exact count drives business logic—for example, threshold comparisons in HAVING or CASE WHEN clauses:
-- Exact count is appropriate here because the threshold matters
SELECT category, COUNT(DISTINCT user_id) AS unique_users
FROM company_data."123"
GROUP BY category
HAVING COUNT(DISTINCT user_id) >= 5  -- exact cutoff drives the group filter

Pre-aggregate for common queries

Create summary views for frequently queried aggregations:
-- Rolling 30-day daily summary, refreshed once a day, so downstream queries
-- read the pre-aggregated view instead of raw events.
CREATE MATERIALIZED VIEW "daily_category_stats"
REFRESH_SCHEDULE = '@daily'
AS (
  SELECT
    DATE_TRUNC('day', event_timestamp) AS event_date,
    category,
    COUNT(1) AS events,
    APPROX_COUNT_DISTINCT(user_id) AS users,  -- approximate unique users
    SUM(amount) AS total
  FROM company_data."123"
  WHERE event_timestamp >= CURRENT_DATE - INTERVAL '30' DAY
  GROUP BY DATE_TRUNC('day', event_timestamp), category
)
BUDGET 25 USD PER CALENDAR_DAY

Limit result sets

Always use LIMIT during exploration

-- While exploring data
SELECT user_id, email, category
FROM company_data."123"
WHERE category = 'test'
LIMIT 100 ROWS

-- Or with offset for pagination
-- (fragment: replaces the LIMIT clause of the query above)
LIMIT 100 OFFSET 100 ROWS

Use EXPLAIN before large queries

-- Preview before executing
-- EXPLAIN returns the query plan without executing the query itself.
EXPLAIN
SELECT user_id, email, event_timestamp
FROM company_data."123"
WHERE event_timestamp >= CURRENT_DATE - INTERVAL '90' DAY

Efficient date handling

Use DATE_TRUNC for grouping

SELECT
  DATE_TRUNC('day', event_timestamp) AS day,  -- bucket timestamps by calendar day
  COUNT(1) AS events
FROM company_data."123"
GROUP BY DATE_TRUNC('day', event_timestamp)  -- must repeat the SELECT expression

Avoid functions on indexed columns in WHERE

-- Better: Compare directly
-- A bare range predicate on the column allows index/partition pruning.
WHERE event_date >= '2024-01-01'

-- Avoid: Function on column
-- Wrapping the column forces the expression to be evaluated per row and
-- defeats pruning.
WHERE EXTRACT(YEAR FROM event_date) = 2024

CTEs for complex queries

Use CTEs for readability and potential optimization

-- Three-step pipeline: filter cheap recent rows, aggregate per user, then
-- rank by spend and keep the top 100.
WITH
  base_data AS (
    -- Price and recency filters applied first (see "Filter early").
    SELECT user_id, event_date, amount
    FROM company_data."123"
    WHERE
      event_date >= CURRENT_DATE - INTERVAL '30' DAY
      AND _price_cpm_usd <= 1.00
  ),
  user_metrics AS (
    -- Per-user totals over the filtered base.
    SELECT
      user_id,
      COUNT(1) AS event_count,
      SUM(amount) AS total_amount
    FROM base_data
    GROUP BY user_id
  ),
  ranked_users AS (
    -- Alias renamed from "rank": RANK is a reserved word / window function
    -- name, which some engines reject or mis-parse as a bare identifier.
    SELECT
      user_id,
      event_count,
      total_amount,
      RANK() OVER (ORDER BY total_amount DESC) AS amount_rank
    FROM user_metrics
  )
-- Top 100 users by total amount (RANK can return more than 100 rows on ties).
SELECT user_id, event_count, total_amount, amount_rank
FROM ranked_users
WHERE amount_rank <= 100

Anti-patterns to avoid

Use explicit column lists (wildcards not supported)

NQL requires explicit column lists—SELECT * and COUNT(*) are not supported. See Explicit Column Selection for why this design choice improves cost transparency, governance, and query optimization.
-- NQL rejects wildcard projections; every column must be listed explicitly.
-- Not supported in NQL
SELECT * FROM company_data."123"

-- Required: Select only needed columns
SELECT user_id, email, created_at FROM company_data."123"

-- Not supported: COUNT(*)
SELECT COUNT(*) FROM company_data."123"

-- Required: Use COUNT(1) for row counts
SELECT COUNT(1) FROM company_data."123"

Avoid leading wildcards in LIKE patterns when possible

-- Slow: Leading wildcard forces a scan; it cannot use prefix optimizations
WHERE email LIKE '%@gmail.com'

-- Better: Extract the domain with an explicit capture-group index.
-- Without the group argument, REGEXP_EXTRACT returns the entire match
-- ('@gmail.com'), so the equality against 'gmail.com' would never hold;
-- group 1 returns just the captured domain.
WHERE REGEXP_EXTRACT(email, '@(.+)$', 1) = 'gmail.com'

Don’t nest too many subqueries

-- Avoid: Deeply nested subqueries
-- Three anonymous levels that do nothing but pass columns through; the
-- reader must unwind them inside-out.
SELECT user_id, email FROM (
  SELECT user_id, email FROM (
    SELECT user_id, email FROM (
      SELECT user_id, email FROM company_data."123"
    ) a
  ) b
) c

-- Better: Use CTEs
-- Same pipeline, but each step is named and reads top to bottom.
WITH step1 AS (SELECT user_id, email FROM company_data."123"),
     step2 AS (SELECT user_id, email FROM step1),
     step3 AS (SELECT user_id, email FROM step2)
SELECT user_id, email FROM step3