diff --git a/src/compute-types/src/explain/text.rs b/src/compute-types/src/explain/text.rs index 04d099cc702fb..0a6aab33d265a 100644 --- a/src/compute-types/src/explain/text.rs +++ b/src/compute-types/src/explain/text.rs @@ -290,6 +290,7 @@ impl Plan { key_val_plan, plan, mfp_after, + temporal_bucketing_strategy: _, } => { ctx.indent.set(); if !mfp_after.expressions.is_empty() || !mfp_after.predicates.is_empty() { @@ -739,6 +740,7 @@ impl Plan { key_val_plan, plan, mfp_after, + temporal_bucketing_strategy, } => { use crate::plan::reduce::ReducePlan; match plan { @@ -764,6 +766,13 @@ impl Plan { let key = CompactScalars(key); writeln!(f, "{}input_key={}", ctx.indent, key)?; } + if !matches!(temporal_bucketing_strategy, ArrangementStrategy::Direct) { + writeln!( + f, + "{}temporal_bucketing_strategy={}", + ctx.indent, temporal_bucketing_strategy + )?; + } if key_val_plan.key_plan.deref().is_identity() { writeln!(f, "{}key_plan=id", ctx.indent)?; } else { diff --git a/src/compute-types/src/plan.rs b/src/compute-types/src/plan.rs index edabc5f1c947b..35168618f054f 100644 --- a/src/compute-types/src/plan.rs +++ b/src/compute-types/src/plan.rs @@ -130,6 +130,15 @@ pub enum ArrangementStrategy { TemporalBucketing, } +impl std::fmt::Display for ArrangementStrategy { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ArrangementStrategy::Direct => write!(f, "Direct"), + ArrangementStrategy::TemporalBucketing => write!(f, "TemporalBucketing"), + } + } +} + /// An identifier for an LIR node. #[derive( Clone, @@ -308,6 +317,20 @@ pub enum PlanNode { /// predicates so that it can be readily evaluated. /// TODO(ggevay): should we wrap this in [`mz_expr::SafeMfpPlan`]? mfp_after: MapFilterProject, + /// Strategy for forming the internal input arrangement built by `Reduce` + /// (materialized via `key_val_plan`). + /// + /// Set by the lowering from the input's `has_future_updates` flag. 
The + /// renderer applies it to the keyed `(key, val)` stream feeding the + /// reduce. See `render_reduce` for the rationale on why this is + /// plumbed through `Reduce` rather than handled at the arrangement site. + /// + /// Note: unrelated to the hash buckets used by hierarchical reductions + /// (e.g. `ReducePlan::Hierarchical`'s `buckets`), which are an internal + /// sharding scheme for `min`/`max`-style aggregations. Here "bucketing" + /// refers exclusively to temporal (time-domain) bucketing of + /// future-stamped updates. + temporal_bucketing_strategy: ArrangementStrategy, }, /// Key-based "Top K" operator, retaining the first K records in each group. TopK { @@ -805,6 +828,7 @@ impl CollectionPlan for PlanNode { key_val_plan: _, plan: _, mfp_after: _, + temporal_bucketing_strategy: _, } | PlanNode::TopK { input, diff --git a/src/compute-types/src/plan/interpret/api.rs b/src/compute-types/src/plan/interpret/api.rs index 60b5736f01c35..1898e542df0af 100644 --- a/src/compute-types/src/plan/interpret/api.rs +++ b/src/compute-types/src/plan/interpret/api.rs @@ -393,6 +393,7 @@ where key_val_plan, plan, mfp_after, + temporal_bucketing_strategy: _, } => { // Descend recursively into all children. let input = self.apply_rec(input, rg)?; @@ -676,6 +677,7 @@ where key_val_plan, plan, mfp_after, + temporal_bucketing_strategy: _, } => { // Descend recursively into all children. let input = self.apply_rec(input, rg)?; diff --git a/src/compute-types/src/plan/lowering.rs b/src/compute-types/src/plan/lowering.rs index 0545222982906..1662fa3f91407 100644 --- a/src/compute-types/src/plan/lowering.rs +++ b/src/compute-types/src/plan/lowering.rs @@ -31,6 +31,11 @@ use crate::plan::{ArrangementStrategy, AvailableCollections, GetPlan, LirId, Pla /// Pick an [`ArrangementStrategy`] based on whether the input may contain future-stamped /// updates. Future updates are the only case where temporal bucketing pays off. 
+/// +/// Convention: every caller that returns `TemporalBucketing` must also clear +/// `LoweredExpr::has_future_updates` on the resulting `LoweredExpr`, so that a stack of +/// bucketing-eligible operators only buckets at the lowest one. A trailing temporal MFP +/// fused on top naturally re-arms the flag. fn strategy_from_future(has_future_updates: bool) -> ArrangementStrategy { if has_future_updates { ArrangementStrategy::TemporalBucketing @@ -294,6 +299,12 @@ impl Context { // Even with a non-temporal MFP, we must propagate `has_future_updates` // from the underlying binding — applying an MFP doesn't drop future- // timestamped updates that already exist on the input. + // + // TODO(temporal-bucketing): `has_future_updates` is computed per + // dataflow; we don't currently propagate it across `Id::Global` + // boundaries (e.g., from an MV's dataflow to its consumer's), so a + // downstream-only `Get`-then-`ArrangeBy` won't bucket unless the + // consumer has its own local temporal MFP. let has_future_updates = self.has_future_updates.contains(id) || match &plan { GetPlan::Arrangement(_, _, mfp) | GetPlan::Collection(mfp) => { @@ -1031,17 +1042,24 @@ This is not expected to cause incorrect results, but could indicate a performanc // Return the plan and extended keys. let lir_id = self.allocate_lir_id(); + let strategy = strategy_from_future(future); + // Bucketing absorption: see `strategy_from_future`. If we bucket, clear + // the future-updates flag so the immediate parent is lowered as `Direct`. 
+ let has_future_updates = match strategy { + ArrangementStrategy::TemporalBucketing => false, + ArrangementStrategy::Direct => future, + }; LoweredExpr { plan: PlanNode::ArrangeBy { input_key, input: Box::new(input), input_mfp, forms, - strategy: strategy_from_future(future), + strategy, } .as_plan(lir_id), keys: input_keys, - has_future_updates: future, + has_future_updates, } } } @@ -1203,9 +1221,21 @@ This is not expected to cause incorrect results, but could indicate a performanc ); let output_keys = reduce_plan.keys(group_key.len(), output_arity); let lir_id = self.allocate_lir_id(); + // `Reduce` builds its own input arrangement inside `render_reduce` (via `KeyValPlan`), + // bypassing `ensure_collections`. So we can't piggy-back on an upstream `ArrangeBy`'s + // strategy to request temporal bucketing on a temporal-MFP-fed input: there is no such + // `ArrangeBy`. Instead we record the strategy directly on the `Reduce` node, and + // `render_reduce` applies bucketing to the keyed `(key, val)` stream itself. + let temporal_bucketing_strategy = strategy_from_future(input_future); // `extract_mfp_after` strips temporal predicates back into `*mfp_on_top` (the residual // MFP installed above the reduce), so `mfp_after` is non-temporal and cannot introduce - // future updates. The output's future flag is just whatever the input had. + // future updates. + // + // Bucketing absorption: see `strategy_from_future`. 
+ let has_future_updates = match temporal_bucketing_strategy { + ArrangementStrategy::TemporalBucketing => false, + ArrangementStrategy::Direct => input_future, + }; Ok(LoweredExpr { plan: PlanNode::Reduce { input_key, @@ -1213,10 +1243,11 @@ This is not expected to cause incorrect results, but could indicate a performanc key_val_plan, plan: reduce_plan, mfp_after, + temporal_bucketing_strategy, } .as_plan(lir_id), keys: output_keys, - has_future_updates: input_future, + has_future_updates, }) } diff --git a/src/compute-types/src/plan/render_plan.rs b/src/compute-types/src/plan/render_plan.rs index 09cc55da15cfa..2dc21dc133d2b 100644 --- a/src/compute-types/src/plan/render_plan.rs +++ b/src/compute-types/src/plan/render_plan.rs @@ -215,6 +215,9 @@ pub enum Expr { /// the key for the reduction; otherwise, the results become undefined. Additionally, the /// MFP must be free from temporal predicates so that it can be readily evaluated. mfp_after: MapFilterProject, + /// How the renderer should form the internal input arrangement built by `Reduce`. + /// Mirrors [`PlanNode::Reduce::temporal_bucketing_strategy`]. + temporal_bucketing_strategy: ArrangementStrategy, }, /// Key-based "Top K" operator, retaining the first K records in each group. TopK { @@ -438,6 +441,7 @@ impl TryFrom for LetFreePlan { key_val_plan, plan, mfp_after, + temporal_bucketing_strategy, } => { let expr = Reduce { input_key, @@ -445,6 +449,7 @@ impl TryFrom for LetFreePlan { key_val_plan, plan, mfp_after, + temporal_bucketing_strategy, }; insert_node(lir_id, parent, expr, nesting); @@ -911,6 +916,7 @@ impl<'a> std::fmt::Display for RenderPlanExprHumanizer<'a> { key_val_plan: _key_val_plan, plan, mfp_after: _mfp_after, + temporal_bucketing_strategy: _, } => match plan { ReducePlan::Distinct => write!(f, "Distinct GroupAggregate"), ReducePlan::Accumulable(..) 
=> write!(f, "Accumulable GroupAggregate"), diff --git a/src/compute/src/render.rs b/src/compute/src/render.rs index 35f96297de254..8041d3b513dee 100644 --- a/src/compute/src/render.rs +++ b/src/compute/src/render.rs @@ -1264,10 +1264,18 @@ impl<'scope, T: RenderTimestamp + MaybeBucketByTime> Context<'scope, T> { key_val_plan, plan, mfp_after, + temporal_bucketing_strategy, } => { let input = expect_input(input); let mfp_option = (!mfp_after.is_identity()).then_some(mfp_after); - self.render_reduce(input_key, input, key_val_plan, plan, mfp_option) + self.render_reduce( + input_key, + input, + key_val_plan, + plan, + mfp_option, + temporal_bucketing_strategy, + ) } TopK { input, top_k_plan } => { let input = expect_input(input); @@ -1481,11 +1489,18 @@ pub trait RenderTimestamp: MzTimestamp + Refines { /// Total-ordered timestamps perform real bucketing; partially-ordered timestamps /// (e.g. `Product<…>` in iterative scopes) implement this as a no-op. pub trait MaybeBucketByTime: Timestamp { - fn maybe_apply_temporal_bucketing<'scope>( - stream: StreamVec<'scope, Self, (Row, Self, Diff)>, + fn maybe_apply_temporal_bucketing<'scope, D>( + stream: StreamVec<'scope, Self, (D, Self, Diff)>, as_of: Antichain, summary: mz_repr::Timestamp, - ) -> VecCollection<'scope, Self, Row, Diff>; + ) -> VecCollection<'scope, Self, D, Diff> + where + D: timely::ExchangeData + + crate::typedefs::MzData + + Ord + + Clone + + std::fmt::Debug + + differential_dataflow::Hashable; } impl RenderTimestamp for mz_repr::Timestamp { @@ -1510,11 +1525,19 @@ impl RenderTimestamp for mz_repr::Timestamp { } impl MaybeBucketByTime for mz_repr::Timestamp { - fn maybe_apply_temporal_bucketing<'scope>( - stream: StreamVec<'scope, Self, (Row, Self, Diff)>, + fn maybe_apply_temporal_bucketing<'scope, D>( + stream: StreamVec<'scope, Self, (D, Self, Diff)>, as_of: Antichain, summary: mz_repr::Timestamp, - ) -> VecCollection<'scope, Self, Row, Diff> { + ) -> VecCollection<'scope, Self, D, Diff> + where + D: 
timely::ExchangeData + + crate::typedefs::MzData + + Ord + + Clone + + std::fmt::Debug + + differential_dataflow::Hashable, + { stream .bucket::>(as_of, summary) .as_collection() @@ -1551,11 +1574,19 @@ impl RenderTimestamp for Product> { } impl MaybeBucketByTime for Product> { - fn maybe_apply_temporal_bucketing<'scope>( - stream: StreamVec<'scope, Self, (Row, Self, Diff)>, + fn maybe_apply_temporal_bucketing<'scope, D>( + stream: StreamVec<'scope, Self, (D, Self, Diff)>, _as_of: Antichain, _summary: mz_repr::Timestamp, - ) -> VecCollection<'scope, Self, Row, Diff> { + ) -> VecCollection<'scope, Self, D, Diff> + where + D: timely::ExchangeData + + crate::typedefs::MzData + + Ord + + Clone + + std::fmt::Debug + + differential_dataflow::Hashable, + { // TODO: Implement bucketing on outer timestamp for iterative scopes. stream.as_collection() } diff --git a/src/compute/src/render/context.rs b/src/compute/src/render/context.rs index 5ba42239c74b6..828adf08ff012 100644 --- a/src/compute/src/render/context.rs +++ b/src/compute/src/render/context.rs @@ -1004,7 +1004,21 @@ impl<'scope, T: RenderTimestamp> CollectionBundle<'scope, T> { ); } - // Track whether we applied temporal bucketing, to avoid double-bucketing. + // Track whether we already applied temporal bucketing in this call, to + // avoid bucketing the same updates twice. + // + // Stacked bucketing across operators is prevented at the LIR level: in + // `strategy_from_future` (see `src/compute-types/src/plan/lowering.rs`), + // a bucketing consumer clears `LoweredExpr::has_future_updates`, so its + // parent is lowered with `ArrangementStrategy::Direct`. + // + // This flag is a belt-and-suspenders check for the case where a single + // `ensure_collections` call would otherwise fire bucketing twice on the + // same collection: once when forming the raw collection (via + // `as_collection_core` below) and again when building an arrangement in + // `collections.arranged`, or across two arrangements in that loop. 
The + // same `strategy` argument applies to all sites; the flag downgrades + // every application after the first to `Direct`. let mut bucketed = false; // True iff at least one new arrangement will actually be built below. Bucketing only @@ -1023,19 +1037,17 @@ impl<'scope, T: RenderTimestamp> CollectionBundle<'scope, T> { // Apply temporal bucketing when the lowering selected `TemporalBucketing` and // we will build at least one arrangement. This path fires when the collection // must be formed from scratch (e.g., from an arrangement via as_collection_core). - let oks = if will_create_arrangement - && matches!(strategy, ArrangementStrategy::TemporalBucketing) - && ENABLE_COMPUTE_TEMPORAL_BUCKETING.get(config_set) - { - let summary: mz_repr::Timestamp = TEMPORAL_BUCKETING_SUMMARY - .get(config_set) - .try_into() - .expect("must fit"); - bucketed = true; - T::maybe_apply_temporal_bucketing(oks.inner, as_of.clone(), summary) - } else { - oks - }; + let (oks, applied) = apply_bucketing_strategy( + oks, + if will_create_arrangement { + strategy + } else { + ArrangementStrategy::Direct + }, + as_of.clone(), + config_set, + ); + bucketed |= applied; self.collection = Some((oks, errs)); } for (key, _, thinning) in collections.arranged { @@ -1051,19 +1063,14 @@ impl<'scope, T: RenderTimestamp> CollectionBundle<'scope, T> { // the bundle (e.g., from an upstream temporal Mfp or Get) and we // haven't bucketed yet. This is the common path for temporal-MFP // → ArrangeBy flows. 
- let oks = if !bucketed - && matches!(strategy, ArrangementStrategy::TemporalBucketing) - && ENABLE_COMPUTE_TEMPORAL_BUCKETING.get(config_set) - { - let summary: mz_repr::Timestamp = TEMPORAL_BUCKETING_SUMMARY - .get(config_set) - .try_into() - .expect("must fit"); - bucketed = true; - T::maybe_apply_temporal_bucketing(oks.inner, as_of.clone(), summary) + let effective_strategy = if bucketed { + ArrangementStrategy::Direct } else { - oks + strategy }; + let (oks, applied) = + apply_bucketing_strategy(oks, effective_strategy, as_of.clone(), config_set); + bucketed |= applied; let (oks, errs_keyed, passthrough) = Self::arrange_collection(&name, oks, key.clone(), thinning.clone()); let errs_concat: KeyCollection<_, _, _> = errs.clone().concat(errs_keyed).into(); @@ -1353,3 +1360,44 @@ fn walk_cursor( } *fuel -= work; } + +/// Apply temporal bucketing to a per-row stream when the requested `strategy` selects it and the +/// `enable_compute_temporal_bucketing` dyncfg is on; otherwise return `oks` unchanged. +/// +/// Returns `(stream, applied)` where `applied` is true iff bucketing actually fired. Callers +/// use this flag to avoid double-bucketing the same stream within a single `ensure_collections` +/// invocation. +/// +/// Generic over the row data type `D` so the helper can serve both `Row` streams (the +/// `ArrangeBy` rendering in `ensure_collections`) and `(Row, Row)` streams (the internal +/// keyed input arrangement built by `render_reduce`). 
+pub(crate) fn apply_bucketing_strategy<'scope, T, D>( + oks: VecCollection<'scope, T, D, Diff>, + strategy: ArrangementStrategy, + as_of: Antichain, + config_set: &ConfigSet, +) -> (VecCollection<'scope, T, D, Diff>, bool) +where + T: RenderTimestamp + MaybeBucketByTime, + D: timely::ExchangeData + + crate::typedefs::MzData + + Ord + + Clone + + std::fmt::Debug + + differential_dataflow::Hashable, +{ + if matches!(strategy, ArrangementStrategy::TemporalBucketing) + && ENABLE_COMPUTE_TEMPORAL_BUCKETING.get(config_set) + { + let summary: mz_repr::Timestamp = TEMPORAL_BUCKETING_SUMMARY + .get(config_set) + .try_into() + .expect("must fit"); + ( + T::maybe_apply_temporal_bucketing(oks.inner, as_of, summary), + true, + ) + } else { + (oks, false) + } +} diff --git a/src/compute/src/render/reduce.rs b/src/compute/src/render/reduce.rs index e165bd567c5e2..013ef6594b2ea 100644 --- a/src/compute/src/render/reduce.rs +++ b/src/compute/src/render/reduce.rs @@ -27,6 +27,7 @@ use differential_dataflow::trace::implementations::merge_batcher::container::Int use differential_dataflow::trace::{Builder, Trace}; use differential_dataflow::{Data, VecCollection}; use itertools::Itertools; +use mz_compute_types::plan::ArrangementStrategy; use mz_compute_types::plan::reduce::{ AccumulablePlan, BasicPlan, BucketedPlan, HierarchicalPlan, KeyValPlan, MonotonicPlan, ReducePlan, ReductionType, SingleBasicPlan, reduction_type, @@ -68,7 +69,11 @@ impl<'scope, T: RenderTimestamp> Context<'scope, T> { key_val_plan: KeyValPlan, reduce_plan: ReducePlan, mfp_after: Option, - ) -> CollectionBundle<'scope, T> { + temporal_bucketing_strategy: ArrangementStrategy, + ) -> CollectionBundle<'scope, T> + where + T: crate::render::MaybeBucketByTime, + { // Convert `mfp_after` to an actionable plan. 
let mfp_after = mfp_after.map(|m| { m.into_plan() @@ -156,15 +161,25 @@ impl<'scope, T: RenderTimestamp> Context<'scope, T> { }, ); - // Render the reduce plan - self.render_reduce_plan( - reduce_plan, + // Bucket the keyed `(key, val)` stream when lowering chose `TemporalBucketing`. + // `Reduce` builds its own arrangement via `KeyValPlan`, bypassing + // `ensure_collections`, so the strategy is plumbed through `PlanNode::Reduce` + // rather than inferred at the arrangement site. `apply_bucketing_strategy` is a + // no-op for `Direct`. + // + // Unlike `ensure_collections`, there's only one bucketing call site here, so we + // don't need to track an `already_bucketed` flag. If a second site is ever added + // in this function, it must consult `_bucketed`. + let (key_val_collection, _bucketed) = crate::render::context::apply_bucketing_strategy( key_val_input.as_collection(), - err, - key_arity, - mfp_after, - ) - .leave_region(self.scope) + temporal_bucketing_strategy, + self.as_of_frontier.clone(), + &self.config_set, + ); + + // Render the reduce plan + self.render_reduce_plan(reduce_plan, key_val_collection, err, key_arity, mfp_after) + .leave_region(self.scope) }) } diff --git a/test/sqllogictest/explain/physical_plan_as_json.slt b/test/sqllogictest/explain/physical_plan_as_json.slt index c068f82066a64..9490192b838d9 100644 --- a/test/sqllogictest/explain/physical_plan_as_json.slt +++ b/test/sqllogictest/explain/physical_plan_as_json.slt @@ -1813,7 +1813,8 @@ SELECT DISTINCT a, b FROM t 1 ], "input_arity": 2 - } + }, + "temporal_bucketing_strategy": "Direct" } } } @@ -1972,7 +1973,8 @@ GROUP BY a 2 ], "input_arity": 3 - } + }, + "temporal_bucketing_strategy": "Direct" } } } @@ -2144,7 +2146,8 @@ FROM t 1 ], "input_arity": 2 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -2436,7 +2439,8 @@ SELECT * FROM hierarchical_group_by 2 ], "input_arity": 3 - } + }, + "temporal_bucketing_strategy": "Direct" } } } @@ -2568,7 +2572,8 @@ MATERIALIZED VIEW 
hierarchical_global_mv 1 ], "input_arity": 2 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -2875,7 +2880,8 @@ SELECT * FROM hierarchical_global 1 ], "input_arity": 2 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -3342,7 +3348,8 @@ GROUP BY a 1 ], "input_arity": 2 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -3601,7 +3608,8 @@ GROUP BY a 1 ], "input_arity": 2 - } + }, + "temporal_bucketing_strategy": "Direct" } } } @@ -3976,7 +3984,8 @@ FROM t 0 ], "input_arity": 1 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -4205,7 +4214,8 @@ FROM t 0 ], "input_arity": 1 - } + }, + "temporal_bucketing_strategy": "Direct" } } } @@ -4499,7 +4509,8 @@ MATERIALIZED VIEW collated_group_by_mv 2 ], "input_arity": 3 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -4638,7 +4649,8 @@ MATERIALIZED VIEW collated_group_by_mv 2 ], "input_arity": 3 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -4897,7 +4909,8 @@ MATERIALIZED VIEW collated_group_by_mv 1 ], "input_arity": 2 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -5156,7 +5169,8 @@ MATERIALIZED VIEW collated_group_by_mv 1 ], "input_arity": 2 - } + }, + "temporal_bucketing_strategy": "Direct" } } } @@ -5912,7 +5926,8 @@ SELECT * FROM collated_group_by 2 ], "input_arity": 3 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -6051,7 +6066,8 @@ SELECT * FROM collated_group_by 2 ], "input_arity": 3 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -6310,7 +6326,8 @@ SELECT * FROM collated_group_by 1 ], "input_arity": 2 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -6569,7 +6586,8 @@ SELECT * FROM collated_group_by 1 ], "input_arity": 2 - } + }, + "temporal_bucketing_strategy": "Direct" } } } @@ -7368,7 +7386,8 @@ MATERIALIZED VIEW collated_global_mv 1 ], "input_arity": 2 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -7477,7 +7496,8 @@ MATERIALIZED VIEW collated_global_mv 1 ], "input_arity": 2 
- } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -7706,7 +7726,8 @@ MATERIALIZED VIEW collated_global_mv 0 ], "input_arity": 1 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -7935,7 +7956,8 @@ MATERIALIZED VIEW collated_global_mv 0 ], "input_arity": 1 - } + }, + "temporal_bucketing_strategy": "Direct" } } } @@ -8716,7 +8738,8 @@ SELECT * FROM collated_global 1 ], "input_arity": 2 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -8825,7 +8848,8 @@ SELECT * FROM collated_global 1 ], "input_arity": 2 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -9054,7 +9078,8 @@ SELECT * FROM collated_global 0 ], "input_arity": 1 - } + }, + "temporal_bucketing_strategy": "Direct" } } }, @@ -9283,7 +9308,8 @@ SELECT * FROM collated_global 0 ], "input_arity": 1 - } + }, + "temporal_bucketing_strategy": "Direct" } } } diff --git a/test/sqllogictest/temporal_bucketing.slt b/test/sqllogictest/temporal_bucketing.slt new file mode 100644 index 0000000000000..0e72aec250c34 --- /dev/null +++ b/test/sqllogictest/temporal_bucketing.slt @@ -0,0 +1,127 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. + +mode cockroach + +# ----------------------------------------------------------------------------- +# Tests for LIR-level temporal bucketing applied to the *input arrangement* +# of `Reduce`. See `agent-notes/temporal-bucketing-reduce/plan.md`. 
+# +# Without the `temporal_bucketing_strategy` field, a `Temporal Filter -> +# GroupAggregate` pattern would have no LIR node to attach bucketing to, +# because `Reduce` builds its internal arrangement inside `render_reduce` +# via `KeyValPlan`, bypassing `ensure_collections`. +# ----------------------------------------------------------------------------- + +statement ok +CREATE TABLE events (k INT NOT NULL, event_time TIMESTAMP NOT NULL); + +# Firing: temporal filter directly above a GROUP BY in a materialized view. +# Lowering should set `temporal_bucketing_strategy=TemporalBucketing` on the Reduce. +query T multiline +EXPLAIN PHYSICAL PLAN AS VERBOSE TEXT FOR +CREATE MATERIALIZED VIEW mv_firing AS +SELECT k, count(*) +FROM events +WHERE event_time + INTERVAL '45 day' > mz_now() +GROUP BY k; +---- +materialize.public.mv_firing: + Reduce::Accumulable + simple_aggrs[0]=(0, count(*)) + temporal_bucketing_strategy=TemporalBucketing + key_plan=id + val_plan + project=(#1) + map=(true) + Get::Collection materialize.public.events + raw=true + +Source materialize.public.events + project=(#0) + filter=((mz_now() < timestamp_to_mz_timestamp((#1{event_time} + 45 days)))) + +Target cluster: quickstart + +EOF + +# Idempotency: a temporal-filter view materialized as an index already absorbs +# the future-update flag. A downstream Reduce reading from the indexed `Get` +# must NOT re-bucket; the pretty-printer omits `temporal_bucketing_strategy` when +# it is the default `Direct`. 
+statement ok +CREATE VIEW v_recent AS +SELECT k FROM events WHERE event_time + INTERVAL '45 day' > mz_now(); + +statement ok +CREATE DEFAULT INDEX ON v_recent; + +query T multiline +EXPLAIN PHYSICAL PLAN AS VERBOSE TEXT FOR +CREATE MATERIALIZED VIEW mv_idempotent AS +SELECT k, count(*) FROM v_recent GROUP BY k; +---- +materialize.public.mv_idempotent: + Reduce::Accumulable + simple_aggrs[0]=(0, count(*)) + input_key=#0{k} + key_plan=id + val_plan + project=(#1) + map=(true) + Get::PassArrangements materialize.public.v_recent + raw=false + arrangements[0]={ key=[#0{k}], permutation=id, thinning=() } + +Used Indexes: + - materialize.public.v_recent_primary_idx (*** full scan ***) + +Target cluster: quickstart + +EOF + +# Nested aggregation: an inner Reduce::Hierarchical sits directly above a +# temporal-filter MFP and gets bucketed. The outer `count(*)` Reduce is +# optimized away here, but the test still pins down that the inner Reduce +# gets `temporal_bucketing_strategy=TemporalBucketing`. +query T multiline +EXPLAIN PHYSICAL PLAN AS VERBOSE TEXT FOR +CREATE MATERIALIZED VIEW mv_rearm AS +SELECT bucket, count(*) FROM ( + SELECT k AS bucket, count(*) AS n, max(event_time) AS m + FROM events + WHERE event_time + INTERVAL '45 day' > mz_now() + GROUP BY k +) sub +WHERE m + INTERVAL '1 day' > mz_now() +GROUP BY bucket; +---- +materialize.public.mv_rearm: + Mfp + project=(#0, #2) + filter=((mz_now() < timestamp_to_mz_timestamp((#1{m} + 1 day)))) + map=(1) + input_key=#0 + Reduce::Hierarchical + aggr_funcs=[max] + buckets=[268435456, 16777216, 1048576, 65536, 4096, 256, 16] + temporal_bucketing_strategy=TemporalBucketing + key_plan + project=(#0) + val_plan + project=(#1) + Get::Collection materialize.public.events + raw=true + +Source materialize.public.events + filter=((mz_now() < timestamp_to_mz_timestamp((#1{event_time} + 45 days)))) + +Target cluster: quickstart + +EOF