From 08548ffcafd298924c8d1f1e32198d25aef70c49 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 20 May 2026 13:29:56 -0400 Subject: [PATCH 1/3] test from bug report, enriched with plans --- test/sqllogictest/variadic_outer_join.slt | 314 ++++++++++++++++++++++ 1 file changed, 314 insertions(+) create mode 100644 test/sqllogictest/variadic_outer_join.slt diff --git a/test/sqllogictest/variadic_outer_join.slt b/test/sqllogictest/variadic_outer_join.slt new file mode 100644 index 0000000000000..c3f518d63b570 --- /dev/null +++ b/test/sqllogictest/variadic_outer_join.slt @@ -0,0 +1,314 @@ +# Copyright Materialize, Inc. and contributors. All rights reserved. +# +# Use of this software is governed by the Business Source License +# included in the LICENSE file at the root of this repository. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0. + +mode cockroach + +statement ok +CREATE TABLE a (k int) + +statement ok +CREATE TABLE b (x int, y int, k int) + +statement ok +CREATE TABLE c (k int) + +statement ok +INSERT INTO a VALUES (1) + +# Only (5, 5, 100) satisfies the b.x = b.y filter. +statement ok +INSERT INTO b VALUES (5, 5, 100), (3, 7, 200) + +statement ok +INSERT INTO c VALUES (100) + +# Baseline: with the variadic LEFT JOIN lowering disabled, the generic +# outer-join lowering classifies `b.x = b.y` as a right-local filter and +# produces the correct single-row result. 
+simple conn=mz_system,user=mz_system +ALTER SYSTEM SET enable_variadic_left_join_lowering TO false +---- +COMPLETE 0 + +query T multiline +EXPLAIN PHYSICAL PLAN FOR +SELECT a.k, b.x, b.y, b.k, c.k +FROM (a LEFT JOIN b ON b.x = b.y) LEFT JOIN c ON b.k = c.k +---- +Explained Query: + With + cte l0 = + Join::Linear + linear_stage[0] + closure + project=(#3, #0..=#2) + lookup={ relation=0, key=[] } + stream={ key=[], thinning=(#0..=#2) } + source={ relation=1, key=[] } + ArrangeBy + raw=true + arrangements[0]={ key=[], permutation=id, thinning=(#0) } + Get::PassArrangements materialize.public.a + raw=true + ArrangeBy + raw=true + arrangements[0]={ key=[], permutation=id, thinning=(#0..=#2) } + Get::Collection materialize.public.b + raw=true + cte l1 = + Union + Get::PassArrangements l0 + raw=true + Join::Linear + final_closure + project=(#0..=#3) + map=(null, null, null) + linear_stage[0] + lookup={ relation=1, key=[#0] } + stream={ key=[#0], thinning=() } + source={ relation=0, key=[#0] } + ArrangeBy + raw=true + arrangements[0]={ key=[#0], permutation=id, thinning=() } + Union consolidate_output=true + Negate + ArrangeBy + input_key=[#0] + raw=true + Reduce::Distinct + key_plan=id + val_plan + project=() + Get::Collection l0 + project=(#0) + raw=true + ArrangeBy + input_key=[#0] + raw=true + Reduce::Distinct + key_plan=id + val_plan + project=() + Get::PassArrangements materialize.public.a + raw=true + ArrangeBy + raw=true + arrangements[0]={ key=[#0], permutation=id, thinning=() } + Get::PassArrangements materialize.public.a + raw=true + cte l2 = + ArrangeBy + raw=true + arrangements[0]={ key=[#3{k}], permutation={#0: #1, #1: #2, #2: #3, #3: #0}, thinning=(#0..=#2) } + Get::Collection l1 + filter=((#3{k}) IS NOT NULL) + raw=true + cte l3 = + Join::Linear + linear_stage[0] + closure + project=(#1..=#3, #0) + lookup={ relation=1, key=[#0{k}] } + stream={ key=[#3{k}], thinning=(#0..=#2) } + source={ relation=0, key=[#3{k}] } + Get::PassArrangements l2 + raw=true + 
arrangements[0]={ key=[#3{k}], permutation={#0: #1, #1: #2, #2: #3, #3: #0}, thinning=(#0..=#2) } + ArrangeBy + raw=true + arrangements[0]={ key=[#0{k}], permutation=id, thinning=() } + Get::Collection materialize.public.c + raw=true + Return + Union + Mfp + project=(#0..=#4) + map=(null) + Union consolidate_output=true + Negate + Join::Linear + linear_stage[0] + closure + project=(#1..=#3, #0) + lookup={ relation=0, key=[#3{k}] } + stream={ key=[#0], thinning=() } + source={ relation=1, key=[#0] } + Get::PassArrangements l2 + raw=true + arrangements[0]={ key=[#3{k}], permutation={#0: #1, #1: #2, #2: #3, #3: #0}, thinning=(#0..=#2) } + Reduce::Distinct + key_plan=id + val_plan + project=() + Get::Collection l3 + project=(#3) + raw=true + Get::PassArrangements l1 + raw=true + Get::Collection l3 + project=(#0..=#3, #3) + raw=true + +Source materialize.public.a +Source materialize.public.b + filter=((#0 = #1)) +Source materialize.public.c + filter=((#0{k}) IS NOT NULL) + +Target cluster: quickstart + +EOF + +query IIIII rowsort +SELECT a.k, b.x, b.y, b.k, c.k +FROM (a LEFT JOIN b ON b.x = b.y) LEFT JOIN c ON b.k = c.k +---- +1 5 5 100 100 + +# Re-enable the variadic lowering (the system default) and re-run the +# same query. The expected result is unchanged. 
+# But we have observed a buggy extra NULL-enriched row (in comments below) + +simple conn=mz_system,user=mz_system +ALTER SYSTEM SET enable_variadic_left_join_lowering TO true +---- +COMPLETE 0 + +query T multiline +EXPLAIN PHYSICAL PLAN FOR +SELECT a.k, b.x, b.y, b.k, c.k +FROM (a LEFT JOIN b ON b.x = b.y) LEFT JOIN c ON b.k = c.k +---- +Explained Query: + Join::Delta + plan_path[0] + delta_stage[1] + closure + project=(#1, #4..=#6, #8) + map=(case when (#7) IS NULL then null else #0 end) + lookup={ relation=2, key=[#0] } + stream={ key=[case when (#2) IS NULL then null else #1 end], thinning=(#0..=#5) } + delta_stage[0] + closure + project=(#0, #3, #4, #6..=#8) + map=((#4) IS NULL, case when #5 then null else #1 end, case when #5 then null else #2 end, case when #5 then null else #3 end) + lookup={ relation=1, key=[] } + stream={ key=[], thinning=(#0) } + source={ relation=0, key=[] } + plan_path[1] + delta_stage[1] + closure + project=(#4, #0..=#3) + lookup={ relation=0, key=[] } + stream={ key=[], thinning=(#0..=#3) } + delta_stage[0] + closure + project=(#3..=#5, #7) + map=(case when (#6) IS NULL then null else #0 end) + lookup={ relation=2, key=[#0] } + stream={ key=[case when (#1) IS NULL then null else #0 end], thinning=(#0..=#4) } + initial_closure + project=(#2, #3, #5..=#7) + map=((#3) IS NULL, case when #4 then null else #0 end, case when #4 then null else #1 end, case when #4 then null else #2 end) + source={ relation=1, key=[] } + plan_path[2] + delta_stage[1] + closure + project=(#4, #1..=#3, #0) + lookup={ relation=0, key=[] } + stream={ key=[], thinning=(#0..=#3) } + delta_stage[0] + closure + project=(#1, #7..=#9) + map=((#5) IS NULL, case when #6 then null else #2 end, case when #6 then null else #3 end, case when #6 then null else #4 end) + lookup={ relation=1, key=[case when (#3) IS NULL then null else #2 end] } + stream={ key=[#0], thinning=(#1) } + initial_closure + project=(#0, #2) + map=(case when (#1) IS NULL then null else #0 end) + 
source={ relation=2, key=[#0] } + ArrangeBy + raw=true + arrangements[0]={ key=[], permutation=id, thinning=(#0) } + Get::PassArrangements materialize.public.a + raw=true + ArrangeBy + raw=true + arrangements[0]={ key=[], permutation=id, thinning=(#0..=#3) } + arrangements[1]={ key=[case when (#3) IS NULL then null else #2 end], permutation={#0: #1, #1: #2, #2: #3, #3: #4}, thinning=(#0..=#3) } + Union + Get::Collection materialize.public.b + project=(#0..=#3) + filter=((#0 = #1)) + map=(true) + raw=true + Mfp + project=(#1, #0, #2, #3) + map=(null, null, null) + input_key=#0 + Reduce::Distinct + key_plan=id + val_plan + project=() + Union + Get::Collection materialize.public.b + project=(#0) + filter=((null OR (#0) IS NULL)) + raw=true + Constant + - (null) + ArrangeBy + raw=true + arrangements[0]={ key=[#0], permutation=id, thinning=(#1) } + Union + Get::Collection materialize.public.c + project=(#0, #1) + map=(true) + raw=true + Mfp + project=(#0, #1) + map=(null) + input_key=#0 + Threshold::Basic ensure_arrangement={ key=[#0], permutation=id, thinning=() } + ArrangeBy + raw=false + arrangements[0]={ key=[#0], permutation=id, thinning=() } + Union consolidate_output=true + Negate + Get::Collection materialize.public.c + raw=true + ArrangeBy + input_key=[#0] + raw=true + Reduce::Distinct + key_plan=id + val_plan + project=() + Union + Get::Collection materialize.public.b + project=(#2) + raw=true + Constant + - (null) + +Source materialize.public.a +Source materialize.public.c + filter=((#0) IS NOT NULL) + +Target cluster: quickstart + +EOF + +query IIIII rowsort +SELECT a.k, b.x, b.y, b.k, c.k +FROM (a LEFT JOIN b ON b.x = b.y) LEFT JOIN c ON b.k = c.k +---- +1 5 5 100 100 + +# 1 NULL NULL NULL NULL From 40fc52d7c74a157b8c56f0f4c66fe54a343897c5 Mon Sep 17 00:00:00 2001 From: Michael Greenberg Date: Wed, 20 May 2026 14:23:35 -0400 Subject: [PATCH 2/3] ensure the bound input is actually in the input --- src/sql/src/plan/lowering/variadic_left.rs | 5 +- 
test/sqllogictest/variadic_outer_join.slt | 310 +++++++-------------- 2 files changed, 98 insertions(+), 217 deletions(-) diff --git a/src/sql/src/plan/lowering/variadic_left.rs b/src/sql/src/plan/lowering/variadic_left.rs index 72e3df39c3507..a36d14967605f 100644 --- a/src/sql/src/plan/lowering/variadic_left.rs +++ b/src/sql/src/plan/lowering/variadic_left.rs @@ -196,8 +196,9 @@ pub(crate) fn attempt_left_join_magic( inc_metrics("voj_5"); return Ok(None); } - // Only columns not from the outer scope introduce bindings. - if left >= oa { + // Only columns not from the outer scope introduce bindings (`oa <= left`), + // and `left` must also be a column of the left relation (`left < oa + ba`). + if oa <= left && left < oa + ba { if let Some(bound) = bound_input { // If left references come from different inputs, bail out. if bound_to[left] != bound { diff --git a/test/sqllogictest/variadic_outer_join.slt b/test/sqllogictest/variadic_outer_join.slt index c3f518d63b570..7e823bbe06ac4 100644 --- a/test/sqllogictest/variadic_outer_join.slt +++ b/test/sqllogictest/variadic_outer_join.slt @@ -37,123 +37,61 @@ ALTER SYSTEM SET enable_variadic_left_join_lowering TO false COMPLETE 0 query T multiline -EXPLAIN PHYSICAL PLAN FOR +EXPLAIN OPTIMIZED PLAN FOR SELECT a.k, b.x, b.y, b.k, c.k FROM (a LEFT JOIN b ON b.x = b.y) LEFT JOIN c ON b.k = c.k ---- Explained Query: With cte l0 = - Join::Linear - linear_stage[0] - closure - project=(#3, #0..=#2) - lookup={ relation=0, key=[] } - stream={ key=[], thinning=(#0..=#2) } - source={ relation=1, key=[] } - ArrangeBy - raw=true - arrangements[0]={ key=[], permutation=id, thinning=(#0) } - Get::PassArrangements materialize.public.a - raw=true - ArrangeBy - raw=true - arrangements[0]={ key=[], permutation=id, thinning=(#0..=#2) } - Get::Collection materialize.public.b - raw=true + CrossJoin type=differential + ArrangeBy keys=[[]] + ReadStorage materialize.public.a + ArrangeBy keys=[[]] + Filter (#0 = #1) + ReadStorage materialize.public.b
cte l1 = Union - Get::PassArrangements l0 - raw=true - Join::Linear - final_closure - project=(#0..=#3) - map=(null, null, null) - linear_stage[0] - lookup={ relation=1, key=[#0] } - stream={ key=[#0], thinning=() } - source={ relation=0, key=[#0] } - ArrangeBy - raw=true - arrangements[0]={ key=[#0], permutation=id, thinning=() } - Union consolidate_output=true - Negate - ArrangeBy - input_key=[#0] - raw=true - Reduce::Distinct - key_plan=id - val_plan - project=() - Get::Collection l0 - project=(#0) - raw=true - ArrangeBy - input_key=[#0] - raw=true - Reduce::Distinct - key_plan=id - val_plan - project=() - Get::PassArrangements materialize.public.a - raw=true - ArrangeBy - raw=true - arrangements[0]={ key=[#0], permutation=id, thinning=() } - Get::PassArrangements materialize.public.a - raw=true + Get l0 + Project (#0, #2..=#4) + Map (null, null, null) + Join on=(#0 = #1) type=differential + ArrangeBy keys=[[#0]] + Union + Negate + Distinct project=[#0] + Project (#0) + Get l0 + Distinct project=[#0] + ReadStorage materialize.public.a + ArrangeBy keys=[[#0]] + ReadStorage materialize.public.a cte l2 = - ArrangeBy - raw=true - arrangements[0]={ key=[#3{k}], permutation={#0: #1, #1: #2, #2: #3, #3: #0}, thinning=(#0..=#2) } - Get::Collection l1 - filter=((#3{k}) IS NOT NULL) - raw=true + ArrangeBy keys=[[#3{k}]] + Filter (#3{k}) IS NOT NULL + Get l1 cte l3 = - Join::Linear - linear_stage[0] - closure - project=(#1..=#3, #0) - lookup={ relation=1, key=[#0{k}] } - stream={ key=[#3{k}], thinning=(#0..=#2) } - source={ relation=0, key=[#3{k}] } - Get::PassArrangements l2 - raw=true - arrangements[0]={ key=[#3{k}], permutation={#0: #1, #1: #2, #2: #3, #3: #0}, thinning=(#0..=#2) } - ArrangeBy - raw=true - arrangements[0]={ key=[#0{k}], permutation=id, thinning=() } - Get::Collection materialize.public.c - raw=true + Project (#0..=#3) + Join on=(#3{k} = #4{k}) type=differential + Get l2 + ArrangeBy keys=[[#0{k}]] + Filter (#0{k}) IS NOT NULL + ReadStorage 
materialize.public.c Return Union - Mfp - project=(#0..=#4) - map=(null) - Union consolidate_output=true + Map (null) + Union Negate - Join::Linear - linear_stage[0] - closure - project=(#1..=#3, #0) - lookup={ relation=0, key=[#3{k}] } - stream={ key=[#0], thinning=() } - source={ relation=1, key=[#0] } - Get::PassArrangements l2 - raw=true - arrangements[0]={ key=[#3{k}], permutation={#0: #1, #1: #2, #2: #3, #3: #0}, thinning=(#0..=#2) } - Reduce::Distinct - key_plan=id - val_plan - project=() - Get::Collection l3 - project=(#3) - raw=true - Get::PassArrangements l1 - raw=true - Get::Collection l3 - project=(#0..=#3, #3) - raw=true + Project (#0..=#3) + Join on=(#3{k} = #4) type=differential + Get l2 + ArrangeBy keys=[[#0]] + Distinct project=[#0] + Project (#3) + Get l3 + Get l1 + Project (#0..=#3, #3) + Get l3 Source materialize.public.a Source materialize.public.b @@ -181,125 +119,67 @@ ALTER SYSTEM SET enable_variadic_left_join_lowering TO true COMPLETE 0 query T multiline -EXPLAIN PHYSICAL PLAN FOR +EXPLAIN OPTIMIZED PLAN FOR SELECT a.k, b.x, b.y, b.k, c.k FROM (a LEFT JOIN b ON b.x = b.y) LEFT JOIN c ON b.k = c.k ---- Explained Query: - Join::Delta - plan_path[0] - delta_stage[1] - closure - project=(#1, #4..=#6, #8) - map=(case when (#7) IS NULL then null else #0 end) - lookup={ relation=2, key=[#0] } - stream={ key=[case when (#2) IS NULL then null else #1 end], thinning=(#0..=#5) } - delta_stage[0] - closure - project=(#0, #3, #4, #6..=#8) - map=((#4) IS NULL, case when #5 then null else #1 end, case when #5 then null else #2 end, case when #5 then null else #3 end) - lookup={ relation=1, key=[] } - stream={ key=[], thinning=(#0) } - source={ relation=0, key=[] } - plan_path[1] - delta_stage[1] - closure - project=(#4, #0..=#3) - lookup={ relation=0, key=[] } - stream={ key=[], thinning=(#0..=#3) } - delta_stage[0] - closure - project=(#3..=#5, #7) - map=(case when (#6) IS NULL then null else #0 end) - lookup={ relation=2, key=[#0] } - stream={ key=[case 
when (#1) IS NULL then null else #0 end], thinning=(#0..=#4) } - initial_closure - project=(#2, #3, #5..=#7) - map=((#3) IS NULL, case when #4 then null else #0 end, case when #4 then null else #1 end, case when #4 then null else #2 end) - source={ relation=1, key=[] } - plan_path[2] - delta_stage[1] - closure - project=(#4, #1..=#3, #0) - lookup={ relation=0, key=[] } - stream={ key=[], thinning=(#0..=#3) } - delta_stage[0] - closure - project=(#1, #7..=#9) - map=((#5) IS NULL, case when #6 then null else #2 end, case when #6 then null else #3 end, case when #6 then null else #4 end) - lookup={ relation=1, key=[case when (#3) IS NULL then null else #2 end] } - stream={ key=[#0], thinning=(#1) } - initial_closure - project=(#0, #2) - map=(case when (#1) IS NULL then null else #0 end) - source={ relation=2, key=[#0] } - ArrangeBy - raw=true - arrangements[0]={ key=[], permutation=id, thinning=(#0) } - Get::PassArrangements materialize.public.a - raw=true - ArrangeBy - raw=true - arrangements[0]={ key=[], permutation=id, thinning=(#0..=#3) } - arrangements[1]={ key=[case when (#3) IS NULL then null else #2 end], permutation={#0: #1, #1: #2, #2: #3, #3: #4}, thinning=(#0..=#3) } - Union - Get::Collection materialize.public.b - project=(#0..=#3) - filter=((#0 = #1)) - map=(true) - raw=true - Mfp - project=(#1, #0, #2, #3) - map=(null, null, null) - input_key=#0 - Reduce::Distinct - key_plan=id - val_plan - project=() - Union - Get::Collection materialize.public.b - project=(#0) - filter=((null OR (#0) IS NULL)) - raw=true - Constant - - (null) - ArrangeBy - raw=true - arrangements[0]={ key=[#0], permutation=id, thinning=(#1) } + With + cte l0 = + CrossJoin type=differential + ArrangeBy keys=[[]] + ReadStorage materialize.public.a + ArrangeBy keys=[[]] + Filter (#0 = #1) + ReadStorage materialize.public.b + cte l1 = Union - Get::Collection materialize.public.c - project=(#0, #1) - map=(true) - raw=true - Mfp - project=(#0, #1) - map=(null) - input_key=#0 - 
Threshold::Basic ensure_arrangement={ key=[#0], permutation=id, thinning=() } - ArrangeBy - raw=false - arrangements[0]={ key=[#0], permutation=id, thinning=() } - Union consolidate_output=true - Negate - Get::Collection materialize.public.c - raw=true - ArrangeBy - input_key=[#0] - raw=true - Reduce::Distinct - key_plan=id - val_plan - project=() - Union - Get::Collection materialize.public.b - project=(#2) - raw=true - Constant - - (null) + Get l0 + Project (#0, #2..=#4) + Map (null, null, null) + Join on=(#0 = #1) type=differential + ArrangeBy keys=[[#0]] + Union + Negate + Distinct project=[#0] + Project (#0) + Get l0 + Distinct project=[#0] + ReadStorage materialize.public.a + ArrangeBy keys=[[#0]] + ReadStorage materialize.public.a + cte l2 = + ArrangeBy keys=[[#3{k}]] + Filter (#3{k}) IS NOT NULL + Get l1 + cte l3 = + Project (#0..=#3) + Join on=(#3{k} = #4{k}) type=differential + Get l2 + ArrangeBy keys=[[#0{k}]] + Filter (#0{k}) IS NOT NULL + ReadStorage materialize.public.c + Return + Union + Map (null) + Union + Negate + Project (#0..=#3) + Join on=(#3{k} = #4) type=differential + Get l2 + ArrangeBy keys=[[#0]] + Distinct project=[#0] + Project (#3) + Get l3 + Get l1 + Project (#0..=#3, #3) + Get l3 Source materialize.public.a +Source materialize.public.b + filter=((#0 = #1)) Source materialize.public.c - filter=((#0) IS NOT NULL) + filter=((#0{k}) IS NOT NULL) Target cluster: quickstart From 2c7486f064b5b75ef2601f640cc5e650b691b9c1 Mon Sep 17 00:00:00 2001 From: Dennis Felsing Date: Thu, 21 May 2026 01:27:26 +0000 Subject: [PATCH 3/3] slt: Add additional test case --- test/sqllogictest/variadic_outer_join.slt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/sqllogictest/variadic_outer_join.slt b/test/sqllogictest/variadic_outer_join.slt index 7e823bbe06ac4..ad89cedc5f36b 100644 --- a/test/sqllogictest/variadic_outer_join.slt +++ b/test/sqllogictest/variadic_outer_join.slt @@ -192,3 +192,9 @@ FROM (a LEFT JOIN b ON b.x = b.y) LEFT JOIN c ON 
b.k = c.k 1 5 5 100 100 # 1 NULL NULL NULL NULL + +query IIII rowsort +SELECT a.k, b.k, b.x, c.k +FROM (a LEFT JOIN b ON a.k = b.k AND b.x = b.y) LEFT JOIN c ON a.k = c.k +---- +1 NULL NULL NULL