From 2d75e22bb9f6dabcd3f56210c6ce1648171d8517 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Wed, 15 Apr 2026 15:04:37 -0400 Subject: [PATCH 1/2] Fix OPTIONAL MATCH dropping null-preserving rows with subquery WHERE (#2378) Cypher OPTIONAL MATCH semantics require that when no right-hand row survives the WHERE predicate, the outer row is still emitted with NULLs in the optional columns. Before this fix, a WHERE containing a list comprehension or sub-pattern predicate (EXISTS { ... }, COUNT { ... }) would take the transform_cypher_clause_with_where rewrite path, which detaches the WHERE, transforms the match clause as a subquery, and then attaches the WHERE as an outer filter on that subquery. For OPTIONAL MATCH, the inner subquery already produced a LATERAL LEFT JOIN with null-preserving rows; the outer filter then ran against those nulled rows and dropped them when the predicate evaluated NULL or false on the nulled side, producing zero rows where Cypher semantics require one null-filled row per outer match. Fix: in transform_cypher_match, the has_list_comp_or_subquery rewrite now only applies to non-optional MATCH. In the OPTIONAL MATCH path, transform_cypher_optional_match_clause detaches the WHERE from the cypher_match node before recursively transforming the right-hand side (so the inner transform does not double-apply or misresolve the predicate in a fresh namespace), and re-attaches the transformed predicate as the LEFT JOIN's ON condition after both sides are in the namespace. A LEFT JOIN with a failing ON condition correctly preserves left rows with null right columns, which matches Cypher OPTIONAL MATCH ... WHERE semantics. Regression tests cover: - EXISTS { (friend)-[...]->(...) } referencing the optional variable - EXISTS { (p)-[...]->(...) } referencing the outer variable - non-correlated EXISTS (previously-working guard) - plain scalar predicate on the optional variable (guard) - constant-false WHERE (guard) Fixes issue #2378. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- regress/expected/cypher_match.out | 124 +++++++++++++++++++++++++++++ regress/sql/cypher_match.sql | 73 +++++++++++++++++ src/backend/parser/cypher_clause.c | 63 ++++++++++++++- 3 files changed, 259 insertions(+), 1 deletion(-) diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index 2f01d5163..e55ed23c3 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -3834,6 +3834,130 @@ NOTICE: graph "issue_2193" has been dropped (1 row) +-- +-- Issue 2378: OPTIONAL MATCH may incorrectly drop null-preserving outer +-- rows when its WHERE clause contains a correlated sub-pattern predicate. +-- +-- Cypher OPTIONAL MATCH semantics: the WHERE applies to the optional +-- binding; when no right-hand row survives the predicate, the outer row +-- is still emitted with NULLs in the optional columns. Before the fix, +-- a WHERE containing EXISTS { ... } or COUNT { ... } was attached as an +-- outer filter on the transformed subquery, so it ran after the LATERAL +-- LEFT JOIN produced null-preserving rows and then incorrectly dropped +-- them when the predicate evaluated NULL/false on the nulled side. +-- +SELECT create_graph('issue_2378'); +NOTICE: graph "issue_2378" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('issue_2378', $$ + CREATE (a:Person {name: 'Alice'}), + (b:Person {name: 'Bob'}), + (c:Person {name: 'Charlie'}), + (a)-[:KNOWS]->(b), + (a)-[:KNOWS]->(c) +$$) AS (v agtype); + v +--- +(0 rows) + +-- Correlated EXISTS referencing the optional variable (friend). +-- Neither Bob nor Charlie knows anyone, so for every outer p the +-- predicate fails on all optional matches; expect one row per person +-- with friend = NULL. 
+SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE EXISTS { (friend)-[:KNOWS]->(:Person) } + RETURN p.name AS name, friend.name AS friend + ORDER BY name +$$) AS (name agtype, friend agtype); + name | friend +-----------+-------- + "Alice" | + "Bob" | + "Charlie" | +(3 rows) + +-- Correlated EXISTS referencing the outer variable (p). +-- Alice knows someone so her optional matches pass; Bob and Charlie +-- don't, so they are emitted with NULL friend. +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE EXISTS { (p)-[:KNOWS]->(:Person) } + RETURN p.name AS name, friend.name AS friend + ORDER BY name, friend +$$) AS (name agtype, friend agtype); + name | friend +-----------+----------- + "Alice" | "Bob" + "Alice" | "Charlie" + "Bob" | + "Charlie" | +(4 rows) + +-- Non-correlated EXISTS (was already working; kept as a regression guard). +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE EXISTS { MATCH (x:Person) RETURN x } + RETURN p.name AS name, friend.name AS friend + ORDER BY name, friend +$$) AS (name agtype, friend agtype); + name | friend +-----------+----------- + "Alice" | "Bob" + "Alice" | "Charlie" + "Bob" | + "Charlie" | +(4 rows) + +-- Plain scalar predicate on the optional variable (was already working). +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE friend.name = 'Bob' + RETURN p.name AS name, friend.name AS friend + ORDER BY name +$$) AS (name agtype, friend agtype); + name | friend +-----------+-------- + "Alice" | "Bob" + "Bob" | + "Charlie" | +(3 rows) + +-- Constant-false WHERE on the optional side (was already working). 
+SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE false + RETURN p.name AS name, friend.name AS friend + ORDER BY name +$$) AS (name agtype, friend agtype); + name | friend +-----------+-------- + "Alice" | + "Bob" | + "Charlie" | +(3 rows) + +SELECT drop_graph('issue_2378', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table issue_2378._ag_label_vertex +drop cascades to table issue_2378._ag_label_edge +drop cascades to table issue_2378."Person" +drop cascades to table issue_2378."KNOWS" +NOTICE: graph "issue_2378" has been dropped + drop_graph +------------ + +(1 row) + -- -- Clean up -- diff --git a/regress/sql/cypher_match.sql b/regress/sql/cypher_match.sql index 410d097bb..e56aafac8 100644 --- a/regress/sql/cypher_match.sql +++ b/regress/sql/cypher_match.sql @@ -1616,6 +1616,79 @@ $$) AS (result agtype); SELECT drop_graph('issue_2193', true); +-- +-- Issue 2378: OPTIONAL MATCH may incorrectly drop null-preserving outer +-- rows when its WHERE clause contains a correlated sub-pattern predicate. +-- +-- Cypher OPTIONAL MATCH semantics: the WHERE applies to the optional +-- binding; when no right-hand row survives the predicate, the outer row +-- is still emitted with NULLs in the optional columns. Before the fix, +-- a WHERE containing EXISTS { ... } or COUNT { ... } was attached as an +-- outer filter on the transformed subquery, so it ran after the LATERAL +-- LEFT JOIN produced null-preserving rows and then incorrectly dropped +-- them when the predicate evaluated NULL/false on the nulled side. +-- +SELECT create_graph('issue_2378'); +SELECT * FROM cypher('issue_2378', $$ + CREATE (a:Person {name: 'Alice'}), + (b:Person {name: 'Bob'}), + (c:Person {name: 'Charlie'}), + (a)-[:KNOWS]->(b), + (a)-[:KNOWS]->(c) +$$) AS (v agtype); + +-- Correlated EXISTS referencing the optional variable (friend). 
+-- Neither Bob nor Charlie knows anyone, so for every outer p the +-- predicate fails on all optional matches; expect one row per person +-- with friend = NULL. +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE EXISTS { (friend)-[:KNOWS]->(:Person) } + RETURN p.name AS name, friend.name AS friend + ORDER BY name +$$) AS (name agtype, friend agtype); + +-- Correlated EXISTS referencing the outer variable (p). +-- Alice knows someone so her optional matches pass; Bob and Charlie +-- don't, so they are emitted with NULL friend. +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE EXISTS { (p)-[:KNOWS]->(:Person) } + RETURN p.name AS name, friend.name AS friend + ORDER BY name, friend +$$) AS (name agtype, friend agtype); + +-- Non-correlated EXISTS (was already working; kept as a regression guard). +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE EXISTS { MATCH (x:Person) RETURN x } + RETURN p.name AS name, friend.name AS friend + ORDER BY name, friend +$$) AS (name agtype, friend agtype); + +-- Plain scalar predicate on the optional variable (was already working). +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE friend.name = 'Bob' + RETURN p.name AS name, friend.name AS friend + ORDER BY name +$$) AS (name agtype, friend agtype); + +-- Constant-false WHERE on the optional side (was already working). 
+SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE false + RETURN p.name AS name, friend.name AS friend + ORDER BY name +$$) AS (name agtype, friend agtype); + +SELECT drop_graph('issue_2378', true); + -- -- Clean up -- diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index e5540aa3e..6eac61e9f 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -2640,6 +2640,7 @@ static Query *transform_cypher_match(cypher_parsestate *cpstate, cypher_match *match_self = (cypher_match*) clause->self; Node *where = match_self->where; + /* * Check label validity early unless the predecessor clause chain * contains a data-modifying operation (CREATE, SET, DELETE, MERGE). @@ -2655,7 +2656,23 @@ static Query *transform_cypher_match(cypher_parsestate *cpstate, match_self->where = make_false_where_clause(false); } - if (has_list_comp_or_subquery((Node *)match_self->where, NULL)) + /* + * For a non-optional MATCH with a list comprehension or subquery in + * its WHERE clause, transform the match pattern as a subquery and + * then apply the WHERE as an outer filter. This keeps the parent's + * namespace available to the subquery-bearing predicate. + * + * This rewrite is NOT safe for OPTIONAL MATCH: wrapping the WHERE + * around the transformed clause turns it into a post-filter on the + * LATERAL LEFT JOIN produced by transform_cypher_optional_match_clause, + * which incorrectly drops the null-preserving outer rows that the + * LEFT JOIN generates when no right-hand match exists. For the + * optional case we fall through to the normal transform, where + * transform_cypher_optional_match_clause re-attaches the WHERE as + * the LEFT JOIN's ON condition, keeping those rows (issue #2378). 
+ */ + if (!match_self->optional && + has_list_comp_or_subquery((Node *)match_self->where, NULL)) { match_self->where = NULL; return transform_cypher_clause_with_where(cpstate, @@ -2794,10 +2811,28 @@ static RangeTblEntry *transform_cypher_optional_match_clause(cypher_parsestate * List *res_colnames = NIL, *res_colvars = NIL; Alias *l_alias, *r_alias; ParseNamespaceItem *jnsitem; + cypher_match *match_self = (cypher_match *) clause->self; + Node *saved_where = match_self->where; int i = 0; j->jointype = JOIN_LEFT; + /* + * If the OPTIONAL MATCH carries a WHERE clause, temporarily detach + * it so that the recursive right-hand transform does NOT try to + * apply it inside the inner subquery. We re-apply the predicate + * below as a LEFT JOIN ON condition, which is the only placement + * that both (a) scopes the predicate to the optional binding and + * (b) preserves null-filled outer rows when the predicate fails. + * Without this, a WHERE that contains a sub-pattern predicate + * (e.g. EXISTS {...} referencing the optional variable) either + * gets silently dropped during the inner transform (namespace + * mismatch re-binds the variable in a fresh scope) or gets pulled + * up by the containing wrapper and filters out the null-preserving + * rows. See issue #2378. + */ + match_self->where = NULL; + l_alias = makeAlias(PREV_CYPHER_CLAUSE_ALIAS, NIL); r_alias = makeAlias(CYPHER_OPT_RIGHT_ALIAS, NIL); @@ -2819,6 +2854,32 @@ static RangeTblEntry *transform_cypher_optional_match_clause(cypher_parsestate * j->rarg = transform_clause_for_join(cpstate, clause, &r_rte, &r_nsitem, r_alias); + /* add right-side nsitem so the re-attached WHERE below can resolve + * newly-bound variables from the optional pattern */ + pstate->p_namespace = lappend(pstate->p_namespace, r_nsitem); + + /* + * Now that both sides are visible in the namespace, re-attach the + * OPTIONAL MATCH's WHERE predicate as the LEFT JOIN's ON clause. 
+ * PostgreSQL correctly preserves left rows whose right side fails + * an ON condition (LEFT JOIN semantics), which is exactly what + * Cypher OPTIONAL MATCH ... WHERE requires: if the WHERE filters + * out all matches for a given outer row, that outer row is still + * emitted with nulls in the optional columns. + */ + if (saved_where != NULL) + { + Node *where_qual; + + where_qual = transform_cypher_expr(cpstate, saved_where, + EXPR_KIND_JOIN_ON); + where_qual = coerce_to_boolean(pstate, where_qual, "WHERE"); + j->quals = where_qual; + } + + /* restore the WHERE on the node so we don't mutate caller state */ + match_self->where = saved_where; + /* * Since this is a left join, we need to mark j->rarg as it may potentially * emit NULL. The jindex argument holds rtindex of the join's RTE, which is From bc891bfbbdd513450574511f2db899ef3f7db8a5 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Sun, 19 Apr 2026 08:42:19 -0400 Subject: [PATCH 2/2] Fix parse-tree malformation in OPTIONAL MATCH WHERE re-attach (#2378) transform_cypher_optional_match_clause was calling transform_cypher_expr with EXPR_KIND_JOIN_ON when re-attaching the detached WHERE as the LEFT JOIN's ON condition. All other WHERE transforms in cypher_clause.c use EXPR_KIND_WHERE, and there are three explicit p_expr_kind == EXPR_KIND_WHERE guards (cypher_clause.c:5415, 5679, 6597) that do load-bearing variable resolution for sub-pattern predicates -- walking up parent parsestates to rebind variables like `friend` inside EXISTS { (friend)-[...]->(...) }. Using EXPR_KIND_JOIN_ON bypassed those guards, so the sub-pattern fell through to the "create new variable" path and produced a structurally invalid parse tree. Under a release PG build the query happened to produce correct-looking output, but under --enable-cassert the downstream invariant checks aborted, crashing the backend and taking down the regression run (reported by @MuhammadTahaNaveed). 
Fix: use EXPR_KIND_WHERE, matching the pattern already established in transform_cypher_clause_with_where at line 2619. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/backend/parser/cypher_clause.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 6eac61e9f..95b205c07 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -2872,7 +2872,7 @@ static RangeTblEntry *transform_cypher_optional_match_clause(cypher_parsestate * Node *where_qual; where_qual = transform_cypher_expr(cpstate, saved_where, - EXPR_KIND_JOIN_ON); + EXPR_KIND_WHERE); where_qual = coerce_to_boolean(pstate, where_qual, "WHERE"); j->quals = where_qual; }