diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index 2f01d5163..e55ed23c3 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -3834,6 +3834,130 @@ NOTICE: graph "issue_2193" has been dropped (1 row) +-- +-- Issue 2378: OPTIONAL MATCH may incorrectly drop null-preserving outer +-- rows when its WHERE clause contains a correlated sub-pattern predicate. +-- +-- Cypher OPTIONAL MATCH semantics: the WHERE applies to the optional +-- binding; when no right-hand row survives the predicate, the outer row +-- is still emitted with NULLs in the optional columns. Before the fix, +-- a WHERE containing EXISTS { ... } or COUNT { ... } was attached as an +-- outer filter on the transformed subquery, so it ran after the LATERAL +-- LEFT JOIN produced null-preserving rows and then incorrectly dropped +-- them when the predicate evaluated NULL/false on the nulled side. +-- +SELECT create_graph('issue_2378'); +NOTICE: graph "issue_2378" has been created + create_graph +-------------- + +(1 row) + +SELECT * FROM cypher('issue_2378', $$ + CREATE (a:Person {name: 'Alice'}), + (b:Person {name: 'Bob'}), + (c:Person {name: 'Charlie'}), + (a)-[:KNOWS]->(b), + (a)-[:KNOWS]->(c) +$$) AS (v agtype); + v +--- +(0 rows) + +-- Correlated EXISTS referencing the optional variable (friend). +-- Neither Bob nor Charlie knows anyone, so for every outer p the +-- predicate fails on all optional matches; expect one row per person +-- with friend = NULL. +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE EXISTS { (friend)-[:KNOWS]->(:Person) } + RETURN p.name AS name, friend.name AS friend + ORDER BY name +$$) AS (name agtype, friend agtype); + name | friend +-----------+-------- + "Alice" | + "Bob" | + "Charlie" | +(3 rows) + +-- Correlated EXISTS referencing the outer variable (p). 
+-- Alice knows someone so her optional matches pass; Bob and Charlie +-- don't, so they are emitted with NULL friend. +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE EXISTS { (p)-[:KNOWS]->(:Person) } + RETURN p.name AS name, friend.name AS friend + ORDER BY name, friend +$$) AS (name agtype, friend agtype); + name | friend +-----------+----------- + "Alice" | "Bob" + "Alice" | "Charlie" + "Bob" | + "Charlie" | +(4 rows) + +-- Non-correlated EXISTS (was already working; kept as a regression guard). +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE EXISTS { MATCH (x:Person) RETURN x } + RETURN p.name AS name, friend.name AS friend + ORDER BY name, friend +$$) AS (name agtype, friend agtype); + name | friend +-----------+----------- + "Alice" | "Bob" + "Alice" | "Charlie" + "Bob" | + "Charlie" | +(4 rows) + +-- Plain scalar predicate on the optional variable (was already working). +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE friend.name = 'Bob' + RETURN p.name AS name, friend.name AS friend + ORDER BY name +$$) AS (name agtype, friend agtype); + name | friend +-----------+-------- + "Alice" | "Bob" + "Bob" | + "Charlie" | +(3 rows) + +-- Constant-false WHERE on the optional side (was already working). 
+SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE false + RETURN p.name AS name, friend.name AS friend + ORDER BY name +$$) AS (name agtype, friend agtype); + name | friend +-----------+-------- + "Alice" | + "Bob" | + "Charlie" | +(3 rows) + +SELECT drop_graph('issue_2378', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table issue_2378._ag_label_vertex +drop cascades to table issue_2378._ag_label_edge +drop cascades to table issue_2378."Person" +drop cascades to table issue_2378."KNOWS" +NOTICE: graph "issue_2378" has been dropped + drop_graph +------------ + +(1 row) + -- -- Clean up -- diff --git a/regress/sql/cypher_match.sql b/regress/sql/cypher_match.sql index 410d097bb..e56aafac8 100644 --- a/regress/sql/cypher_match.sql +++ b/regress/sql/cypher_match.sql @@ -1616,6 +1616,79 @@ $$) AS (result agtype); SELECT drop_graph('issue_2193', true); +-- +-- Issue 2378: OPTIONAL MATCH may incorrectly drop null-preserving outer +-- rows when its WHERE clause contains a correlated sub-pattern predicate. +-- +-- Cypher OPTIONAL MATCH semantics: the WHERE applies to the optional +-- binding; when no right-hand row survives the predicate, the outer row +-- is still emitted with NULLs in the optional columns. Before the fix, +-- a WHERE containing EXISTS { ... } or COUNT { ... } was attached as an +-- outer filter on the transformed subquery, so it ran after the LATERAL +-- LEFT JOIN produced null-preserving rows and then incorrectly dropped +-- them when the predicate evaluated NULL/false on the nulled side. +-- +SELECT create_graph('issue_2378'); +SELECT * FROM cypher('issue_2378', $$ + CREATE (a:Person {name: 'Alice'}), + (b:Person {name: 'Bob'}), + (c:Person {name: 'Charlie'}), + (a)-[:KNOWS]->(b), + (a)-[:KNOWS]->(c) +$$) AS (v agtype); + +-- Correlated EXISTS referencing the optional variable (friend). 
+-- Neither Bob nor Charlie knows anyone, so for every outer p the +-- predicate fails on all optional matches; expect one row per person +-- with friend = NULL. +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE EXISTS { (friend)-[:KNOWS]->(:Person) } + RETURN p.name AS name, friend.name AS friend + ORDER BY name +$$) AS (name agtype, friend agtype); + +-- Correlated EXISTS referencing the outer variable (p). +-- Alice knows someone so her optional matches pass; Bob and Charlie +-- don't, so they are emitted with NULL friend. +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE EXISTS { (p)-[:KNOWS]->(:Person) } + RETURN p.name AS name, friend.name AS friend + ORDER BY name, friend +$$) AS (name agtype, friend agtype); + +-- Non-correlated EXISTS (was already working; kept as a regression guard). +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE EXISTS { MATCH (x:Person) RETURN x } + RETURN p.name AS name, friend.name AS friend + ORDER BY name, friend +$$) AS (name agtype, friend agtype); + +-- Plain scalar predicate on the optional variable (was already working). +SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE friend.name = 'Bob' + RETURN p.name AS name, friend.name AS friend + ORDER BY name +$$) AS (name agtype, friend agtype); + +-- Constant-false WHERE on the optional side (was already working). 
+SELECT * FROM cypher('issue_2378', $$ + MATCH (p:Person) + OPTIONAL MATCH (p)-[:KNOWS]->(friend:Person) + WHERE false + RETURN p.name AS name, friend.name AS friend + ORDER BY name +$$) AS (name agtype, friend agtype); + +SELECT drop_graph('issue_2378', true); + -- -- Clean up -- diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index e5540aa3e..95b205c07 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -2640,6 +2640,7 @@ static Query *transform_cypher_match(cypher_parsestate *cpstate, cypher_match *match_self = (cypher_match*) clause->self; Node *where = match_self->where; + /* * Check label validity early unless the predecessor clause chain * contains a data-modifying operation (CREATE, SET, DELETE, MERGE). @@ -2655,7 +2656,23 @@ static Query *transform_cypher_match(cypher_parsestate *cpstate, match_self->where = make_false_where_clause(false); } - if (has_list_comp_or_subquery((Node *)match_self->where, NULL)) + /* + * For a non-optional MATCH with a list comprehension or subquery in + * its WHERE clause, transform the match pattern as a subquery and + * then apply the WHERE as an outer filter. This keeps the parent's + * namespace available to the subquery-bearing predicate. + * + * This rewrite is NOT safe for OPTIONAL MATCH: wrapping the WHERE + * around the transformed clause turns it into a post-filter on the + * LATERAL LEFT JOIN produced by transform_cypher_optional_match_clause, + * which incorrectly drops the null-preserving outer rows that the + * LEFT JOIN generates when no right-hand match exists. For the + * optional case we fall through to the normal transform, where + * transform_cypher_optional_match_clause detaches the WHERE and + * re-attaches it as the LEFT JOIN's ON condition (issue #2378). 
+ */ + if (!match_self->optional && + has_list_comp_or_subquery((Node *)match_self->where, NULL)) { match_self->where = NULL; return transform_cypher_clause_with_where(cpstate, @@ -2794,10 +2811,28 @@ static RangeTblEntry *transform_cypher_optional_match_clause(cypher_parsestate * List *res_colnames = NIL, *res_colvars = NIL; Alias *l_alias, *r_alias; ParseNamespaceItem *jnsitem; + cypher_match *match_self = (cypher_match *) clause->self; + Node *saved_where = match_self->where; int i = 0; j->jointype = JOIN_LEFT; + /* + * If the OPTIONAL MATCH carries a WHERE clause, temporarily detach + * it so that the recursive right-hand transform does NOT try to + * apply it inside the inner subquery. We re-apply the predicate + * below as a LEFT JOIN ON condition, which is the only placement + * that both (a) scopes the predicate to the optional binding and + * (b) preserves null-filled outer rows when the predicate fails. + * Without this, a WHERE that contains a sub-pattern predicate + * (e.g. EXISTS {...} referencing the optional variable) either + * gets silently dropped during the inner transform (namespace + * mismatch re-binds the variable in a fresh scope) or gets pulled + * up by the containing wrapper and filters out the null-preserving + * rows. See issue #2378. + */ + match_self->where = NULL; + l_alias = makeAlias(PREV_CYPHER_CLAUSE_ALIAS, NIL); r_alias = makeAlias(CYPHER_OPT_RIGHT_ALIAS, NIL); @@ -2819,6 +2854,32 @@ static RangeTblEntry *transform_cypher_optional_match_clause(cypher_parsestate * j->rarg = transform_clause_for_join(cpstate, clause, &r_rte, &r_nsitem, r_alias); + /* add right-side nsitem so the re-attached WHERE below can resolve + * newly-bound variables from the optional pattern */ + pstate->p_namespace = lappend(pstate->p_namespace, r_nsitem); + + /* + * Now that both sides are visible in the namespace, re-attach the + * OPTIONAL MATCH's WHERE predicate as the LEFT JOIN's ON clause. 
+ * PostgreSQL correctly preserves left rows whose right side fails + * an ON condition (LEFT JOIN semantics), which is exactly what + * Cypher OPTIONAL MATCH ... WHERE requires: if the WHERE filters + * out all matches for a given outer row, that outer row is still + * emitted with nulls in the optional columns. + */ + if (saved_where != NULL) + { + Node *where_qual; + + where_qual = transform_cypher_expr(cpstate, saved_where, + EXPR_KIND_WHERE); + where_qual = coerce_to_boolean(pstate, where_qual, "WHERE"); + j->quals = where_qual; + } + + /* restore the WHERE on the node so we don't mutate caller state */ + match_self->where = saved_where; + /* * Since this is a left join, we need to mark j->rarg as it may potentially * emit NULL. The jindex argument holds rtindex of the join's RTE, which is