From 75b95f2526da83253a9a20cce65e1d9671a291b9 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Sat, 28 Feb 2026 15:37:16 -0500 Subject: [PATCH 1/6] Add MERGE ON CREATE SET / ON MATCH SET support (issue #1619) Implements the openCypher-standard ON CREATE SET and ON MATCH SET clauses for the MERGE statement. This allows conditional property updates depending on whether MERGE created a new path or matched an existing one: MERGE (n:Person {name: 'Alice'}) ON CREATE SET n.created = timestamp() ON MATCH SET n.updated = timestamp() Implementation spans parser, planner, and executor: - Grammar: new merge_actions_opt/merge_actions/merge_action rules in cypher_gram.y, with ON keyword added to cypher_kwlist.h - Nodes: on_match/on_create lists on cypher_merge, corresponding on_match_set_info/on_create_set_info on cypher_merge_information, and prop_expr on cypher_update_item (all serialized through copy/out/read funcs) - Transform: cypher_clause.c transforms ON SET items and stores prop_expr for direct expression evaluation - Executor: cypher_set.c extracts apply_update_list() from process_update_list(); cypher_merge.c calls it at all merge decision points (simple merge, terminal, non-terminal with eager buffering, and first-clause-with-followers paths) Key design choice: prop_expr stores the Expr* directly in cypher_update_item rather than using prop_position into the scan tuple. The planner strips target list entries for SET expressions that CustomScan doesn't need, making prop_position references dangling. By storing the expression directly (only for MERGE ON SET items), we evaluate it with ExecInitExpr/ExecEvalExpr independent of the scan tuple layout. Includes regression tests covering: basic ON CREATE SET, basic ON MATCH SET, combined ON CREATE + ON MATCH, multiple SET items, expression evaluation, interaction with WITH clause, and edge property updates. All 31 regression tests pass. --- regress/expected/cypher_merge.out | 129 +++++++++++++++++++++++++++ regress/sql/cypher_merge.sql | 78 ++++++++++++++++ src/backend/executor/cypher_merge.c | 84 +++++++++++++++-- src/backend/executor/cypher_set.c | 53 ++++++++--- src/backend/nodes/cypher_copyfuncs.c | 3 + src/backend/nodes/cypher_outfuncs.c | 7 +- src/backend/nodes/cypher_readfuncs.c | 3 + src/backend/parser/cypher_clause.c | 46 ++++++++++ src/backend/parser/cypher_gram.y | 64 ++++++++++++- src/include/executor/cypher_utils.h | 6 ++ src/include/nodes/cypher_nodes.h | 6 ++ src/include/parser/cypher_kwlist.h | 1 + 12 files changed, 457 insertions(+), 23 deletions(-) diff --git a/regress/expected/cypher_merge.out b/regress/expected/cypher_merge.out index 4242f2f59..5fe8f4ae5 100644 --- a/regress/expected/cypher_merge.out +++ b/regress/expected/cypher_merge.out @@ -2001,9 +2001,138 @@ SELECT * FROM cypher('issue_1954', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype --- (0 rows) +-- +-- ON CREATE SET / ON MATCH SET tests (issue #1619) +-- +SELECT create_graph('merge_actions'); +NOTICE: graph "merge_actions" has been created + create_graph +-------------- + +(1 row) + +-- Basic ON CREATE SET: first run creates the node +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Alice'}) + ON CREATE SET n.created = true + RETURN n.name, n.created +$$) AS (name agtype, created agtype); + name | created +---------+--------- + "Alice" | true +(1 row) + +-- ON MATCH SET: second run matches the existing node +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Alice'}) + ON MATCH SET n.found = true + RETURN n.name, n.created, n.found +$$) AS (name agtype, created agtype, found agtype); + name | created | found +---------+---------+------- + "Alice" | true | true +(1 row) + +-- Both ON CREATE SET and ON MATCH SET (first run = create) +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Bob'}) + ON CREATE SET n.created = true + ON MATCH SET n.matched = true + RETURN n.name, n.created, n.matched +$$) AS (name agtype, created agtype, matched agtype); + name | created | matched +-------+---------+--------- + "Bob" | true | +(1 row) + +-- Both ON CREATE SET and ON MATCH SET (second run = match) +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Bob'}) + ON CREATE SET n.created = true + ON MATCH SET n.matched = true + RETURN n.name, n.created, n.matched +$$) AS (name agtype, created agtype, matched agtype); + name | created | matched +-------+---------+--------- + "Bob" | true | true +(1 row) + +-- ON MATCH SET with MERGE after MATCH (Case 1: has predecessor) +SELECT * FROM cypher('merge_actions', $$ + MATCH (a:Person {name: 'Alice'}) + MERGE (a)-[:KNOWS]->(b:Person {name: 'Charlie'}) + ON CREATE SET b.source = 'merge_create' + RETURN a.name, b.name, b.source +$$) AS (a agtype, b agtype, source agtype); + a | b | source +---------+-----------+---------------- + "Alice" | "Charlie" | "merge_create" +(1 row) + +-- Multiple SET items in a single ON CREATE SET +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Dave'}) + ON CREATE SET n.a = 1, n.b = 2 + RETURN n.name, n.a, n.b +$$) AS (name agtype, a agtype, b agtype); + name | a | b +--------+---+--- + "Dave" | 1 | 2 +(1 row) + +-- Reverse order: ON MATCH before ON CREATE should work +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Eve'}) + ON MATCH SET n.seen = true + ON CREATE SET n.new = true + RETURN n.name, n.new +$$) AS (name agtype, new agtype); + name | new +-------+------ + "Eve" | true +(1 row) + +-- Error: ON CREATE SET specified more than once +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Bad'}) + ON CREATE SET n.a = 1 + ON CREATE SET n.b = 2 + RETURN n +$$) AS (n agtype); +ERROR: ON CREATE SET specified more than once +LINE 1: SELECT * FROM cypher('merge_actions', $$ + ^ +-- Error: ON MATCH SET specified more than once +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Bad'}) + ON MATCH SET n.a = 1 + ON MATCH SET n.b = 2 + RETURN n +$$) AS (n agtype); +ERROR: ON MATCH SET specified more than once +LINE 1: SELECT * FROM cypher('merge_actions', $$ + ^ +-- cleanup +SELECT * FROM cypher('merge_actions', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); + a +--- +(0 rows) + -- -- delete graphs -- +SELECT drop_graph('merge_actions', true); +NOTICE: drop cascades to 4 other objects +DETAIL: drop cascades to table merge_actions._ag_label_vertex +drop cascades to table merge_actions._ag_label_edge +drop cascades to table merge_actions."Person" +drop cascades to table merge_actions."KNOWS" +NOTICE: graph "merge_actions" has been dropped + drop_graph +------------ + +(1 row) + SELECT drop_graph('issue_1907', true); NOTICE: drop cascades to 4 other objects DETAIL: drop cascades to table issue_1907._ag_label_vertex diff --git a/regress/sql/cypher_merge.sql b/regress/sql/cypher_merge.sql index 5939c42a8..4feadeef5 100644 --- a/regress/sql/cypher_merge.sql +++ b/regress/sql/cypher_merge.sql @@ -932,9 +932,87 @@ SELECT * FROM cypher('issue_1709', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype SELECT * FROM cypher('issue_1446', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); SELECT * FROM cypher('issue_1954', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); +-- +-- ON CREATE SET / ON MATCH SET tests (issue #1619) +-- +SELECT create_graph('merge_actions'); + +-- Basic ON CREATE SET: first run creates the node +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Alice'}) + ON CREATE SET n.created = true + RETURN n.name, n.created +$$) AS (name agtype, created agtype); + +-- ON MATCH SET: second run matches the existing node +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Alice'}) + ON MATCH SET n.found = true + RETURN n.name, n.created, n.found +$$) AS (name agtype, created agtype, found agtype); + +-- Both ON CREATE SET and ON MATCH SET (first run = create) +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Bob'}) + ON CREATE SET n.created = true + ON MATCH SET n.matched = true + RETURN n.name, n.created, n.matched +$$) AS (name agtype, created agtype, matched agtype); + +-- Both ON CREATE SET and ON MATCH SET (second run = match) +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Bob'}) + ON CREATE SET n.created = true + ON MATCH SET n.matched = true + RETURN n.name, n.created, n.matched +$$) AS (name agtype, created agtype, matched agtype); + +-- ON MATCH SET with MERGE after MATCH (Case 1: has predecessor) +SELECT * FROM cypher('merge_actions', $$ + MATCH (a:Person {name: 'Alice'}) + MERGE (a)-[:KNOWS]->(b:Person {name: 'Charlie'}) + ON CREATE SET b.source = 'merge_create' + RETURN a.name, b.name, b.source +$$) AS (a agtype, b agtype, source agtype); + +-- Multiple SET items in a single ON CREATE SET +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Dave'}) + ON CREATE SET n.a = 1, n.b = 2 + RETURN n.name, n.a, n.b +$$) AS (name agtype, a agtype, b agtype); + +-- Reverse order: ON MATCH before ON CREATE should work +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Eve'}) + ON MATCH SET n.seen = true + ON CREATE SET n.new = true + RETURN n.name, n.new +$$) AS (name agtype, new agtype); + +-- Error: ON CREATE SET specified more than once +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Bad'}) + ON CREATE SET n.a = 1 + ON CREATE SET n.b = 2 + RETURN n +$$) AS (n agtype); + +-- Error: ON MATCH SET specified more than once +SELECT * FROM cypher('merge_actions', $$ + MERGE (n:Person {name: 'Bad'}) + ON MATCH SET n.a = 1 + ON MATCH SET n.b = 2 + RETURN n +$$) AS (n agtype); + +-- cleanup +SELECT * FROM cypher('merge_actions', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); + -- -- delete graphs -- +SELECT drop_graph('merge_actions', true); SELECT drop_graph('issue_1907', true); SELECT drop_graph('cypher_merge', true); SELECT drop_graph('issue_1630', true); diff --git a/src/backend/executor/cypher_merge.c b/src/backend/executor/cypher_merge.c index 1edfc812d..fb48e3eb1 100644 --- a/src/backend/executor/cypher_merge.c +++ b/src/backend/executor/cypher_merge.c @@ -321,8 +321,29 @@ static void process_simple_merge(CustomScanState *node) /* setup the scantuple that the process_path needs */ econtext->ecxt_scantuple = sss->ss.ss_ScanTupleSlot; + mark_tts_isnull(econtext->ecxt_scantuple); process_path(css, NULL, true); + + /* ON CREATE SET: path was just created */ + if (css->on_create_set_info) + { + ExecStoreVirtualTuple(econtext->ecxt_scantuple); + apply_update_list(&css->css, css->on_create_set_info); + } + } + else + { + /* ON MATCH SET: path already exists */ + if (css->on_match_set_info) + { + ExprContext *econtext = node->ss.ps.ps_ExprContext; + + econtext->ecxt_scantuple = + node->ss.ps.lefttree->ps_ProjInfo->pi_exprContext->ecxt_scantuple; + + apply_update_list(&css->css, css->on_match_set_info); + } } } @@ -657,6 +678,11 @@ static TupleTableSlot *exec_cypher_merge(CustomScanState *node) free_path_entry_array(prebuilt_path_array, path_length); process_path(css, found_path_array, false); + + /* ON MATCH SET: path was found as duplicate */ + if (css->on_match_set_info) + apply_update_list(&css->css, + css->on_match_set_info); } else { @@ -668,8 +694,19 @@ static TupleTableSlot *exec_cypher_merge(CustomScanState *node) css->created_paths_list = new_path; process_path(css, prebuilt_path_array, true); + + /* ON CREATE SET: path was just created */ + if (css->on_create_set_info) + apply_update_list(&css->css, + css->on_create_set_info); } } + else + { + /* ON MATCH SET: path already existed from lateral join */ + if (css->on_match_set_info) + apply_update_list(&css->css, css->on_match_set_info); + } /* Project the result and save a copy */ econtext->ecxt_scantuple = @@ -742,6 +779,10 @@ static TupleTableSlot *exec_cypher_merge(CustomScanState *node) { free_path_entry_array(prebuilt_path_array, path_length); process_path(css, found_path_array, false); + + /* ON MATCH SET: path was found as duplicate */ + if (css->on_match_set_info) + apply_update_list(&css->css, css->on_match_set_info); } else { @@ -752,8 +793,18 @@ static TupleTableSlot *exec_cypher_merge(CustomScanState *node) css->created_paths_list = new_path; process_path(css, prebuilt_path_array, true); + + /* ON CREATE SET: path was just created */ + if (css->on_create_set_info) + apply_update_list(&css->css, css->on_create_set_info); } } + else + { + /* ON MATCH SET: path already existed from lateral join */ + if (css->on_match_set_info) + apply_update_list(&css->css, css->on_match_set_info); + } } while (true); @@ -826,6 +877,14 @@ static TupleTableSlot *exec_cypher_merge(CustomScanState *node) */ css->found_a_path = true; + /* ON MATCH SET: path already exists */ + if (css->on_match_set_info) + { + econtext->ecxt_scantuple = + node->ss.ps.lefttree->ps_ProjInfo->pi_exprContext->ecxt_scantuple; + apply_update_list(&css->css, css->on_match_set_info); + } + econtext->ecxt_scantuple = ExecProject(node->ss.ps.lefttree->ps_ProjInfo); return ExecProject(node->ss.ps.ps_ProjInfo); } @@ -886,21 +945,26 @@ static TupleTableSlot *exec_cypher_merge(CustomScanState *node) /* setup the scantuple that the process_path needs */ econtext->ecxt_scantuple = sss->ss.ss_ScanTupleSlot; - /* create the path */ - process_path(css, NULL, true); - - /* mark the create_new_path flag to true. */ - css->created_new_path = true; - /* - * find the tts_values that process_path did not populate and - * mark as null. + * Initialize the scan tuple slot as all-null before process_path + * populates it with the created entities. This ensures the slot + * is properly set up for apply_update_list. */ mark_tts_isnull(econtext->ecxt_scantuple); - /* store the heap tuble */ + /* create the path */ + process_path(css, NULL, true); + + /* mark the slot as valid so tts_nvalid reflects natts */ ExecStoreVirtualTuple(econtext->ecxt_scantuple); + /* ON CREATE SET: path was just created */ + if (css->on_create_set_info) + apply_update_list(&css->css, css->on_create_set_info); + + /* mark the create_new_path flag to true. */ + css->created_new_path = true; + /* * make the subquery's projection scan slot be the tuple table we * created and run the projection logic. @@ -1029,6 +1093,8 @@ Node *create_cypher_merge_plan_state(CustomScan *cscan) cypher_css->created_new_path = false; cypher_css->found_a_path = false; cypher_css->graph_oid = merge_information->graph_oid; + cypher_css->on_match_set_info = merge_information->on_match_set_info; + cypher_css->on_create_set_info = merge_information->on_create_set_info; cypher_css->css.ss.ps.type = T_CustomScanState; cypher_css->css.methods = &cypher_merge_exec_methods; diff --git a/src/backend/executor/cypher_set.c b/src/backend/executor/cypher_set.c index 09d3d3b54..ddfacdeca 100644 --- a/src/backend/executor/cypher_set.c +++ b/src/backend/executor/cypher_set.c @@ -325,8 +325,7 @@ static agtype_value *replace_entity_in_path(agtype_value *path, static void update_all_paths(CustomScanState *node, graphid id, agtype *updated_entity) { - cypher_set_custom_scan_state *css = (cypher_set_custom_scan_state *)node; - ExprContext *econtext = css->css.ss.ps.ps_ExprContext; + ExprContext *econtext = node->ss.ps.ps_ExprContext; TupleTableSlot *scanTupleSlot = econtext->ecxt_scantuple; int i; @@ -372,13 +371,18 @@ static void update_all_paths(CustomScanState *node, graphid id, } } -static void process_update_list(CustomScanState *node) +/* + * Core SET logic that can be called from any executor (SET, MERGE, etc.). + * Takes the CustomScanState for expression context and a + * cypher_update_information describing which properties to set. + */ +void apply_update_list(CustomScanState *node, + cypher_update_information *set_info) { - cypher_set_custom_scan_state *css = (cypher_set_custom_scan_state *)node; - ExprContext *econtext = css->css.ss.ps.ps_ExprContext; + ExprContext *econtext = node->ss.ps.ps_ExprContext; TupleTableSlot *scanTupleSlot = econtext->ecxt_scantuple; ListCell *lc; - EState *estate = css->css.ss.ps.state; + EState *estate = node->ss.ps.state; int *luindex = NULL; int lidx = 0; HTAB *qual_cache = NULL; @@ -413,7 +417,7 @@ static void process_update_list(CustomScanState *node) * to correctly update an 'entity' after all other previous updates to that * 'entity' have been done. */ - foreach (lc, css->set_list->set_items) + foreach (lc, set_info->set_items) { cypher_update_item *update_item = NULL; @@ -428,7 +432,7 @@ static void process_update_list(CustomScanState *node) lidx = 0; /* iterate through SET set items */ - foreach (lc, css->set_list->set_items) + foreach (lc, set_info->set_items) { agtype_value *altered_properties; agtype_value *original_entity_value; @@ -446,7 +450,7 @@ static void process_update_list(CustomScanState *node) cypher_update_item *update_item; Datum new_entity; HeapTuple heap_tuple; - char *clause_name = css->set_list->clause_name; + char *clause_name = set_info->clause_name; int cid; Oid index_oid = InvalidOid; Relation rel; @@ -499,11 +503,31 @@ static void process_update_list(CustomScanState *node) * this is a REMOVE clause or the variable references a variable that is * NULL. It will be possible for a variable to be NULL when OPTIONAL * MATCH is implemented. + * + * If prop_expr is set (used by MERGE ON CREATE/MATCH SET), evaluate + * the expression directly rather than reading from the scan tuple. + * The planner may have stripped the target entry at prop_position. */ if (update_item->remove_item) { remove_property = true; } + else if (update_item->prop_expr != NULL) + { + ExprState *expr_state; + Datum val; + bool isnull; + + expr_state = ExecInitExpr((Expr *)update_item->prop_expr, + (PlanState *)node); + val = ExecEvalExpr(expr_state, econtext, &isnull); + remove_property = isnull; + + if (!isnull) + { + new_property_value = DATUM_GET_AGTYPE_P(val); + } + } else { remove_property = scanTupleSlot->tts_isnull[update_item->prop_position - 1]; @@ -517,7 +541,7 @@ static void process_update_list(CustomScanState *node) { new_property_value = NULL; } - else + else if (update_item->prop_expr == NULL) { new_property_value = DATUM_GET_AGTYPE_P(scanTupleSlot->tts_values[update_item->prop_position - 1]); } @@ -550,7 +574,7 @@ static void process_update_list(CustomScanState *node) } resultRelInfo = create_entity_result_rel_info( - estate, css->set_list->graph_name, label_name); + estate, set_info->graph_name, label_name); rel = resultRelInfo->ri_RelationDesc; relid = RelationGetRelid(rel); @@ -797,6 +821,13 @@ static void process_update_list(CustomScanState *node) pfree_if_not_null(luindex); } +static void process_update_list(CustomScanState *node) +{ + cypher_set_custom_scan_state *css = (cypher_set_custom_scan_state *)node; + + apply_update_list(node, css->set_list); +} + static TupleTableSlot *exec_cypher_set(CustomScanState *node) { cypher_set_custom_scan_state *css = (cypher_set_custom_scan_state *)node; diff --git a/src/backend/nodes/cypher_copyfuncs.c b/src/backend/nodes/cypher_copyfuncs.c index 420ab1d22..11ca4a27b 100644 --- a/src/backend/nodes/cypher_copyfuncs.c +++ b/src/backend/nodes/cypher_copyfuncs.c @@ -136,6 +136,7 @@ void copy_cypher_update_item(ExtensibleNode *newnode, const ExtensibleNode *from COPY_NODE_FIELD(qualified_name); COPY_SCALAR_FIELD(remove_item); COPY_SCALAR_FIELD(is_add); + COPY_NODE_FIELD(prop_expr); } /* copy function for cypher_delete_information */ @@ -168,6 +169,8 @@ void copy_cypher_merge_information(ExtensibleNode *newnode, const ExtensibleNode COPY_SCALAR_FIELD(graph_oid); COPY_SCALAR_FIELD(merge_function_attr); COPY_NODE_FIELD(path); + COPY_NODE_FIELD(on_match_set_info); + COPY_NODE_FIELD(on_create_set_info); } /* copy function for cypher_predicate_function */ diff --git a/src/backend/nodes/cypher_outfuncs.c b/src/backend/nodes/cypher_outfuncs.c index cf8a400fc..ff8f36d61 100644 --- a/src/backend/nodes/cypher_outfuncs.c +++ b/src/backend/nodes/cypher_outfuncs.c @@ -200,12 +200,14 @@ void out_cypher_predicate_function(StringInfo str, const ExtensibleNode *node) WRITE_NODE_FIELD(where); } -/* serialization function for the cypher_delete ExtensibleNode. */ +/* serialization function for the cypher_merge ExtensibleNode. */ void out_cypher_merge(StringInfo str, const ExtensibleNode *node) { DEFINE_AG_NODE(cypher_merge); WRITE_NODE_FIELD(path); + WRITE_NODE_FIELD(on_match); + WRITE_NODE_FIELD(on_create); } /* serialization function for the cypher_path ExtensibleNode. */ @@ -438,6 +440,7 @@ void out_cypher_update_item(StringInfo str, const ExtensibleNode *node) WRITE_NODE_FIELD(qualified_name); WRITE_BOOL_FIELD(remove_item); WRITE_BOOL_FIELD(is_add); + WRITE_NODE_FIELD(prop_expr); } /* serialization function for the cypher_delete_information ExtensibleNode. */ @@ -470,6 +473,8 @@ void out_cypher_merge_information(StringInfo str, const ExtensibleNode *node) WRITE_INT32_FIELD(graph_oid); WRITE_INT32_FIELD(merge_function_attr); WRITE_NODE_FIELD(path); + WRITE_NODE_FIELD(on_match_set_info); + WRITE_NODE_FIELD(on_create_set_info); } /* diff --git a/src/backend/nodes/cypher_readfuncs.c b/src/backend/nodes/cypher_readfuncs.c index 14b553dbb..f7150616f 100644 --- a/src/backend/nodes/cypher_readfuncs.c +++ b/src/backend/nodes/cypher_readfuncs.c @@ -269,6 +269,7 @@ void read_cypher_update_item(struct ExtensibleNode *node) READ_NODE_FIELD(qualified_name); READ_BOOL_FIELD(remove_item); READ_BOOL_FIELD(is_add); + READ_NODE_FIELD(prop_expr); } /* @@ -310,6 +311,8 @@ void read_cypher_merge_information(struct ExtensibleNode *node) READ_UINT_FIELD(graph_oid); READ_INT_FIELD(merge_function_attr); READ_NODE_FIELD(path); + READ_NODE_FIELD(on_match_set_info); + READ_NODE_FIELD(on_create_set_info); } /* diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index 0ab574fe6..f2d6c4b26 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -7341,6 +7341,52 @@ static Query *transform_cypher_merge(cypher_parsestate *cpstate, merge_information->graph_oid = cpstate->graph_oid; merge_information->path = merge_path; + /* Transform ON MATCH SET items, if any */ + if (self->on_match != NIL) + { + ListCell *lc2; + + merge_information->on_match_set_info = + transform_cypher_set_item_list(cpstate, self->on_match, query); + merge_information->on_match_set_info->clause_name = "MERGE ON MATCH SET"; + merge_information->on_match_set_info->graph_name = cpstate->graph_name; + + /* + * Store prop_expr for direct evaluation in the MERGE executor. + * The planner may strip SET expression target entries from the plan, + * so we embed the Expr in the update item for direct evaluation. + */ + foreach(lc2, merge_information->on_match_set_info->set_items) + { + cypher_update_item *item = lfirst(lc2); + TargetEntry *set_tle = get_tle_by_resno(query->targetList, + item->prop_position); + if (set_tle != NULL) + item->prop_expr = (Node *)set_tle->expr; + } + } + + /* Transform ON CREATE SET items, if any */ + if (self->on_create != NIL) + { + ListCell *lc2; + + merge_information->on_create_set_info = + transform_cypher_set_item_list(cpstate, self->on_create, query); + merge_information->on_create_set_info->clause_name = "MERGE ON CREATE SET"; + merge_information->on_create_set_info->graph_name = cpstate->graph_name; + + /* Store prop_expr for MERGE executor (see comment above) */ + foreach(lc2, merge_information->on_create_set_info->set_items) + { + cypher_update_item *item = lfirst(lc2); + TargetEntry *set_tle = get_tle_by_resno(query->targetList, + item->prop_position); + if (set_tle != NULL) + item->prop_expr = (Node *)set_tle->expr; + } + } + if (!clause->next) { merge_information->flags |= CYPHER_CLAUSE_FLAG_TERMINAL; diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index e14c6b481..019648146 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -64,6 +64,10 @@ bool boolean; Node *node; List *list; + struct { + List *on_match; + List *on_create; + } merge_actions; } %token INTEGER @@ -89,7 +93,7 @@ LIMIT MATCH MERGE NONE NOT NULL_P - OPERATOR OPTIONAL OR ORDER + ON OPERATOR OPTIONAL OR ORDER REMOVE RETURN SET SINGLE SKIP STARTS THEN TRUE_P @@ -139,6 +143,7 @@ /* MERGE clause */ %type merge +%type merge_actions_opt merge_actions merge_action /* CALL ... YIELD clause */ %type call_stmt yield_item @@ -1139,17 +1144,72 @@ detach_opt: * MERGE clause */ merge: - MERGE path + MERGE path merge_actions_opt { cypher_merge *n; n = make_ag_node(cypher_merge); n->path = $2; + n->on_match = $3.on_match; + n->on_create = $3.on_create; $$ = (Node *)n; } ; +merge_actions_opt: + /* empty */ + { + $$.on_match = NIL; + $$.on_create = NIL; + } + | merge_actions + { + $$ = $1; + } + ; + +merge_actions: + merge_action + { + $$ = $1; + } + | merge_actions merge_action + { + if ($2.on_match != NIL) + { + if ($1.on_match != NIL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("ON MATCH SET specified more than once"))); + $$.on_match = $2.on_match; + $$.on_create = $1.on_create; + } + else + { + if ($1.on_create != NIL) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("ON CREATE SET specified more than once"))); + $$.on_create = $2.on_create; + $$.on_match = $1.on_match; + } + } + ; + +merge_action: + ON MATCH SET set_item_list + { + $$.on_match = $4; + $$.on_create = NIL; + } + | ON CREATE SET set_item_list + { + $$.on_match = NIL; + $$.on_create = $4; + } + ; + /* * common */ diff --git a/src/include/executor/cypher_utils.h b/src/include/executor/cypher_utils.h index cdd8fa33e..ac4b5ea5a 100644 --- a/src/include/executor/cypher_utils.h +++ b/src/include/executor/cypher_utils.h @@ -111,8 +111,14 @@ typedef struct cypher_merge_custom_scan_state List *eager_tuples; int eager_tuples_index; bool eager_buffer_filled; + cypher_update_information *on_match_set_info; /* NULL if not specified */ + cypher_update_information *on_create_set_info; /* NULL if not specified */ } cypher_merge_custom_scan_state; +/* Reusable SET logic callable from MERGE executor */ +void apply_update_list(CustomScanState *node, + cypher_update_information *set_info); + TupleTableSlot *populate_vertex_tts(TupleTableSlot *elemTupleSlot, agtype_value *id, agtype_value *properties); TupleTableSlot *populate_edge_tts( diff --git a/src/include/nodes/cypher_nodes.h b/src/include/nodes/cypher_nodes.h index 93bbe01de..a2711cdfb 100644 --- a/src/include/nodes/cypher_nodes.h +++ b/src/include/nodes/cypher_nodes.h @@ -124,6 +124,8 @@ typedef struct cypher_merge { ExtensibleNode extensible; Node *path; + List *on_match; /* List of cypher_set_item, or NIL */ + List *on_create; /* List of cypher_set_item, or NIL */ } cypher_merge; /* @@ -472,6 +474,8 @@ typedef struct cypher_update_item List *qualified_name; bool remove_item; bool is_add; + Node *prop_expr; /* SET value expression, used by MERGE ON CREATE/MATCH SET + * where the expression is not in the plan's target list */ } cypher_update_item; typedef struct cypher_delete_information @@ -498,6 +502,8 @@ typedef struct cypher_merge_information uint32 graph_oid; AttrNumber merge_function_attr; cypher_create_path *path; + cypher_update_information *on_match_set_info; /* NULL if no ON MATCH SET */ + cypher_update_information *on_create_set_info; /* NULL if no ON CREATE SET */ } cypher_merge_information; /* grammar node for typecasts */ diff --git a/src/include/parser/cypher_kwlist.h b/src/include/parser/cypher_kwlist.h index 0de294979..44ac09452 100644 --- a/src/include/parser/cypher_kwlist.h +++ b/src/include/parser/cypher_kwlist.h @@ -31,6 +31,7 @@ PG_KEYWORD("merge", MERGE, RESERVED_KEYWORD) PG_KEYWORD("none", NONE, RESERVED_KEYWORD) PG_KEYWORD("not", NOT, RESERVED_KEYWORD) PG_KEYWORD("null", NULL_P, RESERVED_KEYWORD) +PG_KEYWORD("on", ON, RESERVED_KEYWORD) PG_KEYWORD("operator", OPERATOR, RESERVED_KEYWORD) PG_KEYWORD("optional", OPTIONAL, RESERVED_KEYWORD) PG_KEYWORD("or", OR, RESERVED_KEYWORD) From 20b4f036b9008add74b7623a6e89bd65a32fe627 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Mon, 2 Mar 2026 15:12:59 -0500 Subject: [PATCH 2/6] Address Copilot review: pre-init ExprState, add ON MATCH SET predecessor test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move ExecInitExpr for ON CREATE/MATCH SET items from per-row execution in apply_update_list() to plan initialization in begin_cypher_merge(). Follows the established pattern used by cypher_target_node (id_expr_state, prop_expr_state). - Add prop_expr_state field to cypher_update_item with serialization support in outfuncs/readfuncs/copyfuncs. - apply_update_list() uses pre-initialized state when available, falls back to per-row init for plain SET callers. - Fix misleading comment: "ON MATCH SET" → "ON CREATE SET" for Case 1 first-run test. - Add Case 1 second-run test that triggers ON MATCH SET with a predecessor clause (MATCH ... MERGE ... ON MATCH SET). --- .gitignore | 1 + regress/expected/cypher_merge.out | 14 +++++++++++++- regress/sql/cypher_merge.sql | 10 +++++++++- src/backend/executor/cypher_merge.c | 29 ++++++++++++++++++++++++++++ src/backend/executor/cypher_set.c | 16 +++++++++++++-- src/backend/nodes/cypher_copyfuncs.c | 1 + src/backend/nodes/cypher_outfuncs.c | 1 + src/backend/nodes/cypher_readfuncs.c | 1 + src/include/nodes/cypher_nodes.h | 1 + 9 files changed, 70 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 03923b03e..1e2f8f674 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,4 @@ __pycache__ **/apache_age_python.egg-info drivers/python/build +*.bc diff --git a/regress/expected/cypher_merge.out b/regress/expected/cypher_merge.out index 5fe8f4ae5..f334ff1ac 100644 --- a/regress/expected/cypher_merge.out +++ b/regress/expected/cypher_merge.out @@ -2057,7 +2057,7 @@ $$) AS (name agtype, created agtype, matched agtype); "Bob" | true | true (1 row) --- ON MATCH SET with MERGE after MATCH (Case 1: has predecessor) +-- ON CREATE SET with MERGE after MATCH (Case 1: has predecessor, first run = create) SELECT * FROM cypher('merge_actions', $$ MATCH (a:Person {name: 'Alice'}) MERGE (a)-[:KNOWS]->(b:Person {name: 'Charlie'}) @@ -2069,6 +2069,18 @@ $$) AS (a agtype, b agtype, source agtype); "Alice" | "Charlie" | "merge_create" (1 row) +-- ON MATCH SET with MERGE after MATCH (Case 1: has predecessor, second run = match) +SELECT * FROM cypher('merge_actions', $$ + MATCH (a:Person {name: 'Alice'}) + MERGE (a)-[:KNOWS]->(b:Person {name: 'Charlie'}) + ON MATCH SET b.visited = true + RETURN a.name, b.name, b.visited +$$) AS (a agtype, b agtype, visited agtype); + a | b | visited +---------+-----------+--------- + "Alice" | "Charlie" | true +(1 row) + -- Multiple SET items in a single ON CREATE SET SELECT * FROM cypher('merge_actions', $$ MERGE (n:Person {name: 'Dave'}) diff --git a/regress/sql/cypher_merge.sql b/regress/sql/cypher_merge.sql index 4feadeef5..34b3ee036 100644 --- a/regress/sql/cypher_merge.sql +++ b/regress/sql/cypher_merge.sql @@ -967,7 +967,7 @@ SELECT * FROM cypher('merge_actions', $$ RETURN n.name, n.created, n.matched $$) AS (name agtype, created agtype, matched agtype); --- ON MATCH SET with MERGE after MATCH (Case 1: has predecessor) +-- ON CREATE SET with MERGE after MATCH (Case 1: has predecessor, first run = create) SELECT * FROM cypher('merge_actions', $$ MATCH (a:Person {name: 'Alice'}) MERGE (a)-[:KNOWS]->(b:Person {name: 'Charlie'}) @@ -975,6 +975,14 @@ SELECT * FROM cypher('merge_actions', $$ RETURN a.name, b.name, b.source $$) AS (a agtype, b agtype, source agtype); +-- ON MATCH SET with MERGE after MATCH (Case 1: has predecessor, second run = match) +SELECT * FROM cypher('merge_actions', $$ + MATCH (a:Person {name: 'Alice'}) + MERGE (a)-[:KNOWS]->(b:Person {name: 'Charlie'}) + ON MATCH SET b.visited = true + RETURN a.name, b.name, b.visited +$$) AS (a agtype, b agtype, visited agtype); + -- Multiple SET items in a single ON CREATE SET SELECT * FROM cypher('merge_actions', $$ MERGE (n:Person {name: 'Dave'}) diff --git a/src/backend/executor/cypher_merge.c b/src/backend/executor/cypher_merge.c index fb48e3eb1..2e5aaa0fe 100644 --- a/src/backend/executor/cypher_merge.c +++ b/src/backend/executor/cypher_merge.c @@ -191,6 +191,35 @@ static void begin_cypher_merge(CustomScanState *node, EState *estate, } } + /* + * Pre-initialize ExprStates for ON CREATE SET / ON MATCH SET items. + * This must happen once at plan init time, not per-row. + */ + if (css->on_create_set_info != NULL) + { + foreach(lc, css->on_create_set_info->set_items) + { + cypher_update_item *item = (cypher_update_item *)lfirst(lc); + if (item->prop_expr != NULL) + { + item->prop_expr_state = ExecInitExpr( + (Expr *)item->prop_expr, (PlanState *)node); + } + } + } + if (css->on_match_set_info != NULL) + { + foreach(lc, css->on_match_set_info->set_items) + { + cypher_update_item *item = (cypher_update_item *)lfirst(lc); + if (item->prop_expr != NULL) + { + item->prop_expr_state = ExecInitExpr( + (Expr *)item->prop_expr, (PlanState *)node); + } + } + } + /* * Postgres does not assign the es_output_cid in queries that do * not write to disk, ie: SELECT commands. We need the command id diff --git a/src/backend/executor/cypher_set.c b/src/backend/executor/cypher_set.c index ddfacdeca..b7f19dc6a 100644 --- a/src/backend/executor/cypher_set.c +++ b/src/backend/executor/cypher_set.c @@ -518,8 +518,20 @@ void apply_update_list(CustomScanState *node, Datum val; bool isnull; - expr_state = ExecInitExpr((Expr *)update_item->prop_expr, - (PlanState *)node); + /* + * Use the pre-initialized ExprState if available (set during + * plan init in begin_cypher_merge). Fall back to per-row init + * for callers that haven't pre-initialized (e.g. plain SET). + */ + if (update_item->prop_expr_state != NULL) + { + expr_state = update_item->prop_expr_state; + } + else + { + expr_state = ExecInitExpr((Expr *)update_item->prop_expr, + (PlanState *)node); + } val = ExecEvalExpr(expr_state, econtext, &isnull); remove_property = isnull; diff --git a/src/backend/nodes/cypher_copyfuncs.c b/src/backend/nodes/cypher_copyfuncs.c index 11ca4a27b..283096ca7 100644 --- a/src/backend/nodes/cypher_copyfuncs.c +++ b/src/backend/nodes/cypher_copyfuncs.c @@ -137,6 +137,7 @@ void copy_cypher_update_item(ExtensibleNode *newnode, const ExtensibleNode *from COPY_SCALAR_FIELD(remove_item); COPY_SCALAR_FIELD(is_add); COPY_NODE_FIELD(prop_expr); + COPY_NODE_FIELD(prop_expr_state); } /* copy function for cypher_delete_information */ diff --git a/src/backend/nodes/cypher_outfuncs.c b/src/backend/nodes/cypher_outfuncs.c index ff8f36d61..84d32a8f8 100644 --- a/src/backend/nodes/cypher_outfuncs.c +++ b/src/backend/nodes/cypher_outfuncs.c @@ -441,6 +441,7 @@ void out_cypher_update_item(StringInfo str, const ExtensibleNode *node) WRITE_BOOL_FIELD(remove_item); WRITE_BOOL_FIELD(is_add); WRITE_NODE_FIELD(prop_expr); + WRITE_NODE_FIELD(prop_expr_state); } /* serialization function for the cypher_delete_information ExtensibleNode. */ diff --git a/src/backend/nodes/cypher_readfuncs.c b/src/backend/nodes/cypher_readfuncs.c index f7150616f..1e7e0ef82 100644 --- a/src/backend/nodes/cypher_readfuncs.c +++ b/src/backend/nodes/cypher_readfuncs.c @@ -270,6 +270,7 @@ void read_cypher_update_item(struct ExtensibleNode *node) READ_BOOL_FIELD(remove_item); READ_BOOL_FIELD(is_add); READ_NODE_FIELD(prop_expr); + READ_NODE_FIELD(prop_expr_state); } /* diff --git a/src/include/nodes/cypher_nodes.h b/src/include/nodes/cypher_nodes.h index a2711cdfb..3433bebb0 100644 --- a/src/include/nodes/cypher_nodes.h +++ b/src/include/nodes/cypher_nodes.h @@ -476,6 +476,7 @@ typedef struct cypher_update_item bool is_add; Node *prop_expr; /* SET value expression, used by MERGE ON CREATE/MATCH SET * where the expression is not in the plan's target list */ + ExprState *prop_expr_state; /* initialized at plan init, not per-row */ } cypher_update_item; typedef struct cypher_delete_information From 409b7e9f90cd237757818df6d134ef8c3b9af0c6 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Fri, 6 Mar 2026 11:57:29 -0500 Subject: [PATCH 3/6] Address Copilot review: ON in safe_keywords, chained MERGE tests 1. Add ON to safe_keywords in cypher_gram.y so that property keys and labels named 'on' still work (e.g., n.on, MATCH (n:on)). All other keywords added as tokens are also in safe_keywords. 2. Add chained (non-terminal) MERGE regression tests exercising the eager-buffering code path with ON CREATE SET and ON MATCH SET. First run creates both nodes (ON CREATE SET fires), second run matches both (ON MATCH SET fires). All regression tests pass (cypher_merge: ok). --- regress/expected/cypher_merge.out | 26 ++++++++++++++++++++++++++ regress/sql/cypher_merge.sql | 18 ++++++++++++++++++ src/backend/parser/cypher_gram.y | 1 + 3 files changed, 45 insertions(+) diff --git a/regress/expected/cypher_merge.out b/regress/expected/cypher_merge.out index f334ff1ac..a8a162b72 100644 --- a/regress/expected/cypher_merge.out +++ b/regress/expected/cypher_merge.out @@ -2124,6 +2124,32 @@ $$) AS (n agtype); ERROR: ON MATCH SET specified more than once LINE 1: SELECT * FROM cypher('merge_actions', $$ ^ +-- Chained (non-terminal) MERGE with ON CREATE SET (eager-buffering path) +SELECT * FROM cypher('merge_actions', $$ + MERGE (a:Person {name: 'Frank'}) + ON CREATE SET a.created = true + MERGE (a)-[:KNOWS]->(b:Person {name: 'Grace'}) + ON CREATE SET b.created = true + RETURN a.name, a.created, b.name, b.created +$$) AS (a_name agtype, a_created agtype, b_name agtype, b_created agtype); + a_name | a_created | b_name | b_created +---------+-----------+---------+----------- + "Frank" | true | "Grace" | true +(1 row) + +-- Chained (non-terminal) MERGE with ON MATCH SET (second run = match) +SELECT * FROM cypher('merge_actions', $$ + MERGE (a:Person {name: 'Frank'}) + ON MATCH SET a.matched = true + MERGE (a)-[:KNOWS]->(b:Person {name: 'Grace'}) + ON MATCH SET b.matched = true + RETURN a.name, a.matched, b.name, b.matched +$$) AS (a_name agtype, a_matched agtype, b_name agtype, b_matched agtype); + a_name | a_matched | b_name | b_matched +---------+-----------+---------+----------- + "Frank" | true | "Grace" | true +(1 row) + -- cleanup SELECT * FROM cypher('merge_actions', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); a diff --git a/regress/sql/cypher_merge.sql b/regress/sql/cypher_merge.sql index 34b3ee036..439ca5797 100644 --- a/regress/sql/cypher_merge.sql +++ b/regress/sql/cypher_merge.sql @@ -1014,6 +1014,24 @@ SELECT * FROM cypher('merge_actions', $$ RETURN n $$) AS (n agtype); +-- Chained (non-terminal) MERGE with ON CREATE SET (eager-buffering path) +SELECT * FROM cypher('merge_actions', $$ + MERGE (a:Person {name: 'Frank'}) + ON CREATE SET a.created = true + MERGE (a)-[:KNOWS]->(b:Person {name: 'Grace'}) + ON CREATE SET b.created = true + RETURN a.name, a.created, b.name, b.created +$$) AS (a_name agtype, a_created agtype, b_name agtype, b_created agtype); + +-- Chained (non-terminal) MERGE with ON MATCH SET (second run = match) +SELECT * FROM cypher('merge_actions', $$ + MERGE (a:Person {name: 'Frank'}) + ON MATCH SET a.matched = true + MERGE (a)-[:KNOWS]->(b:Person {name: 'Grace'}) + ON MATCH SET b.matched = true + RETURN a.name, a.matched, b.name, b.matched +$$) AS (a_name agtype, a_matched agtype, b_name agtype, b_matched agtype); + -- cleanup SELECT * FROM cypher('merge_actions', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); diff --git a/src/backend/parser/cypher_gram.y b/src/backend/parser/cypher_gram.y index 019648146..c857724dd 100644 --- a/src/backend/parser/cypher_gram.y +++ b/src/backend/parser/cypher_gram.y @@ -2483,6 +2483,7 @@ safe_keywords: | MERGE { $$ = KEYWORD_STRDUP($1); } | NONE { $$ = KEYWORD_STRDUP($1); } | NOT { $$ = KEYWORD_STRDUP($1); } + | ON { $$ = KEYWORD_STRDUP($1); } | OPERATOR { $$ = KEYWORD_STRDUP($1); } | OPTIONAL { $$ = KEYWORD_STRDUP($1); } | OR { $$ = KEYWORD_STRDUP($1); } From 5e968fb4f01575a7d1c5984345998aab9a375088 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Fri, 6 Mar 2026 14:02:37 -0500 Subject: [PATCH 4/6] Address Copilot review: ExecStoreVirtualTuple, prop_expr assertion, ON keyword test - Move ExecStoreVirtualTuple before apply_update_list unconditionally in Case 1 non-terminal and terminal MERGE paths, matching the pattern at Case 3 (line 994). Ensures tts_nvalid is set for downstream ExecProject even when ON CREATE SET is absent. - Add resolve_merge_set_exprs() helper to deduplicate the prop_expr resolution loops for ON MATCH SET and ON CREATE SET. Includes ereport when target entry is missing (internal error, should never happen). - Add regression test for ON keyword as label name, confirming backward compatibility via safe_keywords grammar path. Co-Authored-By: Claude Opus 4.6 --- regress/expected/cypher_merge.out | 13 ++++++- regress/sql/cypher_merge.sql | 6 +++ src/backend/executor/cypher_merge.c | 2 + src/backend/parser/cypher_clause.c | 59 ++++++++++++++++------------- 4 files changed, 53 insertions(+), 27 deletions(-) diff --git a/regress/expected/cypher_merge.out b/regress/expected/cypher_merge.out index a8a162b72..bf943fd8a 100644 --- a/regress/expected/cypher_merge.out +++ b/regress/expected/cypher_merge.out @@ -2150,6 +2150,16 @@ $$) AS (a_name agtype, a_matched agtype, b_name agtype, b_matched agtype); "Frank" | true | "Grace" | true (1 row) +-- ON keyword as label name (backward compat via safe_keywords) +SELECT * FROM cypher('merge_actions', $$ + CREATE (n:on {name: 'test'}) + RETURN n.name +$$) AS (name agtype); + name +-------- + "test" +(1 row) + -- cleanup SELECT * FROM cypher('merge_actions', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); a @@ -2160,11 +2170,12 @@ SELECT * FROM cypher('merge_actions', $$ MATCH (n) DETACH DELETE n $$) AS (a agt -- delete graphs -- SELECT drop_graph('merge_actions', true); -NOTICE: drop cascades to 4 other objects +NOTICE: drop cascades to 5 other objects DETAIL: drop cascades to table merge_actions._ag_label_vertex drop cascades to table merge_actions._ag_label_edge drop cascades to table merge_actions."Person" drop cascades to table merge_actions."KNOWS" +drop cascades to table merge_actions."on" NOTICE: graph "merge_actions" has been dropped drop_graph ------------ diff --git a/regress/sql/cypher_merge.sql b/regress/sql/cypher_merge.sql index 439ca5797..5637b6557 100644 --- a/regress/sql/cypher_merge.sql +++ b/regress/sql/cypher_merge.sql @@ -1032,6 +1032,12 @@ SELECT * FROM cypher('merge_actions', $$ RETURN a.name, a.matched, b.name, b.matched $$) AS (a_name agtype, a_matched agtype, b_name agtype, b_matched agtype); +-- ON keyword as label name (backward compat via safe_keywords) +SELECT * FROM cypher('merge_actions', $$ + CREATE (n:on {name: 'test'}) + RETURN n.name +$$) AS (name agtype); + -- cleanup SELECT * FROM cypher('merge_actions', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); diff --git a/src/backend/executor/cypher_merge.c b/src/backend/executor/cypher_merge.c index 2e5aaa0fe..ca9e62af2 100644 --- a/src/backend/executor/cypher_merge.c +++ b/src/backend/executor/cypher_merge.c @@ -723,6 +723,7 @@ static TupleTableSlot *exec_cypher_merge(CustomScanState *node) css->created_paths_list = new_path; process_path(css, prebuilt_path_array, true); + ExecStoreVirtualTuple(econtext->ecxt_scantuple); /* ON CREATE SET: path was just created */ if (css->on_create_set_info) @@ -822,6 +823,7 @@ static TupleTableSlot *exec_cypher_merge(CustomScanState *node) css->created_paths_list = new_path; process_path(css, prebuilt_path_array, true); + ExecStoreVirtualTuple(econtext->ecxt_scantuple); /* ON CREATE SET: path was just created */ if (css->on_create_set_info) diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index f2d6c4b26..d37d64c35 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -7269,6 +7269,33 @@ Query *cypher_parse_sub_analyze(Node *parseTree, * similar to OPTIONAL MATCH, however with the added feature of creating the * path if not there, rather than just emitting NULL. */ +/* + * Resolve prop_expr for each SET item by looking up its target entry. + * The planner may strip SET expression target entries from the plan, + * so we embed the Expr in the update item for direct evaluation. + */ +static void +resolve_merge_set_exprs(List *set_items, List *targetList, + const char *clause_name) +{ + ListCell *lc; + + foreach(lc, set_items) + { + cypher_update_item *item = lfirst(lc); + TargetEntry *set_tle = get_tle_by_resno(targetList, + item->prop_position); + if (set_tle == NULL) + { + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("%s target entry not found at position %d", + clause_name, item->prop_position))); + } + item->prop_expr = (Node *)set_tle->expr; + } +} + static Query *transform_cypher_merge(cypher_parsestate *cpstate, cypher_clause *clause) { @@ -7344,47 +7371,27 @@ static Query *transform_cypher_merge(cypher_parsestate *cpstate, /* Transform ON MATCH SET items, if any */ if (self->on_match != NIL) { - ListCell *lc2; - merge_information->on_match_set_info = transform_cypher_set_item_list(cpstate, self->on_match, query); merge_information->on_match_set_info->clause_name = "MERGE ON MATCH SET"; merge_information->on_match_set_info->graph_name = cpstate->graph_name; - /* - * Store prop_expr for direct evaluation in the MERGE executor. - * The planner may strip SET expression target entries from the plan, - * so we embed the Expr in the update item for direct evaluation. - */ - foreach(lc2, merge_information->on_match_set_info->set_items) - { - cypher_update_item *item = lfirst(lc2); - TargetEntry *set_tle = get_tle_by_resno(query->targetList, - item->prop_position); - if (set_tle != NULL) - item->prop_expr = (Node *)set_tle->expr; - } + resolve_merge_set_exprs( + merge_information->on_match_set_info->set_items, + query->targetList, "ON MATCH SET"); } /* Transform ON CREATE SET items, if any */ if (self->on_create != NIL) { - ListCell *lc2; - merge_information->on_create_set_info = transform_cypher_set_item_list(cpstate, self->on_create, query); merge_information->on_create_set_info->clause_name = "MERGE ON CREATE SET"; merge_information->on_create_set_info->graph_name = cpstate->graph_name; - /* Store prop_expr for MERGE executor (see comment above) */ - foreach(lc2, merge_information->on_create_set_info->set_items) - { - cypher_update_item *item = lfirst(lc2); - TargetEntry *set_tle = get_tle_by_resno(query->targetList, - item->prop_position); - if (set_tle != NULL) - item->prop_expr = (Node *)set_tle->expr; - } + resolve_merge_set_exprs( + merge_information->on_create_set_info->set_items, + query->targetList, "ON CREATE SET"); } if (!clause->next) From 39a2cc874c887bfee1a286154a0e80867c78f1c4 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Sun, 19 Apr 2026 11:19:10 -0400 Subject: [PATCH 5/6] Fix TTS_EMPTY assertion when re-using scan slot after process_path The four ExecStoreVirtualTuple calls in exec_cypher_merge were triggering an Assert failure under --enable-cassert: TRAP: failed Assert("TTS_EMPTY(slot)"), File: execTuples.c, Line: 1748 ExecStoreVirtualTuple (execTuples.c:1748) asserts that its target slot is in the TTS_EMPTY state. In our MERGE executor, process_path writes directly into the subquery's scan tuple slot -- which already holds the subquery's output tuple and therefore is NOT empty. On a release build the assertion compiles out and ExecStoreVirtualTuple just clears the flag and sets tts_nvalid; on an --enable-cassert build the backend aborts and takes down the regression run. We only need the bookkeeping half of ExecStoreVirtualTuple (clear TTS_FLAG_EMPTY and set tts_nvalid = natts) -- not the "store semantics" that motivate the assertion. Add a small static helper mark_scan_slot_valid() that does exactly the bookkeeping, and replace the four call sites. Release-build behavior is byte-identical since Assert() compiles to nothing; cassert-build behavior now matches release. Caught by the cassert-enabled regression suite we reinstated after #2380. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/backend/executor/cypher_merge.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/src/backend/executor/cypher_merge.c b/src/backend/executor/cypher_merge.c index ca9e62af2..a30d477f8 100644 --- a/src/backend/executor/cypher_merge.c +++ b/src/backend/executor/cypher_merge.c @@ -80,6 +80,7 @@ static bool check_path(cypher_merge_custom_scan_state *css, static void process_path(cypher_merge_custom_scan_state *css, path_entry **path_array, bool should_insert); static void mark_tts_isnull(TupleTableSlot *slot); +static void mark_scan_slot_valid(TupleTableSlot *slot); const CustomExecMethods cypher_merge_exec_methods = {MERGE_SCAN_STATE_NAME, begin_cypher_merge, @@ -357,7 +358,7 @@ static void process_simple_merge(CustomScanState *node) /* ON CREATE SET: path was just created */ if (css->on_create_set_info) { - ExecStoreVirtualTuple(econtext->ecxt_scantuple); + mark_scan_slot_valid(econtext->ecxt_scantuple); apply_update_list(&css->css, css->on_create_set_info); } } @@ -376,6 +377,23 @@ static void process_simple_merge(CustomScanState *node) } } +/* + * mark_scan_slot_valid - mark a scan slot as populated after direct writes + * to tts_values[] by process_path. + * + * This does the same bookkeeping as ExecStoreVirtualTuple (clear TTS_EMPTY, + * set tts_nvalid = natts) but without the TTS_EMPTY precondition assertion. + * We cannot use ExecStoreVirtualTuple here because process_path writes into + * a scan slot that already holds the subquery's output tuple -- the slot is + * NOT empty, and asserting it is would fire under --enable-cassert while + * silently clearing the flag on release builds. + */ +static void mark_scan_slot_valid(TupleTableSlot *slot) +{ + slot->tts_flags &= ~TTS_FLAG_EMPTY; + slot->tts_nvalid = slot->tts_tupleDescriptor->natts; +} + /* * Iterate through the TupleTableSlot's tts_values and marks the isnull field * with true. @@ -723,7 +741,7 @@ static TupleTableSlot *exec_cypher_merge(CustomScanState *node) css->created_paths_list = new_path; process_path(css, prebuilt_path_array, true); - ExecStoreVirtualTuple(econtext->ecxt_scantuple); + mark_scan_slot_valid(econtext->ecxt_scantuple); /* ON CREATE SET: path was just created */ if (css->on_create_set_info) @@ -823,7 +841,7 @@ static TupleTableSlot *exec_cypher_merge(CustomScanState *node) css->created_paths_list = new_path; process_path(css, prebuilt_path_array, true); - ExecStoreVirtualTuple(econtext->ecxt_scantuple); + mark_scan_slot_valid(econtext->ecxt_scantuple); /* ON CREATE SET: path was just created */ if (css->on_create_set_info) @@ -987,7 +1005,7 @@ static TupleTableSlot *exec_cypher_merge(CustomScanState *node) process_path(css, NULL, true); /* mark the slot as valid so tts_nvalid reflects natts */ - ExecStoreVirtualTuple(econtext->ecxt_scantuple); + mark_scan_slot_valid(econtext->ecxt_scantuple); /* ON CREATE SET: path was just created */ if (css->on_create_set_info) From f0ed4c95137ceb64a7e1fa07efe3a039751052e3 Mon Sep 17 00:00:00 2001 From: Greg Felice Date: Tue, 21 Apr 2026 11:59:30 -0400 Subject: [PATCH 6/6] Fix MERGE ON CREATE/MATCH SET crash when RHS references a bound variable When MERGE has a previous clause (e.g. MATCH, UNWIND), transform_cypher_merge takes the lateral-left-join path via transform_merge_make_lateral_join. That helper called addRangeTableEntryForJoin with nscolumns=NULL, leaving the join ParseNamespaceItem's p_nscolumns unset. For queries that did not subsequently resolve a column reference against that nsitem (e.g. RETURN, which runs in a fresh namespace built by handle_prev_clause), the NULL was harmless. Our ON CREATE / ON MATCH SET transform runs in-line, before the MERGE query becomes a subquery, so transform_cypher_set_item_list consulted the join's nsitem directly. colNameToVar -> scanNSItemForColumn then dereferenced p_nscolumns[attnum-1] = NULL[0] and the backend segfaulted on any ON SET whose RHS referenced a bound variable. Reported by MuhammadTahaNaveed on PR #2347. Populate the join's p_nscolumns from res_colvars. The Var we end up producing for a bound entity lives inside prop_expr, which is opaque to the planner, so it is not rewritten to match the plan's output slots. At ExecEvalScalarVar time only varattno is consulted, and scantuple's layout mirrors the join's eref->colnames (via make_target_list_from_join). Use the join rtindex and 1-based eref position so scantuple[varattno - 1] resolves to the correct entity column at runtime; without this, Vars for a (varno=l_rte) and b (varno=r_rte) with varattno=1 both hit scantuple[0] and b.id evaluated to a.id. Also initialise apply_update_list's new_property_value at its declaration. All control paths reach the single alter_property_value call with the variable set, but -Wmaybe-uninitialized fires at -O2 because the compiler cannot prove remove_property == isnull when prop_expr is non-NULL. Regression: six new cases in cypher_merge covering outer-ref RHS, self-ref RHS with a previous clause (the silent-wrong-value variant we initially missed), UNWIND-driven MERGE with self-ref (Muhammad's second reproducer), multi-item ON CREATE SET mixing reference kinds, and ON MATCH SET with a variable RHS on the match-branch second run. Cassert installcheck: 33/33. --- regress/expected/cypher_merge.out | 71 ++++++++++++++++++++++++++++++ regress/sql/cypher_merge.sql | 47 ++++++++++++++++++++ src/backend/executor/cypher_set.c | 2 +- src/backend/parser/cypher_clause.c | 61 +++++++++++++++++++++++-- 4 files changed, 176 insertions(+), 5 deletions(-) diff --git a/regress/expected/cypher_merge.out b/regress/expected/cypher_merge.out index bf943fd8a..ac08a971a 100644 --- a/regress/expected/cypher_merge.out +++ b/regress/expected/cypher_merge.out @@ -2160,6 +2160,77 @@ $$) AS (name agtype); "test" (1 row) +-- Issue #2347: RHS of ON CREATE / ON MATCH SET referencing a bound +-- variable crashed the backend when MERGE had a previous clause, because +-- the lateral-join's ParseNamespaceItem had p_nscolumns=NULL. +-- ON CREATE SET with RHS referencing the outer MATCH's variable +SELECT * FROM cypher('merge_actions', $$ CREATE (:Person {name:'Anchor'}) $$) AS (a agtype); + a +--- +(0 rows) + +SELECT * FROM cypher('merge_actions', $$ + MATCH (a:Person {name: 'Anchor'}) + MERGE (b:Person {name: 'FromOuter'}) + ON CREATE SET b.source_name = a.name + RETURN a.name, b.name, b.source_name +$$) AS (a_name agtype, b_name agtype, b_source agtype); + a_name | b_name | b_source +----------+-------------+---------- + "Anchor" | "FromOuter" | "Anchor" +(1 row) + +-- ON CREATE SET with RHS referencing the MERGE-bound variable itself +SELECT * FROM cypher('merge_actions', $$ + MATCH (a:Person {name: 'Anchor'}) + MERGE (b:Person {name: 'SelfRef'}) + ON CREATE SET b.echo_name = b.name + RETURN b.name, b.echo_name +$$) AS (b_name agtype, b_echo agtype); + b_name | b_echo +-----------+----------- + "SelfRef" | "SelfRef" +(1 row) + +-- ON CREATE SET driven by UNWIND with self-reference on the RHS +-- (Muhammad's second reproducer) +SELECT * FROM cypher('merge_actions', $$ + UNWIND ['U1', 'U2'] AS nm + MERGE (n:Person {name: nm}) + ON CREATE SET n.copy_name = n.name + RETURN n.name, n.copy_name +$$) AS (n_name agtype, n_copy agtype); + n_name | n_copy +--------+-------- + "U1" | "U1" + "U2" | "U2" +(2 rows) + +-- Multiple SET items mixing outer-ref, self-ref, and literal RHS +SELECT * FROM cypher('merge_actions', $$ + MATCH (a:Person {name: 'Anchor'}) + MERGE (b:Person {name: 'MultiItem'}) + ON CREATE SET b.from_a = a.name, b.self = b.name, b.lit = 'literal' + RETURN b.from_a, b.self, b.lit +$$) AS (fa agtype, sf agtype, lit agtype); + fa | sf | lit +----------+-------------+----------- + "Anchor" | "MultiItem" | "literal" +(1 row) + +-- ON MATCH SET with variable RHS (second run on existing node) +SELECT * FROM cypher('merge_actions', $$ + MATCH (a:Person {name: 'Anchor'}) + MERGE (b:Person {name: 'FromOuter'}) + ON CREATE SET b.source_name = a.name + ON MATCH SET b.last_seen_by = a.name + RETURN b.source_name, b.last_seen_by +$$) AS (src agtype, last agtype); + src | last +----------+---------- + "Anchor" | "Anchor" +(1 row) + -- cleanup SELECT * FROM cypher('merge_actions', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); a diff --git a/regress/sql/cypher_merge.sql b/regress/sql/cypher_merge.sql index 5637b6557..86b3e0235 100644 --- a/regress/sql/cypher_merge.sql +++ b/regress/sql/cypher_merge.sql @@ -1038,6 +1038,53 @@ SELECT * FROM cypher('merge_actions', $$ RETURN n.name $$) AS (name agtype); +-- Issue #2347: RHS of ON CREATE / ON MATCH SET referencing a bound +-- variable crashed the backend when MERGE had a previous clause, because +-- the lateral-join's ParseNamespaceItem had p_nscolumns=NULL. + +-- ON CREATE SET with RHS referencing the outer MATCH's variable +SELECT * FROM cypher('merge_actions', $$ CREATE (:Person {name:'Anchor'}) $$) AS (a agtype); +SELECT * FROM cypher('merge_actions', $$ + MATCH (a:Person {name: 'Anchor'}) + MERGE (b:Person {name: 'FromOuter'}) + ON CREATE SET b.source_name = a.name + RETURN a.name, b.name, b.source_name +$$) AS (a_name agtype, b_name agtype, b_source agtype); + +-- ON CREATE SET with RHS referencing the MERGE-bound variable itself +SELECT * FROM cypher('merge_actions', $$ + MATCH (a:Person {name: 'Anchor'}) + MERGE (b:Person {name: 'SelfRef'}) + ON CREATE SET b.echo_name = b.name + RETURN b.name, b.echo_name +$$) AS (b_name agtype, b_echo agtype); + +-- ON CREATE SET driven by UNWIND with self-reference on the RHS +-- (Muhammad's second reproducer) +SELECT * FROM cypher('merge_actions', $$ + UNWIND ['U1', 'U2'] AS nm + MERGE (n:Person {name: nm}) + ON CREATE SET n.copy_name = n.name + RETURN n.name, n.copy_name +$$) AS (n_name agtype, n_copy agtype); + +-- Multiple SET items mixing outer-ref, self-ref, and literal RHS +SELECT * FROM cypher('merge_actions', $$ + MATCH (a:Person {name: 'Anchor'}) + MERGE (b:Person {name: 'MultiItem'}) + ON CREATE SET b.from_a = a.name, b.self = b.name, b.lit = 'literal' + RETURN b.from_a, b.self, b.lit +$$) AS (fa agtype, sf agtype, lit agtype); + +-- ON MATCH SET with variable RHS (second run on existing node) +SELECT * FROM cypher('merge_actions', $$ + MATCH (a:Person {name: 'Anchor'}) + MERGE (b:Person {name: 'FromOuter'}) + ON CREATE SET b.source_name = a.name + ON MATCH SET b.last_seen_by = a.name + RETURN b.source_name, b.last_seen_by +$$) AS (src agtype, last agtype); + -- cleanup SELECT * FROM cypher('merge_actions', $$ MATCH (n) DETACH DELETE n $$) AS (a agtype); diff --git a/src/backend/executor/cypher_set.c b/src/backend/executor/cypher_set.c index b7f19dc6a..872f644db 100644 --- a/src/backend/executor/cypher_set.c +++ b/src/backend/executor/cypher_set.c @@ -440,7 +440,7 @@ void apply_update_list(CustomScanState *node, agtype_value *id; agtype_value *label; agtype *original_entity; - agtype *new_property_value; + agtype *new_property_value = NULL; TupleTableSlot *slot; ResultRelInfo *resultRelInfo; ScanKeyData scan_keys[1]; diff --git a/src/backend/parser/cypher_clause.c b/src/backend/parser/cypher_clause.c index d37d64c35..3083c52e1 100644 --- a/src/backend/parser/cypher_clause.c +++ b/src/backend/parser/cypher_clause.c @@ -7509,10 +7509,63 @@ transform_merge_make_lateral_join(cypher_parsestate *cpstate, Query *query, */ get_res_cols(pstate, l_nsitem, r_nsitem, &res_colnames, &res_colvars); - /* make the RTE for the join */ - jnsitem = addRangeTableEntryForJoin(pstate, res_colnames, NULL, j->jointype, - 0, res_colvars, NIL, NIL, j->alias, - NULL, true); + /* + * Build a ParseNamespaceColumn array for the join RTE so that + * subsequent name lookups (e.g. transform_cypher_set_item_list for an + * ON CREATE SET / ON MATCH SET expression) can resolve references to + * variables bound in the prev clause or the MERGE's path via + * colNameToVar → scanNSItemForColumn, which dereferences + * nsitem->p_nscolumns. Passing NULL here left p_nscolumns unset and + * caused a segfault whenever an ON SET item's RHS referenced a bound + * variable (issue #2347). + * + * Each column's nscolumn references the join RTE (via its rtindex) with + * p_varattno = position in res_colnames. This matches the scantuple + * layout that apply_update_list sees at execution time: the join's + * target list (built by make_target_list_from_join below) iterates + * eref->colnames in order, so scantuple[i-1] corresponds to the i-th + * entry in eref->colnames. Using the underlying RTE's varno/varattno + * would be semantically equivalent for planner-rewritten Vars in the + * query tree, but the Vars we produce here end up inside prop_expr -- + * opaque metadata the planner does not walk -- so they stay un-remapped + * and must index the scantuple layout directly. + * + * addRangeTableEntryForJoin appends the new RTE to pstate->p_rtable, so + * its rtindex is list_length(p_rtable) + 1 at this point. + */ + { + int colcount = list_length(res_colvars); + int join_rtindex = list_length(pstate->p_rtable) + 1; + ParseNamespaceColumn *nscolumns; + ListCell *lvar; + int col_idx = 0; + + nscolumns = (ParseNamespaceColumn *) + palloc0(colcount * sizeof(ParseNamespaceColumn)); + + foreach (lvar, res_colvars) + { + Var *v = (Var *) lfirst(lvar); + + /* res_colvars is populated by get_res_cols via expandRTE */ + Assert(IsA(v, Var)); + + nscolumns[col_idx].p_varno = join_rtindex; + nscolumns[col_idx].p_varattno = col_idx + 1; + nscolumns[col_idx].p_vartype = v->vartype; + nscolumns[col_idx].p_vartypmod = v->vartypmod; + nscolumns[col_idx].p_varcollid = v->varcollid; + nscolumns[col_idx].p_varnosyn = join_rtindex; + nscolumns[col_idx].p_varattnosyn = col_idx + 1; + col_idx++; + } + + /* make the RTE for the join */ + jnsitem = addRangeTableEntryForJoin(pstate, res_colnames, nscolumns, + j->jointype, 0, res_colvars, + NIL, NIL, j->alias, NULL, true); + Assert(jnsitem->p_rtindex == join_rtindex); + } j->rtindex = jnsitem->p_rtindex;