From a4beeb94ac537461347aa71091da5f8a59d27310 Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Wed, 3 Dec 2025 22:17:09 +0500 Subject: [PATCH 1/6] Add index on id columns (#2117) - Whenever a label is created, indices on id columns are created by default. In the case of a vertex, a unique index on the id column is created, which also serves as a unique constraint. In the case of an edge, a non-unique index on start_id and end_id columns is created. - This change is expected to improve the performance of queries that involve joins. In some performance tests, it was observed that the performance of queries improved a lot. - Loader was updated to insert tuples in indices as well. This slows the loader down a bit, but it was necessary. - A bug related to command ids in cypher_delete executor was also fixed. --- regress/expected/age_load.out | 14 -- regress/expected/cypher_match.out | 72 ++++---- regress/expected/cypher_merge.out | 2 +- regress/expected/cypher_vle.out | 24 +-- regress/expected/expr.out | 32 ++-- regress/expected/graph_generation.out | 20 +-- regress/expected/index.out | 97 +++++++++-- regress/expected/map_projection.out | 2 +- regress/sql/age_load.sql | 6 - regress/sql/index.sql | 34 ++-- src/backend/commands/label_commands.c | 78 ++++++++- src/backend/executor/cypher_delete.c | 4 + src/backend/utils/load/ag_load_edges.c | 75 +-------- src/backend/utils/load/ag_load_labels.c | 215 +----------------------- src/backend/utils/load/age_load.c | 193 +++++++++++++++++---- src/include/utils/load/ag_load_labels.h | 5 - src/include/utils/load/age_load.h | 16 +- 17 files changed, 423 insertions(+), 466 deletions(-) diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index b638e636b..5f2bdab78 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -43,13 +43,6 @@ SELECT load_labels_from_file('agload_test_graph', 'Country', (1 row) --- A temporary table should have been created with 54 
ids; 1 from CREATE and 53 from file -SELECT COUNT(*)=54 FROM "_agload_test_graph_ag_vertex_ids"; - ?column? ----------- - t -(1 row) - -- Sequence should be equal to max entry id i.e. 248 SELECT currval('agload_test_graph."Country_id_seq"')=248; ?column? @@ -74,13 +67,6 @@ NOTICE: VLabel "City" has been created (1 row) --- Temporary table should have 54+72485 rows now -SELECT COUNT(*)=54+72485 FROM "_agload_test_graph_ag_vertex_ids"; - ?column? ----------- - t -(1 row) - -- Sequence should be equal to max entry id i.e. 146941 SELECT currval('agload_test_graph."City_id_seq"')=146941; ?column? diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index ed2b3da08..a0e284beb 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -79,8 +79,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex, {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge, {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex, {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge, {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex]::path [{"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex, {"id": 1407374883553281, "label": "e1", "end_id": 
1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge, {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex, {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge, {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex]::path + [{"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex, {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge, {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex, {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge, {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -88,8 +88,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a ---------------------------------------------------------------------------------- - {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex + {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -97,8 +97,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a ---------------------------------------------------------------------------------- - {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex + {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -115,8 +115,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a 
--------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -132,10 +132,10 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge (4 rows) SELECT * FROM cypher('cypher_match', $$ @@ -143,10 +143,10 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge 
{"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (4 rows) SELECT * FROM cypher('cypher_match', $$ @@ -154,10 +154,10 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge (4 rows) SELECT * FROM cypher('cypher_match', $$ @@ -165,8 +165,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 
1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -174,8 +174,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a ---------------------------------------------------------------------------------- - {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex + {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex (2 rows) -- Right Path Test @@ -250,8 +250,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (2 rows) --Left Path Test @@ -308,8 +308,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (2 rows) --Divergent Path Tests @@ -412,8 +412,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (i agtype); i 
--------------------------------------------------------------------------------------------------------------------------- - {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge + {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -712,8 +712,8 @@ $$) AS (r0 agtype); {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge - {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge + {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge (6 rows) SELECT * FROM cypher('cypher_match', $$ @@ -775,8 +775,8 @@ $$) AS (r1 agtype); {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge - {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, 
"properties": {}}::edge + {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge (12 rows) @@ -1055,8 +1055,8 @@ SELECT * FROM cypher('cypher_match', {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex | {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263937, "label": "v2", "properties": {"id": "initial"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex - {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2251799813685251, "label": "v3", "properties": {"id": "end"}}::vertex | {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex + {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, 
"label": "v3", "properties": {"id": "middle"}}::vertex (6 rows) SELECT * FROM cypher('cypher_match', @@ -1068,8 +1068,8 @@ AS (u agtype, e agtype, v agtype); {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex | {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263937, "label": "v2", "properties": {"id": "initial"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex - {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2251799813685251, "label": "v3", "properties": {"id": "end"}}::vertex | {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex + {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex (6 rows) -- Property Constraint in EXISTS @@ -1123,8 +1123,8 @@ AS (u agtype, e agtype, v agtype); {"id": 1125899906842626, 
"label": "v1", "properties": {"id": "middle"}}::vertex | {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263937, "label": "v2", "properties": {"id": "initial"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex - {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2251799813685251, "label": "v3", "properties": {"id": "end"}}::vertex | {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex + {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2814749767106561, "label": "loop", "properties": {"id": "initial"}}::vertex | {"id": 3096224743817217, "label": "self", "end_id": 2814749767106561, "start_id": 2814749767106561, "properties": {}}::edge | {"id": 2814749767106561, "label": "loop", "properties": {"id": "initial"}}::vertex (7 rows) @@ 
-1156,8 +1156,8 @@ AS (u agtype, e agtype, v agtype); {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex | {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263937, "label": "v2", "properties": {"id": "initial"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex - {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2251799813685251, "label": "v3", "properties": {"id": "end"}}::vertex | {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex + {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2814749767106561, "label": "loop", "properties": {"id": "initial"}}::vertex | {"id": 3096224743817217, "label": "self", "end_id": 2814749767106561, "start_id": 2814749767106561, "properties": {}}::edge | {"id": 
2814749767106561, "label": "loop", "properties": {"id": "initial"}}::vertex (7 rows) @@ -2164,8 +2164,8 @@ SELECT * FROM cypher('cypher_match', $$ MATCH p=(u)-[]-()-[]-(u) RETURN p $$)as p ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex]::path - [{"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex]::path [{"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 
281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex]::path + [{"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex]::path [{"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex]::path (4 rows) @@ -2407,15 +2407,15 @@ SELECT * FROM cypher('cypher_match', $$ MATCH (a {name:a.name}) MATCH (a {age:a. 
SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship}]->(b) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship, years: u.years}]->(b) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": 
"orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ MATCH p=(a {name:a.name})-[u {relationship: u.relationship}]->(b {age:b.age}) RETURN p $$) as (a agtype); @@ -3514,19 +3514,17 @@ SELECT count(*) FROM cypher('test_enable_containment', $$ MATCH p=(x:Customer)-[ (1 row) SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer)-[:bought ={store: 'Amazon', addr:{city: 'Vancouver', street: 30}}]->(y:Product) RETURN 0 $$) as (a agtype); - QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Hash Join - Hash Cond: (y.id = _age_default_alias_0.end_id) - -> Seq Scan on "Product" y - -> Hash - -> Hash Join - Hash Cond: (x.id = _age_default_alias_0.start_id) - -> Seq Scan on "Customer" x - -> Hash - -> Seq Scan on bought _age_default_alias_0 - Filter: ((agtype_access_operator(VARIADIC ARRAY[properties, '"store"'::agtype]) = 
'"Amazon"'::agtype) AND (agtype_access_operator(VARIADIC ARRAY[properties, '"addr"'::agtype]) = '{"city": "Vancouver", "street": 30}'::agtype)) -(10 rows) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Nested Loop + -> Nested Loop + -> Seq Scan on bought _age_default_alias_0 + Filter: ((agtype_access_operator(VARIADIC ARRAY[properties, '"store"'::agtype]) = '"Amazon"'::agtype) AND (agtype_access_operator(VARIADIC ARRAY[properties, '"addr"'::agtype]) = '{"city": "Vancouver", "street": 30}'::agtype)) + -> Index Only Scan using "Customer_pkey" on "Customer" x + Index Cond: (id = _age_default_alias_0.start_id) + -> Index Only Scan using "Product_pkey" on "Product" y + Index Cond: (id = _age_default_alias_0.end_id) +(8 rows) SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer ={school: { name: 'XYZ College',program: { major: 'Psyc', degree: 'BSc'} },phone: [ 123456789, 987654321, 456987123 ]}) RETURN 0 $$) as (a agtype); QUERY PLAN diff --git a/regress/expected/cypher_merge.out b/regress/expected/cypher_merge.out index 238a4c472..56a23f513 100644 --- a/regress/expected/cypher_merge.out +++ b/regress/expected/cypher_merge.out @@ -655,8 +655,8 @@ $$) AS (name agtype, bornIn agtype, city agtype); name | bornin | city -------------------+--------------+----------------------------------------------------------------------------------------- "Rob Reiner" | "New York" | {"id": 1970324836974593, "label": "City", "properties": {"name": "New York"}}::vertex - "Martin Sheen" | "Ohio" | {"id": 1970324836974595, "label": "City", "properties": {"name": "Ohio"}}::vertex "Michael Douglas" | "New Jersey" | {"id": 1970324836974594, "label": "City", "properties": {"name": "New Jersey"}}::vertex + "Martin Sheen" | "Ohio" | {"id": 
1970324836974595, "label": "City", "properties": {"name": "Ohio"}}::vertex (3 rows) --validate diff --git a/regress/expected/cypher_vle.out b/regress/expected/cypher_vle.out index 9cbb3420c..57f930d98 100644 --- a/regress/expected/cypher_vle.out +++ b/regress/expected/cypher_vle.out @@ -508,37 +508,37 @@ SELECT * FROM cypher('cypher_vle', $$MATCH p=(u)-[e*0..0]->(v) RETURN id(u), p, SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[*0..0]->()-[]->() RETURN p $$) AS (p agtype); p ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 1125899906842628, "label": "edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "main edge", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": 
{}}::vertex]::path - [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1970324836974593, "label": "self_loop", "end_id": 1407374883553281, "start_id": 1407374883553281, "properties": {"name": "self loop", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 
1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path - [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, 
"label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 
1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path (13 rows) SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[]->()-[*0..0]->() RETURN p $$) AS (p agtype); p ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 1125899906842628, "label": "edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "main edge", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 
1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1970324836974593, "label": "self_loop", "end_id": 1407374883553281, "start_id": 1407374883553281, "properties": {"name": "self loop", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", 
"properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 
1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path (13 rows) -- diff --git a/regress/expected/expr.out b/regress/expected/expr.out index 052caf777..033fe1de7 100644 --- a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -2688,10 +2688,10 @@ SELECT * FROM cypher('expr', $$ MATCH (v) RETURN v $$) AS (expression agtype); SELECT * FROM cypher('expr', $$ MATCH ()-[e]-() RETURN e $$) AS (expression agtype); expression --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, 
"start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge (4 rows) -- id() @@ -2700,10 +2700,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (id agtype); id ------------------ - 1407374883553281 - 1407374883553281 1407374883553282 1407374883553282 + 1407374883553281 + 1407374883553281 (4 rows) SELECT * FROM cypher('expr', $$ @@ -2742,10 +2742,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (start_id agtype); start_id ------------------ - 1125899906842626 - 1125899906842626 1125899906842625 1125899906842625 + 1125899906842626 + 1125899906842626 (4 rows) -- should return null @@ -2775,10 +2775,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (end_id agtype); end_id ------------------ - 1125899906842627 - 1125899906842627 1125899906842626 1125899906842626 + 1125899906842627 + 1125899906842627 (4 rows) -- should return null @@ -2808,10 +2808,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (id agtype, start_id agtype, startNode agtype); id | start_id | startnode ------------------+------------------+---------------------------------------------------------------------------------- - 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex - 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex 1407374883553282 | 1125899906842625 | {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex 1407374883553282 | 1125899906842625 | {"id": 1125899906842625, "label": 
"v1", "properties": {"id": "initial"}}::vertex + 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex + 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex (4 rows) -- should return null @@ -2841,10 +2841,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (id agtype, end_id agtype, endNode agtype); id | end_id | endnode ------------------+------------------+--------------------------------------------------------------------------------- - 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex - 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex 1407374883553282 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex 1407374883553282 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex + 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex + 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex (4 rows) -- should return null @@ -7632,10 +7632,10 @@ SELECT * FROM cypher('opt_forms', $$MATCH (u) RETURN *$$) AS (result agtype); SELECT * FROM cypher('opt_forms', $$MATCH (u)--(v) RETURN u.i, v.i$$) AS (u agtype, v agtype); u | v ---+--- - 2 | 3 - 3 | 2 1 | 2 2 | 1 + 2 | 3 + 3 | 2 (4 rows) SELECT * FROM cypher('opt_forms', $$MATCH (u)-->(v) RETURN u.i, v.i$$) AS (u agtype, v agtype); @@ -7822,12 +7822,12 @@ SELECT * FROM cypher('keys', $$MATCH (v) RETURN keys(v)$$) AS (vertex_keys agtyp SELECT * FROM cypher('keys', $$MATCH ()-[e]-() RETURN keys(e)$$) AS (edge_keys agtype); edge_keys ----------- - [] - [] ["song"] ["song"] + [] ["song"] ["song"] + [] (6 rows) SELECT * FROM cypher('keys', $$RETURN 
keys({a:1,b:'two',c:[1,2,3]})$$) AS (keys agtype); diff --git a/regress/expected/graph_generation.out b/regress/expected/graph_generation.out index 235052a08..ca511eafa 100644 --- a/regress/expected/graph_generation.out +++ b/regress/expected/graph_generation.out @@ -43,15 +43,15 @@ SELECT * FROM cypher('gp1', $$MATCH (a)-[e]->(b) RETURN e$$) as (n agtype); n ---------------------------------------------------------------------------------------------------------------------------- {"id": 1125899906842625, "label": "edges", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge - {"id": 1125899906842629, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842626, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131969, "properties": {}}::edge - {"id": 1125899906842630, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131970, "properties": {}}::edge + {"id": 1125899906842629, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842627, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131969, "properties": {}}::edge + {"id": 1125899906842630, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842632, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131971, "properties": {}}::edge + {"id": 1125899906842628, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131969, "properties": {}}::edge {"id": 1125899906842631, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131970, "properties": {}}::edge - {"id": 1125899906842634, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131972, "properties": {}}::edge {"id": 1125899906842633, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131971, "properties": {}}::edge - {"id": 
1125899906842628, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131969, "properties": {}}::edge + {"id": 1125899906842634, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131972, "properties": {}}::edge (10 rows) SELECT * FROM create_complete_graph('gp1',5,'edges','vertices'); @@ -140,25 +140,25 @@ SELECT * FROM cypher('gp1', $$MATCH (a)-[e]->(b) RETURN e$$) as (n agtype); n ---------------------------------------------------------------------------------------------------------------------------- {"id": 1125899906842625, "label": "edges", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge - {"id": 1125899906842629, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842626, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131969, "properties": {}}::edge + {"id": 1125899906842629, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842627, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131969, "properties": {}}::edge - {"id": 1125899906842632, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131971, "properties": {}}::edge {"id": 1125899906842630, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131970, "properties": {}}::edge - {"id": 1125899906842634, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131972, "properties": {}}::edge + {"id": 1125899906842632, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131971, "properties": {}}::edge {"id": 1125899906842628, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131969, "properties": {}}::edge {"id": 1125899906842631, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842633, "label": "edges", "end_id": 844424930131973, "start_id": 
844424930131971, "properties": {}}::edge + {"id": 1125899906842634, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131972, "properties": {}}::edge {"id": 1125899906842635, "label": "edges", "end_id": 844424930131975, "start_id": 844424930131974, "properties": {}}::edge - {"id": 1125899906842639, "label": "edges", "end_id": 844424930131976, "start_id": 844424930131975, "properties": {}}::edge {"id": 1125899906842636, "label": "edges", "end_id": 844424930131976, "start_id": 844424930131974, "properties": {}}::edge + {"id": 1125899906842639, "label": "edges", "end_id": 844424930131976, "start_id": 844424930131975, "properties": {}}::edge {"id": 1125899906842637, "label": "edges", "end_id": 844424930131977, "start_id": 844424930131974, "properties": {}}::edge - {"id": 1125899906842642, "label": "edges", "end_id": 844424930131977, "start_id": 844424930131976, "properties": {}}::edge {"id": 1125899906842640, "label": "edges", "end_id": 844424930131977, "start_id": 844424930131975, "properties": {}}::edge - {"id": 1125899906842644, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131977, "properties": {}}::edge + {"id": 1125899906842642, "label": "edges", "end_id": 844424930131977, "start_id": 844424930131976, "properties": {}}::edge {"id": 1125899906842638, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131974, "properties": {}}::edge {"id": 1125899906842641, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131975, "properties": {}}::edge {"id": 1125899906842643, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131976, "properties": {}}::edge + {"id": 1125899906842644, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131977, "properties": {}}::edge {"id": 1125899906842645, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131969, "properties": {}}::edge (21 rows) diff --git a/regress/expected/index.out b/regress/expected/index.out index 
f911900ab..3ed7b1c33 100644 --- a/regress/expected/index.out +++ b/regress/expected/index.out @@ -264,18 +264,22 @@ $$) as (n agtype); --- (0 rows) -ALTER TABLE cypher_index."Country" ADD PRIMARY KEY (id); -CREATE UNIQUE INDEX CONCURRENTLY cntry_id_idx ON cypher_index."Country" (id); -ALTER TABLE cypher_index."Country" CLUSTER ON cntry_id_idx; -ALTER TABLE cypher_index."City" ADD PRIMARY KEY (id); -CREATE UNIQUE INDEX city_id_idx ON cypher_index."City" (id); -ALTER TABLE cypher_index."City" CLUSTER ON city_id_idx; -ALTER TABLE cypher_index.has_city -ADD CONSTRAINT has_city_end_fk FOREIGN KEY (end_id) -REFERENCES cypher_index."Country"(id) MATCH FULL; -CREATE INDEX load_has_city_eid_idx ON cypher_index.has_city (end_id); -CREATE INDEX load_has_city_sid_idx ON cypher_index.has_city (start_id); -ALTER TABLE cypher_index."has_city" CLUSTER ON load_has_city_eid_idx; +-- Verify that the indices are created on id columns +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index'; + indexname | indexdef +-----------------------------+------------------------------------------------------------------------------------------------ + _ag_label_edge_pkey | CREATE UNIQUE INDEX _ag_label_edge_pkey ON cypher_index._ag_label_edge USING btree (id) + _ag_label_edge_start_id_idx | CREATE INDEX _ag_label_edge_start_id_idx ON cypher_index._ag_label_edge USING btree (start_id) + _ag_label_edge_end_id_idx | CREATE INDEX _ag_label_edge_end_id_idx ON cypher_index._ag_label_edge USING btree (end_id) + _ag_label_vertex_pkey | CREATE UNIQUE INDEX _ag_label_vertex_pkey ON cypher_index._ag_label_vertex USING btree (id) + idx_pkey | CREATE UNIQUE INDEX idx_pkey ON cypher_index.idx USING btree (id) + cypher_index_idx_props_uq | CREATE UNIQUE INDEX cypher_index_idx_props_uq ON cypher_index.idx USING btree (properties) + Country_pkey | CREATE UNIQUE INDEX "Country_pkey" ON cypher_index."Country" USING btree (id) + has_city_start_id_idx | CREATE INDEX has_city_start_id_idx ON
cypher_index.has_city USING btree (start_id) + has_city_end_id_idx | CREATE INDEX has_city_end_id_idx ON cypher_index.has_city USING btree (end_id) + City_pkey | CREATE UNIQUE INDEX "City_pkey" ON cypher_index."City" USING btree (id) +(10 rows) + SET enable_mergejoin = ON; SET enable_hashjoin = OFF; SET enable_nestloop = OFF; @@ -288,6 +292,29 @@ $$) as (n agtype); 10 (1 row) +SELECT COUNT(*) FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() + RETURN e +$$) as (n agtype); + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Aggregate + -> Merge Join + Merge Cond: (_age_default_alias_0.id = e.start_id) + -> Merge Append + Sort Key: _age_default_alias_0.id + -> Index Only Scan using _ag_label_vertex_pkey on _ag_label_vertex _age_default_alias_0_1 + -> Index Only Scan using idx_pkey on idx _age_default_alias_0_2 + -> Index Only Scan using "Country_pkey" on "Country" _age_default_alias_0_3 + -> Index Only Scan using "City_pkey" on "City" _age_default_alias_0_4 + -> Sort + Sort Key: e.start_id + -> Merge Join + Merge Cond: (a.id = e.end_id) + -> Index Only Scan using "Country_pkey" on "Country" a + -> Index Scan using has_city_end_id_idx on has_city e +(15 rows) + SET enable_mergejoin = OFF; SET enable_hashjoin = ON; SET enable_nestloop = OFF; @@ -300,17 +327,53 @@ $$) as (n agtype); 10 (1 row) +SELECT COUNT(*) FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() + RETURN e +$$) as (n agtype); + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (_age_default_alias_0.id = e.start_id) + -> Append + -> Index Only Scan using _ag_label_vertex_pkey on _ag_label_vertex _age_default_alias_0_1 + -> Index Only Scan using idx_pkey on idx _age_default_alias_0_2 + -> Index Only Scan using "Country_pkey" on "Country"
_age_default_alias_0_3 + -> Index Only Scan using "City_pkey" on "City" _age_default_alias_0_4 + -> Hash + -> Hash Join + Hash Cond: (e.end_id = a.id) + -> Index Scan using has_city_end_id_idx on has_city e + -> Hash + -> Index Only Scan using "Country_pkey" on "Country" a +(14 rows) + SET enable_mergejoin = OFF; SET enable_hashjoin = OFF; SET enable_nestloop = ON; SELECT COUNT(*) FROM cypher('cypher_index', $$ - MATCH (a:Country)<-[e:has_city]-() + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() RETURN e $$) as (n agtype); - count -------- - 10 -(1 row) + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Aggregate + -> Nested Loop + -> Nested Loop + -> Index Scan using has_city_start_id_idx on has_city e + -> Index Only Scan using "Country_pkey" on "Country" a + Index Cond: (id = e.end_id) + -> Append + -> Index Only Scan using _ag_label_vertex_pkey on _ag_label_vertex _age_default_alias_0_1 + Index Cond: (id = e.start_id) + -> Index Only Scan using idx_pkey on idx _age_default_alias_0_2 + Index Cond: (id = e.start_id) + -> Index Only Scan using "Country_pkey" on "Country" _age_default_alias_0_3 + Index Cond: (id = e.start_id) + -> Index Only Scan using "City_pkey" on "City" _age_default_alias_0_4 + Index Cond: (id = e.start_id) +(15 rows) SET enable_mergejoin = ON; SET enable_hashjoin = ON; diff --git a/regress/expected/map_projection.out b/regress/expected/map_projection.out index dcb7f0e76..f0c45c557 100644 --- a/regress/expected/map_projection.out +++ b/regress/expected/map_projection.out @@ -152,7 +152,7 @@ $$ $$) as (a agtype); a -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"name": "Christian Bale", "movies": [{"title": "The Prestige"}, {"title": "The Dark Knight"}]}, {"name": "Tom Hanks", "movies": [{"title": "Forrest
Gump"}, {"title": "Finch"}, {"title": "The Circle"}]}] + [{"name": "Tom Hanks", "movies": [{"title": "Forrest Gump"}, {"title": "Finch"}, {"title": "The Circle"}]}, {"name": "Christian Bale", "movies": [{"title": "The Prestige"}, {"title": "The Dark Knight"}]}] (1 row) -- drop diff --git a/regress/sql/age_load.sql b/regress/sql/age_load.sql index 425ca5417..180248bf1 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -34,9 +34,6 @@ SELECT * FROM cypher('agload_test_graph', $$CREATE (n:Country {__id__:1}) RETURN SELECT load_labels_from_file('agload_test_graph', 'Country', 'age_load/countries.csv', true); --- A temporary table should have been created with 54 ids; 1 from CREATE and 53 from file -SELECT COUNT(*)=54 FROM "_agload_test_graph_ag_vertex_ids"; - -- Sequence should be equal to max entry id i.e. 248 SELECT currval('agload_test_graph."Country_id_seq"')=248; @@ -52,9 +49,6 @@ SELECT load_labels_from_file('agload_test_graph', 'Country', SELECT load_labels_from_file('agload_test_graph', 'City', 'age_load/cities.csv', true); --- Temporary table should have 54+72485 rows now -SELECT COUNT(*)=54+72485 FROM "_agload_test_graph_ag_vertex_ids"; - -- Sequence should be equal to max entry id i.e.
146941 SELECT currval('agload_test_graph."City_id_seq"')=146941; diff --git a/regress/sql/index.sql b/regress/sql/index.sql index aac1dc40e..d9a4331a4 100644 --- a/regress/sql/index.sql +++ b/regress/sql/index.sql @@ -166,26 +166,8 @@ SELECT * FROM cypher('cypher_index', $$ (mx)<-[:has_city]-(:City {city_id: 10, name:"Tijuana", west_coast: false, country_code:"MX"}) $$) as (n agtype); -ALTER TABLE cypher_index."Country" ADD PRIMARY KEY (id); - -CREATE UNIQUE INDEX CONCURRENTLY cntry_id_idx ON cypher_index."Country" (id); -ALTER TABLE cypher_index."Country" CLUSTER ON cntry_id_idx; - -ALTER TABLE cypher_index."City" ADD PRIMARY KEY (id); - -CREATE UNIQUE INDEX city_id_idx ON cypher_index."City" (id); - -ALTER TABLE cypher_index."City" CLUSTER ON city_id_idx; - -ALTER TABLE cypher_index.has_city -ADD CONSTRAINT has_city_end_fk FOREIGN KEY (end_id) -REFERENCES cypher_index."Country"(id) MATCH FULL; - -CREATE INDEX load_has_city_eid_idx ON cypher_index.has_city (end_id); - -CREATE INDEX load_has_city_sid_idx ON cypher_index.has_city (start_id); - -ALTER TABLE cypher_index."has_city" CLUSTER ON load_has_city_eid_idx; +-- Verify that the indices are created on id columns +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index'; SET enable_mergejoin = ON; SET enable_hashjoin = OFF; @@ -196,6 +178,11 @@ SELECT COUNT(*) FROM cypher('cypher_index', $$ RETURN e $$) as (n agtype); +SELECT COUNT(*) FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() + RETURN e +$$) as (n agtype); + SET enable_mergejoin = OFF; SET enable_hashjoin = ON; SET enable_nestloop = OFF; @@ -205,12 +192,17 @@ SELECT COUNT(*) FROM cypher('cypher_index', $$ RETURN e $$) as (n agtype); +SELECT COUNT(*) FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() + RETURN e +$$) as (n agtype); + SET enable_mergejoin = OFF; SET enable_hashjoin = OFF; SET enable_nestloop = ON; SELECT COUNT(*) FROM cypher('cypher_index', $$ -
MATCH (a:Country)<-[e:has_city]-() + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() RETURN e $$) as (n agtype); diff --git a/src/backend/commands/label_commands.c b/src/backend/commands/label_commands.c index 568bf987b..1aa7ed0f4 100644 --- a/src/backend/commands/label_commands.c +++ b/src/backend/commands/label_commands.c @@ -93,6 +93,10 @@ static void range_var_callback_for_remove_relation(const RangeVar *rel, Oid rel_oid, Oid odl_rel_oid, void *arg); +static void create_index_on_column(char *schema_name, + char *rel_name, + char *colname, + bool unique); PG_FUNCTION_INFO_V1(age_is_valid_label_name); @@ -393,16 +397,24 @@ static void create_table_for_label(char *graph_name, char *label_name, * inheritance system. */ if (list_length(parents) != 0) + { create_stmt->tableElts = NIL; + } else if (label_type == LABEL_TYPE_EDGE) + { create_stmt->tableElts = create_edge_table_elements( graph_name, label_name, schema_name, rel_name, seq_name); + } else if (label_type == LABEL_TYPE_VERTEX) + { create_stmt->tableElts = create_vertex_table_elements( graph_name, label_name, schema_name, rel_name, seq_name); + } else + { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("undefined label type \'%c\'", label_type))); + } create_stmt->inhRelations = parents; create_stmt->partbound = NULL; @@ -423,7 +435,69 @@ static void create_table_for_label(char *graph_name, char *label_name, ProcessUtility(wrapper, "(generated CREATE TABLE command)", false, PROCESS_UTILITY_SUBCOMMAND, NULL, NULL, None_Receiver, NULL); - /* CommandCounterIncrement() is called in ProcessUtility() */ + + /* Create index on id columns */ + if (label_type == LABEL_TYPE_VERTEX) + { + create_index_on_column(schema_name, rel_name, "id", true); + } + else if (label_type == LABEL_TYPE_EDGE) + { + create_index_on_column(schema_name, rel_name, "start_id", false); + create_index_on_column(schema_name, rel_name, "end_id", false); + } +} + +static void create_index_on_column(char *schema_name, + char 
*rel_name, + char *colname, + bool unique) +{ + IndexStmt *index_stmt; + IndexElem *index_col; + PlannedStmt *index_wrapper; + + index_stmt = makeNode(IndexStmt); + index_col = makeNode(IndexElem); + index_col->name = colname; + index_col->expr = NULL; + index_col->indexcolname = NULL; + index_col->collation = InvalidOid; + index_col->opclass = list_make1(makeString("graphid_ops")); + index_col->opclassopts = NIL; + index_col->ordering = SORTBY_DEFAULT; + index_col->nulls_ordering = SORTBY_NULLS_DEFAULT; + + index_stmt->relation = makeRangeVar(schema_name, rel_name, -1); + index_stmt->accessMethod = "btree"; + index_stmt->tableSpace = NULL; + index_stmt->indexParams = list_make1(index_col); + index_stmt->options = NIL; + index_stmt->whereClause = NULL; + index_stmt->excludeOpNames = NIL; + index_stmt->idxcomment = NULL; + index_stmt->indexOid = InvalidOid; + index_stmt->unique = unique; + index_stmt->nulls_not_distinct = false; + index_stmt->primary = unique; + index_stmt->isconstraint = unique; + index_stmt->deferrable = false; + index_stmt->initdeferred = false; + index_stmt->transformed = false; + index_stmt->concurrent = false; + index_stmt->if_not_exists = false; + index_stmt->reset_default_tblspc = false; + + index_wrapper = makeNode(PlannedStmt); + index_wrapper->commandType = CMD_UTILITY; + index_wrapper->canSetTag = false; + index_wrapper->utilityStmt = (Node *)index_stmt; + index_wrapper->stmt_location = -1; + index_wrapper->stmt_len = 0; + + ProcessUtility(index_wrapper, "(generated CREATE INDEX command)", false, + PROCESS_UTILITY_SUBCOMMAND, NULL, NULL, None_Receiver, + NULL); } /* @@ -482,7 +556,7 @@ static List *create_vertex_table_elements(char *graph_name, char *label_name, /* "id" graphid PRIMARY KEY DEFAULT "ag_catalog"."_graphid"(...) 
*/ id = makeColumnDef(AG_VERTEX_COLNAME_ID, GRAPHIDOID, -1, InvalidOid); - id->constraints = list_make2(build_pk_constraint(), + id->constraints = list_make2(build_not_null_constraint(), build_id_default(graph_name, label_name, schema_name, seq_name)); diff --git a/src/backend/executor/cypher_delete.c b/src/backend/executor/cypher_delete.c index d58513535..4766c6e7a 100644 --- a/src/backend/executor/cypher_delete.c +++ b/src/backend/executor/cypher_delete.c @@ -343,6 +343,10 @@ static void delete_entity(EState *estate, ResultRelInfo *resultRelInfo, } /* increment the command counter */ CommandCounterIncrement(); + + /* Update command id in estate */ + estate->es_snapshot->curcid = GetCurrentCommandId(false); + estate->es_output_cid = GetCurrentCommandId(false); } else if (lock_result != TM_Invisible && lock_result != TM_SelfModified) { diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index 30dc4761d..67049431c 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -22,11 +22,6 @@ #include "utils/load/ag_load_edges.h" #include "utils/load/csv.h" -void init_edge_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid); -void finish_edge_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid); - void edge_field_cb(void *field, size_t field_len, void *data) { @@ -131,7 +126,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) if (batch_state->num_tuples >= batch_state->max_tuples) { /* Insert the batch when it is full (i.e. 
BATCH_SIZE) */ - insert_batch(batch_state, cr->label_name, cr->graph_oid); + insert_batch(batch_state); batch_state->num_tuples = 0; } } @@ -223,7 +218,7 @@ int create_edges_from_csv_file(char *file_path, cr.load_as_agtype = load_as_agtype; /* Initialize the batch insert state */ - init_edge_batch_insert(&cr.batch_state, label_name, graph_oid); + init_batch_insert(&cr.batch_state, label_name, graph_oid); while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) { @@ -238,7 +233,7 @@ int create_edges_from_csv_file(char *file_path, csv_fini(&p, edge_field_cb, edge_row_cb, &cr); /* Finish any remaining batch inserts */ - finish_edge_batch_insert(&cr.batch_state, label_name, graph_oid); + finish_batch_insert(&cr.batch_state); if (ferror(fp)) { @@ -250,66 +245,4 @@ int create_edges_from_csv_file(char *file_path, free(cr.fields); csv_free(&p); return EXIT_SUCCESS; -} - -/* - * Initialize the batch insert state for edges. - */ -void init_edge_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid) -{ - Relation relation; - int i; - - // Open a temporary relation to get the tuple descriptor - relation = table_open(get_label_relation(label_name, graph_oid), AccessShareLock); - - // Initialize the batch insert state - *batch_state = (batch_insert_state *) palloc0(sizeof(batch_insert_state)); - (*batch_state)->max_tuples = BATCH_SIZE; - (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); - (*batch_state)->num_tuples = 0; - - // Create slots - for (i = 0; i < BATCH_SIZE; i++) - { - (*batch_state)->slots[i] = MakeSingleTupleTableSlot( - RelationGetDescr(relation), - &TTSOpsHeapTuple); - } - - table_close(relation, AccessShareLock); -} - -/* - * Finish the batch insert for edges. Insert the - * remaining tuples in the batch state and clean up. 
- */ -void finish_edge_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid) -{ - int i; - Relation relation; - - if ((*batch_state)->num_tuples > 0) - { - insert_batch(*batch_state, label_name, graph_oid); - (*batch_state)->num_tuples = 0; - } - - // Open a temporary relation to ensure resources are properly cleaned up - relation = table_open(get_label_relation(label_name, graph_oid), AccessShareLock); - - // Free slots - for (i = 0; i < BATCH_SIZE; i++) - { - ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); - } - - // Clean up batch state - pfree_if_not_null((*batch_state)->slots); - pfree_if_not_null(*batch_state); - *batch_state = NULL; - - table_close(relation, AccessShareLock); -} +} \ No newline at end of file diff --git a/src/backend/utils/load/ag_load_labels.c b/src/backend/utils/load/ag_load_labels.c index 2ab223346..4a04f3cd8 100644 --- a/src/backend/utils/load/ag_load_labels.c +++ b/src/backend/utils/load/ag_load_labels.c @@ -24,18 +24,6 @@ #include "utils/load/ag_load_labels.h" #include "utils/load/csv.h" -static void setup_temp_table_for_vertex_ids(char *graph_name); -static void insert_batch_in_temp_table(batch_insert_state *batch_state, - Oid graph_oid, Oid relid); -static void init_vertex_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid, - Oid temp_table_relid); -static void finish_vertex_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid, - Oid temp_table_relid); -static void insert_vertex_batch(batch_insert_state *batch_state, char *label_name, - Oid graph_oid, Oid temp_table_relid); - void vertex_field_cb(void *field, size_t field_len, void *data) { @@ -75,7 +63,6 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data) graphid vertex_id; int64 entry_id; TupleTableSlot *slot; - TupleTableSlot *temp_id_slot; n_fields = cr->cur_field; @@ -114,11 +101,9 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data) /* Get the 
appropriate slot from the batch state */ slot = batch_state->slots[batch_state->num_tuples]; - temp_id_slot = batch_state->temp_id_slots[batch_state->num_tuples]; /* Clear the slots contents */ ExecClearTuple(slot); - ExecClearTuple(temp_id_slot); /* Fill the values in the slot */ slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); @@ -129,20 +114,15 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data) slot->tts_isnull[0] = false; slot->tts_isnull[1] = false; - temp_id_slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); - temp_id_slot->tts_isnull[0] = false; - /* Make the slot as containing virtual tuple */ ExecStoreVirtualTuple(slot); - ExecStoreVirtualTuple(temp_id_slot); batch_state->num_tuples++; if (batch_state->num_tuples >= batch_state->max_tuples) { /* Insert the batch when it is full (i.e. BATCH_SIZE) */ - insert_vertex_batch(batch_state, cr->label_name, cr->graph_oid, - cr->temp_table_relid); + insert_batch(batch_state); batch_state->num_tuples = 0; } } @@ -202,7 +182,6 @@ int create_labels_from_csv_file(char *file_path, unsigned char options = 0; csv_vertex_reader cr; char *label_seq_name; - Oid temp_table_relid; if (csv_init(&p, options) != 0) { @@ -210,13 +189,6 @@ int create_labels_from_csv_file(char *file_path, (errmsg("Failed to initialize csv parser\n"))); } - temp_table_relid = RelnameGetRelid(GET_TEMP_VERTEX_ID_TABLE(graph_name)); - if (!OidIsValid(temp_table_relid)) - { - setup_temp_table_for_vertex_ids(graph_name); - temp_table_relid = RelnameGetRelid(GET_TEMP_VERTEX_ID_TABLE(graph_name)); - } - csv_set_space_func(&p, is_space); csv_set_term_func(&p, is_term); @@ -243,7 +215,6 @@ int create_labels_from_csv_file(char *file_path, cr.id_field_exists = id_field_exists; cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); cr.load_as_agtype = load_as_agtype; - cr.temp_table_relid = temp_table_relid; if (cr.id_field_exists) { @@ -258,8 +229,7 @@ int create_labels_from_csv_file(char *file_path, } /* Initialize the batch 
insert state */ - init_vertex_batch_insert(&cr.batch_state, label_name, graph_oid, - cr.temp_table_relid); + init_batch_insert(&cr.batch_state, label_name, graph_oid); while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) { @@ -274,8 +244,7 @@ int create_labels_from_csv_file(char *file_path, csv_fini(&p, vertex_field_cb, vertex_row_cb, &cr); /* Finish any remaining batch inserts */ - finish_vertex_batch_insert(&cr.batch_state, label_name, graph_oid, - cr.temp_table_relid); + finish_batch_insert(&cr.batch_state); if (ferror(fp)) { @@ -288,180 +257,4 @@ int create_labels_from_csv_file(char *file_path, free(cr.fields); csv_free(&p); return EXIT_SUCCESS; -} - -static void insert_vertex_batch(batch_insert_state *batch_state, char *label_name, - Oid graph_oid, Oid temp_table_relid) -{ - insert_batch_in_temp_table(batch_state, graph_oid, temp_table_relid); - insert_batch(batch_state, label_name, graph_oid); -} - -/* - * Create and populate a temporary table with vertex ids that are already - * present in the graph. This table will be used to check if the new vertex - * id generated by loader is a duplicate. - * Unique index is created to enforce uniqueness of the ids. - * - * We dont need this for loading edges since the ids are generated using - * sequence and are unique. - */ -static void setup_temp_table_for_vertex_ids(char *graph_name) -{ - char *create_as_query; - char *index_query; - - create_as_query = psprintf("CREATE TEMP TABLE IF NOT EXISTS %s AS " - "SELECT DISTINCT id FROM \"%s\".%s", - GET_TEMP_VERTEX_ID_TABLE(graph_name), graph_name, - AG_DEFAULT_LABEL_VERTEX); - - index_query = psprintf("CREATE UNIQUE INDEX ON %s (id)", - GET_TEMP_VERTEX_ID_TABLE(graph_name)); - SPI_connect(); - SPI_execute(create_as_query, false, 0); - SPI_execute(index_query, false, 0); - - SPI_finish(); -} - -/* - * Inserts batch of tuples into the temporary table. - * This function also updates the index to check for - * uniqueness of the ids. 
- */ -static void insert_batch_in_temp_table(batch_insert_state *batch_state, - Oid graph_oid, Oid relid) -{ - int i; - EState *estate; - ResultRelInfo *resultRelInfo; - Relation rel; - List *result; - - rel = table_open(relid, RowExclusiveLock); - - /* Initialize executor state */ - estate = CreateExecutorState(); - - /* Initialize result relation information */ - resultRelInfo = makeNode(ResultRelInfo); - InitResultRelInfo(resultRelInfo, rel, 1, NULL, estate->es_instrument); - estate->es_result_relations = &resultRelInfo; - - /* Open the indices */ - ExecOpenIndices(resultRelInfo, false); - - /* Insert the batch into the temporary table */ - heap_multi_insert(rel, batch_state->temp_id_slots, batch_state->num_tuples, - GetCurrentCommandId(true), 0, NULL); - - for (i = 0; i < batch_state->num_tuples; i++) - { - result = ExecInsertIndexTuples(resultRelInfo, batch_state->temp_id_slots[i], - estate, false, true, NULL, NIL, false); - /* Check if the unique cnstraint is violated */ - if (list_length(result) != 0) - { - Datum id; - bool isnull; - - id = slot_getattr(batch_state->temp_id_slots[i], 1, &isnull); - ereport(ERROR, (errmsg("Cannot insert duplicate vertex id: %ld", - DATUM_GET_GRAPHID(id)), - errhint("Entry id %ld is already used", - get_graphid_entry_id(id)))); - } - } - /* Clean up and close the indices */ - ExecCloseIndices(resultRelInfo); - - FreeExecutorState(estate); - table_close(rel, RowExclusiveLock); - - CommandCounterIncrement(); -} - -/* - * Initialize the batch insert state for vertices. 
- */ -static void init_vertex_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid, - Oid temp_table_relid) -{ - Relation relation; - Oid relid; - - Relation temp_table_relation; - int i; - - /* Open a temporary relation to get the tuple descriptor */ - relid = get_label_relation(label_name, graph_oid); - relation = table_open(relid, AccessShareLock); - - temp_table_relation = table_open(temp_table_relid, AccessShareLock); - - /* Initialize the batch insert state */ - *batch_state = (batch_insert_state *) palloc0(sizeof(batch_insert_state)); - (*batch_state)->max_tuples = BATCH_SIZE; - (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); - (*batch_state)->temp_id_slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); - (*batch_state)->num_tuples = 0; - - /* Create slots */ - for (i = 0; i < BATCH_SIZE; i++) - { - (*batch_state)->slots[i] = MakeSingleTupleTableSlot( - RelationGetDescr(relation), - &TTSOpsHeapTuple); - (*batch_state)->temp_id_slots[i] = MakeSingleTupleTableSlot( - RelationGetDescr(temp_table_relation), - &TTSOpsHeapTuple); - } - - table_close(relation, AccessShareLock); - table_close(temp_table_relation, AccessShareLock); -} - -/* - * Finish the batch insert for vertices. Insert the - * remaining tuples in the batch state and clean up. 
- */ -static void finish_vertex_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid, - Oid temp_table_relid) -{ - Relation relation; - Oid relid; - - Relation temp_table_relation; - int i; - - if ((*batch_state)->num_tuples > 0) - { - insert_vertex_batch(*batch_state, label_name, graph_oid, temp_table_relid); - (*batch_state)->num_tuples = 0; - } - - /* Open a temporary relation to ensure resources are properly cleaned up */ - relid = get_label_relation(label_name, graph_oid); - relation = table_open(relid, AccessShareLock); - - temp_table_relation = table_open(temp_table_relid, AccessShareLock); - - /* Free slots */ - for (i = 0; i < BATCH_SIZE; i++) - { - ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); - ExecDropSingleTupleTableSlot((*batch_state)->temp_id_slots[i]); - } - - /* Clean up batch state */ - pfree_if_not_null((*batch_state)->slots); - pfree_if_not_null((*batch_state)->temp_id_slots); - pfree_if_not_null(*batch_state); - *batch_state = NULL; - - table_close(relation, AccessShareLock); - table_close(temp_table_relation, AccessShareLock); -} +} \ No newline at end of file diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index 1658ac306..daae3153e 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -18,11 +18,14 @@ */ #include "postgres.h" +#include "catalog/indexing.h" +#include "executor/executor.h" #include "utils/json.h" #include "utils/load/ag_load_edges.h" #include "utils/load/ag_load_labels.h" #include "utils/load/age_load.h" +#include "utils/rel.h" static agtype_value *csv_value_to_agtype_value(char *csv_val); static Oid get_or_create_graph(const Name graph_name); @@ -217,18 +220,35 @@ void insert_edge_simple(Oid graph_oid, char *label_name, graphid edge_id, errmsg("label %s already exists as vertex label", label_name))); } + /* Open the relation */ + label_relation = table_open(get_label_relation(label_name, graph_oid), + 
RowExclusiveLock); + + /* Form the tuple */ values[0] = GRAPHID_GET_DATUM(edge_id); values[1] = GRAPHID_GET_DATUM(start_id); values[2] = GRAPHID_GET_DATUM(end_id); values[3] = AGTYPE_P_GET_DATUM((edge_properties)); - - label_relation = table_open(get_label_relation(label_name, graph_oid), - RowExclusiveLock); - tuple = heap_form_tuple(RelationGetDescr(label_relation), values, nulls); - heap_insert(label_relation, tuple, - GetCurrentCommandId(true), 0, NULL); + + if (RelationGetForm(label_relation)->relhasindex) + { + /* + * CatalogTupleInsertWithInfo() is originally for PostgreSQL's + * catalog. However, it is used here for convenience. + */ + CatalogIndexState indstate = CatalogOpenIndexes(label_relation); + CatalogTupleInsertWithInfo(label_relation, tuple, indstate); + CatalogCloseIndexes(indstate); + } + else + { + heap_insert(label_relation, tuple, GetCurrentCommandId(true), + 0, NULL); + } + + /* Close the relation */ table_close(label_relation, RowExclusiveLock); CommandCounterIncrement(); } @@ -246,46 +266,75 @@ void insert_vertex_simple(Oid graph_oid, char *label_name, graphid vertex_id, if (get_label_kind(label_name, graph_oid) == LABEL_KIND_EDGE) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("label %s already exists as edge label", label_name))); + errmsg("label %s already exists as edge label", + label_name))); } - values[0] = GRAPHID_GET_DATUM(vertex_id); - values[1] = AGTYPE_P_GET_DATUM((vertex_properties)); - + /* Open the relation */ label_relation = table_open(get_label_relation(label_name, graph_oid), RowExclusiveLock); + + /* Form the tuple */ + values[0] = GRAPHID_GET_DATUM(vertex_id); + values[1] = AGTYPE_P_GET_DATUM((vertex_properties)); tuple = heap_form_tuple(RelationGetDescr(label_relation), values, nulls); - heap_insert(label_relation, tuple, - GetCurrentCommandId(true), 0, NULL); + + if (RelationGetForm(label_relation)->relhasindex) + { + /* + * CatalogTupleInsertWithInfo() is originally for PostgreSQL's + * catalog. 
However, it is used here for convenience. + */ + CatalogIndexState indstate = CatalogOpenIndexes(label_relation); + CatalogTupleInsertWithInfo(label_relation, tuple, indstate); + CatalogCloseIndexes(indstate); + } + else + { + heap_insert(label_relation, tuple, GetCurrentCommandId(true), + 0, NULL); + } + + /* Close the relation */ table_close(label_relation, RowExclusiveLock); CommandCounterIncrement(); } -void insert_batch(batch_insert_state *batch_state, char *label_name, - Oid graph_oid) +void insert_batch(batch_insert_state *batch_state) { - Relation label_relation; - BulkInsertState bistate; - Oid relid; - - // Get the relation OID - relid = get_label_relation(label_name, graph_oid); - - // Open the relation - label_relation = table_open(relid, RowExclusiveLock); - - // Prepare the BulkInsertState - bistate = GetBulkInsertState(); - - // Perform the bulk insert - heap_multi_insert(label_relation, batch_state->slots, - batch_state->num_tuples, GetCurrentCommandId(true), - 0, bistate); + List *result; + int i; - // Clean up - FreeBulkInsertState(bistate); - table_close(label_relation, RowExclusiveLock); + /* Insert the tuples */ + heap_multi_insert(batch_state->resultRelInfo->ri_RelationDesc, + batch_state->slots, batch_state->num_tuples, + GetCurrentCommandId(true), 0, NULL); + + /* Insert index entries for the tuples */ + if (batch_state->resultRelInfo->ri_NumIndices > 0) + { + for (i = 0; i < batch_state->num_tuples; i++) + { + result = ExecInsertIndexTuples(batch_state->resultRelInfo, + batch_state->slots[i], + batch_state->estate, false, + true, NULL, NIL, false); + + /* Check if the unique constraint is violated */ + if (list_length(result) != 0) + { + Datum id; + bool isnull; + + id = slot_getattr(batch_state->slots[i], 1, &isnull); + ereport(ERROR, (errmsg("Cannot insert duplicate vertex id: %ld", + DATUM_GET_GRAPHID(id)), + errhint("Entry id %ld is already used", + get_graphid_entry_id(id)))); + } + } + } CommandCounterIncrement(); } @@ -475,3 +524,79 
@@ static int32 get_or_create_label(Oid graph_oid, char *graph_name, return label_id; } + +/* + * Initialize the batch insert state. + */ +void init_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid) +{ + Relation relation; + Oid relid; + EState *estate; + ResultRelInfo *resultRelInfo; + int i; + + /* Open the relation */ + relid = get_label_relation(label_name, graph_oid); + relation = table_open(relid, RowExclusiveLock); + + /* Initialize executor state */ + estate = CreateExecutorState(); + + /* Initialize resultRelInfo */ + resultRelInfo = makeNode(ResultRelInfo); + InitResultRelInfo(resultRelInfo, relation, 1, NULL, estate->es_instrument); + estate->es_result_relations = &resultRelInfo; + + /* Open the indices */ + ExecOpenIndices(resultRelInfo, false); + + /* Initialize the batch insert state */ + *batch_state = (batch_insert_state *) palloc0(sizeof(batch_insert_state)); + (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); + (*batch_state)->estate = estate; + (*batch_state)->resultRelInfo = resultRelInfo; + (*batch_state)->max_tuples = BATCH_SIZE; + (*batch_state)->num_tuples = 0; + + /* Create slots */ + for (i = 0; i < BATCH_SIZE; i++) + { + (*batch_state)->slots[i] = MakeSingleTupleTableSlot( + RelationGetDescr(relation), + &TTSOpsHeapTuple); + } +} + +/* + * Finish the batch insert for vertices. Insert the + * tuples remaining in the batch state and clean up. 
+ */ +void finish_batch_insert(batch_insert_state **batch_state) +{ + int i; + + if ((*batch_state)->num_tuples > 0) + { + insert_batch(*batch_state); + (*batch_state)->num_tuples = 0; + } + + /* Free slots */ + for (i = 0; i < BATCH_SIZE; i++) + { + ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); + } + + /* Clean up, close the indices and relation */ + ExecCloseIndices((*batch_state)->resultRelInfo); + table_close((*batch_state)->resultRelInfo->ri_RelationDesc, + RowExclusiveLock); + + /* Clean up batch state */ + FreeExecutorState((*batch_state)->estate); + pfree((*batch_state)->slots); + pfree(*batch_state); + *batch_state = NULL; +} \ No newline at end of file diff --git a/src/include/utils/load/ag_load_labels.h b/src/include/utils/load/ag_load_labels.h index 3a70a5c05..b8ed1572e 100644 --- a/src/include/utils/load/ag_load_labels.h +++ b/src/include/utils/load/ag_load_labels.h @@ -24,10 +24,6 @@ #include "access/heapam.h" #include "utils/load/age_load.h" -#define AGE_VERTIX 1 -#define AGE_EDGE 2 - - struct counts { long unsigned fields; long unsigned allvalues; @@ -51,7 +47,6 @@ typedef struct { char *label_name; int label_id; Oid label_seq_relid; - Oid temp_table_relid; bool id_field_exists; bool load_as_agtype; int curr_seq_num; diff --git a/src/include/utils/load/age_load.h b/src/include/utils/load/age_load.h index b1335581b..72f11493d 100644 --- a/src/include/utils/load/age_load.h +++ b/src/include/utils/load/age_load.h @@ -30,16 +30,13 @@ #ifndef AGE_ENTITY_CREATOR_H #define AGE_ENTITY_CREATOR_H -#define TEMP_VERTEX_ID_TABLE_SUFFIX "_ag_vertex_ids" -#define GET_TEMP_VERTEX_ID_TABLE(graph_name) \ - psprintf("_%s%s", graph_name, TEMP_VERTEX_ID_TABLE_SUFFIX) - #define BATCH_SIZE 1000 -typedef struct +typedef struct batch_insert_state { + EState *estate; + ResultRelInfo *resultRelInfo; TupleTableSlot **slots; - TupleTableSlot **temp_id_slots; int num_tuples; int max_tuples; } batch_insert_state; @@ -57,7 +54,10 @@ void insert_vertex_simple(Oid 
graph_oid, char *label_name, graphid vertex_id, void insert_edge_simple(Oid graph_oid, char *label_name, graphid edge_id, graphid start_id, graphid end_id, agtype* end_properties); -void insert_batch(batch_insert_state *batch_state, char *label_name, - Oid graph_oid); +void insert_batch(batch_insert_state *batch_state); + +void init_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid); +void finish_batch_insert(batch_insert_state **batch_state); #endif /* AGE_ENTITY_CREATOR_H */ From 3234b5fa7ff02dad6f0a3af795ba893b606290e9 Mon Sep 17 00:00:00 2001 From: Aleksey Konovkin Date: Tue, 9 Dec 2025 22:49:05 +0300 Subject: [PATCH 2/6] Fix possible memory and file descriptors leaks (#2258) - Used postgres memory allocation functions instead of standard ones. - Wrapped main loop of csv loader in PG_TRY block for better error handling. --- src/backend/utils/adt/age_global_graph.c | 6 +- src/backend/utils/adt/agtype.c | 39 ++++---- src/backend/utils/load/ag_load_edges.c | 91 ++++++++++-------- src/backend/utils/load/ag_load_labels.c | 115 ++++++++++++----------- src/include/utils/agtype.h | 1 + 5 files changed, 135 insertions(+), 117 deletions(-) diff --git a/src/backend/utils/adt/age_global_graph.c b/src/backend/utils/adt/age_global_graph.c index 6f30060ae..c34e51ee3 100644 --- a/src/backend/utils/adt/age_global_graph.c +++ b/src/backend/utils/adt/age_global_graph.c @@ -1237,12 +1237,10 @@ Datum age_delete_global_graphs(PG_FUNCTION_ARGS) { char *graph_name = NULL; - graph_name = strndup(agtv_temp->val.string.val, - agtv_temp->val.string.len); + graph_name = pnstrdup(agtv_temp->val.string.val, + agtv_temp->val.string.len); success = delete_specific_GRAPH_global_contexts(graph_name); - - free(graph_name); } else { diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index f190cfffe..c5d41fa19 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -184,6 +184,17 @@ static agtype_value 
*agtype_build_map_as_agtype_value(FunctionCallInfo fcinfo); agtype_value *agtype_composite_to_agtype_value_binary(agtype *a); static agtype_value *tostring_helper(Datum arg, Oid type, char *msghdr); + +void *repalloc_check(void *ptr, size_t len) +{ + if (ptr != NULL) + { + return repalloc(ptr, len); + } + + return palloc(len); +} + /* * Due to how pfree can be implemented, it may not check for a passed NULL. This * wrapper does just that, it will only call pfree is the pointer passed is not @@ -5605,7 +5616,7 @@ static char *get_label_name(const char *graph_name, graphid element_graphid) result = NameStr(*DatumGetName(heap_getattr(tuple, Anum_ag_label_name, tupdesc, &column_is_null))); /* duplicate it */ - result = strdup(result); + result = pstrdup(result); /* end the scan and close the relation */ systable_endscan(scan_desc); @@ -5720,8 +5731,8 @@ Datum age_startnode(PG_FUNCTION_ARGS) Assert(AGT_ROOT_IS_SCALAR(agt_arg)); agtv_object = get_ith_agtype_value_from_container(&agt_arg->root, 0); Assert(agtv_object->type == AGTV_STRING); - graph_name = strndup(agtv_object->val.string.val, - agtv_object->val.string.len); + graph_name = pnstrdup(agtv_object->val.string.val, + agtv_object->val.string.len); /* get the edge */ agt_arg = AG_GET_ARG_AGTYPE_P(1); @@ -5755,8 +5766,6 @@ Datum age_startnode(PG_FUNCTION_ARGS) result = get_vertex(graph_name, label_name, start_id); - free(label_name); - return result; } @@ -5785,8 +5794,8 @@ Datum age_endnode(PG_FUNCTION_ARGS) Assert(AGT_ROOT_IS_SCALAR(agt_arg)); agtv_object = get_ith_agtype_value_from_container(&agt_arg->root, 0); Assert(agtv_object->type == AGTV_STRING); - graph_name = strndup(agtv_object->val.string.val, - agtv_object->val.string.len); + graph_name = pnstrdup(agtv_object->val.string.val, + agtv_object->val.string.len); /* get the edge */ agt_arg = AG_GET_ARG_AGTYPE_P(1); @@ -5820,8 +5829,6 @@ Datum age_endnode(PG_FUNCTION_ARGS) result = get_vertex(graph_name, label_name, end_id); - free(label_name); - return 
result; } @@ -6463,11 +6470,10 @@ Datum age_tofloat(PG_FUNCTION_ARGS) NumericGetDatum(agtv_value->val.numeric))); else if (agtv_value->type == AGTV_STRING) { - string = strndup(agtv_value->val.string.val, - agtv_value->val.string.len); + string = pnstrdup(agtv_value->val.string.val, + agtv_value->val.string.len); result = float8in_internal_null(string, NULL, "double precision", string, &is_valid); - free(string); if (!is_valid) PG_RETURN_NULL(); } @@ -6765,8 +6771,8 @@ Datum age_tointeger(PG_FUNCTION_ARGS) { char *endptr; /* we need a null terminated cstring */ - string = strndup(agtv_value->val.string.val, - agtv_value->val.string.len); + string = pnstrdup(agtv_value->val.string.val, + agtv_value->val.string.len); /* convert it if it is a regular integer string */ result = strtoi64(string, &endptr, 10); @@ -6780,7 +6786,6 @@ Datum age_tointeger(PG_FUNCTION_ARGS) f = float8in_internal_null(string, NULL, "double precision", string, &is_valid); - free(string); /* * If the conversions failed or it's a special float value, * return null. 
@@ -6793,10 +6798,6 @@ Datum age_tointeger(PG_FUNCTION_ARGS) result = (int64) f; } - else - { - free(string); - } } else { diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index 67049431c..931c6e0dc 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -36,8 +36,8 @@ void edge_field_cb(void *field, size_t field_len, void *data) if (cr->cur_field == cr->alloc) { cr->alloc *= 2; - cr->fields = realloc(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = realloc(cr->header, sizeof(size_t *) * cr->alloc); + cr->fields = repalloc_check(cr->fields, sizeof(char *) * cr->alloc); + cr->fields_len = repalloc_check(cr->header, sizeof(size_t *) * cr->alloc); if (cr->fields == NULL) { cr->error = 1; @@ -48,7 +48,7 @@ void edge_field_cb(void *field, size_t field_len, void *data) } cr->fields_len[cr->cur_field] = field_len; cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = strndup((char*)field, field_len); + cr->fields[cr->cur_field] = pnstrdup((char*)field, field_len); cr->cur_field += 1; } @@ -78,13 +78,13 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) { cr->header_num = cr->cur_field; cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )malloc(sizeof(size_t *) * cr->cur_field); - cr->header = malloc((sizeof (char*) * cr->cur_field)); + cr->header_len = (size_t* )palloc(sizeof(size_t *) * cr->cur_field); + cr->header = palloc((sizeof (char*) * cr->cur_field)); for (i = 0; icur_field; i++) { cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = strndup(cr->fields[i], cr->header_len[i]); + cr->header[i] = pnstrdup(cr->fields[i], cr->header_len[i]); } } else @@ -133,7 +133,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) for (i = 0; i < n_fields; ++i) { - free(cr->fields[i]); + pfree_if_not_null(cr->fields[i]); } if (cr->error) @@ -192,6 +192,10 @@ int create_edges_from_csv_file(char *file_path, 
(errmsg("Failed to initialize csv parser\n"))); } + p.malloc_func = palloc; + p.realloc_func = repalloc_check; + p.free_func = pfree_if_not_null; + csv_set_space_func(&p, is_space); csv_set_term_func(&p, is_term); @@ -202,47 +206,52 @@ int create_edges_from_csv_file(char *file_path, (errmsg("Failed to open %s\n", file_path))); } - label_seq_name = get_label_seq_relation_name(label_name); - - memset((void*)&cr, 0, sizeof(csv_edge_reader)); - cr.alloc = 128; - cr.fields = malloc(sizeof(char *) * cr.alloc); - cr.fields_len = malloc(sizeof(size_t *) * cr.alloc); - cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; - - /* Initialize the batch insert state */ - init_batch_insert(&cr.batch_state, label_name, graph_oid); - - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) + PG_TRY(); { - if (csv_parse(&p, buf, bytes_read, edge_field_cb, - edge_row_cb, &cr) != bytes_read) + label_seq_name = get_label_seq_relation_name(label_name); + + memset((void*)&cr, 0, sizeof(csv_edge_reader)); + cr.alloc = 128; + cr.fields = palloc(sizeof(char *) * cr.alloc); + cr.fields_len = palloc(sizeof(size_t *) * cr.alloc); + cr.header_row_length = 0; + cr.curr_row_length = 0; + cr.graph_name = graph_name; + cr.graph_oid = graph_oid; + cr.label_name = label_name; + cr.label_id = label_id; + cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); + cr.load_as_agtype = load_as_agtype; + + /* Initialize the batch insert state */ + init_batch_insert(&cr.batch_state, label_name, graph_oid); + + while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); + if (csv_parse(&p, buf, bytes_read, edge_field_cb, + edge_row_cb, &cr) != bytes_read) + { + ereport(ERROR, (errmsg("Error while parsing 
file: %s\n", + csv_strerror(csv_error(&p))))); + } } - } - csv_fini(&p, edge_field_cb, edge_row_cb, &cr); + csv_fini(&p, edge_field_cb, edge_row_cb, &cr); - /* Finish any remaining batch inserts */ - finish_batch_insert(&cr.batch_state); + /* Finish any remaining batch inserts */ + finish_batch_insert(&cr.batch_state); - if (ferror(fp)) + if (ferror(fp)) + { + ereport(ERROR, (errmsg("Error while reading file %s\n", file_path))); + } + } + PG_FINALLY(); { - ereport(ERROR, (errmsg("Error while reading file %s\n", file_path))); + fclose(fp); + csv_free(&p); } + PG_END_TRY(); - fclose(fp); - - free(cr.fields); - csv_free(&p); return EXIT_SUCCESS; -} \ No newline at end of file +} diff --git a/src/backend/utils/load/ag_load_labels.c b/src/backend/utils/load/ag_load_labels.c index 4a04f3cd8..1e86bbda4 100644 --- a/src/backend/utils/load/ag_load_labels.c +++ b/src/backend/utils/load/ag_load_labels.c @@ -39,8 +39,8 @@ void vertex_field_cb(void *field, size_t field_len, void *data) if (cr->cur_field == cr->alloc) { cr->alloc *= 2; - cr->fields = realloc(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = realloc(cr->header, sizeof(size_t *) * cr->alloc); + cr->fields = repalloc_check(cr->fields, sizeof(char *) * cr->alloc); + cr->fields_len = repalloc_check(cr->header, sizeof(size_t *) * cr->alloc); if (cr->fields == NULL) { cr->error = 1; @@ -51,7 +51,7 @@ void vertex_field_cb(void *field, size_t field_len, void *data) } cr->fields_len[cr->cur_field] = field_len; cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = strndup((char *) field, field_len); + cr->fields[cr->cur_field] = pnstrdup((char *) field, field_len); cr->cur_field += 1; } @@ -70,13 +70,13 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data) { cr->header_num = cr->cur_field; cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )malloc(sizeof(size_t *) * cr->cur_field); - cr->header = malloc((sizeof (char*) * cr->cur_field)); + cr->header_len = (size_t* 
)palloc(sizeof(size_t *) * cr->cur_field); + cr->header = palloc((sizeof (char*) * cr->cur_field)); for (i = 0; icur_field; i++) { cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = strndup(cr->fields[i], cr->header_len[i]); + cr->header[i] = pnstrdup(cr->fields[i], cr->header_len[i]); } } else @@ -129,7 +129,7 @@ void vertex_row_cb(int delim __attribute__((unused)), void *data) for (i = 0; i < n_fields; ++i) { - free(cr->fields[i]); + pfree_if_not_null(cr->fields[i]); } if (cr->error) @@ -189,6 +189,10 @@ int create_labels_from_csv_file(char *file_path, (errmsg("Failed to initialize csv parser\n"))); } + p.malloc_func = palloc; + p.realloc_func = repalloc_check; + p.free_func = pfree_if_not_null; + csv_set_space_func(&p, is_space); csv_set_term_func(&p, is_term); @@ -199,62 +203,67 @@ int create_labels_from_csv_file(char *file_path, (errmsg("Failed to open %s\n", file_path))); } - label_seq_name = get_label_seq_relation_name(label_name); - - memset((void*)&cr, 0, sizeof(csv_vertex_reader)); - - cr.alloc = 2048; - cr.fields = malloc(sizeof(char *) * cr.alloc); - cr.fields_len = malloc(sizeof(size_t *) * cr.alloc); - cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.id_field_exists = id_field_exists; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; - - if (cr.id_field_exists) + PG_TRY(); { - /* - * Set the curr_seq_num since we will need it to compare with - * incoming entry_id. - * - * We cant use currval because it will error out if nextval was - * not called before in the session. 
- */ - cr.curr_seq_num = nextval_internal(cr.label_seq_relid, true); - } + label_seq_name = get_label_seq_relation_name(label_name); + + memset((void*)&cr, 0, sizeof(csv_vertex_reader)); + + cr.alloc = 2048; + cr.fields = palloc(sizeof(char *) * cr.alloc); + cr.fields_len = palloc(sizeof(size_t *) * cr.alloc); + cr.header_row_length = 0; + cr.curr_row_length = 0; + cr.graph_name = graph_name; + cr.graph_oid = graph_oid; + cr.label_name = label_name; + cr.label_id = label_id; + cr.id_field_exists = id_field_exists; + cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); + cr.load_as_agtype = load_as_agtype; + + if (cr.id_field_exists) + { + /* + * Set the curr_seq_num since we will need it to compare with + * incoming entry_id. + * + * We cant use currval because it will error out if nextval was + * not called before in the session. + */ + cr.curr_seq_num = nextval_internal(cr.label_seq_relid, true); + } - /* Initialize the batch insert state */ - init_batch_insert(&cr.batch_state, label_name, graph_oid); + /* Initialize the batch insert state */ + init_batch_insert(&cr.batch_state, label_name, graph_oid); - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) - { - if (csv_parse(&p, buf, bytes_read, vertex_field_cb, - vertex_row_cb, &cr) != bytes_read) + while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); + if (csv_parse(&p, buf, bytes_read, vertex_field_cb, + vertex_row_cb, &cr) != bytes_read) + { + ereport(ERROR, (errmsg("Error while parsing file: %s\n", + csv_strerror(csv_error(&p))))); + } } - } - csv_fini(&p, vertex_field_cb, vertex_row_cb, &cr); + csv_fini(&p, vertex_field_cb, vertex_row_cb, &cr); - /* Finish any remaining batch inserts */ - finish_batch_insert(&cr.batch_state); + /* Finish any remaining batch inserts */ + finish_batch_insert(&cr.batch_state); - if (ferror(fp)) + if (ferror(fp)) + { + ereport(ERROR, (errmsg("Error while reading file %s\n", + 
file_path))); + } + } + PG_FINALLY(); { - ereport(ERROR, (errmsg("Error while reading file %s\n", - file_path))); + fclose(fp); + csv_free(&p); } + PG_END_TRY(); - fclose(fp); - - free(cr.fields); - csv_free(&p); return EXIT_SUCCESS; } \ No newline at end of file diff --git a/src/include/utils/agtype.h b/src/include/utils/agtype.h index 1f6908103..ec9125073 100644 --- a/src/include/utils/agtype.h +++ b/src/include/utils/agtype.h @@ -659,6 +659,7 @@ void pfree_agtype_value(agtype_value* value); void pfree_agtype_value_content(agtype_value* value); void pfree_agtype_in_state(agtype_in_state* value); void pfree_if_not_null(void *ptr); +void *repalloc_check(void *ptr, size_t len); agtype_value *agtype_value_from_cstring(char *str, int len); /* Oid accessors for AGTYPE */ Oid get_AGTYPEOID(void); From f54182981e80aae51ed5111ca0ade72b60fcc4e8 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Tue, 16 Dec 2025 08:33:28 -0800 Subject: [PATCH 3/6] Restrict age_load commands (#2274) This PR applies restrictions to the following age_load commands - load_labels_from_file() load_edges_from_file() They are now tied to a specific root directory and are required to have a specific file extension to eliminate any attempts to force them to access any other files. Nothing else has changed with the actual command formats or parameters, only that they work out of the /tmp/age directory and only access files with an extension of .csv. Added regression tests and updated the location of the csv files for those regression tests. 
modified: regress/expected/age_load.out modified: regress/sql/age_load.sql modified: src/backend/utils/load/age_load.c --- regress/expected/age_load.out | 44 +++++++++++++++++- regress/sql/age_load.sql | 38 +++++++++++++++- src/backend/utils/load/age_load.c | 76 ++++++++++++++++++++++++++++--- 3 files changed, 149 insertions(+), 9 deletions(-) diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index 5f2bdab78..55d1ff1d6 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -16,7 +16,9 @@ * specific language governing permissions and limitations * under the License. */ -\! cp -r regress/age_load/data regress/instance/data/age_load +\! rm -rf /tmp/age/age_load +\! mkdir -p /tmp/age +\! cp -r regress/age_load/data /tmp/age/age_load LOAD 'age'; SET search_path TO ag_catalog; -- Create a country using CREATE clause @@ -401,6 +403,43 @@ SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN prop {"bool": "false", "string": "nUll", "numeric": "3.14"} (6 rows) +-- +-- Check sandbox +-- +-- check null file name +SELECT load_labels_from_file('agload_conversion', 'Person1', NULL, true, true); +ERROR: file path must not be NULL +SELECT load_edges_from_file('agload_conversion', 'Edges1', NULL, true); +ERROR: file path must not be NULL +-- check no file name +SELECT load_labels_from_file('agload_conversion', 'Person1', '', true, true); +ERROR: file name cannot be zero length +SELECT load_edges_from_file('agload_conversion', 'Edges1', '', true); +ERROR: file name cannot be zero length +-- check for file/path does not exist +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load_xxx/conversion_vertices.csv', true, true); +ERROR: File or path does not exist [/tmp/age/age_load_xxx/conversion_vertices.csv] +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load_xxx/conversion_edges.csv', true); +ERROR: File or path does not exist [/tmp/age/age_load_xxx/conversion_edges.csv] +SELECT 
load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.txt', true, true); +ERROR: File or path does not exist [/tmp/age/age_load/conversion_vertices.txt] +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.txt', true); +ERROR: File or path does not exist [/tmp/age/age_load/conversion_edges.txt] +-- check wrong extension +\! touch /tmp/age/age_load/conversion_vertices.txt +\! touch /tmp/age/age_load/conversion_edges.txt +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.txt', true, true); +ERROR: You can only load files with extension [.csv]. +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.txt', true); +ERROR: You can only load files with extension [.csv]. +-- check outside sandbox directory +SELECT load_labels_from_file('agload_conversion', 'Person1', '../../etc/passwd', true, true); +ERROR: You can only load files located in [/tmp/age/]. +SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', true); +ERROR: You can only load files located in [/tmp/age/]. +-- +-- Cleanup +-- SELECT drop_graph('agload_conversion', true); NOTICE: drop cascades to 6 other objects DETAIL: drop cascades to table agload_conversion._ag_label_vertex @@ -415,3 +454,6 @@ NOTICE: graph "agload_conversion" has been dropped (1 row) +-- +-- End +-- diff --git a/regress/sql/age_load.sql b/regress/sql/age_load.sql index 180248bf1..cefcfb4ca 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -17,7 +17,9 @@ * under the License. */ -\! cp -r regress/age_load/data regress/instance/data/age_load +\! rm -rf /tmp/age/age_load +\! mkdir -p /tmp/age +\! 
cp -r regress/age_load/data /tmp/age/age_load LOAD 'age'; @@ -160,4 +162,38 @@ SELECT create_elabel('agload_conversion','Edges2'); SELECT load_edges_from_file('agload_conversion', 'Edges2', 'age_load/conversion_edges.csv', false); SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN properties(e) $$) as (a agtype); +-- +-- Check sandbox +-- +-- check null file name +SELECT load_labels_from_file('agload_conversion', 'Person1', NULL, true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', NULL, true); + +-- check no file name +SELECT load_labels_from_file('agload_conversion', 'Person1', '', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', '', true); + +-- check for file/path does not exist +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load_xxx/conversion_vertices.csv', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load_xxx/conversion_edges.csv', true); +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.txt', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.txt', true); + +-- check wrong extension +\! touch /tmp/age/age_load/conversion_vertices.txt +\! 
touch /tmp/age/age_load/conversion_edges.txt +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.txt', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.txt', true); + +-- check outside sandbox directory +SELECT load_labels_from_file('agload_conversion', 'Person1', '../../etc/passwd', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', true); + +-- +-- Cleanup +-- SELECT drop_graph('agload_conversion', true); + +-- +-- End +-- diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index daae3153e..79170fd53 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -31,6 +31,62 @@ static agtype_value *csv_value_to_agtype_value(char *csv_val); static Oid get_or_create_graph(const Name graph_name); static int32 get_or_create_label(Oid graph_oid, char *graph_name, char *label_name, char label_kind); +static char *build_safe_filename(char *name); + +#define AGE_BASE_CSV_DIRECTORY "/tmp/age/" +#define AGE_CSV_FILE_EXTENSION ".csv" + +static char *build_safe_filename(char *name) +{ + int length; + char path[PATH_MAX]; + char *resolved; + + if (name == NULL) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("file name cannot be NULL"))); + + } + + length = strlen(name); + + if (length == 0) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("file name cannot be zero length"))); + + } + + snprintf(path, sizeof(path), "%s%s", AGE_BASE_CSV_DIRECTORY, name); + + resolved = realpath(path, NULL); + + if (resolved == NULL) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("File or path does not exist [%s]", path))); + } + + if (strncmp(resolved, AGE_BASE_CSV_DIRECTORY, + strlen(AGE_BASE_CSV_DIRECTORY)) != 0) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("You can only load files located in [%s].", + 
AGE_BASE_CSV_DIRECTORY))); + } + + length = strlen(resolved) - 4; + if (strncmp(resolved+length, AGE_CSV_FILE_EXTENSION, + strlen(AGE_CSV_FILE_EXTENSION)) != 0) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("You can only load files with extension [%s].", + AGE_CSV_FILE_EXTENSION))); + } + + return resolved; +} agtype *create_empty_agtype(void) { @@ -344,7 +400,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) { Name graph_name; Name label_name; - text* file_path; + text* file_name; char* graph_name_str; char* label_name_str; char* file_path_str; @@ -373,7 +429,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); - file_path = PG_GETARG_TEXT_P(2); + file_name = PG_GETARG_TEXT_P(2); id_field_exists = PG_GETARG_BOOL(3); load_as_agtype = PG_GETARG_BOOL(4); @@ -385,7 +441,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) label_name_str = AG_DEFAULT_LABEL_VERTEX; } - file_path_str = text_to_cstring(file_path); + file_path_str = build_safe_filename(text_to_cstring(file_name)); graph_oid = get_or_create_graph(graph_name); label_id = get_or_create_label(graph_oid, graph_name_str, @@ -394,6 +450,9 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) create_labels_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, id_field_exists, load_as_agtype); + + free(file_path_str); + PG_RETURN_VOID(); } @@ -403,7 +462,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) Name graph_name; Name label_name; - text* file_path; + text* file_name; char* graph_name_str; char* label_name_str; char* file_path_str; @@ -431,7 +490,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); - file_path = PG_GETARG_TEXT_P(2); + file_name = PG_GETARG_TEXT_P(2); load_as_agtype = PG_GETARG_BOOL(3); graph_name_str = NameStr(*graph_name); @@ -442,7 +501,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) label_name_str = 
AG_DEFAULT_LABEL_EDGE; } - file_path_str = text_to_cstring(file_path); + file_path_str = build_safe_filename(text_to_cstring(file_name)); graph_oid = get_or_create_graph(graph_name); label_id = get_or_create_label(graph_oid, graph_name_str, @@ -450,6 +509,9 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) create_edges_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, load_as_agtype); + + free(file_path_str); + PG_RETURN_VOID(); } @@ -599,4 +661,4 @@ void finish_batch_insert(batch_insert_state **batch_state) pfree((*batch_state)->slots); pfree(*batch_state); *batch_state = NULL; -} \ No newline at end of file +} From 5d34d2f2632f47134bab046a9c3851c66cf25c72 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Fri, 9 Jan 2026 12:55:36 -0800 Subject: [PATCH 4/6] Fix and improve index.sql regression test coverage (#2300) NOTE: This PR was created with AI tools and a human. - Remove unused copy command (leftover from deleted agload_test_graph test) - Replace broken Section 4 that referenced non-existent graph with comprehensive WHERE clause tests covering string, int, bool, and float properties with AND/OR/NOT operators - Add EXPLAIN tests to verify index usage: - Section 3: Validate GIN indices (load_city_gin_idx, load_country_gin_idx) show Bitmap Index Scan for property matching - Section 4: Validate all expression indices (city_country_code_idx, city_id_idx, city_west_coast_idx, country_life_exp_idx) show Index Scan for WHERE clause filtering All indices now have EXPLAIN verification confirming they are used as expected. 
modified: regress/expected/index.out modified: regress/sql/index.sql --- regress/expected/index.out | 290 ++++++++++++++++++++++++++++++++++--- regress/sql/index.sql | 174 ++++++++++++++++++++-- 2 files changed, 436 insertions(+), 28 deletions(-) diff --git a/regress/expected/index.out b/regress/expected/index.out index 3ed7b1c33..9faead660 100644 --- a/regress/expected/index.out +++ b/regress/expected/index.out @@ -16,7 +16,6 @@ * specific language governing permissions and limitations * under the License. */ -\! cp -r regress/age_load/data regress/instance/data/age_load LOAD 'age'; SET search_path TO ag_catalog; SET enable_mergejoin = ON; @@ -385,6 +384,19 @@ CREATE INDEX load_city_gin_idx ON cypher_index."City" USING gin (properties); CREATE INDEX load_country_gin_idx ON cypher_index."Country" USING gin (properties); +-- Verify GIN index is used for City property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:City {city_id: 1}) + RETURN c +$$) as (plan agtype); + QUERY PLAN +-------------------------------------------------------------- + Bitmap Heap Scan on "City" c + Recheck Cond: (properties @> '{"city_id": 1}'::agtype) + -> Bitmap Index Scan on load_city_gin_idx + Index Cond: (properties @> '{"city_id": 1}'::agtype) +(4 rows) + SELECT * FROM cypher('cypher_index', $$ MATCH (c:City {city_id: 1}) RETURN c @@ -418,6 +430,19 @@ $$) as (n agtype); {"id": 1970324836974597, "label": "City", "properties": {"name": "Vancouver", "city_id": 5, "west_coast": true, "country_code": "CA"}}::vertex (4 rows) +-- Verify GIN index is used for Country property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country {life_expectancy: 82.05}) + RETURN c +$$) as (plan agtype); + QUERY PLAN +-------------------------------------------------------------------------- + Bitmap Heap Scan on "Country" c + Recheck Cond: (properties @> '{"life_expectancy": 82.05}'::agtype) + -> Bitmap Index Scan on load_country_gin_idx + Index 
Cond: (properties @> '{"life_expectancy": 82.05}'::agtype) +(4 rows) + SELECT * FROM cypher('cypher_index', $$ MATCH (c:Country {life_expectancy: 82.05}) RETURN c @@ -441,26 +466,259 @@ DROP INDEX cypher_index.load_country_gin_idx; -- -- Section 4: Index use with WHERE clause -- -SELECT COUNT(*) FROM cypher('cypher_index', $$ +-- Create expression index on country_code property +CREATE INDEX city_country_code_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); +-- Verify index is used with EXPLAIN (should show Index Scan on city_country_code_idx) +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a +$$) as (plan agtype); + QUERY PLAN +--------------------------------------------------------------------------------------------------------------- + Index Scan using city_country_code_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"US"'::agtype) +(2 rows) + +-- Test WHERE with indexed string property +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.country_code = 'US' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "New York" + "San Fransisco" + "Los Angeles" + "Seattle" +(4 rows) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'CA' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "Vancouver" + "Toronto" + "Montreal" +(3 rows) + +-- Test WHERE with no matching results +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'XX' + RETURN a.name +$$) as (name agtype); + name +------ +(0 rows) + +-- Create expression index on city_id property +CREATE INDEX city_id_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"city_id"'::agtype)); +-- Verify index is used with EXPLAIN for integer 
property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.city_id = 1 RETURN a -$$) as (n agtype); - count -------- - 0 +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Index Scan using city_id_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"city_id"'::agtype]) = '1'::agtype) +(2 rows) + +-- Test WHERE with indexed integer property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 + RETURN a.name +$$) as (name agtype); + name +------------ + "New York" (1 row) -CREATE INDEX CONCURRENTLY cntry_ode_idx ON cypher_index."City" -(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); -SELECT COUNT(*) FROM cypher('agload_test_graph', $$ +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.city_id = 5 + RETURN a.name +$$) as (name agtype); + name +------------- + "Vancouver" +(1 row) + +-- Test WHERE with comparison operators on indexed property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id < 3 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "New York" + "San Fransisco" +(2 rows) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id >= 8 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "Monterrey" + "Tijuana" +(2 rows) + +-- Create expression index on west_coast boolean property +CREATE INDEX city_west_coast_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"west_coast"'::agtype)); +-- Verify index is used with EXPLAIN for boolean property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.west_coast = true RETURN a -$$) as (n agtype); -ERROR: graph "agload_test_graph" does not exist -LINE 1: SELECT COUNT(*) FROM 
cypher('agload_test_graph', $$ - ^ +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Index Scan using city_west_coast_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) +(2 rows) + +-- Test WHERE with indexed boolean property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "San Fransisco" + "Los Angeles" + "Seattle" + "Vancouver" +(4 rows) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = false + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +--------------- + "New York" + "Toronto" + "Montreal" + "Mexico City" + "Monterrey" + "Tijuana" +(6 rows) + +-- Test WHERE with multiple conditions (AND) +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "San Fransisco" + "Los Angeles" + "Seattle" +(3 rows) + +-- Test WHERE with OR conditions +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 OR a.city_id = 5 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "New York" + "Vancouver" +(2 rows) + +-- Test WHERE with NOT +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE NOT a.west_coast = true AND a.country_code = 'US' + RETURN a.name +$$) as (name agtype); + name +------------ + "New York" +(1 row) + +-- Create expression index on life_expectancy for Country +CREATE INDEX country_life_exp_idx ON cypher_index."Country" +(ag_catalog.agtype_access_operator(properties, '"life_expectancy"'::agtype)); +-- Verify index is used with EXPLAIN for float property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs 
off) MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------ + Index Scan using country_life_exp_idx on "Country" c + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"life_expectancy"'::agtype]) > '80.0'::agtype) +(2 rows) + +-- Test WHERE with float property +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c.name +$$) as (name agtype); + name +---------- + "Canada" +(1 row) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy < 76.0 + RETURN c.name +$$) as (name agtype); + name +---------- + "Mexico" +(1 row) + +-- Test WHERE in combination with pattern matching +SELECT * FROM cypher('cypher_index', $$ + MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' + RETURN city.name + ORDER BY city.city_id +$$) as (name agtype); + name +------------- + "Vancouver" + "Toronto" + "Montreal" +(3 rows) + +-- Clean up indices +DROP INDEX cypher_index.city_country_code_idx; +DROP INDEX cypher_index.city_id_idx; +DROP INDEX cypher_index.city_west_coast_idx; +DROP INDEX cypher_index.country_life_exp_idx; -- -- General Cleanup -- @@ -478,5 +736,3 @@ NOTICE: graph "cypher_index" has been dropped (1 row) -SELECT drop_graph('agload_test_graph', true); -ERROR: graph "agload_test_graph" does not exist diff --git a/regress/sql/index.sql b/regress/sql/index.sql index d9a4331a4..96e7dd81a 100644 --- a/regress/sql/index.sql +++ b/regress/sql/index.sql @@ -17,8 +17,6 @@ * under the License. */ -\! 
cp -r regress/age_load/data regress/instance/data/age_load - LOAD 'age'; SET search_path TO ag_catalog; @@ -219,6 +217,11 @@ ON cypher_index."City" USING gin (properties); CREATE INDEX load_country_gin_idx ON cypher_index."Country" USING gin (properties); +-- Verify GIN index is used for City property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:City {city_id: 1}) + RETURN c +$$) as (plan agtype); SELECT * FROM cypher('cypher_index', $$ MATCH (c:City {city_id: 1}) @@ -235,6 +238,12 @@ SELECT * FROM cypher('cypher_index', $$ RETURN c $$) as (n agtype); +-- Verify GIN index is used for Country property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country {life_expectancy: 82.05}) + RETURN c +$$) as (plan agtype); + SELECT * FROM cypher('cypher_index', $$ MATCH (c:Country {life_expectancy: 82.05}) RETURN c @@ -250,23 +259,166 @@ DROP INDEX cypher_index.load_country_gin_idx; -- -- Section 4: Index use with WHERE clause -- -SELECT COUNT(*) FROM cypher('cypher_index', $$ +-- Create expression index on country_code property +CREATE INDEX city_country_code_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); + +-- Verify index is used with EXPLAIN (should show Index Scan on city_country_code_idx) +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a +$$) as (plan agtype); + +-- Test WHERE with indexed string property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.country_code = 'CA' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with no matching results +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'XX' + RETURN a.name +$$) as (name 
agtype); + +-- Create expression index on city_id property +CREATE INDEX city_id_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"city_id"'::agtype)); + +-- Verify index is used with EXPLAIN for integer property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.city_id = 1 RETURN a -$$) as (n agtype); +$$) as (plan agtype); -CREATE INDEX CONCURRENTLY cntry_ode_idx ON cypher_index."City" -(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); +-- Test WHERE with indexed integer property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 + RETURN a.name +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 5 + RETURN a.name +$$) as (name agtype); + +-- Test WHERE with comparison operators on indexed property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id < 3 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); -SELECT COUNT(*) FROM cypher('agload_test_graph', $$ +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.city_id >= 8 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Create expression index on west_coast boolean property +CREATE INDEX city_west_coast_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"west_coast"'::agtype)); + +-- Verify index is used with EXPLAIN for boolean property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.west_coast = true RETURN a -$$) as (n agtype); +$$) as (plan agtype); + +-- Test WHERE with indexed boolean property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = false + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + 
+-- Test WHERE with multiple conditions (AND) +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with OR conditions +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 OR a.city_id = 5 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with NOT +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE NOT a.west_coast = true AND a.country_code = 'US' + RETURN a.name +$$) as (name agtype); + +-- Create expression index on life_expectancy for Country +CREATE INDEX country_life_exp_idx ON cypher_index."Country" +(ag_catalog.agtype_access_operator(properties, '"life_expectancy"'::agtype)); + +-- Verify index is used with EXPLAIN for float property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c +$$) as (plan agtype); + +-- Test WHERE with float property +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c.name +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy < 76.0 + RETURN c.name +$$) as (name agtype); + +-- Test WHERE in combination with pattern matching +SELECT * FROM cypher('cypher_index', $$ + MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' + RETURN city.name + ORDER BY city.city_id +$$) as (name agtype); + +-- Clean up indices +DROP INDEX cypher_index.city_country_code_idx; +DROP INDEX cypher_index.city_id_idx; +DROP INDEX cypher_index.city_west_coast_idx; +DROP INDEX cypher_index.country_life_exp_idx; -- -- General Cleanup -- SELECT drop_graph('cypher_index', true); -SELECT drop_graph('agload_test_graph', true); From cc9417924fd83ad5da64da7035918ff913603631 Mon Sep 17 00:00:00 2001 From: John Gemignani Date: Sat, 10 Jan 2026 08:37:54 -0800 
Subject: [PATCH 5/6] Fix and improve index.sql addendum (#2301) NOTE: This PR was created with the help of AI tools and a human. Added additional requested regression tests - *EXPLAIN for pattern with WHERE clause *EXPLAIN for pattern with filters on both country and city modified: regress/expected/index.out modified: regress/sql/index.sql --- regress/expected/index.out | 34 ++++++++++++++++++++++++++++++++++ regress/sql/index.sql | 14 ++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/regress/expected/index.out b/regress/expected/index.out index 9faead660..745cab269 100644 --- a/regress/expected/index.out +++ b/regress/expected/index.out @@ -626,6 +626,19 @@ $$) as (name agtype); "Tijuana" (6 rows) +-- EXPLAIN for pattern with WHERE clause +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Index Scan using city_west_coast_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) + Filter: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"US"'::agtype) +(3 rows) + -- Test WHERE with multiple conditions (AND) SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) @@ -700,6 +713,27 @@ $$) as (name agtype); "Mexico" (1 row) +-- EXPLAIN for pattern with filters on both country and city +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' AND city.west_coast = true + RETURN city.name +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Nested Loop + -> Nested Loop + -> Index Scan using city_west_coast_idx on "City" city + Index 
Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) + -> Bitmap Heap Scan on has_city _age_default_alias_0 + Recheck Cond: (start_id = city.id) + -> Bitmap Index Scan on has_city_start_id_idx + Index Cond: (start_id = city.id) + -> Index Scan using "Country_pkey" on "Country" country + Index Cond: (id = _age_default_alias_0.end_id) + Filter: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"CA"'::agtype) +(11 rows) + -- Test WHERE in combination with pattern matching SELECT * FROM cypher('cypher_index', $$ MATCH (country:Country)<-[:has_city]-(city:City) diff --git a/regress/sql/index.sql b/regress/sql/index.sql index 96e7dd81a..a6e075c70 100644 --- a/regress/sql/index.sql +++ b/regress/sql/index.sql @@ -357,6 +357,13 @@ SELECT * FROM cypher('cypher_index', $$ ORDER BY a.city_id $$) as (name agtype); +-- EXPLAIN for pattern with WHERE clause +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a +$$) as (plan agtype); + -- Test WHERE with multiple conditions (AND) SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) @@ -404,6 +411,13 @@ SELECT * FROM cypher('cypher_index', $$ RETURN c.name $$) as (name agtype); +-- EXPLAIN for pattern with filters on both country and city +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' AND city.west_coast = true + RETURN city.name +$$) as (plan agtype); + -- Test WHERE in combination with pattern matching SELECT * FROM cypher('cypher_index', $$ MATCH (country:Country)<-[:has_city]-(city:City) From dffd83bcf350c1ffb0cceb8c3a5b6b17dd94484c Mon Sep 17 00:00:00 2001 From: Muhammad Taha Naveed Date: Mon, 19 Jan 2026 22:21:02 +0500 Subject: [PATCH 6/6] Replace libcsv with pg COPY for csv loading (#2310) - Commit also adds permission checks - Resolves a critical memory 
spike issue on loading large file - Use pg's COPY infrastructure (BeginCopyFrom, NextCopyFromRawFields) for 64KB buffered CSV parsing instead of libcsv - Add byte based flush threshold (64KB) matching COPY behavior for memory safety - Use heap_multi_insert with BulkInsertState for optimized batch inserts - Add per batch memory context to prevent memory growth during large loads - Remove libcsv dependency (libcsv.c, csv.h) - Improves loading performance by 15-20% - No previous regression tests were impacted - Added regression tests for permissions/rls Assisted-by AI --- Makefile | 1 - regress/expected/age_load.out | 189 ++++++++ regress/expected/index.out | 12 +- regress/sql/age_load.sql | 125 ++++++ regress/sql/index.sql | 2 +- src/backend/utils/load/ag_load_edges.c | 388 +++++++++-------- src/backend/utils/load/ag_load_labels.c | 381 ++++++++-------- src/backend/utils/load/age_load.c | 248 ++++++++++- src/backend/utils/load/libcsv.c | 549 ------------------------ src/include/utils/load/ag_load_edges.h | 50 +-- src/include/utils/load/ag_load_labels.h | 50 +-- src/include/utils/load/age_load.h | 27 +- src/include/utils/load/csv.h | 108 ----- 13 files changed, 999 insertions(+), 1131 deletions(-) delete mode 100644 src/backend/utils/load/libcsv.c delete mode 100644 src/include/utils/load/csv.h diff --git a/Makefile b/Makefile index 4cd623484..2d2912571 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,6 @@ OBJS = src/backend/age.o \ src/backend/utils/load/ag_load_labels.o \ src/backend/utils/load/ag_load_edges.o \ src/backend/utils/load/age_load.o \ - src/backend/utils/load/libcsv.o \ src/backend/utils/name_validation.o \ src/backend/utils/ag_guc.o diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index 55d1ff1d6..1f76c31ce 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -454,6 +454,195 @@ NOTICE: graph "agload_conversion" has been dropped (1 row) +-- +-- Test security and permissions +-- +SELECT 
create_graph('agload_security'); +NOTICE: graph "agload_security" has been created + create_graph +-------------- + +(1 row) + +SELECT create_vlabel('agload_security', 'Person1'); +NOTICE: VLabel "Person1" has been created + create_vlabel +--------------- + +(1 row) + +SELECT create_vlabel('agload_security', 'Person2'); +NOTICE: VLabel "Person2" has been created + create_vlabel +--------------- + +(1 row) + +SELECT create_elabel('agload_security', 'SecEdge'); +NOTICE: ELabel "SecEdge" has been created + create_elabel +--------------- + +(1 row) + +-- +-- Test 1: File read permission (pg_read_server_files role) +-- +-- Create a user without pg_read_server_files role +CREATE USER load_test_user; +GRANT USAGE ON SCHEMA ag_catalog TO load_test_user; +-- This should fail because load_test_user doesn't have pg_read_server_files +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: permission denied to LOAD from a file +DETAIL: Only roles with privileges of the "pg_read_server_files" role may LOAD from a file. +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: permission denied to LOAD from a file +DETAIL: Only roles with privileges of the "pg_read_server_files" role may LOAD from a file. 
+RESET ROLE; +-- Grant pg_read_server_files and try again - should fail on table permission now +GRANT pg_read_server_files TO load_test_user; +-- +-- Test 2: Table INSERT permission (ACL_INSERT) +-- +-- User has file read permission but no INSERT on the label table +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: permission denied for table Person1 +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: permission denied for table SecEdge +RESET ROLE; +-- Grant INSERT permission and try again - should succeed +GRANT USAGE ON SCHEMA agload_security TO load_test_user; +GRANT INSERT ON agload_security."Person1" TO load_test_user; +GRANT INSERT ON agload_security."SecEdge" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."Person1_id_seq" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."SecEdge_id_seq" TO load_test_user; +GRANT SELECT ON ag_catalog.ag_label TO load_test_user; +GRANT SELECT ON ag_catalog.ag_graph TO load_test_user; +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); + load_labels_from_file +----------------------- + +(1 row) + +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + load_edges_from_file +---------------------- + +(1 row) + +RESET ROLE; +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; + count +------- + 6 +(1 row) + +SELECT COUNT(*) FROM agload_security."SecEdge"; + count +------- + 6 +(1 row) + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; +-- +-- Test 3: Row-Level Security (RLS) +-- +-- Enable RLS on the label tables +ALTER TABLE agload_security."Person1" ENABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" ENABLE ROW LEVEL SECURITY; +-- Switch to load_test_user +SET ROLE 
load_test_user; +-- Loading should fail when RLS is enabled +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: LOAD from file is not supported with row-level security +HINT: Use Cypher CREATE clause instead. +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: LOAD from file is not supported with row-level security +HINT: Use Cypher CREATE clause instead. +RESET ROLE; +-- Disable RLS and try again - should succeed +ALTER TABLE agload_security."Person1" DISABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" DISABLE ROW LEVEL SECURITY; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); + load_labels_from_file +----------------------- + +(1 row) + +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + load_edges_from_file +---------------------- + +(1 row) + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; + count +------- + 6 +(1 row) + +SELECT COUNT(*) FROM agload_security."SecEdge"; + count +------- + 6 +(1 row) + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; +-- +-- Test 4: Constraint checking (CHECK constraint) +-- +-- Add constraint on vertex properties - fail if bool property is false +ALTER TABLE agload_security."Person1" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); +-- This should fail - constraint violation +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: new row for relation "Person1" violates check constraint "check_bool_true" +DETAIL: Failing row contains (844424930131970, {"id": "2", "bool": "false", "__id__": 2, "string": "John", "num...). 
+-- Add constraint on edge properties - fail if bool property is false +ALTER TABLE agload_security."SecEdge" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); +-- This should fail - some edges have bool = false +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: new row for relation "SecEdge" violates check constraint "check_bool_true" +DETAIL: Failing row contains (1407374883553294, 844424930131969, 1125899906842625, {"bool": "false", "string": "John", "numeric": "-2"}). +-- cleanup +ALTER TABLE agload_security."Person1" DROP CONSTRAINT check_bool_true; +ALTER TABLE agload_security."SecEdge" DROP CONSTRAINT check_bool_true; +-- +-- Cleanup +-- +REVOKE ALL ON agload_security."Person1" FROM load_test_user; +REVOKE ALL ON agload_security."SecEdge" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."Person1_id_seq" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."SecEdge_id_seq" FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_label FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_graph FROM load_test_user; +REVOKE ALL ON SCHEMA agload_security FROM load_test_user; +REVOKE ALL ON SCHEMA ag_catalog FROM load_test_user; +REVOKE pg_read_server_files FROM load_test_user; +DROP USER load_test_user; +SELECT drop_graph('agload_security', true); +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table agload_security._ag_label_vertex +drop cascades to table agload_security._ag_label_edge +drop cascades to table agload_security."Person1" +drop cascades to table agload_security."Person2" +drop cascades to table agload_security."SecEdge" +NOTICE: graph "agload_security" has been dropped + drop_graph +------------ + +(1 row) + -- -- End -- diff --git a/regress/expected/index.out b/regress/expected/index.out index 745cab269..ec62bf57d 100644 --- a/regress/expected/index.out +++ b/regress/expected/index.out @@ -264,19 +264,19 @@ $$) as (n agtype); (0 rows) -- 
Verify that the incices are created on id columns -SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index'; +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index' ORDER BY 1; indexname | indexdef -----------------------------+------------------------------------------------------------------------------------------------ + City_pkey | CREATE UNIQUE INDEX "City_pkey" ON cypher_index."City" USING btree (id) + Country_pkey | CREATE UNIQUE INDEX "Country_pkey" ON cypher_index."Country" USING btree (id) + _ag_label_edge_end_id_idx | CREATE INDEX _ag_label_edge_end_id_idx ON cypher_index._ag_label_edge USING btree (end_id) _ag_label_edge_pkey | CREATE UNIQUE INDEX _ag_label_edge_pkey ON cypher_index._ag_label_edge USING btree (id) _ag_label_edge_start_id_idx | CREATE INDEX _ag_label_edge_start_id_idx ON cypher_index._ag_label_edge USING btree (start_id) - _ag_label_edge_end_id_idx | CREATE INDEX _ag_label_edge_end_id_idx ON cypher_index._ag_label_edge USING btree (end_id) _ag_label_vertex_pkey | CREATE UNIQUE INDEX _ag_label_vertex_pkey ON cypher_index._ag_label_vertex USING btree (id) - idx_pkey | CREATE UNIQUE INDEX idx_pkey ON cypher_index.idx USING btree (id) cypher_index_idx_props_uq | CREATE UNIQUE INDEX cypher_index_idx_props_uq ON cypher_index.idx USING btree (properties) - Country_pkey | CREATE UNIQUE INDEX "Country_pkey" ON cypher_index."Country" USING btree (id) - has_city_start_id_idx | CREATE INDEX has_city_start_id_idx ON cypher_index.has_city USING btree (start_id) has_city_end_id_idx | CREATE INDEX has_city_end_id_idx ON cypher_index.has_city USING btree (end_id) - City_pkey | CREATE UNIQUE INDEX "City_pkey" ON cypher_index."City" USING btree (id) + has_city_start_id_idx | CREATE INDEX has_city_start_id_idx ON cypher_index.has_city USING btree (start_id) + idx_pkey | CREATE UNIQUE INDEX idx_pkey ON cypher_index.idx USING btree (id) (10 rows) SET enable_mergejoin = ON; diff --git a/regress/sql/age_load.sql 
b/regress/sql/age_load.sql index cefcfb4ca..976f050af 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -194,6 +194,131 @@ SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', t -- SELECT drop_graph('agload_conversion', true); +-- +-- Test security and permissions +-- + +SELECT create_graph('agload_security'); +SELECT create_vlabel('agload_security', 'Person1'); +SELECT create_vlabel('agload_security', 'Person2'); +SELECT create_elabel('agload_security', 'SecEdge'); + +-- +-- Test 1: File read permission (pg_read_server_files role) +-- +-- Create a user without pg_read_server_files role +CREATE USER load_test_user; +GRANT USAGE ON SCHEMA ag_catalog TO load_test_user; + +-- This should fail because load_test_user doesn't have pg_read_server_files +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Grant pg_read_server_files and try again - should fail on table permission now +GRANT pg_read_server_files TO load_test_user; + +-- +-- Test 2: Table INSERT permission (ACL_INSERT) +-- +-- User has file read permission but no INSERT on the label table +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Grant INSERT permission and try again - should succeed +GRANT USAGE ON SCHEMA agload_security TO load_test_user; +GRANT INSERT ON agload_security."Person1" TO load_test_user; +GRANT INSERT ON agload_security."SecEdge" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."Person1_id_seq" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."SecEdge_id_seq" TO load_test_user; +GRANT SELECT ON ag_catalog.ag_label TO load_test_user; +GRANT 
SELECT ON ag_catalog.ag_graph TO load_test_user; + +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; +SELECT COUNT(*) FROM agload_security."SecEdge"; + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; + +-- +-- Test 3: Row-Level Security (RLS) +-- + +-- Enable RLS on the label tables +ALTER TABLE agload_security."Person1" ENABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" ENABLE ROW LEVEL SECURITY; + +-- Switch to load_test_user +SET ROLE load_test_user; + +-- Loading should fail when RLS is enabled +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +RESET ROLE; + +-- Disable RLS and try again - should succeed +ALTER TABLE agload_security."Person1" DISABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" DISABLE ROW LEVEL SECURITY; + +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; +SELECT COUNT(*) FROM agload_security."SecEdge"; + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; + +-- +-- Test 4: Constraint checking (CHECK constraint) +-- + +-- Add constraint on vertex properties - fail if bool property is false +ALTER TABLE agload_security."Person1" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); + +-- This should fail - constraint violation +SELECT load_labels_from_file('agload_security', 
'Person1', 'age_load/conversion_vertices.csv', true); + +-- Add constraint on edge properties - fail if bool property is false +ALTER TABLE agload_security."SecEdge" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); + +-- This should fail - some edges have bool = false +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +-- cleanup +ALTER TABLE agload_security."Person1" DROP CONSTRAINT check_bool_true; +ALTER TABLE agload_security."SecEdge" DROP CONSTRAINT check_bool_true; + +-- +-- Cleanup +-- +REVOKE ALL ON agload_security."Person1" FROM load_test_user; +REVOKE ALL ON agload_security."SecEdge" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."Person1_id_seq" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."SecEdge_id_seq" FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_label FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_graph FROM load_test_user; +REVOKE ALL ON SCHEMA agload_security FROM load_test_user; +REVOKE ALL ON SCHEMA ag_catalog FROM load_test_user; +REVOKE pg_read_server_files FROM load_test_user; +DROP USER load_test_user; +SELECT drop_graph('agload_security', true); + -- -- End -- diff --git a/regress/sql/index.sql b/regress/sql/index.sql index a6e075c70..d4a4b24a4 100644 --- a/regress/sql/index.sql +++ b/regress/sql/index.sql @@ -165,7 +165,7 @@ SELECT * FROM cypher('cypher_index', $$ $$) as (n agtype); -- Verify that the incices are created on id columns -SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index'; +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index' ORDER BY 1; SET enable_mergejoin = ON; SET enable_hashjoin = OFF; diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index 931c6e0dc..c05bf3352 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -16,50 +16,30 @@ * specific language governing permissions 
and limitations * under the License. */ - #include "postgres.h" -#include "utils/load/ag_load_edges.h" -#include "utils/load/csv.h" +#include "access/heapam.h" +#include "access/table.h" +#include "catalog/namespace.h" +#include "commands/copy.h" +#include "executor/executor.h" +#include "nodes/makefuncs.h" +#include "parser/parse_node.h" +#include "utils/memutils.h" +#include "utils/rel.h" -void edge_field_cb(void *field, size_t field_len, void *data) -{ - - csv_edge_reader *cr = (csv_edge_reader*)data; - if (cr->error) - { - cr->error = 1; - ereport(NOTICE,(errmsg("There is some unknown error"))); - } - - /* check for space to store this field */ - if (cr->cur_field == cr->alloc) - { - cr->alloc *= 2; - cr->fields = repalloc_check(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = repalloc_check(cr->header, sizeof(size_t *) * cr->alloc); - if (cr->fields == NULL) - { - cr->error = 1; - ereport(ERROR, - (errmsg("field_cb: failed to reallocate %zu bytes\n", - sizeof(char *) * cr->alloc))); - } - } - cr->fields_len[cr->cur_field] = field_len; - cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = pnstrdup((char*)field, field_len); - cr->cur_field += 1; -} +#include "utils/load/ag_load_edges.h" -/* Parser calls this function when it detects end of a row */ -void edge_row_cb(int delim __attribute__((unused)), void *data) +/* + * Process a single edge row from COPY's raw fields. + * Edge CSV format: start_id, start_vertex_type, end_id, end_vertex_type, [properties...] 
+ */ +static void process_edge_row(char **fields, int nfields, + char **header, int header_count, + int label_id, Oid label_seq_relid, + Oid graph_oid, bool load_as_agtype, + batch_insert_state *batch_state) { - - csv_edge_reader *cr = (csv_edge_reader*)data; - batch_insert_state *batch_state = cr->batch_state; - - size_t i, n_fields; int64 start_id_int; graphid start_vertex_graph_id; int start_vertex_type_id; @@ -72,104 +52,92 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) int64 entry_id; TupleTableSlot *slot; - n_fields = cr->cur_field; + char *start_vertex_type; + char *end_vertex_type; + agtype *edge_properties; - if (cr->row == 0) - { - cr->header_num = cr->cur_field; - cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )palloc(sizeof(size_t *) * cr->cur_field); - cr->header = palloc((sizeof (char*) * cr->cur_field)); + /* Generate edge ID */ + entry_id = nextval_internal(label_seq_relid, true); + edge_id = make_graphid(label_id, entry_id); - for (i = 0; icur_field; i++) - { - cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = pnstrdup(cr->fields[i], cr->header_len[i]); - } - } - else - { - entry_id = nextval_internal(cr->label_seq_relid, true); - edge_id = make_graphid(cr->label_id, entry_id); - - start_id_int = strtol(cr->fields[0], NULL, 10); - start_vertex_type_id = get_label_id(cr->fields[1], cr->graph_oid); - end_id_int = strtol(cr->fields[2], NULL, 10); - end_vertex_type_id = get_label_id(cr->fields[3], cr->graph_oid); - - start_vertex_graph_id = make_graphid(start_vertex_type_id, start_id_int); - end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int); - - /* Get the appropriate slot from the batch state */ - slot = batch_state->slots[batch_state->num_tuples]; - - /* Clear the slots contents */ - ExecClearTuple(slot); - - /* Fill the values in the slot */ - slot->tts_values[0] = GRAPHID_GET_DATUM(edge_id); - slot->tts_values[1] = GRAPHID_GET_DATUM(start_vertex_graph_id); - slot->tts_values[2] = 
GRAPHID_GET_DATUM(end_vertex_graph_id); - slot->tts_values[3] = AGTYPE_P_GET_DATUM( - create_agtype_from_list_i( - cr->header, cr->fields, - n_fields, 4, cr->load_as_agtype)); - slot->tts_isnull[0] = false; - slot->tts_isnull[1] = false; - slot->tts_isnull[2] = false; - slot->tts_isnull[3] = false; - - /* Make the slot as containing virtual tuple */ - ExecStoreVirtualTuple(slot); - batch_state->num_tuples++; - - if (batch_state->num_tuples >= batch_state->max_tuples) - { - /* Insert the batch when it is full (i.e. BATCH_SIZE) */ - insert_batch(batch_state); - batch_state->num_tuples = 0; - } - } + /* Trim whitespace from vertex type names */ + start_vertex_type = trim_whitespace(fields[1]); + end_vertex_type = trim_whitespace(fields[3]); - for (i = 0; i < n_fields; ++i) - { - pfree_if_not_null(cr->fields[i]); - } + /* Parse start vertex info */ + start_id_int = strtol(fields[0], NULL, 10); + start_vertex_type_id = get_label_id(start_vertex_type, graph_oid); - if (cr->error) - { - ereport(NOTICE,(errmsg("THere is some error"))); - } + /* Parse end vertex info */ + end_id_int = strtol(fields[2], NULL, 10); + end_vertex_type_id = get_label_id(end_vertex_type, graph_oid); - cr->cur_field = 0; - cr->curr_row_length = 0; - cr->row += 1; -} + /* Create graphids for start and end vertices */ + start_vertex_graph_id = make_graphid(start_vertex_type_id, start_id_int); + end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int); -static int is_space(unsigned char c) -{ - if (c == CSV_SPACE || c == CSV_TAB) - { - return 1; - } - else + /* Get the appropriate slot from the batch state */ + slot = batch_state->slots[batch_state->num_tuples]; + + /* Clear the slots contents */ + ExecClearTuple(slot); + + /* Build the agtype properties */ + edge_properties = create_agtype_from_list_i(header, fields, + nfields, 4, load_as_agtype); + + /* Fill the values in the slot */ + slot->tts_values[0] = GRAPHID_GET_DATUM(edge_id); + slot->tts_values[1] = 
GRAPHID_GET_DATUM(start_vertex_graph_id); + slot->tts_values[2] = GRAPHID_GET_DATUM(end_vertex_graph_id); + slot->tts_values[3] = AGTYPE_P_GET_DATUM(edge_properties); + slot->tts_isnull[0] = false; + slot->tts_isnull[1] = false; + slot->tts_isnull[2] = false; + slot->tts_isnull[3] = false; + + /* Make the slot as containing virtual tuple */ + ExecStoreVirtualTuple(slot); + + batch_state->buffered_bytes += VARSIZE(edge_properties); + batch_state->num_tuples++; + + /* Insert the batch when tuple count OR byte threshold is reached */ + if (batch_state->num_tuples >= BATCH_SIZE || + batch_state->buffered_bytes >= MAX_BUFFERED_BYTES) { - return 0; + insert_batch(batch_state); + batch_state->num_tuples = 0; + batch_state->buffered_bytes = 0; } } -static int is_term(unsigned char c) +/* + * Create COPY options for CSV parsing. + * Returns a List of DefElem nodes. + */ +static List *create_copy_options(void) { - if (c == CSV_CR || c == CSV_LF) - { - return 1; - } - else - { - return 0; - } + List *options = NIL; + + /* FORMAT csv */ + options = lappend(options, + makeDefElem("format", + (Node *) makeString("csv"), + -1)); + + /* HEADER false - we'll read the header ourselves */ + options = lappend(options, + makeDefElem("header", + (Node *) makeBoolean(false), + -1)); + + return options; } +/* + * Load edges from CSV file using pg's COPY infrastructure. 
+ */ int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, @@ -177,79 +145,133 @@ int create_edges_from_csv_file(char *file_path, int label_id, bool load_as_agtype) { + Relation label_rel; + Oid label_relid; + CopyFromState cstate; + List *copy_options; + ParseState *pstate; + char **fields; + int nfields; + char **header = NULL; + int header_count = 0; + bool is_first_row = true; + char *label_seq_name; + Oid label_seq_relid; + batch_insert_state *batch_state = NULL; + MemoryContext batch_context; + MemoryContext old_context; + + /* Create a memory context for batch processing - reset after each batch */ + batch_context = AllocSetContextCreate(CurrentMemoryContext, + "AGE CSV Edge Load Batch Context", + ALLOCSET_DEFAULT_SIZES); + + /* Get the label relation */ + label_relid = get_label_relation(label_name, graph_oid); + label_rel = table_open(label_relid, RowExclusiveLock); + + /* Get sequence info */ + label_seq_name = get_label_seq_relation_name(label_name); + label_seq_relid = get_relname_relid(label_seq_name, graph_oid); + + /* Initialize the batch insert state */ + init_batch_insert(&batch_state, label_name, graph_oid); + + /* Create COPY options for CSV parsing */ + copy_options = create_copy_options(); + + /* Create a minimal ParseState for BeginCopyFrom */ + pstate = make_parsestate(NULL); - FILE *fp; - struct csv_parser p; - char buf[1024]; - size_t bytes_read; - unsigned char options = 0; - csv_edge_reader cr; - char *label_seq_name; - - if (csv_init(&p, options) != 0) + PG_TRY(); { - ereport(ERROR, - (errmsg("Failed to initialize csv parser\n"))); - } - - p.malloc_func = palloc; - p.realloc_func = repalloc_check; - p.free_func = pfree_if_not_null; + /* + * Initialize COPY FROM state. + * We pass the label relation but will only use NextCopyFromRawFields + * which returns raw parsed strings without type conversion. 
+ */ + cstate = BeginCopyFrom(pstate, + label_rel, + NULL, /* whereClause */ + file_path, + false, /* is_program */ + NULL, /* data_source_cb */ + NIL, /* attnamelist */ + copy_options); + + /* + * Process rows using COPY's csv parsing. + * NextCopyFromRawFields uses 64KB buffers internally. + */ + while (NextCopyFromRawFields(cstate, &fields, &nfields)) + { + if (is_first_row) + { + int i; - csv_set_space_func(&p, is_space); - csv_set_term_func(&p, is_term); + /* First row is the header - save column names (in main context) */ + header_count = nfields; + header = (char **) palloc(sizeof(char *) * nfields); - fp = fopen(file_path, "rb"); - if (!fp) - { - ereport(ERROR, - (errmsg("Failed to open %s\n", file_path))); - } + for (i = 0; i < nfields; i++) + { + /* Trim whitespace from header fields */ + header[i] = trim_whitespace(fields[i]); + } - PG_TRY(); - { - label_seq_name = get_label_seq_relation_name(label_name); - - memset((void*)&cr, 0, sizeof(csv_edge_reader)); - cr.alloc = 128; - cr.fields = palloc(sizeof(char *) * cr.alloc); - cr.fields_len = palloc(sizeof(size_t *) * cr.alloc); - cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; - - /* Initialize the batch insert state */ - init_batch_insert(&cr.batch_state, label_name, graph_oid); - - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) - { - if (csv_parse(&p, buf, bytes_read, edge_field_cb, - edge_row_cb, &cr) != bytes_read) + is_first_row = false; + } + else { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); + /* Switch to batch context for row processing */ + old_context = MemoryContextSwitchTo(batch_context); + + /* Data row - process it */ + process_edge_row(fields, nfields, + header, header_count, + label_id, label_seq_relid, + graph_oid, 
load_as_agtype, + batch_state); + + /* Switch back to main context */ + MemoryContextSwitchTo(old_context); + + /* Reset batch context after each batch to free memory */ + if (batch_state->num_tuples == 0) + { + MemoryContextReset(batch_context); + } } } - csv_fini(&p, edge_field_cb, edge_row_cb, &cr); - /* Finish any remaining batch inserts */ - finish_batch_insert(&cr.batch_state); + finish_batch_insert(&batch_state); + MemoryContextReset(batch_context); - if (ferror(fp)) - { - ereport(ERROR, (errmsg("Error while reading file %s\n", file_path))); - } + /* Clean up COPY state */ + EndCopyFrom(cstate); } PG_FINALLY(); { - fclose(fp); - csv_free(&p); + /* Free header if allocated */ + if (header != NULL) + { + int i; + for (i = 0; i < header_count; i++) + { + pfree(header[i]); + } + pfree(header); + } + + /* Close the relation */ + table_close(label_rel, RowExclusiveLock); + + /* Delete batch context */ + MemoryContextDelete(batch_context); + + /* Free parse state */ + free_parsestate(pstate); } PG_END_TRY(); diff --git a/src/backend/utils/load/ag_load_labels.c b/src/backend/utils/load/ag_load_labels.c index 1e86bbda4..5b11f68b8 100644 --- a/src/backend/utils/load/ag_load_labels.c +++ b/src/backend/utils/load/ag_load_labels.c @@ -17,155 +17,114 @@ * under the License. 
*/ #include "postgres.h" -#include "executor/spi.h" + +#include "access/heapam.h" +#include "access/table.h" #include "catalog/namespace.h" +#include "commands/copy.h" #include "executor/executor.h" +#include "nodes/makefuncs.h" +#include "parser/parse_node.h" +#include "utils/memutils.h" +#include "utils/rel.h" #include "utils/load/ag_load_labels.h" -#include "utils/load/csv.h" - -void vertex_field_cb(void *field, size_t field_len, void *data) -{ - - csv_vertex_reader *cr = (csv_vertex_reader *) data; - - if (cr->error) - { - cr->error = 1; - ereport(NOTICE,(errmsg("There is some unknown error"))); - } - - /* check for space to store this field */ - if (cr->cur_field == cr->alloc) - { - cr->alloc *= 2; - cr->fields = repalloc_check(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = repalloc_check(cr->header, sizeof(size_t *) * cr->alloc); - if (cr->fields == NULL) - { - cr->error = 1; - ereport(ERROR, - (errmsg("field_cb: failed to reallocate %zu bytes\n", - sizeof(char *) * cr->alloc))); - } - } - cr->fields_len[cr->cur_field] = field_len; - cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = pnstrdup((char *) field, field_len); - cr->cur_field += 1; -} -void vertex_row_cb(int delim __attribute__((unused)), void *data) +/* + * Process a single vertex row from COPY's raw fields. + * Vertex CSV format: [id,] [properties...] 
+ */ static void process_vertex_row(char **fields, int nfields, + char **header, int header_count, + int label_id, Oid label_seq_relid, + bool id_field_exists, bool load_as_agtype, + int64 *curr_seq_num, + batch_insert_state *batch_state) { - csv_vertex_reader *cr = (csv_vertex_reader*)data; - batch_insert_state *batch_state = cr->batch_state; - size_t i, n_fields; graphid vertex_id; int64 entry_id; TupleTableSlot *slot; + agtype *vertex_properties; - n_fields = cr->cur_field; - - if (cr->row == 0) + /* Generate or use provided entry_id */ + if (id_field_exists) { - cr->header_num = cr->cur_field; - cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )palloc(sizeof(size_t *) * cr->cur_field); - cr->header = palloc((sizeof (char*) * cr->cur_field)); - - for (i = 0; i<cr->cur_field; i++) + entry_id = strtol(fields[0], NULL, 10); + if (entry_id > *curr_seq_num) { - cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = pnstrdup(cr->fields[i], cr->header_len[i]); + /* This is needed to ensure the sequence is up-to-date */ + DirectFunctionCall2(setval_oid, + ObjectIdGetDatum(label_seq_relid), + Int64GetDatum(entry_id)); + *curr_seq_num = entry_id; } } else { - if (cr->id_field_exists) - { - entry_id = strtol(cr->fields[0], NULL, 10); - if (entry_id > cr->curr_seq_num) - { - DirectFunctionCall2(setval_oid, - ObjectIdGetDatum(cr->label_seq_relid), - Int64GetDatum(entry_id)); - cr->curr_seq_num = entry_id; - } - } - else - { - entry_id = nextval_internal(cr->label_seq_relid, true); - } + entry_id = nextval_internal(label_seq_relid, true); + } - vertex_id = make_graphid(cr->label_id, entry_id); + vertex_id = make_graphid(label_id, entry_id); - /* Get the appropriate slot from the batch state */ - slot = batch_state->slots[batch_state->num_tuples]; + /* Get the appropriate slot from the batch state */ + slot = batch_state->slots[batch_state->num_tuples]; - /* Clear the slots contents */ - ExecClearTuple(slot); + /* Clear the slots contents */ + 
ExecClearTuple(slot); - /* Fill the values in the slot */ - slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); - slot->tts_values[1] = AGTYPE_P_GET_DATUM( - create_agtype_from_list(cr->header, cr->fields, - n_fields, entry_id, - cr->load_as_agtype)); - slot->tts_isnull[0] = false; - slot->tts_isnull[1] = false; + /* Build the agtype properties */ + vertex_properties = create_agtype_from_list(header, fields, + nfields, entry_id, + load_as_agtype); - /* Make the slot as containing virtual tuple */ - ExecStoreVirtualTuple(slot); + /* Fill the values in the slot */ + slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); + slot->tts_values[1] = AGTYPE_P_GET_DATUM(vertex_properties); + slot->tts_isnull[0] = false; + slot->tts_isnull[1] = false; - batch_state->num_tuples++; + /* Make the slot as containing virtual tuple */ + ExecStoreVirtualTuple(slot); - if (batch_state->num_tuples >= batch_state->max_tuples) - { - /* Insert the batch when it is full (i.e. BATCH_SIZE) */ - insert_batch(batch_state); - batch_state->num_tuples = 0; - } - } + batch_state->buffered_bytes += VARSIZE(vertex_properties); + batch_state->num_tuples++; - for (i = 0; i < n_fields; ++i) + /* Insert the batch when tuple count OR byte threshold is reached */ + if (batch_state->num_tuples >= BATCH_SIZE || + batch_state->buffered_bytes >= MAX_BUFFERED_BYTES) { - pfree_if_not_null(cr->fields[i]); + insert_batch(batch_state); + batch_state->num_tuples = 0; + batch_state->buffered_bytes = 0; } - - if (cr->error) - { - ereport(NOTICE,(errmsg("THere is some error"))); - } - - cr->cur_field = 0; - cr->curr_row_length = 0; - cr->row += 1; } -static int is_space(unsigned char c) +/* + * Create COPY options for csv parsing. + * Returns a List of DefElem nodes. 
+ */ +static List *create_copy_options(void) { - if (c == CSV_SPACE || c == CSV_TAB) - { - return 1; - } - else - { - return 0; - } + List *options = NIL; -} -static int is_term(unsigned char c) -{ - if (c == CSV_CR || c == CSV_LF) - { - return 1; - } - else - { - return 0; - } + /* FORMAT csv */ + options = lappend(options, + makeDefElem("format", + (Node *) makeString("csv"), + -1)); + + /* HEADER false - we'll read the header ourselves */ + options = lappend(options, + makeDefElem("header", + (Node *) makeBoolean(false), + -1)); + + return options; } +/* + * Load vertex labels from csv file using pg's COPY infrastructure. + */ int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, @@ -174,96 +133,146 @@ int create_labels_from_csv_file(char *file_path, bool id_field_exists, bool load_as_agtype) { - - FILE *fp; - struct csv_parser p; - char buf[1024]; - size_t bytes_read; - unsigned char options = 0; - csv_vertex_reader cr; - char *label_seq_name; - - if (csv_init(&p, options) != 0) + Relation label_rel; + Oid label_relid; + CopyFromState cstate; + List *copy_options; + ParseState *pstate; + char **fields; + int nfields; + char **header = NULL; + int header_count = 0; + bool is_first_row = true; + char *label_seq_name; + Oid label_seq_relid; + int64 curr_seq_num = 0; + batch_insert_state *batch_state = NULL; + MemoryContext batch_context; + MemoryContext old_context; + + /* Create a memory context for batch processing - reset after each batch */ + batch_context = AllocSetContextCreate(CurrentMemoryContext, + "AGE CSV Load Batch Context", + ALLOCSET_DEFAULT_SIZES); + + /* Get the label relation */ + label_relid = get_label_relation(label_name, graph_oid); + label_rel = table_open(label_relid, RowExclusiveLock); + + /* Get sequence info */ + label_seq_name = get_label_seq_relation_name(label_name); + label_seq_relid = get_relname_relid(label_seq_name, graph_oid); + + if (id_field_exists) { - ereport(ERROR, - (errmsg("Failed to initialize 
csv parser\n"))); + /* + * Set the curr_seq_num since we will need it to compare with + * incoming entry_id. + */ + curr_seq_num = nextval_internal(label_seq_relid, true); } - p.malloc_func = palloc; - p.realloc_func = repalloc_check; - p.free_func = pfree_if_not_null; + /* Initialize the batch insert state */ + init_batch_insert(&batch_state, label_name, graph_oid); - csv_set_space_func(&p, is_space); - csv_set_term_func(&p, is_term); + /* Create COPY options for CSV parsing */ + copy_options = create_copy_options(); - fp = fopen(file_path, "rb"); - if (!fp) - { - ereport(ERROR, - (errmsg("Failed to open %s\n", file_path))); - } + /* Create a minimal ParseState for BeginCopyFrom */ + pstate = make_parsestate(NULL); PG_TRY(); { - label_seq_name = get_label_seq_relation_name(label_name); - - memset((void*)&cr, 0, sizeof(csv_vertex_reader)); - - cr.alloc = 2048; - cr.fields = palloc(sizeof(char *) * cr.alloc); - cr.fields_len = palloc(sizeof(size_t *) * cr.alloc); - cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.id_field_exists = id_field_exists; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; - - if (cr.id_field_exists) + /* + * Initialize COPY FROM state. + * We pass the label relation but will only use NextCopyFromRawFields + * which returns raw parsed strings without type conversion. + */ + cstate = BeginCopyFrom(pstate, + label_rel, + NULL, /* whereClause */ + file_path, + false, /* is_program */ + NULL, /* data_source_cb */ + NIL, /* attnamelist - NULL means all columns */ + copy_options); + + /* + * Process rows using COPY's csv parsing. + * NextCopyFromRawFields uses 64KB buffers internally. + */ + while (NextCopyFromRawFields(cstate, &fields, &nfields)) { - /* - * Set the curr_seq_num since we will need it to compare with - * incoming entry_id. 
- * - * We cant use currval because it will error out if nextval was - * not called before in the session. - */ - cr.curr_seq_num = nextval_internal(cr.label_seq_relid, true); - } + if (is_first_row) + { + int i; - /* Initialize the batch insert state */ - init_batch_insert(&cr.batch_state, label_name, graph_oid); + /* First row is the header - save column names (in main context) */ + header_count = nfields; + header = (char **) palloc(sizeof(char *) * nfields); - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) - { - if (csv_parse(&p, buf, bytes_read, vertex_field_cb, - vertex_row_cb, &cr) != bytes_read) + for (i = 0; i < nfields; i++) + { + /* Trim whitespace from header fields */ + header[i] = trim_whitespace(fields[i]); + } + + is_first_row = false; + } + else { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); + /* Switch to batch context for row processing */ + old_context = MemoryContextSwitchTo(batch_context); + + /* Data row - process it */ + process_vertex_row(fields, nfields, + header, header_count, + label_id, label_seq_relid, + id_field_exists, load_as_agtype, + &curr_seq_num, + batch_state); + + /* Switch back to main context */ + MemoryContextSwitchTo(old_context); + + /* Reset batch context after each batch to free memory */ + if (batch_state->num_tuples == 0) + { + MemoryContextReset(batch_context); + } } } - csv_fini(&p, vertex_field_cb, vertex_row_cb, &cr); - /* Finish any remaining batch inserts */ - finish_batch_insert(&cr.batch_state); + finish_batch_insert(&batch_state); + MemoryContextReset(batch_context); - if (ferror(fp)) - { - ereport(ERROR, (errmsg("Error while reading file %s\n", - file_path))); - } + /* Clean up COPY state */ + EndCopyFrom(cstate); } PG_FINALLY(); { - fclose(fp); - csv_free(&p); + /* Free header if allocated */ + if (header != NULL) + { + int i; + for (i = 0; i < header_count; i++) + { + pfree(header[i]); + } + pfree(header); + } + + /* Close the relation */ + 
table_close(label_rel, RowExclusiveLock); + + /* Delete batch context */ + MemoryContextDelete(batch_context); + + /* Free parse state */ + free_parsestate(pstate); } PG_END_TRY(); return EXIT_SUCCESS; -} \ No newline at end of file +} diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index 79170fd53..b59920c9c 100644 --- a/src/backend/utils/load/age_load.c +++ b/src/backend/utils/load/age_load.c @@ -18,24 +18,81 @@ */ #include "postgres.h" + +#include "access/heapam.h" +#include "access/table.h" +#include "access/tableam.h" +#include "access/xact.h" #include "catalog/indexing.h" +#include "catalog/pg_authid.h" #include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/parsenodes.h" +#include "parser/parse_relation.h" +#include "utils/acl.h" #include "utils/json.h" +#include "utils/rel.h" +#include "utils/rls.h" #include "utils/load/ag_load_edges.h" #include "utils/load/ag_load_labels.h" #include "utils/load/age_load.h" -#include "utils/rel.h" static agtype_value *csv_value_to_agtype_value(char *csv_val); static Oid get_or_create_graph(const Name graph_name); static int32 get_or_create_label(Oid graph_oid, char *graph_name, char *label_name, char label_kind); static char *build_safe_filename(char *name); +static void check_file_read_permission(void); +static void check_table_permissions(Oid relid); +static void check_rls_for_load(Oid relid); #define AGE_BASE_CSV_DIRECTORY "/tmp/age/" #define AGE_CSV_FILE_EXTENSION ".csv" +/* + * Trim leading and trailing whitespace from a string. + * Returns a newly allocated string with whitespace removed. + * Returns empty string for NULL input. 
+ */ +char *trim_whitespace(const char *str) +{ + const char *start; + const char *end; + size_t len; + + if (str == NULL) + { + return pstrdup(""); + } + + /* Find first non-whitespace character */ + start = str; + while (*start && (*start == ' ' || *start == '\t' || + *start == '\n' || *start == '\r')) + { + start++; + } + + /* If string is all whitespace, return empty string */ + if (*start == '\0') + { + return pstrdup(""); + } + + /* Find last non-whitespace character */ + end = str + strlen(str) - 1; + while (end > start && (*end == ' ' || *end == '\t' || + *end == '\n' || *end == '\r')) + { + end--; + } + + /* Copy the trimmed string */ + len = end - start + 1; + return pnstrdup(start, len); +} + static char *build_safe_filename(char *name) { int length; @@ -88,6 +145,51 @@ static char *build_safe_filename(char *name) return resolved; } +/* + * Check if the current user has permission to read server files. + * Only users with the pg_read_server_files role can load from files. + */ +static void check_file_read_permission(void) +{ + if (!has_privs_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES)) + { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to LOAD from a file"), + errdetail("Only roles with privileges of the \"%s\" role may LOAD from a file.", + "pg_read_server_files"))); + } +} + +/* + * Check if the current user has INSERT permission on the target table. + */ +static void check_table_permissions(Oid relid) +{ + AclResult aclresult; + + aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_INSERT); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, OBJECT_TABLE, get_rel_name(relid)); + } +} + +/* + * Check if RLS is enabled on the target table. + * CSV loading is not supported with row-level security. 
+ */ static void check_rls_for_load(Oid relid) +{ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("LOAD from file is not supported with row-level security"), + errhint("Use Cypher CREATE clause instead."))); + } +} + agtype *create_empty_agtype(void) { agtype* out; @@ -118,6 +220,14 @@ static agtype_value *csv_value_to_agtype_value(char *csv_val) char *new_csv_val; agtype_value *res; + /* Handle NULL or empty input - return null agtype value */ + if (csv_val == NULL || csv_val[0] == '\0') + { + res = palloc(sizeof(agtype_value)); + res->type = AGTV_NULL; + return res; + } + if (!json_validate(cstring_to_text(csv_val), false, false)) { // wrap the string with double-quote @@ -175,18 +285,40 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, for (i = 0; i<fields_len; i++) { + char *trimmed_value; + + /* Skip empty header fields (e.g., from trailing commas) */ + if (header[i] == NULL || header[i][0] == '\0') + { + continue; + } + key_agtype = string_to_agtype_value(header[i]); result.res = push_agtype_value(&result.parse_state, WAGT_KEY, key_agtype); + /* Trim whitespace from field value */ + trimmed_value = trim_whitespace(fields[i]); + if (load_as_agtype) { - value_agtype = csv_value_to_agtype_value(fields[i]); + value_agtype = csv_value_to_agtype_value(trimmed_value); } else { - value_agtype = string_to_agtype_value(fields[i]); + /* Handle empty field values */ + if (trimmed_value[0] == '\0') + { + value_agtype = palloc(sizeof(agtype_value)); + value_agtype->type = AGTV_STRING; + value_agtype->val.string.len = 0; + value_agtype->val.string.val = pstrdup(""); + } + else + { + value_agtype = string_to_agtype_value(trimmed_value); + } } result.res = push_agtype_value(&result.parse_state, @@ -228,18 +360,40 @@ agtype* create_agtype_from_list_i(char **header, char **fields, for (i = start_index; i < fields_len; i++) { + char *trimmed_value; + + /* Skip empty header fields (e.g., from trailing commas) */ + if (header[i] == NULL || header[i][0] == '\0') + { + continue; + } + key_agtype = string_to_agtype_value(header[i]); result.res = push_agtype_value(&result.parse_state, WAGT_KEY, key_agtype); + /* Trim whitespace from field value */ + trimmed_value = trim_whitespace(fields[i]); + if (load_as_agtype) { - value_agtype = csv_value_to_agtype_value(fields[i]); + value_agtype = csv_value_to_agtype_value(trimmed_value); } else { - value_agtype = string_to_agtype_value(fields[i]); + /* Handle empty field values */ + if (trimmed_value[0] == '\0') + { + value_agtype = palloc(sizeof(agtype_value)); + value_agtype->type = AGTV_STRING; + value_agtype->val.string.len 
= 0; + value_agtype->val.string.val = pstrdup(""); + } + else + { + value_agtype = string_to_agtype_value(trimmed_value); + } } result.res = push_agtype_value(&result.parse_state, @@ -362,11 +516,24 @@ void insert_batch(batch_insert_state *batch_state) List *result; int i; + /* Check constraints for each tuple before inserting */ + if (batch_state->resultRelInfo->ri_RelationDesc->rd_att->constr) + { + for (i = 0; i < batch_state->num_tuples; i++) + { + ExecConstraints(batch_state->resultRelInfo, + batch_state->slots[i], + batch_state->estate); + } + } + /* Insert the tuples */ heap_multi_insert(batch_state->resultRelInfo->ri_RelationDesc, batch_state->slots, batch_state->num_tuples, - GetCurrentCommandId(true), 0, NULL); - + GetCurrentCommandId(true), + TABLE_INSERT_SKIP_FSM, /* Skip free space map for bulk */ + batch_state->bistate); /* Use bulk insert state */ + /* Insert index entries for the tuples */ if (batch_state->resultRelInfo->ri_NumIndices > 0) { @@ -405,6 +572,7 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) char* label_name_str; char* file_path_str; Oid graph_oid; + Oid label_relid; int32 label_id; bool id_field_exists; bool load_as_agtype; @@ -427,6 +595,9 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) errmsg("file path must not be NULL"))); } + /* Check file read permission first */ + check_file_read_permission(); + graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); file_name = PG_GETARG_TEXT_P(2); @@ -447,6 +618,11 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) label_id = get_or_create_label(graph_oid, graph_name_str, label_name_str, LABEL_KIND_VERTEX); + /* Get the label relation and check permissions */ + label_relid = get_label_relation(label_name_str, graph_oid); + check_table_permissions(label_relid); + check_rls_for_load(label_relid); + create_labels_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, id_field_exists, load_as_agtype); @@ -459,7 +635,6 @@ Datum 
load_labels_from_file(PG_FUNCTION_ARGS) PG_FUNCTION_INFO_V1(load_edges_from_file); Datum load_edges_from_file(PG_FUNCTION_ARGS) { - Name graph_name; Name label_name; text* file_name; @@ -467,6 +642,7 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) char* label_name_str; char* file_path_str; Oid graph_oid; + Oid label_relid; int32 label_id; bool load_as_agtype; @@ -488,6 +664,9 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) errmsg("file path must not be NULL"))); } + /* Check file read permission first */ + check_file_read_permission(); + graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); file_name = PG_GETARG_TEXT_P(2); @@ -507,6 +686,11 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) label_id = get_or_create_label(graph_oid, graph_name_str, label_name_str, LABEL_KIND_EDGE); + /* Get the label relation and check permissions */ + label_relid = get_label_relation(label_name_str, graph_oid); + check_table_permissions(label_relid); + check_rls_for_load(label_relid); + create_edges_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, load_as_agtype); @@ -597,19 +781,42 @@ void init_batch_insert(batch_insert_state **batch_state, Oid relid; EState *estate; ResultRelInfo *resultRelInfo; + RangeTblEntry *rte; + RTEPermissionInfo *perminfo; + List *range_table = NIL; + List *perminfos = NIL; int i; - /* Open the relation */ + /* Get the relation OID */ relid = get_label_relation(label_name, graph_oid); - relation = table_open(relid, RowExclusiveLock); /* Initialize executor state */ estate = CreateExecutorState(); - /* Initialize resultRelInfo */ + /* Create range table entry for ExecConstraints */ + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = relid; + rte->relkind = RELKIND_RELATION; + rte->rellockmode = RowExclusiveLock; + rte->perminfoindex = 1; + range_table = list_make1(rte); + + /* Create permission info */ + perminfo = makeNode(RTEPermissionInfo); + perminfo->relid = relid; + 
perminfo->requiredPerms = ACL_INSERT; + perminfos = list_make1(perminfo); + + /* Initialize range table in executor state */ + ExecInitRangeTable(estate, range_table, perminfos); + + /* Initialize resultRelInfo - this opens the relation */ resultRelInfo = makeNode(ResultRelInfo); - InitResultRelInfo(resultRelInfo, relation, 1, NULL, estate->es_instrument); - estate->es_result_relations = &resultRelInfo; + ExecInitResultRelation(estate, resultRelInfo, 1); + + /* Get relation from resultRelInfo (opened by ExecInitResultRelation) */ + relation = resultRelInfo->ri_RelationDesc; /* Open the indices */ ExecOpenIndices(resultRelInfo, false); @@ -619,8 +826,9 @@ void init_batch_insert(batch_insert_state **batch_state, (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); (*batch_state)->estate = estate; (*batch_state)->resultRelInfo = resultRelInfo; - (*batch_state)->max_tuples = BATCH_SIZE; (*batch_state)->num_tuples = 0; + (*batch_state)->buffered_bytes = 0; + (*batch_state)->bistate = GetBulkInsertState(); /* Create slots */ for (i = 0; i < BATCH_SIZE; i++) @@ -651,12 +859,14 @@ void finish_batch_insert(batch_insert_state **batch_state) ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); } - /* Clean up, close the indices and relation */ - ExecCloseIndices((*batch_state)->resultRelInfo); - table_close((*batch_state)->resultRelInfo->ri_RelationDesc, - RowExclusiveLock); + /* Free BulkInsertState */ + FreeBulkInsertState((*batch_state)->bistate); + + /* Close result relations and range table relations */ + ExecCloseResultRelations((*batch_state)->estate); + ExecCloseRangeTableRelations((*batch_state)->estate); - /* Clean up batch state */ + /* Clean up executor state */ FreeExecutorState((*batch_state)->estate); pfree((*batch_state)->slots); pfree(*batch_state); diff --git a/src/backend/utils/load/libcsv.c b/src/backend/utils/load/libcsv.c deleted file mode 100644 index f0e8b46be..000000000 --- a/src/backend/utils/load/libcsv.c +++ /dev/null @@ -1,549 
+0,0 @@ -/* -libcsv - parse and write csv data -Copyright (C) 2008 Robert Gamble - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#include <assert.h> - -#if __STDC_VERSION__ >= 199901L -# include <stdint.h> -#else - /* C89 doesn't have stdint.h or SIZE_MAX */ -# define SIZE_MAX ((size_t)-1) -#endif - -#include "utils/load/csv.h" - -#define VERSION "3.0.3" - -#define ROW_NOT_BEGUN 0 -#define FIELD_NOT_BEGUN 1 -#define FIELD_BEGUN 2 -#define FIELD_MIGHT_HAVE_ENDED 3 - -/* - Explanation of states - ROW_NOT_BEGUN There have not been any fields encountered for this row - FIELD_NOT_BEGUN There have been fields but we are currently not in one - FIELD_BEGUN We are in a field - FIELD_MIGHT_HAVE_ENDED - We encountered a double quote inside a quoted field, the - field is either ended or the quote is literal -*/ - -#define MEM_BLK_SIZE 128 - -#define SUBMIT_FIELD(p) \ - do { \ - if (!quoted) \ - entry_pos -= spaces; \ - if (p->options & CSV_APPEND_NULL) \ - ((p)->entry_buf[entry_pos]) = '\0'; \ - if (cb1 && (p->options & CSV_EMPTY_IS_NULL) && !quoted && entry_pos == 0) \ - cb1(NULL, entry_pos, data); \ - else if (cb1) \ - cb1(p->entry_buf, entry_pos, data); \ - pstate = FIELD_NOT_BEGUN; \ - entry_pos = quoted = spaces = 0; \ - } while (0) - -#define SUBMIT_ROW(p, c) \ - do { \ - if (cb2) \ - cb2(c, data); \ - pstate = ROW_NOT_BEGUN; \ - 
entry_pos = quoted = spaces = 0; \ - } while (0) - -#define SUBMIT_CHAR(p, c) ((p)->entry_buf[entry_pos++] = (c)) - -static const char *csv_errors[] = {"success", - "error parsing data while strict checking enabled", - "memory exhausted while increasing buffer size", - "data size too large", - "invalid status code"}; - -int -csv_error(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Return the current status of the parser */ - return p->status; -} - -const char * -csv_strerror(int status) -{ - /* Return a textual description of status */ - if (status >= CSV_EINVALID || status < 0) - return csv_errors[CSV_EINVALID]; - else - return csv_errors[status]; -} - -int -csv_get_opts(const struct csv_parser *p) -{ - /* Return the currently set options of parser */ - if (p == NULL) - return -1; - - return p->options; -} - -int -csv_set_opts(struct csv_parser *p, unsigned char options) -{ - /* Set the options */ - if (p == NULL) - return -1; - - p->options = options; - return 0; -} - -int -csv_init(struct csv_parser *p, unsigned char options) -{ - /* Initialize a csv_parser object returns 0 on success, -1 on error */ - if (p == NULL) - return -1; - - p->entry_buf = NULL; - p->pstate = ROW_NOT_BEGUN; - p->quoted = 0; - p->spaces = 0; - p->entry_pos = 0; - p->entry_size = 0; - p->status = 0; - p->options = options; - p->quote_char = CSV_QUOTE; - p->delim_char = CSV_COMMA; - p->is_space = NULL; - p->is_term = NULL; - p->blk_size = MEM_BLK_SIZE; - p->malloc_func = NULL; - p->realloc_func = realloc; - p->free_func = free; - - return 0; -} - -void -csv_free(struct csv_parser *p) -{ - /* Free the entry_buffer of csv_parser object */ - if (p == NULL) - return; - - if (p->entry_buf && p->free_func) - p->free_func(p->entry_buf); - - p->entry_buf = NULL; - p->entry_size = 0; - - return; -} - -int -csv_fini(struct csv_parser *p, void (*cb1)(void *, size_t, void *), void (*cb2)(int c, void *), void *data) -{ - int quoted; - int pstate; - size_t spaces; - 
size_t entry_pos; - - if (p == NULL) - return -1; - - /* Finalize parsing. Needed, for example, when file does not end in a newline */ - quoted = p->quoted; - pstate = p->pstate; - spaces = p->spaces; - entry_pos = p->entry_pos; - - if ((pstate == FIELD_BEGUN) && p->quoted && (p->options & CSV_STRICT) && (p->options & CSV_STRICT_FINI)) { - /* Current field is quoted, no end-quote was seen, and CSV_STRICT_FINI is set */ - p->status = CSV_EPARSE; - return -1; - } - - switch (pstate) { - case FIELD_MIGHT_HAVE_ENDED: - p->entry_pos -= p->spaces + 1; /* get rid of spaces and original quote */ - entry_pos = p->entry_pos; - /*lint -fallthrough */ - case FIELD_NOT_BEGUN: - case FIELD_BEGUN: - /* Unnecessary: - quoted = p->quoted, pstate = p->pstate; - spaces = p->spaces, entry_pos = p->entry_pos; - */ - SUBMIT_FIELD(p); - SUBMIT_ROW(p, -1); - break; - case ROW_NOT_BEGUN: /* Already ended properly */ - ; - } - - /* Reset parser */ - p->spaces = p->quoted = p->entry_pos = p->status = 0; - p->pstate = ROW_NOT_BEGUN; - - return 0; -} - -void -csv_set_delim(struct csv_parser *p, unsigned char c) -{ - /* Set the delimiter */ - if (p) p->delim_char = c; -} - -void -csv_set_quote(struct csv_parser *p, unsigned char c) -{ - /* Set the quote character */ - if (p) p->quote_char = c; -} - -unsigned char -csv_get_delim(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Get the delimiter */ - return p->delim_char; -} - -unsigned char -csv_get_quote(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Get the quote character */ - return p->quote_char; -} - -void -csv_set_space_func(struct csv_parser *p, int (*f)(unsigned char)) -{ - /* Set the space function */ - if (p) p->is_space = f; -} - -void -csv_set_term_func(struct csv_parser *p, int (*f)(unsigned char)) -{ - /* Set the term function */ - if (p) p->is_term = f; -} - -void -csv_set_realloc_func(struct csv_parser *p, void *(*f)(void *, size_t)) -{ - /* Set the realloc 
function used to increase buffer size */ - if (p && f) p->realloc_func = f; -} - -void -csv_set_free_func(struct csv_parser *p, void (*f)(void *)) -{ - /* Set the free function used to free the buffer */ - if (p && f) p->free_func = f; -} - -void -csv_set_blk_size(struct csv_parser *p, size_t size) -{ - /* Set the block size used to increment buffer size */ - if (p) p->blk_size = size; -} - -size_t -csv_get_buffer_size(const struct csv_parser *p) -{ - /* Get the size of the entry buffer */ - if (p) - return p->entry_size; - return 0; -} - -static int -csv_increase_buffer(struct csv_parser *p) -{ - size_t to_add; - void *vp; - - if (p == NULL) return 0; - if (p->realloc_func == NULL) return 0; - - /* Increase the size of the entry buffer. Attempt to increase size by - * p->blk_size, if this is larger than SIZE_MAX try to increase current - * buffer size to SIZE_MAX. If allocation fails, try to allocate halve - * the size and try again until successful or increment size is zero. - */ - - to_add = p->blk_size; - - if ( p->entry_size >= SIZE_MAX - to_add ) - to_add = SIZE_MAX - p->entry_size; - - if (!to_add) { - p->status = CSV_ETOOBIG; - return -1; - } - - while ((vp = p->realloc_func(p->entry_buf, p->entry_size + to_add)) == NULL) { - to_add /= 2; - if (!to_add) { - p->status = CSV_ENOMEM; - return -1; - } - } - - /* Update entry buffer pointer and entry_size if successful */ - p->entry_buf = vp; - p->entry_size += to_add; - return 0; -} - -size_t -csv_parse(struct csv_parser *p, const void *s, size_t len, void (*cb1)(void *, size_t, void *), void (*cb2)(int c, void *), void *data) -{ - unsigned const char *us = s; /* Access input data as array of unsigned char */ - unsigned char c; /* The character we are currently processing */ - size_t pos = 0; /* The number of characters we have processed in this call */ - - /* Store key fields into local variables for performance */ - unsigned char delim = p->delim_char; - unsigned char quote = p->quote_char; - int 
(*is_space)(unsigned char) = p->is_space; - int (*is_term)(unsigned char) = p->is_term; - int quoted = p->quoted; - int pstate = p->pstate; - size_t spaces = p->spaces; - size_t entry_pos = p->entry_pos; - - - if (!p->entry_buf && pos < len) { - /* Buffer hasn't been allocated yet and len > 0 */ - if (csv_increase_buffer(p) != 0) { - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; - } - } - - while (pos < len) { - /* Check memory usage, increase buffer if necessary */ - if (entry_pos == ((p->options & CSV_APPEND_NULL) ? p->entry_size - 1 : p->entry_size) ) { - if (csv_increase_buffer(p) != 0) { - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; - } - } - - c = us[pos++]; - - switch (pstate) { - case ROW_NOT_BEGUN: - case FIELD_NOT_BEGUN: - if ((is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB) && c!=delim) { /* Space or Tab */ - continue; - } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - if (pstate == FIELD_NOT_BEGUN) { - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else { /* ROW_NOT_BEGUN */ - /* Don't submit empty rows by default */ - if (p->options & CSV_REPALL_NL) { - SUBMIT_ROW(p, c); - } - } - continue; - } else if (c == delim) { /* Comma */ - SUBMIT_FIELD(p); - break; - } else if (c == quote) { /* Quote */ - pstate = FIELD_BEGUN; - quoted = 1; - } else { /* Anything else */ - pstate = FIELD_BEGUN; - quoted = 0; - SUBMIT_CHAR(p, c); - } - break; - case FIELD_BEGUN: - if (c == quote) { /* Quote */ - if (quoted) { - SUBMIT_CHAR(p, c); - pstate = FIELD_MIGHT_HAVE_ENDED; - } else { - /* STRICT ERROR - double quote inside non-quoted field */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - SUBMIT_CHAR(p, c); - spaces = 0; - } - } else if (c == delim) { /* Comma */ - if (quoted) { - 
SUBMIT_CHAR(p, c); - } else { - SUBMIT_FIELD(p); - } - } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - if (!quoted) { - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else { - SUBMIT_CHAR(p, c); - } - } else if (!quoted && (is_space? is_space(c) : c == CSV_SPACE || c == CSV_TAB)) { /* Tab or space for non-quoted field */ - SUBMIT_CHAR(p, c); - spaces++; - } else { /* Anything else */ - SUBMIT_CHAR(p, c); - spaces = 0; - } - break; - case FIELD_MIGHT_HAVE_ENDED: - /* This only happens when a quote character is encountered in a quoted field */ - if (c == delim) { /* Comma */ - entry_pos -= spaces + 1; /* get rid of spaces and original quote */ - SUBMIT_FIELD(p); - } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - entry_pos -= spaces + 1; /* get rid of spaces and original quote */ - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else if (is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB) { /* Space or Tab */ - SUBMIT_CHAR(p, c); - spaces++; - } else if (c == quote) { /* Quote */ - if (spaces) { - /* STRICT ERROR - unescaped double quote */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - spaces = 0; - SUBMIT_CHAR(p, c); - } else { - /* Two quotes in a row */ - pstate = FIELD_BEGUN; - } - } else { /* Anything else */ - /* STRICT ERROR - unescaped double quote */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - pstate = FIELD_BEGUN; - spaces = 0; - SUBMIT_CHAR(p, c); - } - break; - default: - break; - } - } - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; -} - -size_t -csv_write (void *dest, size_t dest_size, const void *src, size_t src_size) -{ - return csv_write2(dest, dest_size, src, 
src_size, CSV_QUOTE); -} - -int -csv_fwrite (FILE *fp, const void *src, size_t src_size) -{ - return csv_fwrite2(fp, src, src_size, CSV_QUOTE); -} - -size_t -csv_write2 (void *dest, size_t dest_size, const void *src, size_t src_size, unsigned char quote) -{ - unsigned char *cdest = dest; - const unsigned char *csrc = src; - size_t chars = 0; - - if (src == NULL) - return 0; - - if (dest == NULL) - dest_size = 0; - - if (dest_size > 0) - *cdest++ = quote; - chars++; - - while (src_size) { - if (*csrc == quote) { - if (dest_size > chars) - *cdest++ = quote; - if (chars < SIZE_MAX) chars++; - } - if (dest_size > chars) - *cdest++ = *csrc; - if (chars < SIZE_MAX) chars++; - src_size--; - csrc++; - } - - if (dest_size > chars) - *cdest = quote; - if (chars < SIZE_MAX) chars++; - - return chars; -} - -int -csv_fwrite2 (FILE *fp, const void *src, size_t src_size, unsigned char quote) -{ - const unsigned char *csrc = src; - - if (fp == NULL || src == NULL) - return 0; - - if (fputc(quote, fp) == EOF) - return EOF; - - while (src_size) { - if (*csrc == quote) { - if (fputc(quote, fp) == EOF) - return EOF; - } - if (fputc(*csrc, fp) == EOF) - return EOF; - src_size--; - csrc++; - } - - if (fputc(quote, fp) == EOF) { - return EOF; - } - - return 0; -} diff --git a/src/include/utils/load/ag_load_edges.h b/src/include/utils/load/ag_load_edges.h index eec9484cc..4db00d93a 100644 --- a/src/include/utils/load/ag_load_edges.h +++ b/src/include/utils/load/ag_load_edges.h @@ -17,42 +17,28 @@ * under the License. 
*/ -#include "access/heapam.h" -#include "utils/load/age_load.h" - #ifndef AG_LOAD_EDGES_H #define AG_LOAD_EDGES_H -typedef struct { - size_t row; - char **header; - size_t *header_len; - size_t header_num; - char **fields; - size_t *fields_len; - size_t alloc; - size_t cur_field; - int error; - size_t header_row_length; - size_t curr_row_length; - char *graph_name; - Oid graph_oid; - char *label_name; - int label_id; - Oid label_seq_relid; - char *start_vertex; - char *end_vertex; - bool load_as_agtype; - batch_insert_state *batch_state; -} csv_edge_reader; - - -void edge_field_cb(void *field, size_t field_len, void *data); -void edge_row_cb(int delim __attribute__((unused)), void *data); +#include "utils/load/age_load.h" +/* + * Load edges from a CSV file using pg's COPY infrastructure. + * + * CSV format: start_id, start_vertex_type, end_id, end_vertex_type, [properties...] + * + * Parameters: + * file_path - Path to the CSV file (must be in /tmp/age/) + * graph_name - Name of the graph + * graph_oid - OID of the graph + * label_name - Name of the edge label + * label_id - ID of the label + * load_as_agtype - If true, parse CSV values as agtype (JSON-like) + * + * Returns EXIT_SUCCESS on success. + */ int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, - char *label_name, int label_id, - bool load_as_agtype); + char *label_name, int label_id, + bool load_as_agtype); #endif /* AG_LOAD_EDGES_H */ - diff --git a/src/include/utils/load/ag_load_labels.h b/src/include/utils/load/ag_load_labels.h index b8ed1572e..c3d517f30 100644 --- a/src/include/utils/load/ag_load_labels.h +++ b/src/include/utils/load/ag_load_labels.h @@ -17,46 +17,26 @@ * under the License. 
*/ - #ifndef AG_LOAD_LABELS_H #define AG_LOAD_LABELS_H -#include "access/heapam.h" #include "utils/load/age_load.h" -struct counts { - long unsigned fields; - long unsigned allvalues; - long unsigned rows; -}; - -typedef struct { - size_t row; - char **header; - size_t *header_len; - size_t header_num; - char **fields; - size_t *fields_len; - size_t alloc; - size_t cur_field; - int error; - size_t header_row_length; - size_t curr_row_length; - char *graph_name; - Oid graph_oid; - char *label_name; - int label_id; - Oid label_seq_relid; - bool id_field_exists; - bool load_as_agtype; - int curr_seq_num; - batch_insert_state *batch_state; -} csv_vertex_reader; - - -void vertex_field_cb(void *field, size_t field_len, void *data); -void vertex_row_cb(int delim __attribute__((unused)), void *data); - +/* + * Load vertex labels from a CSV file using pg's COPY infrastructure. + * CSV format: [id,] [properties...] + * + * Parameters: + * file_path - Path to the CSV file (must be in /tmp/age/) + * graph_name - Name of the graph + * graph_oid - OID of the graph + * label_name - Name of the vertex label + * label_id - ID of the label + * id_field_exists - If true, first CSV column contains the vertex ID + * load_as_agtype - If true, parse CSV values as agtype (JSON-like) + * + * Returns EXIT_SUCCESS on success. + */ int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, char *label_name, int label_id, bool id_field_exists, bool load_as_agtype); diff --git a/src/include/utils/load/age_load.h b/src/include/utils/load/age_load.h index 72f11493d..6573c79f3 100644 --- a/src/include/utils/load/age_load.h +++ b/src/include/utils/load/age_load.h @@ -17,6 +17,10 @@ * under the License. 
*/ +#ifndef AG_LOAD_H +#define AG_LOAD_H + +#include "access/heapam.h" #include "commands/sequence.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -27,10 +31,8 @@ #include "commands/graph_commands.h" #include "utils/ag_cache.h" -#ifndef AGE_ENTITY_CREATOR_H -#define AGE_ENTITY_CREATOR_H - #define BATCH_SIZE 1000 +#define MAX_BUFFERED_BYTES 65535 /* 64KB, same as pg COPY */ typedef struct batch_insert_state { @@ -38,26 +40,29 @@ typedef struct batch_insert_state ResultRelInfo *resultRelInfo; TupleTableSlot **slots; int num_tuples; - int max_tuples; + size_t buffered_bytes; + BulkInsertState bistate; } batch_insert_state; -agtype* create_empty_agtype(void); - -agtype* create_agtype_from_list(char **header, char **fields, +agtype *create_empty_agtype(void); +agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, int64 vertex_id, bool load_as_agtype); -agtype* create_agtype_from_list_i(char **header, char **fields, +agtype *create_agtype_from_list_i(char **header, char **fields, size_t fields_len, size_t start_index, bool load_as_agtype); + void insert_vertex_simple(Oid graph_oid, char *label_name, graphid vertex_id, agtype *vertex_properties); void insert_edge_simple(Oid graph_oid, char *label_name, graphid edge_id, graphid start_id, graphid end_id, - agtype* end_properties); -void insert_batch(batch_insert_state *batch_state); + agtype *edge_properties); void init_batch_insert(batch_insert_state **batch_state, char *label_name, Oid graph_oid); +void insert_batch(batch_insert_state *batch_state); void finish_batch_insert(batch_insert_state **batch_state); -#endif /* AGE_ENTITY_CREATOR_H */ +char *trim_whitespace(const char *str); + +#endif /* AG_LOAD_H */ diff --git a/src/include/utils/load/csv.h b/src/include/utils/load/csv.h deleted file mode 100644 index 062536977..000000000 --- a/src/include/utils/load/csv.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Created by Shoaib on 12/5/2021. 
-*/ - -/* -libcsv - parse and write csv data -Copyright (C) 2008-2021 Robert Gamble -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#ifndef LIBCSV_H__ -#define LIBCSV_H__ -#include <stdlib.h> -#include <stdio.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#define CSV_MAJOR 3 -#define CSV_MINOR 0 -#define CSV_RELEASE 3 - -/* Error Codes */ -#define CSV_SUCCESS 0 -#define CSV_EPARSE 1 /* Parse error in strict mode */ -#define CSV_ENOMEM 2 /* Out of memory while increasing buffer size */ -#define CSV_ETOOBIG 3 /* Buffer larger than SIZE_MAX needed */ -#define CSV_EINVALID 4 /* Invalid code,should never be received from csv_error*/ - - -/* parser options */ -#define CSV_STRICT 1 /* enable strict mode */ -#define CSV_REPALL_NL 2 /* report all unquoted carriage returns and linefeeds */ -#define CSV_STRICT_FINI 4 /* causes csv_fini to return CSV_EPARSE if last - field is quoted and doesn't contain ending - quote */ -#define CSV_APPEND_NULL 8 /* Ensure that all fields are null-terminated */ -#define CSV_EMPTY_IS_NULL 16 /* Pass null pointer to cb1 function when - empty, unquoted fields are encountered */ - - -/* Character values */ -#define CSV_TAB 0x09 -#define CSV_SPACE 0x20 -#define CSV_CR 0x0d -#define CSV_LF 0x0a -#define CSV_COMMA 0x2c -#define CSV_QUOTE 0x22 - -struct csv_parser { - int pstate; /* Parser state */ - int quoted; 
/* Is the current field a quoted field? */ - size_t spaces; /* Number of continuous spaces after quote or in a non-quoted field */ - unsigned char * entry_buf; /* Entry buffer */ - size_t entry_pos; /* Current position in entry_buf (and current size of entry) */ - size_t entry_size; /* Size of entry buffer */ - int status; /* Operation status */ - unsigned char options; - unsigned char quote_char; - unsigned char delim_char; - int (*is_space)(unsigned char); - int (*is_term)(unsigned char); - size_t blk_size; - void *(*malloc_func)(size_t); /* not used */ - void *(*realloc_func)(void *, size_t); /* function used to allocate buffer memory */ - void (*free_func)(void *); /* function used to free buffer memory */ -}; - -/* Function Prototypes */ -int csv_init(struct csv_parser *p, unsigned char options); -int csv_fini(struct csv_parser *p, void (*cb1)(void *, size_t, void *), void (*cb2)(int, void *), void *data); -void csv_free(struct csv_parser *p); -int csv_error(const struct csv_parser *p); -const char * csv_strerror(int error); -size_t csv_parse(struct csv_parser *p, const void *s, size_t len, void (*cb1)(void *, size_t, void *), void (*cb2)(int, void *), void *data); -size_t csv_write(void *dest, size_t dest_size, const void *src, size_t src_size); -int csv_fwrite(FILE *fp, const void *src, size_t src_size); -size_t csv_write2(void *dest, size_t dest_size, const void *src, size_t src_size, unsigned char quote); -int csv_fwrite2(FILE *fp, const void *src, size_t src_size, unsigned char quote); -int csv_get_opts(const struct csv_parser *p); -int csv_set_opts(struct csv_parser *p, unsigned char options); -void csv_set_delim(struct csv_parser *p, unsigned char c); -void csv_set_quote(struct csv_parser *p, unsigned char c); -unsigned char csv_get_delim(const struct csv_parser *p); -unsigned char csv_get_quote(const struct csv_parser *p); -void csv_set_space_func(struct csv_parser *p, int (*f)(unsigned char)); -void csv_set_term_func(struct csv_parser *p, int 
(*f)(unsigned char)); -void csv_set_realloc_func(struct csv_parser *p, void *(*)(void *, size_t)); -void csv_set_free_func(struct csv_parser *p, void (*)(void *)); -void csv_set_blk_size(struct csv_parser *p, size_t); -size_t csv_get_buffer_size(const struct csv_parser *p); - -#ifdef __cplusplus -} -#endif - -#endif