diff --git a/Makefile b/Makefile index 4cd623484..2d2912571 100644 --- a/Makefile +++ b/Makefile @@ -69,7 +69,6 @@ OBJS = src/backend/age.o \ src/backend/utils/load/ag_load_labels.o \ src/backend/utils/load/ag_load_edges.o \ src/backend/utils/load/age_load.o \ - src/backend/utils/load/libcsv.o \ src/backend/utils/name_validation.o \ src/backend/utils/ag_guc.o diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out index b638e636b..1f76c31ce 100644 --- a/regress/expected/age_load.out +++ b/regress/expected/age_load.out @@ -16,7 +16,9 @@ * specific language governing permissions and limitations * under the License. */ -\! cp -r regress/age_load/data regress/instance/data/age_load +\! rm -rf /tmp/age/age_load +\! mkdir -p /tmp/age +\! cp -r regress/age_load/data /tmp/age/age_load LOAD 'age'; SET search_path TO ag_catalog; -- Create a country using CREATE clause @@ -43,13 +45,6 @@ SELECT load_labels_from_file('agload_test_graph', 'Country', (1 row) --- A temporary table should have been created with 54 ids; 1 from CREATE and 53 from file -SELECT COUNT(*)=54 FROM "_agload_test_graph_ag_vertex_ids"; - ?column? ----------- - t -(1 row) - -- Sequence should be equal to max entry id i.e. 248 SELECT currval('agload_test_graph."Country_id_seq"')=248; ?column? @@ -74,13 +69,6 @@ NOTICE: VLabel "City" has been created (1 row) --- Temporary table should have 54+72485 rows now -SELECT COUNT(*)=54+72485 FROM "_agload_test_graph_ag_vertex_ids"; - ?column? ----------- - t -(1 row) - -- Sequence should be equal to max entry id i.e. 146941 SELECT currval('agload_test_graph."City_id_seq"')=146941; ?column? 
@@ -415,6 +403,43 @@ SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN prop {"bool": "false", "string": "nUll", "numeric": "3.14"} (6 rows) +-- +-- Check sandbox +-- +-- check null file name +SELECT load_labels_from_file('agload_conversion', 'Person1', NULL, true, true); +ERROR: file path must not be NULL +SELECT load_edges_from_file('agload_conversion', 'Edges1', NULL, true); +ERROR: file path must not be NULL +-- check no file name +SELECT load_labels_from_file('agload_conversion', 'Person1', '', true, true); +ERROR: file name cannot be zero length +SELECT load_edges_from_file('agload_conversion', 'Edges1', '', true); +ERROR: file name cannot be zero length +-- check for file/path does not exist +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load_xxx/conversion_vertices.csv', true, true); +ERROR: File or path does not exist [/tmp/age/age_load_xxx/conversion_vertices.csv] +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load_xxx/conversion_edges.csv', true); +ERROR: File or path does not exist [/tmp/age/age_load_xxx/conversion_edges.csv] +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.txt', true, true); +ERROR: File or path does not exist [/tmp/age/age_load/conversion_vertices.txt] +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.txt', true); +ERROR: File or path does not exist [/tmp/age/age_load/conversion_edges.txt] +-- check wrong extension +\! touch /tmp/age/age_load/conversion_vertices.txt +\! touch /tmp/age/age_load/conversion_edges.txt +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.txt', true, true); +ERROR: You can only load files with extension [.csv]. +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.txt', true); +ERROR: You can only load files with extension [.csv]. 
+-- check outside sandbox directory +SELECT load_labels_from_file('agload_conversion', 'Person1', '../../etc/passwd', true, true); +ERROR: You can only load files located in [/tmp/age/]. +SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', true); +ERROR: You can only load files located in [/tmp/age/]. +-- +-- Cleanup +-- SELECT drop_graph('agload_conversion', true); NOTICE: drop cascades to 6 other objects DETAIL: drop cascades to table agload_conversion._ag_label_vertex @@ -429,3 +454,195 @@ NOTICE: graph "agload_conversion" has been dropped (1 row) +-- +-- Test security and permissions +-- +SELECT create_graph('agload_security'); +NOTICE: graph "agload_security" has been created + create_graph +-------------- + +(1 row) + +SELECT create_vlabel('agload_security', 'Person1'); +NOTICE: VLabel "Person1" has been created + create_vlabel +--------------- + +(1 row) + +SELECT create_vlabel('agload_security', 'Person2'); +NOTICE: VLabel "Person2" has been created + create_vlabel +--------------- + +(1 row) + +SELECT create_elabel('agload_security', 'SecEdge'); +NOTICE: ELabel "SecEdge" has been created + create_elabel +--------------- + +(1 row) + +-- +-- Test 1: File read permission (pg_read_server_files role) +-- +-- Create a user without pg_read_server_files role +CREATE USER load_test_user; +GRANT USAGE ON SCHEMA ag_catalog TO load_test_user; +-- This should fail because load_test_user doesn't have pg_read_server_files +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: permission denied to LOAD from a file +DETAIL: Only roles with privileges of the "pg_read_server_files" role may LOAD from a file. +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: permission denied to LOAD from a file +DETAIL: Only roles with privileges of the "pg_read_server_files" role may LOAD from a file. 
+RESET ROLE; +-- Grant pg_read_server_files and try again - should fail on table permission now +GRANT pg_read_server_files TO load_test_user; +-- +-- Test 2: Table INSERT permission (ACL_INSERT) +-- +-- User has file read permission but no INSERT on the label table +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: permission denied for table Person1 +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: permission denied for table SecEdge +RESET ROLE; +-- Grant INSERT permission and try again - should succeed +GRANT USAGE ON SCHEMA agload_security TO load_test_user; +GRANT INSERT ON agload_security."Person1" TO load_test_user; +GRANT INSERT ON agload_security."SecEdge" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."Person1_id_seq" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."SecEdge_id_seq" TO load_test_user; +GRANT SELECT ON ag_catalog.ag_label TO load_test_user; +GRANT SELECT ON ag_catalog.ag_graph TO load_test_user; +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); + load_labels_from_file +----------------------- + +(1 row) + +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + load_edges_from_file +---------------------- + +(1 row) + +RESET ROLE; +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; + count +------- + 6 +(1 row) + +SELECT COUNT(*) FROM agload_security."SecEdge"; + count +------- + 6 +(1 row) + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; +-- +-- Test 3: Row-Level Security (RLS) +-- +-- Enable RLS on the label tables +ALTER TABLE agload_security."Person1" ENABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" ENABLE ROW LEVEL SECURITY; +-- Switch to load_test_user +SET ROLE 
load_test_user; +-- Loading should fail when RLS is enabled +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: LOAD from file is not supported with row-level security +HINT: Use Cypher CREATE clause instead. +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: LOAD from file is not supported with row-level security +HINT: Use Cypher CREATE clause instead. +RESET ROLE; +-- Disable RLS and try again - should succeed +ALTER TABLE agload_security."Person1" DISABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" DISABLE ROW LEVEL SECURITY; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); + load_labels_from_file +----------------------- + +(1 row) + +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + load_edges_from_file +---------------------- + +(1 row) + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; + count +------- + 6 +(1 row) + +SELECT COUNT(*) FROM agload_security."SecEdge"; + count +------- + 6 +(1 row) + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; +-- +-- Test 4: Constraint checking (CHECK constraint) +-- +-- Add constraint on vertex properties - fail if bool property is false +ALTER TABLE agload_security."Person1" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); +-- This should fail - constraint violation +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +ERROR: new row for relation "Person1" violates check constraint "check_bool_true" +DETAIL: Failing row contains (844424930131970, {"id": "2", "bool": "false", "__id__": 2, "string": "John", "num...). 
+-- Add constraint on edge properties - fail if bool property is false +ALTER TABLE agload_security."SecEdge" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); +-- This should fail - some edges have bool = false +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +ERROR: new row for relation "SecEdge" violates check constraint "check_bool_true" +DETAIL: Failing row contains (1407374883553294, 844424930131969, 1125899906842625, {"bool": "false", "string": "John", "numeric": "-2"}). +-- cleanup +ALTER TABLE agload_security."Person1" DROP CONSTRAINT check_bool_true; +ALTER TABLE agload_security."SecEdge" DROP CONSTRAINT check_bool_true; +-- +-- Cleanup +-- +REVOKE ALL ON agload_security."Person1" FROM load_test_user; +REVOKE ALL ON agload_security."SecEdge" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."Person1_id_seq" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."SecEdge_id_seq" FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_label FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_graph FROM load_test_user; +REVOKE ALL ON SCHEMA agload_security FROM load_test_user; +REVOKE ALL ON SCHEMA ag_catalog FROM load_test_user; +REVOKE pg_read_server_files FROM load_test_user; +DROP USER load_test_user; +SELECT drop_graph('agload_security', true); +NOTICE: drop cascades to 5 other objects +DETAIL: drop cascades to table agload_security._ag_label_vertex +drop cascades to table agload_security._ag_label_edge +drop cascades to table agload_security."Person1" +drop cascades to table agload_security."Person2" +drop cascades to table agload_security."SecEdge" +NOTICE: graph "agload_security" has been dropped + drop_graph +------------ + +(1 row) + +-- +-- End +-- diff --git a/regress/expected/cypher_match.out b/regress/expected/cypher_match.out index ed2b3da08..a0e284beb 100644 --- a/regress/expected/cypher_match.out +++ b/regress/expected/cypher_match.out @@ -79,8 +79,8 @@ 
SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex, {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge, {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex, {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge, {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex]::path [{"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex, {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge, {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex, {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge, {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex]::path + [{"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex, {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge, {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex, {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge, {"id": 1125899906842627, "label": "v1", 
"properties": {"id": "end"}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -88,8 +88,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a ---------------------------------------------------------------------------------- - {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex + {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -97,8 +97,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a ---------------------------------------------------------------------------------- - {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex + {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -115,8 +115,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -132,10 +132,10 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge - {"id": 1407374883553281, "label": "e1", 
"end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge (4 rows) SELECT * FROM cypher('cypher_match', $$ @@ -143,10 +143,10 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (4 rows) SELECT * FROM cypher('cypher_match', $$ @@ -154,10 +154,10 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge - {"id": 1407374883553281, "label": "e1", 
"end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge (4 rows) SELECT * FROM cypher('cypher_match', $$ @@ -165,8 +165,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -174,8 +174,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a ---------------------------------------------------------------------------------- - {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex + {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex (2 rows) -- Right Path Test @@ -250,8 +250,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": 
{}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (2 rows) --Left Path Test @@ -308,8 +308,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (a agtype); a --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge (2 rows) --Divergent Path Tests @@ -412,8 +412,8 @@ SELECT * FROM cypher('cypher_match', $$ $$) AS (i agtype); i --------------------------------------------------------------------------------------------------------------------------- - {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge + {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge (2 rows) SELECT * FROM cypher('cypher_match', $$ @@ -712,8 +712,8 @@ $$) AS (r0 agtype); {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge - {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, 
"start_id": 2251799813685249, "properties": {}}::edge {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge + {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge (6 rows) SELECT * FROM cypher('cypher_match', $$ @@ -775,8 +775,8 @@ $$) AS (r1 agtype); {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge - {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge + {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge (12 rows) @@ -1055,8 +1055,8 @@ SELECT * FROM cypher('cypher_match', {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex | {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263937, "label": "v2", "properties": {"id": "initial"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974593, 
"label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex - {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2251799813685251, "label": "v3", "properties": {"id": "end"}}::vertex | {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex + {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex (6 rows) SELECT * FROM cypher('cypher_match', @@ -1068,8 +1068,8 @@ AS (u agtype, e agtype, v agtype); {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex | {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263937, "label": "v2", "properties": {"id": "initial"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex - 
{"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2251799813685251, "label": "v3", "properties": {"id": "end"}}::vertex | {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex + {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex (6 rows) -- Property Constraint in EXISTS @@ -1123,8 +1123,8 @@ AS (u agtype, e agtype, v agtype); {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex | {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263937, "label": "v2", "properties": {"id": "initial"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex - {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, 
"properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2251799813685251, "label": "v3", "properties": {"id": "end"}}::vertex | {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex + {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2814749767106561, "label": "loop", "properties": {"id": "initial"}}::vertex | {"id": 3096224743817217, "label": "self", "end_id": 2814749767106561, "start_id": 2814749767106561, "properties": {}}::edge | {"id": 2814749767106561, "label": "loop", "properties": {"id": "initial"}}::vertex (7 rows) @@ -1156,8 +1156,8 @@ AS (u agtype, e agtype, v agtype); {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex | {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974594, "label": "e2", "end_id": 1688849860263937, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263937, "label": "v2", "properties": {"id": "initial"}}::vertex {"id": 1688849860263938, "label": "v2", "properties": {"id": "middle"}}::vertex | {"id": 1970324836974593, "label": "e2", "end_id": 1688849860263939, "start_id": 1688849860263938, "properties": {}}::edge | {"id": 1688849860263939, "label": "v2", "properties": {"id": "end"}}::vertex - {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 
2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2251799813685251, "label": "v3", "properties": {"id": "end"}}::vertex | {"id": 2533274790395905, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685251, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex + {"id": 2251799813685249, "label": "v3", "properties": {"id": "initial"}}::vertex | {"id": 2533274790395906, "label": "e3", "end_id": 2251799813685250, "start_id": 2251799813685249, "properties": {}}::edge | {"id": 2251799813685250, "label": "v3", "properties": {"id": "middle"}}::vertex {"id": 2814749767106561, "label": "loop", "properties": {"id": "initial"}}::vertex | {"id": 3096224743817217, "label": "self", "end_id": 2814749767106561, "start_id": 2814749767106561, "properties": {}}::edge | {"id": 2814749767106561, "label": "loop", "properties": {"id": "initial"}}::vertex (7 rows) @@ -2164,8 +2164,8 @@ SELECT * FROM cypher('cypher_match', $$ MATCH p=(u)-[]-()-[]-(u) RETURN p $$)as p ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [{"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081156, "label": "knows", 
"end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex]::path - [{"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex]::path [{"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex]::path + [{"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 281474976710668, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex]::path [{"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex, {"id": 4785074604081156, "label": "knows", "end_id": 281474976710668, "start_id": 281474976710667, "properties": {}}::edge, {"id": 281474976710667, "label": "", "properties": {"name": "Dave"}}::vertex, {"id": 4785074604081155, "label": "knows", "end_id": 281474976710667, "start_id": 
281474976710668, "properties": {}}::edge, {"id": 281474976710668, "label": "", "properties": {"name": "John"}}::vertex]::path (4 rows) @@ -2407,15 +2407,15 @@ SELECT * FROM cypher('cypher_match', $$ MATCH (a {name:a.name}) MATCH (a {age:a. SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship}]->(b) RETURN p $$) as (a agtype); a ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ MATCH p=(a)-[u {relationship: u.relationship, years: u.years}]->(b) RETURN p $$) as (a agtype); a 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path [{"id": 281474976710661, "label": "", "properties": {"age": 4, "name": "T"}}::vertex, {"id": 4785074604081153, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710661, "properties": {"years": 3, "relationship": "friends"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path + [{"id": 281474976710659, "label": "", "properties": {"age": 3, "name": "orphan"}}::vertex, {"id": 4785074604081154, "label": "knows", "end_id": 281474976710666, "start_id": 281474976710659, "properties": {"years": 4, "relationship": "enemies"}}::edge, {"id": 281474976710666, "label": "", "properties": {"age": 6}}::vertex]::path (2 rows) SELECT * FROM cypher('cypher_match', $$ MATCH p=(a {name:a.name})-[u {relationship: u.relationship}]->(b {age:b.age}) RETURN p $$) as (a agtype); @@ -3514,19 +3514,17 @@ SELECT count(*) FROM cypher('test_enable_containment', $$ MATCH p=(x:Customer)-[ (1 row) SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer)-[:bought ={store: 'Amazon', addr:{city: 'Vancouver', street: 30}}]->(y:Product) RETURN 0 $$) as (a agtype); - QUERY PLAN 
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Hash Join - Hash Cond: (y.id = _age_default_alias_0.end_id) - -> Seq Scan on "Product" y - -> Hash - -> Hash Join - Hash Cond: (x.id = _age_default_alias_0.start_id) - -> Seq Scan on "Customer" x - -> Hash - -> Seq Scan on bought _age_default_alias_0 - Filter: ((agtype_access_operator(VARIADIC ARRAY[properties, '"store"'::agtype]) = '"Amazon"'::agtype) AND (agtype_access_operator(VARIADIC ARRAY[properties, '"addr"'::agtype]) = '{"city": "Vancouver", "street": 30}'::agtype)) -(10 rows) + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Nested Loop + -> Nested Loop + -> Seq Scan on bought _age_default_alias_0 + Filter: ((agtype_access_operator(VARIADIC ARRAY[properties, '"store"'::agtype]) = '"Amazon"'::agtype) AND (agtype_access_operator(VARIADIC ARRAY[properties, '"addr"'::agtype]) = '{"city": "Vancouver", "street": 30}'::agtype)) + -> Index Only Scan using "Customer_pkey" on "Customer" x + Index Cond: (id = _age_default_alias_0.start_id) + -> Index Only Scan using "Product_pkey" on "Product" y + Index Cond: (id = _age_default_alias_0.end_id) +(8 rows) SELECT * FROM cypher('test_enable_containment', $$ EXPLAIN (costs off) MATCH (x:Customer ={school: { name: 'XYZ College',program: { major: 'Psyc', degree: 'BSc'} },phone: [ 123456789, 987654321, 456987123 ]}) RETURN 0 $$) as (a agtype); QUERY PLAN diff --git a/regress/expected/cypher_merge.out b/regress/expected/cypher_merge.out index 238a4c472..56a23f513 100644 --- a/regress/expected/cypher_merge.out +++ b/regress/expected/cypher_merge.out @@ -655,8 +655,8 @@ 
$$) AS (name agtype, bornIn agtype, city agtype); name | bornin | city -------------------+--------------+----------------------------------------------------------------------------------------- "Rob Reiner" | "New York" | {"id": 1970324836974593, "label": "City", "properties": {"name": "New York"}}::vertex - "Martin Sheen" | "Ohio" | {"id": 1970324836974595, "label": "City", "properties": {"name": "Ohio"}}::vertex "Michael Douglas" | "New Jersey" | {"id": 1970324836974594, "label": "City", "properties": {"name": "New Jersey"}}::vertex + "Martin Sheen" | "Ohio" | {"id": 1970324836974595, "label": "City", "properties": {"name": "Ohio"}}::vertex (3 rows) --validate diff --git a/regress/expected/cypher_vle.out b/regress/expected/cypher_vle.out index 9cbb3420c..57f930d98 100644 --- a/regress/expected/cypher_vle.out +++ b/regress/expected/cypher_vle.out @@ -508,37 +508,37 @@ SELECT * FROM cypher('cypher_vle', $$MATCH p=(u)-[e*0..0]->(v) RETURN id(u), p, SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[*0..0]->()-[]->() RETURN p $$) AS (p agtype); p ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, 
"start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 1125899906842628, "label": "edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "main edge", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1970324836974593, "label": "self_loop", "end_id": 1407374883553281, "start_id": 1407374883553281, "properties": {"name": "self loop", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 
1407374883553282, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path - [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, 
{"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 
3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path (13 rows) SELECT * FROM cypher('cypher_vle', $$MATCH p=()-[]->()-[*0..0]->() RETURN p $$) AS (p agtype); p ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path - [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": 
"all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 1125899906842628, "label": "edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "main edge", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path + [{"id": 844424930131969, "label": "begin", "properties": {}}::vertex, {"id": 2251799813685249, "label": "alternate_edge", "end_id": 1407374883553281, "start_id": 844424930131969, "properties": {"name": "alternate edge", "number": 1, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1970324836974593, "label": "self_loop", "end_id": 1407374883553281, "start_id": 1407374883553281, "properties": {"name": "self loop", "number": 1, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553281, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553281, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842627, "label": "edge", "end_id": 1407374883553282, "start_id": 1407374883553281, "properties": {"name": "main edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path - [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 
2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842626, "label": "edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "main edge", "number": 3, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685250, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1407374883553282, "properties": {"name": "alternate edge", "number": 2, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395905, "label": "bypass_edge", "end_id": 1688849860263937, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path - [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553282, "label": "middle", "properties": {}}::vertex, {"id": 2533274790395906, "label": "bypass_edge", "end_id": 844424930131969, "start_id": 1407374883553282, "properties": {"name": "bypass edge", "number": 2, "packages": [1, 3, 5, 7], "dangerous": {"type": "poisons", 
"level": "all"}}}::edge, {"id": 844424930131969, "label": "begin", "properties": {}}::vertex]::path [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 1125899906842625, "label": "edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "main edge", "number": 4, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685251, "label": "alternate_edge", "end_id": 1688849860263937, "start_id": 1407374883553283, "properties": {"name": "alternate edge", "number": 3, "packages": [2, 4, 6], "dangerous": {"type": "poisons", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path + [{"id": 1407374883553283, "label": "middle", "properties": {}}::vertex, {"id": 2251799813685253, "label": "alternate_edge", "end_id": 1407374883553282, "start_id": 1407374883553283, "properties": {"name": "backup edge", "number": 2, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553282, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 2251799813685252, "label": "alternate_edge", "end_id": 1407374883553283, "start_id": 1688849860263937, "properties": {"name": "backup edge", "number": 1, "packages": [1, 3, 5, 7]}}::edge, {"id": 1407374883553283, "label": "middle", "properties": {}}::vertex]::path + [{"id": 1688849860263937, "label": "end", "properties": {}}::vertex, {"id": 1970324836974594, "label": "self_loop", "end_id": 1688849860263937, "start_id": 1688849860263937, "properties": {"name": "self loop", "number": 2, "dangerous": {"type": "all", "level": "all"}}}::edge, {"id": 1688849860263937, "label": "end", "properties": {}}::vertex]::path (13 rows) -- diff --git a/regress/expected/expr.out b/regress/expected/expr.out index 052caf777..033fe1de7 100644 --- 
a/regress/expected/expr.out +++ b/regress/expected/expr.out @@ -2688,10 +2688,10 @@ SELECT * FROM cypher('expr', $$ MATCH (v) RETURN v $$) AS (expression agtype); SELECT * FROM cypher('expr', $$ MATCH ()-[e]-() RETURN e $$) AS (expression agtype); expression --------------------------------------------------------------------------------------------------------------------------- - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge - {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge {"id": 1407374883553282, "label": "e1", "end_id": 1125899906842626, "start_id": 1125899906842625, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge + {"id": 1407374883553281, "label": "e1", "end_id": 1125899906842627, "start_id": 1125899906842626, "properties": {}}::edge (4 rows) -- id() @@ -2700,10 +2700,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (id agtype); id ------------------ - 1407374883553281 - 1407374883553281 1407374883553282 1407374883553282 + 1407374883553281 + 1407374883553281 (4 rows) SELECT * FROM cypher('expr', $$ @@ -2742,10 +2742,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (start_id agtype); start_id ------------------ - 1125899906842626 - 1125899906842626 1125899906842625 1125899906842625 + 1125899906842626 + 1125899906842626 (4 rows) -- should return null @@ -2775,10 +2775,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (end_id agtype); end_id ------------------ - 1125899906842627 - 1125899906842627 1125899906842626 1125899906842626 + 1125899906842627 + 1125899906842627 (4 rows) -- should return null @@ -2808,10 +2808,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (id agtype, start_id agtype, startNode agtype); 
id | start_id | startnode ------------------+------------------+---------------------------------------------------------------------------------- - 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex - 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex 1407374883553282 | 1125899906842625 | {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex 1407374883553282 | 1125899906842625 | {"id": 1125899906842625, "label": "v1", "properties": {"id": "initial"}}::vertex + 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex + 1407374883553281 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex (4 rows) -- should return null @@ -2841,10 +2841,10 @@ SELECT * FROM cypher('expr', $$ $$) AS (id agtype, end_id agtype, endNode agtype); id | end_id | endnode ------------------+------------------+--------------------------------------------------------------------------------- - 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex - 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex 1407374883553282 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex 1407374883553282 | 1125899906842626 | {"id": 1125899906842626, "label": "v1", "properties": {"id": "middle"}}::vertex + 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex + 1407374883553281 | 1125899906842627 | {"id": 1125899906842627, "label": "v1", "properties": {"id": "end"}}::vertex (4 rows) -- should return null @@ -7632,10 +7632,10 @@ SELECT * FROM cypher('opt_forms', $$MATCH (u) RETURN *$$) AS (result agtype); SELECT * FROM 
cypher('opt_forms', $$MATCH (u)--(v) RETURN u.i, v.i$$) AS (u agtype, v agtype); u | v ---+--- - 2 | 3 - 3 | 2 1 | 2 2 | 1 + 2 | 3 + 3 | 2 (4 rows) SELECT * FROM cypher('opt_forms', $$MATCH (u)-->(v) RETURN u.i, v.i$$) AS (u agtype, v agtype); @@ -7822,12 +7822,12 @@ SELECT * FROM cypher('keys', $$MATCH (v) RETURN keys(v)$$) AS (vertex_keys agtyp SELECT * FROM cypher('keys', $$MATCH ()-[e]-() RETURN keys(e)$$) AS (edge_keys agtype); edge_keys ----------- - [] - [] ["song"] ["song"] + [] ["song"] ["song"] + [] (6 rows) SELECT * FROM cypher('keys', $$RETURN keys({a:1,b:'two',c:[1,2,3]})$$) AS (keys agtype); diff --git a/regress/expected/graph_generation.out b/regress/expected/graph_generation.out index 235052a08..ca511eafa 100644 --- a/regress/expected/graph_generation.out +++ b/regress/expected/graph_generation.out @@ -43,15 +43,15 @@ SELECT * FROM cypher('gp1', $$MATCH (a)-[e]->(b) RETURN e$$) as (n agtype); n ---------------------------------------------------------------------------------------------------------------------------- {"id": 1125899906842625, "label": "edges", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge - {"id": 1125899906842629, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842626, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131969, "properties": {}}::edge - {"id": 1125899906842630, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131970, "properties": {}}::edge + {"id": 1125899906842629, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842627, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131969, "properties": {}}::edge + {"id": 1125899906842630, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842632, "label": "edges", "end_id": 844424930131972, "start_id": 
844424930131971, "properties": {}}::edge + {"id": 1125899906842628, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131969, "properties": {}}::edge {"id": 1125899906842631, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131970, "properties": {}}::edge - {"id": 1125899906842634, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131972, "properties": {}}::edge {"id": 1125899906842633, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131971, "properties": {}}::edge - {"id": 1125899906842628, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131969, "properties": {}}::edge + {"id": 1125899906842634, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131972, "properties": {}}::edge (10 rows) SELECT * FROM create_complete_graph('gp1',5,'edges','vertices'); @@ -140,25 +140,25 @@ SELECT * FROM cypher('gp1', $$MATCH (a)-[e]->(b) RETURN e$$) as (n agtype); n ---------------------------------------------------------------------------------------------------------------------------- {"id": 1125899906842625, "label": "edges", "end_id": 844424930131970, "start_id": 844424930131969, "properties": {}}::edge - {"id": 1125899906842629, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842626, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131969, "properties": {}}::edge + {"id": 1125899906842629, "label": "edges", "end_id": 844424930131971, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842627, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131969, "properties": {}}::edge - {"id": 1125899906842632, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131971, "properties": {}}::edge {"id": 1125899906842630, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131970, "properties": {}}::edge - {"id": 1125899906842634, "label": "edges", 
"end_id": 844424930131973, "start_id": 844424930131972, "properties": {}}::edge + {"id": 1125899906842632, "label": "edges", "end_id": 844424930131972, "start_id": 844424930131971, "properties": {}}::edge {"id": 1125899906842628, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131969, "properties": {}}::edge {"id": 1125899906842631, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131970, "properties": {}}::edge {"id": 1125899906842633, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131971, "properties": {}}::edge + {"id": 1125899906842634, "label": "edges", "end_id": 844424930131973, "start_id": 844424930131972, "properties": {}}::edge {"id": 1125899906842635, "label": "edges", "end_id": 844424930131975, "start_id": 844424930131974, "properties": {}}::edge - {"id": 1125899906842639, "label": "edges", "end_id": 844424930131976, "start_id": 844424930131975, "properties": {}}::edge {"id": 1125899906842636, "label": "edges", "end_id": 844424930131976, "start_id": 844424930131974, "properties": {}}::edge + {"id": 1125899906842639, "label": "edges", "end_id": 844424930131976, "start_id": 844424930131975, "properties": {}}::edge {"id": 1125899906842637, "label": "edges", "end_id": 844424930131977, "start_id": 844424930131974, "properties": {}}::edge - {"id": 1125899906842642, "label": "edges", "end_id": 844424930131977, "start_id": 844424930131976, "properties": {}}::edge {"id": 1125899906842640, "label": "edges", "end_id": 844424930131977, "start_id": 844424930131975, "properties": {}}::edge - {"id": 1125899906842644, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131977, "properties": {}}::edge + {"id": 1125899906842642, "label": "edges", "end_id": 844424930131977, "start_id": 844424930131976, "properties": {}}::edge {"id": 1125899906842638, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131974, "properties": {}}::edge {"id": 1125899906842641, "label": "edges", "end_id": 
844424930131978, "start_id": 844424930131975, "properties": {}}::edge {"id": 1125899906842643, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131976, "properties": {}}::edge + {"id": 1125899906842644, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131977, "properties": {}}::edge {"id": 1125899906842645, "label": "edges", "end_id": 844424930131978, "start_id": 844424930131969, "properties": {}}::edge (21 rows) diff --git a/regress/expected/index.out b/regress/expected/index.out index f911900ab..ec62bf57d 100644 --- a/regress/expected/index.out +++ b/regress/expected/index.out @@ -16,7 +16,6 @@ * specific language governing permissions and limitations * under the License. */ -\! cp -r regress/age_load/data regress/instance/data/age_load LOAD 'age'; SET search_path TO ag_catalog; SET enable_mergejoin = ON; @@ -264,18 +263,22 @@ $$) as (n agtype); --- (0 rows) -ALTER TABLE cypher_index."Country" ADD PRIMARY KEY (id); -CREATE UNIQUE INDEX CONCURRENTLY cntry_id_idx ON cypher_index."Country" (id); -ALTER TABLE cypher_index."Country" CLUSTER ON cntry_id_idx; -ALTER TABLE cypher_index."City" ADD PRIMARY KEY (id); -CREATE UNIQUE INDEX city_id_idx ON cypher_index."City" (id); -ALTER TABLE cypher_index."City" CLUSTER ON city_id_idx; -ALTER TABLE cypher_index.has_city -ADD CONSTRAINT has_city_end_fk FOREIGN KEY (end_id) -REFERENCES cypher_index."Country"(id) MATCH FULL; -CREATE INDEX load_has_city_eid_idx ON cypher_index.has_city (end_id); -CREATE INDEX load_has_city_sid_idx ON cypher_index.has_city (start_id); -ALTER TABLE cypher_index."has_city" CLUSTER ON load_has_city_eid_idx; +-- Verify that the incices are created on id columns +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index' ORDER BY 1; + indexname | indexdef +-----------------------------+------------------------------------------------------------------------------------------------ + City_pkey | CREATE UNIQUE INDEX "City_pkey" ON cypher_index."City" 
USING btree (id) + Country_pkey | CREATE UNIQUE INDEX "Country_pkey" ON cypher_index."Country" USING btree (id) + _ag_label_edge_end_id_idx | CREATE INDEX _ag_label_edge_end_id_idx ON cypher_index._ag_label_edge USING btree (end_id) + _ag_label_edge_pkey | CREATE UNIQUE INDEX _ag_label_edge_pkey ON cypher_index._ag_label_edge USING btree (id) + _ag_label_edge_start_id_idx | CREATE INDEX _ag_label_edge_start_id_idx ON cypher_index._ag_label_edge USING btree (start_id) + _ag_label_vertex_pkey | CREATE UNIQUE INDEX _ag_label_vertex_pkey ON cypher_index._ag_label_vertex USING btree (id) + cypher_index_idx_props_uq | CREATE UNIQUE INDEX cypher_index_idx_props_uq ON cypher_index.idx USING btree (properties) + has_city_end_id_idx | CREATE INDEX has_city_end_id_idx ON cypher_index.has_city USING btree (end_id) + has_city_start_id_idx | CREATE INDEX has_city_start_id_idx ON cypher_index.has_city USING btree (start_id) + idx_pkey | CREATE UNIQUE INDEX idx_pkey ON cypher_index.idx USING btree (id) +(10 rows) + SET enable_mergejoin = ON; SET enable_hashjoin = OFF; SET enable_nestloop = OFF; @@ -288,6 +291,29 @@ $$) as (n agtype); 10 (1 row) +SELECT COUNT(*) FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() + RETURN e +$$) as (n agtype); + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Aggregate + -> Merge Join + Merge Cond: (_age_default_alias_0.id = e.start_id) + -> Merge Append + Sort Key: _age_default_alias_0.id + -> Index Only Scan using _ag_label_vertex_pkey on _ag_label_vertex _age_default_alias_0_1 + -> Index Only Scan using idx_pkey on idx _age_default_alias_0_2 + -> Index Only Scan using "Country_pkey" on "Country" _age_default_alias_0_3 + -> Index Only Scan using "City_pkey" on "City" _age_default_alias_0_4 + -> Sort + Sort Key: e.start_id + -> Merge Join + Merge Cond: (a.id = e.end_id) + -> Index Only Scan using "Country_pkey" on "Country" a + -> Index 
Scan using has_city_end_id_idx on has_city e +(15 rows) + SET enable_mergejoin = OFF; SET enable_hashjoin = ON; SET enable_nestloop = OFF; @@ -300,17 +326,53 @@ $$) as (n agtype); 10 (1 row) +SELECT COUNT(*) FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() + RETURN e +$$) as (n agtype); + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Aggregate + -> Hash Join + Hash Cond: (_age_default_alias_0.id = e.start_id) + -> Append + -> Index Only Scan using _ag_label_vertex_pkey on _ag_label_vertex _age_default_alias_0_1 + -> Index Only Scan using idx_pkey on idx _age_default_alias_0_2 + -> Index Only Scan using "Country_pkey" on "Country" _age_default_alias_0_3 + -> Index Only Scan using "City_pkey" on "City" _age_default_alias_0_4 + -> Hash + -> Hash Join + Hash Cond: (e.end_id = a.id) + -> Index Scan using has_city_end_id_idx on has_city e + -> Hash + -> Index Only Scan using "Country_pkey" on "Country" a +(14 rows) + SET enable_mergejoin = OFF; SET enable_hashjoin = OFF; SET enable_nestloop = ON; SELECT COUNT(*) FROM cypher('cypher_index', $$ - MATCH (a:Country)<-[e:has_city]-() + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() RETURN e $$) as (n agtype); - count -------- - 10 -(1 row) + QUERY PLAN +---------------------------------------------------------------------------------------------------------- + Aggregate + -> Nested Loop + -> Nested Loop + -> Index Scan using has_city_start_id_idx on has_city e + -> Index Only Scan using "Country_pkey" on "Country" a + Index Cond: (id = e.end_id) + -> Append + -> Index Only Scan using _ag_label_vertex_pkey on _ag_label_vertex _age_default_alias_0_1 + Index Cond: (id = e.start_id) + -> Index Only Scan using idx_pkey on idx _age_default_alias_0_2 + Index Cond: (id = e.start_id) + -> Index Only Scan using "Country_pkey" on "Country" _age_default_alias_0_3 + Index Cond: (id = e.start_id) + -> Index Only Scan 
using "City_pkey" on "City" _age_default_alias_0_4 + Index Cond: (id = e.start_id) +(15 rows) SET enable_mergejoin = ON; SET enable_hashjoin = ON; @@ -322,6 +384,19 @@ CREATE INDEX load_city_gin_idx ON cypher_index."City" USING gin (properties); CREATE INDEX load_country_gin_idx ON cypher_index."Country" USING gin (properties); +-- Verify GIN index is used for City property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:City {city_id: 1}) + RETURN c +$$) as (plan agtype); + QUERY PLAN +-------------------------------------------------------------- + Bitmap Heap Scan on "City" c + Recheck Cond: (properties @> '{"city_id": 1}'::agtype) + -> Bitmap Index Scan on load_city_gin_idx + Index Cond: (properties @> '{"city_id": 1}'::agtype) +(4 rows) + SELECT * FROM cypher('cypher_index', $$ MATCH (c:City {city_id: 1}) RETURN c @@ -355,6 +430,19 @@ $$) as (n agtype); {"id": 1970324836974597, "label": "City", "properties": {"name": "Vancouver", "city_id": 5, "west_coast": true, "country_code": "CA"}}::vertex (4 rows) +-- Verify GIN index is used for Country property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country {life_expectancy: 82.05}) + RETURN c +$$) as (plan agtype); + QUERY PLAN +-------------------------------------------------------------------------- + Bitmap Heap Scan on "Country" c + Recheck Cond: (properties @> '{"life_expectancy": 82.05}'::agtype) + -> Bitmap Index Scan on load_country_gin_idx + Index Cond: (properties @> '{"life_expectancy": 82.05}'::agtype) +(4 rows) + SELECT * FROM cypher('cypher_index', $$ MATCH (c:Country {life_expectancy: 82.05}) RETURN c @@ -378,26 +466,293 @@ DROP INDEX cypher_index.load_country_gin_idx; -- -- Section 4: Index use with WHERE clause -- -SELECT COUNT(*) FROM cypher('cypher_index', $$ +-- Create expression index on country_code property +CREATE INDEX city_country_code_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, 
'"country_code"'::agtype)); +-- Verify index is used with EXPLAIN (should show Index Scan on city_country_code_idx) +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a +$$) as (plan agtype); + QUERY PLAN +--------------------------------------------------------------------------------------------------------------- + Index Scan using city_country_code_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"US"'::agtype) +(2 rows) + +-- Test WHERE with indexed string property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "New York" + "San Fransisco" + "Los Angeles" + "Seattle" +(4 rows) + +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.country_code = 'CA' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "Vancouver" + "Toronto" + "Montreal" +(3 rows) + +-- Test WHERE with no matching results +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'XX' + RETURN a.name +$$) as (name agtype); + name +------ +(0 rows) + +-- Create expression index on city_id property +CREATE INDEX city_id_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"city_id"'::agtype)); +-- Verify index is used with EXPLAIN for integer property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.city_id = 1 RETURN a -$$) as (n agtype); - count -------- - 0 +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------- + Index Scan using city_id_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"city_id"'::agtype]) = '1'::agtype) +(2 rows) + +-- Test WHERE with indexed 
integer property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 + RETURN a.name +$$) as (name agtype); + name +------------ + "New York" (1 row) -CREATE INDEX CONCURRENTLY cntry_ode_idx ON cypher_index."City" -(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); -SELECT COUNT(*) FROM cypher('agload_test_graph', $$ +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 5 + RETURN a.name +$$) as (name agtype); + name +------------- + "Vancouver" +(1 row) + +-- Test WHERE with comparison operators on indexed property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id < 3 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "New York" + "San Fransisco" +(2 rows) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id >= 8 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "Monterrey" + "Tijuana" +(2 rows) + +-- Create expression index on west_coast boolean property +CREATE INDEX city_west_coast_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"west_coast"'::agtype)); +-- Verify index is used with EXPLAIN for boolean property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.west_coast = true + RETURN a +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Index Scan using city_west_coast_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) +(2 rows) + +-- Test WHERE with indexed boolean property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "San Fransisco" + "Los Angeles" + "Seattle" + "Vancouver" +(4 rows) + +SELECT * FROM 
cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.west_coast = false + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +--------------- + "New York" + "Toronto" + "Montreal" + "Mexico City" + "Monterrey" + "Tijuana" +(6 rows) + +-- EXPLAIN for pattern with WHERE clause +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true RETURN a -$$) as (n agtype); -ERROR: graph "agload_test_graph" does not exist -LINE 1: SELECT COUNT(*) FROM cypher('agload_test_graph', $$ - ^ +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------- + Index Scan using city_west_coast_idx on "City" a + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) + Filter: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"US"'::agtype) +(3 rows) + +-- Test WHERE with multiple conditions (AND) +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +----------------- + "San Fransisco" + "Los Angeles" + "Seattle" +(3 rows) + +-- Test WHERE with OR conditions +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 OR a.city_id = 5 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + name +------------- + "New York" + "Vancouver" +(2 rows) + +-- Test WHERE with NOT +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE NOT a.west_coast = true AND a.country_code = 'US' + RETURN a.name +$$) as (name agtype); + name +------------ + "New York" +(1 row) + +-- Create expression index on life_expectancy for Country +CREATE INDEX country_life_exp_idx ON cypher_index."Country" +(ag_catalog.agtype_access_operator(properties, '"life_expectancy"'::agtype)); +-- 
Verify index is used with EXPLAIN for float property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------ + Index Scan using country_life_exp_idx on "Country" c + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"life_expectancy"'::agtype]) > '80.0'::agtype) +(2 rows) + +-- Test WHERE with float property +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c.name +$$) as (name agtype); + name +---------- + "Canada" +(1 row) + +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy < 76.0 + RETURN c.name +$$) as (name agtype); + name +---------- + "Mexico" +(1 row) + +-- EXPLAIN for pattern with filters on both country and city +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' AND city.west_coast = true + RETURN city.name +$$) as (plan agtype); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Nested Loop + -> Nested Loop + -> Index Scan using city_west_coast_idx on "City" city + Index Cond: (agtype_access_operator(VARIADIC ARRAY[properties, '"west_coast"'::agtype]) = 'true'::agtype) + -> Bitmap Heap Scan on has_city _age_default_alias_0 + Recheck Cond: (start_id = city.id) + -> Bitmap Index Scan on has_city_start_id_idx + Index Cond: (start_id = city.id) + -> Index Scan using "Country_pkey" on "Country" country + Index Cond: (id = _age_default_alias_0.end_id) + Filter: (agtype_access_operator(VARIADIC ARRAY[properties, '"country_code"'::agtype]) = '"CA"'::agtype) +(11 rows) + +-- Test WHERE in combination with pattern matching +SELECT * FROM cypher('cypher_index', $$ + 
MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' + RETURN city.name + ORDER BY city.city_id +$$) as (name agtype); + name +------------- + "Vancouver" + "Toronto" + "Montreal" +(3 rows) + +-- Clean up indices +DROP INDEX cypher_index.city_country_code_idx; +DROP INDEX cypher_index.city_id_idx; +DROP INDEX cypher_index.city_west_coast_idx; +DROP INDEX cypher_index.country_life_exp_idx; -- -- General Cleanup -- @@ -415,5 +770,3 @@ NOTICE: graph "cypher_index" has been dropped (1 row) -SELECT drop_graph('agload_test_graph', true); -ERROR: graph "agload_test_graph" does not exist diff --git a/regress/expected/map_projection.out b/regress/expected/map_projection.out index dcb7f0e76..f0c45c557 100644 --- a/regress/expected/map_projection.out +++ b/regress/expected/map_projection.out @@ -152,7 +152,7 @@ $$ $$) as (a agtype); a -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - [{"name": "Christian Bale", "movies": [{"title": "The Prestige"}, {"title": "The Dark Knight"}]}, {"name": "Tom Hanks", "movies": [{"title": "Forrest Gump"}, {"title": "Finch"}, {"title": "The Circle"}]}] + [{"name": "Tom Hanks", "movies": [{"title": "Forrest Gump"}, {"title": "Finch"}, {"title": "The Circle"}]}, {"name": "Christian Bale", "movies": [{"title": "The Prestige"}, {"title": "The Dark Knight"}]}] (1 row) -- drop diff --git a/regress/sql/age_load.sql b/regress/sql/age_load.sql index 425ca5417..976f050af 100644 --- a/regress/sql/age_load.sql +++ b/regress/sql/age_load.sql @@ -17,7 +17,9 @@ * under the License. */ -\! cp -r regress/age_load/data regress/instance/data/age_load +\! rm -rf /tmp/age/age_load +\! mkdir -p /tmp/age +\! 
cp -r regress/age_load/data /tmp/age/age_load LOAD 'age'; @@ -34,9 +36,6 @@ SELECT * FROM cypher('agload_test_graph', $$CREATE (n:Country {__id__:1}) RETURN SELECT load_labels_from_file('agload_test_graph', 'Country', 'age_load/countries.csv', true); --- A temporary table should have been created with 54 ids; 1 from CREATE and 53 from file -SELECT COUNT(*)=54 FROM "_agload_test_graph_ag_vertex_ids"; - -- Sequence should be equal to max entry id i.e. 248 SELECT currval('agload_test_graph."Country_id_seq"')=248; @@ -52,9 +51,6 @@ SELECT load_labels_from_file('agload_test_graph', 'Country', SELECT load_labels_from_file('agload_test_graph', 'City', 'age_load/cities.csv', true); --- Temporary table should have 54+72485 rows now -SELECT COUNT(*)=54+72485 FROM "_agload_test_graph_ag_vertex_ids"; - -- Sequence should be equal to max entry id i.e. 146941 SELECT currval('agload_test_graph."City_id_seq"')=146941; @@ -166,4 +162,163 @@ SELECT create_elabel('agload_conversion','Edges2'); SELECT load_edges_from_file('agload_conversion', 'Edges2', 'age_load/conversion_edges.csv', false); SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e:Edges2]->() RETURN properties(e) $$) as (a agtype); +-- +-- Check sandbox +-- +-- check null file name +SELECT load_labels_from_file('agload_conversion', 'Person1', NULL, true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', NULL, true); + +-- check no file name +SELECT load_labels_from_file('agload_conversion', 'Person1', '', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', '', true); + +-- check for file/path does not exist +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load_xxx/conversion_vertices.csv', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load_xxx/conversion_edges.csv', true); +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.txt', true, true); +SELECT load_edges_from_file('agload_conversion', 
'Edges1', 'age_load/conversion_edges.txt', true); + +-- check wrong extension +\! touch /tmp/age/age_load/conversion_vertices.txt +\! touch /tmp/age/age_load/conversion_edges.txt +SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.txt', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', 'age_load/conversion_edges.txt', true); + +-- check outside sandbox directory +SELECT load_labels_from_file('agload_conversion', 'Person1', '../../etc/passwd', true, true); +SELECT load_edges_from_file('agload_conversion', 'Edges1', '../../etc/passwd', true); + +-- +-- Cleanup +-- SELECT drop_graph('agload_conversion', true); + +-- +-- Test security and permissions +-- + +SELECT create_graph('agload_security'); +SELECT create_vlabel('agload_security', 'Person1'); +SELECT create_vlabel('agload_security', 'Person2'); +SELECT create_elabel('agload_security', 'SecEdge'); + +-- +-- Test 1: File read permission (pg_read_server_files role) +-- +-- Create a user without pg_read_server_files role +CREATE USER load_test_user; +GRANT USAGE ON SCHEMA ag_catalog TO load_test_user; + +-- This should fail because load_test_user doesn't have pg_read_server_files +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Grant pg_read_server_files and try again - should fail on table permission now +GRANT pg_read_server_files TO load_test_user; + +-- +-- Test 2: Table INSERT permission (ACL_INSERT) +-- +-- User has file read permission but no INSERT on the label table +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Grant INSERT permission and try again - should succeed 
+GRANT USAGE ON SCHEMA agload_security TO load_test_user; +GRANT INSERT ON agload_security."Person1" TO load_test_user; +GRANT INSERT ON agload_security."SecEdge" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."Person1_id_seq" TO load_test_user; +GRANT UPDATE ON SEQUENCE agload_security."SecEdge_id_seq" TO load_test_user; +GRANT SELECT ON ag_catalog.ag_label TO load_test_user; +GRANT SELECT ON ag_catalog.ag_graph TO load_test_user; + +SET ROLE load_test_user; +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); +RESET ROLE; + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; +SELECT COUNT(*) FROM agload_security."SecEdge"; + +-- cleanup +DELETE FROM agload_security."Person1"; +DELETE FROM agload_security."SecEdge"; + +-- +-- Test 3: Row-Level Security (RLS) +-- + +-- Enable RLS on the label tables +ALTER TABLE agload_security."Person1" ENABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" ENABLE ROW LEVEL SECURITY; + +-- Switch to load_test_user +SET ROLE load_test_user; + +-- Loading should fail when RLS is enabled +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +RESET ROLE; + +-- Disable RLS and try again - should succeed +ALTER TABLE agload_security."Person1" DISABLE ROW LEVEL SECURITY; +ALTER TABLE agload_security."SecEdge" DISABLE ROW LEVEL SECURITY; + +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +-- Verify data was loaded +SELECT COUNT(*) FROM agload_security."Person1"; +SELECT COUNT(*) FROM agload_security."SecEdge"; + +-- cleanup +DELETE FROM agload_security."Person1"; 
+DELETE FROM agload_security."SecEdge"; + +-- +-- Test 4: Constraint checking (CHECK constraint) +-- + +-- Add constraint on vertex properties - fail if bool property is false +ALTER TABLE agload_security."Person1" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); + +-- This should fail - constraint violation +SELECT load_labels_from_file('agload_security', 'Person1', 'age_load/conversion_vertices.csv', true); + +-- Add constraint on edge properties - fail if bool property is false +ALTER TABLE agload_security."SecEdge" ADD CONSTRAINT check_bool_true + CHECK ((properties->>'"bool"')::boolean = true); + +-- This should fail - some edges have bool = false +SELECT load_edges_from_file('agload_security', 'SecEdge', 'age_load/conversion_edges.csv'); + +-- cleanup +ALTER TABLE agload_security."Person1" DROP CONSTRAINT check_bool_true; +ALTER TABLE agload_security."SecEdge" DROP CONSTRAINT check_bool_true; + +-- +-- Cleanup +-- +REVOKE ALL ON agload_security."Person1" FROM load_test_user; +REVOKE ALL ON agload_security."SecEdge" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."Person1_id_seq" FROM load_test_user; +REVOKE ALL ON SEQUENCE agload_security."SecEdge_id_seq" FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_label FROM load_test_user; +REVOKE ALL ON ag_catalog.ag_graph FROM load_test_user; +REVOKE ALL ON SCHEMA agload_security FROM load_test_user; +REVOKE ALL ON SCHEMA ag_catalog FROM load_test_user; +REVOKE pg_read_server_files FROM load_test_user; +DROP USER load_test_user; +SELECT drop_graph('agload_security', true); + +-- +-- End +-- diff --git a/regress/sql/index.sql b/regress/sql/index.sql index aac1dc40e..d4a4b24a4 100644 --- a/regress/sql/index.sql +++ b/regress/sql/index.sql @@ -17,8 +17,6 @@ * under the License. */ -\! 
cp -r regress/age_load/data regress/instance/data/age_load - LOAD 'age'; SET search_path TO ag_catalog; @@ -166,26 +164,8 @@ SELECT * FROM cypher('cypher_index', $$ (mx)<-[:has_city]-(:City {city_id: 10, name:"Tijuana", west_coast: false, country_code:"MX"}) $$) as (n agtype); -ALTER TABLE cypher_index."Country" ADD PRIMARY KEY (id); - -CREATE UNIQUE INDEX CONCURRENTLY cntry_id_idx ON cypher_index."Country" (id); -ALTER TABLE cypher_index."Country" CLUSTER ON cntry_id_idx; - -ALTER TABLE cypher_index."City" ADD PRIMARY KEY (id); - -CREATE UNIQUE INDEX city_id_idx ON cypher_index."City" (id); - -ALTER TABLE cypher_index."City" CLUSTER ON city_id_idx; - -ALTER TABLE cypher_index.has_city -ADD CONSTRAINT has_city_end_fk FOREIGN KEY (end_id) -REFERENCES cypher_index."Country"(id) MATCH FULL; - -CREATE INDEX load_has_city_eid_idx ON cypher_index.has_city (end_id); - -CREATE INDEX load_has_city_sid_idx ON cypher_index.has_city (start_id); - -ALTER TABLE cypher_index."has_city" CLUSTER ON load_has_city_eid_idx; +-- Verify that the incices are created on id columns +SELECT indexname, indexdef FROM pg_indexes WHERE schemaname= 'cypher_index' ORDER BY 1; SET enable_mergejoin = ON; SET enable_hashjoin = OFF; @@ -196,6 +176,11 @@ SELECT COUNT(*) FROM cypher('cypher_index', $$ RETURN e $$) as (n agtype); +SELECT COUNT(*) FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() + RETURN e +$$) as (n agtype); + SET enable_mergejoin = OFF; SET enable_hashjoin = ON; SET enable_nestloop = OFF; @@ -205,12 +190,17 @@ SELECT COUNT(*) FROM cypher('cypher_index', $$ RETURN e $$) as (n agtype); +SELECT COUNT(*) FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() + RETURN e +$$) as (n agtype); + SET enable_mergejoin = OFF; SET enable_hashjoin = OFF; SET enable_nestloop = ON; SELECT COUNT(*) FROM cypher('cypher_index', $$ - MATCH (a:Country)<-[e:has_city]-() + EXPLAIN (costs off) MATCH (a:Country)<-[e:has_city]-() RETURN e 
$$) as (n agtype); @@ -227,6 +217,11 @@ ON cypher_index."City" USING gin (properties); CREATE INDEX load_country_gin_idx ON cypher_index."Country" USING gin (properties); +-- Verify GIN index is used for City property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:City {city_id: 1}) + RETURN c +$$) as (plan agtype); SELECT * FROM cypher('cypher_index', $$ MATCH (c:City {city_id: 1}) @@ -243,6 +238,12 @@ SELECT * FROM cypher('cypher_index', $$ RETURN c $$) as (n agtype); +-- Verify GIN index is used for Country property match +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country {life_expectancy: 82.05}) + RETURN c +$$) as (plan agtype); + SELECT * FROM cypher('cypher_index', $$ MATCH (c:Country {life_expectancy: 82.05}) RETURN c @@ -258,23 +259,180 @@ DROP INDEX cypher_index.load_country_gin_idx; -- -- Section 4: Index use with WHERE clause -- -SELECT COUNT(*) FROM cypher('cypher_index', $$ +-- Create expression index on country_code property +CREATE INDEX city_country_code_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); + +-- Verify index is used with EXPLAIN (should show Index Scan on city_country_code_idx) +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' + RETURN a +$$) as (plan agtype); + +-- Test WHERE with indexed string property +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.country_code = 'US' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'CA' + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with no matching results +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'XX' + RETURN a.name +$$) as (name agtype); + +-- Create expression index on city_id property +CREATE INDEX city_id_idx 
ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"city_id"'::agtype)); + +-- Verify index is used with EXPLAIN for integer property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.city_id = 1 RETURN a -$$) as (n agtype); +$$) as (plan agtype); -CREATE INDEX CONCURRENTLY cntry_ode_idx ON cypher_index."City" -(ag_catalog.agtype_access_operator(properties, '"country_code"'::agtype)); +-- Test WHERE with indexed integer property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 + RETURN a.name +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 5 + RETURN a.name +$$) as (name agtype); -SELECT COUNT(*) FROM cypher('agload_test_graph', $$ +-- Test WHERE with comparison operators on indexed property +SELECT * FROM cypher('cypher_index', $$ MATCH (a:City) - WHERE a.country_code = 'RS' + WHERE a.city_id < 3 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id >= 8 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Create expression index on west_coast boolean property +CREATE INDEX city_west_coast_idx ON cypher_index."City" +(ag_catalog.agtype_access_operator(properties, '"west_coast"'::agtype)); + +-- Verify index is used with EXPLAIN for boolean property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (a:City) + WHERE a.west_coast = true RETURN a -$$) as (n agtype); +$$) as (plan agtype); + +-- Test WHERE with indexed boolean property +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.west_coast = false + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- EXPLAIN for pattern with WHERE clause +SELECT * FROM cypher('cypher_index', $$ + 
EXPLAIN (costs off) MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a +$$) as (plan agtype); + +-- Test WHERE with multiple conditions (AND) +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.country_code = 'US' AND a.west_coast = true + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with OR conditions +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE a.city_id = 1 OR a.city_id = 5 + RETURN a.name + ORDER BY a.city_id +$$) as (name agtype); + +-- Test WHERE with NOT +SELECT * FROM cypher('cypher_index', $$ + MATCH (a:City) + WHERE NOT a.west_coast = true AND a.country_code = 'US' + RETURN a.name +$$) as (name agtype); + +-- Create expression index on life_expectancy for Country +CREATE INDEX country_life_exp_idx ON cypher_index."Country" +(ag_catalog.agtype_access_operator(properties, '"life_expectancy"'::agtype)); + +-- Verify index is used with EXPLAIN for float property +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c +$$) as (plan agtype); + +-- Test WHERE with float property +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy > 80.0 + RETURN c.name +$$) as (name agtype); + +SELECT * FROM cypher('cypher_index', $$ + MATCH (c:Country) + WHERE c.life_expectancy < 76.0 + RETURN c.name +$$) as (name agtype); + +-- EXPLAIN for pattern with filters on both country and city +SELECT * FROM cypher('cypher_index', $$ + EXPLAIN (costs off) MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' AND city.west_coast = true + RETURN city.name +$$) as (plan agtype); + +-- Test WHERE in combination with pattern matching +SELECT * FROM cypher('cypher_index', $$ + MATCH (country:Country)<-[:has_city]-(city:City) + WHERE country.country_code = 'CA' + RETURN city.name + ORDER BY city.city_id +$$) as (name agtype); + +-- Clean up indices +DROP INDEX 
cypher_index.city_country_code_idx; +DROP INDEX cypher_index.city_id_idx; +DROP INDEX cypher_index.city_west_coast_idx; +DROP INDEX cypher_index.country_life_exp_idx; -- -- General Cleanup -- SELECT drop_graph('cypher_index', true); -SELECT drop_graph('agload_test_graph', true); diff --git a/src/backend/commands/label_commands.c b/src/backend/commands/label_commands.c index 568bf987b..1aa7ed0f4 100644 --- a/src/backend/commands/label_commands.c +++ b/src/backend/commands/label_commands.c @@ -93,6 +93,10 @@ static void range_var_callback_for_remove_relation(const RangeVar *rel, Oid rel_oid, Oid odl_rel_oid, void *arg); +static void create_index_on_column(char *schema_name, + char *rel_name, + char *colname, + bool unique); PG_FUNCTION_INFO_V1(age_is_valid_label_name); @@ -393,16 +397,24 @@ static void create_table_for_label(char *graph_name, char *label_name, * inheritance system. */ if (list_length(parents) != 0) + { create_stmt->tableElts = NIL; + } else if (label_type == LABEL_TYPE_EDGE) + { create_stmt->tableElts = create_edge_table_elements( graph_name, label_name, schema_name, rel_name, seq_name); + } else if (label_type == LABEL_TYPE_VERTEX) + { create_stmt->tableElts = create_vertex_table_elements( graph_name, label_name, schema_name, rel_name, seq_name); + } else + { ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("undefined label type \'%c\'", label_type))); + } create_stmt->inhRelations = parents; create_stmt->partbound = NULL; @@ -423,7 +435,69 @@ static void create_table_for_label(char *graph_name, char *label_name, ProcessUtility(wrapper, "(generated CREATE TABLE command)", false, PROCESS_UTILITY_SUBCOMMAND, NULL, NULL, None_Receiver, NULL); - /* CommandCounterIncrement() is called in ProcessUtility() */ + + /* Create index on id columns */ + if (label_type == LABEL_TYPE_VERTEX) + { + create_index_on_column(schema_name, rel_name, "id", true); + } + else if (label_type == LABEL_TYPE_EDGE) + { + create_index_on_column(schema_name, rel_name, 
"start_id", false); + create_index_on_column(schema_name, rel_name, "end_id", false); + } +} + +static void create_index_on_column(char *schema_name, + char *rel_name, + char *colname, + bool unique) +{ + IndexStmt *index_stmt; + IndexElem *index_col; + PlannedStmt *index_wrapper; + + index_stmt = makeNode(IndexStmt); + index_col = makeNode(IndexElem); + index_col->name = colname; + index_col->expr = NULL; + index_col->indexcolname = NULL; + index_col->collation = InvalidOid; + index_col->opclass = list_make1(makeString("graphid_ops")); + index_col->opclassopts = NIL; + index_col->ordering = SORTBY_DEFAULT; + index_col->nulls_ordering = SORTBY_NULLS_DEFAULT; + + index_stmt->relation = makeRangeVar(schema_name, rel_name, -1); + index_stmt->accessMethod = "btree"; + index_stmt->tableSpace = NULL; + index_stmt->indexParams = list_make1(index_col); + index_stmt->options = NIL; + index_stmt->whereClause = NULL; + index_stmt->excludeOpNames = NIL; + index_stmt->idxcomment = NULL; + index_stmt->indexOid = InvalidOid; + index_stmt->unique = unique; + index_stmt->nulls_not_distinct = false; + index_stmt->primary = unique; + index_stmt->isconstraint = unique; + index_stmt->deferrable = false; + index_stmt->initdeferred = false; + index_stmt->transformed = false; + index_stmt->concurrent = false; + index_stmt->if_not_exists = false; + index_stmt->reset_default_tblspc = false; + + index_wrapper = makeNode(PlannedStmt); + index_wrapper->commandType = CMD_UTILITY; + index_wrapper->canSetTag = false; + index_wrapper->utilityStmt = (Node *)index_stmt; + index_wrapper->stmt_location = -1; + index_wrapper->stmt_len = 0; + + ProcessUtility(index_wrapper, "(generated CREATE INDEX command)", false, + PROCESS_UTILITY_SUBCOMMAND, NULL, NULL, None_Receiver, + NULL); } /* @@ -482,7 +556,7 @@ static List *create_vertex_table_elements(char *graph_name, char *label_name, /* "id" graphid PRIMARY KEY DEFAULT "ag_catalog"."_graphid"(...) 
*/ id = makeColumnDef(AG_VERTEX_COLNAME_ID, GRAPHIDOID, -1, InvalidOid); - id->constraints = list_make2(build_pk_constraint(), + id->constraints = list_make2(build_not_null_constraint(), build_id_default(graph_name, label_name, schema_name, seq_name)); diff --git a/src/backend/executor/cypher_delete.c b/src/backend/executor/cypher_delete.c index d58513535..4766c6e7a 100644 --- a/src/backend/executor/cypher_delete.c +++ b/src/backend/executor/cypher_delete.c @@ -343,6 +343,10 @@ static void delete_entity(EState *estate, ResultRelInfo *resultRelInfo, } /* increment the command counter */ CommandCounterIncrement(); + + /* Update command id in estate */ + estate->es_snapshot->curcid = GetCurrentCommandId(false); + estate->es_output_cid = GetCurrentCommandId(false); } else if (lock_result != TM_Invisible && lock_result != TM_SelfModified) { diff --git a/src/backend/utils/adt/age_global_graph.c b/src/backend/utils/adt/age_global_graph.c index 6f30060ae..c34e51ee3 100644 --- a/src/backend/utils/adt/age_global_graph.c +++ b/src/backend/utils/adt/age_global_graph.c @@ -1237,12 +1237,10 @@ Datum age_delete_global_graphs(PG_FUNCTION_ARGS) { char *graph_name = NULL; - graph_name = strndup(agtv_temp->val.string.val, - agtv_temp->val.string.len); + graph_name = pnstrdup(agtv_temp->val.string.val, + agtv_temp->val.string.len); success = delete_specific_GRAPH_global_contexts(graph_name); - - free(graph_name); } else { diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c index f190cfffe..c5d41fa19 100644 --- a/src/backend/utils/adt/agtype.c +++ b/src/backend/utils/adt/agtype.c @@ -184,6 +184,17 @@ static agtype_value *agtype_build_map_as_agtype_value(FunctionCallInfo fcinfo); agtype_value *agtype_composite_to_agtype_value_binary(agtype *a); static agtype_value *tostring_helper(Datum arg, Oid type, char *msghdr); + +void *repalloc_check(void *ptr, size_t len) +{ + if (ptr != NULL) + { + return repalloc(ptr, len); + } + + return palloc(len); +} + /* * Due to 
how pfree can be implemented, it may not check for a passed NULL. This * wrapper does just that, it will only call pfree is the pointer passed is not @@ -5605,7 +5616,7 @@ static char *get_label_name(const char *graph_name, graphid element_graphid) result = NameStr(*DatumGetName(heap_getattr(tuple, Anum_ag_label_name, tupdesc, &column_is_null))); /* duplicate it */ - result = strdup(result); + result = pstrdup(result); /* end the scan and close the relation */ systable_endscan(scan_desc); @@ -5720,8 +5731,8 @@ Datum age_startnode(PG_FUNCTION_ARGS) Assert(AGT_ROOT_IS_SCALAR(agt_arg)); agtv_object = get_ith_agtype_value_from_container(&agt_arg->root, 0); Assert(agtv_object->type == AGTV_STRING); - graph_name = strndup(agtv_object->val.string.val, - agtv_object->val.string.len); + graph_name = pnstrdup(agtv_object->val.string.val, + agtv_object->val.string.len); /* get the edge */ agt_arg = AG_GET_ARG_AGTYPE_P(1); @@ -5755,8 +5766,6 @@ Datum age_startnode(PG_FUNCTION_ARGS) result = get_vertex(graph_name, label_name, start_id); - free(label_name); - return result; } @@ -5785,8 +5794,8 @@ Datum age_endnode(PG_FUNCTION_ARGS) Assert(AGT_ROOT_IS_SCALAR(agt_arg)); agtv_object = get_ith_agtype_value_from_container(&agt_arg->root, 0); Assert(agtv_object->type == AGTV_STRING); - graph_name = strndup(agtv_object->val.string.val, - agtv_object->val.string.len); + graph_name = pnstrdup(agtv_object->val.string.val, + agtv_object->val.string.len); /* get the edge */ agt_arg = AG_GET_ARG_AGTYPE_P(1); @@ -5820,8 +5829,6 @@ Datum age_endnode(PG_FUNCTION_ARGS) result = get_vertex(graph_name, label_name, end_id); - free(label_name); - return result; } @@ -6463,11 +6470,10 @@ Datum age_tofloat(PG_FUNCTION_ARGS) NumericGetDatum(agtv_value->val.numeric))); else if (agtv_value->type == AGTV_STRING) { - string = strndup(agtv_value->val.string.val, - agtv_value->val.string.len); + string = pnstrdup(agtv_value->val.string.val, + agtv_value->val.string.len); result = 
float8in_internal_null(string, NULL, "double precision", string, &is_valid); - free(string); if (!is_valid) PG_RETURN_NULL(); } @@ -6765,8 +6771,8 @@ Datum age_tointeger(PG_FUNCTION_ARGS) { char *endptr; /* we need a null terminated cstring */ - string = strndup(agtv_value->val.string.val, - agtv_value->val.string.len); + string = pnstrdup(agtv_value->val.string.val, + agtv_value->val.string.len); /* convert it if it is a regular integer string */ result = strtoi64(string, &endptr, 10); @@ -6780,7 +6786,6 @@ Datum age_tointeger(PG_FUNCTION_ARGS) f = float8in_internal_null(string, NULL, "double precision", string, &is_valid); - free(string); /* * If the conversions failed or it's a special float value, * return null. @@ -6793,10 +6798,6 @@ Datum age_tointeger(PG_FUNCTION_ARGS) result = (int64) f; } - else - { - free(string); - } } else { diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c index 30dc4761d..c05bf3352 100644 --- a/src/backend/utils/load/ag_load_edges.c +++ b/src/backend/utils/load/ag_load_edges.c @@ -16,55 +16,30 @@ * specific language governing permissions and limitations * under the License. 
*/ - #include "postgres.h" -#include "utils/load/ag_load_edges.h" -#include "utils/load/csv.h" - -void init_edge_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid); -void finish_edge_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid); - -void edge_field_cb(void *field, size_t field_len, void *data) -{ - - csv_edge_reader *cr = (csv_edge_reader*)data; - if (cr->error) - { - cr->error = 1; - ereport(NOTICE,(errmsg("There is some unknown error"))); - } +#include "access/heapam.h" +#include "access/table.h" +#include "catalog/namespace.h" +#include "commands/copy.h" +#include "executor/executor.h" +#include "nodes/makefuncs.h" +#include "parser/parse_node.h" +#include "utils/memutils.h" +#include "utils/rel.h" - /* check for space to store this field */ - if (cr->cur_field == cr->alloc) - { - cr->alloc *= 2; - cr->fields = realloc(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = realloc(cr->header, sizeof(size_t *) * cr->alloc); - if (cr->fields == NULL) - { - cr->error = 1; - ereport(ERROR, - (errmsg("field_cb: failed to reallocate %zu bytes\n", - sizeof(char *) * cr->alloc))); - } - } - cr->fields_len[cr->cur_field] = field_len; - cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = strndup((char*)field, field_len); - cr->cur_field += 1; -} +#include "utils/load/ag_load_edges.h" -/* Parser calls this function when it detects end of a row */ -void edge_row_cb(int delim __attribute__((unused)), void *data) +/* + * Process a single edge row from COPY's raw fields. + * Edge CSV format: start_id, start_vertex_type, end_id, end_vertex_type, [properties...] 
+ */ +static void process_edge_row(char **fields, int nfields, + char **header, int header_count, + int label_id, Oid label_seq_relid, + Oid graph_oid, bool load_as_agtype, + batch_insert_state *batch_state) { - - csv_edge_reader *cr = (csv_edge_reader*)data; - batch_insert_state *batch_state = cr->batch_state; - - size_t i, n_fields; int64 start_id_int; graphid start_vertex_graph_id; int start_vertex_type_id; @@ -77,104 +52,92 @@ void edge_row_cb(int delim __attribute__((unused)), void *data) int64 entry_id; TupleTableSlot *slot; - n_fields = cr->cur_field; + char *start_vertex_type; + char *end_vertex_type; + agtype *edge_properties; - if (cr->row == 0) - { - cr->header_num = cr->cur_field; - cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )malloc(sizeof(size_t *) * cr->cur_field); - cr->header = malloc((sizeof (char*) * cr->cur_field)); + /* Generate edge ID */ + entry_id = nextval_internal(label_seq_relid, true); + edge_id = make_graphid(label_id, entry_id); - for (i = 0; icur_field; i++) - { - cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = strndup(cr->fields[i], cr->header_len[i]); - } - } - else - { - entry_id = nextval_internal(cr->label_seq_relid, true); - edge_id = make_graphid(cr->label_id, entry_id); - - start_id_int = strtol(cr->fields[0], NULL, 10); - start_vertex_type_id = get_label_id(cr->fields[1], cr->graph_oid); - end_id_int = strtol(cr->fields[2], NULL, 10); - end_vertex_type_id = get_label_id(cr->fields[3], cr->graph_oid); - - start_vertex_graph_id = make_graphid(start_vertex_type_id, start_id_int); - end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int); - - /* Get the appropriate slot from the batch state */ - slot = batch_state->slots[batch_state->num_tuples]; - - /* Clear the slots contents */ - ExecClearTuple(slot); - - /* Fill the values in the slot */ - slot->tts_values[0] = GRAPHID_GET_DATUM(edge_id); - slot->tts_values[1] = GRAPHID_GET_DATUM(start_vertex_graph_id); - slot->tts_values[2] = 
GRAPHID_GET_DATUM(end_vertex_graph_id); - slot->tts_values[3] = AGTYPE_P_GET_DATUM( - create_agtype_from_list_i( - cr->header, cr->fields, - n_fields, 4, cr->load_as_agtype)); - slot->tts_isnull[0] = false; - slot->tts_isnull[1] = false; - slot->tts_isnull[2] = false; - slot->tts_isnull[3] = false; - - /* Make the slot as containing virtual tuple */ - ExecStoreVirtualTuple(slot); - batch_state->num_tuples++; - - if (batch_state->num_tuples >= batch_state->max_tuples) - { - /* Insert the batch when it is full (i.e. BATCH_SIZE) */ - insert_batch(batch_state, cr->label_name, cr->graph_oid); - batch_state->num_tuples = 0; - } - } + /* Trim whitespace from vertex type names */ + start_vertex_type = trim_whitespace(fields[1]); + end_vertex_type = trim_whitespace(fields[3]); - for (i = 0; i < n_fields; ++i) - { - free(cr->fields[i]); - } + /* Parse start vertex info */ + start_id_int = strtol(fields[0], NULL, 10); + start_vertex_type_id = get_label_id(start_vertex_type, graph_oid); - if (cr->error) - { - ereport(NOTICE,(errmsg("THere is some error"))); - } + /* Parse end vertex info */ + end_id_int = strtol(fields[2], NULL, 10); + end_vertex_type_id = get_label_id(end_vertex_type, graph_oid); - cr->cur_field = 0; - cr->curr_row_length = 0; - cr->row += 1; -} + /* Create graphids for start and end vertices */ + start_vertex_graph_id = make_graphid(start_vertex_type_id, start_id_int); + end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int); -static int is_space(unsigned char c) -{ - if (c == CSV_SPACE || c == CSV_TAB) - { - return 1; - } - else + /* Get the appropriate slot from the batch state */ + slot = batch_state->slots[batch_state->num_tuples]; + + /* Clear the slots contents */ + ExecClearTuple(slot); + + /* Build the agtype properties */ + edge_properties = create_agtype_from_list_i(header, fields, + nfields, 4, load_as_agtype); + + /* Fill the values in the slot */ + slot->tts_values[0] = GRAPHID_GET_DATUM(edge_id); + slot->tts_values[1] = 
GRAPHID_GET_DATUM(start_vertex_graph_id); + slot->tts_values[2] = GRAPHID_GET_DATUM(end_vertex_graph_id); + slot->tts_values[3] = AGTYPE_P_GET_DATUM(edge_properties); + slot->tts_isnull[0] = false; + slot->tts_isnull[1] = false; + slot->tts_isnull[2] = false; + slot->tts_isnull[3] = false; + + /* Make the slot as containing virtual tuple */ + ExecStoreVirtualTuple(slot); + + batch_state->buffered_bytes += VARSIZE(edge_properties); + batch_state->num_tuples++; + + /* Insert the batch when tuple count OR byte threshold is reached */ + if (batch_state->num_tuples >= BATCH_SIZE || + batch_state->buffered_bytes >= MAX_BUFFERED_BYTES) { - return 0; + insert_batch(batch_state); + batch_state->num_tuples = 0; + batch_state->buffered_bytes = 0; } } -static int is_term(unsigned char c) +/* + * Create COPY options for CSV parsing. + * Returns a List of DefElem nodes. + */ +static List *create_copy_options(void) { - if (c == CSV_CR || c == CSV_LF) - { - return 1; - } - else - { - return 0; - } + List *options = NIL; + + /* FORMAT csv */ + options = lappend(options, + makeDefElem("format", + (Node *) makeString("csv"), + -1)); + + /* HEADER false - we'll read the header ourselves */ + options = lappend(options, + makeDefElem("header", + (Node *) makeBoolean(false), + -1)); + + return options; } +/* + * Load edges from CSV file using pg's COPY infrastructure. 
+ */ int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, @@ -182,134 +145,135 @@ int create_edges_from_csv_file(char *file_path, int label_id, bool load_as_agtype) { - - FILE *fp; - struct csv_parser p; - char buf[1024]; - size_t bytes_read; - unsigned char options = 0; - csv_edge_reader cr; - char *label_seq_name; - - if (csv_init(&p, options) != 0) - { - ereport(ERROR, - (errmsg("Failed to initialize csv parser\n"))); - } - - csv_set_space_func(&p, is_space); - csv_set_term_func(&p, is_term); - - fp = fopen(file_path, "rb"); - if (!fp) - { - ereport(ERROR, - (errmsg("Failed to open %s\n", file_path))); - } - + Relation label_rel; + Oid label_relid; + CopyFromState cstate; + List *copy_options; + ParseState *pstate; + char **fields; + int nfields; + char **header = NULL; + int header_count = 0; + bool is_first_row = true; + char *label_seq_name; + Oid label_seq_relid; + batch_insert_state *batch_state = NULL; + MemoryContext batch_context; + MemoryContext old_context; + + /* Create a memory context for batch processing - reset after each batch */ + batch_context = AllocSetContextCreate(CurrentMemoryContext, + "AGE CSV Edge Load Batch Context", + ALLOCSET_DEFAULT_SIZES); + + /* Get the label relation */ + label_relid = get_label_relation(label_name, graph_oid); + label_rel = table_open(label_relid, RowExclusiveLock); + + /* Get sequence info */ label_seq_name = get_label_seq_relation_name(label_name); - - memset((void*)&cr, 0, sizeof(csv_edge_reader)); - cr.alloc = 128; - cr.fields = malloc(sizeof(char *) * cr.alloc); - cr.fields_len = malloc(sizeof(size_t *) * cr.alloc); - cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; + label_seq_relid = get_relname_relid(label_seq_name, graph_oid); /* Initialize the batch insert 
state */ - init_edge_batch_insert(&cr.batch_state, label_name, graph_oid); + init_batch_insert(&batch_state, label_name, graph_oid); - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) + /* Create COPY options for CSV parsing */ + copy_options = create_copy_options(); + + /* Create a minimal ParseState for BeginCopyFrom */ + pstate = make_parsestate(NULL); + + PG_TRY(); { - if (csv_parse(&p, buf, bytes_read, edge_field_cb, - edge_row_cb, &cr) != bytes_read) + /* + * Initialize COPY FROM state. + * We pass the label relation but will only use NextCopyFromRawFields + * which returns raw parsed strings without type conversion. + */ + cstate = BeginCopyFrom(pstate, + label_rel, + NULL, /* whereClause */ + file_path, + false, /* is_program */ + NULL, /* data_source_cb */ + NIL, /* attnamelist */ + copy_options); + + /* + * Process rows using COPY's csv parsing. + * NextCopyFromRawFields uses 64KB buffers internally. + */ + while (NextCopyFromRawFields(cstate, &fields, &nfields)) { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); + if (is_first_row) + { + int i; + + /* First row is the header - save column names (in main context) */ + header_count = nfields; + header = (char **) palloc(sizeof(char *) * nfields); + + for (i = 0; i < nfields; i++) + { + /* Trim whitespace from header fields */ + header[i] = trim_whitespace(fields[i]); + } + + is_first_row = false; + } + else + { + /* Switch to batch context for row processing */ + old_context = MemoryContextSwitchTo(batch_context); + + /* Data row - process it */ + process_edge_row(fields, nfields, + header, header_count, + label_id, label_seq_relid, + graph_oid, load_as_agtype, + batch_state); + + /* Switch back to main context */ + MemoryContextSwitchTo(old_context); + + /* Reset batch context after each batch to free memory */ + if (batch_state->num_tuples == 0) + { + MemoryContextReset(batch_context); + } + } } - } - csv_fini(&p, edge_field_cb, edge_row_cb, &cr); + /* Finish 
any remaining batch inserts */ + finish_batch_insert(&batch_state); + MemoryContextReset(batch_context); - /* Finish any remaining batch inserts */ - finish_edge_batch_insert(&cr.batch_state, label_name, graph_oid); - - if (ferror(fp)) - { - ereport(ERROR, (errmsg("Error while reading file %s\n", file_path))); + /* Clean up COPY state */ + EndCopyFrom(cstate); } - - fclose(fp); - - free(cr.fields); - csv_free(&p); - return EXIT_SUCCESS; -} - -/* - * Initialize the batch insert state for edges. - */ -void init_edge_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid) -{ - Relation relation; - int i; - - // Open a temporary relation to get the tuple descriptor - relation = table_open(get_label_relation(label_name, graph_oid), AccessShareLock); - - // Initialize the batch insert state - *batch_state = (batch_insert_state *) palloc0(sizeof(batch_insert_state)); - (*batch_state)->max_tuples = BATCH_SIZE; - (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); - (*batch_state)->num_tuples = 0; - - // Create slots - for (i = 0; i < BATCH_SIZE; i++) + PG_FINALLY(); { - (*batch_state)->slots[i] = MakeSingleTupleTableSlot( - RelationGetDescr(relation), - &TTSOpsHeapTuple); - } - - table_close(relation, AccessShareLock); -} + /* Free header if allocated */ + if (header != NULL) + { + int i; + for (i = 0; i < header_count; i++) + { + pfree(header[i]); + } + pfree(header); + } -/* - * Finish the batch insert for edges. Insert the - * remaining tuples in the batch state and clean up. 
- */ -void finish_edge_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid) -{ - int i; - Relation relation; + /* Close the relation */ + table_close(label_rel, RowExclusiveLock); - if ((*batch_state)->num_tuples > 0) - { - insert_batch(*batch_state, label_name, graph_oid); - (*batch_state)->num_tuples = 0; - } - - // Open a temporary relation to ensure resources are properly cleaned up - relation = table_open(get_label_relation(label_name, graph_oid), AccessShareLock); + /* Delete batch context */ + MemoryContextDelete(batch_context); - // Free slots - for (i = 0; i < BATCH_SIZE; i++) - { - ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); + /* Free parse state */ + free_parsestate(pstate); } + PG_END_TRY(); - // Clean up batch state - pfree_if_not_null((*batch_state)->slots); - pfree_if_not_null(*batch_state); - *batch_state = NULL; - - table_close(relation, AccessShareLock); + return EXIT_SUCCESS; } diff --git a/src/backend/utils/load/ag_load_labels.c b/src/backend/utils/load/ag_load_labels.c index 2ab223346..5b11f68b8 100644 --- a/src/backend/utils/load/ag_load_labels.c +++ b/src/backend/utils/load/ag_load_labels.c @@ -17,175 +17,114 @@ * under the License. 
*/ #include "postgres.h" -#include "executor/spi.h" + +#include "access/heapam.h" +#include "access/table.h" #include "catalog/namespace.h" +#include "commands/copy.h" #include "executor/executor.h" +#include "nodes/makefuncs.h" +#include "parser/parse_node.h" +#include "utils/memutils.h" +#include "utils/rel.h" #include "utils/load/ag_load_labels.h" -#include "utils/load/csv.h" - -static void setup_temp_table_for_vertex_ids(char *graph_name); -static void insert_batch_in_temp_table(batch_insert_state *batch_state, - Oid graph_oid, Oid relid); -static void init_vertex_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid, - Oid temp_table_relid); -static void finish_vertex_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid, - Oid temp_table_relid); -static void insert_vertex_batch(batch_insert_state *batch_state, char *label_name, - Oid graph_oid, Oid temp_table_relid); - -void vertex_field_cb(void *field, size_t field_len, void *data) -{ - - csv_vertex_reader *cr = (csv_vertex_reader *) data; - - if (cr->error) - { - cr->error = 1; - ereport(NOTICE,(errmsg("There is some unknown error"))); - } - - /* check for space to store this field */ - if (cr->cur_field == cr->alloc) - { - cr->alloc *= 2; - cr->fields = realloc(cr->fields, sizeof(char *) * cr->alloc); - cr->fields_len = realloc(cr->header, sizeof(size_t *) * cr->alloc); - if (cr->fields == NULL) - { - cr->error = 1; - ereport(ERROR, - (errmsg("field_cb: failed to reallocate %zu bytes\n", - sizeof(char *) * cr->alloc))); - } - } - cr->fields_len[cr->cur_field] = field_len; - cr->curr_row_length += field_len; - cr->fields[cr->cur_field] = strndup((char *) field, field_len); - cr->cur_field += 1; -} -void vertex_row_cb(int delim __attribute__((unused)), void *data) +/* + * Process a single vertex row from COPY's raw fields. + * Vertex CSV format: [id,] [properties...] 
+ */ +static void process_vertex_row(char **fields, int nfields, + char **header, int header_count, + int label_id, Oid label_seq_relid, + bool id_field_exists, bool load_as_agtype, + int64 *curr_seq_num, + batch_insert_state *batch_state) { - csv_vertex_reader *cr = (csv_vertex_reader*)data; - batch_insert_state *batch_state = cr->batch_state; - size_t i, n_fields; graphid vertex_id; int64 entry_id; TupleTableSlot *slot; - TupleTableSlot *temp_id_slot; + agtype *vertex_properties; - n_fields = cr->cur_field; - - if (cr->row == 0) + /* Generate or use provided entry_id */ + if (id_field_exists) { - cr->header_num = cr->cur_field; - cr->header_row_length = cr->curr_row_length; - cr->header_len = (size_t* )malloc(sizeof(size_t *) * cr->cur_field); - cr->header = malloc((sizeof (char*) * cr->cur_field)); - - for (i = 0; icur_field; i++) + entry_id = strtol(fields[0], NULL, 10); + if (entry_id > *curr_seq_num) { - cr->header_len[i] = cr->fields_len[i]; - cr->header[i] = strndup(cr->fields[i], cr->header_len[i]); + /* This is needed to ensure the sequence is up-to-date */ + DirectFunctionCall2(setval_oid, + ObjectIdGetDatum(label_seq_relid), + Int64GetDatum(entry_id)); + *curr_seq_num = entry_id; } } else { - if (cr->id_field_exists) - { - entry_id = strtol(cr->fields[0], NULL, 10); - if (entry_id > cr->curr_seq_num) - { - DirectFunctionCall2(setval_oid, - ObjectIdGetDatum(cr->label_seq_relid), - Int64GetDatum(entry_id)); - cr->curr_seq_num = entry_id; - } - } - else - { - entry_id = nextval_internal(cr->label_seq_relid, true); - } - - vertex_id = make_graphid(cr->label_id, entry_id); - - /* Get the appropriate slot from the batch state */ - slot = batch_state->slots[batch_state->num_tuples]; - temp_id_slot = batch_state->temp_id_slots[batch_state->num_tuples]; + entry_id = nextval_internal(label_seq_relid, true); + } - /* Clear the slots contents */ - ExecClearTuple(slot); - ExecClearTuple(temp_id_slot); + vertex_id = make_graphid(label_id, entry_id); - /* Fill the 
values in the slot */ - slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); - slot->tts_values[1] = AGTYPE_P_GET_DATUM( - create_agtype_from_list(cr->header, cr->fields, - n_fields, entry_id, - cr->load_as_agtype)); - slot->tts_isnull[0] = false; - slot->tts_isnull[1] = false; + /* Get the appropriate slot from the batch state */ + slot = batch_state->slots[batch_state->num_tuples]; - temp_id_slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); - temp_id_slot->tts_isnull[0] = false; + /* Clear the slots contents */ + ExecClearTuple(slot); - /* Make the slot as containing virtual tuple */ - ExecStoreVirtualTuple(slot); - ExecStoreVirtualTuple(temp_id_slot); + /* Build the agtype properties */ + vertex_properties = create_agtype_from_list(header, fields, + nfields, entry_id, + load_as_agtype); - batch_state->num_tuples++; + /* Fill the values in the slot */ + slot->tts_values[0] = GRAPHID_GET_DATUM(vertex_id); + slot->tts_values[1] = AGTYPE_P_GET_DATUM(vertex_properties); + slot->tts_isnull[0] = false; + slot->tts_isnull[1] = false; - if (batch_state->num_tuples >= batch_state->max_tuples) - { - /* Insert the batch when it is full (i.e. 
BATCH_SIZE) */ - insert_vertex_batch(batch_state, cr->label_name, cr->graph_oid, - cr->temp_table_relid); - batch_state->num_tuples = 0; - } - } + /* Make the slot as containing virtual tuple */ + ExecStoreVirtualTuple(slot); - for (i = 0; i < n_fields; ++i) - { - free(cr->fields[i]); - } + batch_state->buffered_bytes += VARSIZE(vertex_properties); + batch_state->num_tuples++; - if (cr->error) + /* Insert the batch when tuple count OR byte threshold is reached */ + if (batch_state->num_tuples >= BATCH_SIZE || + batch_state->buffered_bytes >= MAX_BUFFERED_BYTES) { - ereport(NOTICE,(errmsg("THere is some error"))); + insert_batch(batch_state); + batch_state->num_tuples = 0; + batch_state->buffered_bytes = 0; } - - cr->cur_field = 0; - cr->curr_row_length = 0; - cr->row += 1; } -static int is_space(unsigned char c) +/* + * Create COPY options for csv parsing. + * Returns a List of DefElem nodes. + */ +static List *create_copy_options(void) { - if (c == CSV_SPACE || c == CSV_TAB) - { - return 1; - } - else - { - return 0; - } + List *options = NIL; -} -static int is_term(unsigned char c) -{ - if (c == CSV_CR || c == CSV_LF) - { - return 1; - } - else - { - return 0; - } + /* FORMAT csv */ + options = lappend(options, + makeDefElem("format", + (Node *) makeString("csv"), + -1)); + + /* HEADER false - we'll read the header ourselves */ + options = lappend(options, + makeDefElem("header", + (Node *) makeBoolean(false), + -1)); + + return options; } +/* + * Load vertex labels from csv file using pg's COPY infrastructure. 
+ */ int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, @@ -194,274 +133,146 @@ int create_labels_from_csv_file(char *file_path, bool id_field_exists, bool load_as_agtype) { - - FILE *fp; - struct csv_parser p; - char buf[1024]; - size_t bytes_read; - unsigned char options = 0; - csv_vertex_reader cr; - char *label_seq_name; - Oid temp_table_relid; - - if (csv_init(&p, options) != 0) - { - ereport(ERROR, - (errmsg("Failed to initialize csv parser\n"))); - } - - temp_table_relid = RelnameGetRelid(GET_TEMP_VERTEX_ID_TABLE(graph_name)); - if (!OidIsValid(temp_table_relid)) - { - setup_temp_table_for_vertex_ids(graph_name); - temp_table_relid = RelnameGetRelid(GET_TEMP_VERTEX_ID_TABLE(graph_name)); - } - - csv_set_space_func(&p, is_space); - csv_set_term_func(&p, is_term); - - fp = fopen(file_path, "rb"); - if (!fp) - { - ereport(ERROR, - (errmsg("Failed to open %s\n", file_path))); - } - + Relation label_rel; + Oid label_relid; + CopyFromState cstate; + List *copy_options; + ParseState *pstate; + char **fields; + int nfields; + char **header = NULL; + int header_count = 0; + bool is_first_row = true; + char *label_seq_name; + Oid label_seq_relid; + int64 curr_seq_num = 0; + batch_insert_state *batch_state = NULL; + MemoryContext batch_context; + MemoryContext old_context; + + /* Create a memory context for batch processing - reset after each batch */ + batch_context = AllocSetContextCreate(CurrentMemoryContext, + "AGE CSV Load Batch Context", + ALLOCSET_DEFAULT_SIZES); + + /* Get the label relation */ + label_relid = get_label_relation(label_name, graph_oid); + label_rel = table_open(label_relid, RowExclusiveLock); + + /* Get sequence info */ label_seq_name = get_label_seq_relation_name(label_name); + label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - memset((void*)&cr, 0, sizeof(csv_vertex_reader)); - - cr.alloc = 2048; - cr.fields = malloc(sizeof(char *) * cr.alloc); - cr.fields_len = malloc(sizeof(size_t *) * cr.alloc); 
- cr.header_row_length = 0; - cr.curr_row_length = 0; - cr.graph_name = graph_name; - cr.graph_oid = graph_oid; - cr.label_name = label_name; - cr.label_id = label_id; - cr.id_field_exists = id_field_exists; - cr.label_seq_relid = get_relname_relid(label_seq_name, graph_oid); - cr.load_as_agtype = load_as_agtype; - cr.temp_table_relid = temp_table_relid; - - if (cr.id_field_exists) + if (id_field_exists) { /* * Set the curr_seq_num since we will need it to compare with * incoming entry_id. - * - * We cant use currval because it will error out if nextval was - * not called before in the session. */ - cr.curr_seq_num = nextval_internal(cr.label_seq_relid, true); + curr_seq_num = nextval_internal(label_seq_relid, true); } /* Initialize the batch insert state */ - init_vertex_batch_insert(&cr.batch_state, label_name, graph_oid, - cr.temp_table_relid); - - while ((bytes_read=fread(buf, 1, 1024, fp)) > 0) - { - if (csv_parse(&p, buf, bytes_read, vertex_field_cb, - vertex_row_cb, &cr) != bytes_read) - { - ereport(ERROR, (errmsg("Error while parsing file: %s\n", - csv_strerror(csv_error(&p))))); - } - } + init_batch_insert(&batch_state, label_name, graph_oid); - csv_fini(&p, vertex_field_cb, vertex_row_cb, &cr); + /* Create COPY options for CSV parsing */ + copy_options = create_copy_options(); - /* Finish any remaining batch inserts */ - finish_vertex_batch_insert(&cr.batch_state, label_name, graph_oid, - cr.temp_table_relid); + /* Create a minimal ParseState for BeginCopyFrom */ + pstate = make_parsestate(NULL); - if (ferror(fp)) + PG_TRY(); { - ereport(ERROR, (errmsg("Error while reading file %s\n", - file_path))); - } - - fclose(fp); - - free(cr.fields); - csv_free(&p); - return EXIT_SUCCESS; -} - -static void insert_vertex_batch(batch_insert_state *batch_state, char *label_name, - Oid graph_oid, Oid temp_table_relid) -{ - insert_batch_in_temp_table(batch_state, graph_oid, temp_table_relid); - insert_batch(batch_state, label_name, graph_oid); -} - -/* - * Create and 
populate a temporary table with vertex ids that are already - * present in the graph. This table will be used to check if the new vertex - * id generated by loader is a duplicate. - * Unique index is created to enforce uniqueness of the ids. - * - * We dont need this for loading edges since the ids are generated using - * sequence and are unique. - */ -static void setup_temp_table_for_vertex_ids(char *graph_name) -{ - char *create_as_query; - char *index_query; - - create_as_query = psprintf("CREATE TEMP TABLE IF NOT EXISTS %s AS " - "SELECT DISTINCT id FROM \"%s\".%s", - GET_TEMP_VERTEX_ID_TABLE(graph_name), graph_name, - AG_DEFAULT_LABEL_VERTEX); - - index_query = psprintf("CREATE UNIQUE INDEX ON %s (id)", - GET_TEMP_VERTEX_ID_TABLE(graph_name)); - SPI_connect(); - SPI_execute(create_as_query, false, 0); - SPI_execute(index_query, false, 0); - - SPI_finish(); -} - -/* - * Inserts batch of tuples into the temporary table. - * This function also updates the index to check for - * uniqueness of the ids. - */ -static void insert_batch_in_temp_table(batch_insert_state *batch_state, - Oid graph_oid, Oid relid) -{ - int i; - EState *estate; - ResultRelInfo *resultRelInfo; - Relation rel; - List *result; - - rel = table_open(relid, RowExclusiveLock); - - /* Initialize executor state */ - estate = CreateExecutorState(); - - /* Initialize result relation information */ - resultRelInfo = makeNode(ResultRelInfo); - InitResultRelInfo(resultRelInfo, rel, 1, NULL, estate->es_instrument); - estate->es_result_relations = &resultRelInfo; - - /* Open the indices */ - ExecOpenIndices(resultRelInfo, false); - - /* Insert the batch into the temporary table */ - heap_multi_insert(rel, batch_state->temp_id_slots, batch_state->num_tuples, - GetCurrentCommandId(true), 0, NULL); + /* + * Initialize COPY FROM state. + * We pass the label relation but will only use NextCopyFromRawFields + * which returns raw parsed strings without type conversion. 
+ */ + cstate = BeginCopyFrom(pstate, + label_rel, + NULL, /* whereClause */ + file_path, + false, /* is_program */ + NULL, /* data_source_cb */ + NIL, /* attnamelist - NULL means all columns */ + copy_options); - for (i = 0; i < batch_state->num_tuples; i++) - { - result = ExecInsertIndexTuples(resultRelInfo, batch_state->temp_id_slots[i], - estate, false, true, NULL, NIL, false); - /* Check if the unique cnstraint is violated */ - if (list_length(result) != 0) + /* + * Process rows using COPY's csv parsing. + * NextCopyFromRawFields uses 64KB buffers internally. + */ + while (NextCopyFromRawFields(cstate, &fields, &nfields)) { - Datum id; - bool isnull; - - id = slot_getattr(batch_state->temp_id_slots[i], 1, &isnull); - ereport(ERROR, (errmsg("Cannot insert duplicate vertex id: %ld", - DATUM_GET_GRAPHID(id)), - errhint("Entry id %ld is already used", - get_graphid_entry_id(id)))); - } - } - /* Clean up and close the indices */ - ExecCloseIndices(resultRelInfo); - - FreeExecutorState(estate); - table_close(rel, RowExclusiveLock); - - CommandCounterIncrement(); -} + if (is_first_row) + { + int i; -/* - * Initialize the batch insert state for vertices. 
- */ -static void init_vertex_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid, - Oid temp_table_relid) -{ - Relation relation; - Oid relid; + /* First row is the header - save column names (in main context) */ + header_count = nfields; + header = (char **) palloc(sizeof(char *) * nfields); - Relation temp_table_relation; - int i; + for (i = 0; i < nfields; i++) + { + /* Trim whitespace from header fields */ + header[i] = trim_whitespace(fields[i]); + } - /* Open a temporary relation to get the tuple descriptor */ - relid = get_label_relation(label_name, graph_oid); - relation = table_open(relid, AccessShareLock); + is_first_row = false; + } + else + { + /* Switch to batch context for row processing */ + old_context = MemoryContextSwitchTo(batch_context); + + /* Data row - process it */ + process_vertex_row(fields, nfields, + header, header_count, + label_id, label_seq_relid, + id_field_exists, load_as_agtype, + &curr_seq_num, + batch_state); + + /* Switch back to main context */ + MemoryContextSwitchTo(old_context); + + /* Reset batch context after each batch to free memory */ + if (batch_state->num_tuples == 0) + { + MemoryContextReset(batch_context); + } + } + } - temp_table_relation = table_open(temp_table_relid, AccessShareLock); + /* Finish any remaining batch inserts */ + finish_batch_insert(&batch_state); + MemoryContextReset(batch_context); - /* Initialize the batch insert state */ - *batch_state = (batch_insert_state *) palloc0(sizeof(batch_insert_state)); - (*batch_state)->max_tuples = BATCH_SIZE; - (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); - (*batch_state)->temp_id_slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); - (*batch_state)->num_tuples = 0; - - /* Create slots */ - for (i = 0; i < BATCH_SIZE; i++) - { - (*batch_state)->slots[i] = MakeSingleTupleTableSlot( - RelationGetDescr(relation), - &TTSOpsHeapTuple); - (*batch_state)->temp_id_slots[i] = MakeSingleTupleTableSlot( - 
RelationGetDescr(temp_table_relation), - &TTSOpsHeapTuple); + /* Clean up COPY state */ + EndCopyFrom(cstate); } - - table_close(relation, AccessShareLock); - table_close(temp_table_relation, AccessShareLock); -} - -/* - * Finish the batch insert for vertices. Insert the - * remaining tuples in the batch state and clean up. - */ -static void finish_vertex_batch_insert(batch_insert_state **batch_state, - char *label_name, Oid graph_oid, - Oid temp_table_relid) -{ - Relation relation; - Oid relid; - - Relation temp_table_relation; - int i; - - if ((*batch_state)->num_tuples > 0) + PG_FINALLY(); { - insert_vertex_batch(*batch_state, label_name, graph_oid, temp_table_relid); - (*batch_state)->num_tuples = 0; - } + /* Free header if allocated */ + if (header != NULL) + { + int i; + for (i = 0; i < header_count; i++) + { + pfree(header[i]); + } + pfree(header); + } - /* Open a temporary relation to ensure resources are properly cleaned up */ - relid = get_label_relation(label_name, graph_oid); - relation = table_open(relid, AccessShareLock); + /* Close the relation */ + table_close(label_rel, RowExclusiveLock); - temp_table_relation = table_open(temp_table_relid, AccessShareLock); + /* Delete batch context */ + MemoryContextDelete(batch_context); - /* Free slots */ - for (i = 0; i < BATCH_SIZE; i++) - { - ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); - ExecDropSingleTupleTableSlot((*batch_state)->temp_id_slots[i]); + /* Free parse state */ + free_parsestate(pstate); } + PG_END_TRY(); - /* Clean up batch state */ - pfree_if_not_null((*batch_state)->slots); - pfree_if_not_null((*batch_state)->temp_id_slots); - pfree_if_not_null(*batch_state); - *batch_state = NULL; - - table_close(relation, AccessShareLock); - table_close(temp_table_relation, AccessShareLock); + return EXIT_SUCCESS; } diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c index 1658ac306..b59920c9c 100644 --- a/src/backend/utils/load/age_load.c +++ 
b/src/backend/utils/load/age_load.c @@ -18,7 +18,21 @@ */ #include "postgres.h" + +#include "access/heapam.h" +#include "access/table.h" +#include "access/tableam.h" +#include "access/xact.h" +#include "catalog/indexing.h" +#include "catalog/pg_authid.h" +#include "executor/executor.h" +#include "miscadmin.h" +#include "nodes/parsenodes.h" +#include "parser/parse_relation.h" +#include "utils/acl.h" #include "utils/json.h" +#include "utils/rel.h" +#include "utils/rls.h" #include "utils/load/ag_load_edges.h" #include "utils/load/ag_load_labels.h" @@ -28,6 +42,153 @@ static agtype_value *csv_value_to_agtype_value(char *csv_val); static Oid get_or_create_graph(const Name graph_name); static int32 get_or_create_label(Oid graph_oid, char *graph_name, char *label_name, char label_kind); +static char *build_safe_filename(char *name); +static void check_file_read_permission(void); +static void check_table_permissions(Oid relid); +static void check_rls_for_load(Oid relid); + +#define AGE_BASE_CSV_DIRECTORY "/tmp/age/" +#define AGE_CSV_FILE_EXTENSION ".csv" + +/* + * Trim leading and trailing whitespace from a string. + * Returns a newly allocated string with whitespace removed. + * Returns empty string for NULL input. 
+ */ +char *trim_whitespace(const char *str) +{ + const char *start; + const char *end; + size_t len; + + if (str == NULL) + { + return pstrdup(""); + } + + /* Find first non-whitespace character */ + start = str; + while (*start && (*start == ' ' || *start == '\t' || + *start == '\n' || *start == '\r')) + { + start++; + } + + /* If string is all whitespace, return empty string */ + if (*start == '\0') + { + return pstrdup(""); + } + + /* Find last non-whitespace character */ + end = str + strlen(str) - 1; + while (end > start && (*end == ' ' || *end == '\t' || + *end == '\n' || *end == '\r')) + { + end--; + } + + /* Copy the trimmed string */ + len = end - start + 1; + return pnstrdup(start, len); +} + +static char *build_safe_filename(char *name) +{ + int length; + char path[PATH_MAX]; + char *resolved; + + if (name == NULL) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("file name cannot be NULL"))); + + } + + length = strlen(name); + + if (length == 0) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("file name cannot be zero length"))); + + } + + snprintf(path, sizeof(path), "%s%s", AGE_BASE_CSV_DIRECTORY, name); + + resolved = realpath(path, NULL); + + if (resolved == NULL) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("File or path does not exist [%s]", path))); + } + + if (strncmp(resolved, AGE_BASE_CSV_DIRECTORY, + strlen(AGE_BASE_CSV_DIRECTORY)) != 0) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("You can only load files located in [%s].", + AGE_BASE_CSV_DIRECTORY))); + } + + length = strlen(resolved) - 4; + if (strncmp(resolved+length, AGE_CSV_FILE_EXTENSION, + strlen(AGE_CSV_FILE_EXTENSION)) != 0) + { + ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("You can only load files with extension [%s].", + AGE_CSV_FILE_EXTENSION))); + } + + return resolved; +} + +/* + * Check if the current user has permission to read server files. 
+ * Only users with the pg_read_server_files role can load from files. + */ +static void check_file_read_permission(void) +{ + if (!has_privs_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES)) + { + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("permission denied to LOAD from a file"), + errdetail("Only roles with privileges of the \"%s\" role may LOAD from a file.", + "pg_read_server_files"))); + } +} + +/* + * Check if the current user has INSERT permission on the target table. + */ +static void check_table_permissions(Oid relid) +{ + AclResult aclresult; + + aclresult = pg_class_aclcheck(relid, GetUserId(), ACL_INSERT); + if (aclresult != ACLCHECK_OK) + { + aclcheck_error(aclresult, OBJECT_TABLE, get_rel_name(relid)); + } +} + +/* + * Check if RLS is enabled on the target table. + * CSV loading is not supported with row-level security. + */ +static void check_rls_for_load(Oid relid) +{ + if (check_enable_rls(relid, InvalidOid, true) == RLS_ENABLED) + { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("LOAD from file is not supported with row-level security"), + errhint("Use Cypher CREATE clause instead."))); + } +} agtype *create_empty_agtype(void) { @@ -59,6 +220,14 @@ static agtype_value *csv_value_to_agtype_value(char *csv_val) char *new_csv_val; agtype_value *res; + /* Handle NULL or empty input - return null agtype value */ + if (csv_val == NULL || csv_val[0] == '\0') + { + res = palloc(sizeof(agtype_value)); + res->type = AGTV_NULL; + return res; + } + if (!json_validate(cstring_to_text(csv_val), false, false)) { // wrap the string with double-quote @@ -116,18 +285,40 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, for (i = 0; i<fields_len; i++) { + char *trimmed_value; + + /* Skip empty header fields (e.g., from trailing commas) */ + if (header[i] == NULL || header[i][0] == '\0') + { + continue; + } + key_agtype = string_to_agtype_value(header[i]); result.res = push_agtype_value(&result.parse_state, WAGT_KEY, key_agtype); + /* Trim whitespace from field value */ + trimmed_value = trim_whitespace(fields[i]); + if (load_as_agtype) { - value_agtype = csv_value_to_agtype_value(fields[i]); + value_agtype = csv_value_to_agtype_value(trimmed_value); } else { - value_agtype = string_to_agtype_value(fields[i]); + /* Handle empty field values */ + if (trimmed_value[0] == '\0') + { + value_agtype = palloc(sizeof(agtype_value)); + value_agtype->type = AGTV_STRING; + value_agtype->val.string.len = 0; + value_agtype->val.string.val = pstrdup(""); + } + else + { + value_agtype = string_to_agtype_value(trimmed_value); + } } result.res = push_agtype_value(&result.parse_state, @@ -169,18 +360,40 @@
agtype* create_agtype_from_list_i(char **header, char **fields, for (i = start_index; i < fields_len; i++) { + char *trimmed_value; + + /* Skip empty header fields (e.g., from trailing commas) */ + if (header[i] == NULL || header[i][0] == '\0') + { + continue; + } + key_agtype = string_to_agtype_value(header[i]); result.res = push_agtype_value(&result.parse_state, WAGT_KEY, key_agtype); + /* Trim whitespace from field value */ + trimmed_value = trim_whitespace(fields[i]); + if (load_as_agtype) { - value_agtype = csv_value_to_agtype_value(fields[i]); + value_agtype = csv_value_to_agtype_value(trimmed_value); } else { - value_agtype = string_to_agtype_value(fields[i]); + /* Handle empty field values */ + if (trimmed_value[0] == '\0') + { + value_agtype = palloc(sizeof(agtype_value)); + value_agtype->type = AGTV_STRING; + value_agtype->val.string.len = 0; + value_agtype->val.string.val = pstrdup(""); + } + else + { + value_agtype = string_to_agtype_value(trimmed_value); + } } result.res = push_agtype_value(&result.parse_state, @@ -217,18 +430,35 @@ void insert_edge_simple(Oid graph_oid, char *label_name, graphid edge_id, errmsg("label %s already exists as vertex label", label_name))); } + /* Open the relation */ + label_relation = table_open(get_label_relation(label_name, graph_oid), + RowExclusiveLock); + + /* Form the tuple */ values[0] = GRAPHID_GET_DATUM(edge_id); values[1] = GRAPHID_GET_DATUM(start_id); values[2] = GRAPHID_GET_DATUM(end_id); values[3] = AGTYPE_P_GET_DATUM((edge_properties)); - - label_relation = table_open(get_label_relation(label_name, graph_oid), - RowExclusiveLock); - tuple = heap_form_tuple(RelationGetDescr(label_relation), values, nulls); - heap_insert(label_relation, tuple, - GetCurrentCommandId(true), 0, NULL); + + if (RelationGetForm(label_relation)->relhasindex) + { + /* + * CatalogTupleInsertWithInfo() is originally for PostgreSQL's + * catalog. However, it is used here for convenience. 
+ */ + CatalogIndexState indstate = CatalogOpenIndexes(label_relation); + CatalogTupleInsertWithInfo(label_relation, tuple, indstate); + CatalogCloseIndexes(indstate); + } + else + { + heap_insert(label_relation, tuple, GetCurrentCommandId(true), + 0, NULL); + } + + /* Close the relation */ table_close(label_relation, RowExclusiveLock); CommandCounterIncrement(); } @@ -246,46 +476,88 @@ void insert_vertex_simple(Oid graph_oid, char *label_name, graphid vertex_id, if (get_label_kind(label_name, graph_oid) == LABEL_KIND_EDGE) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("label %s already exists as edge label", label_name))); + errmsg("label %s already exists as edge label", + label_name))); } - values[0] = GRAPHID_GET_DATUM(vertex_id); - values[1] = AGTYPE_P_GET_DATUM((vertex_properties)); - + /* Open the relation */ label_relation = table_open(get_label_relation(label_name, graph_oid), RowExclusiveLock); + + /* Form the tuple */ + values[0] = GRAPHID_GET_DATUM(vertex_id); + values[1] = AGTYPE_P_GET_DATUM((vertex_properties)); tuple = heap_form_tuple(RelationGetDescr(label_relation), values, nulls); - heap_insert(label_relation, tuple, - GetCurrentCommandId(true), 0, NULL); + + if (RelationGetForm(label_relation)->relhasindex) + { + /* + * CatalogTupleInsertWithInfo() is originally for PostgreSQL's + * catalog. However, it is used here for convenience. 
+ */ + CatalogIndexState indstate = CatalogOpenIndexes(label_relation); + CatalogTupleInsertWithInfo(label_relation, tuple, indstate); + CatalogCloseIndexes(indstate); + } + else + { + heap_insert(label_relation, tuple, GetCurrentCommandId(true), + 0, NULL); + } + + /* Close the relation */ table_close(label_relation, RowExclusiveLock); CommandCounterIncrement(); } -void insert_batch(batch_insert_state *batch_state, char *label_name, - Oid graph_oid) +void insert_batch(batch_insert_state *batch_state) { - Relation label_relation; - BulkInsertState bistate; - Oid relid; - - // Get the relation OID - relid = get_label_relation(label_name, graph_oid); - - // Open the relation - label_relation = table_open(relid, RowExclusiveLock); + List *result; + int i; - // Prepare the BulkInsertState - bistate = GetBulkInsertState(); + /* Check constraints for each tuple before inserting */ + if (batch_state->resultRelInfo->ri_RelationDesc->rd_att->constr) + { + for (i = 0; i < batch_state->num_tuples; i++) + { + ExecConstraints(batch_state->resultRelInfo, + batch_state->slots[i], + batch_state->estate); + } + } - // Perform the bulk insert - heap_multi_insert(label_relation, batch_state->slots, - batch_state->num_tuples, GetCurrentCommandId(true), - 0, bistate); + /* Insert the tuples */ + heap_multi_insert(batch_state->resultRelInfo->ri_RelationDesc, + batch_state->slots, batch_state->num_tuples, + GetCurrentCommandId(true), + TABLE_INSERT_SKIP_FSM, /* Skip free space map for bulk */ + batch_state->bistate); /* Use bulk insert state */ - // Clean up - FreeBulkInsertState(bistate); - table_close(label_relation, RowExclusiveLock); + /* Insert index entries for the tuples */ + if (batch_state->resultRelInfo->ri_NumIndices > 0) + { + for (i = 0; i < batch_state->num_tuples; i++) + { + result = ExecInsertIndexTuples(batch_state->resultRelInfo, + batch_state->slots[i], + batch_state->estate, false, + true, NULL, NIL, false); + + /* Check if the unique constraint is violated */ + if 
(list_length(result) != 0) + { + Datum id; + bool isnull; + + id = slot_getattr(batch_state->slots[i], 1, &isnull); + ereport(ERROR, (errmsg("Cannot insert duplicate vertex id: %ld", + DATUM_GET_GRAPHID(id)), + errhint("Entry id %ld is already used", + get_graphid_entry_id(id)))); + } + } + } CommandCounterIncrement(); } @@ -295,11 +567,12 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) { Name graph_name; Name label_name; - text* file_path; + text* file_name; char* graph_name_str; char* label_name_str; char* file_path_str; Oid graph_oid; + Oid label_relid; int32 label_id; bool id_field_exists; bool load_as_agtype; @@ -322,9 +595,12 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) errmsg("file path must not be NULL"))); } + /* Check file read permission first */ + check_file_read_permission(); + graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); - file_path = PG_GETARG_TEXT_P(2); + file_name = PG_GETARG_TEXT_P(2); id_field_exists = PG_GETARG_BOOL(3); load_as_agtype = PG_GETARG_BOOL(4); @@ -336,29 +612,37 @@ Datum load_labels_from_file(PG_FUNCTION_ARGS) label_name_str = AG_DEFAULT_LABEL_VERTEX; } - file_path_str = text_to_cstring(file_path); + file_path_str = build_safe_filename(text_to_cstring(file_name)); graph_oid = get_or_create_graph(graph_name); label_id = get_or_create_label(graph_oid, graph_name_str, label_name_str, LABEL_KIND_VERTEX); + /* Get the label relation and check permissions */ + label_relid = get_label_relation(label_name_str, graph_oid); + check_table_permissions(label_relid); + check_rls_for_load(label_relid); + create_labels_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, id_field_exists, load_as_agtype); + + free(file_path_str); + PG_RETURN_VOID(); } PG_FUNCTION_INFO_V1(load_edges_from_file); Datum load_edges_from_file(PG_FUNCTION_ARGS) { - Name graph_name; Name label_name; - text* file_path; + text* file_name; char* graph_name_str; char* label_name_str; char* file_path_str; Oid graph_oid; + Oid 
label_relid; int32 label_id; bool load_as_agtype; @@ -380,9 +664,12 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) errmsg("file path must not be NULL"))); } + /* Check file read permission first */ + check_file_read_permission(); + graph_name = PG_GETARG_NAME(0); label_name = PG_GETARG_NAME(1); - file_path = PG_GETARG_TEXT_P(2); + file_name = PG_GETARG_TEXT_P(2); load_as_agtype = PG_GETARG_BOOL(3); graph_name_str = NameStr(*graph_name); @@ -393,14 +680,22 @@ Datum load_edges_from_file(PG_FUNCTION_ARGS) label_name_str = AG_DEFAULT_LABEL_EDGE; } - file_path_str = text_to_cstring(file_path); + file_path_str = build_safe_filename(text_to_cstring(file_name)); graph_oid = get_or_create_graph(graph_name); label_id = get_or_create_label(graph_oid, graph_name_str, label_name_str, LABEL_KIND_EDGE); + /* Get the label relation and check permissions */ + label_relid = get_label_relation(label_name_str, graph_oid); + check_table_permissions(label_relid); + check_rls_for_load(label_relid); + create_edges_from_csv_file(file_path_str, graph_name_str, graph_oid, label_name_str, label_id, load_as_agtype); + + free(file_path_str); + PG_RETURN_VOID(); } @@ -475,3 +770,105 @@ static int32 get_or_create_label(Oid graph_oid, char *graph_name, return label_id; } + +/* + * Initialize the batch insert state. 
+ */ +void init_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid) +{ + Relation relation; + Oid relid; + EState *estate; + ResultRelInfo *resultRelInfo; + RangeTblEntry *rte; + RTEPermissionInfo *perminfo; + List *range_table = NIL; + List *perminfos = NIL; + int i; + + /* Get the relation OID */ + relid = get_label_relation(label_name, graph_oid); + + /* Initialize executor state */ + estate = CreateExecutorState(); + + /* Create range table entry for ExecConstraints */ + rte = makeNode(RangeTblEntry); + rte->rtekind = RTE_RELATION; + rte->relid = relid; + rte->relkind = RELKIND_RELATION; + rte->rellockmode = RowExclusiveLock; + rte->perminfoindex = 1; + range_table = list_make1(rte); + + /* Create permission info */ + perminfo = makeNode(RTEPermissionInfo); + perminfo->relid = relid; + perminfo->requiredPerms = ACL_INSERT; + perminfos = list_make1(perminfo); + + /* Initialize range table in executor state */ + ExecInitRangeTable(estate, range_table, perminfos); + + /* Initialize resultRelInfo - this opens the relation */ + resultRelInfo = makeNode(ResultRelInfo); + ExecInitResultRelation(estate, resultRelInfo, 1); + + /* Get relation from resultRelInfo (opened by ExecInitResultRelation) */ + relation = resultRelInfo->ri_RelationDesc; + + /* Open the indices */ + ExecOpenIndices(resultRelInfo, false); + + /* Initialize the batch insert state */ + *batch_state = (batch_insert_state *) palloc0(sizeof(batch_insert_state)); + (*batch_state)->slots = palloc(sizeof(TupleTableSlot *) * BATCH_SIZE); + (*batch_state)->estate = estate; + (*batch_state)->resultRelInfo = resultRelInfo; + (*batch_state)->num_tuples = 0; + (*batch_state)->buffered_bytes = 0; + (*batch_state)->bistate = GetBulkInsertState(); + + /* Create slots */ + for (i = 0; i < BATCH_SIZE; i++) + { + (*batch_state)->slots[i] = MakeSingleTupleTableSlot( + RelationGetDescr(relation), + &TTSOpsHeapTuple); + } +} + +/* + * Finish the batch insert for vertices. 
Insert the + * tuples remaining in the batch state and clean up. + */ +void finish_batch_insert(batch_insert_state **batch_state) +{ + int i; + + if ((*batch_state)->num_tuples > 0) + { + insert_batch(*batch_state); + (*batch_state)->num_tuples = 0; + } + + /* Free slots */ + for (i = 0; i < BATCH_SIZE; i++) + { + ExecDropSingleTupleTableSlot((*batch_state)->slots[i]); + } + + /* Free BulkInsertState */ + FreeBulkInsertState((*batch_state)->bistate); + + /* Close result relations and range table relations */ + ExecCloseResultRelations((*batch_state)->estate); + ExecCloseRangeTableRelations((*batch_state)->estate); + + /* Clean up executor state */ + FreeExecutorState((*batch_state)->estate); + pfree((*batch_state)->slots); + pfree(*batch_state); + *batch_state = NULL; +} diff --git a/src/backend/utils/load/libcsv.c b/src/backend/utils/load/libcsv.c deleted file mode 100644 index f0e8b46be..000000000 --- a/src/backend/utils/load/libcsv.c +++ /dev/null @@ -1,549 +0,0 @@ -/* -libcsv - parse and write csv data -Copyright (C) 2008 Robert Gamble - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. 
- -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#include <assert.h> - -#if __STDC_VERSION__ >= 199901L -# include <stdint.h> -#else - /* C89 doesn't have stdint.h or SIZE_MAX */ -# define SIZE_MAX ((size_t)-1) -#endif - -#include "utils/load/csv.h" - -#define VERSION "3.0.3" - -#define ROW_NOT_BEGUN 0 -#define FIELD_NOT_BEGUN 1 -#define FIELD_BEGUN 2 -#define FIELD_MIGHT_HAVE_ENDED 3 - -/* - Explanation of states - ROW_NOT_BEGUN There have not been any fields encountered for this row - FIELD_NOT_BEGUN There have been fields but we are currently not in one - FIELD_BEGUN We are in a field - FIELD_MIGHT_HAVE_ENDED - We encountered a double quote inside a quoted field, the - field is either ended or the quote is literal -*/ - -#define MEM_BLK_SIZE 128 - -#define SUBMIT_FIELD(p) \ - do { \ - if (!quoted) \ - entry_pos -= spaces; \ - if (p->options & CSV_APPEND_NULL) \ - ((p)->entry_buf[entry_pos]) = '\0'; \ - if (cb1 && (p->options & CSV_EMPTY_IS_NULL) && !quoted && entry_pos == 0) \ - cb1(NULL, entry_pos, data); \ - else if (cb1) \ - cb1(p->entry_buf, entry_pos, data); \ - pstate = FIELD_NOT_BEGUN; \ - entry_pos = quoted = spaces = 0; \ - } while (0) - -#define SUBMIT_ROW(p, c) \ - do { \ - if (cb2) \ - cb2(c, data); \ - pstate = ROW_NOT_BEGUN; \ - entry_pos = quoted = spaces = 0; \ - } while (0) - -#define SUBMIT_CHAR(p, c) ((p)->entry_buf[entry_pos++] = (c)) - -static const char *csv_errors[] = {"success", - "error parsing data while strict checking enabled", - "memory exhausted while increasing buffer size", - "data size too large", - "invalid status code"}; - -int -csv_error(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Return the current status of the parser */ - return p->status; -} - -const char * -csv_strerror(int status) -{ - /* Return a textual description of status */ - if
(status >= CSV_EINVALID || status < 0) - return csv_errors[CSV_EINVALID]; - else - return csv_errors[status]; -} - -int -csv_get_opts(const struct csv_parser *p) -{ - /* Return the currently set options of parser */ - if (p == NULL) - return -1; - - return p->options; -} - -int -csv_set_opts(struct csv_parser *p, unsigned char options) -{ - /* Set the options */ - if (p == NULL) - return -1; - - p->options = options; - return 0; -} - -int -csv_init(struct csv_parser *p, unsigned char options) -{ - /* Initialize a csv_parser object returns 0 on success, -1 on error */ - if (p == NULL) - return -1; - - p->entry_buf = NULL; - p->pstate = ROW_NOT_BEGUN; - p->quoted = 0; - p->spaces = 0; - p->entry_pos = 0; - p->entry_size = 0; - p->status = 0; - p->options = options; - p->quote_char = CSV_QUOTE; - p->delim_char = CSV_COMMA; - p->is_space = NULL; - p->is_term = NULL; - p->blk_size = MEM_BLK_SIZE; - p->malloc_func = NULL; - p->realloc_func = realloc; - p->free_func = free; - - return 0; -} - -void -csv_free(struct csv_parser *p) -{ - /* Free the entry_buffer of csv_parser object */ - if (p == NULL) - return; - - if (p->entry_buf && p->free_func) - p->free_func(p->entry_buf); - - p->entry_buf = NULL; - p->entry_size = 0; - - return; -} - -int -csv_fini(struct csv_parser *p, void (*cb1)(void *, size_t, void *), void (*cb2)(int c, void *), void *data) -{ - int quoted; - int pstate; - size_t spaces; - size_t entry_pos; - - if (p == NULL) - return -1; - - /* Finalize parsing. 
Needed, for example, when file does not end in a newline */ - quoted = p->quoted; - pstate = p->pstate; - spaces = p->spaces; - entry_pos = p->entry_pos; - - if ((pstate == FIELD_BEGUN) && p->quoted && (p->options & CSV_STRICT) && (p->options & CSV_STRICT_FINI)) { - /* Current field is quoted, no end-quote was seen, and CSV_STRICT_FINI is set */ - p->status = CSV_EPARSE; - return -1; - } - - switch (pstate) { - case FIELD_MIGHT_HAVE_ENDED: - p->entry_pos -= p->spaces + 1; /* get rid of spaces and original quote */ - entry_pos = p->entry_pos; - /*lint -fallthrough */ - case FIELD_NOT_BEGUN: - case FIELD_BEGUN: - /* Unnecessary: - quoted = p->quoted, pstate = p->pstate; - spaces = p->spaces, entry_pos = p->entry_pos; - */ - SUBMIT_FIELD(p); - SUBMIT_ROW(p, -1); - break; - case ROW_NOT_BEGUN: /* Already ended properly */ - ; - } - - /* Reset parser */ - p->spaces = p->quoted = p->entry_pos = p->status = 0; - p->pstate = ROW_NOT_BEGUN; - - return 0; -} - -void -csv_set_delim(struct csv_parser *p, unsigned char c) -{ - /* Set the delimiter */ - if (p) p->delim_char = c; -} - -void -csv_set_quote(struct csv_parser *p, unsigned char c) -{ - /* Set the quote character */ - if (p) p->quote_char = c; -} - -unsigned char -csv_get_delim(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Get the delimiter */ - return p->delim_char; -} - -unsigned char -csv_get_quote(const struct csv_parser *p) -{ - assert(p && "received null csv_parser"); - - /* Get the quote character */ - return p->quote_char; -} - -void -csv_set_space_func(struct csv_parser *p, int (*f)(unsigned char)) -{ - /* Set the space function */ - if (p) p->is_space = f; -} - -void -csv_set_term_func(struct csv_parser *p, int (*f)(unsigned char)) -{ - /* Set the term function */ - if (p) p->is_term = f; -} - -void -csv_set_realloc_func(struct csv_parser *p, void *(*f)(void *, size_t)) -{ - /* Set the realloc function used to increase buffer size */ - if (p && f) p->realloc_func = f; -} - 
-void -csv_set_free_func(struct csv_parser *p, void (*f)(void *)) -{ - /* Set the free function used to free the buffer */ - if (p && f) p->free_func = f; -} - -void -csv_set_blk_size(struct csv_parser *p, size_t size) -{ - /* Set the block size used to increment buffer size */ - if (p) p->blk_size = size; -} - -size_t -csv_get_buffer_size(const struct csv_parser *p) -{ - /* Get the size of the entry buffer */ - if (p) - return p->entry_size; - return 0; -} - -static int -csv_increase_buffer(struct csv_parser *p) -{ - size_t to_add; - void *vp; - - if (p == NULL) return 0; - if (p->realloc_func == NULL) return 0; - - /* Increase the size of the entry buffer. Attempt to increase size by - * p->blk_size, if this is larger than SIZE_MAX try to increase current - * buffer size to SIZE_MAX. If allocation fails, try to allocate halve - * the size and try again until successful or increment size is zero. - */ - - to_add = p->blk_size; - - if ( p->entry_size >= SIZE_MAX - to_add ) - to_add = SIZE_MAX - p->entry_size; - - if (!to_add) { - p->status = CSV_ETOOBIG; - return -1; - } - - while ((vp = p->realloc_func(p->entry_buf, p->entry_size + to_add)) == NULL) { - to_add /= 2; - if (!to_add) { - p->status = CSV_ENOMEM; - return -1; - } - } - - /* Update entry buffer pointer and entry_size if successful */ - p->entry_buf = vp; - p->entry_size += to_add; - return 0; -} - -size_t -csv_parse(struct csv_parser *p, const void *s, size_t len, void (*cb1)(void *, size_t, void *), void (*cb2)(int c, void *), void *data) -{ - unsigned const char *us = s; /* Access input data as array of unsigned char */ - unsigned char c; /* The character we are currently processing */ - size_t pos = 0; /* The number of characters we have processed in this call */ - - /* Store key fields into local variables for performance */ - unsigned char delim = p->delim_char; - unsigned char quote = p->quote_char; - int (*is_space)(unsigned char) = p->is_space; - int (*is_term)(unsigned char) = p->is_term; - int 
quoted = p->quoted; - int pstate = p->pstate; - size_t spaces = p->spaces; - size_t entry_pos = p->entry_pos; - - - if (!p->entry_buf && pos < len) { - /* Buffer hasn't been allocated yet and len > 0 */ - if (csv_increase_buffer(p) != 0) { - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; - } - } - - while (pos < len) { - /* Check memory usage, increase buffer if necessary */ - if (entry_pos == ((p->options & CSV_APPEND_NULL) ? p->entry_size - 1 : p->entry_size) ) { - if (csv_increase_buffer(p) != 0) { - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; - } - } - - c = us[pos++]; - - switch (pstate) { - case ROW_NOT_BEGUN: - case FIELD_NOT_BEGUN: - if ((is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB) && c!=delim) { /* Space or Tab */ - continue; - } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - if (pstate == FIELD_NOT_BEGUN) { - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else { /* ROW_NOT_BEGUN */ - /* Don't submit empty rows by default */ - if (p->options & CSV_REPALL_NL) { - SUBMIT_ROW(p, c); - } - } - continue; - } else if (c == delim) { /* Comma */ - SUBMIT_FIELD(p); - break; - } else if (c == quote) { /* Quote */ - pstate = FIELD_BEGUN; - quoted = 1; - } else { /* Anything else */ - pstate = FIELD_BEGUN; - quoted = 0; - SUBMIT_CHAR(p, c); - } - break; - case FIELD_BEGUN: - if (c == quote) { /* Quote */ - if (quoted) { - SUBMIT_CHAR(p, c); - pstate = FIELD_MIGHT_HAVE_ENDED; - } else { - /* STRICT ERROR - double quote inside non-quoted field */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - SUBMIT_CHAR(p, c); - spaces = 0; - } - } else if (c == delim) { /* Comma */ - if (quoted) { - SUBMIT_CHAR(p, c); - } else { - SUBMIT_FIELD(p); - } - } else if (is_term ? 
is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - if (!quoted) { - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else { - SUBMIT_CHAR(p, c); - } - } else if (!quoted && (is_space? is_space(c) : c == CSV_SPACE || c == CSV_TAB)) { /* Tab or space for non-quoted field */ - SUBMIT_CHAR(p, c); - spaces++; - } else { /* Anything else */ - SUBMIT_CHAR(p, c); - spaces = 0; - } - break; - case FIELD_MIGHT_HAVE_ENDED: - /* This only happens when a quote character is encountered in a quoted field */ - if (c == delim) { /* Comma */ - entry_pos -= spaces + 1; /* get rid of spaces and original quote */ - SUBMIT_FIELD(p); - } else if (is_term ? is_term(c) : c == CSV_CR || c == CSV_LF) { /* Carriage Return or Line Feed */ - entry_pos -= spaces + 1; /* get rid of spaces and original quote */ - SUBMIT_FIELD(p); - SUBMIT_ROW(p, c); - } else if (is_space ? is_space(c) : c == CSV_SPACE || c == CSV_TAB) { /* Space or Tab */ - SUBMIT_CHAR(p, c); - spaces++; - } else if (c == quote) { /* Quote */ - if (spaces) { - /* STRICT ERROR - unescaped double quote */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - spaces = 0; - SUBMIT_CHAR(p, c); - } else { - /* Two quotes in a row */ - pstate = FIELD_BEGUN; - } - } else { /* Anything else */ - /* STRICT ERROR - unescaped double quote */ - if (p->options & CSV_STRICT) { - p->status = CSV_EPARSE; - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos-1; - } - pstate = FIELD_BEGUN; - spaces = 0; - SUBMIT_CHAR(p, c); - } - break; - default: - break; - } - } - p->quoted = quoted, p->pstate = pstate, p->spaces = spaces, p->entry_pos = entry_pos; - return pos; -} - -size_t -csv_write (void *dest, size_t dest_size, const void *src, size_t src_size) -{ - return csv_write2(dest, dest_size, src, src_size, CSV_QUOTE); -} - -int -csv_fwrite (FILE *fp, const void *src, 
size_t src_size) -{ - return csv_fwrite2(fp, src, src_size, CSV_QUOTE); -} - -size_t -csv_write2 (void *dest, size_t dest_size, const void *src, size_t src_size, unsigned char quote) -{ - unsigned char *cdest = dest; - const unsigned char *csrc = src; - size_t chars = 0; - - if (src == NULL) - return 0; - - if (dest == NULL) - dest_size = 0; - - if (dest_size > 0) - *cdest++ = quote; - chars++; - - while (src_size) { - if (*csrc == quote) { - if (dest_size > chars) - *cdest++ = quote; - if (chars < SIZE_MAX) chars++; - } - if (dest_size > chars) - *cdest++ = *csrc; - if (chars < SIZE_MAX) chars++; - src_size--; - csrc++; - } - - if (dest_size > chars) - *cdest = quote; - if (chars < SIZE_MAX) chars++; - - return chars; -} - -int -csv_fwrite2 (FILE *fp, const void *src, size_t src_size, unsigned char quote) -{ - const unsigned char *csrc = src; - - if (fp == NULL || src == NULL) - return 0; - - if (fputc(quote, fp) == EOF) - return EOF; - - while (src_size) { - if (*csrc == quote) { - if (fputc(quote, fp) == EOF) - return EOF; - } - if (fputc(*csrc, fp) == EOF) - return EOF; - src_size--; - csrc++; - } - - if (fputc(quote, fp) == EOF) { - return EOF; - } - - return 0; -} diff --git a/src/include/utils/agtype.h b/src/include/utils/agtype.h index 1f6908103..ec9125073 100644 --- a/src/include/utils/agtype.h +++ b/src/include/utils/agtype.h @@ -659,6 +659,7 @@ void pfree_agtype_value(agtype_value* value); void pfree_agtype_value_content(agtype_value* value); void pfree_agtype_in_state(agtype_in_state* value); void pfree_if_not_null(void *ptr); +void *repalloc_check(void *ptr, size_t len); agtype_value *agtype_value_from_cstring(char *str, int len); /* Oid accessors for AGTYPE */ Oid get_AGTYPEOID(void); diff --git a/src/include/utils/load/ag_load_edges.h b/src/include/utils/load/ag_load_edges.h index eec9484cc..4db00d93a 100644 --- a/src/include/utils/load/ag_load_edges.h +++ b/src/include/utils/load/ag_load_edges.h @@ -17,42 +17,28 @@ * under the License. 
*/ -#include "access/heapam.h" -#include "utils/load/age_load.h" - #ifndef AG_LOAD_EDGES_H #define AG_LOAD_EDGES_H -typedef struct { - size_t row; - char **header; - size_t *header_len; - size_t header_num; - char **fields; - size_t *fields_len; - size_t alloc; - size_t cur_field; - int error; - size_t header_row_length; - size_t curr_row_length; - char *graph_name; - Oid graph_oid; - char *label_name; - int label_id; - Oid label_seq_relid; - char *start_vertex; - char *end_vertex; - bool load_as_agtype; - batch_insert_state *batch_state; -} csv_edge_reader; - - -void edge_field_cb(void *field, size_t field_len, void *data); -void edge_row_cb(int delim __attribute__((unused)), void *data); +#include "utils/load/age_load.h" +/* + * Load edges from a CSV file using pg's COPY infrastructure. + * + * CSV format: start_id, start_vertex_type, end_id, end_vertex_type, [properties...] + * + * Parameters: + * file_path - Path to the CSV file (must be in /tmp/age/) + * graph_name - Name of the graph + * graph_oid - OID of the graph + * label_name - Name of the edge label + * label_id - ID of the label + * load_as_agtype - If true, parse CSV values as agtype (JSON-like) + * + * Returns EXIT_SUCCESS on success. + */ int create_edges_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, - char *label_name, int label_id, - bool load_as_agtype); + char *label_name, int label_id, + bool load_as_agtype); #endif /* AG_LOAD_EDGES_H */ - diff --git a/src/include/utils/load/ag_load_labels.h b/src/include/utils/load/ag_load_labels.h index 3a70a5c05..c3d517f30 100644 --- a/src/include/utils/load/ag_load_labels.h +++ b/src/include/utils/load/ag_load_labels.h @@ -17,51 +17,26 @@ * under the License. 
*/ - #ifndef AG_LOAD_LABELS_H #define AG_LOAD_LABELS_H -#include "access/heapam.h" #include "utils/load/age_load.h" -#define AGE_VERTIX 1 -#define AGE_EDGE 2 - - -struct counts { - long unsigned fields; - long unsigned allvalues; - long unsigned rows; -}; - -typedef struct { - size_t row; - char **header; - size_t *header_len; - size_t header_num; - char **fields; - size_t *fields_len; - size_t alloc; - size_t cur_field; - int error; - size_t header_row_length; - size_t curr_row_length; - char *graph_name; - Oid graph_oid; - char *label_name; - int label_id; - Oid label_seq_relid; - Oid temp_table_relid; - bool id_field_exists; - bool load_as_agtype; - int curr_seq_num; - batch_insert_state *batch_state; -} csv_vertex_reader; - - -void vertex_field_cb(void *field, size_t field_len, void *data); -void vertex_row_cb(int delim __attribute__((unused)), void *data); - +/* + * Load vertex labels from a CSV file using pg's COPY infrastructure. + * CSV format: [id,] [properties...] + * + * Parameters: + * file_path - Path to the CSV file (must be in /tmp/age/) + * graph_name - Name of the graph + * graph_oid - OID of the graph + * label_name - Name of the vertex label + * label_id - ID of the label + * id_field_exists - If true, first CSV column contains the vertex ID + * load_as_agtype - If true, parse CSV values as agtype (JSON-like) + * + * Returns EXIT_SUCCESS on success. + */ int create_labels_from_csv_file(char *file_path, char *graph_name, Oid graph_oid, char *label_name, int label_id, bool id_field_exists, bool load_as_agtype); diff --git a/src/include/utils/load/age_load.h b/src/include/utils/load/age_load.h index b1335581b..6573c79f3 100644 --- a/src/include/utils/load/age_load.h +++ b/src/include/utils/load/age_load.h @@ -17,6 +17,10 @@ * under the License. 
*/ +#ifndef AG_LOAD_H +#define AG_LOAD_H + +#include "access/heapam.h" #include "commands/sequence.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -27,37 +31,38 @@ #include "commands/graph_commands.h" #include "utils/ag_cache.h" -#ifndef AGE_ENTITY_CREATOR_H -#define AGE_ENTITY_CREATOR_H - -#define TEMP_VERTEX_ID_TABLE_SUFFIX "_ag_vertex_ids" -#define GET_TEMP_VERTEX_ID_TABLE(graph_name) \ - psprintf("_%s%s", graph_name, TEMP_VERTEX_ID_TABLE_SUFFIX) - #define BATCH_SIZE 1000 +#define MAX_BUFFERED_BYTES 65535 /* 64KB, same as pg COPY */ -typedef struct +typedef struct batch_insert_state { + EState *estate; + ResultRelInfo *resultRelInfo; TupleTableSlot **slots; - TupleTableSlot **temp_id_slots; int num_tuples; - int max_tuples; + size_t buffered_bytes; + BulkInsertState bistate; } batch_insert_state; -agtype* create_empty_agtype(void); - -agtype* create_agtype_from_list(char **header, char **fields, +agtype *create_empty_agtype(void); +agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len, int64 vertex_id, bool load_as_agtype); -agtype* create_agtype_from_list_i(char **header, char **fields, +agtype *create_agtype_from_list_i(char **header, char **fields, size_t fields_len, size_t start_index, bool load_as_agtype); + void insert_vertex_simple(Oid graph_oid, char *label_name, graphid vertex_id, agtype *vertex_properties); void insert_edge_simple(Oid graph_oid, char *label_name, graphid edge_id, graphid start_id, graphid end_id, - agtype* end_properties); -void insert_batch(batch_insert_state *batch_state, char *label_name, - Oid graph_oid); + agtype *edge_properties); + +void init_batch_insert(batch_insert_state **batch_state, + char *label_name, Oid graph_oid); +void insert_batch(batch_insert_state *batch_state); +void finish_batch_insert(batch_insert_state **batch_state); + +char *trim_whitespace(const char *str); -#endif /* AGE_ENTITY_CREATOR_H */ +#endif /* AG_LOAD_H */ diff --git a/src/include/utils/load/csv.h 
b/src/include/utils/load/csv.h deleted file mode 100644 index 062536977..000000000 --- a/src/include/utils/load/csv.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Created by Shoaib on 12/5/2021. -*/ - -/* -libcsv - parse and write csv data -Copyright (C) 2008-2021 Robert Gamble -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -#ifndef LIBCSV_H__ -#define LIBCSV_H__ -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#define CSV_MAJOR 3 -#define CSV_MINOR 0 -#define CSV_RELEASE 3 - -/* Error Codes */ -#define CSV_SUCCESS 0 -#define CSV_EPARSE 1 /* Parse error in strict mode */ -#define CSV_ENOMEM 2 /* Out of memory while increasing buffer size */ -#define CSV_ETOOBIG 3 /* Buffer larger than SIZE_MAX needed */ -#define CSV_EINVALID 4 /* Invalid code,should never be received from csv_error*/ - - -/* parser options */ -#define CSV_STRICT 1 /* enable strict mode */ -#define CSV_REPALL_NL 2 /* report all unquoted carriage returns and linefeeds */ -#define CSV_STRICT_FINI 4 /* causes csv_fini to return CSV_EPARSE if last - field is quoted and doesn't contain ending - quote */ -#define CSV_APPEND_NULL 8 /* Ensure that all fields are null-terminated */ -#define CSV_EMPTY_IS_NULL 16 /* Pass null pointer to cb1 function when - empty, unquoted fields are encountered */ - - -/* Character values */ -#define CSV_TAB 
0x09 -#define CSV_SPACE 0x20 -#define CSV_CR 0x0d -#define CSV_LF 0x0a -#define CSV_COMMA 0x2c -#define CSV_QUOTE 0x22 - -struct csv_parser { - int pstate; /* Parser state */ - int quoted; /* Is the current field a quoted field? */ - size_t spaces; /* Number of continuous spaces after quote or in a non-quoted field */ - unsigned char * entry_buf; /* Entry buffer */ - size_t entry_pos; /* Current position in entry_buf (and current size of entry) */ - size_t entry_size; /* Size of entry buffer */ - int status; /* Operation status */ - unsigned char options; - unsigned char quote_char; - unsigned char delim_char; - int (*is_space)(unsigned char); - int (*is_term)(unsigned char); - size_t blk_size; - void *(*malloc_func)(size_t); /* not used */ - void *(*realloc_func)(void *, size_t); /* function used to allocate buffer memory */ - void (*free_func)(void *); /* function used to free buffer memory */ -}; - -/* Function Prototypes */ -int csv_init(struct csv_parser *p, unsigned char options); -int csv_fini(struct csv_parser *p, void (*cb1)(void *, size_t, void *), void (*cb2)(int, void *), void *data); -void csv_free(struct csv_parser *p); -int csv_error(const struct csv_parser *p); -const char * csv_strerror(int error); -size_t csv_parse(struct csv_parser *p, const void *s, size_t len, void (*cb1)(void *, size_t, void *), void (*cb2)(int, void *), void *data); -size_t csv_write(void *dest, size_t dest_size, const void *src, size_t src_size); -int csv_fwrite(FILE *fp, const void *src, size_t src_size); -size_t csv_write2(void *dest, size_t dest_size, const void *src, size_t src_size, unsigned char quote); -int csv_fwrite2(FILE *fp, const void *src, size_t src_size, unsigned char quote); -int csv_get_opts(const struct csv_parser *p); -int csv_set_opts(struct csv_parser *p, unsigned char options); -void csv_set_delim(struct csv_parser *p, unsigned char c); -void csv_set_quote(struct csv_parser *p, unsigned char c); -unsigned char csv_get_delim(const struct csv_parser *p); 
-unsigned char csv_get_quote(const struct csv_parser *p); -void csv_set_space_func(struct csv_parser *p, int (*f)(unsigned char)); -void csv_set_term_func(struct csv_parser *p, int (*f)(unsigned char)); -void csv_set_realloc_func(struct csv_parser *p, void *(*)(void *, size_t)); -void csv_set_free_func(struct csv_parser *p, void (*)(void *)); -void csv_set_blk_size(struct csv_parser *p, size_t); -size_t csv_get_buffer_size(const struct csv_parser *p); - -#ifdef __cplusplus -} -#endif - -#endif