From 3f3319d93e8dee739aa9db159cece49335aaa923 Mon Sep 17 00:00:00 2001 From: Leo Meyerovich Date: Fri, 15 May 2026 14:17:00 -0700 Subject: [PATCH] feat(gfql): add public declarative schema model --- CHANGELOG.md | 1 + docs/source/api/gfql/index.rst | 1 + docs/source/api/gfql/schema.rst | 7 + docs/source/gfql/index.rst | 1 + docs/source/gfql/schema.rst | 109 ++++++ graphistry/Plottable.py | 1 + graphistry/PlotterBase.py | 6 + graphistry/__init__.py | 7 + .../compute/gfql/frontends/cypher/binder.py | 245 ++++++++++++- graphistry/compute/gfql_validate.py | 53 ++- graphistry/pygraphistry.py | 8 +- graphistry/schema.py | 333 ++++++++++++++++++ .../tests/compute/gfql/cypher/test_binder.py | 17 +- .../tests/compute/gfql/test_public_schema.py | 220 ++++++++++++ 14 files changed, 983 insertions(+), 26 deletions(-) create mode 100644 docs/source/api/gfql/schema.rst create mode 100644 docs/source/gfql/schema.rst create mode 100644 graphistry/schema.py create mode 100644 graphistry/tests/compute/gfql/test_public_schema.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 5139638e33..27409e3f26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - **GFQL policy / Cypher compiler hooks (#1454)**: Added experimental exact-key `precompile` and `postcompile` policy hooks for local Cypher string-query compilation. `postcompile` reports success or failure using the existing policy `success`, `error`, and `error_type` fields plus a stable `CompileSummary` with scalar compiler metadata. ### Changed +- **GFQL public schema declarations (#1337)**: Added stable `graphistry.schema` exports for `NodeType`, `EdgeType`, `GraphSchema`, and `EdgeTopology`, plus top-level `graphistry` re-exports. `NodeType` and `EdgeType` now accept Arrow-first `pyarrow.Schema` declarations, preserve dtype/nullability through GFQL `RowSchema`, and export back to Arrow with label/type columns via `to_arrow()`. `graphistry.bind(..., schema=schema)` / `g.bind(schema=schema)` now attach public schema declarations to plotters, and Cypher preflight validation consumes the adapted internal `GraphSchemaCatalog` for declared labels, properties, relationship types, and source/destination topology checks. `GraphSchema(strict=False)` now makes schema-bound `g.gfql_validate(...)` permissive by default while explicit call-level `strict=True` still forces strict validation. - **GFQL / Cypher pattern predicate existence semantics (#1449)**: Direct-Cypher `WHERE (pattern)` predicates now lower through correlated semi-apply markers instead of rewriting single positive predicates into appended `MATCH` clauses, preventing existence checks from multiplying result rows. Added pandas/cuDF coverage for the residual `expr-pattern1-10`, `expr-pattern1-13`, and `expr-pattern1-18` undirected pattern-predicate wrong-row cases. - **GFQL / Cypher reentry failfast scaffolding cleanup (#1421)**: Removed the obsolete `graphistry.compute.gfql.cypher.reentry.runtime` compatibility re-export shim after compile-time reentry ownership moved to `reentry.compiletime`, moved tests off the old private `gfql_unified._compiled_query_reentry_state` access path, and lifted the stale closed-#1256 aggregate failfast so chained reentry secondary-property carries now flow through downstream aggregating `WITH` stages with positive row assertions. - **GFQL / Cypher pre-strict binder compatibility guard deletion (#1420)**: Retired the legacy loose `FrontendBinder.bind(strict_name_resolution=False)` graph traversal path and unresolved-name fallbacks now that #1357 made strict binder semantics canonical. Cypher compile prepass and graph-constructor binding now pass `strict_name_resolution=True` explicitly, and binder tests now pin that the legacy false flag no longer admits unresolved `collect(...)`, single-alias list literal, or missing-schema inputs while preserving strict source-order traversal through `WITH → UNWIND → MATCH`. diff --git a/docs/source/api/gfql/index.rst b/docs/source/api/gfql/index.rst index 1eb11b75fd..9af8e4a05c 100644 --- a/docs/source/api/gfql/index.rst +++ b/docs/source/api/gfql/index.rst @@ -13,3 +13,4 @@ GFQL API Reference hop node predicates + schema diff --git a/docs/source/api/gfql/schema.rst b/docs/source/api/gfql/schema.rst new file mode 100644 index 0000000000..dc2c3ca78a --- /dev/null +++ b/docs/source/api/gfql/schema.rst @@ -0,0 +1,7 @@ +GFQL Schema +=========== + +.. automodule:: graphistry.schema + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/gfql/index.rst b/docs/source/gfql/index.rst index ad0fa359af..8362feb6f7 100644 --- a/docs/source/gfql/index.rst +++ b/docs/source/gfql/index.rst @@ -63,6 +63,7 @@ See also: builtin_calls policy strict_mode + schema wire_protocol_examples .. toctree:: diff --git a/docs/source/gfql/schema.rst b/docs/source/gfql/schema.rst new file mode 100644 index 0000000000..4578c64249 --- /dev/null +++ b/docs/source/gfql/schema.rst @@ -0,0 +1,109 @@ +Declarative Graph Schemas +========================= + +GFQL accepts public schema declarations through the stable +``graphistry.schema`` import path. Use this when application code owns a graph +contract and wants Cypher preflight checks to fail before query execution. + +.. code-block:: python + + import graphistry + import pyarrow as pa + from graphistry.schema import EdgeType, GraphSchema, NodeType + + Person = NodeType( + "Person", + pa.schema([ + pa.field("id", pa.int64(), nullable=False), + pa.field("name", pa.large_string()), + ]), + ) + Company = NodeType( + "Company", + pa.schema([ + pa.field("id", pa.int64(), nullable=False), + pa.field("name", pa.large_string()), + ]), + ) + WorksAt = EdgeType( + "WORKS_AT", + source=Person, + destination=Company, + properties=pa.schema([pa.field("since", pa.int32(), nullable=False)]), + ) + + schema = GraphSchema( + node_types=[Person, Company], + edge_types=[WorksAt], + node_id_column="id", + edge_source_column="src", + edge_destination_column="dst", + ) + + g = graphistry.bind( + source="src", + destination="dst", + node="id", + schema=schema, + ) + + g.gfql_validate("MATCH (p:Person)-[:WORKS_AT]->(c:Company) RETURN p.name") + +Schema Objects +-------------- + +``NodeType(name, properties, labels=None)`` + Declares a node contract. ``labels`` defaults to ``(name,)`` and maps to the + existing GFQL label-column convention ``label__