-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Split proto serialization to encapsulate private state (#21835) #21929
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
6749b49
9529933
9db19ff
e53b5ac
89d6cf7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -477,6 +477,180 @@ pub trait PhysicalExpr: Any + Send + Sync + Display + Debug + DynEq + DynHash { | |
| fn expression_id(&self) -> Option<u64> { | ||
| None | ||
| } | ||
|
|
||
| /// Serialize this expression to a [`PhysicalExprNode`] proto message. | ||
| /// | ||
| /// Returning `Ok(None)` means "this expression does not know how to | ||
| /// serialize itself"; the caller (typically `datafusion-proto`) will fall | ||
| /// back to its existing codec / extension paths. This matches today's | ||
| /// behavior for expressions that aren't built into `datafusion-proto`. | ||
| /// | ||
| /// Returning `Ok(Some(node))` means the expression has serialized itself | ||
| /// fully; the caller should not try any further fallback path. | ||
| /// | ||
| /// Returning `Err(_)` means a real serialization failure (e.g. the | ||
| /// expression knows it should serialize but a child failed). | ||
| /// | ||
| /// The motivating use case is letting expressions with private state | ||
| /// (e.g. `DynamicFilterPhysicalExpr`'s `RwLock`-protected inner fields) | ||
| /// reach into their own internals for `try_to_proto`/`try_from_proto` | ||
| /// without having to expose `pub` accessors to `datafusion-proto`. See | ||
| /// <https://github.com/apache/datafusion/issues/21835>. | ||
| /// | ||
| /// The `try_` prefix matches the fallible `try_from_proto` decode | ||
| /// constructors (and the `TryFromProto` trait in `datafusion-proto`); | ||
| /// both sides of the round-trip are fallible and named consistently. | ||
| /// | ||
| /// [`PhysicalExprNode`]: datafusion_proto_models::protobuf::PhysicalExprNode | ||
| #[cfg(feature = "proto")] | ||
| fn try_to_proto( | ||
| &self, | ||
| _ctx: &proto_encode::PhysicalExprEncodeCtx<'_>, | ||
| ) -> Result<Option<datafusion_proto_models::protobuf::PhysicalExprNode>> { | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @timsaucer do you think we should make this |
||
| Ok(None) | ||
| } | ||
| } | ||
|
|
||
| /// Encode-side context for [`PhysicalExpr::try_to_proto`]. | ||
| /// | ||
| /// Expression authors only ever see [`proto_encode::PhysicalExprEncodeCtx`]: | ||
| /// a concrete struct with stable methods. Internally it dispatches to a | ||
| /// [`proto_encode::PhysicalExprEncode`] implementor that lives in | ||
| /// `datafusion-proto`, which is what lets `physical-expr-common` stay free | ||
| /// of `datafusion-proto` as a dep. | ||
| /// | ||
| /// More specialized helpers (e.g. encoding UDFs/UDAFs/UDWFs through the | ||
| /// extension codec) can be added to the context as expressions migrate; | ||
| /// today they're not required because the encoder forwards to the existing | ||
| /// codec via the proto converter. | ||
| #[cfg(feature = "proto")] | ||
| pub mod proto_encode { | ||
| use std::sync::Arc; | ||
|
|
||
| use datafusion_common::Result; | ||
| use datafusion_proto_models::protobuf::PhysicalExprNode; | ||
|
|
||
| use super::PhysicalExpr; | ||
|
|
||
| /// Encoder context handed to [`super::PhysicalExpr::try_to_proto`]. | ||
| /// | ||
| /// Wraps an internal [`PhysicalExprEncode`] trait object so callers see a | ||
| /// stable concrete type while implementations can evolve in | ||
| /// `datafusion-proto`. | ||
| pub struct PhysicalExprEncodeCtx<'a> { | ||
| encoder: &'a dyn PhysicalExprEncode, | ||
| } | ||
|
|
||
| impl<'a> PhysicalExprEncodeCtx<'a> { | ||
| /// Construct a new encode context. Typically called by | ||
| /// `datafusion-proto`; expression authors receive `&PhysicalExprEncodeCtx`. | ||
| pub fn new(encoder: &'a dyn PhysicalExprEncode) -> Self { | ||
| Self { encoder } | ||
| } | ||
|
|
||
| /// Encode a child expression. Routes through the configured encoder | ||
| /// so dedup-aware encoding is preserved. | ||
| pub fn encode_child( | ||
| &self, | ||
| expr: &Arc<dyn PhysicalExpr>, | ||
| ) -> Result<PhysicalExprNode> { | ||
| self.encoder.encode(expr) | ||
| } | ||
| } | ||
|
|
||
| /// Internal dispatch trait. Implementors live in `datafusion-proto` and | ||
| /// wrap the existing `PhysicalExtensionCodec` + | ||
| /// `PhysicalProtoConverterExtension` plumbing. Expression authors should | ||
| /// use [`PhysicalExprEncodeCtx`] instead of calling this directly. | ||
| pub trait PhysicalExprEncode { | ||
| /// Encode an expression to a protobuf node. | ||
| fn encode(&self, expr: &Arc<dyn PhysicalExpr>) -> Result<PhysicalExprNode>; | ||
| } | ||
| } | ||
|
|
||
| /// Decode-side counterpart to [`proto_encode`]. | ||
| /// | ||
| /// Expression authors implement an associated `try_from_proto` on their | ||
| /// concrete type, with the signature | ||
| /// | ||
| /// ```ignore | ||
| /// fn try_from_proto( | ||
| /// node: &PhysicalExprNode, | ||
| /// ctx: &PhysicalExprDecodeCtx<'_>, | ||
| /// ) -> Result<Arc<dyn PhysicalExpr>> | ||
| /// ``` | ||
| /// | ||
| /// It takes the whole [`PhysicalExprNode`] — the exact inverse of what | ||
| /// [`PhysicalExpr::try_to_proto`] returns — so the constructor can also see | ||
| /// outer-node fields such as `expr_id`. The central match in | ||
| /// `datafusion-proto` dispatches `ExprType` variants to these constructors. | ||
| /// | ||
| /// As with the encode side, the public surface is a struct (not a `&dyn` | ||
| /// trait) so future fields/helpers (registries for third-party expressions, | ||
| /// schema-resolution caches, etc.) can be added without changing the | ||
| /// signature every expression depends on. | ||
| /// | ||
| /// [`PhysicalExprNode`]: datafusion_proto_models::protobuf::PhysicalExprNode | ||
| #[cfg(feature = "proto")] | ||
| pub mod proto_decode { | ||
| use std::sync::Arc; | ||
|
|
||
| use arrow::datatypes::Schema; | ||
| use datafusion_common::Result; | ||
| use datafusion_proto_models::protobuf::PhysicalExprNode; | ||
|
|
||
| use super::PhysicalExpr; | ||
|
|
||
| /// Decoder context handed to per-expression `try_from_proto` constructors. | ||
| /// | ||
| /// Wraps an internal [`PhysicalExprDecode`] trait object plus a borrowed | ||
| /// schema. The trait stays an implementation detail of `datafusion-proto`; | ||
| /// expression authors only see this struct. | ||
| pub struct PhysicalExprDecodeCtx<'a> { | ||
| schema: &'a Schema, | ||
| decoder: &'a dyn PhysicalExprDecode, | ||
| } | ||
|
|
||
| impl<'a> PhysicalExprDecodeCtx<'a> { | ||
| /// Construct a new decode context. Typically called by | ||
| /// `datafusion-proto`; expression authors receive | ||
| /// `&PhysicalExprDecodeCtx`. | ||
| pub fn new(schema: &'a Schema, decoder: &'a dyn PhysicalExprDecode) -> Self { | ||
| Self { schema, decoder } | ||
| } | ||
|
|
||
| /// The schema bound to this decode context. Use it for column lookups, | ||
| /// data-type resolution, etc. | ||
| pub fn schema(&self) -> &Schema { | ||
| self.schema | ||
| } | ||
|
|
||
| /// Decode an expression node, recursing into child sub-expressions. | ||
| /// | ||
| /// Routes built-in `ExprType` variants through `datafusion-proto`'s | ||
| /// central match and forwards extension nodes to the registered codec | ||
| /// (today via [`PhysicalExtensionCodec::try_decode_expr`]; later via | ||
| /// a per-type registry — see #21835). | ||
| /// | ||
| /// [`PhysicalExtensionCodec::try_decode_expr`]: https://docs.rs/datafusion-proto/latest/datafusion_proto/physical_plan/trait.PhysicalExtensionCodec.html#method.try_decode_expr | ||
| pub fn decode(&self, node: &PhysicalExprNode) -> Result<Arc<dyn PhysicalExpr>> { | ||
| self.decoder.decode(node, self.schema) | ||
| } | ||
| } | ||
|
|
||
| /// Internal dispatch trait. Implementors live in `datafusion-proto`. | ||
| /// Expression authors should use [`PhysicalExprDecodeCtx`] instead of | ||
| /// calling this directly. | ||
| pub trait PhysicalExprDecode { | ||
| /// Decode a proto node into a concrete `PhysicalExpr`. The schema is | ||
| /// passed alongside so implementations can support recursive children | ||
| /// and rebind the context per call (e.g. for nested plans). | ||
| fn decode( | ||
| &self, | ||
| node: &PhysicalExprNode, | ||
| schema: &Schema, | ||
| ) -> Result<Arc<dyn PhysicalExpr>>; | ||
| } | ||
| } | ||
|
|
||
| #[deprecated( | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.