From bbbb49e684daeb0ede9acc5cb5fa93ff2214fcd9 Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Fri, 20 Mar 2026 10:38:15 -0600
Subject: [PATCH 1/6] feat: add native support for get_json_object expression

Implement the Spark GetJsonObject expression natively using serde_json
for JSON parsing and a custom JSONPath evaluator supporting field access,
array indexing, bracket notation, and wildcards.

Closes #3162
---
 docs/spark_expressions_support.md             |   2 +-
 native/spark-expr/src/comet_scalar_funcs.rs   |   4 +
 .../src/string_funcs/get_json_object.rs       | 465 ++++++++++++++++++
 native/spark-expr/src/string_funcs/mod.rs     |   2 +
 .../apache/comet/serde/QueryPlanSerde.scala   |   1 +
 .../org/apache/comet/serde/strings.scala      |  20 +-
 .../expressions/string/get_json_object.sql    | 112 +++++
 7 files changed, 604 insertions(+), 2 deletions(-)
 create mode 100644 native/spark-expr/src/string_funcs/get_json_object.rs
 create mode 100644 spark/src/test/resources/sql-tests/expressions/string/get_json_object.sql

diff --git a/docs/spark_expressions_support.md b/docs/spark_expressions_support.md
index 5474894108..6892eb358d 100644
--- a/docs/spark_expressions_support.md
+++ b/docs/spark_expressions_support.md
@@ -245,7 +245,7 @@
 ### json_funcs
 
 - [ ] from_json
-- [ ] get_json_object
+- [x] get_json_object
 - [ ] json_array_length
 - [ ] json_object_keys
 - [ ] json_tuple
diff --git a/native/spark-expr/src/comet_scalar_funcs.rs b/native/spark-expr/src/comet_scalar_funcs.rs
index ff75de763b..48cf00bc69 100644
--- a/native/spark-expr/src/comet_scalar_funcs.rs
+++ b/native/spark-expr/src/comet_scalar_funcs.rs
@@ -181,6 +181,10 @@ pub fn create_comet_physical_fun_with_eval_mode(
             let func = Arc::new(crate::string_funcs::spark_split);
             make_comet_scalar_udf!("split", func, without data_type)
         }
+        "get_json_object" => {
+            let func = Arc::new(crate::string_funcs::spark_get_json_object);
+            make_comet_scalar_udf!("get_json_object", func, without data_type)
+        }
         _ => registry.udf(fun_name).map_err(|e| {
             DataFusionError::Execution(format!(
                 "Function {fun_name} not found in the registry: {e}",
diff --git a/native/spark-expr/src/string_funcs/get_json_object.rs b/native/spark-expr/src/string_funcs/get_json_object.rs
new file mode 100644
index 0000000000..21e4116c5a
--- /dev/null
+++ b/native/spark-expr/src/string_funcs/get_json_object.rs
@@ -0,0 +1,465 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow::array::{Array, ArrayRef, StringArray};
+use datafusion::common::{
+    cast::as_generic_string_array, exec_err, Result as DataFusionResult, ScalarValue,
+};
+use datafusion::logical_expr::ColumnarValue;
+use serde_json::Value;
+use std::sync::Arc;
+
+/// Spark-compatible `get_json_object` function.
+///
+/// Extracts a JSON value from a JSON string using a JSONPath expression.
+/// Returns the result as a string, or null if the path doesn't match or input is invalid.
+///
+/// Supported JSONPath syntax:
+/// - `$` — root element
+/// - `.name` or `['name']` — named child
+/// - `[n]` — array index (0-based)
+/// - `[*]` — array wildcard
+/// - `.*` or `['*']` — object wildcard
+pub fn spark_get_json_object(args: &[ColumnarValue]) -> DataFusionResult<ColumnarValue> {
+    if args.len() != 2 {
+        return exec_err!(
+            "get_json_object expects 2 arguments (json, path), got {}",
+            args.len()
+        );
+    }
+
+    match (&args[0], &args[1]) {
+        // Column json, scalar path (most common case)
+        (ColumnarValue::Array(json_array), ColumnarValue::Scalar(path_scalar)) => {
+            let path_str = match path_scalar {
+                ScalarValue::Utf8(Some(p)) | ScalarValue::LargeUtf8(Some(p)) => p.as_str(),
+                ScalarValue::Utf8(None) | ScalarValue::LargeUtf8(None) => {
+                    let null_array: ArrayRef = Arc::new(StringArray::new_null(json_array.len()));
+                    return Ok(ColumnarValue::Array(null_array));
+                }
+                _ => return exec_err!("get_json_object path must be a string"),
+            };
+
+            let parsed_path = match parse_json_path(path_str) {
+                Some(p) => p,
+                None => {
+                    let null_array: ArrayRef = Arc::new(StringArray::new_null(json_array.len()));
+                    return Ok(ColumnarValue::Array(null_array));
+                }
+            };
+
+            let json_strings = as_generic_string_array::<i32>(json_array)?;
+            let mut builder = StringBuilder::new();
+
+            for i in 0..json_strings.len() {
+                if json_strings.is_null(i) {
+                    builder.append_null();
+                } else {
+                    let json_str = json_strings.value(i);
+                    match evaluate_path(json_str, &parsed_path) {
+                        Some(result) => builder.append_value(&result),
+                        None => builder.append_null(),
+                    }
+                }
+            }
+
+            Ok(ColumnarValue::Array(Arc::new(builder.finish())))
+        }
+        // Scalar json, scalar path
+        (ColumnarValue::Scalar(json_scalar), ColumnarValue::Scalar(path_scalar)) => {
+            let json_str = match json_scalar {
+                ScalarValue::Utf8(Some(s)) | ScalarValue::LargeUtf8(Some(s)) => s.as_str(),
+                ScalarValue::Utf8(None) | ScalarValue::LargeUtf8(None) => {
+                    return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)));
+                }
+                _ => return exec_err!("get_json_object json must be a string"),
+            };
+            let path_str = match path_scalar {
+                ScalarValue::Utf8(Some(p)) | ScalarValue::LargeUtf8(Some(p)) => p.as_str(),
+                ScalarValue::Utf8(None) | ScalarValue::LargeUtf8(None) => {
+                    return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)));
+                }
+                _ => return exec_err!("get_json_object path must be a string"),
+            };
+
+            let parsed_path = match parse_json_path(path_str) {
+                Some(p) => p,
+                None => return Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
+            };
+
+            let result = evaluate_path(json_str, &parsed_path);
+            Ok(ColumnarValue::Scalar(ScalarValue::Utf8(result)))
+        }
+        // Column json, column path
+        (ColumnarValue::Array(json_array), ColumnarValue::Array(path_array)) => {
+            let json_strings = as_generic_string_array::<i32>(json_array)?;
+            let path_strings = as_generic_string_array::<i32>(path_array)?;
+            let mut builder = StringBuilder::new();
+
+            for i in 0..json_strings.len() {
+                if json_strings.is_null(i) || path_strings.is_null(i) {
+                    builder.append_null();
+                } else {
+                    let json_str = json_strings.value(i);
+                    let path_str = path_strings.value(i);
+                    match parse_json_path(path_str) {
+                        Some(parsed_path) => match evaluate_path(json_str, &parsed_path) {
+                            Some(result) => builder.append_value(&result),
+                            None => builder.append_null(),
+                        },
+                        None => builder.append_null(),
+                    }
+                }
+            }
+
+            Ok(ColumnarValue::Array(Arc::new(builder.finish())))
+        }
+        _ => exec_err!("get_json_object: unsupported argument types"),
+    }
+}
+
+use arrow::array::StringBuilder;
+
+/// A parsed JSONPath segment.
+#[derive(Debug, Clone)]
+enum PathSegment {
+    /// Named field: `.name` or `['name']`
+    Field(String),
+    /// Array index: `[n]`
+    Index(usize),
+    /// Wildcard: `[*]` or `.*`
+    Wildcard,
+}
+
+/// Parse a Spark-compatible JSONPath expression.
+/// Returns None for invalid paths.
+fn parse_json_path(path: &str) -> Option<Vec<PathSegment>> {
+    let mut chars = path.chars().peekable();
+
+    // Must start with '$'
+    if chars.next()? != '$' {
+        return None;
+    }
+
+    let mut segments = Vec::new();
+
+    while chars.peek().is_some() {
+        match chars.peek()? {
+            '.' => {
+                chars.next();
+                if chars.peek() == Some(&'.') {
+                    // Recursive descent not supported
+                    return None;
+                }
+                if chars.peek() == Some(&'*') {
+                    chars.next();
+                    segments.push(PathSegment::Wildcard);
+                } else {
+                    // Read field name
+                    let mut name = String::new();
+                    while let Some(&c) = chars.peek() {
+                        if c == '.' || c == '[' {
+                            break;
+                        }
+                        name.push(c);
+                        chars.next();
+                    }
+                    if name.is_empty() {
+                        return None;
+                    }
+                    segments.push(PathSegment::Field(name));
+                }
+            }
+            '[' => {
+                chars.next();
+                if chars.peek() == Some(&'\'') {
+                    // Bracket notation with quotes: ['name'] or ['*']
+                    chars.next();
+                    let mut name = String::new();
+                    loop {
+                        match chars.next()? {
+                            '\'' => break,
+                            c => name.push(c),
+                        }
+                    }
+                    if chars.next()? != ']' {
+                        return None;
+                    }
+                    if name == "*" {
+                        segments.push(PathSegment::Wildcard);
+                    } else {
+                        segments.push(PathSegment::Field(name));
+                    }
+                } else if chars.peek() == Some(&'*') {
+                    // [*]
+                    chars.next();
+                    if chars.next()? != ']' {
+                        return None;
+                    }
+                    segments.push(PathSegment::Wildcard);
+                } else {
+                    // [n] — numeric index
+                    let mut num_str = String::new();
+                    while let Some(&c) = chars.peek() {
+                        if c == ']' {
+                            break;
+                        }
+                        num_str.push(c);
+                        chars.next();
+                    }
+                    if chars.next()? != ']' {
+                        return None;
+                    }
+                    let idx: usize = num_str.parse().ok()?;
+                    segments.push(PathSegment::Index(idx));
+                }
+            }
+            _ => {
+                // Unexpected character
+                return None;
+            }
+        }
+    }
+
+    Some(segments)
+}
+
+/// Evaluate a parsed JSONPath against a JSON string.
+/// Returns the result as a string, or None if no match.
+fn evaluate_path(json_str: &str, path: &[PathSegment]) -> Option<String> {
+    let value: Value = serde_json::from_str(json_str).ok()?;
+    let results = evaluate_segments(&value, path);
+
+    match results.len() {
+        0 => None,
+        1 => value_to_string(results[0]),
+        _ => {
+            // Multiple results: wrap in JSON array
+            let arr = Value::Array(results.into_iter().cloned().collect());
+            Some(arr.to_string())
+        }
+    }
+}
+
+/// Recursively evaluate path segments against a JSON value.
+/// Returns references to all matching values.
+fn evaluate_segments<'a>(value: &'a Value, segments: &[PathSegment]) -> Vec<&'a Value> {
+    if segments.is_empty() {
+        return vec![value];
+    }
+
+    let segment = &segments[0];
+    let rest = &segments[1..];
+
+    match segment {
+        PathSegment::Field(name) => match value {
+            Value::Object(map) => match map.get(name) {
+                Some(v) => evaluate_segments(v, rest),
+                None => vec![],
+            },
+            _ => vec![],
+        },
+        PathSegment::Index(idx) => match value {
+            Value::Array(arr) => match arr.get(*idx) {
+                Some(v) => evaluate_segments(v, rest),
+                None => vec![],
+            },
+            _ => vec![],
+        },
+        PathSegment::Wildcard => match value {
+            Value::Array(arr) => arr
+                .iter()
+                .flat_map(|v| evaluate_segments(v, rest))
+                .collect(),
+            Value::Object(map) => map
+                .values()
+                .flat_map(|v| evaluate_segments(v, rest))
+                .collect(),
+            _ => vec![],
+        },
+    }
+}
+
+/// Convert a JSON value to its string representation matching Spark behavior.
+/// - Strings are returned without quotes
+/// - null returns None
+/// - Numbers, booleans, objects, arrays are serialized as JSON
+fn value_to_string(value: &Value) -> Option<String> {
+    match value {
+        Value::Null => None,
+        Value::String(s) => Some(s.clone()),
+        _ => Some(value.to_string()),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_json_path() {
+        // Root only
+        let path = parse_json_path("$").unwrap();
+        assert!(path.is_empty());
+
+        // Simple field
+        let path = parse_json_path("$.name").unwrap();
+        assert!(matches!(&path[0], PathSegment::Field(n) if n == "name"));
+
+        // Array index
+        let path = parse_json_path("$[0]").unwrap();
+        assert!(matches!(&path[0], PathSegment::Index(0)));
+
+        // Bracket notation
+        let path = parse_json_path("$['key with spaces']").unwrap();
+        assert!(matches!(&path[0], PathSegment::Field(n) if n == "key with spaces"));
+
+        // Wildcard
+        let path = parse_json_path("$[*]").unwrap();
+        assert!(matches!(&path[0], PathSegment::Wildcard));
+
+        let path = parse_json_path("$.*").unwrap();
+        assert!(matches!(&path[0], PathSegment::Wildcard));
+
+        // Recursive descent not supported
+        assert!(parse_json_path("$..name").is_none());
+
+        // Must start with $
+        assert!(parse_json_path("name").is_none());
+        assert!(parse_json_path("[0]").is_none());
+    }
+
+    #[test]
+    fn test_evaluate_simple_field() {
+        assert_eq!(
+            evaluate_path(
+                r#"{"name":"John","age":30}"#,
+                &parse_json_path("$.name").unwrap()
+            ),
+            Some("John".to_string())
+        );
+        assert_eq!(
+            evaluate_path(
+                r#"{"name":"John","age":30}"#,
+                &parse_json_path("$.age").unwrap()
+            ),
+            Some("30".to_string())
+        );
+    }
+
+    #[test]
+    fn test_evaluate_nested() {
+        let json = r#"{"user":{"profile":{"name":"Alice"}}}"#;
+        assert_eq!(
+            evaluate_path(json, &parse_json_path("$.user.profile.name").unwrap()),
+            Some("Alice".to_string())
+        );
+    }
+
+    #[test]
+    fn test_evaluate_array_index() {
+        assert_eq!(
+            evaluate_path(r#"[1,2,3]"#, &parse_json_path("$[0]").unwrap()),
+            Some("1".to_string())
+        );
+        assert_eq!(
+            evaluate_path(r#"[1,2,3]"#, &parse_json_path("$[3]").unwrap()),
+            None
+        );
+    }
+
+    #[test]
+    fn test_evaluate_root() {
+        assert_eq!(
+            evaluate_path(r#"{"a":"b"}"#, &parse_json_path("$").unwrap()),
+            Some(r#"{"a":"b"}"#.to_string())
+        );
+    }
+
+    #[test]
+    fn test_evaluate_null_value() {
+        assert_eq!(
+            evaluate_path(r#"{"a":null}"#, &parse_json_path("$.a").unwrap()),
+            None
+        );
+    }
+
+    #[test]
+    fn test_evaluate_missing_field() {
+        assert_eq!(
+            evaluate_path(r#"{"a":"b"}"#, &parse_json_path("$.c").unwrap()),
+            None
+        );
+    }
+
+    #[test]
+    fn test_evaluate_invalid_json() {
+        assert_eq!(
+            evaluate_path("not json", &parse_json_path("$.a").unwrap()),
+            None
+        );
+    }
+
+    #[test]
+    fn test_evaluate_wildcard() {
+        let json = r#"[{"a":"b"},{"a":"c"}]"#;
+        assert_eq!(
+            evaluate_path(json, &parse_json_path("$[*].a").unwrap()),
+            Some(r#"["b","c"]"#.to_string())
+        );
+    }
+
+    #[test]
+    fn test_evaluate_string_unquoted() {
+        // Strings should be returned without quotes
+        assert_eq!(
+            evaluate_path(r#"["a","b"]"#, &parse_json_path("$[1]").unwrap()),
+            Some("b".to_string())
+        );
+    }
+
+    #[test]
+    fn test_evaluate_nested_array_field() {
+        let json = r#"{"items":["apple","banana","cherry"]}"#;
+        assert_eq!(
+            evaluate_path(json, &parse_json_path("$.items[1]").unwrap()),
+            Some("banana".to_string())
+        );
+    }
+
+    #[test]
+    fn test_evaluate_bracket_notation_with_spaces() {
+        let json = r#"{"key with spaces":"it works"}"#;
+        assert_eq!(
+            evaluate_path(json, &parse_json_path("$['key with spaces']").unwrap()),
+            Some("it works".to_string())
+        );
+    }
+
+    #[test]
+    fn test_evaluate_boolean_and_nested_object() {
+        let json = r#"{"a":true,"b":{"c":1}}"#;
+        assert_eq!(
+            evaluate_path(json, &parse_json_path("$.a").unwrap()),
+            Some("true".to_string())
+        );
+        assert_eq!(
+            evaluate_path(json, &parse_json_path("$.b").unwrap()),
+            Some(r#"{"c":1}"#.to_string())
+        );
+    }
+}
diff --git a/native/spark-expr/src/string_funcs/mod.rs b/native/spark-expr/src/string_funcs/mod.rs
index 919b9dc197..bb785bdb44 100644
--- a/native/spark-expr/src/string_funcs/mod.rs
+++ b/native/spark-expr/src/string_funcs/mod.rs
@@ -16,9 +16,11 @@
 // under the License.
 
 mod contains;
+mod get_json_object;
 mod split;
 mod substring;
 
 pub use contains::SparkContains;
+pub use get_json_object::spark_get_json_object;
 pub use split::spark_split;
 pub use substring::SubstringExpr;
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index 8c39ba779d..1942d6b3fe 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -156,6 +156,7 @@ object QueryPlanSerde extends Logging with CometExprShim {
     classOf[Concat] -> CometConcat,
     classOf[Contains] -> CometScalarFunction("contains"),
     classOf[EndsWith] -> CometScalarFunction("ends_with"),
+    classOf[GetJsonObject] -> CometGetJsonObject,
     classOf[InitCap] -> CometInitCap,
     classOf[Length] -> CometLength,
     classOf[Like] -> CometLike,
diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala
index 64ba644048..47dfb95e3d 100644
--- a/spark/src/main/scala/org/apache/comet/serde/strings.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala
@@ -21,7 +21,7 @@ package org.apache.comet.serde
 
 import java.util.Locale
 
-import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Concat, ConcatWs, Expression, If, InitCap, IsNull, Left, Length, Like, Literal, Lower, RegExpReplace, Right, RLike, StringLPad, StringRepeat, StringRPad, StringSplit, Substring, Upper}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, Cast, Concat, ConcatWs, Expression, GetJsonObject, If, InitCap, IsNull, Left, Length, Like, Literal, Lower, RegExpReplace, Right, RLike, StringLPad, StringRepeat, StringRPad, StringSplit, Substring, Upper}
 import org.apache.spark.sql.types.{BinaryType, DataTypes, LongType, StringType}
 import org.apache.spark.unsafe.types.UTF8String
 
@@ -382,6 +382,24 @@ object CometStringSplit extends CometExpressionSerde[StringSplit] {
   }
 }
 
+object CometGetJsonObject extends CometExpressionSerde[GetJsonObject] {
+
+  override def convert(
+      expr: GetJsonObject,
+      inputs: Seq[Attribute],
+      binding: Boolean): Option[Expr] = {
+    val jsonExpr = exprToProtoInternal(expr.json, inputs, binding)
+    val pathExpr = exprToProtoInternal(expr.path, inputs, binding)
+    val optExpr = scalarFunctionExprToProtoWithReturnType(
+      "get_json_object",
+      expr.dataType,
+      false,
+      jsonExpr,
+      pathExpr)
+    optExprWithInfo(optExpr, expr, expr.json, expr.path)
+  }
+}
+
 trait CommonStringExprs {
 
   def stringDecode(
diff --git a/spark/src/test/resources/sql-tests/expressions/string/get_json_object.sql b/spark/src/test/resources/sql-tests/expressions/string/get_json_object.sql
new file mode 100644
index 0000000000..d754dda9aa
--- /dev/null
+++ b/spark/src/test/resources/sql-tests/expressions/string/get_json_object.sql
@@ -0,0 +1,112 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--   http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied.  See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+-- ConfigMatrix: parquet.enable.dictionary=false,true
+
+statement
+CREATE TABLE test_get_json_object(json_str string, path string) USING parquet
+
+statement
+INSERT INTO test_get_json_object VALUES ('{"name":"John","age":30}', '$.name'), ('{"a":{"b":{"c":"deep"}}}', '$.a.b.c'), ('{"items":[1,2,3]}', '$.items[1]'), (NULL, '$.name'), ('{"name":"Alice"}', NULL), ('invalid json', '$.name'), ('{"a":"b"}', '$'), ('{"key":"value"}', '$.missing'), ('{"arr":["x","y","z"]}', '$.arr[0]'), ('{"a":null}', '$.a')
+
+-- column json, column path
+query
+SELECT get_json_object(json_str, path) FROM test_get_json_object
+
+-- column json, literal path
+query
+SELECT get_json_object(json_str, '$.name') FROM test_get_json_object
+
+-- simple field extraction
+query
+SELECT get_json_object('{"name":"John","age":30}', '$.name')
+
+-- numeric value
+query
+SELECT get_json_object('{"name":"John","age":30}', '$.age')
+
+-- nested field
+query
+SELECT get_json_object('{"user":{"profile":{"name":"Alice"}}}', '$.user.profile.name')
+
+-- array index
+query
+SELECT get_json_object('[1,2,3]', '$[0]'), get_json_object('[1,2,3]', '$[2]')
+
+-- out of bounds array index
+query
+SELECT get_json_object('[1,2,3]', '$[3]')
+
+-- nested array field
+query
+SELECT get_json_object('{"items":["apple","banana","cherry"]}', '$.items[1]')
+
+-- root path returns entire input
+query
+SELECT get_json_object('{"a":"b"}', '$')
+
+-- null json
+query
+SELECT get_json_object(NULL, '$.name')
+
+-- null path
+query
+SELECT get_json_object('{"a":"b"}', NULL)
+
+-- invalid json returns null
+query
+SELECT get_json_object('not json', '$.a')
+
+-- missing field returns null
+query
+SELECT get_json_object('{"a":"b"}', '$.c')
+
+-- json null value returns null
+query
+SELECT get_json_object('{"a":null}', '$.a')
+
+-- boolean value
+query
+SELECT get_json_object('{"flag":true}', '$.flag')
+
+-- nested object returned as json
+query
+SELECT get_json_object('{"a":{"b":1}}', '$.a')
+
+-- array returned as json
+query
+SELECT get_json_object('{"a":[1,2,3]}', '$.a')
+
+-- bracket notation
+query
+SELECT get_json_object('{"key with spaces":"it works"}', '$[''key with spaces'']')
+
+-- wildcard on array
+query
+SELECT get_json_object('[{"a":"b"},{"a":"c"}]', '$[*].a')
+
+-- empty string json
+query
+SELECT get_json_object('', '$.a')
+
+-- deeply nested
+query
+SELECT get_json_object('{"a":{"b":{"c":{"d":"found"}}}}', '$.a.b.c.d')
+
+-- array of arrays
+query
+SELECT get_json_object('[[1,2],[3,4]]', '$[0][1]')

From 257af9e0928b87507a74e2f65050f5e54cb3aba2 Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Fri, 20 Mar 2026 10:40:02 -0600
Subject: [PATCH 2/6] feat: add getSupportLevel for CometGetJsonObject

Mark as Incompatible since Spark's Jackson parser allows single-quoted
JSON and unescaped control characters which serde_json does not support.
Add allowIncompatible config to SQL test file.
---
 spark/src/main/scala/org/apache/comet/serde/strings.scala   | 6 ++++++
 .../sql-tests/expressions/string/get_json_object.sql        | 1 +
 2 files changed, 7 insertions(+)

diff --git a/spark/src/main/scala/org/apache/comet/serde/strings.scala b/spark/src/main/scala/org/apache/comet/serde/strings.scala
index 47dfb95e3d..50621fc389 100644
--- a/spark/src/main/scala/org/apache/comet/serde/strings.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/strings.scala
@@ -384,6 +384,12 @@ object CometStringSplit extends CometExpressionSerde[StringSplit] {
 
 object CometGetJsonObject extends CometExpressionSerde[GetJsonObject] {
 
+  override def getSupportLevel(expr: GetJsonObject): SupportLevel =
+    Incompatible(
+      Some(
+        "Spark allows single-quoted JSON and unescaped control characters " +
+          "which Comet does not support"))
+
   override def convert(
       expr: GetJsonObject,
       inputs: Seq[Attribute],
diff --git a/spark/src/test/resources/sql-tests/expressions/string/get_json_object.sql b/spark/src/test/resources/sql-tests/expressions/string/get_json_object.sql
index d754dda9aa..87ba6bbb37 100644
--- a/spark/src/test/resources/sql-tests/expressions/string/get_json_object.sql
+++ b/spark/src/test/resources/sql-tests/expressions/string/get_json_object.sql
@@ -15,6 +15,7 @@
 -- specific language governing permissions and limitations
 -- under the License.
 
+-- Config: spark.comet.expression.GetJsonObject.allowIncompatible=true
 -- ConfigMatrix: parquet.enable.dictionary=false,true
 
 statement

From b7ddc4a742c04c4e9f7b79a1fcd5f4469603ada1 Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Fri, 20 Mar 2026 10:55:35 -0600
Subject: [PATCH 3/6] fix: address self-review findings for get_json_object

- Enable serde_json preserve_order feature to maintain JSON key ordering
- Fix wildcard to only work on arrays (not objects), matching Spark
- Fix single wildcard match to preserve JSON string quoting
- Add user-facing docs in expressions.md
- Add more SQL tests: object wildcard, single match, missing fields,
  invalid paths, field names with special chars, key ordering
- Add Rust unit tests for new edge cases
---
 docs/source/user-guide/latest/expressions.md  |  6 ++
 native/Cargo.lock                             |  1 +
 native/spark-expr/Cargo.toml                  |  2 +-
 .../src/string_funcs/get_json_object.rs       | 83 +++++++++++++++++--
 .../expressions/string/get_json_object.sql    | 44 ++++++++++
 5 files changed, 130 insertions(+), 6 deletions(-)

diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md
index 57b7a3455a..9b6511d42d 100644
--- a/docs/source/user-guide/latest/expressions.md
+++ b/docs/source/user-guide/latest/expressions.md
@@ -90,6 +90,12 @@ Expressions that are not Spark-compatible will fall back to Spark by default and
 | Substring       | Yes               |                                                                                                            |
 | Upper           | No                | Results can vary depending on locale and character set. Requires `spark.comet.caseConversion.enabled=true` |
 
+## JSON Functions
+
+| Expression      | Spark-Compatible? | Compatibility Notes                                                                                                           |
+| --------------- | ----------------- | ----------------------------------------------------------------------------------------------------------------------------- |
+| GetJsonObject   | No                | Spark allows single-quoted JSON and unescaped control characters which Comet does not support                                 |
+
 ## Date/Time Functions
 
 | Expression     | SQL                          | Spark-Compatible? | Compatibility Notes                                                                                                              |
diff --git a/native/Cargo.lock b/native/Cargo.lock
index ef3ce87ad3..b467788c60 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -5514,6 +5514,7 @@ version = "1.0.149"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
 dependencies = [
+ "indexmap 2.13.0",
  "itoa",
  "memchr",
  "serde",
diff --git a/native/spark-expr/Cargo.toml b/native/spark-expr/Cargo.toml
index d4639c86ea..ffc395317f 100644
--- a/native/spark-expr/Cargo.toml
+++ b/native/spark-expr/Cargo.toml
@@ -34,7 +34,7 @@ chrono-tz = { workspace = true }
 num = { workspace = true }
 regex = { workspace = true }
 serde = { version = "1.0", features = ["derive"] }
-serde_json = "1.0"
+serde_json = { version = "1.0", features = ["preserve_order"] }
 thiserror = { workspace = true }
 futures = { workspace = true }
 twox-hash = "2.1.2"
diff --git a/native/spark-expr/src/string_funcs/get_json_object.rs b/native/spark-expr/src/string_funcs/get_json_object.rs
index 21e4116c5a..af08ae0e5e 100644
--- a/native/spark-expr/src/string_funcs/get_json_object.rs
+++ b/native/spark-expr/src/string_funcs/get_json_object.rs
@@ -242,11 +242,28 @@ fn parse_json_path(path: &str) -> Option<Vec<PathSegment>> {
 /// Returns the result as a string, or None if no match.
 fn evaluate_path(json_str: &str, path: &[PathSegment]) -> Option<String> {
     let value: Value = serde_json::from_str(json_str).ok()?;
+
+    // Check if path contains any wildcards
+    let has_wildcard = path.iter().any(|s| matches!(s, PathSegment::Wildcard));
+
     let results = evaluate_segments(&value, path);
 
     match results.len() {
         0 => None,
-        1 => value_to_string(results[0]),
+        1 if !has_wildcard => value_to_string(results[0]),
+        1 => {
+            // Single wildcard match: Spark strips outer array brackets from the
+            // JSON representation. For strings this means the quotes are preserved.
+            // We simulate by serializing as a JSON array and stripping the brackets.
+            let arr_str = serde_json::to_string(&Value::Array(vec![results[0].clone()])).ok()?;
+            // Strip leading '[' and trailing ']'
+            let inner = &arr_str[1..arr_str.len() - 1];
+            if inner == "null" {
+                None
+            } else {
+                Some(inner.to_string())
+            }
+        }
         _ => {
             // Multiple results: wrap in JSON array
             let arr = Value::Array(results.into_iter().cloned().collect());
@@ -285,10 +302,6 @@ fn evaluate_segments<'a>(value: &'a Value, segments: &[PathSegment]) -> Vec<&'a
                 .iter()
                 .flat_map(|v| evaluate_segments(v, rest))
                 .collect(),
-            Value::Object(map) => map
-                .values()
-                .flat_map(|v| evaluate_segments(v, rest))
-                .collect(),
             _ => vec![],
         },
     }
@@ -462,4 +475,64 @@ mod tests {
             Some(r#"{"c":1}"#.to_string())
         );
     }
+
+    #[test]
+    fn test_object_key_order_preserved() {
+        // serde_json with preserve_order feature should maintain insertion order
+        let json = r#"{"z":1,"a":2}"#;
+        assert_eq!(
+            evaluate_path(json, &parse_json_path("$").unwrap()),
+            Some(r#"{"z":1,"a":2}"#.to_string())
+        );
+    }
+
+    #[test]
+    fn test_wildcard_single_match() {
+        // Single wildcard match on string: Spark preserves JSON quotes
+        let json = r#"[{"a":"only"}]"#;
+        assert_eq!(
+            evaluate_path(json, &parse_json_path("$[*].a").unwrap()),
+            Some(r#""only""#.to_string())
+        );
+
+        // Single wildcard match on number: no quotes
+        let json = r#"[{"a":42}]"#;
+        assert_eq!(
+            evaluate_path(json, &parse_json_path("$[*].a").unwrap()),
+            Some("42".to_string())
+        );
+    }
+
+    #[test]
+    fn test_wildcard_missing_fields() {
+        // Wildcard should skip elements where the field is missing
+        let json = r#"[{"a":1},{"b":2},{"a":3}]"#;
+        assert_eq!(
+            evaluate_path(json, &parse_json_path("$[*].a").unwrap()),
+            Some("[1,3]".to_string())
+        );
+    }
+
+    #[test]
+    fn test_field_with_colon() {
+        let json = r#"{"fb:testid":"123"}"#;
+        assert_eq!(
+            evaluate_path(json, &parse_json_path("$.fb:testid").unwrap()),
+            Some("123".to_string())
+        );
+    }
+
+    #[test]
+    fn test_dot_bracket_invalid() {
+        // $.[0] is not valid path syntax in Spark
+        assert!(parse_json_path("$.[0]").is_none());
+    }
+
+    #[test]
+    fn test_object_wildcard() {
+        // Spark returns null for $.* on objects (wildcard only works in array contexts)
+        let json = r#"{"a":1,"b":2,"c":3}"#;
+        let result = evaluate_path(json, &parse_json_path("$.*").unwrap());
+        assert_eq!(result, None);
+    }
 }
diff --git a/spark/src/test/resources/sql-tests/expressions/string/get_json_object.sql b/spark/src/test/resources/sql-tests/expressions/string/get_json_object.sql
index 87ba6bbb37..aa2f482077 100644
--- a/spark/src/test/resources/sql-tests/expressions/string/get_json_object.sql
+++ b/spark/src/test/resources/sql-tests/expressions/string/get_json_object.sql
@@ -111,3 +111,47 @@ SELECT get_json_object('{"a":{"b":{"c":{"d":"found"}}}}', '$.a.b.c.d')
 -- array of arrays
 query
 SELECT get_json_object('[[1,2],[3,4]]', '$[0][1]')
+
+-- wildcard on simple array returns the whole array
+query
+SELECT get_json_object('[1,2,3]', '$[*]')
+
+-- object wildcard (Spark returns null for $.* on objects)
+query
+SELECT get_json_object('{"a":1,"b":2,"c":3}', '$.*')
+
+-- single wildcard match returns unwrapped value
+query
+SELECT get_json_object('[{"a":"only"}]', '$[*].a')
+
+-- wildcard with missing fields in some elements
+query
+SELECT get_json_object('[{"a":1},{"b":2},{"a":3}]', '$[*].a')
+
+-- invalid path: recursive descent
+query
+SELECT get_json_object('{"a":1}', '$..a')
+
+-- invalid path: no dollar sign
+query
+SELECT get_json_object('{"a":1}', 'a')
+
+-- invalid path: dot-bracket
+query
+SELECT get_json_object('[1,2,3]', '$.[0]')
+
+-- field name with special chars
+query
+SELECT get_json_object('{"fb:testid":"123"}', '$.fb:testid')
+
+-- escaped quotes in string values
+query
+SELECT get_json_object('{"a":"b\\"c"}', '$.a')
+
+-- numeric precision preserved
+query
+SELECT get_json_object('{"price":8.95}', '$.price')
+
+-- object key ordering preserved in output
+query
+SELECT get_json_object('{"z":1,"a":2}', '$')

From b73fdc1072944e7969c8fbda50940794430e5e1c Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Fri, 20 Mar 2026 11:10:45 -0600
Subject: [PATCH 4/6] feat: add microbenchmark for get_json_object

Benchmarks simple field, numeric field, nested field, array element,
and nested object extraction with 1M rows of JSON data.
---
 .../CometGetJsonObjectBenchmark.scala         | 86 +++++++++++++++++++
 1 file changed, 86 insertions(+)
 create mode 100644 spark/src/test/scala/org/apache/spark/sql/benchmark/CometGetJsonObjectBenchmark.scala

diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometGetJsonObjectBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometGetJsonObjectBenchmark.scala
new file mode 100644
index 0000000000..fa63945347
--- /dev/null
+++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometGetJsonObjectBenchmark.scala
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.benchmark
+
+/**
+ * Benchmark to measure performance of Comet get_json_object expression. To run this benchmark:
+ * {{{
+ *   SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometGetJsonObjectBenchmark
+ * }}}
+ * Results will be written to "spark/benchmarks/CometGetJsonObjectBenchmark-**results.txt".
+ */
+object CometGetJsonObjectBenchmark extends CometBenchmarkBase {
+
+  override def runCometBenchmark(mainArgs: Array[String]): Unit = {
+    val numRows = 1024 * 1024
+    runBenchmarkWithTable("get_json_object", numRows) { v =>
+      withTempPath { dir =>
+        withTempTable("parquetV1Table") {
+          import spark.implicits._
+
+          prepareTable(
+            dir,
+            spark
+              .range(numRows)
+              .map { i =>
+                val name = s"user_$i"
+                val age = (i % 80 + 18).toInt
+                val nested = s"""{"city":"city_${i % 100}","zip":"${10000 + i % 90000}"}"""
+                val items =
+                  (0 until (i % 5 + 1).toInt).map(j => s""""item_$j"""").mkString("[", ",", "]")
+                s"""{"name":"$name","age":$age,"address":$nested,"items":$items}"""
+              }
+              .toDF("c1"))
+
+          val extraConfigs =
+            Map("spark.comet.expression.GetJsonObject.allowIncompatible" -> "true")
+
+          val benchmarks = List(
+            StringExprConfig(
+              "simple field",
+              "select get_json_object(c1, '$.name') from parquetV1Table",
+              extraConfigs),
+            StringExprConfig(
+              "numeric field",
+              "select get_json_object(c1, '$.age') from parquetV1Table",
+              extraConfigs),
+            StringExprConfig(
+              "nested field",
+              "select get_json_object(c1, '$.address.city') from parquetV1Table",
+              extraConfigs),
+            StringExprConfig(
+              "array element",
+              "select get_json_object(c1, '$.items[0]') from parquetV1Table",
+              extraConfigs),
+            StringExprConfig(
+              "nested object",
+              "select get_json_object(c1, '$.address') from parquetV1Table",
+              extraConfigs))
+
+          benchmarks.foreach { config =>
+            runBenchmark(config.name) {
+              runExpressionBenchmark(config.name, v, config.query, config.extraCometConfigs)
+            }
+          }
+        }
+      }
+    }
+  }
+}

From 3d6d560735e9d5bdd6a960376aed19d7e7288f80 Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Fri, 20 Mar 2026 11:17:55 -0600
Subject: [PATCH 5/6] refactor: simplify get_json_object implementation

- Move StringBuilder import to top-level imports
- Fix doc comment to not mention object wildcard (unsupported)
- Pre-compute has_wildcard in ParsedPath struct (avoids per-row scan)
- Split evaluation into evaluate_no_wildcard (returns Option, zero Vec
  allocations) and evaluate_with_wildcard (returns Vec)
- Simplify single wildcard match: serialize directly instead of
  array-wrap-then-strip-brackets hack
- Add comment in Cargo.toml explaining preserve_order requirement
---
 native/spark-expr/Cargo.toml                  |   1 +
 .../src/string_funcs/get_json_object.rs       | 222 +++++++++---------
 2 files changed, 111 insertions(+), 112 deletions(-)

diff --git a/native/spark-expr/Cargo.toml b/native/spark-expr/Cargo.toml
index ffc395317f..dc78303e07 100644
--- a/native/spark-expr/Cargo.toml
+++ b/native/spark-expr/Cargo.toml
@@ -34,6 +34,7 @@ chrono-tz = { workspace = true }
 num = { workspace = true }
 regex = { workspace = true }
 serde = { version = "1.0", features = ["derive"] }
+# preserve_order: needed for get_json_object to match Spark's JSON key ordering
 serde_json = { version = "1.0", features = ["preserve_order"] }
 thiserror = { workspace = true }
 futures = { workspace = true }
diff --git a/native/spark-expr/src/string_funcs/get_json_object.rs b/native/spark-expr/src/string_funcs/get_json_object.rs
index af08ae0e5e..c1b30d2806 100644
--- a/native/spark-expr/src/string_funcs/get_json_object.rs
+++ b/native/spark-expr/src/string_funcs/get_json_object.rs
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow::array::{Array, ArrayRef, StringArray};
+use arrow::array::{Array, ArrayRef, StringArray, StringBuilder};
 use datafusion::common::{
     cast::as_generic_string_array, exec_err, Result as DataFusionResult, ScalarValue,
 };
@@ -32,8 +32,7 @@ use std::sync::Arc;
 /// - `$` — root element
 /// - `.name` or `['name']` — named child
 /// - `[n]` — array index (0-based)
-/// - `[*]` — array wildcard
-/// - `.*` or `['*']` — object wildcard
+/// - `[*]` — array wildcard (iterates over array elements)
 pub fn spark_get_json_object(args: &[ColumnarValue]) -> DataFusionResult<ColumnarValue> {
     if args.len() != 2 {
         return exec_err!(
@@ -132,8 +131,6 @@ pub fn spark_get_json_object(args: &[ColumnarValue]) -> DataFusionResult<Columna
     }
 }
 
-use arrow::array::StringBuilder;
-
 /// A parsed JSONPath segment.
 #[derive(Debug, Clone)]
 enum PathSegment {
@@ -141,13 +138,19 @@ enum PathSegment {
     Field(String),
     /// Array index: `[n]`
     Index(usize),
-    /// Wildcard: `[*]` or `.*`
+    /// Wildcard: `[*]` (iterates over array elements)
     Wildcard,
 }
 
+/// A parsed JSONPath expression with precomputed metadata.
+struct ParsedPath {
+    segments: Vec<PathSegment>,
+    has_wildcard: bool,
+}
+
 /// Parse a Spark-compatible JSONPath expression.
 /// Returns None for invalid paths.
-fn parse_json_path(path: &str) -> Option<Vec<PathSegment>> {
+fn parse_json_path(path: &str) -> Option<ParsedPath> {
     let mut chars = path.chars().peekable();
 
     // Must start with '$'
@@ -156,6 +159,7 @@ fn parse_json_path(path: &str) -> Option<Vec<PathSegment>> {
     }
 
     let mut segments = Vec::new();
+    let mut has_wildcard = false;
 
     while chars.peek().is_some() {
         match chars.peek()? {
@@ -168,6 +172,7 @@ fn parse_json_path(path: &str) -> Option<Vec<PathSegment>> {
                 if chars.peek() == Some(&'*') {
                     chars.next();
                     segments.push(PathSegment::Wildcard);
+                    has_wildcard = true;
                 } else {
                     // Read field name
                     let mut name = String::new();
@@ -201,6 +206,7 @@ fn parse_json_path(path: &str) -> Option<Vec<PathSegment>> {
                     }
                     if name == "*" {
                         segments.push(PathSegment::Wildcard);
+                        has_wildcard = true;
                     } else {
                         segments.push(PathSegment::Field(name));
                     }
@@ -211,6 +217,7 @@ fn parse_json_path(path: &str) -> Option<Vec<PathSegment>> {
                         return None;
                     }
                     segments.push(PathSegment::Wildcard);
+                    has_wildcard = true;
                 } else {
                     // [n] — numeric index
                     let mut num_str = String::new();
@@ -235,33 +242,36 @@ fn parse_json_path(path: &str) -> Option<Vec<PathSegment>> {
         }
     }
 
-    Some(segments)
+    Some(ParsedPath {
+        segments,
+        has_wildcard,
+    })
 }
 
 /// Evaluate a parsed JSONPath against a JSON string.
 /// Returns the result as a string, or None if no match.
-fn evaluate_path(json_str: &str, path: &[PathSegment]) -> Option<String> {
+fn evaluate_path(json_str: &str, path: &ParsedPath) -> Option<String> {
     let value: Value = serde_json::from_str(json_str).ok()?;
 
-    // Check if path contains any wildcards
-    let has_wildcard = path.iter().any(|s| matches!(s, PathSegment::Wildcard));
+    if !path.has_wildcard {
+        // Fast path: no wildcards, no Vec allocations
+        let result = evaluate_no_wildcard(&value, &path.segments)?;
+        return value_to_string(result);
+    }
 
-    let results = evaluate_segments(&value, path);
+    // Wildcard path: may return multiple results
+    let results = evaluate_with_wildcard(&value, &path.segments);
 
     match results.len() {
         0 => None,
-        1 if !has_wildcard => value_to_string(results[0]),
         1 => {
-            // Single wildcard match: Spark strips outer array brackets from the
-            // JSON representation. For strings this means the quotes are preserved.
-            // We simulate by serializing as a JSON array and stripping the brackets.
-            let arr_str = serde_json::to_string(&Value::Array(vec![results[0].clone()])).ok()?;
-            // Strip leading '[' and trailing ']'
-            let inner = &arr_str[1..arr_str.len() - 1];
-            if inner == "null" {
+            // Single wildcard match: Spark preserves JSON serialization format
+            // (strings keep their quotes, numbers don't)
+            let s = serde_json::to_string(results[0]).ok()?;
+            if s == "null" {
                 None
             } else {
-                Some(inner.to_string())
+                Some(s)
             }
         }
         _ => {
@@ -272,27 +282,46 @@ fn evaluate_path(json_str: &str, path: &[PathSegment]) -> Option<String> {
     }
 }
 
-/// Recursively evaluate path segments against a JSON value.
+/// Fast-path evaluation for paths without wildcards.
+/// Returns a reference to the matched value, or None if no match.
+fn evaluate_no_wildcard<'a>(value: &'a Value, segments: &[PathSegment]) -> Option<&'a Value> {
+    if segments.is_empty() {
+        return Some(value);
+    }
+
+    match &segments[0] {
+        PathSegment::Field(name) => {
+            let child = value.as_object()?.get(name)?;
+            evaluate_no_wildcard(child, &segments[1..])
+        }
+        PathSegment::Index(idx) => {
+            let child = value.as_array()?.get(*idx)?;
+            evaluate_no_wildcard(child, &segments[1..])
+        }
+        PathSegment::Wildcard => unreachable!("wildcard in no-wildcard path"),
+    }
+}
+
+/// Evaluation for paths containing wildcards.
 /// Returns references to all matching values.
-fn evaluate_segments<'a>(value: &'a Value, segments: &[PathSegment]) -> Vec<&'a Value> {
+fn evaluate_with_wildcard<'a>(value: &'a Value, segments: &[PathSegment]) -> Vec<&'a Value> {
     if segments.is_empty() {
         return vec![value];
     }
 
-    let segment = &segments[0];
     let rest = &segments[1..];
 
-    match segment {
+    match &segments[0] {
         PathSegment::Field(name) => match value {
             Value::Object(map) => match map.get(name) {
-                Some(v) => evaluate_segments(v, rest),
+                Some(v) => evaluate_with_wildcard(v, rest),
                 None => vec![],
             },
             _ => vec![],
         },
         PathSegment::Index(idx) => match value {
             Value::Array(arr) => match arr.get(*idx) {
-                Some(v) => evaluate_segments(v, rest),
+                Some(v) => evaluate_with_wildcard(v, rest),
                 None => vec![],
             },
             _ => vec![],
@@ -300,7 +329,7 @@ fn evaluate_segments<'a>(value: &'a Value, segments: &[PathSegment]) -> Vec<&'a
         PathSegment::Wildcard => match value {
             Value::Array(arr) => arr
                 .iter()
-                .flat_map(|v| evaluate_segments(v, rest))
+                .flat_map(|v| evaluate_with_wildcard(v, rest))
                 .collect(),
             _ => vec![],
         },
@@ -327,26 +356,30 @@ mod tests {
     fn test_parse_json_path() {
         // Root only
         let path = parse_json_path("$").unwrap();
-        assert!(path.is_empty());
+        assert!(path.segments.is_empty());
+        assert!(!path.has_wildcard);
 
         // Simple field
         let path = parse_json_path("$.name").unwrap();
-        assert!(matches!(&path[0], PathSegment::Field(n) if n == "name"));
+        assert!(matches!(&path.segments[0], PathSegment::Field(n) if n == "name"));
+        assert!(!path.has_wildcard);
 
         // Array index
         let path = parse_json_path("$[0]").unwrap();
-        assert!(matches!(&path[0], PathSegment::Index(0)));
+        assert!(matches!(&path.segments[0], PathSegment::Index(0)));
 
         // Bracket notation
         let path = parse_json_path("$['key with spaces']").unwrap();
-        assert!(matches!(&path[0], PathSegment::Field(n) if n == "key with spaces"));
+        assert!(matches!(&path.segments[0], PathSegment::Field(n) if n == "key with spaces"));
 
         // Wildcard
         let path = parse_json_path("$[*]").unwrap();
-        assert!(matches!(&path[0], PathSegment::Wildcard));
+        assert!(matches!(&path.segments[0], PathSegment::Wildcard));
+        assert!(path.has_wildcard);
 
         let path = parse_json_path("$.*").unwrap();
-        assert!(matches!(&path[0], PathSegment::Wildcard));
+        assert!(matches!(&path.segments[0], PathSegment::Wildcard));
+        assert!(path.has_wildcard);
 
         // Recursive descent not supported
         assert!(parse_json_path("$..name").is_none());
@@ -358,18 +391,14 @@ mod tests {
 
     #[test]
     fn test_evaluate_simple_field() {
+        let path = parse_json_path("$.name").unwrap();
         assert_eq!(
-            evaluate_path(
-                r#"{"name":"John","age":30}"#,
-                &parse_json_path("$.name").unwrap()
-            ),
+            evaluate_path(r#"{"name":"John","age":30}"#, &path),
             Some("John".to_string())
         );
+        let path = parse_json_path("$.age").unwrap();
         assert_eq!(
-            evaluate_path(
-                r#"{"name":"John","age":30}"#,
-                &parse_json_path("$.age").unwrap()
-            ),
+            evaluate_path(r#"{"name":"John","age":30}"#, &path),
             Some("30".to_string())
         );
     }
@@ -377,111 +406,89 @@ mod tests {
     #[test]
     fn test_evaluate_nested() {
         let json = r#"{"user":{"profile":{"name":"Alice"}}}"#;
-        assert_eq!(
-            evaluate_path(json, &parse_json_path("$.user.profile.name").unwrap()),
-            Some("Alice".to_string())
-        );
+        let path = parse_json_path("$.user.profile.name").unwrap();
+        assert_eq!(evaluate_path(json, &path), Some("Alice".to_string()));
     }
 
     #[test]
     fn test_evaluate_array_index() {
-        assert_eq!(
-            evaluate_path(r#"[1,2,3]"#, &parse_json_path("$[0]").unwrap()),
-            Some("1".to_string())
-        );
-        assert_eq!(
-            evaluate_path(r#"[1,2,3]"#, &parse_json_path("$[3]").unwrap()),
-            None
-        );
+        let path = parse_json_path("$[0]").unwrap();
+        assert_eq!(evaluate_path(r#"[1,2,3]"#, &path), Some("1".to_string()));
+        let path = parse_json_path("$[3]").unwrap();
+        assert_eq!(evaluate_path(r#"[1,2,3]"#, &path), None);
     }
 
     #[test]
     fn test_evaluate_root() {
+        let path = parse_json_path("$").unwrap();
         assert_eq!(
-            evaluate_path(r#"{"a":"b"}"#, &parse_json_path("$").unwrap()),
+            evaluate_path(r#"{"a":"b"}"#, &path),
             Some(r#"{"a":"b"}"#.to_string())
         );
     }
 
     #[test]
     fn test_evaluate_null_value() {
-        assert_eq!(
-            evaluate_path(r#"{"a":null}"#, &parse_json_path("$.a").unwrap()),
-            None
-        );
+        let path = parse_json_path("$.a").unwrap();
+        assert_eq!(evaluate_path(r#"{"a":null}"#, &path), None);
     }
 
     #[test]
     fn test_evaluate_missing_field() {
-        assert_eq!(
-            evaluate_path(r#"{"a":"b"}"#, &parse_json_path("$.c").unwrap()),
-            None
-        );
+        let path = parse_json_path("$.c").unwrap();
+        assert_eq!(evaluate_path(r#"{"a":"b"}"#, &path), None);
     }
 
     #[test]
     fn test_evaluate_invalid_json() {
-        assert_eq!(
-            evaluate_path("not json", &parse_json_path("$.a").unwrap()),
-            None
-        );
+        let path = parse_json_path("$.a").unwrap();
+        assert_eq!(evaluate_path("not json", &path), None);
     }
 
     #[test]
     fn test_evaluate_wildcard() {
         let json = r#"[{"a":"b"},{"a":"c"}]"#;
-        assert_eq!(
-            evaluate_path(json, &parse_json_path("$[*].a").unwrap()),
-            Some(r#"["b","c"]"#.to_string())
-        );
+        let path = parse_json_path("$[*].a").unwrap();
+        assert_eq!(evaluate_path(json, &path), Some(r#"["b","c"]"#.to_string()));
     }
 
     #[test]
     fn test_evaluate_string_unquoted() {
-        // Strings should be returned without quotes
-        assert_eq!(
-            evaluate_path(r#"["a","b"]"#, &parse_json_path("$[1]").unwrap()),
-            Some("b".to_string())
-        );
+        // Strings should be returned without quotes for non-wildcard paths
+        let path = parse_json_path("$[1]").unwrap();
+        assert_eq!(evaluate_path(r#"["a","b"]"#, &path), Some("b".to_string()));
     }
 
     #[test]
     fn test_evaluate_nested_array_field() {
         let json = r#"{"items":["apple","banana","cherry"]}"#;
-        assert_eq!(
-            evaluate_path(json, &parse_json_path("$.items[1]").unwrap()),
-            Some("banana".to_string())
-        );
+        let path = parse_json_path("$.items[1]").unwrap();
+        assert_eq!(evaluate_path(json, &path), Some("banana".to_string()));
     }
 
     #[test]
     fn test_evaluate_bracket_notation_with_spaces() {
         let json = r#"{"key with spaces":"it works"}"#;
-        assert_eq!(
-            evaluate_path(json, &parse_json_path("$['key with spaces']").unwrap()),
-            Some("it works".to_string())
-        );
+        let path = parse_json_path("$['key with spaces']").unwrap();
+        assert_eq!(evaluate_path(json, &path), Some("it works".to_string()));
     }
 
     #[test]
     fn test_evaluate_boolean_and_nested_object() {
         let json = r#"{"a":true,"b":{"c":1}}"#;
-        assert_eq!(
-            evaluate_path(json, &parse_json_path("$.a").unwrap()),
-            Some("true".to_string())
-        );
-        assert_eq!(
-            evaluate_path(json, &parse_json_path("$.b").unwrap()),
-            Some(r#"{"c":1}"#.to_string())
-        );
+        let path_a = parse_json_path("$.a").unwrap();
+        assert_eq!(evaluate_path(json, &path_a), Some("true".to_string()));
+        let path_b = parse_json_path("$.b").unwrap();
+        assert_eq!(evaluate_path(json, &path_b), Some(r#"{"c":1}"#.to_string()));
     }
 
     #[test]
     fn test_object_key_order_preserved() {
-        // serde_json with preserve_order feature should maintain insertion order
+        // Depends on serde_json "preserve_order" feature (see Cargo.toml)
         let json = r#"{"z":1,"a":2}"#;
+        let path = parse_json_path("$").unwrap();
         assert_eq!(
-            evaluate_path(json, &parse_json_path("$").unwrap()),
+            evaluate_path(json, &path),
             Some(r#"{"z":1,"a":2}"#.to_string())
         );
     }
@@ -490,36 +497,27 @@ mod tests {
     fn test_wildcard_single_match() {
         // Single wildcard match on string: Spark preserves JSON quotes
         let json = r#"[{"a":"only"}]"#;
-        assert_eq!(
-            evaluate_path(json, &parse_json_path("$[*].a").unwrap()),
-            Some(r#""only""#.to_string())
-        );
+        let path = parse_json_path("$[*].a").unwrap();
+        assert_eq!(evaluate_path(json, &path), Some(r#""only""#.to_string()));
 
         // Single wildcard match on number: no quotes
         let json = r#"[{"a":42}]"#;
-        assert_eq!(
-            evaluate_path(json, &parse_json_path("$[*].a").unwrap()),
-            Some("42".to_string())
-        );
+        assert_eq!(evaluate_path(json, &path), Some("42".to_string()));
     }
 
     #[test]
     fn test_wildcard_missing_fields() {
         // Wildcard should skip elements where the field is missing
         let json = r#"[{"a":1},{"b":2},{"a":3}]"#;
-        assert_eq!(
-            evaluate_path(json, &parse_json_path("$[*].a").unwrap()),
-            Some("[1,3]".to_string())
-        );
+        let path = parse_json_path("$[*].a").unwrap();
+        assert_eq!(evaluate_path(json, &path), Some("[1,3]".to_string()));
     }
 
     #[test]
     fn test_field_with_colon() {
         let json = r#"{"fb:testid":"123"}"#;
-        assert_eq!(
-            evaluate_path(json, &parse_json_path("$.fb:testid").unwrap()),
-            Some("123".to_string())
-        );
+        let path = parse_json_path("$.fb:testid").unwrap();
+        assert_eq!(evaluate_path(json, &path), Some("123".to_string()));
     }
 
     #[test]
@@ -532,7 +530,7 @@ mod tests {
     fn test_object_wildcard() {
         // Spark returns null for $.* on objects (wildcard only works in array contexts)
         let json = r#"{"a":1,"b":2,"c":3}"#;
-        let result = evaluate_path(json, &parse_json_path("$.*").unwrap());
-        assert_eq!(result, None);
+        let path = parse_json_path("$.*").unwrap();
+        assert_eq!(evaluate_path(json, &path), None);
     }
 }

From f558e0fd72ac42156b83e6f8da01592ded3a3430 Mon Sep 17 00:00:00 2001
From: Andy Grove <agrove@apache.org>
Date: Fri, 20 Mar 2026 11:28:53 -0600
Subject: [PATCH 6/6] style: format markdown with prettier

---
 docs/source/user-guide/latest/expressions.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/user-guide/latest/expressions.md b/docs/source/user-guide/latest/expressions.md
index 9b6511d42d..9934499401 100644
--- a/docs/source/user-guide/latest/expressions.md
+++ b/docs/source/user-guide/latest/expressions.md
@@ -92,9 +92,9 @@ Expressions that are not Spark-compatible will fall back to Spark by default and
 
 ## JSON Functions
 
-| Expression      | Spark-Compatible? | Compatibility Notes                                                                                                           |
-| --------------- | ----------------- | ----------------------------------------------------------------------------------------------------------------------------- |
-| GetJsonObject   | No                | Spark allows single-quoted JSON and unescaped control characters which Comet does not support                                 |
+| Expression    | Spark-Compatible? | Compatibility Notes                                                                           |
+| ------------- | ----------------- | --------------------------------------------------------------------------------------------- |
+| GetJsonObject | No                | Spark allows single-quoted JSON and unescaped control characters which Comet does not support |
 
 ## Date/Time Functions