From 7aabb3f3c4e3430165a0fa806ebf8a8521d96d4c Mon Sep 17 00:00:00 2001 From: Hu Shenggang Date: Fri, 15 May 2026 18:36:22 +0800 Subject: [PATCH] [fix](be) Reject super wildcard path in json keys ### What problem does this PR solve? Issue Number: None Related PR: None Problem Summary: Fixes Jira DORIS-25570. json_keys/jsonb_keys rejected ordinary wildcard paths but allowed super wildcard paths such as $**.a to fall through and return NULL. The function only supports reading keys from a single object, so super wildcard paths should fail with the same INVALID_JSON_PATH error as other wildcard paths. ### Release note Reject unsupported super wildcard JSON paths in json_keys/jsonb_keys instead of returning NULL. ### Check List (For Author) - Test: Unit Test / Regression test / Static check - Unit Test: ./run-be-ut.sh --run --filter=FunctionJsonbTEST.JsonbKeysRejectSuperWildcardPath - Regression test: Added regression-test/suites/jsonb_p0/test_jsonb_keys_invalid_path.groovy (not run locally; no FE/BE cluster was listening on configured regression ports) - Static check: build-support/check-format.sh - Static check: build-support/run-clang-tidy.sh --build-dir be/ut_build_ASAN (failed due to pre-existing function_jsonb.cpp complexity diagnostics and toolchain header/NOLINTEND errors) - Behavior changed: Yes (json_keys/jsonb_keys now return INVALID_JSON_PATH for $** paths instead of NULL) - Does this need documentation: No --- be/src/exprs/function/function_jsonb.cpp | 4 +- .../exprs/function/function_jsonb_test.cpp | 74 ++++++++++++++++++- .../test_jsonb_keys_invalid_path.groovy | 35 +++++++++ 3 files changed, 110 insertions(+), 3 deletions(-) create mode 100644 regression-test/suites/jsonb_p0/test_jsonb_keys_invalid_path.groovy diff --git a/be/src/exprs/function/function_jsonb.cpp b/be/src/exprs/function/function_jsonb.cpp index 40c10cb4e6183d..1f11e496877200 100644 --- a/be/src/exprs/function/function_jsonb.cpp +++ b/be/src/exprs/function/function_jsonb.cpp @@ -568,7 +568,7 @@ class FunctionJsonbKeys : public IFunction { r_raw_ref.to_string()); } - if (const_path.is_wildcard()) { + if (const_path.is_wildcard() || const_path.is_supper_wildcard()) { return Status::InvalidJsonPath( "In this situation, path expressions may not contain the * and ** tokens " "or an array range."); @@ -610,7 +610,7 @@ class FunctionJsonbKeys : public IFunction { std::string_view(data.data, data.size), i); } - if (path.is_wildcard()) { + if (path.is_wildcard() || path.is_supper_wildcard()) { return Status::InvalidJsonPath( "In this situation, path expressions may not contain the * and ** " "tokens " diff --git a/be/test/exprs/function/function_jsonb_test.cpp b/be/test/exprs/function/function_jsonb_test.cpp index c321fffb4f440c..25377a0bce9965 100644 --- a/be/test/exprs/function/function_jsonb_test.cpp +++ b/be/test/exprs/function/function_jsonb_test.cpp @@ -16,13 +16,15 @@ // under the License. #include -#include +#include #include #include +#include #include "common/status.h" #include "core/column/column_const.h" +#include "core/data_type/data_type_array.h" #include "core/data_type/data_type_jsonb.h" #include "core/data_type/data_type_nullable.h" #include "core/data_type/data_type_number.h" @@ -41,6 +43,67 @@ namespace doris { using namespace ut_type; +namespace { + +ColumnPtr create_jsonb_column(size_t rows) { + auto jsonb_type = std::make_shared(); + auto jsonb_column = jsonb_type->create_column(); + for (size_t i = 0; i < rows; ++i) { + EXPECT_TRUE(insert_cell(jsonb_column, jsonb_type, STRING(R"({"a":{"b":1}})"))); + } + return jsonb_column; +} + +ColumnPtr create_path_column(const std::vector& paths) { + auto path_type = std::make_shared(); + auto path_column = path_type->create_column(); + for (const auto& path : paths) { + EXPECT_TRUE(insert_cell(path_column, path_type, STRING(path))); + } + return path_column; +} + +ColumnPtr create_const_path_column(const std::string& path, size_t rows) { + return ColumnConst::create(create_path_column({path}), rows); +} + +Status execute_json_keys_with_path(ColumnPtr path_column, size_t rows) { + auto jsonb_type = std::make_shared(); + auto path_type = std::make_shared(); + auto return_type = make_nullable( + std::make_shared(make_nullable(std::make_shared()))); + + Block block; + block.insert({create_jsonb_column(rows), jsonb_type, "jsonb"}); + block.insert({std::move(path_column), path_type, "path"}); + + FunctionBasePtr func = SimpleFunctionFactory::instance().get_function( + "json_keys", block.get_columns_with_type_and_name(), return_type); + DORIS_CHECK(func != nullptr); + + FunctionUtils fn_utils(return_type, {jsonb_type, path_type}, false); + auto* fn_ctx = fn_utils.get_fn_ctx(); + RETURN_IF_ERROR(func->open(fn_ctx, FunctionContext::FRAGMENT_LOCAL)); + RETURN_IF_ERROR(func->open(fn_ctx, FunctionContext::THREAD_LOCAL)); + + block.insert({nullptr, return_type, "result"}); + auto st = func->execute(fn_ctx, block, {0, 1}, block.columns() - 1, rows); + + static_cast(func->close(fn_ctx, FunctionContext::THREAD_LOCAL)); + static_cast(func->close(fn_ctx, FunctionContext::FRAGMENT_LOCAL)); + return st; +} + +void expect_invalid_json_keys_super_wildcard_path(const Status& st) { + EXPECT_EQ(st.code(), ErrorCode::INVALID_JSON_PATH) << st.to_string(); + EXPECT_NE(st.to_string().find( + "path expressions may not contain the * and ** tokens or an array range"), + std::string::npos) + << st.to_string(); +} + +} // namespace + TEST(FunctionJsonbTEST, JsonbParseTest) { std::string func_name = "json_parse"; InputTypeSet input_types = {Nullable {PrimitiveType::TYPE_VARCHAR}}; @@ -186,6 +249,15 @@ TEST(FunctionJsonbTEST, JsonbParseErrorToValueTest) { ASSERT_EQ(st.code(), ErrorCode::INVALID_ARGUMENT) << st.to_string(); } +TEST(FunctionJsonbTEST, JsonbKeysRejectSuperWildcardPath) { + auto const_path_status = execute_json_keys_with_path(create_const_path_column("$**.a", 1), 1); + expect_invalid_json_keys_super_wildcard_path(const_path_status); + + auto non_const_path_status = + execute_json_keys_with_path(create_path_column({"$.a", "$**.a"}), 2); + expect_invalid_json_keys_super_wildcard_path(non_const_path_status); +} + TEST(FunctionJsonbTEST, JsonbExtractTest) { std::string func_name = "jsonb_extract"; InputTypeSet input_types = {PrimitiveType::TYPE_JSONB, PrimitiveType::TYPE_VARCHAR}; diff --git a/regression-test/suites/jsonb_p0/test_jsonb_keys_invalid_path.groovy b/regression-test/suites/jsonb_p0/test_jsonb_keys_invalid_path.groovy new file mode 100644 index 00000000000000..8ba7d2f9c1b721 --- /dev/null +++ b/regression-test/suites/jsonb_p0/test_jsonb_keys_invalid_path.groovy @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_jsonb_keys_invalid_path", "p0") { + test { + sql """ + SELECT json_keys(CAST('{"a":{"b":1}}' AS JSONB), '\$**.a'); + """ + exception "In this situation, path expressions may not contain the * and ** tokens or an array range." + } + + test { + sql """ + SELECT json_keys(j, p) + FROM ( + SELECT CAST('{"a":{"b":1}}' AS JSONB) AS j, '\$**.a' AS p + ) t; + """ + exception "In this situation, path expressions may not contain the * and ** tokens or an array range." + } +}