diff --git a/be/src/util/jsonb_parser_simd.h b/be/src/util/jsonb_parser_simd.h index 43f12f024ed4c1..7632eb45fba1a7 100644 --- a/be/src/util/jsonb_parser_simd.h +++ b/be/src/util/jsonb_parser_simd.h @@ -93,6 +93,29 @@ struct JsonbParser { simdjson::padded_string json_str {pch, len}; simdjson::ondemand::document doc = simdjson_parser.iterate(json_str); + auto is_json_whitespace = [](char c) { + return c == ' ' || c == '\t' || c == '\n' || c == '\r'; + }; + const char* json_begin = json_str.data(); + const char* json_end = json_str.data() + len; + while (json_begin < json_end && is_json_whitespace(*json_begin)) { + ++json_begin; + } + while (json_end > json_begin && is_json_whitespace(*(json_end - 1))) { + --json_end; + } + + std::string_view raw_json; + simdjson::error_code raw_res = doc.raw_json().get(raw_json); + if (raw_res != simdjson::SUCCESS) { + return Status::InvalidArgument(fmt::format("simdjson raw_json failed: {}", + simdjson::error_message(raw_res))); + } + if (raw_json.data() != json_begin || raw_json.data() + raw_json.size() != json_end) { + return Status::InvalidArgument("simdjson parse exception: trailing content"); + } + doc.rewind(); + // simdjson process top level primitive types specially // so some repeated code here switch (doc.type()) { @@ -102,6 +125,12 @@ struct JsonbParser { break; } case simdjson::ondemand::json_type::null: { + bool is_null = false; + simdjson::error_code res = doc.is_null().get(is_null); + if (res != simdjson::SUCCESS || !is_null) { + return Status::InvalidArgument(fmt::format("simdjson get null failed: {}", + simdjson::error_message(res))); + } if (writer.writeNull() == 0) { return Status::InvalidArgument("writeNull failed"); } @@ -130,6 +159,9 @@ struct JsonbParser { break; } } + if (!doc.at_end()) { + return Status::InvalidArgument("simdjson parse exception: trailing content"); + } } catch (simdjson::simdjson_error& e) { return Status::InvalidArgument(fmt::format("simdjson parse exception: {}", e.what())); } diff --git a/be/test/core/value/jsonb_value_test.cpp b/be/test/core/value/jsonb_value_test.cpp index 811717950802ff..2592153822628b 100644 --- a/be/test/core/value/jsonb_value_test.cpp +++ b/be/test/core/value/jsonb_value_test.cpp @@ -29,7 +29,8 @@ TEST(JsonBinaryValueTest, TestValidation) { JsonBinaryValue json_val; // Empty string and non-JSON text should fail to parse - std::vector invalid_strs = {"", "abc"}; + std::vector invalid_strs = {"", "abc", "not", + "not json", "null junk", R"({"a":1} junk)"}; for (size_t i = 0; i < invalid_strs.size(); i++) { auto status = json_val.from_json_string(invalid_strs[i].c_str(), invalid_strs[i].size()); EXPECT_FALSE(status.ok()) << "Should fail for: [" << invalid_strs[i] << "]"; @@ -55,4 +56,4 @@ TEST(JsonBinaryValueTest, TestValidation) { EXPECT_TRUE(status.ok()) << "Should succeed for: [" << valid_strs[i] << "]"; } } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/exprs/function/function_jsonb_test.cpp b/be/test/exprs/function/function_jsonb_test.cpp index c321fffb4f440c..7db6b9febbd338 100644 --- a/be/test/exprs/function/function_jsonb_test.cpp +++ b/be/test/exprs/function/function_jsonb_test.cpp @@ -186,6 +186,24 @@ TEST(FunctionJsonbTEST, JsonbParseErrorToValueTest) { ASSERT_EQ(st.code(), ErrorCode::INVALID_ARGUMENT) << st.to_string(); } +TEST(FunctionJsonbTEST, JsonValidStrictTest) { + std::string func_name = "json_valid"; + InputTypeSet input_types = {Nullable {PrimitiveType::TYPE_VARCHAR}}; + + DataSet data_set = { + {{Null()}, Null()}, + {{STRING("not")}, INT(0)}, + {{STRING("not json")}, INT(0)}, + {{STRING("null junk")}, INT(0)}, + {{STRING(R"({"a":1} junk)")}, INT(0)}, + {{STRING("null")}, INT(1)}, + {{STRING("true")}, INT(1)}, + {{STRING(R"({"a":1})")}, INT(1)}, + }; + + static_cast(check_function(func_name, input_types, data_set)); +} + TEST(FunctionJsonbTEST, JsonbExtractTest) { std::string func_name = "jsonb_extract"; InputTypeSet input_types = {PrimitiveType::TYPE_JSONB, PrimitiveType::TYPE_VARCHAR}; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractBigint.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractBigint.java index 64bbcbaee91254..7027dbc29f901d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractBigint.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractBigint.java @@ -80,6 +80,8 @@ public R accept(ExpressionVisitor visitor, C context) { @Override public Expression rewriteWhenAnalyze() { JsonbExtract jsonExtract = new JsonbExtract(children.get(0), children.get(1)); + // Keep the historical JSON typed-extract behavior: extract JSONB first and then use the + // normal cast rules, so convertible mismatched JSON values are allowed by design. return new Cast(jsonExtract, BigIntType.INSTANCE, false); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractBool.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractBool.java index 6091c4f717061d..90488a68418421 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractBool.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractBool.java @@ -80,6 +80,8 @@ public R accept(ExpressionVisitor visitor, C context) { @Override public Expression rewriteWhenAnalyze() { JsonbExtract jsonExtract = new JsonbExtract(children.get(0), children.get(1)); + // Keep the historical JSON typed-extract behavior: extract JSONB first and then use the + // normal cast rules, so convertible mismatched JSON values are allowed by design. return new Cast(jsonExtract, BooleanType.INSTANCE, false); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractDouble.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractDouble.java index 35dd98ad17e6a6..832ae65b4fa542 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractDouble.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractDouble.java @@ -80,6 +80,8 @@ public R accept(ExpressionVisitor visitor, C context) { @Override public Expression rewriteWhenAnalyze() { JsonbExtract jsonExtract = new JsonbExtract(children.get(0), children.get(1)); + // Keep the historical JSON typed-extract behavior: extract JSONB first and then use the + // normal cast rules, so convertible mismatched JSON values are allowed by design. return new Cast(jsonExtract, DoubleType.INSTANCE, false); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractInt.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractInt.java index fdc0c20f544cc7..d30ec49dfd30bd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractInt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractInt.java @@ -80,6 +80,8 @@ public R accept(ExpressionVisitor visitor, C context) { @Override public Expression rewriteWhenAnalyze() { JsonbExtract jsonExtract = new JsonbExtract(children.get(0), children.get(1)); + // Keep the historical JSON typed-extract behavior: extract JSONB first and then use the + // normal cast rules, so convertible mismatched JSON values are allowed by design. return new Cast(jsonExtract, IntegerType.INSTANCE, false); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractLargeint.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractLargeint.java index 4c16a1e0828c1a..aadc357599f52a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractLargeint.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractLargeint.java @@ -80,6 +80,8 @@ public R accept(ExpressionVisitor visitor, C context) { @Override public Expression rewriteWhenAnalyze() { JsonbExtract jsonExtract = new JsonbExtract(children.get(0), children.get(1)); + // Keep the historical JSON typed-extract behavior: extract JSONB first and then use the + // normal cast rules, so convertible mismatched JSON values are allowed by design. return new Cast(jsonExtract, LargeIntType.INSTANCE, false); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractString.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractString.java index 641045ae1f96fb..16f0b0861857e5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractString.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/JsonbExtractString.java @@ -79,6 +79,8 @@ public R accept(ExpressionVisitor visitor, C context) { @Override public Expression rewriteWhenAnalyze() { JsonbExtract jsonExtract = new JsonbExtract(children.get(0), children.get(1)); + // Keep the historical JSON typed-extract behavior: extract JSONB first and then use the + // normal cast rules, so convertible mismatched JSON values are allowed by design. return new Cast(jsonExtract, StringType.INSTANCE, false); } } diff --git a/regression-test/suites/jsonb_p0/test_json_valid_strict.groovy b/regression-test/suites/jsonb_p0/test_json_valid_strict.groovy new file mode 100644 index 00000000000000..4cc85446ee4cac --- /dev/null +++ b/regression-test/suites/jsonb_p0/test_json_valid_strict.groovy @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_json_valid_strict") { + def jsonValid = sql """ + select + json_valid('not json'), + json_valid('not'), + json_valid('null junk'), + json_valid('{"a":1} junk'), + json_valid('{"a":1}') + from (select 1) t + order by 1 + """ + assertEquals(0, jsonValid[0][0]) + assertEquals(0, jsonValid[0][1]) + assertEquals(0, jsonValid[0][2]) + assertEquals(0, jsonValid[0][3]) + assertEquals(1, jsonValid[0][4]) +}