diff --git a/be/src/core/data_type_serde/data_type_serde.h b/be/src/core/data_type_serde/data_type_serde.h index 7c007c6558ddf3..43d41ddc99dc7c 100644 --- a/be/src/core/data_type_serde/data_type_serde.h +++ b/be/src/core/data_type_serde/data_type_serde.h @@ -154,6 +154,10 @@ class DataTypeSerDe { */ bool need_escape[256] = {false}; + // JSON load can stage Variant JSON as raw root string so storage materialization can parse + // it with TabletSchema-derived Variant typed paths. + bool deserialize_variant_from_json_as_raw_string = false; + /** * only used for export data */ diff --git a/be/src/core/data_type_serde/data_type_variant_serde.cpp b/be/src/core/data_type_serde/data_type_variant_serde.cpp index bb1f2b6c928f40..de2213389328b1 100644 --- a/be/src/core/data_type_serde/data_type_variant_serde.cpp +++ b/be/src/core/data_type_serde/data_type_variant_serde.cpp @@ -108,6 +108,15 @@ Status DataTypeVariantSerDe::serialize_one_cell_to_json(const IColumn& column, i Status DataTypeVariantSerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options) const { + if (options.deserialize_variant_from_json_as_raw_string) { + auto& variant = assert_cast(column); + Field field = Field::create_field(String(slice.data, slice.size)); + VariantMap object; + object.try_emplace(PathInData(), FieldWithDataType(field)); + variant.insert(Field::create_field(std::move(object))); + return Status::OK(); + } + ParseConfig parse_config; parse_config.check_duplicate_json_path = config::variant_enable_duplicate_json_path_check; StringRef json_ref(slice.data, slice.size); @@ -131,7 +140,7 @@ Status DataTypeVariantSerDe::write_column_to_arrow(const IColumn& column, const auto& builder = assert_cast(*array_builder); FormatOptions options; options.timezone = &ctz; - for (size_t i = start; i < end; ++i) { + for (int64_t i = start; i < end; ++i) { if (null_map && (*null_map)[i]) { RETURN_IF_ERROR(checkArrowStatus(builder.AppendNull(), column.get_name(), 
array_builder->type()->name())); @@ -159,12 +168,12 @@ Status DataTypeVariantSerDe::write_column_to_orc(const std::string& timezone, co int64_t start, int64_t end, Arena& arena, const FormatOptions& options) const { const auto* var = assert_cast(&column); - orc::StringVectorBatch* cur_batch = dynamic_cast(orc_col_batch); + auto* cur_batch = dynamic_cast(orc_col_batch); // First pass: calculate total memory needed and collect serialized values std::vector serialized_values; std::vector valid_row_indices; size_t total_size = 0; - for (size_t row_id = start; row_id < end; row_id++) { + for (int64_t row_id = start; row_id < end; row_id++) { if (cur_batch->notNull[row_id] == 1) { // avoid move the string data, use emplace_back to construct in place serialized_values.emplace_back(); diff --git a/be/src/exec/common/variant_util.cpp b/be/src/exec/common/variant_util.cpp index 39e8f236ecd16e..6cd6713fe46137 100644 --- a/be/src/exec/common/variant_util.cpp +++ b/be/src/exec/common/variant_util.cpp @@ -104,6 +104,44 @@ namespace doris::variant_util { +static bool is_decimal_typed_path_column(const TabletColumn& column) { + if (column.is_array_type()) { + CHECK_EQ(column.get_sub_columns().size(), 1); + return is_decimal_typed_path_column(*column.get_sub_columns()[0]); + } + return is_decimal(TabletColumn::get_primitive_type_by_field_type(column.type())); +} + +static void configure_decimal_number_preserve_paths(const TabletColumn& column, + ParseConfig* config) { + std::vector glob_patterns; + for (const auto& sub_column : column.get_sub_columns()) { + if (!is_decimal_typed_path_column(*sub_column)) { + continue; + } + switch (sub_column->pattern_type()) { + case PatternTypePB::MATCH_NAME: + config->preserve_decimal_number_paths.emplace(sub_column->name()); + break; + case PatternTypePB::MATCH_NAME_GLOB: + glob_patterns.emplace_back(sub_column->name()); + break; + } + } + if (!glob_patterns.empty()) { + config->preserve_decimal_number_path_matcher = + [glob_patterns = 
std::move(glob_patterns)](std::string_view path) { + std::string candidate_path(path); + for (const auto& pattern : glob_patterns) { + if (glob_match_re2(pattern, candidate_path)) { + return true; + } + } + return false; + }; + } +} + inline void append_escaped_regex_char(std::string* regex_output, char ch) { switch (ch) { case '.': @@ -2247,6 +2285,7 @@ Status parse_and_materialize_variant_columns(Block& block, const TabletSchema& t } // if doc mode is not enabled, no need to parse to doc value column if (!column.variant_enable_doc_mode()) { + configure_decimal_number_preserve_paths(column, &configs[i]); configs[i].parse_to = ParseConfig::ParseTo::OnlySubcolumns; continue; } diff --git a/be/src/format/json/new_json_reader.cpp b/be/src/format/json/new_json_reader.cpp index da141437fcf200..3f20365760dff0 100644 --- a/be/src/format/json/new_json_reader.cpp +++ b/be/src/format/json/new_json_reader.cpp @@ -1139,6 +1139,10 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& } auto primitive_type = type_desc->get_primitive_type(); + auto serde_options = _serde_options; + if (_is_load && remove_nullable(type_desc)->get_primitive_type() == TYPE_VARIANT) { + serde_options.deserialize_variant_from_json_as_raw_string = true; + } if (_is_load || !is_complex_type(primitive_type)) { if (value.type() == simdjson::ondemand::json_type::string) { std::string_view value_string; @@ -1157,7 +1161,7 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& Slice slice {value_string.data(), value_string.size()}; RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, - _serde_options)); + serde_options)); } else if (value.type() == simdjson::ondemand::json_type::boolean) { const char* str_value = nullptr; @@ -1169,14 +1173,14 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& } Slice slice {str_value, 1}; 
RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, - _serde_options)); + serde_options)); } else { // Maybe we can `switch (value->GetType()) case: kNumberType`. // Note that `if (value->IsInt())`, but column is FloatColumn. std::string_view json_str = simdjson::to_json_string(value); Slice slice {json_str.data(), json_str.size()}; RETURN_IF_ERROR(data_serde->deserialize_one_cell_from_json(*data_column_ptr, slice, - _serde_options)); + serde_options)); } } else if (primitive_type == TYPE_STRUCT) { if (value.type() != simdjson::ondemand::json_type::object) [[unlikely]] { @@ -1603,8 +1607,13 @@ Status NewJsonReader::_fill_missing_column(SlotDescriptor* slot_desc, DataTypeSe } else { const std::string& v_str = col_value->second; Slice column_default_value {v_str}; + auto serde_options = _serde_options; + if (_is_load && + remove_nullable(slot_desc->get_data_type_ptr())->get_primitive_type() == TYPE_VARIANT) { + serde_options.deserialize_variant_from_json_as_raw_string = true; + } RETURN_IF_ERROR(serde->deserialize_one_cell_from_json(*column_ptr, column_default_value, - _serde_options)); + serde_options)); } *valid = true; return Status::OK(); diff --git a/be/src/util/json/json_parser.cpp b/be/src/util/json/json_parser.cpp index 3df723c3849eac..14345ab35a3473 100644 --- a/be/src/util/json/json_parser.cpp +++ b/be/src/util/json/json_parser.cpp @@ -31,6 +31,7 @@ #include "common/cast_set.h" // IWYU pragma: keep #include "common/status.h" +#include "util/defer_op.h" #include "util/json/path_in_data.h" #include "util/json/simd_json_parser.h" @@ -43,12 +44,15 @@ std::optional JSONDataParser::parse(const char* begin, if (!parser.parse(begin, length, document)) { return {}; } + Defer release_parser {[&]() { parser.release(); }}; ParseContext context; // deprecated_enable_flatten_nested controls nested path traversal // NestedGroup expansion is now handled at storage layer context.deprecated_enable_flatten_nested = 
config.deprecated_enable_flatten_nested; context.check_duplicate_json_path = config.check_duplicate_json_path; context.is_top_array = document.isArray(); + context.preserve_decimal_number_paths = &config.preserve_decimal_number_paths; + context.preserve_decimal_number_path_matcher = &config.preserve_decimal_number_path_matcher; traverse(document, context); ParseResult result; result.values = std::move(context.values); @@ -84,8 +88,30 @@ void JSONDataParser::traverse(const Element& element, ParseContext& // we should set has_nested_in_flatten to false when traverse array finished for next array otherwise it will be true for next array ctx.has_nested_in_flatten = false; } else { - appendValueIfNotDuplicate(ctx, ctx.builder.get_parts(), getValueAsField(element)); + const bool preserve_number_as_string = shouldPreserveNumberAsString(ctx); + appendValueIfNotDuplicate(ctx, ctx.builder.get_parts(), + getValueAsField(element, preserve_number_as_string)); + } +} + +template +bool JSONDataParser::shouldPreserveNumberAsString(const ParseContext& ctx) const { + const bool has_exact_paths = ctx.preserve_decimal_number_paths != nullptr && + !ctx.preserve_decimal_number_paths->empty(); + const bool has_path_matcher = ctx.preserve_decimal_number_path_matcher != nullptr && + *ctx.preserve_decimal_number_path_matcher; + if (!has_exact_paths && !has_path_matcher) { + return false; + } + PathInData::Parts path = ctx.path_prefix_for_typed_paths; + const auto& current_parts = ctx.builder.get_parts(); + path.insert(path.end(), current_parts.begin(), current_parts.end()); + const auto current_path = PathInData(path).get_path(); + if (has_exact_paths && ctx.preserve_decimal_number_paths->find(current_path) != + ctx.preserve_decimal_number_paths->end()) { + return true; } + return has_path_matcher && (*ctx.preserve_decimal_number_path_matcher)(current_path); } template @@ -201,6 +227,12 @@ void JSONDataParser::traverseArray(const JSONArray& array, ParseCont array_ctx.has_nested_in_flatten = 
ctx.has_nested_in_flatten; array_ctx.is_top_array = ctx.is_top_array; array_ctx.check_duplicate_json_path = ctx.check_duplicate_json_path; + array_ctx.preserve_decimal_number_paths = ctx.preserve_decimal_number_paths; + array_ctx.preserve_decimal_number_path_matcher = ctx.preserve_decimal_number_path_matcher; + array_ctx.path_prefix_for_typed_paths = ctx.path_prefix_for_typed_paths; + const auto& current_parts = ctx.builder.get_parts(); + array_ctx.path_prefix_for_typed_paths.insert(array_ctx.path_prefix_for_typed_paths.end(), + current_parts.begin(), current_parts.end()); array_ctx.total_size = array.size(); for (auto it = array.begin(); it != array.end(); ++it) { traverseArrayElement(*it, array_ctx); @@ -231,6 +263,9 @@ void JSONDataParser::traverseArrayElement(const Element& element, element_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten; element_ctx.is_top_array = ctx.is_top_array; element_ctx.check_duplicate_json_path = ctx.check_duplicate_json_path; + element_ctx.preserve_decimal_number_paths = ctx.preserve_decimal_number_paths; + element_ctx.preserve_decimal_number_path_matcher = ctx.preserve_decimal_number_path_matcher; + element_ctx.path_prefix_for_typed_paths = ctx.path_prefix_for_typed_paths; traverse(element, element_ctx); auto& paths = element_ctx.paths; auto& values = element_ctx.values; diff --git a/be/src/util/json/json_parser.h b/be/src/util/json/json_parser.h index c4a165e899546f..ff783124b2e179 100644 --- a/be/src/util/json/json_parser.h +++ b/be/src/util/json/json_parser.h @@ -21,10 +21,12 @@ #pragma once #include -#include +#include +#include #include #include +#include #include #include @@ -40,7 +42,10 @@ namespace doris { template -Field getValueAsField(const Element& element) { +Field getValueAsField(const Element& element, bool preserve_number_as_string = false) { + if (preserve_number_as_string && element.isNumber()) { + return Field::create_field(String(element.getRawNumber())); + } // bool will convert to type FiledType::UInt64 if 
(element.isBool()) { return Field::create_field(element.getBool()); @@ -53,6 +58,9 @@ Field getValueAsField(const Element& element) { if (element.isUInt64()) { return Field::create_field(static_cast(element.getUInt64())); } + if (element.isBigInteger()) { + return Field::create_field(element.getDouble()); + } if (element.isDouble()) { return Field::create_field(element.getDouble()); } @@ -60,9 +68,9 @@ Field getValueAsField(const Element& element) { return Field::create_field(String(element.getString())); } if (element.isNull()) { - return Field(); + return {}; } - return Field(); + return {}; } template @@ -82,6 +90,10 @@ void writeValueAsJsonb(const Element& element, JsonbWriter& writer) { writer.writeInt128(static_cast(element.getUInt64())); return; } + if (element.isBigInteger()) { + writer.writeDouble(element.getDouble()); + return; + } if (element.isDouble()) { writer.writeDouble(element.getDouble()); return; @@ -107,6 +119,8 @@ struct ParseConfig { OnlyDocValueColumn = 1, }; ParseTo parse_to = ParseTo::OnlySubcolumns; + phmap::flat_hash_set preserve_decimal_number_paths; + std::function preserve_decimal_number_path_matcher; }; /// Result of parsing of a document. 
/// Contains all paths extracted from document @@ -133,6 +147,9 @@ class JSONDataParser { bool check_duplicate_json_path = false; bool has_nested_in_flatten = false; bool is_top_array = false; + const phmap::flat_hash_set* preserve_decimal_number_paths = nullptr; + const std::function* preserve_decimal_number_path_matcher = nullptr; + PathInData::Parts path_prefix_for_typed_paths; }; using PathPartsWithArray = std::pair; using PathToArray = phmap::flat_hash_map; @@ -145,12 +162,16 @@ class JSONDataParser { bool has_nested_in_flatten = false; bool is_top_array = false; bool check_duplicate_json_path = false; + const phmap::flat_hash_set* preserve_decimal_number_paths = nullptr; + const std::function* preserve_decimal_number_path_matcher = nullptr; + PathInData::Parts path_prefix_for_typed_paths; }; void traverse(const Element& element, ParseContext& ctx); void traverseObject(const JSONObject& object, ParseContext& ctx); void traverseArray(const JSONArray& array, ParseContext& ctx); void appendValueIfNotDuplicate(ParseContext& ctx, const PathInData::Parts& path, Field&& value); void traverseArrayElement(const Element& element, ParseArrayContext& ctx); + bool shouldPreserveNumberAsString(const ParseContext& ctx) const; void checkAmbiguousStructure(const ParseArrayContext& ctx, const std::vector& paths); void handleExistingPath(std::pair& path_data, diff --git a/be/src/util/json/simd_json_parser.h b/be/src/util/json/simd_json_parser.h index f450ac4a05f7bf..a3683c31a27d79 100644 --- a/be/src/util/json/simd_json_parser.h +++ b/be/src/util/json/simd_json_parser.h @@ -20,9 +20,15 @@ #pragma once -#include #include +#include +#include +#include +#include +#include +#include + #include "core/types.h" namespace doris { @@ -30,6 +36,31 @@ namespace doris { /// This class can be used as an argument for the template class FunctionJSON. /// It provides ability to parse JSONs using simdjson library. 
class SimdJSONParser { + struct Node { + enum class Type { + INT64, + UINT64, + BIG_INTEGER, + DOUBLE, + STRING, + ARRAY, + OBJECT, + BOOL, + NULL_VALUE, + }; + + Type type = Type::NULL_VALUE; + Int64 int64_value = 0; + UInt64 uint64_value = 0; + double double_value = 0; + bool bool_value = false; + std::string string_value; + std::string raw_number; + std::vector array_values; + std::vector object_keys; + std::vector object_values; + }; + public: class Array; class Object; @@ -37,54 +68,87 @@ class SimdJSONParser { /// array or object. class Element { public: - ALWAYS_INLINE Element() {} /// NOLINT - ALWAYS_INLINE Element(const simdjson::dom::element& element_) - : element(element_) {} /// NOLINT + ALWAYS_INLINE Element() {} /// NOLINT + ALWAYS_INLINE explicit Element(const Node* node_) : node(node_) {} /// NOLINT ALWAYS_INLINE bool isInt64() const { - return element.type() == simdjson::dom::element_type::INT64; + assert(node != nullptr); + return node->type == Node::Type::INT64; } ALWAYS_INLINE bool isUInt64() const { - return element.type() == simdjson::dom::element_type::UINT64; + assert(node != nullptr); + return node->type == Node::Type::UINT64; + } + ALWAYS_INLINE bool isBigInteger() const { + assert(node != nullptr); + return node->type == Node::Type::BIG_INTEGER; + } + ALWAYS_INLINE bool isNumber() const { + assert(node != nullptr); + return node->type == Node::Type::INT64 || node->type == Node::Type::UINT64 || + node->type == Node::Type::BIG_INTEGER || node->type == Node::Type::DOUBLE; } ALWAYS_INLINE bool isDouble() const { - return element.type() == simdjson::dom::element_type::DOUBLE; + assert(node != nullptr); + return node->type == Node::Type::DOUBLE; } ALWAYS_INLINE bool isString() const { - return element.type() == simdjson::dom::element_type::STRING; + assert(node != nullptr); + return node->type == Node::Type::STRING; } ALWAYS_INLINE bool isArray() const { - return element.type() == simdjson::dom::element_type::ARRAY; + assert(node != nullptr); + 
return node->type == Node::Type::ARRAY; } ALWAYS_INLINE bool isObject() const { - return element.type() == simdjson::dom::element_type::OBJECT; + assert(node != nullptr); + return node->type == Node::Type::OBJECT; } ALWAYS_INLINE bool isBool() const { - return element.type() == simdjson::dom::element_type::BOOLEAN; + assert(node != nullptr); + return node->type == Node::Type::BOOL; } ALWAYS_INLINE bool isNull() const { - return element.type() == simdjson::dom::element_type::NULL_VALUE; + assert(node != nullptr); + return node->type == Node::Type::NULL_VALUE; + } + ALWAYS_INLINE Int64 getInt64() const { + assert(node != nullptr); + return node->int64_value; + } + ALWAYS_INLINE double getDouble() const { + assert(node != nullptr); + return node->double_value; + } + ALWAYS_INLINE bool getBool() const { + assert(node != nullptr); + return node->bool_value; } - ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); } - ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); } - ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); } ALWAYS_INLINE std::string_view getString() const { - return element.get_string().value_unsafe(); + assert(node != nullptr); + return node->string_value; + } + ALWAYS_INLINE UInt64 getUInt64() const { + assert(node != nullptr); + return node->uint64_value; + } + ALWAYS_INLINE std::string_view getRawNumber() const { + assert(node != nullptr); + return node->raw_number; } - ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); } ALWAYS_INLINE Array getArray() const; ALWAYS_INLINE Object getObject() const; private: - simdjson::dom::element element; + const Node* node = nullptr; }; /// References an array in a JSON document. 
class Array { public: class Iterator { public: - ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator& it_) - : it(it_) {} /// NOLINT - ALWAYS_INLINE Element operator*() const { return *it; } + using NodeIterator = std::vector::const_iterator; + ALWAYS_INLINE explicit Iterator(NodeIterator it_) : it(it_) {} /// NOLINT + ALWAYS_INLINE Element operator*() const { return Element(&*it); } ALWAYS_INLINE Iterator& operator++() { ++it; return *this; @@ -94,19 +158,19 @@ class SimdJSONParser { } private: - simdjson::dom::array::iterator it; + NodeIterator it; }; - ALWAYS_INLINE Array(const simdjson::dom::array& array_) : array(array_) {} /// NOLINT - ALWAYS_INLINE Iterator begin() const { return array.begin(); } - ALWAYS_INLINE Iterator end() const { return array.end(); } - ALWAYS_INLINE size_t size() const { return array.size(); } + ALWAYS_INLINE explicit Array(const std::vector* array_) : array(array_) {} /// NOLINT + ALWAYS_INLINE Iterator begin() const { return Iterator(array->begin()); } + ALWAYS_INLINE Iterator end() const { return Iterator(array->end()); } + ALWAYS_INLINE size_t size() const { return array->size(); } ALWAYS_INLINE Element operator[](size_t index) const { assert(index < size()); - return array.at(index).value_unsafe(); + return Element(&(*array)[index]); } private: - simdjson::dom::array array; + const std::vector* array; }; using KeyValuePair = std::pair; /// References an object in a JSON document. 
@@ -114,67 +178,244 @@ class SimdJSONParser { public: class Iterator { public: - ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator& it_) - : it(it_) {} /// NOLINT + ALWAYS_INLINE explicit Iterator(const std::vector* keys_, + const std::vector* values_, size_t index_) + : index(index_), keys(keys_), values(values_) {} /// NOLINT ALWAYS_INLINE KeyValuePair operator*() const { - const auto& res = *it; - return {res.key, res.value}; + return {(*keys)[index], Element(&(*values)[index])}; } ALWAYS_INLINE Iterator& operator++() { - ++it; + ++index; return *this; } ALWAYS_INLINE Iterator operator++(int) { auto res = *this; - ++it; + ++(*this); return res; } /// NOLINT ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) { - return left.it != right.it; + return left.index != right.index; } ALWAYS_INLINE friend bool operator==(const Iterator& left, const Iterator& right) { return !(left != right); } private: - simdjson::dom::object::iterator it; + size_t index; + const std::vector* keys; + const std::vector* values; }; - ALWAYS_INLINE Object(const simdjson::dom::object& object_) : object(object_) {} /// NOLINT - ALWAYS_INLINE Iterator begin() const { return object.begin(); } - ALWAYS_INLINE Iterator end() const { return object.end(); } - ALWAYS_INLINE size_t size() const { return object.size(); } + ALWAYS_INLINE explicit Object(const std::vector* keys_, + const std::vector* values_) + : keys(keys_), values(values_) {} /// NOLINT + ALWAYS_INLINE Iterator begin() const { return Iterator(keys, values, 0); } + ALWAYS_INLINE Iterator end() const { return Iterator(keys, values, size()); } + ALWAYS_INLINE size_t size() const { return values->size(); } /// Optional: Provides access to an object's element by index. 
KeyValuePair operator[](size_t index) const { assert(index < size()); - auto it = object.begin(); - while (index--) { - ++it; - } - const auto& res = *it; - return {res.key, res.value}; + return {(*keys)[index], Element(&(*values)[index])}; } private: - simdjson::dom::object object; + const std::vector* keys; + const std::vector* values; }; /// Parses a JSON document, returns the reference to its root element if succeeded. bool parse(const char* data, size_t size, Element& result) { - auto document = parser.parse(data, size); - if (document.error()) { + root = Node(); + return parse_ondemand(data, size, result); + } + void release() { root = Node(); } + +private: + bool parse_ondemand(const char* data, size_t size, Element& result) { + simdjson::padded_string padded_json(data, size); + simdjson::ondemand::document document; + auto error = ondemand_parser.iterate(padded_json).get(document); + if (error) { return false; } - result = document.value_unsafe(); + if (!build_node(document, &root)) { + root = Node(); + return false; + } + result = Element(&root); return true; } -private: - simdjson::dom::parser parser; + static std::string_view trim_raw_number(std::string_view raw_number) { + auto is_space = [](char ch) { return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'; }; + while (!raw_number.empty() && is_space(raw_number.front())) { + raw_number.remove_prefix(1); + } + while (!raw_number.empty() && is_space(raw_number.back())) { + raw_number.remove_suffix(1); + } + return raw_number; + } + + template + static bool assign_raw_number(RawNumber&& raw_number, std::string* out) { + if constexpr (std::is_same_v, std::string_view>) { + *out = std::string(trim_raw_number(raw_number)); + return true; + } else { + std::string_view raw_number_view; + auto error = std::move(raw_number).get(raw_number_view); + if (error) { + return false; + } + *out = std::string(trim_raw_number(raw_number_view)); + return true; + } + } + + template + bool build_array_node(Value& value, 
Node* out) { + simdjson::ondemand::array array; + auto error = value.get_array().get(array); + if (error) { + return false; + } + out->type = Node::Type::ARRAY; + for (auto element_result : array) { + simdjson::ondemand::value element; + error = std::move(element_result).get(element); + if (error) { + return false; + } + Node element_node; + if (!build_node(element, &element_node)) { + return false; + } + out->array_values.push_back(std::move(element_node)); + } + return true; + } + + template + bool build_object_node(Value& value, Node* out) { + simdjson::ondemand::object object; + auto error = value.get_object().get(object); + if (error) { + return false; + } + out->type = Node::Type::OBJECT; + for (auto field_result : object) { + simdjson::ondemand::field field; + error = std::move(field_result).get(field); + if (error) { + return false; + } + std::string_view key; + error = field.unescaped_key().get(key); + if (error) { + return false; + } + std::string key_copy(key); + simdjson::ondemand::value field_value = field.value(); + Node field_node; + if (!build_node(field_value, &field_node)) { + return false; + } + out->object_keys.push_back(std::move(key_copy)); + out->object_values.push_back(std::move(field_node)); + } + return true; + } + + template + bool build_number_node(Value& value, Node* out) { + simdjson::ondemand::number_type number_type; + auto error = value.get_number_type().get(number_type); + if (error) { + return false; + } + switch (number_type) { + case simdjson::ondemand::number_type::signed_integer: + if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { + return false; + } + out->type = Node::Type::INT64; + error = value.get_int64().get(out->int64_value); + return !error; + case simdjson::ondemand::number_type::unsigned_integer: + if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { + return false; + } + out->type = Node::Type::UINT64; + error = value.get_uint64().get(out->uint64_value); + return !error; + case 
simdjson::ondemand::number_type::floating_point_number: + if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { + return false; + } + out->type = Node::Type::DOUBLE; + error = value.get_double().get(out->double_value); + return !error; + case simdjson::ondemand::number_type::big_integer: { + if (!assign_raw_number(value.raw_json_token(), &out->raw_number)) { + return false; + } + out->type = Node::Type::BIG_INTEGER; + error = value.get_double().get(out->double_value); + return !error; + } + } + return false; + } + + template + bool build_string_node(Value& value, Node* out) { + std::string_view str; + auto error = value.get_string().get(str); + if (error) { + return false; + } + out->type = Node::Type::STRING; + out->string_value = std::string(str); + return true; + } + + template + bool build_node(Value& value, Node* out) { + simdjson::ondemand::json_type type; + auto error = value.type().get(type); + if (error) { + return false; + } + switch (type) { + case simdjson::ondemand::json_type::array: + return build_array_node(value, out); + case simdjson::ondemand::json_type::object: + return build_object_node(value, out); + case simdjson::ondemand::json_type::number: + return build_number_node(value, out); + case simdjson::ondemand::json_type::string: { + return build_string_node(value, out); + } + case simdjson::ondemand::json_type::boolean: + out->type = Node::Type::BOOL; + error = value.get_bool().get(out->bool_value); + return !error; + case simdjson::ondemand::json_type::null: + out->type = Node::Type::NULL_VALUE; + return true; + } + return false; + } + + simdjson::ondemand::parser ondemand_parser; + Node root; }; inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const { - return element.get_array().value_unsafe(); + assert(node != nullptr); + return Array(&node->array_values); } inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const { - return element.get_object().value_unsafe(); + 
assert(node != nullptr); + return Object(&node->object_keys, &node->object_values); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/test/core/jsonb/json_parser_test.cpp b/be/test/core/jsonb/json_parser_test.cpp index f8cfdf8e1626bd..c013a610890aa2 100644 --- a/be/test/core/jsonb/json_parser_test.cpp +++ b/be/test/core/jsonb/json_parser_test.cpp @@ -19,6 +19,7 @@ #include +#include #include #include "common/config.h" @@ -49,6 +50,77 @@ TEST(JsonParserTest, ParseSimpleTypes) { EXPECT_EQ(parse_result_double.paths[0].get_path(), ""); EXPECT_EQ(parse_result_double.values[0].get_type(), doris::PrimitiveType::TYPE_DOUBLE); + // untyped high precision decimal tokens keep the regular numeric behavior + std::string high_precision_decimal = "999999999999999999999999999.999999999"; + result = parser.parse(high_precision_decimal.c_str(), high_precision_decimal.size(), config); + ASSERT_TRUE(result.has_value()); + + auto parse_result_decimal = result.value(); + EXPECT_EQ(parse_result_decimal.paths[0].get_path(), ""); + EXPECT_EQ(parse_result_decimal.values[0].get_type(), doris::PrimitiveType::TYPE_DOUBLE); + + // typed decimal paths keep the original text for later decimal casts + config.preserve_decimal_number_paths.emplace(""); + result = parser.parse(high_precision_decimal.c_str(), high_precision_decimal.size(), config); + ASSERT_TRUE(result.has_value()); + + parse_result_decimal = result.value(); + EXPECT_EQ(parse_result_decimal.paths[0].get_path(), ""); + EXPECT_EQ(parse_result_decimal.values[0].get_type(), doris::PrimitiveType::TYPE_STRING); + EXPECT_EQ(parse_result_decimal.values[0].get(), + high_precision_decimal); + + std::string scale_sensitive_decimal = "0.57"; + result = parser.parse(scale_sensitive_decimal.c_str(), scale_sensitive_decimal.size(), config); + ASSERT_TRUE(result.has_value()); + + parse_result_decimal = result.value(); + EXPECT_EQ(parse_result_decimal.paths[0].get_path(), ""); + 
EXPECT_EQ(parse_result_decimal.values[0].get_type(), doris::PrimitiveType::TYPE_STRING); + EXPECT_EQ(parse_result_decimal.values[0].get(), + scale_sensitive_decimal); + + std::string high_precision_decimal_with_spaces = high_precision_decimal + " \n"; + result = parser.parse(high_precision_decimal_with_spaces.c_str(), + high_precision_decimal_with_spaces.size(), config); + ASSERT_TRUE(result.has_value()); + + parse_result_decimal = result.value(); + EXPECT_EQ(parse_result_decimal.paths[0].get_path(), ""); + EXPECT_EQ(parse_result_decimal.values[0].get_type(), doris::PrimitiveType::TYPE_STRING); + EXPECT_EQ(parse_result_decimal.values[0].get(), + high_precision_decimal); + + std::string high_precision_decimal_exponent = "999999999999999999999999999.999999999e0"; + result = parser.parse(high_precision_decimal_exponent.c_str(), + high_precision_decimal_exponent.size(), config); + ASSERT_TRUE(result.has_value()); + + parse_result_decimal = result.value(); + EXPECT_EQ(parse_result_decimal.paths[0].get_path(), ""); + EXPECT_EQ(parse_result_decimal.values[0].get_type(), doris::PrimitiveType::TYPE_STRING); + EXPECT_EQ(parse_result_decimal.values[0].get(), + high_precision_decimal_exponent); + + std::string typed_integer = "123"; + result = parser.parse(typed_integer.c_str(), typed_integer.size(), config); + ASSERT_TRUE(result.has_value()); + + auto parse_result_typed_integer = result.value(); + EXPECT_EQ(parse_result_typed_integer.paths[0].get_path(), ""); + EXPECT_EQ(parse_result_typed_integer.values[0].get_type(), doris::PrimitiveType::TYPE_STRING); + EXPECT_EQ(parse_result_typed_integer.values[0].get(), + typed_integer); + + ParseConfig untyped_decimal_config; + result = parser.parse(high_precision_decimal_exponent.c_str(), + high_precision_decimal_exponent.size(), untyped_decimal_config); + ASSERT_TRUE(result.has_value()); + + parse_result_decimal = result.value(); + EXPECT_EQ(parse_result_decimal.paths[0].get_path(), ""); + 
EXPECT_EQ(parse_result_decimal.values[0].get_type(), doris::PrimitiveType::TYPE_DOUBLE); + // bool result = parser.parse("true", 4, config); ASSERT_TRUE(result.has_value()); @@ -71,11 +143,57 @@ TEST(JsonParserTest, ParseSimpleTypes) { EXPECT_EQ(parse_result_string.values[0].get_type(), doris::PrimitiveType::TYPE_STRING); // largeint - result = parser.parse("12345678901234567890", 20, config); + ParseConfig untyped_integer_config; + result = parser.parse("12345678901234567890", 20, untyped_integer_config); ASSERT_TRUE(result.has_value()); auto parse_result_bigint = result.value(); EXPECT_EQ(parse_result_bigint.paths[0].get_path(), ""); EXPECT_EQ(parse_result_bigint.values[0].get_type(), doris::PrimitiveType::TYPE_LARGEINT); + + // untyped integers beyond uint64 keep the regular numeric behavior. + std::string big_integer = "18446744073709551616"; + result = parser.parse(big_integer.c_str(), big_integer.size(), untyped_integer_config); + ASSERT_TRUE(result.has_value()); + auto parse_result_untyped_big_integer = result.value(); + EXPECT_EQ(parse_result_untyped_big_integer.paths[0].get_path(), ""); + EXPECT_EQ(parse_result_untyped_big_integer.values[0].get_type(), + doris::PrimitiveType::TYPE_DOUBLE); + + ParseConfig typed_integer_config; + typed_integer_config.preserve_decimal_number_paths.emplace(""); + result = parser.parse(big_integer.c_str(), big_integer.size(), typed_integer_config); + ASSERT_TRUE(result.has_value()); + auto parse_result_big_integer = result.value(); + EXPECT_EQ(parse_result_big_integer.paths[0].get_path(), ""); + EXPECT_EQ(parse_result_big_integer.values[0].get_type(), doris::PrimitiveType::TYPE_STRING); + EXPECT_EQ(parse_result_big_integer.values[0].get(), + big_integer); + + std::string decimal256_integer = "99999999999999999999999999999999999999999999999999"; + result = parser.parse(decimal256_integer.c_str(), decimal256_integer.size(), + typed_integer_config); + ASSERT_TRUE(result.has_value()); + auto parse_result_decimal256_integer = 
result.value(); + EXPECT_EQ(parse_result_decimal256_integer.paths[0].get_path(), ""); + EXPECT_EQ(parse_result_decimal256_integer.values[0].get_type(), + doris::PrimitiveType::TYPE_STRING); + EXPECT_EQ(parse_result_decimal256_integer.values[0].get(), + decimal256_integer); + + ParseConfig mixed_number_config; + mixed_number_config.preserve_decimal_number_paths.emplace("amount"); + std::string mixed_json = + R"({"amount":0.57,"id":18446744073709551616,"normal":12345678901234567890})"; + result = parser.parse(mixed_json.c_str(), mixed_json.size(), mixed_number_config); + ASSERT_TRUE(result.has_value()); + auto parse_result_mixed_number = result.value(); + ASSERT_EQ(parse_result_mixed_number.paths.size(), 3); + EXPECT_EQ(parse_result_mixed_number.paths[0].get_path(), "amount"); + EXPECT_EQ(parse_result_mixed_number.values[0].get_type(), doris::PrimitiveType::TYPE_STRING); + EXPECT_EQ(parse_result_mixed_number.paths[1].get_path(), "id"); + EXPECT_EQ(parse_result_mixed_number.values[1].get_type(), doris::PrimitiveType::TYPE_DOUBLE); + EXPECT_EQ(parse_result_mixed_number.paths[2].get_path(), "normal"); + EXPECT_EQ(parse_result_mixed_number.values[2].get_type(), doris::PrimitiveType::TYPE_LARGEINT); } TEST(JsonParserTest, ParseObjectAndArray) { @@ -117,6 +235,69 @@ TEST(JsonParserTest, ParseObjectAndArray) { EXPECT_EQ(array_field[5].get_type(), doris::PrimitiveType::TYPE_LARGEINT); } +TEST(JsonParserTest, PreserveDecimalNumbersForTypedPaths) { + JSONDataParser parser; + ParseConfig config; + config.preserve_decimal_number_paths.emplace("a"); + + std::string json = R"({"a":[999999999999999999999999999.999999999 ]})"; + auto result = parser.parse(json.c_str(), json.size(), config); + ASSERT_TRUE(result.has_value()); + auto& parse_result_decimal_array = result.value(); + EXPECT_EQ(parse_result_decimal_array.paths[0].get_path(), "a"); + EXPECT_EQ(parse_result_decimal_array.values[0].get_type(), doris::PrimitiveType::TYPE_ARRAY); + auto& decimal_array = + 
parse_result_decimal_array.values[0].get(); + ASSERT_EQ(decimal_array.size(), 1); + EXPECT_EQ(decimal_array[0].get_type(), doris::PrimitiveType::TYPE_STRING); + EXPECT_EQ(decimal_array[0].get(), + "999999999999999999999999999.999999999"); + + ParseConfig untyped_config; + result = parser.parse(json.c_str(), json.size(), untyped_config); + ASSERT_TRUE(result.has_value()); + auto& parse_result_untyped_decimal_array = result.value(); + auto& untyped_decimal_array = + parse_result_untyped_decimal_array.values[0].get(); + ASSERT_EQ(untyped_decimal_array.size(), 1); + EXPECT_EQ(untyped_decimal_array[0].get_type(), doris::PrimitiveType::TYPE_DOUBLE); +} + +TEST(JsonParserTest, PreserveDecimalNumbersByPathMatcher) { + JSONDataParser parser; + + ParseConfig matcher_config; + matcher_config.preserve_decimal_number_path_matcher = [](std::string_view path) { + return path.size() >= 8 && path.substr(0, 8) == "decimal_"; + }; + + std::string json = + R"({"decimal_1":999999999999999999999999999.999999999,"other":999999999999999999999999999.999999999})"; + auto result = parser.parse(json.c_str(), json.size(), matcher_config); + ASSERT_TRUE(result.has_value()); + auto& parse_result_matcher = result.value(); + EXPECT_EQ(parse_result_matcher.paths[0].get_path(), "decimal_1"); + EXPECT_EQ(parse_result_matcher.values[0].get_type(), doris::PrimitiveType::TYPE_STRING); + EXPECT_EQ(parse_result_matcher.paths[1].get_path(), "other"); + EXPECT_EQ(parse_result_matcher.values[1].get_type(), doris::PrimitiveType::TYPE_DOUBLE); +} + +TEST(JsonParserTest, PreserveDecimalNumbersForEscapedTypedPath) { + JSONDataParser parser; + ParseConfig escaped_key_config; + escaped_key_config.preserve_decimal_number_paths.emplace("ab.decimal"); + + std::string json = R"({"a\u0062":{"decimal":0.57,"nested_key":"value"}})"; + auto result = parser.parse(json.c_str(), json.size(), escaped_key_config); + ASSERT_TRUE(result.has_value()); + auto& parse_result_escaped_key = result.value(); + 
ASSERT_EQ(parse_result_escaped_key.paths.size(), 2); + EXPECT_EQ(parse_result_escaped_key.paths[0].get_path(), "ab.decimal"); + EXPECT_EQ(parse_result_escaped_key.values[0].get_type(), doris::PrimitiveType::TYPE_STRING); + EXPECT_EQ(parse_result_escaped_key.values[0].get(), "0.57"); + EXPECT_EQ(parse_result_escaped_key.paths[1].get_path(), "ab.nested_key"); +} + TEST(JsonParserTest, ParseMultiLevelNestedArray) { JSONDataParser parser; ParseConfig config; @@ -313,6 +494,18 @@ TEST(JsonParserTest, TestNestedArrayWithDifferentConfigs) { // EXPECT_ANY_THROW(parser.parse(json1.c_str(), json1.size(), config2)); } +TEST(JsonParserTest, BigIntegerInJsonbKeepsNumericParse) { + JSONDataParser parser; + ParseConfig config; + config.deprecated_enable_flatten_nested = false; + + std::string json = R"({"nested": [{"big": 18446744073709551616}]})"; + auto result = parser.parse(json.c_str(), json.size(), config); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result->values.size(), 1); + EXPECT_EQ(result->values[0].get_type(), doris::PrimitiveType::TYPE_JSONB); +} + // Test case for directly calling handleNewPath to cover the if (!nested_key.empty()) branch TEST(JsonParserTest, TestHandleNewPathDirectCall) { JSONDataParser parser; @@ -455,11 +648,11 @@ TEST(JsonParserTest, KeyLengthLimitByConfig) { ScopedMaxJsonKeyLength guard(10); std::string key11(11, 'a'); - std::string obj_json = "{\"" + key11 + "\": 1}"; + std::string obj_json = R"({")" + key11 + R"(": 1})"; EXPECT_ANY_THROW(parser.parse(obj_json.c_str(), obj_json.size(), config)); config.deprecated_enable_flatten_nested = false; - std::string jsonb_json = "{\"a\": [{\"" + key11 + "\": 1}]}"; + std::string jsonb_json = R"({"a": [{")" + key11 + R"(": 1}]})"; EXPECT_ANY_THROW(parser.parse(jsonb_json.c_str(), jsonb_json.size(), config)); } @@ -467,12 +660,12 @@ TEST(JsonParserTest, KeyLengthLimitByConfig) { ScopedMaxJsonKeyLength guard(255); std::string key255(255, 'b'); - std::string obj_json = "{\"" + key255 + "\": 1}"; + 
std::string obj_json = R"({")" + key255 + R"(": 1})"; auto result = parser.parse(obj_json.c_str(), obj_json.size(), config); ASSERT_TRUE(result.has_value()); config.deprecated_enable_flatten_nested = false; - std::string jsonb_json = "{\"a\": [{\"" + key255 + "\": 1}]}"; + std::string jsonb_json = R"({"a": [{")" + key255 + R"(": 1}]})"; result = parser.parse(jsonb_json.c_str(), jsonb_json.size(), config); ASSERT_TRUE(result.has_value()); ASSERT_EQ(result->values.size(), 1); diff --git a/regression-test/data/variant_p0/predefine/test_variant_high_precision_decimal_stream.json b/regression-test/data/variant_p0/predefine/test_variant_high_precision_decimal_stream.json new file mode 100644 index 00000000000000..2959dfcd16883b --- /dev/null +++ b/regression-test/data/variant_p0/predefine/test_variant_high_precision_decimal_stream.json @@ -0,0 +1 @@ +{"id":2,"v":"{\"number_1\":999999999999999999999999999.999999999,\"number_2\":0.0000000001,\"number_3\":-999999999999999999999999999.999999999,\"number_4\":0.57,\"number_big_integer\":99999999999999999999999999999999999999999999999999,\"number_scientific_1\":999999999999999999999999999.999999999e0,\"number_scientific_2\":1e-10,\"numberArray_1\":[999999999999999999999999999.999999999,-999999999999999999999999999.999999999],\"glob_decimal_1\":999999999999999999999999999.999999999,\"untyped_decimal\":999999999999999999999999999.999999999,\"untyped_big_integer\":340282366920938463463374607431768211456}"} diff --git a/regression-test/data/variant_p0/predefine/test_variant_high_precision_decimal_stream_object.json b/regression-test/data/variant_p0/predefine/test_variant_high_precision_decimal_stream_object.json new file mode 100644 index 00000000000000..d97146a07c536c --- /dev/null +++ b/regression-test/data/variant_p0/predefine/test_variant_high_precision_decimal_stream_object.json @@ -0,0 +1 @@ 
+{"id":4,"v":{"number_1":999999999999999999999999999.999999999,"number_2":0.0000000001,"number_3":-999999999999999999999999999.999999999,"number_4":0.57,"number_big_integer":99999999999999999999999999999999999999999999999999,"number_scientific_1":999999999999999999999999999.999999999e0,"number_scientific_2":1e-10,"numberArray_1":[999999999999999999999999999.999999999,-999999999999999999999999999.999999999],"glob_decimal_1":999999999999999999999999999.999999999,"untyped_decimal":999999999999999999999999999.999999999,"untyped_big_integer":340282366920938463463374607431768211456}} diff --git a/regression-test/suites/variant_p0/predefine/test_variant_high_precision_decimal.groovy b/regression-test/suites/variant_p0/predefine/test_variant_high_precision_decimal.groovy new file mode 100644 index 00000000000000..3ce771b7053f06 --- /dev/null +++ b/regression-test/suites/variant_p0/predefine/test_variant_high_precision_decimal.groovy @@ -0,0 +1,177 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+// Regression suite: JSON numbers that land on VARIANT paths declared with a decimal type
+// must compare equal to the exact decimal literal, while undeclared paths are expected to
+// report the type "double". The same payload is written four times through different
+// ingestion routes so each route is checked by the same assertions.
+suite("test_variant_high_precision_decimal", "p0") {
+    sql """ set default_variant_enable_doc_mode = false """
+    sql """ set default_variant_enable_typed_paths_to_sparse = false """
+    // NOTE(review): presumably required because number_big_integer is declared as
+    // decimalv3(76,0) below — confirm against the decimal256 documentation.
+    sql """ set enable_decimal256 = true """
+
+    // 50-digit integer; interpolated into both the INSERT payload and the expected value.
+    def decimal256Integer = "99999999999999999999999999999999999999999999999999"
+
+    sql "DROP TABLE IF EXISTS test_variant_high_precision_decimal"
+    sql "DROP TABLE IF EXISTS test_variant_high_precision_decimal_json_stage"
+    // Target table: every path asserted on below is declared with an explicit decimal type.
+    // The array path needs its element type spelled out; it matches the decimalv3(38,10)
+    // operands used by the array_contains assertions at the end of the suite.
+    sql """
+        CREATE TABLE test_variant_high_precision_decimal (
+            `id` bigint NOT NULL,
+            `v` variant<
+                'number_1':decimalv3(38,10),
+                'number_2':decimalv3(38,10),
+                'number_3':decimalv3(38,10),
+                'number_4':decimalv3(38,18),
+                'number_big_integer':decimalv3(76,0),
+                'number_scientific_1':decimalv3(38,10),
+                'number_scientific_2':decimalv3(38,10),
+                'numberArray_1':array<decimalv3(38,10)>,
+                'glob_decimal_*':decimalv3(38,10),
+                properties (
+                    "variant_enable_typed_paths_to_sparse" = "true",
+                    "variant_max_subcolumns_count" = "1"
+                )
+            > NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "disable_auto_compaction" = "true"
+        )
+    """
+    // Staging table: holds the JSON document as a plain string so it can be copied into the
+    // variant column with INSERT ... SELECT.
+    sql """
+        CREATE TABLE test_variant_high_precision_decimal_json_stage (
+            `id` bigint NOT NULL,
+            `v` string NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+        )
+    """
+
+    // Row 1 of 4: SQL INSERT with the JSON document as a string literal.
+    sql """
+        INSERT INTO test_variant_high_precision_decimal VALUES
+        (
+            1,
+            '{
+                "number_1": 999999999999999999999999999.999999999,
+                "number_2": 0.0000000001,
+                "number_3": -999999999999999999999999999.999999999,
+                "number_4": 0.57,
+                "number_big_integer": ${decimal256Integer},
+                "number_scientific_1": 999999999999999999999999999.999999999e0,
+                "number_scientific_2": 1e-10,
+                "numberArray_1": [
+                    999999999999999999999999999.999999999,
+                    -999999999999999999999999999.999999999
+                ],
+                "glob_decimal_1": 999999999999999999999999999.999999999,
+                "untyped_decimal": 999999999999999999999999999.999999999,
+                "untyped_big_integer": 340282366920938463463374607431768211456
+            }'
+        )
+    """
+    sql "sync"
+
+    // Stream-loads one line-delimited JSON file into `tableName` and requires that the load
+    // succeeds with exactly one loaded row. `logLabel` is only used in the log message.
+    def streamLoadJsonByLine = { String tableName, String fileName, String logLabel ->
+        streamLoad {
+            table tableName
+            set 'read_json_by_line', 'true'
+            set 'format', 'json'
+            set 'max_filter_ratio', '0'
+            file fileName
+            time 10000
+
+            check { result, exception, startTime, endTime ->
+                if (exception != null) {
+                    throw exception
+                }
+                logger.info("Stream load ${logLabel} result: ${result}".toString())
+                def json = parseJson(result)
+                assertEquals("success", json.Status.toLowerCase())
+                assertEquals(1, json.NumberLoadedRows as int)
+            }
+        }
+    }
+
+    // Row 2 of 4: stream load into the string staging table, then copy into the variant table.
+    streamLoadJsonByLine("test_variant_high_precision_decimal_json_stage",
+            "test_variant_high_precision_decimal_stream.json",
+            "test_variant_high_precision_decimal_stream.json")
+    sql """
+        INSERT INTO test_variant_high_precision_decimal
+        SELECT id, v FROM test_variant_high_precision_decimal_json_stage
+    """
+    sql "sync"
+
+    // Row 3 of 4: same file loaded straight into the variant table ("v" is a JSON string).
+    streamLoadJsonByLine("test_variant_high_precision_decimal",
+            "test_variant_high_precision_decimal_stream.json",
+            "test_variant_high_precision_decimal_stream.json to variant")
+
+    // Row 4 of 4: file whose "v" is a nested JSON object rather than a string.
+    streamLoadJsonByLine("test_variant_high_precision_decimal",
+            "test_variant_high_precision_decimal_stream_object.json",
+            "test_variant_high_precision_decimal_stream_object.json to variant")
+    sql "sync"
+
+    // Every predicate must hold for all four rows written above.
+    def assertAllRowsMatch = { String predicate ->
+        def result = sql "SELECT count(*) FROM test_variant_high_precision_decimal WHERE ${predicate}"
+        assertEquals(4, result[0][0] as int)
+    }
+
+    assertAllRowsMatch("cast(v['number_1'] as decimalv3(38,10)) = cast('999999999999999999999999999.9999999990' as decimalv3(38,10))")
+    assertAllRowsMatch("cast(v['number_2'] as decimalv3(38,10)) = cast('0.0000000001' as decimalv3(38,10))")
+    assertAllRowsMatch("cast(v['number_3'] as decimalv3(38,10)) = cast('-999999999999999999999999999.9999999990' as decimalv3(38,10))")
+    assertAllRowsMatch("cast(v['number_4'] as decimalv3(38,18)) = cast('0.570000000000000000' as decimalv3(38,18))")
+    assertAllRowsMatch("cast(v['number_big_integer'] as decimalv3(76,0)) = cast('${decimal256Integer}' as decimalv3(76,0))")
+    assertAllRowsMatch("cast(v['number_scientific_1'] as decimalv3(38,10)) = cast('999999999999999999999999999.9999999990' as decimalv3(38,10))")
+    assertAllRowsMatch("cast(v['number_scientific_2'] as decimalv3(38,10)) = cast('0.0000000001' as decimalv3(38,10))")
+    assertAllRowsMatch("cast(v['glob_decimal_1'] as decimalv3(38,10)) = cast('999999999999999999999999999.9999999990' as decimalv3(38,10))")
+    assertAllRowsMatch("array_contains(cast(v['numberArray_1'] as array<decimalv3(38,10)>), cast('999999999999999999999999999.9999999990' as decimalv3(38,10)))")
+    assertAllRowsMatch("array_contains(cast(v['numberArray_1'] as array<decimalv3(38,10)>), cast('-999999999999999999999999999.9999999990' as decimalv3(38,10)))")
+
+    // Paths without a declared type are expected to report "double" in all four rows.
+    def untypedTypeCount = sql """ SELECT count(*)
+        FROM test_variant_high_precision_decimal
+        WHERE cast(variant_type(v['untyped_decimal']) as string) = '{"":"double"}' """
+    assertEquals(4, untypedTypeCount[0][0] as int)
+
+    def untypedBigIntegerTypeCount = sql """ SELECT count(*)
+        FROM test_variant_high_precision_decimal
+        WHERE cast(variant_type(v['untyped_big_integer']) as string) = '{"":"double"}' """
+    assertEquals(4, untypedBigIntegerTypeCount[0][0] as int)
+}