From c59e3952a50c2f0fd505d4606c20213ff17c7021 Mon Sep 17 00:00:00 2001
From: lihangyu
Date: Tue, 12 May 2026 13:22:32 +0800
Subject: [PATCH] [feature](be) Support flexible variant partial update

### What problem does this PR solve?

Issue Number: close #48122

Related PR: N/A

Problem Summary:

Allow MOW unique flexible partial update loads to patch VARIANT object paths
while preserving undeclared paths; this covers duplicate-key batching,
sequence semantics, routine-load replay, publish-conflict replay, and
rejected-row filtering for invalid VARIANT patch values. V1 accepts JSON
object patches only and rejects unsupported VARIANT modes.

Also reject explicit merge_type, WHERE, delete, and sequence semantics in
flexible partial update requests across stream load and routine load, so
unsupported semantics cannot be accepted accidentally.

Gate VARIANT flexible partial update on an explicit BE heartbeat capability
so the FE cannot enable marker-producing loads against backends that are old
or dead.

### Release note

MOW unique tables can use JSON flexible partial update loads to patch object
fields inside supported VARIANT columns. Flexible partial update rejects
explicit merge_type, WHERE, delete, and sequence options, and VARIANT
flexible partial update requires every backend in the current cluster to be
alive and to advertise the required heartbeat capability.

### Check List (For Author)

- Test: Unit Test and Regression test
- ./build.sh --be --fe
- ./build.sh --fe
- ./run-fe-ut.sh --run org.apache.doris.catalog.OlapTableTest
- ./run-fe-ut.sh --run org.apache.doris.nereids.load.NereidsStreamLoadPlannerTest
- ./run-fe-ut.sh --run org.apache.doris.catalog.OlapTableTest,org.apache.doris.system.SystemInfoServiceTest
- ./run-fe-ut.sh --run org.apache.doris.system.SystemInfoServiceTest
- ./run-fe-ut.sh --run org.apache.doris.load.routineload.RoutineLoadJobTest,org.apache.doris.persist.AlterRoutineLoadOperationLogTest
- ./run-fe-ut.sh --run org.apache.doris.load.routineload.RoutineLoadJobTest,org.apache.doris.load.routineload.KinesisRoutineLoadJobTest,org.apache.doris.persist.AlterRoutineLoadOperationLogTest,org.apache.doris.catalog.OlapTableTest
- ./run-fe-ut.sh --run org.apache.doris.nereids.trees.plans.commands.AlterTableCommandTest,org.apache.doris.catalog.OlapTableTest,org.apache.doris.load.routineload.RoutineLoadJobTest,org.apache.doris.load.routineload.KinesisRoutineLoadJobTest,org.apache.doris.persist.AlterRoutineLoadOperationLogTest
- ./run-be-ut.sh --run --filter='*VariantUtilTest*'
- ./run-regression-test.sh --run --conf tmp/regression-conf.auto.groovy -d unique_with_mow_p0/flexible -s test_flexible_partial_update_variant
- ./run-regression-test.sh --run --conf tmp/regression-conf.auto.groovy -d unique_with_mow_p0/flexible -s test_flexible_partial_update_restricts
- ./run-regression-test.sh --run --conf tmp/regression-conf.auto.groovy -d unique_with_mow_p0/flexible -s test_flexible_partial_update_property
- ./run-regression-test.sh --run --conf tmp/regression-conf.auto.groovy -d load_p0/routine_load -s test_routine_load_flexible_partial_update
- ./run-regression-test.sh --run --conf tmp/regression-conf.auto.groovy -d load_p0/routine_load -s test_routine_load_flexible_partial_update_validate
- git diff --check
- git diff --cached --check
- clang-format lint action script over modified C++ files
- Behavior changed: Yes. JSON flexible partial update on MOW unique tables now merges declared VARIANT object paths and preserves undeclared paths.
  Flexible partial update now rejects explicit merge_type, WHERE, delete, and
  sequence clauses for stream/routine load request shapes. VARIANT flexible
  partial update now requires each backend in the current cluster to be alive
  and advertise the required heartbeat capability.
- Does this need documentation: No
---
 be/src/agent/heartbeat_server.cpp             |    1 +
 be/src/cloud/cloud_rowset_builder.cpp         |    1 +
 be/src/exec/common/variant_util.cpp           |  667 +++++++++-
 be/src/exec/common/variant_util.h             |   18 +-
 be/src/format/json/new_json_reader.cpp        |  100 +-
 be/src/format/json/new_json_reader.h          |    7 +-
 be/src/load/delta_writer/delta_writer_v2.cpp  |    3 +-
 be/src/load/memtable/memtable.cpp             |   31 +-
 be/src/load/memtable/memtable.h               |    1 +
 be/src/service/http/action/stream_load.cpp    |    4 +-
 be/src/storage/partial_update_info.cpp        |  187 ++-
 be/src/storage/partial_update_info.h          |   11 +-
 be/src/storage/rowset_builder.cpp             |   24 +-
 be/src/storage/rowset_builder.h               |    2 +
 be/src/storage/segment/column_writer.cpp      |    1 +
 .../variant/variant_column_writer_impl.cpp    |    2 +
 .../segment/vertical_segment_writer.cpp       |   74 +-
 be/src/storage/tablet/base_tablet.cpp         |   30 +-
 be/src/util/json/json_parser.cpp              |   43 +-
 be/src/util/json/json_parser.h                |   13 +-
 .../exec/common/schema_util_rowset_test.cpp   |    1 +
 be/test/storage/partial_update_info_test.cpp  |  186 +++
 .../variant_column_writer_reader_test.cpp     |    3 +
 be/test/storage/segment/variant_util_test.cpp | 1098 +++++++++++++++++
 .../doris/alter/SchemaChangeHandler.java      |   16 +
 .../org/apache/doris/catalog/OlapTable.java   |   93 +-
 .../apache/doris/load/RoutineLoadDesc.java    |   14 +
 .../load/routineload/RoutineLoadJob.java      |  141 ++-
 .../load/routineload/RoutineLoadManager.java  |    1 -
 .../kafka/KafkaRoutineLoadJob.java            |    9 +-
 .../kinesis/KinesisRoutineLoadJob.java        |    9 +-
 .../nereids/load/NereidsLoadTaskInfo.java     |    4 +
 .../load/NereidsStreamLoadPlanner.java        |   33 +
 .../nereids/load/NereidsStreamLoadTask.java   |    7 +
 .../nereids/parser/LogicalPlanBuilder.java    |    5 +-
 .../plans/commands/AlterTableCommand.java     |   37 +
 .../commands/info/CreateRoutineLoadInfo.java  |   67 +-
 .../AlterRoutineLoadJobOperationLog.java      |   93 +-
 .../java/org/apache/doris/system/Backend.java |   15 +-
 .../doris/system/BackendHbResponse.java       |   32 +-
 .../org/apache/doris/system/HeartbeatMgr.java |    7 +-
 .../apache/doris/catalog/OlapTableTest.java   |  173 +++
 .../KinesisRoutineLoadJobTest.java            |  125 ++
 .../load/routineload/RoutineLoadJobTest.java  |  529 ++++++++
 .../load/NereidsStreamLoadPlannerTest.java    |  160 +++
 .../trees/plans/CreateTableCommandTest.java   |   36 +
 .../plans/commands/AlterTableCommandTest.java |  163 +++
 .../AlterRoutineLoadOperationLogTest.java     |   19 +-
 .../doris/system/SystemInfoServiceTest.java   |   29 +-
 gensrc/proto/olap_file.proto                  |    1 +
 gensrc/thrift/FrontendService.thrift          |    1 +
 gensrc/thrift/HeartbeatService.thrift         |    1 +
 .../flexible/variant_patch_merge.json         |    3 +
 ...outine_load_flexible_partial_update.groovy |  382 ++-
 ...ad_flexible_partial_update_validate.groovy |  301 +++++
 ...ial_update_variant_publish_conflict.groovy |  303 +++++
 ...st_flexible_partial_update_property.groovy |  175 ++-
 ...t_flexible_partial_update_restricts.groovy |   78 +-
 ...est_flexible_partial_update_variant.groovy |  335 +++++
 59 files changed, 5640 insertions(+), 265 deletions(-)
 create mode 100644 be/test/storage/partial_update_info_test.cpp
 create mode 100644 fe/fe-core/src/test/java/org/apache/doris/nereids/load/NereidsStreamLoadPlannerTest.java
 create mode 100644 regression-test/data/unique_with_mow_p0/flexible/variant_patch_merge.json
 create mode 100644
regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update_validate.groovy create mode 100644 regression-test/suites/unique_with_mow_p0/flexible/publish/test_flexible_partial_update_variant_publish_conflict.groovy create mode 100644 regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_variant.groovy diff --git a/be/src/agent/heartbeat_server.cpp b/be/src/agent/heartbeat_server.cpp index 327b14cf669d02..dcdd9c56c19aeb 100644 --- a/be/src/agent/heartbeat_server.cpp +++ b/be/src/agent/heartbeat_server.cpp @@ -90,6 +90,7 @@ void HeartbeatServer::heartbeat(THeartbeatResult& heartbeat_result, heartbeat_result.backend_info.__set_be_node_role(config::be_node_role); // If be is gracefully stop, then k_doris_exist is set to true heartbeat_result.backend_info.__set_is_shutdown(doris::k_doris_exit); + heartbeat_result.backend_info.__set_supports_variant_flexible_partial_update(true); heartbeat_result.backend_info.__set_fragment_executing_count( get_fragment_executing_count()); heartbeat_result.backend_info.__set_fragment_last_active_time( diff --git a/be/src/cloud/cloud_rowset_builder.cpp b/be/src/cloud/cloud_rowset_builder.cpp index abe36ed5790d71..e804a8425369ca 100644 --- a/be/src/cloud/cloud_rowset_builder.cpp +++ b/be/src/cloud/cloud_rowset_builder.cpp @@ -145,6 +145,7 @@ Status CloudRowsetBuilder::set_txn_related_info() { _req.txn_expiration); return Status::OK(); } + RETURN_IF_ERROR(_check_flexible_partial_update_single_segment()); if (config::enable_merge_on_write_correctness_check && _rowset->num_rows() != 0) { auto st = _tablet->check_delete_bitmap_correctness( _delete_bitmap, _rowset->end_version() - 1, _req.txn_id, *_rowset_ids); diff --git a/be/src/exec/common/variant_util.cpp b/be/src/exec/common/variant_util.cpp index 39e8f236ecd16e..45b5f124f8a9fb 100644 --- a/be/src/exec/common/variant_util.cpp +++ b/be/src/exec/common/variant_util.cpp @@ -17,7 +17,6 @@ #include "exec/common/variant_util.h" -#include #include #include #include @@ -34,10 +33,12 @@ #include #include +#include #include #include #include #include +#include #include #include #include @@ -76,6 +77,7 @@ #include "core/field.h" #include "core/typeid_cast.h" #include "core/types.h" +#include "core/value/bitmap_value.h" #include "exec/common/field_visitors.h" #include "exec/common/sip_hash.h" #include "exprs/function/function.h" @@ -101,6 +103,7 @@ #include "util/json/json_parser.h" #include "util/json/path_in_data.h" #include "util/json/simd_json_parser.h" +#include "util/jsonb_utils.h" namespace doris::variant_util { @@ -841,7 +844,9 @@ TabletColumn create_doc_value_column(const TabletColumn& variant, int bucket_ind } uint32_t variant_binary_shard_of(const StringRef& path, uint32_t bucket_num) { - if (bucket_num <= 1) return 0; + if (bucket_num <= 1) { + return 0; + } SipHash hash; hash.update(path.data, path.size); uint64_t h = hash.get64(); @@ -2141,6 +2146,659 @@ phmap::flat_hash_map materialize_doc return subcolumns; } +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_MASK = 1ULL << 63; +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT = 62; +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_UID_BITS = 31; +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_INDEX_BITS = 11; +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_POS_BITS = 12; +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_BYTE_BITS = 8; +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_POS_SHIFT = VARIANT_PATCH_PATH_MARKER_BYTE_BITS; +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT = + 
VARIANT_PATCH_PATH_MARKER_POS_SHIFT + VARIANT_PATCH_PATH_MARKER_POS_BITS; +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_UID_SHIFT = + VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT + VARIANT_PATCH_PATH_MARKER_INDEX_BITS; +static_assert(VARIANT_PATCH_PATH_MARKER_UID_SHIFT + VARIANT_PATCH_PATH_MARKER_UID_BITS == + VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT); +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_UID_MASK = + (1ULL << VARIANT_PATCH_PATH_MARKER_UID_BITS) - 1; +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_INDEX_MASK = + (1ULL << VARIANT_PATCH_PATH_MARKER_INDEX_BITS) - 1; +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_POS_MASK = + (1ULL << VARIANT_PATCH_PATH_MARKER_POS_BITS) - 1; +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_BYTE_MASK = + (1ULL << VARIANT_PATCH_PATH_MARKER_BYTE_BITS) - 1; +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_MAX_COUNT = 1ULL + << VARIANT_PATCH_PATH_MARKER_INDEX_BITS; +// Flexible VARIANT partial update keeps exact patch paths in skip bitmap markers. +// The byte position field is the feature-level encoded-path limit. +constexpr uint64_t VARIANT_PATCH_PATH_MARKER_MAX_BYTES = 1ULL << VARIANT_PATCH_PATH_MARKER_POS_BITS; +constexpr uint64_t VARIANT_PATCH_PATH_MAX_COUNT = 256; +constexpr uint64_t VARIANT_PATCH_PATH_MAX_TOTAL_BYTES = 64 * 1024; + +// The hidden skip bitmap stores top-level column unique ids, so VARIANT patch metadata uses +// values outside the int32 uid range. Each path is represented by exact, column-scoped byte +// markers with the high marker bit set; this keeps publish-conflict merge deterministic. +bool is_variant_patch_path_marker(uint64_t value) { + return (value & VARIANT_PATCH_PATH_MARKER_MASK) != 0; +} + +namespace { + +struct VariantPatchPathEncoding { + std::optional length; + std::vector> bytes; +}; + +using VariantPatchPathMap = std::map; + +void append_fixed_u32(uint32_t value, std::string* dst) { + dst->push_back(static_cast(value & 0xFF)); + dst->push_back(static_cast((value >> 8) & 0xFF)); + dst->push_back(static_cast((value >> 16) & 0xFF)); + dst->push_back(static_cast((value >> 24) & 0xFF)); +} + +bool read_fixed_u32(std::string_view src, size_t* offset, uint32_t* value) { + if (*offset + sizeof(uint32_t) > src.size()) { + return false; + } + const auto* data = reinterpret_cast(src.data() + *offset); + *value = static_cast(data[0]) | (static_cast(data[1]) << 8) | + (static_cast(data[2]) << 16) | (static_cast(data[3]) << 24); + *offset += sizeof(uint32_t); + return true; +} + +std::string encode_variant_patch_path_key(const PathInData& path) { + const auto& parts = path.get_parts(); + DCHECK(!parts.empty()); + std::string encoded; + append_fixed_u32(static_cast(parts.size()), &encoded); + for (const auto& part : parts) { + append_fixed_u32(static_cast(part.key.size()), &encoded); + encoded.append(part.key.data(), part.key.size()); + encoded.push_back(static_cast(part.is_nested ? 
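// --- Illustrative sketch (not an applied hunk): the byte layout produced by
// encode_variant_patch_path_key above -- a little-endian u32 part count, then per part
// a u32 key size, the key bytes, an is_nested flag byte, and an anonymous-array-level
// byte. The Part struct below is a simplified stand-in for PathInData::Part.
#include <cstdint>
#include <string>
#include <vector>

namespace path_key_sketch {
struct Part {
    std::string key;
    bool is_nested = false;
    uint8_t anonymous_array_level = 0;
};

inline void put_u32_le(uint32_t v, std::string* dst) {
    for (int i = 0; i < 4; ++i) {
        dst->push_back(static_cast<char>((v >> (8 * i)) & 0xFF));
    }
}

inline std::string encode(const std::vector<Part>& parts) {
    std::string out;
    put_u32_le(static_cast<uint32_t>(parts.size()), &out);
    for (const auto& part : parts) {
        put_u32_le(static_cast<uint32_t>(part.key.size()), &out);
        out.append(part.key);
        out.push_back(part.is_nested ? 1 : 0);
        out.push_back(static_cast<char>(part.anonymous_array_level));
    }
    return out;
}
} // namespace path_key_sketch
// Encoding the two-part path {"a", "b"} yields 4 + (4 + 1 + 2) + (4 + 1 + 2) = 18 bytes,
// well under the per-path limit implied by VARIANT_PATCH_PATH_MARKER_POS_BITS (4096 bytes).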
1 : 0)); + encoded.push_back(static_cast(part.anonymous_array_level)); + } + return encoded; +} + +Status decode_variant_patch_path_key(std::string_view encoded, PathInData* path) { + size_t offset = 0; + uint32_t part_count = 0; + if (!read_fixed_u32(encoded, &offset, &part_count) || part_count == 0) { + return Status::InternalError("Invalid VARIANT patch path marker part count"); + } + + PathInData::Parts parts; + parts.reserve(part_count); + for (uint32_t i = 0; i < part_count; ++i) { + uint32_t key_size = 0; + if (!read_fixed_u32(encoded, &offset, &key_size) || + offset + key_size + 2 > encoded.size()) { + return Status::InternalError("Invalid VARIANT patch path marker part payload"); + } + PathInData::Part part; + part.key = std::string_view(encoded.data() + offset, key_size); + offset += key_size; + part.is_nested = encoded[offset++] != 0; + part.anonymous_array_level = static_cast(encoded[offset++]); + parts.emplace_back(part); + } + if (offset != encoded.size()) { + return Status::InternalError("Trailing bytes in VARIANT patch path marker"); + } + + *path = PathInData(parts); + return Status::OK(); +} + +uint64_t variant_patch_path_max_bytes() { + return VARIANT_PATCH_PATH_MARKER_MAX_BYTES; +} + +uint64_t normalized_variant_col_unique_id(int32_t variant_col_unique_id) { + CHECK_GE(variant_col_unique_id, 0); + CHECK_LE(static_cast(variant_col_unique_id), VARIANT_PATCH_PATH_MARKER_UID_MASK); + return static_cast(variant_col_unique_id); +} + +uint64_t variant_patch_path_marker_uid(uint64_t marker) { + return (marker >> VARIANT_PATCH_PATH_MARKER_UID_SHIFT) & VARIANT_PATCH_PATH_MARKER_UID_MASK; +} + +bool is_variant_patch_path_marker_for_column(uint64_t marker, int32_t variant_col_unique_id) { + return is_variant_patch_path_marker(marker) && + variant_patch_path_marker_uid(marker) == + normalized_variant_col_unique_id(variant_col_unique_id); +} + +uint64_t variant_patch_path_marker_index(uint64_t marker) { + return (marker >> VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) & VARIANT_PATCH_PATH_MARKER_INDEX_MASK; +} + +bool variant_patch_path_marker_is_byte(uint64_t marker) { + return ((marker >> VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT) & 1ULL) != 0; +} + +uint64_t variant_patch_path_length_marker(int32_t variant_col_unique_id, uint64_t path_index, + uint64_t length) { + DCHECK_LT(path_index, VARIANT_PATCH_PATH_MARKER_MAX_COUNT); + DCHECK_LE(length, VARIANT_PATCH_PATH_MARKER_MAX_BYTES); + return VARIANT_PATCH_PATH_MARKER_MASK | + (normalized_variant_col_unique_id(variant_col_unique_id) + << VARIANT_PATCH_PATH_MARKER_UID_SHIFT) | + (path_index << VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) | length; +} + +uint64_t variant_patch_path_byte_marker(int32_t variant_col_unique_id, uint64_t path_index, + uint64_t byte_pos, uint8_t byte) { + DCHECK_LT(path_index, VARIANT_PATCH_PATH_MARKER_MAX_COUNT); + DCHECK_LT(byte_pos, VARIANT_PATCH_PATH_MARKER_MAX_BYTES); + return VARIANT_PATCH_PATH_MARKER_MASK | (1ULL << VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT) | + (normalized_variant_col_unique_id(variant_col_unique_id) + << VARIANT_PATCH_PATH_MARKER_UID_SHIFT) | + (path_index << VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) | + (byte_pos << VARIANT_PATCH_PATH_MARKER_POS_SHIFT) | byte; +} + +void remove_variant_patch_path_markers_for_column(int32_t variant_col_unique_id, + BitmapValue* bitmap) { + std::vector markers_to_remove; + for (uint64_t marker : *bitmap) { + if (is_variant_patch_path_marker_for_column(marker, variant_col_unique_id)) { + markers_to_remove.push_back(marker); + } + } + for (uint64_t marker : markers_to_remove) 
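// --- Illustrative sketch (not an applied hunk): how the 64-bit skip-bitmap markers
// built by variant_patch_path_length_marker / variant_patch_path_byte_marker decompose.
// Layout per the constants above: [63 marker][62 class][61:31 column uid]
// [30:20 path index][19:8 byte pos][7:0 byte]. The standalone names are hypothetical.
#include <cstdint>

namespace marker_layout_sketch {
constexpr uint64_t kMarkerBit = 1ULL << 63;    // VARIANT_PATCH_PATH_MARKER_MASK
constexpr uint64_t kByteClassBit = 1ULL << 62; // set for byte markers, clear for length markers

constexpr uint64_t make_byte_marker(uint64_t uid, uint64_t path_index, uint64_t byte_pos,
                                    uint8_t byte) {
    return kMarkerBit | kByteClassBit | (uid << 31) | (path_index << 20) | (byte_pos << 8) | byte;
}

constexpr uint64_t uid_of(uint64_t m) { return (m >> 31) & ((1ULL << 31) - 1); }
constexpr uint64_t index_of(uint64_t m) { return (m >> 20) & ((1ULL << 11) - 1); }
constexpr uint64_t pos_of(uint64_t m) { return (m >> 8) & ((1ULL << 12) - 1); }

// Round-trip check: every field survives composition and extraction.
static_assert(uid_of(make_byte_marker(42, 3, 17, 0xAB)) == 42);
static_assert(index_of(make_byte_marker(42, 3, 17, 0xAB)) == 3);
static_assert(pos_of(make_byte_marker(42, 3, 17, 0xAB)) == 17);
static_assert((make_byte_marker(42, 3, 17, 0xAB) & 0xFF) == 0xAB);
} // namespace marker_layout_sketch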
{ + bitmap->remove(marker); + } +} + +void remove_all_variant_patch_path_markers(BitmapValue* bitmap) { + std::vector markers_to_remove; + for (uint64_t marker : *bitmap) { + if (is_variant_patch_path_marker(marker)) { + markers_to_remove.push_back(marker); + } + } + for (uint64_t marker : markers_to_remove) { + bitmap->remove(marker); + } +} + +Status decode_variant_patch_paths(const BitmapValue& bitmap, int32_t variant_col_unique_id, + VariantPatchPathMap* paths) { + paths->clear(); + std::map encoded_paths; + for (uint64_t marker : bitmap) { + if (!is_variant_patch_path_marker_for_column(marker, variant_col_unique_id)) { + continue; + } + auto& encoded_path = encoded_paths[variant_patch_path_marker_index(marker)]; + if (!variant_patch_path_marker_is_byte(marker)) { + const uint64_t length = marker & ((1ULL << VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) - 1); + if (length > VARIANT_PATCH_PATH_MARKER_MAX_BYTES) { + return Status::InternalError( + "Invalid VARIANT patch path marker length {} for column {}", length, + variant_col_unique_id); + } + if (encoded_path.length.has_value() && *encoded_path.length != length) { + return Status::InternalError( + "Conflicting VARIANT patch path marker length for column {}", + variant_col_unique_id); + } + encoded_path.length = length; + continue; + } + + const uint64_t byte_pos = (marker >> VARIANT_PATCH_PATH_MARKER_POS_SHIFT) & + VARIANT_PATCH_PATH_MARKER_POS_MASK; + const uint8_t byte = marker & VARIANT_PATCH_PATH_MARKER_BYTE_MASK; + if (encoded_path.bytes.size() <= byte_pos) { + encoded_path.bytes.resize(byte_pos + 1); + } + if (encoded_path.bytes[byte_pos].has_value() && *encoded_path.bytes[byte_pos] != byte) { + return Status::InternalError("Conflicting VARIANT patch path marker byte for column {}", + variant_col_unique_id); + } + encoded_path.bytes[byte_pos] = byte; + } + + for (const auto& [_, encoded_path] : encoded_paths) { + if (!encoded_path.length.has_value()) { + if (!encoded_path.bytes.empty()) { + return Status::InternalError( + "VARIANT patch path marker byte without length for column {}", + variant_col_unique_id); + } + continue; + } + if (encoded_path.bytes.size() > *encoded_path.length) { + return Status::InternalError( + "VARIANT patch path marker byte exceeds length for column {}", + variant_col_unique_id); + } + std::string encoded_path_key; + encoded_path_key.reserve(*encoded_path.length); + for (uint64_t i = 0; i < *encoded_path.length; ++i) { + if (i >= encoded_path.bytes.size() || !encoded_path.bytes[i].has_value()) { + return Status::InternalError("Incomplete VARIANT patch path marker for column {}", + variant_col_unique_id); + } + encoded_path_key.push_back(static_cast(*encoded_path.bytes[i])); + } + PathInData path; + RETURN_IF_ERROR(decode_variant_patch_path_key(encoded_path_key, &path)); + paths->insert_or_assign(std::move(encoded_path_key), std::move(path)); + } + return Status::OK(); +} + +Status encode_variant_patch_paths(int32_t variant_col_unique_id, const VariantPatchPathMap& paths, + BitmapValue* bitmap) { + if (paths.size() > VARIANT_PATCH_PATH_MAX_COUNT) { + return Status::NotSupported( + "VARIANT flexible partial update supports at most {} patch paths per row", + VARIANT_PATCH_PATH_MAX_COUNT); + } + const uint64_t max_encoded_bytes = variant_patch_path_max_bytes(); + for (const auto& [encoded_path_key, _] : paths) { + if (encoded_path_key.size() > max_encoded_bytes) { + return Status::NotSupported( + "VARIANT flexible partial update encoded patch path exceeds {} bytes, actual " + "{} bytes", + max_encoded_bytes, 
encoded_path_key.size()); + } + } + + BitmapValue encoded_bitmap = *bitmap; + remove_variant_patch_path_markers_for_column(variant_col_unique_id, &encoded_bitmap); + + uint64_t path_index = 0; + for (const auto& [encoded_path_key, _] : paths) { + encoded_bitmap.add(variant_patch_path_length_marker(variant_col_unique_id, path_index, + encoded_path_key.size())); + for (uint64_t byte_pos = 0; byte_pos < encoded_path_key.size(); ++byte_pos) { + encoded_bitmap.add(variant_patch_path_byte_marker( + variant_col_unique_id, path_index, byte_pos, + static_cast(static_cast(encoded_path_key[byte_pos])))); + } + ++path_index; + } + uint64_t row_total_encoded_bytes = 0; + for (uint64_t marker : encoded_bitmap) { + if (is_variant_patch_path_marker(marker) && !variant_patch_path_marker_is_byte(marker)) { + row_total_encoded_bytes += + marker & ((1ULL << VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) - 1); + if (row_total_encoded_bytes > VARIANT_PATCH_PATH_MAX_TOTAL_BYTES) { + return Status::NotSupported( + "VARIANT flexible partial update encoded patch paths exceed {} bytes per " + "row", + VARIANT_PATCH_PATH_MAX_TOTAL_BYTES); + } + } + } + *bitmap = std::move(encoded_bitmap); + return Status::OK(); +} + +void collect_variant_patch_marker_column_uids(const BitmapValue& bitmap, + std::set* variant_col_unique_ids) { + for (uint64_t marker : bitmap) { + if (is_variant_patch_path_marker(marker)) { + variant_col_unique_ids->insert( + static_cast(variant_patch_path_marker_uid(marker))); + } + } +} + +Status variant_object_patch_required_status() { + return Status::NotSupported( + "VARIANT flexible partial update only supports JSON object patch values"); +} + +Status variant_object_base_required_status() { + return Status::NotSupported( + "VARIANT flexible partial update only supports patching JSON object old values"); +} + +Status variant_doc_mode_not_supported_status() { + return Status::NotSupported( + "VARIANT flexible partial update does not support doc mode in this version"); +} + +const ColumnVariant& get_variant_nested_column(const IColumn& column) { + if (column.is_nullable()) { + return assert_cast( + assert_cast(column).get_nested_column()); + } + return assert_cast(column); +} + +ColumnVariant& get_variant_nested_column(IColumn& column) { + if (column.is_nullable()) { + return assert_cast( + assert_cast(column).get_nested_column()); + } + return assert_cast(column); +} + +bool is_path_prefix_of(const PathInData& prefix, const PathInData& path) { + const auto& prefix_parts = prefix.get_parts(); + const auto& path_parts = path.get_parts(); + if (prefix_parts.size() > path_parts.size()) { + return false; + } + return std::equal(prefix_parts.begin(), prefix_parts.end(), path_parts.begin()); +} + +bool paths_conflict(const PathInData& left, const PathInData& right) { + return is_path_prefix_of(left, right) || is_path_prefix_of(right, left); +} + +bool path_or_prefix_is_variant_patch_path(const PathInData& path, + const VariantPatchPathMap& patch_paths) { + PathInData::Parts prefix_parts; + prefix_parts.reserve(path.get_parts().size()); + for (const auto& part : path.get_parts()) { + prefix_parts.push_back(part); + if (patch_paths.contains(encode_variant_patch_path_key(PathInData(prefix_parts)))) { + return true; + } + } + return false; +} + +bool path_conflicts_with_any_patch_path(const PathInData& path, const VariantMap& patch_object) { + return std::ranges::any_of(patch_object, [&](const auto& patch_item) { + return paths_conflict(patch_item.first, path); + }); +} + +bool 
starts_with_json_object(std::string_view text) { + auto it = std::ranges::find_if_not(text, [](unsigned char ch) { return std::isspace(ch); }); + return it != text.end() && *it == '{'; +} + +bool root_jsonb_field_to_json_text(const Field& field, std::string* json_text) { + switch (field.get_type()) { + case PrimitiveType::TYPE_JSONB: { + const auto& jsonb = field.get(); + *json_text = JsonbToJson::jsonb_to_json_string(jsonb.get_value(), jsonb.get_size()); + return true; + } + default: + return false; + } +} + +bool collect_json_object_text_map(std::string_view json_text, bool reject_json_null_value, + VariantMap* object) { + if (!starts_with_json_object(json_text)) { + return false; + } + + auto parsed = ColumnVariant::create(0, false); + ParseConfig config; + config.parse_to = ParseConfig::ParseTo::OnlySubcolumns; + config.reject_json_null_value = reject_json_null_value; + config.record_empty_object_path = true; + StringRef json_ref {json_text.data(), json_text.size()}; + parse_json_to_variant(*parsed, json_ref, nullptr, config); + parsed->finalize(); + + Field parsed_field; + parsed->get(0, parsed_field); + if (parsed_field.get_type() != PrimitiveType::TYPE_VARIANT) { + return false; + } + const auto& parsed_object = parsed_field.get(); + if (parsed_object.contains(PathInData())) { + return false; + } + for (const auto& [path, value] : parsed_object) { + if (!path.empty()) { + object->insert_or_assign(path, value); + } + } + return true; +} + +void collect_materialized_variant_map(const ColumnVariant& variant, size_t row, VariantMap* object, + FieldWithDataType* root_field) { + Field field; + variant.get(row, field); + if (field.get_type() == PrimitiveType::TYPE_VARIANT) { + for (const auto& [path, value] : field.get()) { + if (path.get_path() == DOC_VALUE_COLUMN_PATH) { + continue; + } + if (path.empty()) { + *root_field = value; + continue; + } + object->insert_or_assign(path, value); + } + } + + DCHECK(!variant.has_doc_value_column(row)); +} + +Status collect_variant_patch_map(const ColumnVariant& variant, size_t row, bool* is_object_patch, + VariantMap* object) { + object->clear(); + FieldWithDataType root_field; + collect_materialized_variant_map(variant, row, object, &root_field); + if (root_field.field.get_type() == PrimitiveType::TYPE_NULL) { + *is_object_patch = true; + return Status::OK(); + } + + std::string json_text; + if (!root_jsonb_field_to_json_text(root_field.field, &json_text)) { + *is_object_patch = false; + return Status::OK(); + } + object->clear(); + *is_object_patch = collect_json_object_text_map(json_text, true, object); + return Status::OK(); +} + +Status collect_variant_base_map(const ColumnVariant& variant, size_t row, VariantMap* object) { + object->clear(); + FieldWithDataType root_field; + collect_materialized_variant_map(variant, row, object, &root_field); + if (root_field.field.get_type() == PrimitiveType::TYPE_NULL) { + return Status::OK(); + } + + std::string json_text; + if (!root_jsonb_field_to_json_text(root_field.field, &json_text)) { + return variant_object_base_required_status(); + } + object->clear(); + if (!collect_json_object_text_map(json_text, false, object)) { + return variant_object_base_required_status(); + } + return Status::OK(); +} + +Status insert_variant_field(IColumn& dst_column, const Field& field) { + DCHECK(!get_variant_nested_column(dst_column).enable_doc_mode()); + dst_column.insert(field); + return Status::OK(); +} + +Status check_variant_object_patch_supported(const IColumn& column) { + if 
(get_variant_nested_column(column).enable_doc_mode()) { + return variant_doc_mode_not_supported_status(); + } + return Status::OK(); +} + +Status merge_variant_object_patch(const IColumn& old_column, size_t old_row, + VariantMap&& patch_object, IColumn& dst_column) { + VariantMap merged_object; + if (!old_column.is_null_at(old_row)) { + RETURN_IF_ERROR(collect_variant_base_map(get_variant_nested_column(old_column), old_row, + &merged_object)); + } + for (const auto& [patch_path, _] : patch_object) { + for (auto it = merged_object.begin(); it != merged_object.end();) { + if (paths_conflict(patch_path, it->first)) { + it = merged_object.erase(it); + } else { + ++it; + } + } + } + for (auto& [patch_path, patch_value] : patch_object) { + merged_object.insert_or_assign(patch_path, std::move(patch_value)); + } + + Field merged_field = Field::create_field(std::move(merged_object)); + return insert_variant_field(dst_column, merged_field); +} + +Status insert_variant_object_patch(VariantMap&& patch_object, IColumn& dst_column) { + Field patch_field = Field::create_field(std::move(patch_object)); + return insert_variant_field(dst_column, patch_field); +} + +} // namespace + +Status mark_variant_patch_paths(const IColumn& patch_column, size_t patch_row, + int32_t variant_col_unique_id, BitmapValue* patch_path_markers) { + RETURN_IF_CATCH_EXCEPTION({ + if (patch_column.is_null_at(patch_row)) { + return variant_object_patch_required_status(); + } + RETURN_IF_ERROR(check_variant_object_patch_supported(patch_column)); + + bool is_object_patch = false; + VariantMap patch_object; + RETURN_IF_ERROR(collect_variant_patch_map(get_variant_nested_column(patch_column), + patch_row, &is_object_patch, &patch_object)); + if (!is_object_patch) { + return variant_object_patch_required_status(); + } + + VariantPatchPathMap patch_paths; + RETURN_IF_ERROR(decode_variant_patch_paths(*patch_path_markers, variant_col_unique_id, + &patch_paths)); + for (const auto& [path, _] : patch_object) { + patch_paths.insert_or_assign(encode_variant_patch_path_key(path), path); + } + return encode_variant_patch_paths(variant_col_unique_id, patch_paths, patch_path_markers); + }); + return Status::OK(); +} + +Status merge_variant_patch_path_markers(const BitmapValue& left, const BitmapValue& right, + BitmapValue* merged) { + RETURN_IF_CATCH_EXCEPTION({ + *merged = left; + *merged &= right; + remove_all_variant_patch_path_markers(merged); + + std::set variant_col_unique_ids; + collect_variant_patch_marker_column_uids(left, &variant_col_unique_ids); + collect_variant_patch_marker_column_uids(right, &variant_col_unique_ids); + for (int32_t variant_col_unique_id : variant_col_unique_ids) { + VariantPatchPathMap patch_paths; + RETURN_IF_ERROR(decode_variant_patch_paths(left, variant_col_unique_id, &patch_paths)); + VariantPatchPathMap right_patch_paths; + RETURN_IF_ERROR( + decode_variant_patch_paths(right, variant_col_unique_id, &right_patch_paths)); + patch_paths.insert(right_patch_paths.begin(), right_patch_paths.end()); + RETURN_IF_ERROR(encode_variant_patch_paths(variant_col_unique_id, patch_paths, merged)); + } + return Status::OK(); + }); + return Status::OK(); +} + +Status merge_variant_patch(const IColumn& old_column, size_t old_row, const IColumn& patch_column, + size_t patch_row, IColumn& dst_column) { + RETURN_IF_CATCH_EXCEPTION({ + if (patch_column.is_null_at(patch_row)) { + return variant_object_patch_required_status(); + } + RETURN_IF_ERROR(check_variant_object_patch_supported(old_column)); + 
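// --- Illustrative sketch (not an applied hunk): the prefix-conflict rule that
// merge_variant_object_patch above applies. A patch path evicts every base path that is
// a prefix of it or that it is a prefix of, so patching "a.b" replaces both "a.b.c" and
// "a". Dotted strings stand in for PathInData; the names below are hypothetical.
#include <iterator>
#include <map>
#include <string>

namespace patch_merge_sketch {
inline bool is_prefix(const std::string& prefix, const std::string& path) {
    return path.size() >= prefix.size() && path.compare(0, prefix.size(), prefix) == 0 &&
           (path.size() == prefix.size() || path[prefix.size()] == '.');
}

inline bool conflicts(const std::string& a, const std::string& b) {
    return is_prefix(a, b) || is_prefix(b, a);
}

inline void merge(std::map<std::string, std::string>& base,
                  const std::map<std::string, std::string>& patch) {
    // Erase every base path that conflicts with any patch path, then insert the patch.
    for (const auto& [patch_path, _] : patch) {
        for (auto it = base.begin(); it != base.end();) {
            it = conflicts(patch_path, it->first) ? base.erase(it) : std::next(it);
        }
    }
    base.insert(patch.begin(), patch.end());
}
} // namespace patch_merge_sketch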
RETURN_IF_ERROR(check_variant_object_patch_supported(patch_column)); + RETURN_IF_ERROR(check_variant_object_patch_supported(dst_column)); + + bool is_object_patch = false; + VariantMap patch_object; + RETURN_IF_ERROR(collect_variant_patch_map(get_variant_nested_column(patch_column), + patch_row, &is_object_patch, &patch_object)); + if (!is_object_patch) { + return variant_object_patch_required_status(); + } + + RETURN_IF_ERROR(merge_variant_object_patch(old_column, old_row, std::move(patch_object), + dst_column)); + return Status::OK(); + }); + return Status::OK(); +} + +Status merge_variant_patch_by_path_markers(const IColumn& old_column, size_t old_row, + const IColumn& patch_column, size_t patch_row, + int32_t variant_col_unique_id, + const BitmapValue& patch_path_markers, + bool old_row_deleted, IColumn& dst_column) { + RETURN_IF_CATCH_EXCEPTION({ + if (patch_column.is_null_at(patch_row)) { + return variant_object_patch_required_status(); + } + RETURN_IF_ERROR(check_variant_object_patch_supported(old_column)); + RETURN_IF_ERROR(check_variant_object_patch_supported(patch_column)); + RETURN_IF_ERROR(check_variant_object_patch_supported(dst_column)); + + VariantMap patch_object; + RETURN_IF_ERROR(collect_variant_base_map(get_variant_nested_column(patch_column), patch_row, + &patch_object)); + VariantPatchPathMap patch_paths; + RETURN_IF_ERROR(decode_variant_patch_paths(patch_path_markers, variant_col_unique_id, + &patch_paths)); + for (auto it = patch_object.begin(); it != patch_object.end();) { + if (patch_paths.contains(encode_variant_patch_path_key(it->first))) { + ++it; + } else { + it = patch_object.erase(it); + } + } + if (old_row_deleted) { + RETURN_IF_ERROR(insert_variant_object_patch(std::move(patch_object), dst_column)); + return Status::OK(); + } + + VariantMap merged_object; + if (!old_column.is_null_at(old_row)) { + RETURN_IF_ERROR(collect_variant_base_map(get_variant_nested_column(old_column), old_row, + &merged_object)); + } + for (auto it = merged_object.begin(); it != merged_object.end();) { + if (path_or_prefix_is_variant_patch_path(it->first, patch_paths) || + path_conflicts_with_any_patch_path(it->first, patch_object)) { + it = merged_object.erase(it); + } else { + ++it; + } + } + for (auto& [patch_path, patch_value] : patch_object) { + merged_object.insert_or_assign(patch_path, std::move(patch_value)); + } + + Field merged_field = Field::create_field(std::move(merged_object)); + RETURN_IF_ERROR(insert_variant_field(dst_column, merged_field)); + return Status::OK(); + }); + return Status::OK(); +} + Status _parse_and_materialize_variant_columns(Block& block, const std::vector& variant_pos, const std::vector& configs) { @@ -2216,7 +2874,8 @@ Status parse_and_materialize_variant_columns(Block& block, const std::vector& column_pos) { + const std::vector& column_pos, + bool reject_json_null_value) { std::vector variant_column_pos; std::vector variant_schema_pos; variant_column_pos.reserve(column_pos.size()); @@ -2240,6 +2899,8 @@ Status parse_and_materialize_variant_columns(Block& block, const TabletSchema& t configs[i].deprecated_enable_flatten_nested = tablet_schema.deprecated_variant_flatten_nested(); configs[i].check_duplicate_json_path = config::variant_enable_duplicate_json_path_check; + configs[i].reject_json_null_value = reject_json_null_value; + configs[i].record_empty_object_path = reject_json_null_value; const auto& column = tablet_schema.column(variant_schema_pos[i]); if (!column.is_variant_type()) { return Status::InternalError("column is not variant type, 
column name: {}", diff --git a/be/src/exec/common/variant_util.h b/be/src/exec/common/variant_util.h index f4302146972e2c..d8826f7700607f 100644 --- a/be/src/exec/common/variant_util.h +++ b/be/src/exec/common/variant_util.h @@ -259,7 +259,23 @@ void parse_json_to_variant(IColumn& column, const StringRef& jsons, JsonParser* // Parse variant columns by picking variant positions from `column_pos` and generating ParseConfig // based on tablet schema settings (flatten nested / doc snapshot mode). Status parse_and_materialize_variant_columns(Block& block, const TabletSchema& tablet_schema, - const std::vector& column_pos); + const std::vector& column_pos, + bool reject_json_null_value = false); + +// Merge one VARIANT object patch row into an old VARIANT row and append the result to dst_column. +// Flexible VARIANT partial update only supports JSON object patches in this version. +Status merge_variant_patch(const IColumn& old_column, size_t old_row, const IColumn& patch_column, + size_t patch_row, IColumn& dst_column); +bool is_variant_patch_path_marker(uint64_t value); +Status mark_variant_patch_paths(const IColumn& patch_column, size_t patch_row, + int32_t variant_col_unique_id, BitmapValue* patch_path_markers); +Status merge_variant_patch_path_markers(const BitmapValue& left, const BitmapValue& right, + BitmapValue* merged); +Status merge_variant_patch_by_path_markers(const IColumn& old_column, size_t old_row, + const IColumn& patch_column, size_t patch_row, + int32_t variant_col_unique_id, + const BitmapValue& patch_path_markers, + bool old_row_deleted, IColumn& dst_column); // Parse doc snapshot column (paths/values/offsets stored in ColumnVariant) into per-path subcolumns. // NOTE: Returned map keys are `std::string_view` pointing into the underlying doc snapshot paths diff --git a/be/src/format/json/new_json_reader.cpp b/be/src/format/json/new_json_reader.cpp index da141437fcf200..c86ddee16b1238 100644 --- a/be/src/format/json/new_json_reader.cpp +++ b/be/src/format/json/new_json_reader.cpp @@ -39,6 +39,7 @@ #include "common/compiler_util.h" // IWYU pragma: keep #include "common/config.h" +#include "common/exception.h" #include "common/status.h" #include "core/assert_cast.h" #include "core/block/column_with_type_and_name.h" @@ -48,6 +49,7 @@ #include "core/column/column_nullable.h" #include "core/column/column_string.h" #include "core/column/column_struct.h" +#include "core/column/column_variant.h" #include "core/custom_allocator.h" #include "core/data_type/data_type_array.h" #include "core/data_type/data_type_factory.hpp" @@ -55,6 +57,7 @@ #include "core/data_type/data_type_number.h" // IWYU pragma: keep #include "core/data_type/data_type_struct.h" #include "core/data_type/define_primitive_type.h" +#include "exec/common/variant_util.h" #include "exec/scan/scanner.h" #include "exprs/json_functions.h" #include "format/file_reader/new_plain_text_line_reader.h" @@ -984,6 +987,16 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val // set _seen_columns.assign(block.columns(), false); size_t cur_row_count = block.rows(); + auto pop_current_row = [&]() { + for (size_t index = 0; index < block.columns(); ++index) { + auto column = block.get_by_position(index).column->assume_mutable(); + if (column->size() > cur_row_count) { + DCHECK(column->size() == cur_row_count + 1); + column->pop_back(column->size() - cur_row_count); + DCHECK(column->size() == cur_row_count); + } + } + }; bool has_valid_value = false; // iterate through object, simdjson::ondemond will parsing 
on the fly size_t key_index = 0; @@ -1002,7 +1015,7 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val // This key is not exist in slot desc, just ignore continue; } - if (column_index == skip_bitmap_col_idx) { + if (skip_bitmap_col_idx >= 0 && std::cmp_equal(column_index, skip_bitmap_col_idx)) { continue; } if (_seen_columns[column_index]) { @@ -1018,8 +1031,10 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val auto* column_ptr = block.get_by_position(column_index).column->assume_mutable().get(); RETURN_IF_ERROR(_simdjson_write_data_to_column( val, slot_descs[column_index]->type(), column_ptr, - slot_descs[column_index]->col_name(), _serdes[column_index], valid)); + slot_descs[column_index]->col_name(), _serdes[column_index], valid, + _is_flexible_variant_column(*slot_descs[column_index]))); if (!(*valid)) { + pop_current_row(); return Status::OK(); } _seen_columns[column_index] = true; @@ -1050,7 +1065,7 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val if (_seen_columns[i]) { continue; } - if (i == skip_bitmap_col_idx) { + if (skip_bitmap_col_idx >= 0 && std::cmp_equal(i, skip_bitmap_col_idx)) { continue; } @@ -1074,15 +1089,7 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val "The key columns can not be ommited in flexible " "partial update, missing key column: {}", slot_desc->col_name(), valid)); - // remove this line in block - for (size_t index = 0; index < block.columns(); ++index) { - auto column = block.get_by_position(index).column->assume_mutable(); - if (column->size() != cur_row_count) { - DCHECK(column->size() == cur_row_count + 1); - column->pop_back(1); - DCHECK(column->size() == cur_row_count); - } - } + pop_current_row(); return Status::OK(); } _set_skip_bitmap_mark(slot_desc, column_ptr, block, cur_row_count, valid); @@ -1109,11 +1116,34 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& const DataTypePtr& type_desc, IColumn* column_ptr, const std::string& column_name, - DataTypeSerDeSPtr serde, bool* valid) { + DataTypeSerDeSPtr serde, bool* valid, + bool is_flexible_variant_column) { ColumnNullable* nullable_column = nullptr; IColumn* data_column_ptr = column_ptr; DataTypeSerDeSPtr data_serde = serde; + auto primitive_type = remove_nullable(type_desc)->get_primitive_type(); + const IColumn* nested_column_ptr = + column_ptr->is_nullable() + ? 
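// --- Illustrative sketch (not an applied hunk): the reader-side gate shown in this hunk.
// For a flexible VARIANT patch column, any value that is not a JSON object (scalar,
// array, or null) is rejected; under a load the row is filtered and counted instead of
// failing the whole job. Minimal standalone check; the function name is hypothetical,
// the simdjson calls are the ones used above.
#include <simdjson.h>

inline bool is_acceptable_variant_patch(simdjson::ondemand::value& value) {
    simdjson::ondemand::json_type type;
    if (value.type().get(type) != simdjson::SUCCESS) {
        return false;
    }
    return type == simdjson::ondemand::json_type::object;
}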
assert_cast(*column_ptr).get_nested_column_ptr().get() + : column_ptr; + const bool is_flexible_variant_patch_column = + _should_process_skip_bitmap_col() && + (primitive_type == TYPE_VARIANT || + check_and_get_column(nested_column_ptr) != nullptr || + is_flexible_variant_column); + if (is_flexible_variant_patch_column && value.type() != simdjson::ondemand::json_type::object) { + if (_is_load) { + RETURN_IF_ERROR(_append_error_msg( + nullptr, + "VARIANT flexible partial update only supports JSON object patch values", "", + valid)); + return Status::OK(); + } + return Status::NotSupported( + "VARIANT flexible partial update only supports JSON object patch values"); + } + if (column_ptr->is_nullable()) { nullable_column = reinterpret_cast(column_ptr); @@ -1138,8 +1168,27 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value& } } - auto primitive_type = type_desc->get_primitive_type(); if (_is_load || !is_complex_type(primitive_type)) { + if (is_flexible_variant_patch_column && primitive_type == TYPE_VARIANT) { + ParseConfig parse_config; + parse_config.check_duplicate_json_path = + config::variant_enable_duplicate_json_path_check; + parse_config.reject_json_null_value = true; + parse_config.record_empty_object_path = true; + std::string_view json_str = simdjson::to_json_string(value); + StringRef json_ref {json_str.data(), json_str.size()}; + try { + variant_util::parse_json_to_variant(*data_column_ptr, json_ref, nullptr, + parse_config); + } catch (const Exception& e) { + return e.to_status(); + } + if (nullable_column) { + nullable_column->get_null_map_data().push_back(0); + } + *valid = true; + return Status::OK(); + } if (value.type() == simdjson::ondemand::json_type::string) { std::string_view value_string; if constexpr (use_string_cache) { @@ -1610,7 +1659,26 @@ Status NewJsonReader::_fill_missing_column(SlotDescriptor* slot_desc, DataTypeSe return Status::OK(); } -void NewJsonReader::_append_empty_skip_bitmap_value(Block& block, size_t cur_row_count) { +bool NewJsonReader::_is_flexible_variant_column(const SlotDescriptor& slot_desc) const { + if (!_should_process_skip_bitmap_col()) { + return false; + } + if (remove_nullable(slot_desc.type())->get_primitive_type() == TYPE_VARIANT) { + return true; + } + DORIS_CHECK(_state != nullptr); + DORIS_CHECK(_params.__isset.dest_tuple_id); + const auto* dest_tuple_desc = _state->desc_tbl().get_tuple_descriptor(_params.dest_tuple_id); + DORIS_CHECK(dest_tuple_desc != nullptr); + for (const auto* dest_slot_desc : dest_tuple_desc->slots()) { + if (dest_slot_desc->col_name() == slot_desc.col_name()) { + return remove_nullable(dest_slot_desc->type())->get_primitive_type() == TYPE_VARIANT; + } + } + return false; +} + +void NewJsonReader::_append_empty_skip_bitmap_value(Block& block, size_t cur_row_count) const { auto* skip_bitmap_nullable_col_ptr = assert_cast( block.get_by_position(skip_bitmap_col_idx).column->assume_mutable().get()); auto* skip_bitmap_col_ptr = @@ -1623,7 +1691,7 @@ void NewJsonReader::_append_empty_skip_bitmap_value(Block& block, size_t cur_row } void NewJsonReader::_set_skip_bitmap_mark(SlotDescriptor* slot_desc, IColumn* column_ptr, - Block& block, size_t cur_row_count, bool* valid) { + Block& block, size_t cur_row_count, bool* valid) const { // we record the missing column's column unique id in skip bitmap // to indicate which columns need to do the alignment process auto* skip_bitmap_nullable_col_ptr = assert_cast( diff --git a/be/src/format/json/new_json_reader.h 
b/be/src/format/json/new_json_reader.h index e74607a0e6de56..b89bb2f7f6661e 100644 --- a/be/src/format/json/new_json_reader.h +++ b/be/src/format/json/new_json_reader.h @@ -165,7 +165,7 @@ class NewJsonReader : public TableFormatReader { Status _simdjson_write_data_to_column(simdjson::ondemand::value& value, const DataTypePtr& type_desc, IColumn* column_ptr, const std::string& column_name, DataTypeSerDeSPtr serde, - bool* valid); + bool* valid, bool is_flexible_variant_column = false); Status _simdjson_write_columns_by_jsonpath(simdjson::ondemand::object* value, const std::vector& slot_descs, @@ -190,9 +190,10 @@ class NewJsonReader : public TableFormatReader { // flexible partial update can not be used when user specify jsonpaths, so we just fill the skip bitmap // in `_simdjson_handle_simple_json` and `_vhandle_simple_json` (which will be used when jsonpaths is not specified) bool _should_process_skip_bitmap_col() const { return skip_bitmap_col_idx != -1; } - void _append_empty_skip_bitmap_value(Block& block, size_t cur_row_count); + bool _is_flexible_variant_column(const SlotDescriptor& slot_desc) const; + void _append_empty_skip_bitmap_value(Block& block, size_t cur_row_count) const; void _set_skip_bitmap_mark(SlotDescriptor* slot_desc, IColumn* column_ptr, Block& block, - size_t cur_row_count, bool* valid); + size_t cur_row_count, bool* valid) const; RuntimeState* _state = nullptr; RuntimeProfile* _profile = nullptr; ScannerCounter* _counter = nullptr; diff --git a/be/src/load/delta_writer/delta_writer_v2.cpp b/be/src/load/delta_writer/delta_writer_v2.cpp index 78271f2a48202e..d0d72d49d0e922 100644 --- a/be/src/load/delta_writer/delta_writer_v2.cpp +++ b/be/src/load/delta_writer/delta_writer_v2.cpp @@ -243,7 +243,8 @@ Status DeltaWriterV2::_build_current_tablet_schema(int64_t index_id, table_schema_param->partial_update_input_columns(), table_schema_param->is_strict_mode(), table_schema_param->timestamp_ms(), table_schema_param->nano_seconds(), table_schema_param->timezone(), - table_schema_param->auto_increment_coulumn())); + table_schema_param->auto_increment_coulumn(), + table_schema_param->sequence_map_col_uid())); return Status::OK(); } diff --git a/be/src/load/memtable/memtable.cpp b/be/src/load/memtable/memtable.cpp index 588d8543d7b4b4..49a9470e43a0fd 100644 --- a/be/src/load/memtable/memtable.cpp +++ b/be/src/load/memtable/memtable.cpp @@ -29,6 +29,7 @@ #include "bvar/bvar.h" #include "common/config.h" #include "core/column/column.h" +#include "core/column/column_complex.h" #include "exprs/aggregate/aggregate_function_reader.h" #include "exprs/aggregate/aggregate_function_simple_factory.h" #include "load/memtable/memtable_memory_limiter.h" @@ -689,6 +690,9 @@ void MemTable::shrink_memtable_by_agg() { if (_keys_type == KeysType::DUP_KEYS) { return; } + if (_has_flexible_variant_patch_rows()) { + return; + } size_t same_keys_num = _sort(); if (same_keys_num != 0) { (_skip_bitmap_col_idx == -1) ? 
_aggregate() : _aggregate(); @@ -750,9 +754,34 @@ size_t MemTable::get_flush_reserve_memory_size() const { return static_cast(static_cast(_input_mutable_block.allocated_bytes()) * 1.2); } +bool MemTable::_has_flexible_variant_patch_rows() const { + if (_partial_update_mode != UniqueKeyUpdateModePB::UPDATE_FLEXIBLE_COLUMNS || + _tablet_schema->num_variant_columns() == 0 || _skip_bitmap_col_idx == -1) { + return false; + } + DCHECK_LT(_skip_bitmap_col_idx, _input_mutable_block.columns()); + const auto& skip_bitmaps = + assert_cast( + *_input_mutable_block.get_column_by_position(_skip_bitmap_col_idx)) + .get_data(); + for (size_t cid = _tablet_schema->num_key_columns(); cid < _num_columns; ++cid) { + const auto& column = _tablet_schema->column(cid); + if (!column.is_variant_type()) { + continue; + } + for (const auto& skip_bitmap : skip_bitmaps) { + if (!skip_bitmap.contains(column.unique_id())) { + return true; + } + } + } + return false; +} + Status MemTable::_to_block(std::unique_ptr* res) { size_t same_keys_num = _sort(); - if (_keys_type == KeysType::DUP_KEYS || same_keys_num == 0) { + if (_keys_type == KeysType::DUP_KEYS || same_keys_num == 0 || + _has_flexible_variant_patch_rows()) { if (_keys_type == KeysType::DUP_KEYS && _tablet_schema->num_key_columns() == 0) { _output_mutable_block.swap(_input_mutable_block); } else { diff --git a/be/src/load/memtable/memtable.h b/be/src/load/memtable/memtable.h index 42f96dd4f5f769..4620f541ef2ce8 100644 --- a/be/src/load/memtable/memtable.h +++ b/be/src/load/memtable/memtable.h @@ -280,6 +280,7 @@ class MemTable { DorisVector>& temp_row_in_blocks); Status _put_into_output(Block& in_block); + bool _has_flexible_variant_patch_rows() const; bool _is_first_insertion; void _init_agg_functions(const Block* block); diff --git a/be/src/service/http/action/stream_load.cpp b/be/src/service/http/action/stream_load.cpp index 5d9cf40aea7483..b5990f6aa424fe 100644 --- a/be/src/service/http/action/stream_load.cpp +++ b/be/src/service/http/action/stream_load.cpp @@ -635,7 +635,8 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req, StringCaseMap merge_type_map = {{"APPEND", TMergeType::APPEND}, {"DELETE", TMergeType::DELETE}, {"MERGE", TMergeType::MERGE}}; - if (!http_req->header(HTTP_MERGE_TYPE).empty()) { + bool merge_type_specified = !http_req->header(HTTP_MERGE_TYPE).empty(); + if (merge_type_specified) { std::string merge_type_str = http_req->header(HTTP_MERGE_TYPE); auto iter = merge_type_map.find(merge_type_str); if (iter != merge_type_map.end()) { @@ -652,6 +653,7 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req, } } request.__set_merge_type(merge_type); + request.__set_merge_type_specified(merge_type_specified); if (!http_req->header(HTTP_DELETE_CONDITION).empty()) { request.__set_delete_condition(http_req->header(HTTP_DELETE_CONDITION)); } diff --git a/be/src/storage/partial_update_info.cpp b/be/src/storage/partial_update_info.cpp index 7b97ecfc081167..1cd80e628d2b4d 100644 --- a/be/src/storage/partial_update_info.cpp +++ b/be/src/storage/partial_update_info.cpp @@ -20,6 +20,7 @@ #include #include +#include #include "common/consts.h" #include "common/logging.h" @@ -27,6 +28,7 @@ #include "core/block/block.h" #include "core/data_type/data_type_number.h" // IWYU pragma: keep #include "core/value/bitmap_value.h" +#include "exec/common/variant_util.h" #include "storage/iterator/olap_data_convertor.h" #include "storage/olap_common.h" #include "storage/rowset/rowset.h" @@ -126,6 +128,7 @@ void 
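// --- Illustrative sketch (not an applied hunk): the predicate behind
// _has_flexible_variant_patch_rows above. A row carries a VARIANT patch when its skip
// bitmap does NOT contain the variant column's unique id (i.e. the load supplied a
// value for it); any such row disables in-memtable pre-aggregation so duplicate keys
// reach the segment writer unmerged. std::set stands in for BitmapValue here.
#include <cstdint>
#include <set>
#include <vector>

inline bool has_variant_patch_rows(const std::vector<std::set<uint64_t>>& skip_bitmaps,
                                   const std::vector<int32_t>& variant_col_uids) {
    for (int32_t uid : variant_col_uids) {
        for (const auto& skip_bitmap : skip_bitmaps) {
            if (!skip_bitmap.contains(static_cast<uint64_t>(uid))) {
                return true; // this row patches the variant column
            }
        }
    }
    return false;
}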
PartialUpdateInfo::to_pb(PartialUpdateInfoPB* partial_update_info_pb) const is_input_columns_contains_auto_inc_column); partial_update_info_pb->set_is_schema_contains_auto_inc_column( is_schema_contains_auto_inc_column); + partial_update_info_pb->set_sequence_map_col_uid(sequence_map_col_unqiue_id); for (const auto& value : default_values) { partial_update_info_pb->add_default_values(value); } @@ -169,6 +172,9 @@ void PartialUpdateInfo::from_pb(PartialUpdateInfoPB* partial_update_info_pb) { partial_update_info_pb->is_input_columns_contains_auto_inc_column(); is_schema_contains_auto_inc_column = partial_update_info_pb->is_schema_contains_auto_inc_column(); + sequence_map_col_unqiue_id = partial_update_info_pb->has_sequence_map_col_uid() + ? partial_update_info_pb->sequence_map_col_uid() + : -1; if (partial_update_info_pb->has_nano_seconds()) { nano_seconds = partial_update_info_pb->nano_seconds(); } @@ -464,6 +470,9 @@ void FlexibleReadPlan::prepare_to_read(const RowLocation& row_location, size_t p const BitmapValue& skip_bitmap) { if (!use_row_store) { for (uint64_t col_uid : skip_bitmap) { + if (variant_util::is_variant_patch_path_marker(col_uid)) { + continue; + } plan[row_location.rowset_id][row_location.segment_id][static_cast(col_uid)] .emplace_back(row_location.row_id, pos); } @@ -567,26 +576,36 @@ Status FlexibleReadPlan::fill_non_primary_key_columns( return Status::OK(); } -static void fill_non_primary_key_cell_for_column_store( +static bool old_row_has_delete_sign_for_column_store( + const signed char* delete_sign_column_data, const TabletSchema& tablet_schema, + std::map>& read_index, uint32_t segment_pos) { + if (delete_sign_column_data == nullptr) { + return false; + } + if (auto it = read_index[tablet_schema.delete_sign_idx()].find(segment_pos); + it != read_index[tablet_schema.delete_sign_idx()].end()) { + return delete_sign_column_data[it->second] != 0; + } + return false; +} + +static Status fill_non_primary_key_cell_for_column_store( const TabletColumn& tablet_column, uint32_t cid, MutableColumnPtr& new_col, const IColumn& default_value_col, const IColumn& old_value_col, const IColumn& cur_col, std::size_t block_pos, uint32_t segment_pos, bool skipped, bool row_has_sequence_col, bool use_default, const signed char* delete_sign_column_data, const TabletSchema& tablet_schema, - std::map>& read_index, - const PartialUpdateInfo* info) { + std::map>& read_index, const PartialUpdateInfo* info, + const BitmapValue& skip_bitmap) { if (skipped) { - DCHECK(cid != tablet_schema.skip_bitmap_col_idx()); - DCHECK(cid != tablet_schema.version_col_idx()); + DCHECK(std::cmp_not_equal(cid, tablet_schema.skip_bitmap_col_idx())); + DCHECK(std::cmp_not_equal(cid, tablet_schema.version_col_idx())); DCHECK(!tablet_column.is_row_store_column()); if (!use_default) { if (delete_sign_column_data != nullptr) { - bool old_row_delete_sign = false; - if (auto it = read_index[tablet_schema.delete_sign_idx()].find(segment_pos); - it != read_index[tablet_schema.delete_sign_idx()].end()) { - old_row_delete_sign = (delete_sign_column_data[it->second] != 0); - } + bool old_row_delete_sign = old_row_has_delete_sign_for_column_store( + delete_sign_column_data, tablet_schema, read_index, segment_pos); if (old_row_delete_sign) { if (!tablet_schema.has_sequence_col()) { @@ -625,8 +644,18 @@ static void fill_non_primary_key_cell_for_column_store( new_col->insert_from(old_value_col, pos_in_old_block); } } else { + if (tablet_column.is_variant_type() && !use_default && + 
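// --- Illustrative sketch (not an applied hunk): why FlexibleReadPlan::prepare_to_read
// above skips marker entries. The skip bitmap mixes two kinds of values: small entries
// that are top-level column unique ids, and entries with bit 63 set that encode VARIANT
// patch paths. Only the former name columns to read back from old rows. std::set stands
// in for BitmapValue; the names below are hypothetical.
#include <cstdint>
#include <set>

namespace read_plan_sketch {
constexpr uint64_t kPatchMarkerBit = 1ULL << 63; // VARIANT_PATCH_PATH_MARKER_MASK

inline std::set<uint32_t> columns_to_read(const std::set<uint64_t>& skip_bitmap) {
    std::set<uint32_t> col_uids;
    for (uint64_t entry : skip_bitmap) {
        if ((entry & kPatchMarkerBit) == 0) {
            col_uids.insert(static_cast<uint32_t>(entry)); // plain column unique id
        }
    }
    return col_uids;
}
} // namespace read_plan_sketch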
!old_row_has_delete_sign_for_column_store(delete_sign_column_data, tablet_schema, + read_index, segment_pos) && + read_index.contains(cid) && read_index.at(cid).contains(segment_pos)) { + RETURN_IF_ERROR(variant_util::merge_variant_patch_by_path_markers( + old_value_col, read_index.at(cid).at(segment_pos), cur_col, block_pos, + tablet_column.unique_id(), skip_bitmap, false, *new_col)); + return Status::OK(); + } new_col->insert_from(cur_col, block_pos); } + return Status::OK(); } Status FlexibleReadPlan::fill_non_primary_key_columns_for_column_store( @@ -665,7 +694,7 @@ Status FlexibleReadPlan::fill_non_primary_key_columns_for_column_store( auto segment_pos = segment_start_pos + idx; auto block_pos = block_start_pos + idx; - fill_non_primary_key_cell_for_column_store( + RETURN_IF_ERROR(fill_non_primary_key_cell_for_column_store( tablet_column, cid, mutable_full_columns[cid], *default_value_block.get_by_position(i).column, *old_value_block.get_by_position(i).column, *block->get_by_position(cid).column, @@ -674,23 +703,25 @@ Status FlexibleReadPlan::fill_non_primary_key_columns_for_column_store( ? !skip_bitmaps->at(block_pos).contains(seq_col_unique_id) : false, use_default_or_null_flag[idx], delete_sign_column_data, tablet_schema, - read_index, info); + read_index, info, skip_bitmaps->at(block_pos))); } } return Status::OK(); } -static void fill_non_primary_key_cell_for_row_store( +static Status fill_non_primary_key_cell_for_row_store( const TabletColumn& tablet_column, uint32_t cid, MutableColumnPtr& new_col, const IColumn& default_value_col, const IColumn& old_value_col, const IColumn& cur_col, std::size_t block_pos, bool skipped, bool row_has_sequence_col, bool use_default, - const signed char* delete_sign_column_data, uint32_t pos_in_old_block, - const TabletSchema& tablet_schema, const PartialUpdateInfo* info) { + const signed char* delete_sign_column_data, bool old_row_exists, uint32_t pos_in_old_block, + const TabletSchema& tablet_schema, const PartialUpdateInfo* info, + const BitmapValue& skip_bitmap) { if (skipped) { - DCHECK(cid != tablet_schema.skip_bitmap_col_idx()); - DCHECK(cid != tablet_schema.version_col_idx()); + DCHECK(std::cmp_not_equal(cid, tablet_schema.skip_bitmap_col_idx())); + DCHECK(std::cmp_not_equal(cid, tablet_schema.version_col_idx())); DCHECK(!tablet_column.is_row_store_column()); if (!use_default) { + DCHECK(old_row_exists); if (delete_sign_column_data != nullptr) { bool old_row_delete_sign = (delete_sign_column_data[pos_in_old_block] != 0); if (old_row_delete_sign) { @@ -730,8 +761,18 @@ static void fill_non_primary_key_cell_for_row_store( new_col->insert_from(old_value_col, pos_in_old_block); } } else { + bool old_row_delete_sign = (old_row_exists && delete_sign_column_data != nullptr && + delete_sign_column_data[pos_in_old_block] != 0); + if (tablet_column.is_variant_type() && old_row_exists && !use_default && + !old_row_delete_sign) { + RETURN_IF_ERROR(variant_util::merge_variant_patch_by_path_markers( + old_value_col, pos_in_old_block, cur_col, block_pos, tablet_column.unique_id(), + skip_bitmap, false, *new_col)); + return Status::OK(); + } new_col->insert_from(cur_col, block_pos); } + return Status::OK(); } Status FlexibleReadPlan::fill_non_primary_key_columns_for_row_store( @@ -768,9 +809,11 @@ Status FlexibleReadPlan::fill_non_primary_key_columns_for_row_store( for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) { auto segment_pos = segment_start_pos + idx; auto block_pos = block_start_pos + idx; - auto pos_in_old_block = 
read_index[segment_pos]; + auto read_index_iter = read_index.find(segment_pos); + bool old_row_exists = (read_index_iter != read_index.end()); + uint32_t pos_in_old_block = old_row_exists ? read_index_iter->second : 0; - fill_non_primary_key_cell_for_row_store( + RETURN_IF_ERROR(fill_non_primary_key_cell_for_row_store( tablet_column, cid, mutable_full_columns[cid], *default_value_block.get_by_position(i).column, *old_value_block.get_by_position(i).column, *block->get_by_position(cid).column, @@ -778,8 +821,8 @@ Status FlexibleReadPlan::fill_non_primary_key_columns_for_row_store( tablet_schema.has_sequence_col() ? !skip_bitmaps->at(block_pos).contains(seq_col_unique_id) : false, - use_default_or_null_flag[idx], delete_sign_column_data, pos_in_old_block, - tablet_schema, info); + use_default_or_null_flag[idx], delete_sign_column_data, old_row_exists, + pos_in_old_block, tablet_schema, info, skip_bitmaps->at(block_pos))); } } return Status::OK(); @@ -788,10 +831,10 @@ Status FlexibleReadPlan::fill_non_primary_key_columns_for_row_store( BlockAggregator::BlockAggregator(segment_v2::VerticalSegmentWriter& vertical_segment_writer) : _writer(vertical_segment_writer), _tablet_schema(*_writer._tablet_schema) {} -void BlockAggregator::merge_one_row(MutableBlock& dst_block, Block* src_block, int rid, - BitmapValue& skip_bitmap) { +Status BlockAggregator::merge_one_row(MutableBlock& dst_block, Block* src_block, int rid, + BitmapValue& skip_bitmap) { for (size_t cid {_tablet_schema.num_key_columns()}; cid < _tablet_schema.num_columns(); cid++) { - if (cid == _tablet_schema.skip_bitmap_col_idx()) { + if (std::cmp_equal(cid, _tablet_schema.skip_bitmap_col_idx())) { auto& cur_skip_bitmap = assert_cast(dst_block.mutable_columns()[cid].get()) ->get_data() @@ -800,17 +843,31 @@ void BlockAggregator::merge_one_row(MutableBlock& dst_block, Block* src_block, i assert_cast( src_block->get_by_position(cid).column->assume_mutable().get()) ->get_data()[rid]; - cur_skip_bitmap &= new_row_skip_bitmap; + BitmapValue merged_skip_bitmap; + RETURN_IF_ERROR(variant_util::merge_variant_patch_path_markers( + cur_skip_bitmap, new_row_skip_bitmap, &merged_skip_bitmap)); + cur_skip_bitmap = std::move(merged_skip_bitmap); continue; } if (!skip_bitmap.contains(_tablet_schema.column(cid).unique_id())) { - dst_block.mutable_columns()[cid]->pop_back(1); - dst_block.mutable_columns()[cid]->insert_from(*src_block->get_by_position(cid).column, - rid); + if (_tablet_schema.column(cid).is_variant_type()) { + auto merged_col = dst_block.mutable_columns()[cid]->clone_empty(); + RETURN_IF_ERROR(variant_util::merge_variant_patch( + *dst_block.mutable_columns()[cid], + dst_block.mutable_columns()[cid]->size() - 1, + *src_block->get_by_position(cid).column, rid, *merged_col)); + dst_block.mutable_columns()[cid]->pop_back(1); + dst_block.mutable_columns()[cid]->insert_from(*merged_col, 0); + } else { + dst_block.mutable_columns()[cid]->pop_back(1); + dst_block.mutable_columns()[cid]->insert_from( + *src_block->get_by_position(cid).column, rid); + } } } VLOG_DEBUG << fmt::format("merge a row, after merge, output_block.rows()={}, state: {}", dst_block.rows(), _state.to_string()); + return Status::OK(); } void BlockAggregator::append_one_row(MutableBlock& dst_block, Block* src_block, int rid) { @@ -829,8 +886,8 @@ void BlockAggregator::remove_last_n_rows(MutableBlock& dst_block, int n) { } } -void BlockAggregator::append_or_merge_row(MutableBlock& dst_block, Block* src_block, int rid, - BitmapValue& skip_bitmap, bool have_delete_sign) { +Status 
BlockAggregator::append_or_merge_row(MutableBlock& dst_block, Block* src_block, int rid, + BitmapValue& skip_bitmap, bool have_delete_sign) { if (have_delete_sign) { // remove all the previous batched rows remove_last_n_rows(dst_block, _state.rows); @@ -840,11 +897,12 @@ void BlockAggregator::append_or_merge_row(MutableBlock& dst_block, Block* src_bl append_one_row(dst_block, src_block, rid); } else { if (_state.should_merge()) { - merge_one_row(dst_block, src_block, rid, skip_bitmap); + RETURN_IF_ERROR(merge_one_row(dst_block, src_block, rid, skip_bitmap)); } else { append_one_row(dst_block, src_block, rid); } } + return Status::OK(); }; Status BlockAggregator::aggregate_rows( @@ -920,12 +978,14 @@ Status BlockAggregator::aggregate_rows( bool have_delete_sign = (!skip_bitmap.contains(delete_sign_col_unique_id) && delete_signs[rid] != 0); if (!row_has_sequence_col) { - append_or_merge_row(output_block, block, rid, skip_bitmap, have_delete_sign); + RETURN_IF_ERROR( + append_or_merge_row(output_block, block, rid, skip_bitmap, have_delete_sign)); } else { std::string seq_val {}; _writer._encode_seq_column(seq_column, rid, &seq_val); if (Slice {seq_val}.compare(Slice {cur_seq_val}) >= 0) { - append_or_merge_row(output_block, block, rid, skip_bitmap, have_delete_sign); + RETURN_IF_ERROR(append_or_merge_row(output_block, block, rid, skip_bitmap, + have_delete_sign)); cur_seq_val = std::move(seq_val); } else { VLOG_DEBUG << fmt::format( @@ -980,6 +1040,63 @@ Status BlockAggregator::aggregate_for_sequence_column( return Status::OK(); } +Status BlockAggregator::aggregate_without_sequence_column( + Block* block, size_t num_rows, const std::vector& key_columns) { + DCHECK_EQ(block->columns(), _tablet_schema.num_columns()); + std::vector* skip_bitmaps = &( + assert_cast(block->get_by_position(_tablet_schema.skip_bitmap_col_idx()) + .column->assume_mutable() + .get()) + ->get_data()); + const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*block, num_rows); + DCHECK(delete_signs != nullptr); + int32_t delete_sign_col_unique_id = + _tablet_schema.column(_tablet_schema.delete_sign_idx()).unique_id(); + + auto aggregated_block = _tablet_schema.create_block(); + MutableBlock output_block = MutableBlock::build_mutable_block(&aggregated_block); + + auto aggregate_range = [&](int start, int end) -> Status { + if (end - start == 1) { + output_block.add_row(block, start); + return Status::OK(); + } + _state.reset(); + for (int rid = start; rid < end; ++rid) { + auto& skip_bitmap = skip_bitmaps->at(rid); + bool have_delete_sign = + (!skip_bitmap.contains(delete_sign_col_unique_id) && delete_signs[rid] != 0); + RETURN_IF_ERROR( + append_or_merge_row(output_block, block, rid, skip_bitmap, have_delete_sign)); + } + return Status::OK(); + }; + + int same_key_rows {0}; + std::string previous_key {}; + const auto num_rows_int = static_cast(num_rows); + for (int block_pos {0}; block_pos < num_rows_int; block_pos++) { + std::string key = _writer._full_encode_keys(key_columns, block_pos); + if (block_pos > 0 && previous_key == key) { + same_key_rows++; + } else { + if (same_key_rows > 0) { + RETURN_IF_ERROR(aggregate_range(block_pos - same_key_rows, block_pos)); + } + same_key_rows = 1; + } + previous_key = std::move(key); + } + if (same_key_rows > 0) { + RETURN_IF_ERROR(aggregate_range(num_rows_int - same_key_rows, num_rows_int)); + } + + if (output_block.rows() != num_rows) { + block->swap(output_block.to_block()); + } + return Status::OK(); +} + Status BlockAggregator::fill_sequence_column(Block* block, 
                                           size_t num_rows, const FixedReadPlan& read_plan,
                                           std::vector& skip_bitmaps) {
@@ -1105,7 +1222,7 @@ Status BlockAggregator::filter_block(Block* block, size_t num_rows, MutableColum
     RETURN_IF_ERROR(Block::filter_block(block, num_cols, num_cols));
     DCHECK_EQ(num_cols, block->columns());
     size_t merged_rows = num_rows - block->rows();
-    if (duplicate_rows != merged_rows) {
+    if (std::cmp_not_equal(duplicate_rows, merged_rows)) {
         auto msg = fmt::format(
                 "filter_block_for_flexible_partial_update {}: duplicate_rows != merged_rows, "
                 "duplicate_keys={}, merged_rows={}, num_rows={}, mutable_block->rows()={}",
@@ -1164,6 +1281,8 @@ Status BlockAggregator::aggregate_for_flexible_partial_update(
         RETURN_IF_ERROR(aggregate_for_sequence_column(block, static_cast(num_rows),
                                                       key_columns, seq_column, specified_rowsets,
                                                       segment_caches));
+    } else {
+        RETURN_IF_ERROR(aggregate_without_sequence_column(block, num_rows, key_columns));
     }

     // 2. merge duplicate rows and handle insert after delete
diff --git a/be/src/storage/partial_update_info.h b/be/src/storage/partial_update_info.h
index 6371a79fe71db9..5cc2f4c9d77340 100644
--- a/be/src/storage/partial_update_info.h
+++ b/be/src/storage/partial_update_info.h
@@ -209,16 +209,19 @@ class BlockAggregator {
             Block* block, size_t num_rows, const std::vector& key_columns,
             const std::vector& specified_rowsets,
             std::vector>& segment_caches);
+    Status aggregate_without_sequence_column(
+            Block* block, size_t num_rows,
+            const std::vector& key_columns);
     Status filter_block(Block* block, size_t num_rows, MutableColumnPtr filter_column,
                         int duplicate_rows, std::string col_name);
     Status fill_sequence_column(Block* block, size_t num_rows, const FixedReadPlan& read_plan,
                                 std::vector& skip_bitmaps);
-    void append_or_merge_row(MutableBlock& dst_block, Block* src_block, int rid,
-                             BitmapValue& skip_bitmap, bool have_delete_sign);
-    void merge_one_row(MutableBlock& dst_block, Block* src_block, int rid,
-                       BitmapValue& skip_bitmap);
+    Status append_or_merge_row(MutableBlock& dst_block, Block* src_block, int rid,
+                               BitmapValue& skip_bitmap, bool have_delete_sign);
+    Status merge_one_row(MutableBlock& dst_block, Block* src_block, int rid,
+                         BitmapValue& skip_bitmap);
     void append_one_row(MutableBlock& dst_block, Block* src_block, int rid);
     void remove_last_n_rows(MutableBlock& dst_block, int n);
diff --git a/be/src/storage/rowset_builder.cpp b/be/src/storage/rowset_builder.cpp
index aa075f386c48c3..4b63050d48cfc7 100644
--- a/be/src/storage/rowset_builder.cpp
+++ b/be/src/storage/rowset_builder.cpp
@@ -303,6 +303,19 @@ Status GroupRowsetBuilder::build_rowset() {
     return _txn_rs_builder->build_rowset();
 }

+Status BaseRowsetBuilder::_check_flexible_partial_update_single_segment() const {
+    if (_partial_update_info && _partial_update_info->is_flexible_partial_update() &&
+        _rowset->num_segments() > 1) {
+        // In flexible partial update, when one load produces more than one segment,
+        // we need to run an alignment process for identical keys across segments.
+        // We haven't implemented that yet, so we report an error when we encounter
+        // this situation.
+        return Status::NotSupported(
+                "too large input data in flexible partial update, Please "
+                "reduce the amount of data imported in a single load.");
+    }
+    return Status::OK();
+}
+
 Status BaseRowsetBuilder::submit_calc_delete_bitmap_task() {
     DCHECK(is_data_builder());
     if (!_tablet->enable_unique_key_merge_on_write() || _rowset->num_segments() == 0) {
         return Status::OK();
     }
     std::lock_guard l(_lock);
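    // A minimal sketch (not part of this patch) of the contract the helper above
    // enforces; `builder` is a hypothetical BaseRowsetBuilder whose flexible
    // partial update load has already flushed two segments:
    //
    //   Status st = builder._check_flexible_partial_update_single_segment();
    //   DCHECK(st.is<ErrorCode::NOT_SUPPORTED>());  // multi-segment loads are rejected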
SCOPED_TIMER(_submit_delete_bitmap_timer); - if (_partial_update_info && _partial_update_info->is_flexible_partial_update()) { - if (_rowset->num_segments() > 1) { - // in flexible partial update, when there are more one segment in one load, - // we need to do alignment process for same keys between segments, we haven't - // implemented it yet and just report an error when encouter this situation - return Status::NotSupported( - "too large input data in flexible partial update, Please " - "reduce the amount of data imported in a single load."); - } - } + RETURN_IF_ERROR(_check_flexible_partial_update_single_segment()); auto* beta_rowset = reinterpret_cast(_rowset.get()); std::vector segments; diff --git a/be/src/storage/rowset_builder.h b/be/src/storage/rowset_builder.h index edc97432eaf9c9..514f372b977a05 100644 --- a/be/src/storage/rowset_builder.h +++ b/be/src/storage/rowset_builder.h @@ -97,6 +97,8 @@ class BaseRowsetBuilder { Status init_mow_context(std::shared_ptr& mow_context); protected: + Status _check_flexible_partial_update_single_segment() const; + Status _build_current_tablet_schema(int64_t index_id, const OlapTableSchemaParam* table_schema_param, const TabletSchema& ori_tablet_schema); diff --git a/be/src/storage/segment/column_writer.cpp b/be/src/storage/segment/column_writer.cpp index 1ba371233c4671..08424f47e2a8a2 100644 --- a/be/src/storage/segment/column_writer.cpp +++ b/be/src/storage/segment/column_writer.cpp @@ -1391,6 +1391,7 @@ Status VariantColumnWriter::write_bloom_filter_index() { Status VariantColumnWriter::append_nullable(const uint8_t* null_map, const uint8_t** ptr, size_t num_rows) { + _next_rowid += num_rows; return _impl->append_nullable(null_map, ptr, num_rows); } diff --git a/be/src/storage/segment/variant/variant_column_writer_impl.cpp b/be/src/storage/segment/variant/variant_column_writer_impl.cpp index 8ad08640ba19bd..d12365ef52b8db 100644 --- a/be/src/storage/segment/variant/variant_column_writer_impl.cpp +++ b/be/src/storage/segment/variant/variant_column_writer_impl.cpp @@ -1625,6 +1625,7 @@ Status VariantSubcolumnWriter::append_data(const uint8_t** ptr, size_t num_rows) const auto& src = *reinterpret_cast(column->column_data); // TODO: if direct write we could avoid copy _column->insert_range_from(src, column->row_pos, num_rows); + _next_rowid += num_rows; return Status::OK(); } @@ -1752,6 +1753,7 @@ Status VariantDocCompactWriter::append_data(const uint8_t** ptr, size_t num_rows auto* dst_ptr = assert_cast(_column.get()); // TODO: if direct write we could avoid copy dst_ptr->insert_range_from(src, column->row_pos, num_rows); + _next_rowid += num_rows; return Status::OK(); } diff --git a/be/src/storage/segment/vertical_segment_writer.cpp b/be/src/storage/segment/vertical_segment_writer.cpp index 78031d1f7d4429..5dc4d2f782b525 100644 --- a/be/src/storage/segment/vertical_segment_writer.cpp +++ b/be/src/storage/segment/vertical_segment_writer.cpp @@ -499,6 +499,12 @@ Status VerticalSegmentWriter::_partial_update_preconditions_check(size_t row_pos if (row_pos != 0) { auto msg = fmt::format("row_pos should be 0, but found {}, tablet_id={}", row_pos, _tablet->tablet_id()); + if (is_flexible_update) { + return Status::NotSupported( + "{}. 
Flexible partial update currently relies on whole-block duplicate-key " + "aggregation before writing VARIANT patches.", + msg); + } DCHECK(false) << msg; return Status::InternalError(msg); } @@ -735,6 +741,42 @@ Status VerticalSegmentWriter::_append_block_with_flexible_partial_content(RowsIn RETURN_IF_ERROR(_create_column_writer(cid, _tablet_schema->column(cid), _tablet_schema)); } + std::vector* skip_bitmaps = &( + assert_cast( + data.block->get_by_position(skip_bitmap_col_idx).column->assume_mutable().get()) + ->get_data()); + if (_tablet_schema->num_variant_columns() > 0) { + if (_tablet_schema->deprecated_variant_flatten_nested()) { + return Status::NotSupported( + "VARIANT flexible partial update does not support " + "deprecated_variant_enable_flatten_nested in this version"); + } + std::vector variant_cids; + variant_cids.reserve(_tablet_schema->num_variant_columns()); + for (size_t cid = _tablet_schema->num_key_columns(); cid < _tablet_schema->num_columns(); + ++cid) { + const auto& column = _tablet_schema->column(cid); + if (!column.is_variant_type()) { + continue; + } + variant_cids.push_back(cast_set(cid)); + } + RETURN_IF_ERROR(variant_util::parse_and_materialize_variant_columns( + *const_cast(data.block), *_tablet_schema, variant_cids, true)); + for (auto cid : variant_cids) { + const auto& column = _tablet_schema->column(cid); + for (size_t block_pos = data.row_pos; block_pos < data.row_pos + data.num_rows; + ++block_pos) { + auto& skip_bitmap = skip_bitmaps->at(block_pos); + if (!skip_bitmap.contains(column.unique_id())) { + RETURN_IF_ERROR(variant_util::mark_variant_patch_paths( + *data.block->get_by_position(cid).column, block_pos, column.unique_id(), + &skip_bitmap)); + } + } + } + } + // 1. aggregate duplicate rows in block RETURN_IF_ERROR(_block_aggregator.aggregate_for_flexible_partial_update( const_cast(data.block), data.num_rows, specified_rowsets, segment_caches)); @@ -742,6 +784,10 @@ Status VerticalSegmentWriter::_append_block_with_flexible_partial_content(RowsIn data.num_rows = data.block->rows(); _olap_data_convertor->clear_source_content(); } + skip_bitmaps = &( + assert_cast( + data.block->get_by_position(skip_bitmap_col_idx).column->assume_mutable().get()) + ->get_data()); // 2. 
encode primary key columns // we can only encode primary key columns currently becasue all non-primary columns in flexible partial update @@ -758,10 +804,6 @@ Status VerticalSegmentWriter::_append_block_with_flexible_partial_content(RowsIn RETURN_IF_ERROR(_block_aggregator.convert_seq_column(const_cast(data.block), data.row_pos, data.num_rows, seq_column)); - std::vector* skip_bitmaps = &( - assert_cast( - data.block->get_by_position(skip_bitmap_col_idx).column->assume_mutable().get()) - ->get_data()); const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*data.block, data.row_pos + data.num_rows); DCHECK(delete_signs != nullptr); @@ -845,12 +887,11 @@ Status VerticalSegmentWriter::_append_block_with_flexible_partial_content(RowsIn _num_rows_new_added += stats.num_rows_new_added; _num_rows_filtered += stats.num_rows_filtered; - if (_num_rows_written != data.row_pos || - _primary_key_index_builder->num_rows() != _num_rows_written) { + if (_primary_key_index_builder->num_rows() != _num_rows_written) { return Status::InternalError( - "Correctness check failed, _num_rows_written: {}, row_pos: {}, primary key " + "Correctness check failed, _num_rows_written: {}, primary key " "index builder num rows: {}", - _num_rows_written, data.row_pos, _primary_key_index_builder->num_rows()); + _num_rows_written, _primary_key_index_builder->num_rows()); } // 9. build primary key index @@ -937,7 +978,22 @@ Status VerticalSegmentWriter::_generate_flexible_read_plan( &skip_bitmap); }; auto update_read_plan = [&](const RowLocation& loc) { - read_plan.prepare_to_read(loc, segment_pos, skip_bitmap); + BitmapValue read_skip_bitmap(skip_bitmap); + if (!have_delete_sign) { + bool should_merge_variant = false; + for (size_t cid = _tablet_schema->num_key_columns(); + cid < _tablet_schema->num_columns(); ++cid) { + const auto& column = _tablet_schema->column(cid); + if (column.is_variant_type() && !skip_bitmap.contains(column.unique_id())) { + read_skip_bitmap.add(column.unique_id()); + should_merge_variant = true; + } + } + if (should_merge_variant) { + read_skip_bitmap.add(delete_sign_col_unique_id); + } + } + read_plan.prepare_to_read(loc, segment_pos, read_skip_bitmap); }; RETURN_IF_ERROR(_probe_key_for_mow(std::move(key), segment_pos, row_has_sequence_col, diff --git a/be/src/storage/tablet/base_tablet.cpp b/be/src/storage/tablet/base_tablet.cpp index eb59206165c96a..cfe6f3569d2521 100644 --- a/be/src/storage/tablet/base_tablet.cpp +++ b/be/src/storage/tablet/base_tablet.cpp @@ -25,8 +25,10 @@ #include #include #include +#include #include #include +#include #include "cloud/cloud_tablet.h" #include "cloud/config.h" @@ -36,6 +38,7 @@ #include "common/status.h" #include "core/assert_cast.h" #include "core/data_type/data_type_factory.hpp" +#include "exec/common/variant_util.h" #include "load/memtable/memtable.h" #include "service/point_query_executor.h" #include "storage/compaction/cumulative_compaction_time_series_policy.h" @@ -997,8 +1000,9 @@ Status BaseTablet::generate_new_block_for_partial_update( RETURN_IF_ERROR(read_plan_update.read_columns_by_plan( *rowset_schema, update_cids, rsid_to_rowset, update_block, &read_index_update, false)); size_t update_rows = read_index_update.size(); + DCHECK_LE(update_rows, std::numeric_limits::max()); for (auto i = 0; i < update_cids.size(); ++i) { - for (auto idx = 0; idx < update_rows; ++idx) { + for (uint32_t idx = 0; std::cmp_less(idx, update_rows); ++idx) { full_mutable_columns[update_cids[i]]->insert_from( *update_block.get_by_position(i).column, 
read_index_update[idx]); } @@ -1032,7 +1036,7 @@ Status BaseTablet::generate_new_block_for_partial_update( for (auto i = 0; i < missing_cids.size(); ++i) { const auto& rs_column = rowset_schema->column(missing_cids[i]); auto& mutable_column = full_mutable_columns[missing_cids[i]]; - for (auto idx = 0; idx < update_rows; ++idx) { + for (uint32_t idx = 0; std::cmp_less(idx, update_rows); ++idx) { // There are two cases we don't need to read values from old data: // 1. if the conflicting new row's delete sign is marked, which means the value columns // of the row will not be read. So we don't need to read the missing values from the previous rows. @@ -1084,13 +1088,14 @@ Status BaseTablet::generate_new_block_for_partial_update( return Status::OK(); } -static void fill_cell_for_flexible_partial_update( +static Status fill_cell_for_flexible_partial_update( std::map& read_index_old, std::map& read_index_update, const TabletSchemaSPtr& rowset_schema, const PartialUpdateInfo* partial_update_info, const TabletColumn& tablet_column, std::size_t idx, MutableColumnPtr& new_col, const IColumn& default_value_col, const IColumn& old_value_col, const IColumn& cur_col, bool skipped, - bool row_has_sequence_col, const signed char* delete_sign_column_data) { + bool row_has_sequence_col, const signed char* delete_sign_column_data, + const BitmapValue& skip_bitmap) { if (skipped) { bool use_default = false; bool old_row_delete_sign = @@ -1128,8 +1133,19 @@ static void fill_cell_for_flexible_partial_update( new_col->insert_from(old_value_col, read_index_old[cast_set(idx)]); } } else { + bool old_row_delete_sign = + (delete_sign_column_data != nullptr && + delete_sign_column_data[read_index_old[cast_set(idx)]] != 0); + if (tablet_column.is_variant_type()) { + RETURN_IF_ERROR(variant_util::merge_variant_patch_by_path_markers( + old_value_col, read_index_old[cast_set(idx)], cur_col, + read_index_update[cast_set(idx)], tablet_column.unique_id(), + skip_bitmap, old_row_delete_sign, *new_col)); + return Status::OK(); + } new_col->insert_from(cur_col, read_index_update[cast_set(idx)]); } + return Status::OK(); } Status BaseTablet::generate_new_block_for_flexible_partial_update( @@ -1211,7 +1227,7 @@ Status BaseTablet::generate_new_block_for_flexible_partial_update( const IColumn& cur_col = *update_block.get_by_position(cid).column; const auto& rs_column = rowset_schema->column(cid); auto col_uid = rs_column.unique_id(); - for (auto idx = 0; idx < update_rows; ++idx) { + for (uint32_t idx = 0; std::cmp_less(idx, update_rows); ++idx) { if (cid < rowset_schema->num_key_columns()) { new_col->insert_from(cur_col, read_index_update[idx]); } else { @@ -1223,14 +1239,14 @@ Status BaseTablet::generate_new_block_for_flexible_partial_update( if (rids_be_overwritten.contains(idx)) { new_col->insert_from(old_value_col, read_index_old[idx]); } else { - fill_cell_for_flexible_partial_update( + RETURN_IF_ERROR(fill_cell_for_flexible_partial_update( read_index_old, read_index_update, rowset_schema, partial_update_info, rs_column, idx, new_col, default_value_col, old_value_col, cur_col, skip_bitmaps->at(idx).contains(col_uid), rowset_schema->has_sequence_col() ? 
!skip_bitmaps->at(idx).contains(seq_col_unique_id) : false, - old_block_delete_signs); + old_block_delete_signs, skip_bitmaps->at(idx))); } } } diff --git a/be/src/util/json/json_parser.cpp b/be/src/util/json/json_parser.cpp index 3df723c3849eac..d1cdffcab6f99c 100644 --- a/be/src/util/json/json_parser.cpp +++ b/be/src/util/json/json_parser.cpp @@ -48,6 +48,8 @@ std::optional JSONDataParser::parse(const char* begin, // NestedGroup expansion is now handled at storage layer context.deprecated_enable_flatten_nested = config.deprecated_enable_flatten_nested; context.check_duplicate_json_path = config.check_duplicate_json_path; + context.reject_json_null_value = config.reject_json_null_value; + context.record_empty_object_path = config.record_empty_object_path; context.is_top_array = document.isArray(); traverse(document, context); ParseResult result; @@ -62,6 +64,11 @@ std::optional JSONDataParser::parse(const char* begin, template void JSONDataParser::traverse(const Element& element, ParseContext& ctx) { // checkStackSize(); + if (element.isNull() && ctx.reject_json_null_value) { + throw doris::Exception( + doris::ErrorCode::INVALID_ARGUMENT, + "VARIANT flexible partial update does not support JSON null patch values"); + } if (element.isObject()) { traverseObject(element.getObject(), ctx); } else if (element.isArray()) { @@ -73,7 +80,7 @@ void JSONDataParser::traverse(const Element& element, ParseContext& if (has_nested && !ctx.deprecated_enable_flatten_nested) { // Parse nested arrays to JsonbField JsonbWriter writer; - traverseArrayAsJsonb(element.getArray(), writer); + traverseArrayAsJsonb(element.getArray(), writer, ctx.reject_json_null_value); appendValueIfNotDuplicate( ctx, ctx.builder.get_parts(), Field::create_field(JsonbField(writer.getOutput()->getBuffer(), @@ -106,6 +113,16 @@ template void JSONDataParser::traverseObject(const JSONObject& object, ParseContext& ctx) { ctx.paths.reserve(ctx.paths.size() + object.size()); ctx.values.reserve(ctx.values.size() + object.size()); + if (object.size() == 0 && ctx.record_empty_object_path && !ctx.builder.get_parts().empty()) { + JsonbWriter writer; + writer.writeStartObject(); + writer.writeEndObject(); + appendValueIfNotDuplicate( + ctx, ctx.builder.get_parts(), + Field::create_field(JsonbField(writer.getOutput()->getBuffer(), + writer.getOutput()->getSize()))); + return; + } auto check_key_length = [](const auto& key) { const size_t max_key_length = cast_set(config::variant_max_json_key_length); if (key.size() > max_key_length) { @@ -142,11 +159,17 @@ void JSONDataParser::check_has_nested_object(const Element& element) } template -void JSONDataParser::traverseAsJsonb(const Element& element, JsonbWriter& writer) { +void JSONDataParser::traverseAsJsonb(const Element& element, JsonbWriter& writer, + bool reject_json_null_value) { + if (element.isNull() && reject_json_null_value) { + throw doris::Exception( + doris::ErrorCode::INVALID_ARGUMENT, + "VARIANT flexible partial update does not support JSON null patch values"); + } if (element.isObject()) { - traverseObjectAsJsonb(element.getObject(), writer); + traverseObjectAsJsonb(element.getObject(), writer, reject_json_null_value); } else if (element.isArray()) { - traverseArrayAsJsonb(element.getArray(), writer); + traverseArrayAsJsonb(element.getArray(), writer, reject_json_null_value); } else { writeValueAsJsonb(element, writer); } @@ -154,7 +177,8 @@ void JSONDataParser::traverseAsJsonb(const Element& element, JsonbWr template void JSONDataParser::traverseObjectAsJsonb(const JSONObject& 
object, - JsonbWriter& writer) { + JsonbWriter& writer, + bool reject_json_null_value) { writer.writeStartObject(); for (auto it = object.begin(); it != object.end(); ++it) { const auto& [key, value] = *it; @@ -166,16 +190,17 @@ void JSONDataParser::traverseObjectAsJsonb(const JSONObject& object, max_key_length)); } writer.writeKey(key.data(), cast_set(key.size())); - traverseAsJsonb(value, writer); + traverseAsJsonb(value, writer, reject_json_null_value); } writer.writeEndObject(); } template -void JSONDataParser::traverseArrayAsJsonb(const JSONArray& array, JsonbWriter& writer) { +void JSONDataParser::traverseArrayAsJsonb(const JSONArray& array, JsonbWriter& writer, + bool reject_json_null_value) { writer.writeStartArray(); for (auto it = array.begin(); it != array.end(); ++it) { - traverseAsJsonb(*it, writer); + traverseAsJsonb(*it, writer, reject_json_null_value); } writer.writeEndArray(); } @@ -201,6 +226,7 @@ void JSONDataParser::traverseArray(const JSONArray& array, ParseCont array_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten; array_ctx.is_top_array = ctx.is_top_array; array_ctx.check_duplicate_json_path = ctx.check_duplicate_json_path; + array_ctx.reject_json_null_value = ctx.reject_json_null_value; array_ctx.total_size = array.size(); for (auto it = array.begin(); it != array.end(); ++it) { traverseArrayElement(*it, array_ctx); @@ -231,6 +257,7 @@ void JSONDataParser::traverseArrayElement(const Element& element, element_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten; element_ctx.is_top_array = ctx.is_top_array; element_ctx.check_duplicate_json_path = ctx.check_duplicate_json_path; + element_ctx.reject_json_null_value = ctx.reject_json_null_value; traverse(element, element_ctx); auto& paths = element_ctx.paths; auto& values = element_ctx.values; diff --git a/be/src/util/json/json_parser.h b/be/src/util/json/json_parser.h index c4a165e899546f..94c21b5a212414 100644 --- a/be/src/util/json/json_parser.h +++ b/be/src/util/json/json_parser.h @@ -102,6 +102,8 @@ void writeValueAsJsonb(const Element& element, JsonbWriter& writer) { struct ParseConfig { bool deprecated_enable_flatten_nested = false; bool check_duplicate_json_path = false; + bool reject_json_null_value = false; + bool record_empty_object_path = false; enum class ParseTo { OnlySubcolumns = 0, OnlyDocValueColumn = 1, @@ -131,6 +133,8 @@ class JSONDataParser { phmap::flat_hash_set visited_path_names; bool deprecated_enable_flatten_nested = false; bool check_duplicate_json_path = false; + bool reject_json_null_value = false; + bool record_empty_object_path = false; bool has_nested_in_flatten = false; bool is_top_array = false; }; @@ -145,6 +149,7 @@ class JSONDataParser { bool has_nested_in_flatten = false; bool is_top_array = false; bool check_duplicate_json_path = false; + bool reject_json_null_value = false; }; void traverse(const Element& element, ParseContext& ctx); void traverseObject(const JSONObject& object, ParseContext& ctx); @@ -165,9 +170,11 @@ class JSONDataParser { bool has_nested = false; void check_has_nested_object(const Element& element); - void traverseAsJsonb(const Element& element, JsonbWriter& writer); - void traverseObjectAsJsonb(const JSONObject& object, JsonbWriter& writer); - void traverseArrayAsJsonb(const JSONArray& array, JsonbWriter& writer); + void traverseAsJsonb(const Element& element, JsonbWriter& writer, bool reject_json_null_value); + void traverseObjectAsJsonb(const JSONObject& object, JsonbWriter& writer, + bool reject_json_null_value); + void traverseArrayAsJsonb(const 
JSONArray& array, JsonbWriter& writer, + bool reject_json_null_value); ParserImpl parser; }; diff --git a/be/test/exec/common/schema_util_rowset_test.cpp b/be/test/exec/common/schema_util_rowset_test.cpp index cf99c9824956c5..2f1aca621f537f 100644 --- a/be/test/exec/common/schema_util_rowset_test.cpp +++ b/be/test/exec/common/schema_util_rowset_test.cpp @@ -714,6 +714,7 @@ TEST_F(SchemaUtilRowsetTest, some_test_for_subcolumn_writer) { _variant_column_data->row_pos = 0; const uint8_t* data = (const uint8_t*)_variant_column_data.get(); EXPECT_TRUE(variant_subcolumn_writer->append_data(&data, 1)); + EXPECT_EQ(1, variant_subcolumn_writer->get_next_rowid()); // write null data EXPECT_TRUE(variant_subcolumn_writer->write_data().ok()); } diff --git a/be/test/storage/partial_update_info_test.cpp b/be/test/storage/partial_update_info_test.cpp new file mode 100644 index 00000000000000..9b397a2703f84d --- /dev/null +++ b/be/test/storage/partial_update_info_test.cpp @@ -0,0 +1,186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
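+
+// These tests pin down two behaviors added by this patch: the PartialUpdateInfoPB
+// round-trip of the sequence map column uid, and the single-segment restriction
+// on flexible partial update writes. A compact sketch of the round-trip the
+// first two tests rely on (mirrors PartialUpdateInfo::to_pb/from_pb; -1 is the
+// fallback when the optional PB field is absent):
+//
+//   PartialUpdateInfoPB pb;
+//   info.to_pb(&pb);      // writes sequence_map_col_uid from sequence_map_col_unqiue_id
+//   decoded.from_pb(&pb); // restores it, or -1 when the field is unset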
+ +#include "storage/partial_update_info.h" + +#include + +#include "core/block/block.h" +#include "io/fs/local_file_system.h" +#include "storage/rowset/rowset_writer.h" +#include "storage/rowset/rowset_writer_context.h" +#include "storage/segment/vertical_segment_writer.h" +#include "storage/tablet/base_tablet.h" +#include "storage/tablet/tablet_meta.h" +#include "storage/tablet/tablet_schema.h" + +namespace doris { + +namespace { + +TabletSchemaSPtr create_unique_key_schema() { + auto tablet_schema = std::make_shared(); + TabletSchemaPB tablet_schema_pb; + tablet_schema_pb.set_keys_type(UNIQUE_KEYS); + tablet_schema_pb.set_num_short_key_columns(1); + tablet_schema_pb.set_num_rows_per_row_block(1024); + tablet_schema_pb.set_compress_kind(COMPRESS_NONE); + tablet_schema_pb.set_next_column_unique_id(4); + + auto* key_column = tablet_schema_pb.add_column(); + key_column->set_unique_id(1); + key_column->set_name("k"); + key_column->set_type("INT"); + key_column->set_is_key(true); + key_column->set_length(4); + key_column->set_index_length(4); + key_column->set_is_nullable(false); + key_column->set_is_bf_column(false); + + auto* value_column = tablet_schema_pb.add_column(); + value_column->set_unique_id(2); + value_column->set_name("v"); + value_column->set_type("INT"); + value_column->set_is_key(false); + value_column->set_length(4); + value_column->set_index_length(4); + value_column->set_is_nullable(false); + value_column->set_is_bf_column(false); + + auto* delete_sign_column = tablet_schema_pb.add_column(); + delete_sign_column->set_unique_id(3); + delete_sign_column->set_name(DELETE_SIGN); + delete_sign_column->set_type("TINYINT"); + delete_sign_column->set_is_key(false); + delete_sign_column->set_length(1); + delete_sign_column->set_index_length(1); + delete_sign_column->set_is_nullable(false); + delete_sign_column->set_is_bf_column(false); + + tablet_schema->init_from_pb(tablet_schema_pb); + return tablet_schema; +} + +class FakeTablet : public BaseTablet { +public: + explicit FakeTablet(TabletSchemaSPtr schema) + : BaseTablet(std::make_shared(std::move(schema))) {} + + std::string tablet_path() const override { return ""; } + + bool exceed_version_limit(int32_t /*limit*/) override { return false; } + + Result> create_rowset_writer(RowsetWriterContext& /*context*/, + bool /*vertical*/) override { + return ResultError(Status::NotSupported("fake tablet")); + } + + Result> create_transient_rowset_writer( + const Rowset& /*rowset*/, std::shared_ptr /*partial_update_info*/, + int64_t /*txn_expiration*/ = 0) override { + return ResultError(Status::NotSupported("fake tablet")); + } + + Status capture_rs_readers(const Version& /*spec_version*/, + std::vector* /*rs_splits*/, + const CaptureRowsetOps& /*opts*/) override { + return Status::NotSupported("fake tablet"); + } + + Status save_delete_bitmap(const TabletTxnInfo* /*txn_info*/, int64_t /*txn_id*/, + DeleteBitmapPtr /*delete_bitmap*/, RowsetWriter* /*rowset_writer*/, + const RowsetIdUnorderedSet& /*cur_rowset_ids*/, + int64_t /*lock_id*/ = -1, + int64_t /*next_visible_version*/ = -1) override { + return Status::NotSupported("fake tablet"); + } + + CalcDeleteBitmapExecutor* calc_delete_bitmap_executor() override { return nullptr; } + + void clear_cache() override {} + + Versions calc_missed_versions(int64_t /*spec_version*/, + Versions /*existing_versions*/) const override { + return {}; + } + + size_t tablet_footprint() override { return 0; } +}; + +} // namespace + +TEST(PartialUpdateInfoTest, PersistsSequenceMapColumnUid) { + 
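+    // Note: the in-memory member keeps the existing spelling
+    // `sequence_map_col_unqiue_id`, while the PB field and its accessor are
+    // spelled `sequence_map_col_uid`; both names are used below intentionally.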
PartialUpdateInfo info; + info.partial_update_mode = UniqueKeyUpdateModePB::UPDATE_FLEXIBLE_COLUMNS; + info.sequence_map_col_unqiue_id = 123; + + PartialUpdateInfoPB pb; + info.to_pb(&pb); + + ASSERT_TRUE(pb.has_sequence_map_col_uid()); + EXPECT_EQ(pb.sequence_map_col_uid(), 123); + + PartialUpdateInfo decoded; + decoded.from_pb(&pb); + EXPECT_EQ(decoded.sequence_map_col_uid(), 123); +} + +TEST(PartialUpdateInfoTest, DefaultsMissingSequenceMapColumnUid) { + PartialUpdateInfoPB pb; + pb.set_partial_update_mode(UniqueKeyUpdateModePB::UPDATE_FLEXIBLE_COLUMNS); + + PartialUpdateInfo decoded; + decoded.from_pb(&pb); + EXPECT_EQ(decoded.sequence_map_col_uid(), -1); +} + +TEST(PartialUpdateInfoTest, FlexiblePartialUpdateRejectsSlicedRowPos) { + auto schema = create_unique_key_schema(); + + RowsetWriterContext rowset_ctx; + rowset_ctx.tablet_schema = schema; + rowset_ctx.partial_update_info = std::make_shared(); + rowset_ctx.partial_update_info->partial_update_mode = + UniqueKeyUpdateModePB::UPDATE_FLEXIBLE_COLUMNS; + + segment_v2::VerticalSegmentWriterOptions opts; + opts.rowset_ctx = &rowset_ctx; + opts.write_type = DataWriteType::TYPE_DIRECT; + opts.enable_unique_key_merge_on_write = true; + + auto fs = io::global_local_filesystem(); + static_cast(fs->create_directory("./ut_dir")); + static_cast( + fs->delete_file("./ut_dir/flexible_partial_update_rejects_sliced_row_pos.dat")); + io::FileWriterPtr file_writer; + auto st = fs->create_file("./ut_dir/flexible_partial_update_rejects_sliced_row_pos.dat", + &file_writer); + ASSERT_TRUE(st.ok()) << st; + + auto tablet = std::make_shared(schema); + segment_v2::VerticalSegmentWriter writer(file_writer.get(), 0, schema, tablet, nullptr, opts, + nullptr); + + Block block = schema->create_block(); + ASSERT_TRUE(writer.batch_block(&block, 1, 1).ok()); + st = writer.write_batch(); + EXPECT_TRUE(st.is()) << st; + EXPECT_NE(st.to_string().find("whole-block duplicate-key aggregation"), std::string::npos) + << st; +} + +} // namespace doris diff --git a/be/test/storage/segment/variant_column_writer_reader_test.cpp b/be/test/storage/segment/variant_column_writer_reader_test.cpp index 3a644cc373f896..dc67071692927e 100644 --- a/be/test/storage/segment/variant_column_writer_reader_test.cpp +++ b/be/test/storage/segment/variant_column_writer_reader_test.cpp @@ -1690,6 +1690,7 @@ TEST_F(VariantColumnWriterReaderTest, test_write_doc_compact_writer_and_read_doc bucket_data->row_pos = 0; const auto* data = reinterpret_cast(bucket_data.get()); EXPECT_TRUE(doc_compact_writer->append_data(&data, kRows).ok()); + EXPECT_EQ(kRows, doc_compact_writer->get_next_rowid()); } EXPECT_TRUE(root_writer->finish().ok()); @@ -1852,6 +1853,7 @@ TEST_F(VariantColumnWriterReaderTest, test_doc_compact_sparse_write_array_gap) { bucket_data->row_pos = 0; const auto* data = reinterpret_cast(bucket_data.get()); EXPECT_TRUE(doc_compact_writer->append_data(&data, kRows).ok()); + EXPECT_EQ(kRows, doc_compact_writer->get_next_rowid()); EXPECT_TRUE(doc_compact_writer->finish().ok()); EXPECT_TRUE(doc_compact_writer->write_data().ok()); @@ -2660,6 +2662,7 @@ TEST_F(VariantColumnWriterReaderTest, test_write_data_nullable) { const auto* ptr = (const uint8_t*)accessor->get_data(); st = vw->append_nullable(accessor->get_nullmap(), &ptr, 1000); EXPECT_TRUE(st.ok()) << st.msg(); + EXPECT_EQ(1000, vw->get_next_rowid()); st = vw->finish(); EXPECT_TRUE(st.ok()) << st.msg(); auto size = vw->estimate_buffer_size(); diff --git a/be/test/storage/segment/variant_util_test.cpp 
b/be/test/storage/segment/variant_util_test.cpp index 902bf9c843b115..283cd43ddc8b9b 100644 --- a/be/test/storage/segment/variant_util_test.cpp +++ b/be/test/storage/segment/variant_util_test.cpp @@ -19,23 +19,100 @@ #include +#include #include #include #include #include "common/config.h" #include "core/block/block.h" +#include "core/column/column_complex.h" +#include "core/column/column_nullable.h" #include "core/column/column_string.h" #include "core/column/column_variant.h" +#include "core/column/column_vector.h" +#include "core/data_type/data_type_bitmap.h" +#include "core/data_type/data_type_number.h" #include "core/data_type/data_type_variant.h" #include "core/field.h" +#include "core/value/bitmap_value.h" #include "core/value/jsonb_value.h" #include "exec/common/variant_util.h" #include "gtest/gtest.h" +#include "storage/segment/vertical_segment_writer.h" #include "storage/tablet/tablet_schema.h" +#include "storage/utils.h" namespace doris::variant_util { +namespace { + +constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_MASK = 1ULL << 63; +constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT = 62; +constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_BYTE_BITS = 8; +constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_POS_BITS = 12; +constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_INDEX_BITS = 11; +constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_POS_SHIFT = + TEST_VARIANT_PATCH_PATH_MARKER_BYTE_BITS; +constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT = + TEST_VARIANT_PATCH_PATH_MARKER_POS_SHIFT + TEST_VARIANT_PATCH_PATH_MARKER_POS_BITS; +constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_UID_SHIFT = + TEST_VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT + TEST_VARIANT_PATCH_PATH_MARKER_INDEX_BITS; +constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_BYTE_MASK = + (1ULL << TEST_VARIANT_PATCH_PATH_MARKER_BYTE_BITS) - 1; + +static uint64_t _test_variant_patch_path_length_marker(int32_t variant_col_unique_id, + uint64_t path_index, uint64_t length) { + return TEST_VARIANT_PATCH_PATH_MARKER_MASK | + (static_cast(variant_col_unique_id) + << TEST_VARIANT_PATCH_PATH_MARKER_UID_SHIFT) | + (path_index << TEST_VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) | length; +} + +static uint64_t _test_variant_patch_path_byte_marker(int32_t variant_col_unique_id, + uint64_t path_index, uint64_t byte_pos, + uint8_t byte) { + return TEST_VARIANT_PATCH_PATH_MARKER_MASK | + (1ULL << TEST_VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT) | + (static_cast(variant_col_unique_id) + << TEST_VARIANT_PATCH_PATH_MARKER_UID_SHIFT) | + (path_index << TEST_VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) | + (byte_pos << TEST_VARIANT_PATCH_PATH_MARKER_POS_SHIFT) | byte; +} + +static void _add_test_encoded_patch_path(BitmapValue* markers, int32_t variant_col_unique_id, + uint64_t path_index, std::string_view encoded_path) { + markers->add(_test_variant_patch_path_length_marker(variant_col_unique_id, path_index, + encoded_path.size())); + for (uint64_t i = 0; i < encoded_path.size(); ++i) { + markers->add(_test_variant_patch_path_byte_marker( + variant_col_unique_id, path_index, i, + static_cast(static_cast(encoded_path[i])))); + } +} + +static std::string _test_encode_single_part_path(std::string_view key) { + std::string encoded; + auto append_u32 = [&encoded](uint32_t value) { + encoded.push_back(static_cast(value & 0xFF)); + encoded.push_back(static_cast((value >> 8) & 0xFF)); + encoded.push_back(static_cast((value >> 16) & 0xFF)); + encoded.push_back(static_cast((value >> 24) & 0xFF)); + }; + append_u32(1); + 
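+    // Byte layout assumed by this helper and the marker tests (u32s are
+    // little-endian): [part_count = 1][key_length][key bytes][two trailing
+    // zero bytes standing in for per-part flags, which these tests do not
+    // exercise]. The production encoder is the source of truth for the flags.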
append_u32(static_cast(key.size())); + encoded.append(key.data(), key.size()); + encoded.push_back(0); + encoded.push_back(0); + return encoded; +} + +static ColumnPtr _make_nullable_variant_column(ColumnVariant::MutablePtr variant, + bool is_null = false) { + auto null_map = ColumnUInt8::create(variant->size(), is_null ? 1 : 0); + return ColumnNullable::create(std::move(variant), std::move(null_map)); +} + static ColumnString::MutablePtr _make_json_column(const std::vector& rows) { auto col = ColumnString::create(); for (const auto& s : rows) { @@ -58,6 +135,118 @@ class ScopedDuplicateJsonPathCheck { bool _old_value; }; +static ColumnVariant::MutablePtr _make_variant_column( + const std::vector& rows, bool doc_mode = false, + ParseConfig::ParseTo parse_to = ParseConfig::ParseTo::OnlySubcolumns, + bool record_empty_object_path = false) { + auto variant = ColumnVariant::create(0, doc_mode); + auto json_col = _make_json_column(rows); + ParseConfig cfg; + cfg.deprecated_enable_flatten_nested = false; + cfg.parse_to = parse_to; + cfg.record_empty_object_path = record_empty_object_path; + parse_json_to_variant(*variant, *json_col, cfg); + variant->finalize(); + return variant; +} + +static ColumnVariant::MutablePtr _make_raw_string_variant_column(std::string_view value) { + auto variant = ColumnVariant::create(0, false); + doris::VariantUtil::insert_root_scalar_field(*variant, + Field::create_field(String(value))); + variant->finalize(); + return variant; +} + +static ColumnVariant::MutablePtr _make_root_array_variant_column() { + auto variant = ColumnVariant::create(0, false); + doris::VariantUtil::insert_root_scalar_field(*variant, + doris::VariantUtil::get_field("array_int")); + variant->finalize(); + return variant; +} + +static ColumnVariant::MutablePtr _make_root_jsonb_variant_column(std::string_view value) { + JsonBinaryValue jsonb_value; + Status st = jsonb_value.from_json_string(value.data(), value.size()); + EXPECT_TRUE(st.ok()) << st.to_string(); + + auto variant = ColumnVariant::create(0, false); + doris::VariantUtil::insert_root_scalar_field( + *variant, + Field::create_field(JsonbField(jsonb_value.value(), jsonb_value.size()))); + variant->finalize(); + return variant; +} + +static std::string _make_nested_json(std::string_view key, int depth, std::string_view leaf) { + std::string json; + for (int i = 0; i < depth; ++i) { + json += "{\""; + json += key; + json += "\":"; + } + json += leaf; + for (int i = 0; i < depth; ++i) { + json += "}"; + } + return json; +} + +static FieldWithDataType _get_variant_field_at(const ColumnVariant& variant, size_t row_num, + std::string_view path) { + Field field; + variant.get(row_num, field); + EXPECT_EQ(field.get_type(), PrimitiveType::TYPE_VARIANT); + const auto& object = field.get(); + auto it = path.empty() ? 
object.find(PathInData()) : object.find(PathInData(path)); + EXPECT_NE(it, object.end()) << path; + if (it == object.end()) { + return {}; + } + return it->second; +} + +static PathInData _make_path(std::initializer_list keys) { + PathInData::Parts parts; + parts.reserve(keys.size()); + for (std::string_view key : keys) { + parts.emplace_back(key, false, 0); + } + return PathInData(parts); +} + +} // namespace + +static FieldWithDataType _get_variant_field_by_path(const ColumnVariant& variant, + const PathInData& path) { + Field field; + variant.get(0, field); + EXPECT_EQ(field.get_type(), PrimitiveType::TYPE_VARIANT); + const auto& object = field.get(); + auto it = object.find(path); + EXPECT_NE(it, object.end()) << path.get_path(); + if (it == object.end()) { + return {}; + } + return it->second; +} + +static FieldWithDataType _get_variant_field(const ColumnVariant& variant, std::string_view path) { + return _get_variant_field_at(variant, 0, path); +} + +static void _expect_no_variant_path(const ColumnVariant& variant, std::string_view path) { + Field row; + variant.get(0, row); + if (row.get_type() == PrimitiveType::TYPE_NULL) { + return; + } + ASSERT_EQ(row.get_type(), PrimitiveType::TYPE_VARIANT); + const auto& object = row.get(); + EXPECT_FALSE(object.contains(PathInData(path))) << path; +} + TEST(VariantUtilTest, ParseDocValueToSubcolumns_FillsDefaultsAndValues) { const std::vector jsons = { R"({"a":1,"b":"x"})", // @@ -473,6 +662,160 @@ TEST(VariantUtilTest, ParseVariantColumns_ScalarJsonStringToSubcolumns) { EXPECT_EQ(f.field.get(), 2); } +TEST(VariantUtilTest, ParseVariantColumns_RejectsJsonNullWhenConfigured) { + auto variant = ColumnVariant::create(0, false); + doris::VariantUtil::insert_root_scalar_field( + *variant, Field::create_field(String(R"({"a":null})"))); + + Block block; + block.insert({variant->get_ptr(), std::make_shared(0, false), "v"}); + + ParseConfig parse_cfg; + parse_cfg.deprecated_enable_flatten_nested = false; + parse_cfg.reject_json_null_value = true; + Status st = + parse_and_materialize_variant_columns(block, std::vector {0}, {parse_cfg}); + EXPECT_FALSE(st.ok()); + EXPECT_NE(st.to_string().find( + "VARIANT flexible partial update does not support JSON null patch values"), + std::string::npos); +} + +TEST(VariantUtilTest, ParseVariantColumns_RejectsJsonNullInsideNestedArrayJsonbWhenConfigured) { + auto variant = ColumnVariant::create(0, false); + doris::VariantUtil::insert_root_scalar_field( + *variant, Field::create_field(String(R"({"a":[{"b":null}]})"))); + + Block block; + block.insert({variant->get_ptr(), std::make_shared(0, false), "v"}); + + ParseConfig parse_cfg; + parse_cfg.deprecated_enable_flatten_nested = false; + parse_cfg.reject_json_null_value = true; + Status st = + parse_and_materialize_variant_columns(block, std::vector {0}, {parse_cfg}); + EXPECT_FALSE(st.ok()); + EXPECT_NE(st.to_string().find( + "VARIANT flexible partial update does not support JSON null patch values"), + std::string::npos); +} + +TEST(VariantUtilTest, ParseVariantColumns_RecordsEmptyObjectPathWhenConfigured) { + auto variant = _make_variant_column({R"({"a":{}})"}, false, + ParseConfig::ParseTo::OnlySubcolumns, true); + + auto a = _get_variant_field(*variant, "a"); + ASSERT_EQ(a.field.get_type(), PrimitiveType::TYPE_JSONB); + const auto& jsonb = a.field.get(); + EXPECT_EQ(JsonbToJson::jsonb_to_json_string(jsonb.get_value(), jsonb.get_size()), "{}"); +} + +TEST(VariantUtilTest, ParseVariantColumns_FlexiblePatchRecordsEmptyObjectPath) { + TabletSchemaPB schema_pb; + 
schema_pb.set_keys_type(KeysType::DUP_KEYS); + auto* c = schema_pb.add_column(); + c->set_unique_id(1); + c->set_name("v"); + c->set_type("VARIANT"); + c->set_is_key(false); + c->set_is_nullable(false); + c->set_variant_enable_doc_mode(false); + + TabletSchema tablet_schema; + tablet_schema.init_from_pb(schema_pb); + + auto variant = ColumnVariant::create(0, false); + doris::VariantUtil::insert_root_scalar_field( + *variant, Field::create_field(String(R"({"a":{}})"))); + + Block block; + block.insert({variant->get_ptr(), std::make_shared(0, false), "v"}); + + Status st = parse_and_materialize_variant_columns(block, tablet_schema, + std::vector {0}, true); + EXPECT_TRUE(st.ok()) << st.to_string(); + + const auto& out = assert_cast(*block.get_by_position(0).column); + auto a = _get_variant_field(out, "a"); + ASSERT_EQ(a.field.get_type(), PrimitiveType::TYPE_JSONB); + const auto& jsonb = a.field.get(); + EXPECT_EQ(JsonbToJson::jsonb_to_json_string(jsonb.get_value(), jsonb.get_size()), "{}"); +} + +TEST(VariantUtilTest, ParseVariantColumns_TabletSchemaNoVariantIsNoop) { + TabletSchemaPB schema_pb; + schema_pb.set_keys_type(KeysType::DUP_KEYS); + auto* c = schema_pb.add_column(); + c->set_unique_id(1); + c->set_name("k"); + c->set_type("INT"); + c->set_is_key(false); + c->set_is_nullable(false); + + TabletSchema tablet_schema; + tablet_schema.init_from_pb(schema_pb); + + auto int_col = ColumnInt32::create(); + int_col->insert_value(7); + Block block; + block.insert({int_col->get_ptr(), std::make_shared(), "k"}); + + Status st = parse_and_materialize_variant_columns(block, tablet_schema, + std::vector {0}, true); + ASSERT_TRUE(st.ok()) << st.to_string(); + const auto& out = assert_cast(*block.get_by_position(0).column); + EXPECT_EQ(out.get_element(0), 7); +} + +TEST(VariantUtilTest, ParseVariantColumns_NonStringScalarRootKeepsVariant) { + auto variant = ColumnVariant::create(0, false); + doris::VariantUtil::insert_root_scalar_field(*variant, Field::create_field(7)); + + Block block; + block.insert({variant->get_ptr(), std::make_shared(0, false), "v"}); + + ParseConfig parse_cfg; + parse_cfg.deprecated_enable_flatten_nested = false; + Status st = + parse_and_materialize_variant_columns(block, std::vector {0}, {parse_cfg}); + ASSERT_TRUE(st.ok()) << st.to_string(); + + const auto& out = assert_cast(*block.get_by_position(0).column); + Field field; + out.get(0, field); + EXPECT_EQ(field.get_type(), PrimitiveType::TYPE_VARIANT); +} + +TEST(VariantUtilTest, ParseVariantColumns_TabletSchemaDocModeUsesDocValueColumn) { + TabletSchemaPB schema_pb; + schema_pb.set_keys_type(KeysType::DUP_KEYS); + auto* c = schema_pb.add_column(); + c->set_unique_id(1); + c->set_name("v"); + c->set_type("VARIANT"); + c->set_is_key(false); + c->set_is_nullable(false); + c->set_variant_enable_doc_mode(true); + + TabletSchema tablet_schema; + tablet_schema.init_from_pb(schema_pb); + + auto variant = ColumnVariant::create(0, true); + doris::VariantUtil::insert_root_scalar_field( + *variant, Field::create_field(String(R"({"a":1})"))); + + Block block; + block.insert({variant->get_ptr(), std::make_shared(0, true), "v"}); + + Status st = parse_and_materialize_variant_columns(block, tablet_schema, + std::vector {0}, false); + ASSERT_TRUE(st.ok()) << st.to_string(); + + const auto& out = assert_cast(*block.get_by_position(0).column); + auto docs_subcolumns = materialize_docs_to_subcolumns_map(out); + ASSERT_TRUE(docs_subcolumns.contains("a")); +} + TEST(VariantUtilTest, ParseVariantColumns_DocModeBinaryToSubcolumns) { const 
std::vector jsons = { R"({"a":1,"b":"x"})", // @@ -526,6 +869,761 @@ TEST(VariantUtilTest, ParseVariantColumns_DocModeBinaryToSubcolumns) { EXPECT_EQ(f.field.get(), "y"); } +TEST(VariantUtilTest, MergeVariantPatch_MergesObjectPaths) { + auto old_variant = _make_variant_column({R"({"a":1,"c":3,"nested":{"x":1}})"}); + auto patch_variant = _make_variant_column({R"({"a":10,"b":20,"nested":{"y":2}})"}); + auto merged_variant = ColumnVariant::create(0, false); + + Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a = _get_variant_field(*merged_variant, "a"); + EXPECT_EQ(a.field.get(), 10); + auto b = _get_variant_field(*merged_variant, "b"); + EXPECT_EQ(b.field.get(), 20); + auto c = _get_variant_field(*merged_variant, "c"); + EXPECT_EQ(c.field.get(), 3); + auto nested_x = _get_variant_field(*merged_variant, "nested.x"); + EXPECT_EQ(nested_x.field.get(), 1); + auto nested_y = _get_variant_field(*merged_variant, "nested.y"); + EXPECT_EQ(nested_y.field.get(), 2); +} + +TEST(VariantUtilTest, MergeVariantPatch_RejectsRawStringRoot) { + auto old_variant = _make_variant_column({R"({"a":1})"}); + auto patch_variant = _make_raw_string_variant_column(R"({"b":2})"); + auto merged_variant = ColumnVariant::create(0, false); + + Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + EXPECT_FALSE(st.ok()); +} + +TEST(VariantUtilTest, MergeVariantPatch_RejectsRawStringScalarRoot) { + auto old_variant = _make_variant_column({R"({"a":1})"}); + auto patch_variant = _make_raw_string_variant_column("plain text"); + auto merged_variant = ColumnVariant::create(0, false); + + Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + EXPECT_FALSE(st.ok()); +} + +TEST(VariantUtilTest, MergeVariantPatch_MergesRootJsonbObjectBase) { + auto old_variant = _make_root_jsonb_variant_column(R"({"a":1})"); + auto patch_variant = _make_variant_column({R"({"b":2})"}); + auto merged_variant = ColumnVariant::create(0, false); + + Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a = _get_variant_field(*merged_variant, "a"); + EXPECT_EQ(a.field.get(), 1); + auto b = _get_variant_field(*merged_variant, "b"); + EXPECT_EQ(b.field.get(), 2); +} + +TEST(VariantUtilTest, MergeVariantPatch_MergesRootJsonbObjectPatch) { + auto old_variant = _make_variant_column({R"({"a":1})"}); + auto patch_variant = _make_root_jsonb_variant_column(R"({"b":2})"); + auto merged_variant = ColumnVariant::create(0, false); + + Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a = _get_variant_field(*merged_variant, "a"); + EXPECT_EQ(a.field.get(), 1); + auto b = _get_variant_field(*merged_variant, "b"); + EXPECT_EQ(b.field.get(), 2); +} + +TEST(VariantUtilTest, MergeVariantPatch_RejectsRootJsonbObjectPatchWithNull) { + constexpr int32_t variant_col_unique_id = 100; + auto old_variant = _make_variant_column({R"({"a":1})"}); + auto patch_variant = _make_root_jsonb_variant_column(R"({"a":null})"); + auto merged_variant = ColumnVariant::create(0, false); + + BitmapValue patch_path_markers; + Status st = + mark_variant_patch_paths(*patch_variant, 0, variant_col_unique_id, &patch_path_markers); + EXPECT_FALSE(st.ok()); + EXPECT_NE(st.to_string().find( + 
"VARIANT flexible partial update does not support JSON null patch values"), + std::string::npos); + + st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + EXPECT_FALSE(st.ok()); + EXPECT_NE(st.to_string().find( + "VARIANT flexible partial update does not support JSON null patch values"), + std::string::npos); +} + +TEST(VariantUtilTest, MergeVariantPatch_ReplacesConflictingPaths) { + { + auto old_variant = _make_variant_column({R"({"a":{"x":1},"b":2})"}); + auto patch_variant = _make_variant_column({R"({"a":3})"}); + auto merged_variant = ColumnVariant::create(0, false); + + Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a = _get_variant_field(*merged_variant, "a"); + EXPECT_EQ(a.field.get(), 3); + auto b = _get_variant_field(*merged_variant, "b"); + EXPECT_EQ(b.field.get(), 2); + _expect_no_variant_path(*merged_variant, "a.x"); + } + { + auto old_variant = _make_variant_column({R"({"a":3,"b":2})"}); + auto patch_variant = _make_variant_column({R"({"a":{"y":4}})"}); + auto merged_variant = ColumnVariant::create(0, false); + + Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a_y = _get_variant_field(*merged_variant, "a.y"); + EXPECT_EQ(a_y.field.get(), 4); + auto b = _get_variant_field(*merged_variant, "b"); + EXPECT_EQ(b.field.get(), 2); + _expect_no_variant_path(*merged_variant, "a"); + } + { + auto old_variant = _make_variant_column({R"({"a":{"x":1},"b":2})"}); + auto patch_variant = _make_variant_column({R"({"a":{}})"}, false, + ParseConfig::ParseTo::OnlySubcolumns, true); + auto merged_variant = ColumnVariant::create(0, false); + + Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a = _get_variant_field(*merged_variant, "a"); + ASSERT_EQ(a.field.get_type(), PrimitiveType::TYPE_JSONB); + const auto& jsonb = a.field.get(); + EXPECT_EQ(JsonbToJson::jsonb_to_json_string(jsonb.get_value(), jsonb.get_size()), "{}"); + auto b = _get_variant_field(*merged_variant, "b"); + EXPECT_EQ(b.field.get(), 2); + _expect_no_variant_path(*merged_variant, "a.x"); + } +} + +TEST(VariantUtilTest, MergeVariantPatch_RejectsRootArray) { + auto old_variant = _make_variant_column({R"({"a":1})"}); + auto patch_variant = _make_root_array_variant_column(); + auto merged_variant = ColumnVariant::create(0, false); + + Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + EXPECT_FALSE(st.ok()); +} + +TEST(VariantUtilTest, MergeVariantPatch_RejectsNonObjectOldRootValues) { + constexpr int32_t variant_col_unique_id = 100; + auto patch_variant = _make_variant_column({R"({"a":2})"}); + BitmapValue patch_path_markers; + Status st = + mark_variant_patch_paths(*patch_variant, 0, variant_col_unique_id, &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + auto expect_reject_old_value = [&](const ColumnVariant& old_variant) { + auto merged_variant = ColumnVariant::create(0, false); + Status merge_st = merge_variant_patch(old_variant, 0, *patch_variant, 0, *merged_variant); + EXPECT_FALSE(merge_st.ok()); + EXPECT_NE(merge_st.to_string().find("VARIANT flexible partial update only supports " + "patching JSON object old values"), + std::string::npos); + + merged_variant = ColumnVariant::create(0, false); + 
+        merge_st = merge_variant_patch_by_path_markers(old_variant, 0, *patch_variant, 0,
+                                                       variant_col_unique_id, patch_path_markers,
+                                                       false, *merged_variant);
+        EXPECT_FALSE(merge_st.ok());
+        EXPECT_NE(merge_st.to_string().find("VARIANT flexible partial update only supports "
+                                            "patching JSON object old values"),
+                  std::string::npos);
+    };
+
+    expect_reject_old_value(*_make_raw_string_variant_column("plain text"));
+    expect_reject_old_value(*_make_root_array_variant_column());
+    expect_reject_old_value(*_make_root_jsonb_variant_column("null"));
+}
+
+TEST(VariantUtilTest, MergeVariantPatch_RejectsDocModeObjectPaths) {
+    auto old_variant =
+            _make_variant_column({R"({"a":1})"}, true, ParseConfig::ParseTo::OnlyDocValueColumn);
+    auto patch_variant =
+            _make_variant_column({R"({"b":2})"}, true, ParseConfig::ParseTo::OnlyDocValueColumn);
+    auto merged_variant = ColumnVariant::create(0, true);
+
+    Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant);
+    EXPECT_FALSE(st.ok());
+}
+
+TEST(VariantUtilTest, MergeVariantPatch_HandlesNullableVariantColumns) {
+    auto old_variant = _make_nullable_variant_column(_make_variant_column({R"({"a":1,"b":3})"}));
+    auto patch_variant = _make_nullable_variant_column(_make_variant_column({R"({"a":2})"}));
+    auto dst_nested = ColumnVariant::create(0, false);
+    auto* dst_variant = dst_nested.get();
+    auto dst_variant_nullable =
+            ColumnNullable::create(std::move(dst_nested), ColumnUInt8::create());
+
+    Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *dst_variant_nullable);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    dst_variant->finalize();
+
+    auto a = _get_variant_field(*dst_variant, "a");
+    EXPECT_EQ(a.field.get<Int64>(), 2);
+    auto b = _get_variant_field(*dst_variant, "b");
+    EXPECT_EQ(b.field.get<Int64>(), 3);
+}
+
+TEST(VariantUtilTest, MergeVariantPatch_RejectsNullPatchRows) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto old_variant = _make_variant_column({R"({"a":1})"});
+    auto null_patch = _make_nullable_variant_column(_make_variant_column({R"({"a":2})"}), true);
+
+    BitmapValue patch_path_markers;
+    Status st =
+            mark_variant_patch_paths(*null_patch, 0, variant_col_unique_id, &patch_path_markers);
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find(
+                      "VARIANT flexible partial update only supports JSON object patch values"),
+              std::string::npos);
+
+    auto merged_variant = ColumnVariant::create(0, false);
+    st = merge_variant_patch(*old_variant, 0, *null_patch, 0, *merged_variant);
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find(
+                      "VARIANT flexible partial update only supports JSON object patch values"),
+              std::string::npos);
+
+    st = merge_variant_patch_by_path_markers(*old_variant, 0, *null_patch, 0, variant_col_unique_id,
+                                             patch_path_markers, false, *merged_variant);
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find(
+                      "VARIANT flexible partial update only supports JSON object patch values"),
+              std::string::npos);
+}
+
+TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_PreservesConcurrentPaths) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto latest_old = _make_variant_column({R"({"a":1,"b":3})"});
+    auto flushed_full_value = _make_variant_column({R"({"a":2,"b":1})"});
+    auto original_patch = _make_variant_column({R"({"a":2})"});
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id,
+                                         &patch_path_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    auto merged_variant = ColumnVariant::create(0, false);
+    st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0,
+                                             variant_col_unique_id, patch_path_markers, false,
+                                             *merged_variant);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    merged_variant->finalize();
+
+    auto a = _get_variant_field(*merged_variant, "a");
+    EXPECT_EQ(a.field.get<Int64>(), 2);
+    auto b = _get_variant_field(*merged_variant, "b");
+    EXPECT_EQ(b.field.get<Int64>(), 3);
+}
+
+TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_EmptyMarkersDropFlushedPatchPaths) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto latest_old = _make_variant_column({R"({"a":1,"b":3})"});
+    auto flushed_full_value = _make_variant_column({R"({"a":2,"b":1})"});
+    BitmapValue patch_path_markers;
+
+    auto merged_variant = ColumnVariant::create(0, false);
+    Status st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0,
+                                                    variant_col_unique_id, patch_path_markers,
+                                                    false, *merged_variant);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    merged_variant->finalize();
+
+    auto a = _get_variant_field(*merged_variant, "a");
+    EXPECT_EQ(a.field.get<Int64>(), 1);
+    auto b = _get_variant_field(*merged_variant, "b");
+    EXPECT_EQ(b.field.get<Int64>(), 3);
+}
+
+TEST(VariantUtilTest, MergeVariantPatchPathMarkers_IntersectsNonVariantSkipBits) {
+    BitmapValue left;
+    left.add(1);
+    left.add(2);
+    BitmapValue right;
+    right.add(2);
+    right.add(3);
+
+    BitmapValue merged;
+    Status st = merge_variant_patch_path_markers(left, right, &merged);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    EXPECT_FALSE(merged.contains(1));
+    EXPECT_TRUE(merged.contains(2));
+    EXPECT_FALSE(merged.contains(3));
+}
+
+TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_RejectsCorruptMarkers) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto latest_old = _make_variant_column({R"({"a":1})"});
+    auto flushed_full_value = _make_variant_column({R"({"a":2})"});
+
+    auto expect_merge_fails = [&](const BitmapValue& patch_path_markers,
+                                  std::string_view expected) {
+        auto merged_variant = ColumnVariant::create(0, false);
+        Status st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0,
+                                                        variant_col_unique_id, patch_path_markers,
+                                                        false, *merged_variant);
+        EXPECT_FALSE(st.ok());
+        EXPECT_NE(st.to_string().find(expected), std::string::npos) << st.to_string();
+    };
+
+    {
+        BitmapValue patch_path_markers;
+        patch_path_markers.add(
+                _test_variant_patch_path_length_marker(variant_col_unique_id, 0, 4097));
+        expect_merge_fails(patch_path_markers, "Invalid VARIANT patch path marker length");
+    }
+    {
+        BitmapValue patch_path_markers;
+        patch_path_markers.add(_test_variant_patch_path_byte_marker(variant_col_unique_id, 0, 0,
+                                                                    static_cast<uint8_t>('a')));
+        expect_merge_fails(patch_path_markers, "VARIANT patch path marker byte without length");
+    }
+    {
+        BitmapValue patch_path_markers;
+        patch_path_markers.add(_test_variant_patch_path_length_marker(variant_col_unique_id, 0, 1));
+        patch_path_markers.add(_test_variant_patch_path_byte_marker(variant_col_unique_id, 0, 2,
+                                                                    static_cast<uint8_t>('a')));
+        expect_merge_fails(patch_path_markers, "VARIANT patch path marker byte exceeds length");
+    }
+    {
+        BitmapValue patch_path_markers;
+        std::string encoded_path(4, '\0');
+        _add_test_encoded_patch_path(&patch_path_markers, variant_col_unique_id, 0, encoded_path);
+        expect_merge_fails(patch_path_markers, "Invalid VARIANT patch path marker part count");
+    }
+    {
+        BitmapValue patch_path_markers;
+        std::string encoded_path;
+        encoded_path.push_back(1);
+        encoded_path.push_back(0);
+        encoded_path.push_back(0);
+        encoded_path.push_back(0);
+        _add_test_encoded_patch_path(&patch_path_markers, variant_col_unique_id, 0, encoded_path);
+        expect_merge_fails(patch_path_markers, "Invalid VARIANT patch path marker part payload");
+    }
+    {
+        BitmapValue patch_path_markers;
+        std::string encoded_path = _test_encode_single_part_path("a");
+        encoded_path.push_back('x');
+        _add_test_encoded_patch_path(&patch_path_markers, variant_col_unique_id, 0, encoded_path);
+        expect_merge_fails(patch_path_markers, "Trailing bytes in VARIANT patch path marker");
+    }
+    {
+        BitmapValue patch_path_markers;
+        const std::string encoded_path = _test_encode_single_part_path("a");
+        _add_test_encoded_patch_path(&patch_path_markers, variant_col_unique_id, 0, encoded_path);
+        patch_path_markers.add(_test_variant_patch_path_length_marker(variant_col_unique_id, 0,
+                                                                      encoded_path.size() + 1));
+        expect_merge_fails(patch_path_markers, "Conflicting VARIANT patch path marker length");
+    }
+    {
+        BitmapValue patch_path_markers;
+        _add_test_encoded_patch_path(&patch_path_markers, variant_col_unique_id, 0,
+                                     _test_encode_single_part_path("a"));
+        for (uint64_t marker : patch_path_markers) {
+            if ((marker & (1ULL << TEST_VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT)) != 0) {
+                uint8_t byte = marker & TEST_VARIANT_PATCH_PATH_MARKER_BYTE_MASK;
+                patch_path_markers.add((marker & ~TEST_VARIANT_PATCH_PATH_MARKER_BYTE_MASK) |
+                                       static_cast<uint64_t>(byte + 1));
+                break;
+            }
+        }
+        expect_merge_fails(patch_path_markers, "Conflicting VARIANT patch path marker byte");
+    }
+    {
+        BitmapValue patch_path_markers;
+        _add_test_encoded_patch_path(&patch_path_markers, variant_col_unique_id, 0,
+                                     _test_encode_single_part_path("a"));
+        for (uint64_t marker : patch_path_markers) {
+            if ((marker & (1ULL << TEST_VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT)) != 0) {
+                patch_path_markers.remove(marker);
+                break;
+            }
+        }
+        expect_merge_fails(patch_path_markers, "Incomplete VARIANT patch path marker");
+    }
+}
+
+TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsRootArray) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto original_patch = _make_root_array_variant_column();
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id,
+                                         &patch_path_markers);
+    EXPECT_FALSE(st.ok());
+}
+
+TEST(VariantUtilTest, MarkVariantPatchPaths_MarksRootJsonbObjectPatch) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto original_patch = _make_root_jsonb_variant_column(R"({"a":2})");
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id,
+                                         &patch_path_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    EXPECT_GT(patch_path_markers.cardinality(), 0);
+}
+
+TEST(VariantUtilTest, MarkVariantPatchPaths_AllowsDeepPathWithinMarkerCapacity) {
+    constexpr int32_t variant_col_unique_id = 100;
+    const std::string key(200, 'a');
+    const std::string json = _make_nested_json(key, 6, "1");
+    auto original_patch = _make_variant_column({std::string_view(json)});
+
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id,
+                                         &patch_path_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    EXPECT_GT(patch_path_markers.cardinality(), 1020);
+}
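The corruption checks above and the capacity tests below (4096 bytes per encoded path, 256 paths per row, 65536 encoded bytes per row across VARIANT columns) presuppose that each patched path is spelled out in the row's skip-bitmap as one length marker plus one marker per encoded byte. The sketch below shows one self-consistent way to pack such markers into 64-bit bitmap values; the field order, widths, and helper names are assumptions for illustration, not the production bit layout in variant_util.cpp.

// Hypothetical packing, for illustration only; differs from production.
#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

constexpr int kPayloadShift = 0;   // low 16 bits: byte value or path length
constexpr int kPosShift = 16;      // byte position within the encoded path
constexpr int kClassShift = 32;    // 1 = byte marker, 0 = length marker
constexpr int kPathShift = 33;     // per-row patch path index (< 256)
constexpr int kUidShift = 41;      // variant column unique id
constexpr uint64_t kMaxEncodedPathBytes = 4096; // matches the capacity tests

static uint64_t length_marker(uint16_t uid, uint8_t path_idx, uint16_t len) {
    return (uint64_t(uid) << kUidShift) | (uint64_t(path_idx) << kPathShift) |
           (uint64_t(len) << kPayloadShift);
}

static uint64_t byte_marker(uint16_t uid, uint8_t path_idx, uint16_t pos, uint8_t b) {
    return (uint64_t(uid) << kUidShift) | (uint64_t(path_idx) << kPathShift) |
           (1ULL << kClassShift) | (uint64_t(pos) << kPosShift) | b;
}

// One length marker announces the byte count for (uid, path_idx); one byte
// marker per payload byte follows. A decoder can then detect every corruption
// case enumerated above: a length beyond 4096, a byte without a length, a byte
// position at or past the length, conflicting duplicates, and missing bytes.
static std::vector<uint64_t> encode_path(uint16_t uid, uint8_t path_idx,
                                         const std::string& encoded) {
    assert(encoded.size() <= kMaxEncodedPathBytes);
    std::vector<uint64_t> markers;
    markers.push_back(length_marker(uid, path_idx, uint16_t(encoded.size())));
    for (size_t pos = 0; pos < encoded.size(); ++pos) {
        markers.push_back(byte_marker(uid, path_idx, uint16_t(pos), uint8_t(encoded[pos])));
    }
    return markers;
}

int main() {
    auto markers = encode_path(100, 0, "a.b"); // payload bytes are opaque here
    assert(markers.size() == 4);               // 1 length marker + 3 byte markers
    assert((markers[1] >> kClassShift) & 1);   // byte markers carry the class bit
    assert(!((markers[0] >> kClassShift) & 1));
    return 0;
}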
+
+TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsPathBeyondMarkerCapacity) {
+    constexpr int32_t variant_col_unique_id = 100;
+    const std::string key(250, 'a');
+    const std::string json = _make_nested_json(key, 20, "1");
+    auto original_patch = _make_variant_column({std::string_view(json)});
+
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id,
+                                         &patch_path_markers);
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find(
+                      "VARIANT flexible partial update encoded patch path exceeds 4096 bytes"),
+              std::string::npos);
+}
+
+TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsTooManyPaths) {
+    constexpr int32_t variant_col_unique_id = 100;
+    std::string json = "{";
+    for (int i = 0; i < 257; ++i) {
+        if (i != 0) {
+            json += ",";
+        }
+        json += "\"k";
+        json += std::to_string(i);
+        json += "\":";
+        json += std::to_string(i);
+    }
+    json += "}";
+    auto original_patch = _make_variant_column({std::string_view(json)});
+
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id,
+                                         &patch_path_markers);
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find(
+                      "VARIANT flexible partial update supports at most 256 patch paths per row"),
+              std::string::npos);
+}
+
+TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsTotalEncodedPathBytesBeyondLimit) {
+    constexpr int32_t variant_col_unique_id = 100;
+    std::string json = "{";
+    for (int i = 0; i < 256; ++i) {
+        if (i != 0) {
+            json += ",";
+        }
+        std::string key = "k" + std::to_string(i) + "_";
+        key.append(255 - key.size(), 'a');
+        json += "\"";
+        json += key;
+        json += "\":";
+        json += std::to_string(i);
+    }
+    json += "}";
+    auto original_patch = _make_variant_column({std::string_view(json)});
+
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id,
+                                         &patch_path_markers);
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find("VARIANT flexible partial update encoded patch paths exceed "
+                                  "65536 bytes per row"),
+              std::string::npos);
+}
+
+TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsTotalEncodedPathBytesAcrossColumns) {
+    auto make_json = [] {
+        std::string json = "{";
+        for (int i = 0; i < 128; ++i) {
+            if (i != 0) {
+                json += ",";
+            }
+            std::string key = "k" + std::to_string(i) + "_";
+            key.append(255 - key.size(), 'a');
+            json += "\"";
+            json += key;
+            json += "\":";
+            json += std::to_string(i);
+        }
+        json += "}";
+        return json;
+    };
+    auto patch_v1 = _make_variant_column({make_json()});
+    auto patch_v2 = _make_variant_column({make_json()});
+
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*patch_v1, 0, 100, &patch_path_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    BitmapValue markers_before_v2 = patch_path_markers;
+    st = mark_variant_patch_paths(*patch_v2, 0, 101, &patch_path_markers);
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find("VARIANT flexible partial update encoded patch paths exceed "
+                                  "65536 bytes per row"),
+              std::string::npos);
+    EXPECT_EQ(patch_path_markers.cardinality(), markers_before_v2.cardinality());
+}
+
+TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsRootJsonbArray) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto original_patch = _make_root_jsonb_variant_column(R"([1,2,3])");
+
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id,
+                                         &patch_path_markers);
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find(
+                      "VARIANT flexible partial update only supports JSON object patch values"),
+              std::string::npos);
+}
+
+TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsDocMode) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto original_patch =
+            _make_variant_column({R"({"a":2})"}, true, ParseConfig::ParseTo::OnlyDocValueColumn);
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id,
+                                         &patch_path_markers);
+    EXPECT_FALSE(st.ok());
+}
+
+TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_EmptyObjectKeepsLatestOld) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto latest_old = _make_variant_column({R"({"a":1,"b":3})"});
+    auto flushed_full_value = _make_variant_column({R"({"a":1,"b":1})"});
+    auto original_patch = _make_variant_column({R"({})"});
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id,
+                                         &patch_path_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    auto merged_variant = ColumnVariant::create(0, false);
+    st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0,
+                                             variant_col_unique_id, patch_path_markers, false,
+                                             *merged_variant);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    merged_variant->finalize();
+
+    auto a = _get_variant_field(*merged_variant, "a");
+    EXPECT_EQ(a.field.get<Int64>(), 1);
+    auto b = _get_variant_field(*merged_variant, "b");
+    EXPECT_EQ(b.field.get<Int64>(), 3);
+}
+
+TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_IsolatesVariantColumns) {
+    constexpr int32_t v1_unique_id = 100;
+    constexpr int32_t v2_unique_id = 101;
+    auto latest_old = _make_variant_column({R"({"a":1,"b":9})"});
+    auto flushed_full_v1 = _make_variant_column({R"({"a":2,"b":1})"});
+    auto original_patch_v1 = _make_variant_column({R"({"a":2})"});
+    auto original_patch_v2 = _make_variant_column({R"({"b":8})"});
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*original_patch_v1, 0, v1_unique_id, &patch_path_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    st = mark_variant_patch_paths(*original_patch_v2, 0, v2_unique_id, &patch_path_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    auto merged_variant = ColumnVariant::create(0, false);
+    st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_v1, 0, v1_unique_id,
+                                             patch_path_markers, false, *merged_variant);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    merged_variant->finalize();
+
+    auto a = _get_variant_field(*merged_variant, "a");
+    EXPECT_EQ(a.field.get<Int64>(), 2);
+    auto b = _get_variant_field(*merged_variant, "b");
+    EXPECT_EQ(b.field.get<Int64>(), 9);
+}
+
+TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_DeletedOldUsesPatchOnly) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto deleted_old = _make_variant_column({R"({"a":1,"b":9})"});
+    auto flushed_full_value = _make_variant_column({R"({"a":2,"b":1})"});
+    auto original_patch = _make_variant_column({R"({"a":2})"});
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id,
+                                         &patch_path_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    auto merged_variant = ColumnVariant::create(0, false);
+    st = merge_variant_patch_by_path_markers(*deleted_old, 0, *flushed_full_value, 0,
+                                             variant_col_unique_id, patch_path_markers, true,
+                                             *merged_variant);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    merged_variant->finalize();
+
+    auto a = _get_variant_field(*merged_variant, "a");
+    EXPECT_EQ(a.field.get<Int64>(), 2);
+    _expect_no_variant_path(*merged_variant, "b");
+}
+
+TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_EmptyObjectRemovesStaleSubpaths) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto latest_old = _make_variant_column({R"({"a":{"x":9},"b":3})"});
+    auto flushed_full_value =
_make_variant_column({R"({"a":{},"b":1})"}, false, + ParseConfig::ParseTo::OnlySubcolumns, true); + auto original_patch = _make_variant_column({R"({"a":{}})"}, false, + ParseConfig::ParseTo::OnlySubcolumns, true); + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + auto merged_variant = ColumnVariant::create(0, false); + st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0, + variant_col_unique_id, patch_path_markers, false, + *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a = _get_variant_field(*merged_variant, "a"); + ASSERT_EQ(a.field.get_type(), PrimitiveType::TYPE_JSONB); + const auto& jsonb = a.field.get(); + EXPECT_EQ(JsonbToJson::jsonb_to_json_string(jsonb.get_value(), jsonb.get_size()), "{}"); + auto b = _get_variant_field(*merged_variant, "b"); + EXPECT_EQ(b.field.get(), 3); + _expect_no_variant_path(*merged_variant, "a.x"); +} + +TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_PreservesSiblingChildPatch) { + constexpr int32_t variant_col_unique_id = 100; + auto latest_old = _make_variant_column({R"({"a":{"c":9},"x":1})"}); + auto flushed_full_value = _make_variant_column({R"({"a":{"b":1}})"}); + auto original_patch = _make_variant_column({R"({"a":{"b":1}})"}); + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + auto merged_variant = ColumnVariant::create(0, false); + st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0, + variant_col_unique_id, patch_path_markers, false, + *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a_b = _get_variant_field(*merged_variant, "a.b"); + EXPECT_EQ(a_b.field.get(), 1); + auto a_c = _get_variant_field(*merged_variant, "a.c"); + EXPECT_EQ(a_c.field.get(), 9); + auto x = _get_variant_field(*merged_variant, "x"); + EXPECT_EQ(x.field.get(), 1); +} + +TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_DistinguishesDottedKeyFromNestedPath) { + constexpr int32_t variant_col_unique_id = 100; + auto latest_old = _make_variant_column({R"({"a.b":7,"a":{"c":9},"x":1})"}); + auto flushed_full_value = _make_variant_column({R"({"a":{"b":1}})"}); + auto original_patch = _make_variant_column({R"({"a":{"b":1}})"}); + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + auto merged_variant = ColumnVariant::create(0, false); + st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0, + variant_col_unique_id, patch_path_markers, false, + *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto nested_a_b = _get_variant_field_by_path(*merged_variant, _make_path({"a", "b"})); + EXPECT_EQ(nested_a_b.field.get(), 1); + auto dotted_a_b = _get_variant_field_by_path(*merged_variant, _make_path({"a.b"})); + EXPECT_EQ(dotted_a_b.field.get(), 7); + auto a_c = _get_variant_field(*merged_variant, "a.c"); + EXPECT_EQ(a_c.field.get(), 9); + auto x = _get_variant_field(*merged_variant, "x"); + EXPECT_EQ(x.field.get(), 1); +} + +TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_ParentMarkerRemovesStaleSubpaths) { + constexpr int32_t variant_col_unique_id = 100; + 
+
+TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_ParentMarkerRemovesStaleSubpaths) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto latest_old = _make_variant_column({R"({"a":{"c":9},"x":1})"});
+    auto flushed_full_value = _make_variant_column({R"({"a":{"b":1}})"});
+    auto parent_patch = _make_variant_column({R"({"a":{}})"}, false,
+                                             ParseConfig::ParseTo::OnlySubcolumns, true);
+    auto child_patch = _make_variant_column({R"({"a":{"b":1}})"});
+    BitmapValue patch_path_markers;
+    Status st =
+            mark_variant_patch_paths(*parent_patch, 0, variant_col_unique_id, &patch_path_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    st = mark_variant_patch_paths(*child_patch, 0, variant_col_unique_id, &patch_path_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    auto merged_variant = ColumnVariant::create(0, false);
+    st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0,
+                                             variant_col_unique_id, patch_path_markers, false,
+                                             *merged_variant);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    merged_variant->finalize();
+
+    auto a_b = _get_variant_field(*merged_variant, "a.b");
+    EXPECT_EQ(a_b.field.get<Int64>(), 1);
+    auto x = _get_variant_field(*merged_variant, "x");
+    EXPECT_EQ(x.field.get<Int64>(), 1);
+    _expect_no_variant_path(*merged_variant, "a.c");
+}
+
+TEST(VariantUtilTest, MergeVariantPatchPathMarkers_RebuildsExactMetadata) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto latest_old = _make_variant_column({R"({"a":{"c":9},"x":1})"});
+    auto flushed_full_value = _make_variant_column({R"({"a":{"b":1}})"});
+    auto parent_patch = _make_variant_column({R"({"a":{}})"}, false,
+                                             ParseConfig::ParseTo::OnlySubcolumns, true);
+    auto child_patch = _make_variant_column({R"({"a":{"b":1}})"});
+
+    BitmapValue parent_markers;
+    parent_markers.add(variant_col_unique_id);
+    Status st = mark_variant_patch_paths(*parent_patch, 0, variant_col_unique_id, &parent_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    BitmapValue child_markers;
+    st = mark_variant_patch_paths(*child_patch, 0, variant_col_unique_id, &child_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    BitmapValue merged_markers;
+    st = merge_variant_patch_path_markers(parent_markers, child_markers, &merged_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    EXPECT_FALSE(merged_markers.contains(variant_col_unique_id));
+
+    auto merged_variant = ColumnVariant::create(0, false);
+    st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0,
+                                             variant_col_unique_id, merged_markers, false,
+                                             *merged_variant);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    merged_variant->finalize();
+
+    auto a_b = _get_variant_field(*merged_variant, "a.b");
+    EXPECT_EQ(a_b.field.get<Int64>(), 1);
+    auto x = _get_variant_field(*merged_variant, "x");
+    EXPECT_EQ(x.field.get<Int64>(), 1);
+    _expect_no_variant_path(*merged_variant, "a.c");
+}
+
 TEST(VariantUtilTest, ParseVariantColumns_DocModeRejectOnlySubcolumnsConfig) {
     const std::vector<std::string> jsons = {R"({"a":1})"};
     auto variant = ColumnVariant::create(0, true);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java
index 1142b1645a978b..b8beb2be71b000 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java
@@ -2650,6 +2650,11 @@ public int getAsInt() {
             throw new DdlException("only support light schema change operator when use table with binlog");
         }
 
+        // Revalidate the final schema while holding the table write lock. Nereids validation
+        // runs before this lock, so a concurrent ALTER can otherwise change whether the
+        // pending VARIANT columns are allowed with flexible partial update.
+        validateVariantColumnsForFlexiblePartialUpdate(olapTable, indexSchemaMap);
+
         if (lightSchemaChange) {
             long jobId = Env.getCurrentEnv().getNextId();
             //for schema change add/drop value column optimize, direct modify table meta.
@@ -2676,6 +2681,17 @@ public int getAsInt() {
         }
     }
 
+    private void validateVariantColumnsForFlexiblePartialUpdate(
+            OlapTable olapTable, Map<Long, List<Column>> indexSchemaMap) throws UserException {
+        List<Column> baseSchema = indexSchemaMap.get(olapTable.getBaseIndexId());
+        Preconditions.checkNotNull(baseSchema);
+        if (olapTable.hasSkipBitmapColumn()
+                || baseSchema.stream().anyMatch(Column::isSkipBitmapColumn)) {
+            OlapTable.validateVariantColumnsForFlexiblePartialUpdate(
+                    baseSchema, olapTable.variantEnableFlattenNested());
+        }
+    }
+
     @Override
     public void processForNereids(String rawSql, List<AlterCommand> alterCommands, Database db,
             OlapTable olapTable) throws UserException {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
index 80a71ff25078ba..4caa1366aead69 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java
@@ -3195,15 +3195,26 @@ public boolean getEnableUniqueKeySkipBitmap() {
      * Validate that the table supports flexible partial update.
      * Checks the following constraints:
      * 1. Must be MoW unique key table
-     * 2. Must have skip_bitmap column
-     * 3. Must have light_schema_change enabled
-     * 4. Cannot have variant columns
+     * 2. Must not have cluster keys
+     * 3. Must have skip_bitmap column
+     * 4. Must have light_schema_change enabled
      * @throws UserException if any constraint is not satisfied
      */
     public void validateForFlexiblePartialUpdate() throws UserException {
+        validateForFlexiblePartialUpdate(true);
+    }
+
+    /**
+     * Validate that the table supports flexible partial update.
+     */
+    public void validateForFlexiblePartialUpdate(boolean validateBackendCapability) throws UserException {
         if (!getEnableUniqueKeyMergeOnWrite()) {
             throw new UserException("Flexible partial update is only supported in unique table MoW");
         }
+        if (isUniqKeyMergeOnWriteWithClusterKeys()) {
+            throw new UserException(
+                    "Flexible partial update does not support merge-on-write Unique table with cluster keys");
+        }
         if (!hasSkipBitmapColumn()) {
             throw new UserException("Flexible partial update can only support table with skip bitmap hidden column."
                     + " But table " + getName() + " doesn't have it. You can use `ALTER TABLE " + getName()
@@ -3213,8 +3224,80 @@ public void validateForFlexiblePartialUpdate() throws UserException {
             throw new UserException("Flexible partial update can only support table with light_schema_change enabled."
                     + " But table " + getName() + "'s property light_schema_change is false");
         }
-        if (hasVariantColumns()) {
-            throw new UserException("Flexible partial update can only support table without variant columns.");
+        validateVariantColumnsForFlexiblePartialUpdate(validateBackendCapability);
+    }
+
+    public void validateVariantColumnsForFlexiblePartialUpdate() throws UserException {
+        validateVariantColumnsForFlexiblePartialUpdate(true);
+    }
+
+    /**
+     * Validate VARIANT columns for flexible partial update.
+     */
+    public void validateVariantColumnsForFlexiblePartialUpdate(boolean validateBackendCapability)
+            throws UserException {
+        validateVariantColumnsForFlexiblePartialUpdate(
+                getBaseSchema(), variantEnableFlattenNested(), validateBackendCapability);
+    }
+
+    public static void validateVariantColumnsForFlexiblePartialUpdate(List<Column> columns) throws UserException {
+        validateVariantColumnsForFlexiblePartialUpdate(columns, false);
+    }
+
+    public static void validateVariantColumnsForFlexiblePartialUpdate(
+            List<Column> columns, boolean deprecatedVariantFlattenNested) throws UserException {
+        validateVariantColumnsForFlexiblePartialUpdate(columns, deprecatedVariantFlattenNested, true);
+    }
+
+    /**
+     * Validate VARIANT columns for flexible partial update.
+     */
+    public static void validateVariantColumnsForFlexiblePartialUpdate(
+            List<Column> columns, boolean deprecatedVariantFlattenNested, boolean validateBackendCapability)
+            throws UserException {
+        boolean hasVariantColumn = false;
+        for (Column column : columns) {
+            validateVariantColumnForFlexiblePartialUpdate(column);
+            if (column.getType().isVariantType() && deprecatedVariantFlattenNested) {
+                throw new UserException(
+                        "VARIANT flexible partial update does not support "
+                                + "deprecated_variant_enable_flatten_nested in this version");
+            }
+            hasVariantColumn |= column.getType().isVariantType();
+        }
+        if (hasVariantColumn && validateBackendCapability) {
+            try {
+                validateBackendsSupportVariantFlexiblePartialUpdate(
+                        Env.getCurrentSystemInfo().getBackendsByCurrentCluster().values());
+            } catch (AnalysisException e) {
+                throw new UserException(e.getMessage(), e);
+            }
+        }
+    }
+
+    public static void validateVariantColumnForFlexiblePartialUpdate(Column column) throws UserException {
+        if (column.getType().isVariantType() && column.getVariantEnableDocMode()) {
+            throw new UserException(
+                    "VARIANT flexible partial update does not support doc mode in this version");
+        }
+    }
+
+    @VisibleForTesting
+    static void validateBackendsSupportVariantFlexiblePartialUpdate(Collection<Backend> backends)
+            throws UserException {
+        for (Backend backend : backends) {
+            if (!backend.isAlive()) {
+                throw new UserException("VARIANT flexible partial update requires all backends to be "
+                        + "alive and advertise variant patch skip-bitmap marker support. Backend "
+                        + backend.getId() + " (" + backend.getHost() + ") is not alive");
+            }
+            if (backend.supportsVariantFlexiblePartialUpdate()) {
+                continue;
+            }
+            throw new UserException("VARIANT flexible partial update requires all backends to "
+                    + "advertise variant patch skip-bitmap marker support. Backend "
+                    + backend.getId() + " (" + backend.getHost() + ") is running version "
+                    + backend.getVersion() + " without the required capability");
         }
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/RoutineLoadDesc.java b/fe/fe-core/src/main/java/org/apache/doris/load/RoutineLoadDesc.java
index 2c1ede0d13a352..b31f2e8ede18b0 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/load/RoutineLoadDesc.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/load/RoutineLoadDesc.java
@@ -37,6 +37,7 @@ public class RoutineLoadDesc {
     private final Expr filter;
     private final Expr deleteCondition;
     private LoadTask.MergeType mergeType;
+    private final boolean mergeTypeSpecified;
     // nullable
     private final PartitionNamesInfo partitionNamesInfo;
     private final String sequenceColName;
@@ -45,6 +46,14 @@ public RoutineLoadDesc(Separator columnSeparator, Separator lineDelimiter, List<ImportColumnDesc> columnsInfo,
                            Expr precedingFilter, Expr filter, PartitionNamesInfo partitionNamesInfo,
                            Expr deleteCondition, LoadTask.MergeType mergeType,
                            String sequenceColName) {
+        this(columnSeparator, lineDelimiter, columnsInfo, precedingFilter, filter, partitionNamesInfo, deleteCondition,
+                mergeType, false, sequenceColName);
+    }
+
+    public RoutineLoadDesc(Separator columnSeparator, Separator lineDelimiter, List<ImportColumnDesc> columnsInfo,
+                           Expr precedingFilter, Expr filter,
+                           PartitionNamesInfo partitionNamesInfo, Expr deleteCondition, LoadTask.MergeType mergeType,
+                           boolean mergeTypeSpecified, String sequenceColName) {
         this.columnSeparator = columnSeparator;
         this.lineDelimiter = lineDelimiter;
         this.columnsInfo = columnsInfo;
@@ -53,6 +62,7 @@ public RoutineLoadDesc(Separator columnSeparator, Separator lineDelimiter, List<
         this.partitionNamesInfo = partitionNamesInfo;
         this.deleteCondition = deleteCondition;
         this.mergeType = mergeType;
+        this.mergeTypeSpecified = mergeTypeSpecified;
         this.sequenceColName = sequenceColName;
     }
 
@@ -80,6 +90,10 @@ public LoadTask.MergeType getMergeType() {
         return mergeType;
     }
 
+    public boolean isMergeTypeSpecified() {
+        return mergeTypeSpecified;
+    }
+
     // nullable
     public PartitionNamesInfo getPartitionNamesInfo() {
         return partitionNamesInfo;
diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java
index 82cd9325dfad1d..eeaff9e77781df 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java
@@ -61,6 +61,7 @@
 import org.apache.doris.persist.AlterRoutineLoadJobOperationLog;
 import org.apache.doris.persist.RoutineLoadOperation;
 import org.apache.doris.persist.gson.GsonPostProcessable;
+import org.apache.doris.persist.gson.GsonPreProcessable;
 import org.apache.doris.persist.gson.GsonUtils;
 import org.apache.doris.qe.ConnectContext;
 import org.apache.doris.qe.OriginStatement;
@@ -114,7 +115,7 @@
  */
 public abstract class RoutineLoadJob
         extends AbstractTxnStateChangeCallback
-        implements Writable, LoadTaskInfo, GsonPostProcessable {
+        implements Writable, LoadTaskInfo, GsonPostProcessable, GsonPreProcessable {
     private static final Logger LOG = LogManager.getLogger(RoutineLoadJob.class);
 
     public static final long DEFAULT_MAX_ERROR_NUM = 0;
@@ -182,12 +183,24 @@ public boolean isFinalState() {
     // this code is used to verify be task request
     protected long authCode;
     // protected RoutineLoadDesc routineLoadDesc; // optional
+    @SerializedName(value = "pni", alternate = {"partitionNamesInfo"})
    protected
PartitionNamesInfo partitionNamesInfo; // optional + @SerializedName(value = "columnDescs", alternate = {"cd"}) protected ImportColumnDescs columnDescs; // optional + @SerializedName(value = "pf", alternate = {"precedingFilter"}) protected Expr precedingFilter; // optional + @SerializedName(value = "filter", alternate = {"whereExpr"}) protected Expr whereExpr; // optional protected Separator columnSeparator; // optional protected Separator lineDelimiter; + @SerializedName("cs") + private String serializedColumnSeparator; + @SerializedName("ocs") + private String serializedOriColumnSeparator; + @SerializedName("ld") + private String serializedLineDelimiter; + @SerializedName("old") + private String serializedOriLineDelimiter; @SerializedName("dtcn") protected int desireTaskConcurrentNum; // optional @SerializedName("st") @@ -230,6 +243,7 @@ public boolean isFinalState() { protected TPartialUpdateNewRowPolicy partialUpdateNewKeyPolicy = TPartialUpdateNewRowPolicy.APPEND; protected TUniqueKeyUpdateMode uniqueKeyUpdateMode = TUniqueKeyUpdateMode.UPSERT; + @SerializedName(value = "scn", alternate = {"sequenceCol"}) protected String sequenceCol; protected boolean memtableOnSinkNode = false; @@ -271,6 +285,11 @@ public boolean isFinalState() { protected ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true); protected LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND; // default is all data is load no delete + @SerializedName("mts") + protected boolean mergeTypeSpecified = false; + @SerializedName("mt") + private LoadTask.MergeType serializedMergeType; + @SerializedName(value = "dc", alternate = {"deleteCondition"}) protected Expr deleteCondition; // TODO(ml): error sample @@ -438,11 +457,7 @@ protected void setOptional(CreateRoutineLoadInfo info) throws UserException { protected void setRoutineLoadDesc(RoutineLoadDesc routineLoadDesc) { if (routineLoadDesc != null) { - if (routineLoadDesc.getColumnsInfo() != null) { - columnDescs = new ImportColumnDescs(); - columnDescs.descs.addAll(routineLoadDesc.getColumnsInfo()); - - } + setColumnDescsFromRoutineLoadDesc(routineLoadDesc); if (routineLoadDesc.getPrecedingFilter() != null) { precedingFilter = routineLoadDesc.getPrecedingFilter(); } @@ -461,13 +476,25 @@ protected void setRoutineLoadDesc(RoutineLoadDesc routineLoadDesc) { if (routineLoadDesc.getDeleteCondition() != null) { deleteCondition = routineLoadDesc.getDeleteCondition(); } - mergeType = routineLoadDesc.getMergeType(); + if (routineLoadDesc.getMergeType() != null) { + mergeType = routineLoadDesc.getMergeType(); + } + if (routineLoadDesc.isMergeTypeSpecified()) { + mergeTypeSpecified = true; + } if (routineLoadDesc.hasSequenceCol()) { sequenceCol = routineLoadDesc.getSequenceColName(); } } } + protected void setColumnDescsFromRoutineLoadDesc(RoutineLoadDesc routineLoadDesc) { + if (routineLoadDesc != null && routineLoadDesc.getColumnsInfo() != null) { + columnDescs = new ImportColumnDescs(); + columnDescs.descs.addAll(routineLoadDesc.getColumnsInfo()); + } + } + @Override public long getId() { return id; @@ -1946,8 +1973,17 @@ public void write(DataOutput out) throws IOException { Text.writeString(out, GsonUtils.GSON.toJson(this)); } + @Override + public void gsonPreProcess() throws IOException { + syncSerializedSeparatorFields(columnSeparator, true); + syncSerializedSeparatorFields(lineDelimiter, false); + serializedMergeType = mergeType; + } + @Override public void gsonPostProcess() throws IOException { + restoreSerializedSeparators(); + RoutineLoadDesc 
persistedRoutineLoadDesc = currentRoutineLoadDesc(); if (tableId == 0) { isMultiTable = true; } @@ -2010,7 +2046,7 @@ public void gsonPostProcess() throws IOException { // fall through; let validate() surface the real error } } - createRoutineLoadInfo.validate(ctx); + createRoutineLoadInfo.validateForReplay(ctx); setRoutineLoadDesc(createRoutineLoadInfo.getRoutineLoadDesc()); } finally { ctx.cleanup(); @@ -2019,11 +2055,46 @@ public void gsonPostProcess() throws IOException { this.state = JobState.CANCELLED; LOG.warn("error happens when parsing create routine load stmt: " + origStmt.originStmt, e); } + setRoutineLoadDesc(persistedRoutineLoadDesc); if (userIdentity != null) { userIdentity.setIsAnalyzed(); } } + private RoutineLoadDesc currentRoutineLoadDesc() { + return new RoutineLoadDesc( + columnSeparator, lineDelimiter, + columnDescs == null ? null : new ArrayList<>(columnDescs.descs), precedingFilter, + whereExpr, partitionNamesInfo, deleteCondition, + serializedMergeType, mergeTypeSpecified, sequenceCol); + } + + private void syncSerializedSeparatorFields(Separator separator, boolean isColumnSeparator) { + if (isColumnSeparator) { + serializedColumnSeparator = separator == null ? null : separator.getSeparator(); + serializedOriColumnSeparator = separator == null ? null : separator.getOriSeparator(); + } else { + serializedLineDelimiter = separator == null ? null : separator.getSeparator(); + serializedOriLineDelimiter = separator == null ? null : separator.getOriSeparator(); + } + } + + private void restoreSerializedSeparators() { + if (serializedColumnSeparator != null || serializedOriColumnSeparator != null) { + columnSeparator = buildSeparator(serializedColumnSeparator, serializedOriColumnSeparator); + } + if (serializedLineDelimiter != null || serializedOriLineDelimiter != null) { + lineDelimiter = buildSeparator(serializedLineDelimiter, serializedOriLineDelimiter); + } + } + + private static Separator buildSeparator(String separator, String oriSeparator) { + if (separator == null && oriSeparator == null) { + return null; + } + return new Separator(separator, oriSeparator); + } + public abstract void modifyProperties(AlterRoutineLoadCommand command) throws UserException; public abstract void replayModifyProperties(AlterRoutineLoadJobOperationLog log); @@ -2066,10 +2137,6 @@ protected void modifyCommonJobProperties(Map jobProperties) thro if (jobProperties.containsKey(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE)) { String modeStr = jobProperties.remove(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE); TUniqueKeyUpdateMode newMode = CreateRoutineLoadInfo.parseAndValidateUniqueKeyUpdateMode(modeStr); - // Validate flexible partial update constraints when changing to UPDATE_FLEXIBLE_COLUMNS - if (newMode == TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS) { - validateFlexiblePartialUpdateForAlter(); - } this.uniqueKeyUpdateMode = newMode; this.isPartialUpdate = (uniqueKeyUpdateMode == TUniqueKeyUpdateMode.UPDATE_FIXED_COLUMNS); this.jobProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, uniqueKeyUpdateMode.name()); @@ -2092,7 +2159,17 @@ protected void modifyCommonJobProperties(Map jobProperties) thro /** * Validate flexible partial update constraints when altering routine load job. 
      */
-    private void validateFlexiblePartialUpdateForAlter() throws UserException {
+    protected void validateFlexiblePartialUpdateForAlter(
+            Map<String, String> newJobProperties, RoutineLoadDesc newRoutineLoadDesc) throws UserException {
+        TUniqueKeyUpdateMode newMode = uniqueKeyUpdateMode;
+        if (newJobProperties.containsKey(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE)) {
+            newMode = CreateRoutineLoadInfo.parseAndValidateUniqueKeyUpdateMode(
+                    newJobProperties.get(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE));
+        }
+        if (newMode != TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS) {
+            return;
+        }
+
         // Multi-table load does not support flexible partial update
         if (isMultiTable) {
             throw new DdlException("Flexible partial update is not supported in multi-table load");
         }
@@ -2112,29 +2189,57 @@
         }
         OlapTable olapTable = (OlapTable) table;
 
-        // Validate table-level constraints (MoW, skip_bitmap, light_schema_change, variant columns)
+        // Validate table-level constraints (MoW, skip_bitmap, light_schema_change)
         olapTable.validateForFlexiblePartialUpdate();
 
+        Map<String, String> mergedJobProperties = Maps.newHashMap(this.jobProperties);
+        mergedJobProperties.putAll(newJobProperties);
+
         // Routine load specific validations
         // Must use JSON format
-        String format = this.jobProperties.getOrDefault(FileFormatProperties.PROP_FORMAT, "csv");
+        String format = mergedJobProperties.getOrDefault(FileFormatProperties.PROP_FORMAT, "csv");
         if (!"json".equalsIgnoreCase(format)) {
             throw new DdlException("Flexible partial update only supports JSON format, but current job uses: "
                     + format);
         }
         // Cannot use fuzzy_parse
-        if (Boolean.parseBoolean(this.jobProperties.getOrDefault(
+        if (Boolean.parseBoolean(mergedJobProperties.getOrDefault(
                 JsonFileFormatProperties.PROP_FUZZY_PARSE, "false"))) {
             throw new DdlException("Flexible partial update does not support fuzzy_parse");
         }
         // Cannot use jsonpaths
-        String jsonPaths = getJsonPaths();
+        String jsonPaths = mergedJobProperties.get(JsonFileFormatProperties.PROP_JSON_PATHS);
         if (jsonPaths != null && !jsonPaths.isEmpty()) {
             throw new DdlException("Flexible partial update does not support jsonpaths");
         }
         // Cannot specify COLUMNS mapping
-        if (columnDescs != null && !columnDescs.descs.isEmpty()) {
+        if ((columnDescs != null && !columnDescs.descs.isEmpty())
+                || (newRoutineLoadDesc != null && newRoutineLoadDesc.getColumnsInfo() != null
+                        && !newRoutineLoadDesc.getColumnsInfo().isEmpty())) {
             throw new DdlException("Flexible partial update does not support COLUMNS specification");
         }
+        validateRoutineLoadDescForFlexiblePartialUpdate(newRoutineLoadDesc);
+    }
+
+    private void validateRoutineLoadDescForFlexiblePartialUpdate(RoutineLoadDesc newRoutineLoadDesc)
+            throws DdlException {
+        boolean newDescHasExplicitMergeType = newRoutineLoadDesc != null
+                && (newRoutineLoadDesc.isMergeTypeSpecified()
+                        || newRoutineLoadDesc.getMergeType() != LoadTask.MergeType.APPEND);
+        if (mergeTypeSpecified || mergeType != LoadTask.MergeType.APPEND || newDescHasExplicitMergeType) {
+            throw new DdlException("Don't support flexible partial update when 'merge_type' is specified");
+        }
+        if (whereExpr != null || (newRoutineLoadDesc != null && newRoutineLoadDesc.getFilter() != null)) {
+            throw new DdlException("Don't support flexible partial update when 'where' is specified");
+        }
+        if (deleteCondition != null
+                || (newRoutineLoadDesc != null && newRoutineLoadDesc.getDeleteCondition() != null)) {
+            throw new DdlException("Don't support flexible partial update when
'delete' is specified"); + } + if (!Strings.isNullOrEmpty(sequenceCol) + || (newRoutineLoadDesc != null && newRoutineLoadDesc.hasSequenceCol())) { + throw new DdlException("Don't support flexible partial update when " + + "'function_column.sequence_col' is specified"); + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadManager.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadManager.java index f24b9ad2252dd5..2fc2927fa58ed4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadManager.java @@ -921,7 +921,6 @@ public void alterRoutineLoadJob(AlterRoutineLoadCommand command) throws UserExce + command.getDataSourceProperties().getDataSourceType()); } job.modifyProperties(command); - job.setRoutineLoadDesc(command.getRoutineLoadDesc()); } public void replayAlterRoutineLoadJob(AlterRoutineLoadJobOperationLog log) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kafka/KafkaRoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kafka/KafkaRoutineLoadJob.java index 9464be78d05858..655011b288e9ef 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kafka/KafkaRoutineLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kafka/KafkaRoutineLoadJob.java @@ -709,10 +709,12 @@ public void modifyProperties(AlterRoutineLoadCommand command) throws UserExcepti throw new DdlException("Only supports modification of PAUSED jobs"); } + validateFlexiblePartialUpdateForAlter(jobProperties, command.getRoutineLoadDesc()); modifyPropertiesInternal(jobProperties, dataSourceProperties); + setRoutineLoadDesc(command.getRoutineLoadDesc()); AlterRoutineLoadJobOperationLog log = new AlterRoutineLoadJobOperationLog(this.id, - jobProperties, dataSourceProperties); + jobProperties, dataSourceProperties, command.getRoutineLoadDesc()); Env.getCurrentEnv().getEditLog().logAlterRoutineLoadJob(log); } finally { writeUnlock(); @@ -836,6 +838,11 @@ private void resetCloudProgress(Cloud.ResetRLProgressRequest.Builder builder) th public void replayModifyProperties(AlterRoutineLoadJobOperationLog log) { try { modifyPropertiesInternal(log.getJobProperties(), (KafkaDataSourceProperties) log.getDataSourceProperties()); + if (log.getRoutineLoadDesc() != null) { + setRoutineLoadDesc(log.getRoutineLoadDesc()); + } else if (log.getColumnDescs() != null) { + columnDescs = log.getColumnDescs(); + } } catch (UserException e) { // should not happen LOG.error("failed to replay modify kafka routine load job: {}", id, e); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kinesis/KinesisRoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kinesis/KinesisRoutineLoadJob.java index 0c05889929924e..746f462b5f591a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kinesis/KinesisRoutineLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kinesis/KinesisRoutineLoadJob.java @@ -688,10 +688,12 @@ public void modifyProperties(AlterRoutineLoadCommand command) throws UserExcepti throw new DdlException("Only supports modification of PAUSED jobs"); } + validateFlexiblePartialUpdateForAlter(jobProperties, command.getRoutineLoadDesc()); modifyPropertiesInternal(jobProperties, dataSourceProperties); + setRoutineLoadDesc(command.getRoutineLoadDesc()); AlterRoutineLoadJobOperationLog log = new 
AlterRoutineLoadJobOperationLog(this.id, - jobProperties, dataSourceProperties); + jobProperties, dataSourceProperties, command.getRoutineLoadDesc()); Env.getCurrentEnv().getEditLog().logAlterRoutineLoadJob(log); } finally { writeUnlock(); @@ -785,6 +787,11 @@ public void replayModifyProperties(AlterRoutineLoadJobOperationLog log) { try { modifyPropertiesInternal(log.getJobProperties(), (KinesisDataSourceProperties) log.getDataSourceProperties()); + if (log.getRoutineLoadDesc() != null) { + setRoutineLoadDesc(log.getRoutineLoadDesc()); + } else if (log.getColumnDescs() != null) { + columnDescs = log.getColumnDescs(); + } } catch (UserException e) { LOG.error("failed to replay modify kinesis routine load job: {}", id, e); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadTaskInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadTaskInfo.java index 2ece54c823ecfb..eebb11fd4e7872 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadTaskInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadTaskInfo.java @@ -53,6 +53,10 @@ default void setTimeout(int timeout) { LoadTask.MergeType getMergeType(); + default boolean isMergeTypeSpecified() { + return false; + } + Expression getDeleteCondition(); boolean hasSequenceCol(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadPlanner.java index 8c3e0abd830f22..01c109716b623a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadPlanner.java @@ -45,6 +45,7 @@ import org.apache.doris.service.FrontendOptions; import org.apache.doris.thrift.PaloInternalServiceVersion; import org.apache.doris.thrift.TBrokerFileStatus; +import org.apache.doris.thrift.TFileFormatType; import org.apache.doris.thrift.TFileType; import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TPartialUpdateNewRowPolicy; @@ -150,6 +151,7 @@ public TPipelineFragmentParams plan(TUniqueId loadId, int fragmentInstanceIdInde if (uniquekeyUpdateMode == TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS) { // Validate table-level constraints for flexible partial update destTable.validateForFlexiblePartialUpdate(); + validateLoadTaskForFlexiblePartialUpdate(taskInfo); } HashSet partialUpdateInputColumns = new HashSet<>(); if (uniquekeyUpdateMode == TUniqueKeyUpdateMode.UPDATE_FIXED_COLUMNS) { @@ -336,4 +338,35 @@ public TPipelineFragmentParams plan(TUniqueId loadId, int fragmentInstanceIdInde params.setIsMowTable(destTable.getEnableUniqueKeyMergeOnWrite()); return params; } + + static void validateLoadTaskForFlexiblePartialUpdate(NereidsLoadTaskInfo taskInfo) throws UserException { + if (taskInfo.getFormatType() != TFileFormatType.FORMAT_JSON) { + throw new UserException("flexible partial update only support json format as input file currently"); + } + if (taskInfo.isFuzzyParse()) { + throw new UserException("Don't support flexible partial update when 'fuzzy_parse' is enabled"); + } + if (!taskInfo.getColumnExprDescs().descs.isEmpty()) { + throw new UserException("Don't support flexible partial update when 'columns' is specified"); + } + if (taskInfo.getJsonPaths() != null && !taskInfo.getJsonPaths().isEmpty()) { + throw new UserException("Don't support flexible partial update when 'jsonpaths' is specified"); + } + if 
(taskInfo.getHiddenColumns() != null && !taskInfo.getHiddenColumns().isEmpty()) { + throw new UserException("Don't support flexible partial update when 'hidden_columns' is specified"); + } + if (taskInfo.hasSequenceCol()) { + throw new UserException("Don't support flexible partial update when " + + "'function_column.sequence_col' is specified"); + } + if (taskInfo.isMergeTypeSpecified() || taskInfo.getMergeType() != LoadTask.MergeType.APPEND) { + throw new UserException("Don't support flexible partial update when 'merge_type' is specified"); + } + if (taskInfo.getWhereExpr() != null) { + throw new UserException("Don't support flexible partial update when 'where' is specified"); + } + if (taskInfo.getDeleteCondition() != null) { + throw new UserException("Don't support flexible partial update when 'delete' is specified"); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadTask.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadTask.java index f5ddca41f19a29..fb522ee2a03ee0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadTask.java @@ -75,6 +75,7 @@ public class NereidsStreamLoadTask implements NereidsLoadTaskInfo { private int timeout = Config.stream_load_default_timeout_second; private long execMemLimit = 2 * 1024 * 1024 * 1024L; // default is 2GB private LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND; // default is all data is load no delete + private boolean mergeTypeSpecified; private Expression deleteCondition; private String sequenceCol; private int sendBatchParallelism = 1; @@ -270,6 +271,11 @@ public LoadTask.MergeType getMergeType() { return mergeType; } + @Override + public boolean isMergeTypeSpecified() { + return mergeTypeSpecified; + } + public Expression getDeleteCondition() { return deleteCondition; } @@ -458,6 +464,7 @@ private void setOptionalFromTSLPutRequest(TStreamLoadPutRequest request) throws readJsonByLine = request.isReadJsonByLine(); } if (request.isSetMergeType()) { + mergeTypeSpecified = request.isSetMergeTypeSpecified() && request.isMergeTypeSpecified(); try { mergeType = LoadTask.MergeType.valueOf(request.getMergeType().toString()); } catch (IllegalArgumentException e) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 2938d9efb7b487..ab6dbccbe8ec0f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -2475,8 +2475,9 @@ public LogicalPlan visitCreateRoutineLoad(CreateRoutineLoadContext ctx) { // NOTICE: we should not generate immutable map here, because it will be modified when analyzing. ? 
Maps.newHashMap(visitPropertyItemList(ctx.customProperties)) : Maps.newHashMap(); + boolean mergeTypeSpecified = ctx.WITH() != null; LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND; - if (ctx.WITH() != null) { + if (mergeTypeSpecified) { if (ctx.DELETE() != null) { mergeType = LoadTask.MergeType.DELETE; } else if (ctx.MERGE() != null) { @@ -2498,7 +2499,7 @@ public LogicalPlan visitCreateRoutineLoad(CreateRoutineLoadContext ctx) { } } CreateRoutineLoadInfo createRoutineLoadInfo = new CreateRoutineLoadInfo(jobLabelInfo, tableName, - loadPropertyMap, properties, type, customProperties, mergeType, comment); + loadPropertyMap, properties, type, customProperties, mergeType, mergeTypeSpecified, comment); return new CreateRoutineLoadCommand(createRoutineLoadInfo); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommand.java index 86b085740e2925..0aba4d44a69e96 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommand.java @@ -137,6 +137,7 @@ private void validate(ConnectContext ctx) throws UserException { } private void rewriteAlterOpForOlapTable(ConnectContext ctx, OlapTable table) throws UserException { + validateAlterVariantColumnsForFlexiblePartialUpdate(table); List alterTableOps = new ArrayList<>(); for (AlterTableOp alterClause : ops) { if (alterClause instanceof EnableFeatureOp) { @@ -157,10 +158,19 @@ private void rewriteAlterOpForOlapTable(ConnectContext ctx, OlapTable table) thr throw new AnalysisException("Update flexible columns feature is only supported" + " on merge-on-write unique tables."); } + if (table.isUniqKeyMergeOnWriteWithClusterKeys()) { + throw new AnalysisException("Update flexible columns feature does not support" + + " merge-on-write Unique tables with cluster keys."); + } if (table.hasSkipBitmapColumn()) { throw new AnalysisException("table " + table.getName() + " has enabled update flexible columns feature already."); } + if (!table.getEnableLightSchemaChange()) { + throw new AnalysisException("Update flexible columns feature requires " + + "light_schema_change to be enabled."); + } + table.validateVariantColumnsForFlexiblePartialUpdate(); } // analyse sequence column Type sequenceColType = null; @@ -230,6 +240,33 @@ private void rewriteAlterOpForOlapTable(ConnectContext ctx, OlapTable table) thr ops = alterTableOps; } + private void validateAlterVariantColumnsForFlexiblePartialUpdate(OlapTable table) throws UserException { + boolean enableFlexiblePartialUpdate = false; + for (AlterTableOp alterClause : ops) { + if (alterClause instanceof EnableFeatureOp + && ((EnableFeatureOp) alterClause).getFeature() + == EnableFeatureOp.Features.UPDATE_FLEXIBLE_COLUMNS) { + enableFlexiblePartialUpdate = true; + } + } + if (!enableFlexiblePartialUpdate && !table.hasSkipBitmapColumn()) { + return; + } + for (AlterTableOp alterClause : ops) { + if (alterClause instanceof AddColumnOp) { + OlapTable.validateVariantColumnForFlexiblePartialUpdate( + ((AddColumnOp) alterClause).getColumn()); + } else if (alterClause instanceof AddColumnsOp) { + for (Column column : ((AddColumnsOp) alterClause).getColumns()) { + OlapTable.validateVariantColumnForFlexiblePartialUpdate(column); + } + } else if (alterClause instanceof ModifyColumnOp) { + OlapTable.validateVariantColumnForFlexiblePartialUpdate( + 
+                        ((ModifyColumnOp) alterClause).getColumn());
+            }
+        }
+    }
+
     /**
      * checkExternalTableOperationAllow
      */
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateRoutineLoadInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateRoutineLoadInfo.java
index e8a75c0299b4d6..788105e653d090 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateRoutineLoadInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateRoutineLoadInfo.java
@@ -175,6 +175,8 @@ public class CreateRoutineLoadInfo {
 
     private LoadTask.MergeType mergeType;
 
+    private boolean mergeTypeSpecified;
+
     private boolean isMultiTable = false;
 
     private AbstractDataSourceProperties dataSourceProperties;
@@ -187,6 +189,18 @@ public CreateRoutineLoadInfo(LabelNameInfo labelNameInfo, String tableName,
             Map<String, LoadProperty> loadPropertyMap,
             Map<String, String> jobProperties, String typeName,
             Map<String, String> dataSourceProperties, LoadTask.MergeType mergeType,
             String comment) {
+        this(labelNameInfo, tableName, loadPropertyMap, jobProperties, typeName, dataSourceProperties,
+                mergeType, false, comment);
+    }
+
+    /**
+     * constructor for create routine load, recording whether merge_type was explicitly specified
+     */
+    public CreateRoutineLoadInfo(LabelNameInfo labelNameInfo, String tableName,
+            Map<String, LoadProperty> loadPropertyMap,
+            Map<String, String> jobProperties, String typeName,
+            Map<String, String> dataSourceProperties, LoadTask.MergeType mergeType,
+            boolean mergeTypeSpecified, String comment) {
         this.labelNameInfo = labelNameInfo;
         if (StringUtils.isBlank(tableName)) {
             this.isMultiTable = true;
@@ -198,6 +212,7 @@ public CreateRoutineLoadInfo(LabelNameInfo labelNameInfo, String tableName,
         this.dataSourceProperties = RoutineLoadDataSourcePropertyFactory
                 .createDataSource(typeName, dataSourceProperties, this.isMultiTable);
         this.mergeType = mergeType;
+        this.mergeTypeSpecified = mergeTypeSpecified;
         // Parse unique_key_update_mode first (takes precedence)
         if (this.jobProperties.containsKey(UNIQUE_KEY_UPDATE_MODE)) {
             String modeStr = this.jobProperties.get(UNIQUE_KEY_UPDATE_MODE);
@@ -362,6 +377,10 @@ public LoadTask.MergeType getMergeType() {
         return mergeType;
     }
 
+    public boolean isMergeTypeSpecified() {
+        return mergeTypeSpecified;
+    }
+
     public boolean isMultiTable() {
         return isMultiTable;
     }
@@ -382,8 +401,12 @@ public String getWorkloadGroupName() {
      * analyze create table info
      */
     public void validate(ConnectContext ctx) throws UserException {
+        validate(ctx, true);
+    }
+
+    private void validate(ConnectContext ctx, boolean validateBackendCapability) throws UserException {
         // check dbName and tableName
-        checkDBTable(ctx);
+        checkDBTable(ctx, validateBackendCapability);
         // check name
         try {
             FeNameFormat.checkCommonName(NAME_TYPE, name);
@@ -394,7 +417,8 @@ public void validate(ConnectContext ctx) throws UserException {
                     + " Maybe routine load job name is longer than 64 or contains illegal characters");
         }
         // check load properties include column separator etc.
-        routineLoadDesc = checkLoadProperties(ctx, loadPropertyMap, dbName, tableName, isMultiTable, mergeType);
+        routineLoadDesc = checkLoadProperties(ctx, loadPropertyMap, dbName, tableName, isMultiTable, mergeType,
+                mergeTypeSpecified);
         // check routine load job properties include desired concurrent number etc.
         checkJobProperties();
         // check data source properties
@@ -412,7 +436,14 @@ public void validate(ConnectContext ctx) throws UserException {
         }
     }
 
-    private void checkDBTable(ConnectContext ctx) throws AnalysisException {
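// Illustration (reviewer sketch, not a hunk in this diff): how the two validation entry
// points introduced in the next hunk are meant to be used. Restoring a persisted job from
// the FE image must not consult live BE heartbeat state, so replay takes the relaxed path;
// `isReplay` is a hypothetical caller-side flag.
//
//     CreateRoutineLoadInfo info = ...;
//     if (isReplay) {
//         info.validateForReplay(ctx);   // skips the BE variant-patch capability gate
//     } else {
//         info.validate(ctx);            // full check, including alive-backend capability
//     }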
+    /**
+     * Validate persisted routine-load metadata while restoring FE image.
+     */
+    public void validateForReplay(ConnectContext ctx) throws UserException {
+        validate(ctx, false);
+    }
+
+    private void checkDBTable(ConnectContext ctx, boolean validateBackendCapability) throws AnalysisException {
         labelNameInfo.validate(ctx);
         dbName = labelNameInfo.getDb();
         name = labelNameInfo.getLabel();
@@ -442,14 +473,15 @@ private void checkDBTable(ConnectContext ctx) throws AnalysisException {
         }
         // Validate flexible partial update constraints
         if (uniqueKeyUpdateMode == TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS) {
-            validateFlexiblePartialUpdate((OlapTable) table);
+            validateFlexiblePartialUpdate((OlapTable) table, validateBackendCapability);
         }
     }
 
-    private void validateFlexiblePartialUpdate(OlapTable table) throws AnalysisException {
-        // Validate table-level constraints (MoW, skip_bitmap, light_schema_change, variant columns)
+    private void validateFlexiblePartialUpdate(OlapTable table, boolean validateBackendCapability)
+            throws AnalysisException {
+        // Validate table-level constraints (MoW, skip_bitmap, light_schema_change)
         try {
-            table.validateForFlexiblePartialUpdate();
+            table.validateForFlexiblePartialUpdate(validateBackendCapability);
         } catch (UserException e) {
             throw new AnalysisException(e.getMessage(), e);
         }
@@ -473,6 +505,19 @@ private void validateFlexiblePartialUpdate(OlapTable table) throws AnalysisExcep
                 .anyMatch(p -> p instanceof LoadColumnClause)) {
             throw new AnalysisException("Flexible partial update does not support COLUMNS specification");
         }
+        // Cannot specify merge/delete mode, WHERE filter, or load-level sequence column.
+        if (mergeTypeSpecified || mergeType != LoadTask.MergeType.APPEND) {
+            throw new AnalysisException("Don't support flexible partial update when 'merge_type' is specified");
+        }
+        if (loadPropertyMap != null && loadPropertyMap.values().stream()
+                .anyMatch(p -> p instanceof LoadWhereClause)) {
+            throw new AnalysisException("Don't support flexible partial update when 'where' is specified");
+        }
+        if (loadPropertyMap != null && loadPropertyMap.values().stream()
+                .anyMatch(p -> p instanceof LoadSequenceClause)) {
+            throw new AnalysisException("Don't support flexible partial update when "
+                    + "'function_column.sequence_col' is specified");
+        }
     }
 
     /**
@@ -489,6 +534,12 @@ private void validateFlexiblePartialUpdate(OlapTable table) throws AnalysisExcep
     public static RoutineLoadDesc checkLoadProperties(ConnectContext ctx, Map<String, LoadProperty> loadPropertyMap,
             String dbName, String tableName, boolean isMultiTable, LoadTask.MergeType mergeType)
             throws UserException {
+        return checkLoadProperties(ctx, loadPropertyMap, dbName, tableName, isMultiTable, mergeType, false);
+    }
+
+    private static RoutineLoadDesc checkLoadProperties(ConnectContext ctx, Map<String, LoadProperty> loadPropertyMap,
+            String dbName, String tableName, boolean isMultiTable, LoadTask.MergeType mergeType,
+            boolean mergeTypeSpecified) throws UserException {
         Separator columnSeparator = null;
         // TODO(yangzhengguo01): add line delimiter to properties
         Separator lineDelimiter = null;
@@ -547,7 +598,7 @@ public static RoutineLoadDesc checkLoadProperties(ConnectContext ctx, Map<String, LoadProperty> loadPropertyMap,
diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/AlterRoutineLoadJobOperationLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/AlterRoutineLoadJobOperationLog.java
--- a/fe/fe-core/src/main/java/org/apache/doris/persist/AlterRoutineLoadJobOperationLog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/persist/AlterRoutineLoadJobOperationLog.java
     @SerializedName(value = "jobProperties")
     private Map<String, String> jobProperties;
     @SerializedName(value = "dataSourceProperties")
     private AbstractDataSourceProperties dataSourceProperties;
+    @SerializedName(value = "columnDescs")
+    private ImportColumnDescs columnDescs;
+    @SerializedName(value = "hrld")
+    private boolean hasRoutineLoadDesc;
+    @SerializedName(value = "cs")
+    private String columnSeparator;
+    @SerializedName(value = "ocs")
+    private String oriColumnSeparator;
+    @SerializedName(value = "ld")
+
private String lineDelimiter; + @SerializedName(value = "old") + private String oriLineDelimiter; + @SerializedName(value = "pf") + private Expr precedingFilter; + @SerializedName(value = "filter") + private Expr filter; + @SerializedName(value = "dc") + private Expr deleteCondition; + @SerializedName(value = "pni") + private PartitionNamesInfo partitionNamesInfo; + @SerializedName(value = "mt") + private LoadTask.MergeType mergeType; + @SerializedName(value = "mts") + private boolean mergeTypeSpecified; + @SerializedName(value = "scn") + private String sequenceColName; public AlterRoutineLoadJobOperationLog(long jobId, Map jobProperties, AbstractDataSourceProperties dataSourceProperties) { + this(jobId, jobProperties, dataSourceProperties, null); + } + + public AlterRoutineLoadJobOperationLog(long jobId, Map jobProperties, + AbstractDataSourceProperties dataSourceProperties, RoutineLoadDesc routineLoadDesc) { this.jobId = jobId; - this.jobProperties = jobProperties; + this.jobProperties = new HashMap<>(jobProperties); this.dataSourceProperties = dataSourceProperties; + if (routineLoadDesc == null) { + return; + } + hasRoutineLoadDesc = true; + setSeparatorFields(routineLoadDesc.getColumnSeparator(), true); + setSeparatorFields(routineLoadDesc.getLineDelimiter(), false); + precedingFilter = routineLoadDesc.getPrecedingFilter(); + filter = routineLoadDesc.getFilter(); + deleteCondition = routineLoadDesc.getDeleteCondition(); + partitionNamesInfo = routineLoadDesc.getPartitionNamesInfo(); + mergeType = routineLoadDesc.getMergeType(); + mergeTypeSpecified = routineLoadDesc.isMergeTypeSpecified(); + sequenceColName = routineLoadDesc.getSequenceColName(); + if (routineLoadDesc.getColumnsInfo() != null) { + this.columnDescs = new ImportColumnDescs(); + this.columnDescs.descs.addAll(routineLoadDesc.getColumnsInfo()); + } } public long getJobId() { @@ -57,6 +113,41 @@ public AbstractDataSourceProperties getDataSourceProperties() { return dataSourceProperties; } + public ImportColumnDescs getColumnDescs() { + return columnDescs; + } + + public RoutineLoadDesc getRoutineLoadDesc() { + if (!hasRoutineLoadDesc) { + return null; + } + return new RoutineLoadDesc( + buildSeparator(columnSeparator, oriColumnSeparator), + buildSeparator(lineDelimiter, oriLineDelimiter), + columnDescs == null ? 
null : new ArrayList<>(columnDescs.descs), precedingFilter, + filter, partitionNamesInfo, deleteCondition, mergeType, mergeTypeSpecified, sequenceColName); + } + + private void setSeparatorFields(Separator separator, boolean isColumnSeparator) { + if (separator == null) { + return; + } + if (isColumnSeparator) { + columnSeparator = separator.getSeparator(); + oriColumnSeparator = separator.getOriSeparator(); + } else { + lineDelimiter = separator.getSeparator(); + oriLineDelimiter = separator.getOriSeparator(); + } + } + + private static Separator buildSeparator(String separator, String oriSeparator) { + if (separator == null && oriSeparator == null) { + return null; + } + return new Separator(separator, oriSeparator); + } + public static AlterRoutineLoadJobOperationLog read(DataInput in) throws IOException { String json = Text.readString(in); return GsonUtils.GSON.fromJson(json, AlterRoutineLoadJobOperationLog.class); diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java index d403c88732ea2d..f962f4be62c3ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java @@ -148,6 +148,7 @@ public class Backend implements Writable { // from config::pipeline_executor_size , default equal cpuCores @SerializedName("pipelineExecutorSize") private int pipelineExecutorSize = 1; + private transient boolean supportsVariantFlexiblePartialUpdate = false; // Counter of heartbeat failure. // Once a heartbeat failed, increase this counter by one. @@ -911,6 +912,11 @@ public boolean handleHbResponse(BackendHbResponse hbResponse, boolean isReplay) isChanged = true; this.beMemory = hbResponse.getBeMemory(); } + if (this.supportsVariantFlexiblePartialUpdate + != hbResponse.supportsVariantFlexiblePartialUpdate()) { + isChanged = true; + this.supportsVariantFlexiblePartialUpdate = hbResponse.supportsVariantFlexiblePartialUpdate(); + } this.lastUpdateMs = hbResponse.getHbTime(); if (!isAlive.get()) { @@ -960,6 +966,10 @@ public boolean handleHbResponse(BackendHbResponse hbResponse, boolean isReplay) LOG.warn("{} is dead,", this.toString()); } } + if (!isAlive.get() && supportsVariantFlexiblePartialUpdate) { + isChanged = true; + supportsVariantFlexiblePartialUpdate = false; + } // still set error msg and missing time even if we may not mark this backend as dead, // for debug easily. @@ -979,6 +989,10 @@ public long getTabletMaxCompactionScore() { return tabletMaxCompactionScore; } + public boolean supportsVariantFlexiblePartialUpdate() { + return supportsVariantFlexiblePartialUpdate; + } + private long getDiskNumByStorageMedium(TStorageMedium storageMedium) { return disksRef.values().stream().filter(v -> v.getStorageMedium() == storageMedium).count(); } @@ -1138,4 +1152,3 @@ public static Backend fromThrift(TBackend backend) { } } - diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/BackendHbResponse.java b/fe/fe-core/src/main/java/org/apache/doris/system/BackendHbResponse.java index 41fad9961a4e94..0ce0375c2e1666 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/BackendHbResponse.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/BackendHbResponse.java @@ -51,6 +51,8 @@ public class BackendHbResponse extends HeartbeatResponse implements Writable { private boolean isShutDown = false; // The physical memory available for use by BE. 
private long beMemory = 0; + @SerializedName(value = "supportsVariantFlexiblePartialUpdate") + private boolean supportsVariantFlexiblePartialUpdate = false; public BackendHbResponse() { super(HeartbeatResponse.Type.BACKEND); @@ -59,25 +61,21 @@ public BackendHbResponse() { public BackendHbResponse(long beId, int bePort, int httpPort, int brpcPort, long hbTime, long beStartTime, String version, String nodeRole, long fragmentNum, long lastFragmentUpdateTime, boolean isShutDown, int arrowFlightSqlPort) { - super(HeartbeatResponse.Type.BACKEND); - this.beId = beId; - this.status = HbStatus.OK; - this.bePort = bePort; - this.httpPort = httpPort; - this.brpcPort = brpcPort; - this.hbTime = hbTime; - this.beStartTime = beStartTime; - this.version = version; - this.nodeRole = nodeRole; - this.fragmentNum = fragmentNum; - this.lastFragmentUpdateTime = lastFragmentUpdateTime; - this.isShutDown = isShutDown; - this.arrowFlightSqlPort = arrowFlightSqlPort; + this(beId, bePort, httpPort, brpcPort, hbTime, beStartTime, version, nodeRole, fragmentNum, + lastFragmentUpdateTime, isShutDown, arrowFlightSqlPort, 0, false); } public BackendHbResponse(long beId, int bePort, int httpPort, int brpcPort, long hbTime, long beStartTime, String version, String nodeRole, long fragmentNum, long lastFragmentUpdateTime, boolean isShutDown, int arrowFlightSqlPort, long beMemory) { + this(beId, bePort, httpPort, brpcPort, hbTime, beStartTime, version, nodeRole, fragmentNum, + lastFragmentUpdateTime, isShutDown, arrowFlightSqlPort, beMemory, false); + } + + public BackendHbResponse(long beId, int bePort, int httpPort, int brpcPort, long hbTime, long beStartTime, + String version, String nodeRole, long fragmentNum, long lastFragmentUpdateTime, + boolean isShutDown, int arrowFlightSqlPort, long beMemory, + boolean supportsVariantFlexiblePartialUpdate) { super(HeartbeatResponse.Type.BACKEND); this.beId = beId; this.status = HbStatus.OK; @@ -93,6 +91,7 @@ public BackendHbResponse(long beId, int bePort, int httpPort, int brpcPort, long this.isShutDown = isShutDown; this.arrowFlightSqlPort = arrowFlightSqlPort; this.beMemory = beMemory; + this.supportsVariantFlexiblePartialUpdate = supportsVariantFlexiblePartialUpdate; } public BackendHbResponse(long beId, String host, long lastHbTime, String errMsg) { @@ -152,6 +151,10 @@ public long getBeMemory() { return beMemory; } + public boolean supportsVariantFlexiblePartialUpdate() { + return supportsVariantFlexiblePartialUpdate; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); @@ -162,6 +165,7 @@ public String toString() { sb.append(", httpPort: ").append(httpPort); sb.append(", brpcPort: ").append(brpcPort); sb.append(", arrowFlightSqlPort: ").append(arrowFlightSqlPort); + sb.append(", supportsVariantFlexiblePartialUpdate: ").append(supportsVariantFlexiblePartialUpdate); return sb.toString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatMgr.java b/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatMgr.java index 4500ce6da0aefd..ce984225790e8d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatMgr.java @@ -321,6 +321,7 @@ private HeartbeatResponse pingOnce() { backendInfo.setBrpcPort(4); backendInfo.setArrowFlightSqlPort(8); backendInfo.setVersion("test-1234"); + backendInfo.setSupportsVariantFlexiblePartialUpdate(true); result = new THeartbeatResult(); result.setStatus(new TStatus(TStatusCode.OK)); 
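// Illustration (sketch, not a hunk in this diff): the capability travels as an optional
// thrift field, so the FE read in the next hunk must treat "unset" from an old BE exactly
// like "false". Shape of that read, using only the TBackendInfo accessors the hunk itself
// calls:
//
//     static boolean readVariantPatchCapability(TBackendInfo info) {
//         // isSet* guards the optional field; old backends never set it.
//         return info.isSetSupportsVariantFlexiblePartialUpdate()
//                 && info.isSupportsVariantFlexiblePartialUpdate();
//     }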
result.setBackendInfo(backendInfo); @@ -365,9 +366,13 @@ private HeartbeatResponse pingOnce() { isShutDown = tBackendInfo.isIsShutdown(); } long beMemory = tBackendInfo.isSetBeMem() ? tBackendInfo.getBeMem() : 0; + boolean supportsVariantFlexiblePartialUpdate = + tBackendInfo.isSetSupportsVariantFlexiblePartialUpdate() + && tBackendInfo.isSupportsVariantFlexiblePartialUpdate(); return new BackendHbResponse(backendId, bePort, httpPort, brpcPort, System.currentTimeMillis(), beStartTime, version, nodeRole, - fragmentNum, lastFragmentUpdateTime, isShutDown, arrowFlightSqlPort, beMemory); + fragmentNum, lastFragmentUpdateTime, isShutDown, arrowFlightSqlPort, beMemory, + supportsVariantFlexiblePartialUpdate); } else { return new BackendHbResponse(backendId, backend.getHost(), backend.getLastUpdateMs(), result.getStatus().getErrorMsgs().isEmpty() diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java index 0cefab3d91d6b8..e825237e3cba21 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java @@ -17,19 +17,25 @@ package org.apache.doris.catalog; +import org.apache.doris.alter.SchemaChangeHandler; import org.apache.doris.catalog.TableIf.TableType; import org.apache.doris.catalog.info.IndexType; import org.apache.doris.cloud.proto.Cloud; import org.apache.doris.cloud.rpc.VersionHelper; +import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; import org.apache.doris.common.FeConstants; +import org.apache.doris.common.UserException; import org.apache.doris.common.io.FastByteArrayOutputStream; import org.apache.doris.common.util.PropertyAnalyzer; import org.apache.doris.common.util.UnitTestUtil; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.SessionVariable; +import org.apache.doris.system.Backend; +import org.apache.doris.system.SystemInfoService; import org.apache.doris.thrift.TStorageType; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.junit.Assert; @@ -40,8 +46,11 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Arrays; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; @@ -146,6 +155,170 @@ public void testBuildVariantEnableFlattenNestedWithLegacyPropertyKey() throws IO tableProperty.getProperties().containsKey(PropertyAnalyzer.LEGACY_PROPERTIES_VARIANT_ENABLE_FLATTEN_NESTED)); } + @Test + public void testValidateVariantColumnsForFlexiblePartialUpdate() throws UserException { + Column normalVariant = new Column("v", Type.VARIANT); + OlapTable.validateVariantColumnsForFlexiblePartialUpdate(Lists.newArrayList( + new Column("k", PrimitiveType.INT), normalVariant)); + + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean()); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate( + Mockito.anyBoolean()); + 
Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.isUniqKeyMergeOnWriteWithClusterKeys()).thenReturn(false); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList( + new Column("k", PrimitiveType.INT), normalVariant)); + Mockito.when(table.variantEnableFlattenNested()).thenReturn(false); + table.validateForFlexiblePartialUpdate(); + + VariantType docModeVariant = new VariantType(new ArrayList<>(), 0, false, 10000, 0, + true, 0L, 64, false); + Column docModeColumn = new Column("doc_v", docModeVariant); + UserException exception = Assert.assertThrows(UserException.class, + () -> OlapTable.validateVariantColumnsForFlexiblePartialUpdate( + Lists.newArrayList(docModeColumn))); + Assert.assertTrue(exception.getMessage().contains( + "VARIANT flexible partial update does not support doc mode in this version")); + + exception = Assert.assertThrows(UserException.class, + () -> OlapTable.validateVariantColumnsForFlexiblePartialUpdate( + Lists.newArrayList(normalVariant), true)); + Assert.assertTrue(exception.getMessage().contains( + "VARIANT flexible partial update does not support deprecated_variant_enable_flatten_nested")); + + Mockito.when(table.variantEnableFlattenNested()).thenReturn(true); + exception = Assert.assertThrows(UserException.class, table::validateForFlexiblePartialUpdate); + Assert.assertTrue(exception.getMessage().contains( + "VARIANT flexible partial update does not support deprecated_variant_enable_flatten_nested")); + + Mockito.when(table.variantEnableFlattenNested()).thenReturn(false); + Mockito.when(table.isUniqKeyMergeOnWriteWithClusterKeys()).thenReturn(true); + exception = Assert.assertThrows(UserException.class, table::validateForFlexiblePartialUpdate); + Assert.assertTrue(exception.getMessage().contains("cluster keys")); + } + + @Test + public void testValidateVariantFlexiblePartialUpdateRejectsUnsupportedBackendCapability() + throws UserException { + Backend supportedBackend = Mockito.mock(Backend.class); + Mockito.when(supportedBackend.isAlive()).thenReturn(true); + Mockito.when(supportedBackend.supportsVariantFlexiblePartialUpdate()).thenReturn(true); + OlapTable.validateBackendsSupportVariantFlexiblePartialUpdate(Lists.newArrayList(supportedBackend)); + + Backend oldBackend = Mockito.mock(Backend.class); + Mockito.when(oldBackend.isAlive()).thenReturn(true); + Mockito.when(oldBackend.getId()).thenReturn(2L); + Mockito.when(oldBackend.getHost()).thenReturn("127.0.0.2"); + Mockito.when(oldBackend.getVersion()).thenReturn("old-version"); + Mockito.when(oldBackend.supportsVariantFlexiblePartialUpdate()).thenReturn(false); + + UserException exception = Assert.assertThrows(UserException.class, + () -> OlapTable.validateBackendsSupportVariantFlexiblePartialUpdate( + Lists.newArrayList(supportedBackend, oldBackend))); + Assert.assertTrue(exception.getMessage().contains("variant patch skip-bitmap marker support")); + Assert.assertTrue(exception.getMessage().contains("old-version")); + + Mockito.when(oldBackend.isAlive()).thenReturn(false); + exception = Assert.assertThrows(UserException.class, + () -> OlapTable.validateBackendsSupportVariantFlexiblePartialUpdate( + Lists.newArrayList(supportedBackend, oldBackend))); + Assert.assertTrue(exception.getMessage().contains("not alive")); + } + + @Test + public void testValidateVariantFlexiblePartialUpdateUsesCurrentClusterBackends() + throws 
UserException, AnalysisException {
+        Backend supportedBackend = Mockito.mock(Backend.class);
+        Mockito.when(supportedBackend.isAlive()).thenReturn(true);
+        Mockito.when(supportedBackend.supportsVariantFlexiblePartialUpdate()).thenReturn(true);
+
+        SystemInfoService systemInfoService = Mockito.mock(SystemInfoService.class);
+        Mockito.when(systemInfoService.getBackendsByCurrentCluster())
+                .thenReturn(ImmutableMap.of(1L, supportedBackend));
+        Mockito.when(systemInfoService.getAllBackendsByAllCluster())
+                .thenThrow(new AnalysisException("unexpected all-backend validation"));
+
+        OlapTable table = Mockito.mock(OlapTable.class);
+        Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate();
+        Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean());
+        Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate();
+        Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate(
+                Mockito.anyBoolean());
+        Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true);
+        Mockito.when(table.isUniqKeyMergeOnWriteWithClusterKeys()).thenReturn(false);
+        Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true);
+        Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true);
+        Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList(
+                new Column("k", PrimitiveType.INT), new Column("v", Type.VARIANT)));
+        Mockito.when(table.variantEnableFlattenNested()).thenReturn(false);
+
+        try (MockedStatic<Env> envStatic = Mockito.mockStatic(Env.class)) {
+            envStatic.when(Env::getCurrentSystemInfo).thenReturn(systemInfoService);
+            table.validateForFlexiblePartialUpdate();
+        }
+    }
+
+    @Test
+    public void testSchemaChangeHandlerValidatesVariantColumnsForFlexiblePartialUpdate() throws Throwable {
+        Column normalVariant = new Column("v", Type.VARIANT);
+        Column skipBitmap = new Column(Column.SKIP_BITMAP_COL, Type.BITMAP);
+        skipBitmap.setIsVisible(false);
+        LinkedList<Column> baseSchema = Lists.newLinkedList(
+                Lists.newArrayList(new Column("k", PrimitiveType.INT), normalVariant, skipBitmap));
+        Map<Long, LinkedList<Column>> indexSchemaMap = Maps.newHashMap();
+        indexSchemaMap.put(1L, baseSchema);
+
+        OlapTable table = Mockito.mock(OlapTable.class);
+        Mockito.when(table.getBaseIndexId()).thenReturn(1L);
+        Mockito.when(table.hasSkipBitmapColumn()).thenReturn(false);
+        Mockito.when(table.variantEnableFlattenNested()).thenReturn(false);
+
+        Method method = SchemaChangeHandler.class.getDeclaredMethod(
+                "validateVariantColumnsForFlexiblePartialUpdate", OlapTable.class, Map.class);
+        method.setAccessible(true);
+        try {
+            method.invoke(new SchemaChangeHandler(), table, indexSchemaMap);
+        } catch (InvocationTargetException e) {
+            throw e.getCause();
+        }
+
+        Mockito.when(table.variantEnableFlattenNested()).thenReturn(true);
+        InvocationTargetException exception = Assert.assertThrows(InvocationTargetException.class,
+                () -> method.invoke(new SchemaChangeHandler(), table, indexSchemaMap));
+        Assert.assertTrue(exception.getCause().getMessage().contains(
+                "VARIANT flexible partial update does not support deprecated_variant_enable_flatten_nested"));
+
+        Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true);
+        Mockito.when(table.variantEnableFlattenNested()).thenReturn(false);
+        try {
+            method.invoke(new SchemaChangeHandler(), table, indexSchemaMap);
+        } catch (InvocationTargetException e) {
+            throw e.getCause();
+        }
+
+        Mockito.when(table.variantEnableFlattenNested()).thenReturn(true);
+        exception = Assert.assertThrows(InvocationTargetException.class,
+                () -> method.invoke(new SchemaChangeHandler(), table, indexSchemaMap));
+        Assert.assertTrue(exception.getCause().getMessage().contains(
+                "VARIANT flexible partial update does not support deprecated_variant_enable_flatten_nested"));
+
+        VariantType docModeVariant = new VariantType(new ArrayList<>(), 0, false, 10000, 0,
+                true, 0L, 64, false);
+        Map<Long, LinkedList<Column>> docModeSchemaMap = Maps.newHashMap();
+        docModeSchemaMap.put(1L, Lists.newLinkedList(Lists.newArrayList(
+                new Column("k", PrimitiveType.INT), new Column("doc_v", docModeVariant), skipBitmap)));
+        Mockito.when(table.variantEnableFlattenNested()).thenReturn(false);
+        exception = Assert.assertThrows(InvocationTargetException.class,
+                () -> method.invoke(new SchemaChangeHandler(), table, docModeSchemaMap));
+        Assert.assertTrue(exception.getCause().getMessage().contains(
+                "VARIANT flexible partial update does not support doc mode in this version"));
+    }
+
     @Test
     public void testGetPartitionRowCount() {
         OlapTable olapTable = new OlapTable();
diff --git a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/KinesisRoutineLoadJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/KinesisRoutineLoadJobTest.java
index aa1dc052605f2b..0d7742e7f4dfdf 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/KinesisRoutineLoadJobTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/KinesisRoutineLoadJobTest.java
@@ -17,20 +17,40 @@
 package org.apache.doris.load.routineload;
 
+import org.apache.doris.analysis.ImportColumnDesc;
+import org.apache.doris.analysis.Separator;
 import org.apache.doris.analysis.UserIdentity;
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.PrimitiveType;
 import org.apache.doris.common.Config;
+import org.apache.doris.common.UserException;
 import org.apache.doris.common.jmockit.Deencapsulation;
+import org.apache.doris.datasource.InternalCatalog;
+import org.apache.doris.datasource.property.fileformat.FileFormatProperties;
+import org.apache.doris.load.RoutineLoadDesc;
+import org.apache.doris.load.loadv2.LoadTask;
 import org.apache.doris.load.routineload.kinesis.KinesisConfiguration;
 import org.apache.doris.load.routineload.kinesis.KinesisDataSourceProperties;
 import org.apache.doris.load.routineload.kinesis.KinesisProgress;
 import org.apache.doris.load.routineload.kinesis.KinesisRoutineLoadJob;
 import org.apache.doris.load.routineload.kinesis.KinesisTaskInfo;
+import org.apache.doris.nereids.trees.plans.commands.AlterRoutineLoadCommand;
+import org.apache.doris.nereids.trees.plans.commands.info.CreateRoutineLoadInfo;
+import org.apache.doris.nereids.trees.plans.commands.info.LabelNameInfo;
+import org.apache.doris.persist.AlterRoutineLoadJobOperationLog;
+import org.apache.doris.persist.EditLog;
+import org.apache.doris.thrift.TUniqueKeyUpdateMode;
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.gson.Gson;
 import org.junit.Assert;
 import org.junit.Test;
+import org.mockito.MockedStatic;
+import org.mockito.Mockito;
 
 import java.util.HashMap;
 import java.util.HashSet;
@@ -229,6 +249,111 @@ public void testModifyPropertiesShouldReplaceCustomShardsWhenExplicitShardsProvi
         Assert.assertEquals("202", progress.getSequenceNumberByShard("shard-2"));
     }
 
+    @Test
+    public void testModifyPropertiesShouldApplyAndPersistRoutineLoadDesc() 
throws Exception { + KinesisRoutineLoadJob routineLoadJob = + new KinesisRoutineLoadJob(1L, "kinesis_routine_load_job", 1L, + 1L, "ap-southeast-1", "stream-1", UserIdentity.ADMIN); + Deencapsulation.setField(routineLoadJob, "state", RoutineLoadJob.JobState.PAUSED); + + RoutineLoadDesc routineLoadDesc = new RoutineLoadDesc(new Separator("|", "|"), null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, "seq"); + AlterRoutineLoadCommand command = new AlterRoutineLoadCommand( + new LabelNameInfo("db", "job"), Maps.newHashMap(), Maps.newHashMap()); + Deencapsulation.setField(command, "routineLoadDesc", routineLoadDesc); + + Env env = Mockito.mock(Env.class); + EditLog editLog = Mockito.mock(EditLog.class); + Mockito.when(env.getEditLog()).thenReturn(editLog); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + + routineLoadJob.modifyProperties(command); + } + + Assert.assertEquals(1, routineLoadJob.getColumnExprDescs().descs.size()); + Assert.assertEquals("|", routineLoadJob.getColumnSeparator().getSeparator()); + Assert.assertEquals("seq", routineLoadJob.getSequenceCol()); + Mockito.verify(editLog).logAlterRoutineLoadJob(Mockito.argThat(log -> + log.getRoutineLoadDesc() != null + && "|".equals(log.getRoutineLoadDesc().getColumnSeparator().getSeparator()) + && "seq".equals(log.getRoutineLoadDesc().getSequenceColName()))); + } + + @Test + public void testModifyPropertiesShouldValidateFlexibleAlterAgainstRoutineLoadDesc() throws Exception { + KinesisRoutineLoadJob routineLoadJob = + new KinesisRoutineLoadJob(1L, "kinesis_routine_load_job", 1L, + 1L, "ap-southeast-1", "stream-1", UserIdentity.ADMIN); + Deencapsulation.setField(routineLoadJob, "dbId", 1L); + Deencapsulation.setField(routineLoadJob, "tableId", 2L); + Deencapsulation.setField(routineLoadJob, "isMultiTable", false); + Deencapsulation.setField(routineLoadJob, "state", RoutineLoadJob.JobState.PAUSED); + Deencapsulation.setField(routineLoadJob, "uniqueKeyUpdateMode", TUniqueKeyUpdateMode.UPSERT); + + Map currentJobProperties = Maps.newHashMap(); + currentJobProperties.put(FileFormatProperties.PROP_FORMAT, "json"); + Deencapsulation.setField(routineLoadJob, "jobProperties", currentJobProperties); + + RoutineLoadDesc routineLoadDesc = new RoutineLoadDesc(null, null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, null); + AlterRoutineLoadCommand command = new AlterRoutineLoadCommand( + new LabelNameInfo("db", "job"), Maps.newHashMap(), Maps.newHashMap()); + Deencapsulation.setField(command, "routineLoadDesc", routineLoadDesc); + Map flexibleProperties = Maps.newHashMap(); + flexibleProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, "UPDATE_FLEXIBLE_COLUMNS"); + Deencapsulation.setField(command, "analyzedJobProperties", flexibleProperties); + + Env env = Mockito.mock(Env.class); + EditLog editLog = Mockito.mock(EditLog.class); + Mockito.when(env.getEditLog()).thenReturn(editLog); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(catalog.getDbNullable(1L)).thenReturn(db); + Mockito.when(db.getTableNullable(2L)).thenReturn(table); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean()); + 
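// Illustration (sketch): the invariant this test pins down. An ALTER that switches a job
// to UPDATE_FLEXIBLE_COLUMNS must be validated against the job state as it would look
// after the alter: fields carried by the incoming RoutineLoadDesc override the persisted
// ones, everything else falls back to the job. Hypothetical merged view of COLUMNS:
//
//     List<ImportColumnDesc> effective = incoming != null && incoming.getColumnsInfo() != null
//             ? incoming.getColumnsInfo()
//             : job.getColumnExprDescs().descs;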
Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate( + Mockito.anyBoolean()); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList(new Column("k", PrimitiveType.INT))); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + UserException exception = Assert.assertThrows(UserException.class, + () -> routineLoadJob.modifyProperties(command)); + Assert.assertTrue(exception.getMessage().contains("COLUMNS specification")); + } + Mockito.verify(editLog, Mockito.never()).logAlterRoutineLoadJob(Mockito.any()); + } + + @Test + public void testReplayModifyPropertiesShouldRestoreRoutineLoadDesc() { + KinesisRoutineLoadJob routineLoadJob = + new KinesisRoutineLoadJob(1L, "kinesis_routine_load_job", 1L, + 1L, "ap-southeast-1", "stream-1", UserIdentity.ADMIN); + + RoutineLoadDesc routineLoadDesc = new RoutineLoadDesc(new Separator("|", "|"), null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, "seq"); + AlterRoutineLoadJobOperationLog log = new AlterRoutineLoadJobOperationLog( + 1L, Maps.newHashMap(), null, routineLoadDesc); + routineLoadJob.replayModifyProperties(log); + + Assert.assertEquals(1, routineLoadJob.getColumnExprDescs().descs.size()); + Assert.assertEquals("|", routineLoadJob.getColumnSeparator().getSeparator()); + Assert.assertEquals("seq", routineLoadJob.getSequenceCol()); + } + @Test public void testShardRefreshShouldMoveRetiredParentToClosedUntilConsumed() throws Exception { KinesisRoutineLoadJob routineLoadJob = diff --git a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java index 8876c6a8aea9bc..fdf36700b5da13 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java @@ -17,21 +17,43 @@ package org.apache.doris.load.routineload; +import org.apache.doris.analysis.BinaryPredicate; +import org.apache.doris.analysis.ImportColumnDesc; +import org.apache.doris.analysis.IntLiteral; +import org.apache.doris.analysis.Separator; +import org.apache.doris.analysis.SlotRef; import org.apache.doris.analysis.UserIdentity; +import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.Table; +import org.apache.doris.catalog.Type; +import org.apache.doris.catalog.info.PartitionNamesInfo; import org.apache.doris.common.InternalErrorCode; import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; import org.apache.doris.common.jmockit.Deencapsulation; +import org.apache.doris.datasource.CatalogMgr; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.datasource.kafka.KafkaUtil; +import org.apache.doris.datasource.property.fileformat.FileFormatProperties; +import 
org.apache.doris.datasource.property.fileformat.JsonFileFormatProperties; +import org.apache.doris.load.RoutineLoadDesc; +import org.apache.doris.load.loadv2.LoadTask; import org.apache.doris.load.routineload.kafka.KafkaProgress; import org.apache.doris.load.routineload.kafka.KafkaRoutineLoadJob; import org.apache.doris.load.routineload.kafka.KafkaTaskInfo; +import org.apache.doris.nereids.trees.plans.commands.AlterRoutineLoadCommand; import org.apache.doris.nereids.trees.plans.commands.info.CreateRoutineLoadInfo; +import org.apache.doris.nereids.trees.plans.commands.info.LabelNameInfo; +import org.apache.doris.persist.AlterRoutineLoadJobOperationLog; import org.apache.doris.persist.EditLog; +import org.apache.doris.persist.gson.GsonUtils; +import org.apache.doris.qe.OriginStatement; +import org.apache.doris.task.LoadTaskInfo.ImportColumnDescs; import org.apache.doris.thrift.TKafkaRLTaskProgress; import org.apache.doris.thrift.TUniqueKeyUpdateMode; import org.apache.doris.transaction.GlobalTransactionMgrIface; @@ -42,6 +64,7 @@ import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.gson.JsonObject; import org.apache.kafka.common.PartitionInfo; import org.junit.Assert; import org.junit.Test; @@ -51,6 +74,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Optional; public class RoutineLoadJobTest { @Test @@ -459,4 +483,509 @@ public void testUniqueKeyUpdateModeTakesPrecedenceOverPartialColumns() throws Ex Assert.assertFalse(isPartialUpdate); } + @Test + public void testValidateFlexiblePartialUpdateForAlterUsesAlteredProperties() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "uniqueKeyUpdateMode", TUniqueKeyUpdateMode.UPSERT); + + Map currentJobProperties = Maps.newHashMap(); + currentJobProperties.put(FileFormatProperties.PROP_FORMAT, "json"); + Deencapsulation.setField(job, "jobProperties", currentJobProperties); + + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(catalog.getDbNullable(1L)).thenReturn(db); + Mockito.when(db.getTableNullable(2L)).thenReturn(table); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean()); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate( + Mockito.anyBoolean()); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList(new Column("k", PrimitiveType.INT))); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + Map modeAndJsonPathsProperties = Maps.newHashMap(); + modeAndJsonPathsProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, "UPDATE_FLEXIBLE_COLUMNS"); + modeAndJsonPathsProperties.put(JsonFileFormatProperties.PROP_JSON_PATHS, "[\"$.id\"]"); + 
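// Illustration (sketch, not part of the patch): why fixed JSON projections are rejected.
// Flexible partial update derives the patched column set per row from the JSON object keys;
// jsonpaths pins one fixed projection and fuzzy_parse assumes one uniform row shape, so both
// would erase the per-row key information. A hypothetical guard with the same shape as the
// checks asserted below (the property keys are the real JsonFileFormatProperties constants;
// the method name and messages are illustrative):
static void rejectFixedJsonProjection(java.util.Map<String, String> props) throws UserException {
    if (props.containsKey(JsonFileFormatProperties.PROP_JSON_PATHS)) {
        throw new UserException("Don't support flexible partial update when 'jsonpaths' is specified");
    }
    // parseBoolean(null) is false, so an absent property passes the guard.
    if (Boolean.parseBoolean(props.get(JsonFileFormatProperties.PROP_FUZZY_PARSE))) {
        throw new UserException("Don't support flexible partial update when 'fuzzy_parse' is specified");
    }
}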
UserException exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(modeAndJsonPathsProperties, null)); + Assert.assertTrue(exception.getMessage().contains("jsonpaths")); + + Deencapsulation.setField(job, "uniqueKeyUpdateMode", TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS); + + Map fuzzyParseProperties = Maps.newHashMap(); + fuzzyParseProperties.put(JsonFileFormatProperties.PROP_FUZZY_PARSE, "true"); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(fuzzyParseProperties, null)); + Assert.assertTrue(exception.getMessage().contains("fuzzy_parse")); + + RoutineLoadDesc routineLoadDesc = new RoutineLoadDesc(null, null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, null); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), routineLoadDesc)); + Assert.assertTrue(exception.getMessage().contains("COLUMNS specification")); + + Deencapsulation.setField(job, "mergeType", LoadTask.MergeType.MERGE); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), null)); + Assert.assertTrue(exception.getMessage().contains("merge_type")); + Deencapsulation.setField(job, "mergeType", LoadTask.MergeType.APPEND); + + Deencapsulation.setField(job, "mergeTypeSpecified", true); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), null)); + Assert.assertTrue(exception.getMessage().contains("merge_type")); + Deencapsulation.setField(job, "mergeTypeSpecified", false); + + RoutineLoadDesc explicitAppendDesc = new RoutineLoadDesc(null, null, null, null, null, null, null, + LoadTask.MergeType.APPEND, true, null); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), explicitAppendDesc)); + Assert.assertTrue(exception.getMessage().contains("merge_type")); + + Deencapsulation.setField(job, "whereExpr", + new BinaryPredicate(BinaryPredicate.Operator.GT, new SlotRef(null, "id"), new IntLiteral(1))); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), null)); + Assert.assertTrue(exception.getMessage().contains("where")); + Deencapsulation.setField(job, "whereExpr", null); + + RoutineLoadDesc whereDesc = new RoutineLoadDesc(null, null, null, null, + new BinaryPredicate(BinaryPredicate.Operator.GT, new SlotRef(null, "id"), new IntLiteral(1)), + null, null, LoadTask.MergeType.APPEND, null); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), whereDesc)); + Assert.assertTrue(exception.getMessage().contains("where")); + + RoutineLoadDesc deleteDesc = new RoutineLoadDesc(null, null, null, null, null, null, + new BinaryPredicate(BinaryPredicate.Operator.EQ, new SlotRef(null, "is_delete"), + new IntLiteral(1)), + LoadTask.MergeType.APPEND, null); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), deleteDesc)); + Assert.assertTrue(exception.getMessage().contains("delete")); + + Deencapsulation.setField(job, "sequenceCol", "seq"); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), null)); + 
Assert.assertTrue(exception.getMessage().contains("function_column.sequence_col")); + Deencapsulation.setField(job, "sequenceCol", null); + + RoutineLoadDesc sequenceDesc = new RoutineLoadDesc(null, null, null, null, null, null, null, + LoadTask.MergeType.APPEND, "seq"); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), sequenceDesc)); + Assert.assertTrue(exception.getMessage().contains("function_column.sequence_col")); + } + } + + @Test + public void testRoutineLoadDescIsInstalledBeforeFlexibleAlterValidation() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "state", RoutineLoadJob.JobState.PAUSED); + Deencapsulation.setField(job, "uniqueKeyUpdateMode", TUniqueKeyUpdateMode.UPSERT); + + Map currentJobProperties = Maps.newHashMap(); + currentJobProperties.put(FileFormatProperties.PROP_FORMAT, "json"); + Deencapsulation.setField(job, "jobProperties", currentJobProperties); + + RoutineLoadDesc columnsDesc = new RoutineLoadDesc(null, null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, null); + AlterRoutineLoadCommand columnsCommand = new AlterRoutineLoadCommand( + new LabelNameInfo("db", "job"), Maps.newHashMap(), Maps.newHashMap()); + Deencapsulation.setField(columnsCommand, "routineLoadDesc", columnsDesc); + + Map flexibleProperties = Maps.newHashMap(); + flexibleProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, "UPDATE_FLEXIBLE_COLUMNS"); + AlterRoutineLoadCommand flexibleCommand = new AlterRoutineLoadCommand( + new LabelNameInfo("db", "job"), Maps.newHashMap(), Maps.newHashMap()); + Deencapsulation.setField(flexibleCommand, "analyzedJobProperties", flexibleProperties); + + Env env = Mockito.mock(Env.class); + EditLog editLog = Mockito.mock(EditLog.class); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(env.getEditLog()).thenReturn(editLog); + Mockito.when(catalog.getDbNullable(1L)).thenReturn(db); + Mockito.when(db.getTableNullable(2L)).thenReturn(table); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean()); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate( + Mockito.anyBoolean()); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList(new Column("k", PrimitiveType.INT))); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + job.modifyProperties(columnsCommand); + Assert.assertEquals(1, job.getColumnExprDescs().descs.size()); + + UserException exception = Assert.assertThrows( + UserException.class, () -> job.modifyProperties(flexibleCommand)); + Assert.assertTrue(exception.getMessage().contains("COLUMNS 
specification")); + } + } + + @Test + public void testReplayModifyPropertiesRestoresRoutineLoadDescForFlexibleValidation() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "uniqueKeyUpdateMode", TUniqueKeyUpdateMode.UPSERT); + + Map currentJobProperties = Maps.newHashMap(); + currentJobProperties.put(FileFormatProperties.PROP_FORMAT, "json"); + Deencapsulation.setField(job, "jobProperties", currentJobProperties); + + RoutineLoadDesc columnsDesc = new RoutineLoadDesc(new Separator("|", "|"), null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, "seq"); + Map replayProperties = Maps.newHashMap(); + replayProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, "UPDATE_FLEXIBLE_COLUMNS"); + AlterRoutineLoadJobOperationLog log = new AlterRoutineLoadJobOperationLog( + 1L, replayProperties, null, columnsDesc); + job.replayModifyProperties(log); + Assert.assertEquals(1, job.getColumnExprDescs().descs.size()); + Assert.assertEquals("|", job.getColumnSeparator().getSeparator()); + Assert.assertEquals("seq", job.getSequenceCol()); + Assert.assertEquals(TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS, job.getUniqueKeyUpdateMode()); + Assert.assertEquals("UPDATE_FLEXIBLE_COLUMNS", + log.getJobProperties().get(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE)); + + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(catalog.getDbNullable(1L)).thenReturn(db); + Mockito.when(db.getTableNullable(2L)).thenReturn(table); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean()); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate( + Mockito.anyBoolean()); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList(new Column("k", PrimitiveType.INT))); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + Map flexibleProperties = Maps.newHashMap(); + flexibleProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, "UPDATE_FLEXIBLE_COLUMNS"); + UserException exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(flexibleProperties, null)); + Assert.assertTrue(exception.getMessage().contains("COLUMNS specification")); + } + } + + @Test + public void testColumnDescsSerializedInRoutineLoadJobSnapshot() { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + ImportColumnDescs columnDescs = new ImportColumnDescs(); + columnDescs.descs.add(new ImportColumnDesc("id", null)); + Deencapsulation.setField(job, "columnDescs", columnDescs); + Deencapsulation.setField(job, "origStmt", new OriginStatement("INVALID", 0)); + + String json = GsonUtils.GSON.toJson(job, RoutineLoadJob.class); + JsonObject jsonObject = GsonUtils.GSON.fromJson(json, 
JsonObject.class); + + Assert.assertTrue(jsonObject.has("columnDescs")); + Assert.assertEquals("id", + jsonObject.getAsJsonObject("columnDescs").getAsJsonArray("des").get(0).getAsJsonObject().get("cn") + .getAsString()); + + RoutineLoadJob legacyKeyJob = GsonUtils.GSON.fromJson( + json.replace("\"columnDescs\"", "\"cd\""), RoutineLoadJob.class); + Assert.assertEquals("id", legacyKeyJob.getColumnExprDescs().descs.get(0).getColumnName()); + } + + @Test + public void testColumnDescsSnapshotOverridesOrigStmtAfterRead() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "origStmt", new OriginStatement( + "CREATE ROUTINE LOAD job ON tbl " + + "PROPERTIES (\"format\" = \"json\") " + + "FROM KAFKA (\"kafka_broker_list\" = \"127.0.0.1:9092\", " + + "\"kafka_topic\" = \"topic\")", + 0)); + + ImportColumnDescs columnDescs = new ImportColumnDescs(); + columnDescs.descs.add(new ImportColumnDesc("score", null)); + Deencapsulation.setField(job, "columnDescs", columnDescs); + Deencapsulation.setField(job, "columnSeparator", new Separator("|", "|")); + Deencapsulation.setField(job, "lineDelimiter", new Separator("\n", "\\n")); + Deencapsulation.setField(job, "partitionNamesInfo", + new PartitionNamesInfo(false, Lists.newArrayList("p2"))); + Deencapsulation.setField(job, "whereExpr", + new BinaryPredicate(BinaryPredicate.Operator.GT, new SlotRef(null, "score"), new IntLiteral(10))); + Deencapsulation.setField(job, "deleteCondition", + new BinaryPredicate(BinaryPredicate.Operator.EQ, new SlotRef(null, "deleted"), new IntLiteral(1))); + Deencapsulation.setField(job, "mergeType", LoadTask.MergeType.MERGE); + Deencapsulation.setField(job, "mergeTypeSpecified", true); + Deencapsulation.setField(job, "sequenceCol", "seq2"); + + Env env = Mockito.mock(Env.class); + CatalogMgr catalogMgr = Mockito.mock(CatalogMgr.class); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + Table table = Mockito.mock(Table.class); + Mockito.when(env.getCatalogMgr()).thenReturn(catalogMgr); + Mockito.when(catalogMgr.getCatalog(Mockito.anyString())).thenReturn(catalog); + Mockito.when(env.getInternalCatalog()).thenReturn(catalog); + Mockito.when(catalog.getDb("db")).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDb(1L)).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDbOrAnalysisException("db")).thenReturn(db); + Mockito.when(db.getName()).thenReturn("db"); + Mockito.when(db.getId()).thenReturn(1L); + Mockito.when(db.getTableOrAnalysisException("tbl")).thenReturn(table); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + String json = GsonUtils.GSON.toJson(job, RoutineLoadJob.class); + RoutineLoadJob restoredJob = GsonUtils.GSON.fromJson(json, RoutineLoadJob.class); + + Assert.assertNotEquals(RoutineLoadJob.JobState.CANCELLED, restoredJob.getState()); + Assert.assertEquals(1, restoredJob.getColumnExprDescs().descs.size()); + Assert.assertEquals("score", restoredJob.getColumnExprDescs().descs.get(0).getColumnName()); + Assert.assertEquals("|", restoredJob.getColumnSeparator().getSeparator()); + Assert.assertEquals("\n", restoredJob.getLineDelimiter().getSeparator()); + Assert.assertEquals(Lists.newArrayList("p2"), + 
restoredJob.getPartitionNamesInfo().getPartitionNames()); + Assert.assertNotNull(restoredJob.getWhereExpr()); + Assert.assertNotNull(restoredJob.getDeleteCondition()); + Assert.assertEquals(LoadTask.MergeType.MERGE, restoredJob.getMergeType()); + Assert.assertTrue(Deencapsulation.getField(restoredJob, "mergeTypeSpecified")); + Assert.assertEquals("seq2", restoredJob.getSequenceCol()); + } + } + + @Test + public void testLegacyRoutineLoadDescSnapshotOverridesOrigStmtAfterRead() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "origStmt", new OriginStatement( + "CREATE ROUTINE LOAD job ON tbl " + + "WITH MERGE " + + "DELETE ON stale_deleted = 1 " + + "PROPERTIES (\"format\" = \"json\") " + + "FROM KAFKA (\"kafka_broker_list\" = \"127.0.0.1:9092\", " + + "\"kafka_topic\" = \"topic\")", + 0)); + + ImportColumnDescs columnDescs = new ImportColumnDescs(); + columnDescs.descs.add(new ImportColumnDesc("score", null)); + Deencapsulation.setField(job, "columnDescs", columnDescs); + Deencapsulation.setField(job, "columnSeparator", new Separator("|", "|")); + Deencapsulation.setField(job, "lineDelimiter", new Separator("\n", "\\n")); + Deencapsulation.setField(job, "partitionNamesInfo", + new PartitionNamesInfo(false, Lists.newArrayList("p2"))); + Deencapsulation.setField(job, "precedingFilter", + new BinaryPredicate(BinaryPredicate.Operator.GE, new SlotRef(null, "id"), new IntLiteral(1))); + Deencapsulation.setField(job, "whereExpr", + new BinaryPredicate(BinaryPredicate.Operator.GT, new SlotRef(null, "score"), new IntLiteral(10))); + Deencapsulation.setField(job, "deleteCondition", + new BinaryPredicate(BinaryPredicate.Operator.EQ, new SlotRef(null, "deleted"), new IntLiteral(1))); + Deencapsulation.setField(job, "mergeType", LoadTask.MergeType.MERGE); + Deencapsulation.setField(job, "sequenceCol", "seq2"); + + Env env = Mockito.mock(Env.class); + CatalogMgr catalogMgr = Mockito.mock(CatalogMgr.class); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(env.getCatalogMgr()).thenReturn(catalogMgr); + Mockito.when(catalogMgr.getCatalog(Mockito.anyString())).thenReturn(catalog); + Mockito.when(env.getInternalCatalog()).thenReturn(catalog); + Mockito.when(catalog.getDb("db")).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDb(1L)).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDbOrAnalysisException("db")).thenReturn(db); + Mockito.when(db.getName()).thenReturn("db"); + Mockito.when(db.getId()).thenReturn(1L); + Mockito.when(db.getTableOrAnalysisException("tbl")).thenReturn(table); + Mockito.when(db.getTable(2L)).thenReturn(Optional.of(table)); + Mockito.when(table.getName()).thenReturn("tbl"); + Mockito.when(table.getType()).thenReturn(Table.TableType.OLAP); + Mockito.when(table.getKeysType()).thenReturn(KeysType.UNIQUE_KEYS); + Mockito.when(table.hasDeleteSign()).thenReturn(true); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + JsonObject legacyImage = GsonUtils.GSON.fromJson( + GsonUtils.GSON.toJson(job, RoutineLoadJob.class), JsonObject.class); + legacyImage.add("cd", legacyImage.remove("columnDescs")); + 
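// Illustration (sketch): the key renames below turn a current snapshot back into a legacy
// image. That round-trip only deserializes if every new short Gson key also accepts its
// legacy long spelling, e.g. (hypothetical annotation layout on the RoutineLoadJob fields):
//
//     @SerializedName(value = "pni", alternate = {"partitionNamesInfo"})
//     private PartitionNamesInfo partitionNamesInfo;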
legacyImage.add("partitionNamesInfo", legacyImage.remove("pni")); + legacyImage.add("precedingFilter", legacyImage.remove("pf")); + legacyImage.add("whereExpr", legacyImage.remove("filter")); + legacyImage.add("deleteCondition", legacyImage.remove("dc")); + legacyImage.add("sequenceCol", legacyImage.remove("scn")); + legacyImage.remove("cs"); + legacyImage.remove("ocs"); + legacyImage.remove("ld"); + legacyImage.remove("old"); + legacyImage.remove("mt"); + JsonObject legacyColumnSeparator = new JsonObject(); + legacyColumnSeparator.addProperty("separator", "|"); + legacyColumnSeparator.addProperty("oriSeparator", "|"); + legacyImage.add("columnSeparator", legacyColumnSeparator); + JsonObject legacyLineDelimiter = new JsonObject(); + legacyLineDelimiter.addProperty("separator", "\n"); + legacyLineDelimiter.addProperty("oriSeparator", "\\n"); + legacyImage.add("lineDelimiter", legacyLineDelimiter); + + RoutineLoadJob restoredJob = GsonUtils.GSON.fromJson(legacyImage, RoutineLoadJob.class); + + Assert.assertNotEquals(RoutineLoadJob.JobState.CANCELLED, restoredJob.getState()); + Assert.assertEquals(1, restoredJob.getColumnExprDescs().descs.size()); + Assert.assertEquals("score", restoredJob.getColumnExprDescs().descs.get(0).getColumnName()); + Assert.assertEquals("|", restoredJob.getColumnSeparator().getSeparator()); + Assert.assertEquals("\n", restoredJob.getLineDelimiter().getSeparator()); + Assert.assertEquals(Lists.newArrayList("p2"), + restoredJob.getPartitionNamesInfo().getPartitionNames()); + Assert.assertNotNull(restoredJob.getPrecedingFilter()); + Assert.assertNotNull(restoredJob.getWhereExpr()); + Assert.assertNotNull(restoredJob.getDeleteCondition()); + Assert.assertEquals(LoadTask.MergeType.MERGE, restoredJob.getMergeType()); + Assert.assertEquals("seq2", restoredJob.getSequenceCol()); + } + } + + @Test + public void testLegacyRoutineLoadImageKeepsMergeTypeFromOrigStmt() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "origStmt", new OriginStatement( + "CREATE ROUTINE LOAD job ON tbl " + + "WITH MERGE " + + "DELETE ON is_delete = 1 " + + "PROPERTIES (\"format\" = \"json\") " + + "FROM KAFKA (\"kafka_broker_list\" = \"127.0.0.1:9092\", " + + "\"kafka_topic\" = \"topic\")", + 0)); + + Env env = Mockito.mock(Env.class); + CatalogMgr catalogMgr = Mockito.mock(CatalogMgr.class); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(env.getCatalogMgr()).thenReturn(catalogMgr); + Mockito.when(catalogMgr.getCatalog(Mockito.anyString())).thenReturn(catalog); + Mockito.when(env.getInternalCatalog()).thenReturn(catalog); + Mockito.when(catalog.getDb("db")).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDb(1L)).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDbOrAnalysisException("db")).thenReturn(db); + Mockito.when(db.getName()).thenReturn("db"); + Mockito.when(db.getId()).thenReturn(1L); + Mockito.when(db.getTableOrAnalysisException("tbl")).thenReturn(table); + Mockito.when(db.getTable(2L)).thenReturn(Optional.of(table)); + Mockito.when(table.getName()).thenReturn("tbl"); + Mockito.when(table.getType()).thenReturn(Table.TableType.OLAP); + Mockito.when(table.getKeysType()).thenReturn(KeysType.UNIQUE_KEYS); + 
Mockito.when(table.hasDeleteSign()).thenReturn(true); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + String json = GsonUtils.GSON.toJson(job, RoutineLoadJob.class); + JsonObject legacyImage = GsonUtils.GSON.fromJson(json, JsonObject.class); + legacyImage.remove("mt"); + RoutineLoadJob restoredJob = GsonUtils.GSON.fromJson(legacyImage, RoutineLoadJob.class); + + Assert.assertEquals(LoadTask.MergeType.MERGE, restoredJob.getMergeType()); + Assert.assertNotNull(restoredJob.getDeleteCondition()); + } + } + + @Test + public void testFlexibleRoutineLoadImageRestoreSkipsBackendCapabilityCheck() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "origStmt", new OriginStatement( + "CREATE ROUTINE LOAD job ON tbl " + + "PROPERTIES (\"format\" = \"json\", " + + "\"unique_key_update_mode\" = \"UPDATE_FLEXIBLE_COLUMNS\") " + + "FROM KAFKA (\"kafka_broker_list\" = \"127.0.0.1:9092\", " + + "\"kafka_topic\" = \"topic\")", + 0)); + Map jobProperties = Maps.newHashMap(); + jobProperties.put(FileFormatProperties.PROP_FORMAT, "json"); + jobProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, "UPDATE_FLEXIBLE_COLUMNS"); + Deencapsulation.setField(job, "jobProperties", jobProperties); + + Env env = Mockito.mock(Env.class); + CatalogMgr catalogMgr = Mockito.mock(CatalogMgr.class); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(env.getCatalogMgr()).thenReturn(catalogMgr); + Mockito.when(catalogMgr.getCatalog(Mockito.anyString())).thenReturn(catalog); + Mockito.when(env.getInternalCatalog()).thenReturn(catalog); + Mockito.when(catalog.getDb("db")).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDb(1L)).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDbOrAnalysisException("db")).thenReturn(db); + Mockito.when(db.getName()).thenReturn("db"); + Mockito.when(db.getId()).thenReturn(1L); + Mockito.when(db.getTableOrAnalysisException("tbl")).thenReturn(table); + Mockito.when(db.getTable(2L)).thenReturn(Optional.of(table)); + Mockito.when(table.getName()).thenReturn("tbl"); + Mockito.when(table.getType()).thenReturn(Table.TableType.OLAP); + Mockito.when(table.getKeysType()).thenReturn(KeysType.UNIQUE_KEYS); + Mockito.when(table.hasDeleteSign()).thenReturn(true); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean()); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate( + Mockito.anyBoolean()); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList( + new Column("k", PrimitiveType.INT), new Column("v", Type.VARIANT))); + Mockito.when(table.variantEnableFlattenNested()).thenReturn(false); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + RoutineLoadJob restoredJob = GsonUtils.GSON.fromJson( + 
GsonUtils.GSON.toJson(job, RoutineLoadJob.class), RoutineLoadJob.class); + + Assert.assertNotEquals(RoutineLoadJob.JobState.CANCELLED, restoredJob.getState()); + Assert.assertEquals(TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS, + restoredJob.getUniqueKeyUpdateMode()); + } + } + } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/load/NereidsStreamLoadPlannerTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/load/NereidsStreamLoadPlannerTest.java new file mode 100644 index 00000000000000..f4cf3abf40194b --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/load/NereidsStreamLoadPlannerTest.java @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.load; + +import org.apache.doris.common.UserException; +import org.apache.doris.load.loadv2.LoadTask; +import org.apache.doris.nereids.analyzer.UnboundSlot; +import org.apache.doris.thrift.TFileCompressType; +import org.apache.doris.thrift.TFileFormatType; +import org.apache.doris.thrift.TFileType; +import org.apache.doris.thrift.TMergeType; +import org.apache.doris.thrift.TPartialUpdateNewRowPolicy; +import org.apache.doris.thrift.TStreamLoadPutRequest; +import org.apache.doris.thrift.TUniqueId; +import org.apache.doris.thrift.TUniqueKeyUpdateMode; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +import java.util.Collections; + +public class NereidsStreamLoadPlannerTest { + @Test + public void testValidateFlexiblePartialUpdateStreamLoadOptions() throws Exception { + NereidsStreamLoadPlanner.validateLoadTaskForFlexiblePartialUpdate(newFlexibleTaskInfo()); + + NereidsLoadTaskInfo csvTaskInfo = newFlexibleTaskInfo(); + Mockito.when(csvTaskInfo.getFormatType()).thenReturn(TFileFormatType.FORMAT_CSV_PLAIN); + assertRejected(csvTaskInfo, "flexible partial update only support json format"); + + NereidsLoadTaskInfo fuzzyParseTaskInfo = newFlexibleTaskInfo(); + Mockito.when(fuzzyParseTaskInfo.isFuzzyParse()).thenReturn(true); + assertRejected(fuzzyParseTaskInfo, "Don't support flexible partial update when 'fuzzy_parse' is enabled"); + + NereidsLoadTaskInfo columnsTaskInfo = newFlexibleTaskInfo(); + NereidsLoadTaskInfo.NereidsImportColumnDescs columnDescs = + new NereidsLoadTaskInfo.NereidsImportColumnDescs(); + columnDescs.descs.add(new NereidsImportColumnDesc("k")); + Mockito.when(columnsTaskInfo.getColumnExprDescs()).thenReturn(columnDescs); + assertRejected(columnsTaskInfo, "Don't support flexible partial update when 'columns' is specified"); + + NereidsLoadTaskInfo jsonPathsTaskInfo = newFlexibleTaskInfo(); + Mockito.when(jsonPathsTaskInfo.getJsonPaths()).thenReturn("[\"$.k\",\"$.v\"]"); + assertRejected(jsonPathsTaskInfo, "Don't support flexible partial 
update when 'jsonpaths' is specified"); + + NereidsLoadTaskInfo hiddenColumnsTaskInfo = newFlexibleTaskInfo(); + Mockito.when(hiddenColumnsTaskInfo.getHiddenColumns()) + .thenReturn(Collections.singletonList("__DORIS_DELETE_SIGN__")); + assertRejected(hiddenColumnsTaskInfo, + "Don't support flexible partial update when 'hidden_columns' is specified"); + + NereidsLoadTaskInfo sequenceColTaskInfo = newFlexibleTaskInfo(); + Mockito.when(sequenceColTaskInfo.hasSequenceCol()).thenReturn(true); + assertRejected(sequenceColTaskInfo, + "Don't support flexible partial update when 'function_column.sequence_col' is specified"); + + NereidsLoadTaskInfo mergeTaskInfo = newFlexibleTaskInfo(); + Mockito.when(mergeTaskInfo.getMergeType()).thenReturn(LoadTask.MergeType.MERGE); + assertRejected(mergeTaskInfo, "Don't support flexible partial update when 'merge_type' is specified"); + + NereidsLoadTaskInfo explicitAppendTaskInfo = newFlexibleTaskInfo(); + Mockito.when(explicitAppendTaskInfo.isMergeTypeSpecified()).thenReturn(true); + assertRejected(explicitAppendTaskInfo, + "Don't support flexible partial update when 'merge_type' is specified"); + + NereidsLoadTaskInfo whereTaskInfo = newFlexibleTaskInfo(); + Mockito.when(whereTaskInfo.getWhereExpr()).thenReturn(new UnboundSlot("v")); + assertRejected(whereTaskInfo, "Don't support flexible partial update when 'where' is specified"); + + NereidsLoadTaskInfo deleteTaskInfo = newFlexibleTaskInfo(); + Mockito.when(deleteTaskInfo.getDeleteCondition()).thenReturn(new UnboundSlot("v")); + assertRejected(deleteTaskInfo, "Don't support flexible partial update when 'delete' is specified"); + } + + @Test + public void testValidateFlexiblePartialUpdateRoutineLoadOptions() throws Exception { + NereidsStreamLoadPlanner.validateLoadTaskForFlexiblePartialUpdate( + newFlexibleRoutineTaskInfo(LoadTask.MergeType.APPEND, null, null)); + + assertRejected(newFlexibleRoutineTaskInfo(LoadTask.MergeType.MERGE, null, null), + "Don't support flexible partial update when 'merge_type' is specified"); + assertRejected(newFlexibleRoutineTaskInfo(LoadTask.MergeType.APPEND, new UnboundSlot("v"), null), + "Don't support flexible partial update when 'where' is specified"); + assertRejected(newFlexibleRoutineTaskInfo(LoadTask.MergeType.APPEND, null, "seq"), + "Don't support flexible partial update when 'function_column.sequence_col' is specified"); + } + + @Test + public void testValidateFlexiblePartialUpdateStreamLoadDefaultMergeType() throws Exception { + TStreamLoadPutRequest request = newFlexibleStreamLoadRequest(); + request.setMergeType(TMergeType.APPEND); + NereidsStreamLoadTask defaultAppendTask = NereidsStreamLoadTask.fromTStreamLoadPutRequest(request); + Assertions.assertFalse(defaultAppendTask.isMergeTypeSpecified()); + NereidsStreamLoadPlanner.validateLoadTaskForFlexiblePartialUpdate(defaultAppendTask); + + request.setMergeTypeSpecified(true); + NereidsStreamLoadTask explicitAppendTask = NereidsStreamLoadTask.fromTStreamLoadPutRequest(request); + Assertions.assertTrue(explicitAppendTask.isMergeTypeSpecified()); + assertRejected(explicitAppendTask, + "Don't support flexible partial update when 'merge_type' is specified"); + + TStreamLoadPutRequest deleteRequest = newFlexibleStreamLoadRequest(); + deleteRequest.setMergeType(TMergeType.APPEND); + deleteRequest.setDeleteCondition("v = 1"); + NereidsStreamLoadTask deleteTask = NereidsStreamLoadTask.fromTStreamLoadPutRequest(deleteRequest); + Assertions.assertFalse(deleteTask.isMergeTypeSpecified()); + assertRejected(deleteTask, "Don't 
support flexible partial update when 'delete' is specified"); + } + + private NereidsLoadTaskInfo newFlexibleTaskInfo() { + NereidsLoadTaskInfo taskInfo = Mockito.mock(NereidsLoadTaskInfo.class); + Mockito.when(taskInfo.getFormatType()).thenReturn(TFileFormatType.FORMAT_JSON); + Mockito.when(taskInfo.getColumnExprDescs()).thenReturn(new NereidsLoadTaskInfo.NereidsImportColumnDescs()); + Mockito.when(taskInfo.getJsonPaths()).thenReturn(""); + Mockito.when(taskInfo.getMergeType()).thenReturn(LoadTask.MergeType.APPEND); + return taskInfo; + } + + private TStreamLoadPutRequest newFlexibleStreamLoadRequest() { + TStreamLoadPutRequest request = new TStreamLoadPutRequest(); + request.setLoadId(new TUniqueId(1, 2)); + request.setTxnId(1); + request.setFileType(TFileType.FILE_STREAM); + request.setFormatType(TFileFormatType.FORMAT_JSON); + request.setCompressType(TFileCompressType.UNKNOWN); + request.setReadJsonByLine(true); + return request; + } + + private NereidsRoutineLoadTaskInfo newFlexibleRoutineTaskInfo( + LoadTask.MergeType mergeType, UnboundSlot whereExpr, String sequenceCol) { + return new NereidsRoutineLoadTaskInfo( + 1024L, Collections.singletonMap("format", "json"), 10L, null, mergeType, null, sequenceCol, 1.0, + new NereidsLoadTaskInfo.NereidsImportColumnDescs(), null, whereExpr, null, null, + (byte) 0, (byte) 0, 1, false, TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS, + TPartialUpdateNewRowPolicy.APPEND, false); + } + + private void assertRejected(NereidsLoadTaskInfo taskInfo, String expectedMessage) { + UserException exception = Assertions.assertThrows(UserException.class, + () -> NereidsStreamLoadPlanner.validateLoadTaskForFlexiblePartialUpdate(taskInfo)); + Assertions.assertTrue(exception.getMessage().contains(expectedMessage), exception.getMessage()); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java index 3530f3536197f4..ba176ab4a0acab 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java @@ -1114,6 +1114,42 @@ public void testVariantFieldPatternDictCompressionValidation() { Assertions.assertDoesNotThrow(() -> createTable(validSql)); } + @Test + public void testCreateTableAllowsUnsupportedFlexibleVariantModes() { + String docModeSql = "create table test.tbl_flexible_variant_doc\n" + + "(k1 int, v variant null)\n" + + "unique key(k1)\n" + + "distributed by hash(k1) buckets 1\n" + + "properties('replication_num' = '1'," + + "'enable_unique_key_merge_on_write' = 'true'," + + "'enable_unique_key_skip_bitmap_column' = 'true');"; + Assertions.assertDoesNotThrow(() -> createTable(docModeSql)); + + String flattenNestedSql = "create table test.tbl_flexible_variant_flatten\n" + + "(k1 int, v variant null)\n" + + "unique key(k1)\n" + + "distributed by hash(k1) buckets 1\n" + + "properties('replication_num' = '1'," + + "'enable_unique_key_merge_on_write' = 'true'," + + "'enable_unique_key_skip_bitmap_column' = 'true'," + + "'deprecated_variant_enable_flatten_nested' = 'true');"; + connectContext.getSessionVariable().setEnableVariantFlattenNested(true); + try { + Assertions.assertDoesNotThrow(() -> createTable(flattenNestedSql)); + } finally { + connectContext.getSessionVariable().setEnableVariantFlattenNested(false); + } + + String validSql = "create table test.tbl_flexible_variant_normal\n" + + 
"(k1 int, v variant null)\n" + + "unique key(k1)\n" + + "distributed by hash(k1) buckets 1\n" + + "properties('replication_num' = '1'," + + "'enable_unique_key_merge_on_write' = 'true'," + + "'enable_unique_key_skip_bitmap_column' = 'true');"; + Assertions.assertDoesNotThrow(() -> createTable(validSql)); + } + @Test public void testMTMVRejectVarbinary() throws Exception { String mv = "CREATE MATERIALIZED VIEW mv_vb\n" diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommandTest.java index db64fe354d5cc3..2dd7768fdc761d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommandTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommandTest.java @@ -17,22 +17,78 @@ package org.apache.doris.nereids.trees.plans.commands; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Type; +import org.apache.doris.catalog.info.ColumnPosition; import org.apache.doris.catalog.info.TableNameInfo; +import org.apache.doris.common.UserException; +import org.apache.doris.nereids.trees.plans.commands.info.AddColumnOp; +import org.apache.doris.nereids.trees.plans.commands.info.AddColumnsOp; import org.apache.doris.nereids.trees.plans.commands.info.AddPartitionFieldOp; import org.apache.doris.nereids.trees.plans.commands.info.AlterTableOp; +import org.apache.doris.nereids.trees.plans.commands.info.ColumnDefinition; import org.apache.doris.nereids.trees.plans.commands.info.DropPartitionFieldOp; import org.apache.doris.nereids.trees.plans.commands.info.EnableFeatureOp; +import org.apache.doris.nereids.trees.plans.commands.info.ModifyColumnOp; import org.apache.doris.nereids.trees.plans.commands.info.ReplacePartitionFieldOp; +import org.apache.doris.nereids.types.IntegerType; +import com.google.common.collect.Lists; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; public class AlterTableCommandTest { + private static Column normalVariantColumn(String name) { + return new Column(name, Type.VARIANT); + } + + private static Column docModeVariantColumn(String name) { + org.apache.doris.catalog.VariantType docModeVariant = new org.apache.doris.catalog.VariantType( + new ArrayList<>(), 0, false, 10000, 0, true, 0L, 64, false); + return new Column(name, docModeVariant); + } + + private static ColumnDefinition intColumnDefinition(String name) { + return new ColumnDefinition(name, IntegerType.INSTANCE, false); + } + + private static void invokeRewriteForOlapTable(AlterTableCommand command, OlapTable table) throws Exception { + invokePrivate(command, "rewriteAlterOpForOlapTable", + new Class[] {org.apache.doris.qe.ConnectContext.class, OlapTable.class}, + new Object[] {null, table}); + } + + private static void invokeValidateVariantAlter(AlterTableCommand command, OlapTable table) throws Exception { + invokePrivate(command, "validateAlterVariantColumnsForFlexiblePartialUpdate", + new Class[] {OlapTable.class}, new Object[] {table}); + } + + private static 
void invokePrivate(Object target, String methodName, Class[] parameterTypes, Object[] args) + throws Exception { + Method method = target.getClass().getDeclaredMethod(methodName, parameterTypes); + method.setAccessible(true); + try { + method.invoke(target, args); + } catch (InvocationTargetException e) { + if (e.getCause() instanceof Exception) { + throw (Exception) e.getCause(); + } + throw e; + } + } + @Test void testEnableFeatureOp() { List ops = new ArrayList<>(); @@ -56,6 +112,113 @@ void testEnableFeatureOp() { alterTableCommand.toSql()); } + @Test + void testRewriteEnableFlexiblePartialUpdateOnVariantTable() throws Exception { + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getKeysType()).thenReturn(KeysType.UNIQUE_KEYS); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.isUniqKeyMergeOnWriteWithClusterKeys()).thenReturn(false); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(false); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getVisibleIndex()).thenReturn(Lists.newArrayList( + new MaterializedIndex(1L, MaterializedIndex.IndexState.NORMAL))); + Mockito.when(table.getBaseSchema(true)).thenReturn(Lists.newArrayList( + new Column("k", PrimitiveType.INT), normalVariantColumn("v"))); + + List ops = new ArrayList<>(); + EnableFeatureOp enableFlexibleUpdate = new EnableFeatureOp("UPDATE_FLEXIBLE_COLUMNS"); + enableFlexibleUpdate.validate(null); + ops.add(enableFlexibleUpdate); + AlterTableCommand alterTableCommand = new AlterTableCommand(null, ops); + + invokeRewriteForOlapTable(alterTableCommand, table); + + Assertions.assertEquals(1, alterTableCommand.getOps().size()); + Assertions.assertTrue(alterTableCommand.getOps().get(0) instanceof AddColumnOp); + AddColumnOp addColumnOp = (AddColumnOp) alterTableCommand.getOps().get(0); + Assertions.assertTrue(addColumnOp.getColumn().isSkipBitmapColumn()); + Assertions.assertEquals(Column.SKIP_BITMAP_COL, addColumnOp.getColumn().getName()); + Assertions.assertEquals("AFTER `v`", addColumnOp.getColPos().toSql()); + } + + @Test + void testEnableFlexiblePartialUpdateRequiresLightSchemaChange() throws Exception { + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getKeysType()).thenReturn(KeysType.UNIQUE_KEYS); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.isUniqKeyMergeOnWriteWithClusterKeys()).thenReturn(false); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(false); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(false); + + EnableFeatureOp enableFlexibleUpdate = new EnableFeatureOp("UPDATE_FLEXIBLE_COLUMNS"); + enableFlexibleUpdate.validate(null); + AlterTableCommand alterTableCommand = new AlterTableCommand(null, + Lists.newArrayList(enableFlexibleUpdate)); + + UserException exception = Assertions.assertThrows(UserException.class, + () -> invokeRewriteForOlapTable(alterTableCommand, table)); + Assertions.assertTrue(exception.getMessage().contains("light_schema_change")); + } + + @Test + void testEnableFlexiblePartialUpdateRejectsClusterKeyTable() throws Exception { + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getKeysType()).thenReturn(KeysType.UNIQUE_KEYS); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.isUniqKeyMergeOnWriteWithClusterKeys()).thenReturn(true); + + EnableFeatureOp enableFlexibleUpdate = new EnableFeatureOp("UPDATE_FLEXIBLE_COLUMNS"); + 
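// The op itself validates; the cluster-key rejection is expected from the rewrite step below. +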
enableFlexibleUpdate.validate(null); + AlterTableCommand alterTableCommand = new AlterTableCommand(null, + Lists.newArrayList(enableFlexibleUpdate)); + + UserException exception = Assertions.assertThrows(UserException.class, + () -> invokeRewriteForOlapTable(alterTableCommand, table)); + Assertions.assertTrue(exception.getMessage().contains("cluster keys")); + } + + @Test + void testValidateVariantAlterOnlyWhenEnablingFlexiblePartialUpdate() throws Exception { + OlapTable tableWithoutFlexibleUpdate = Mockito.mock(OlapTable.class); + Mockito.when(tableWithoutFlexibleUpdate.hasSkipBitmapColumn()).thenReturn(false); + OlapTable tableWithFlexibleUpdate = Mockito.mock(OlapTable.class); + Mockito.when(tableWithFlexibleUpdate.hasSkipBitmapColumn()).thenReturn(true); + + AddColumnOp addNormalVariant = new AddColumnOp(intColumnDefinition("normal_v"), null, null, null); + addNormalVariant.setColumn(normalVariantColumn("normal_v")); + AlterTableCommand normalVariantCommand = new AlterTableCommand(null, Lists.newArrayList(addNormalVariant)); + Assertions.assertDoesNotThrow(() -> invokeValidateVariantAlter(normalVariantCommand, tableWithFlexibleUpdate)); + + AddColumnOp addDocModeVariant = new AddColumnOp(intColumnDefinition("doc_v"), null, null, null); + addDocModeVariant.setColumn(docModeVariantColumn("doc_v")); + AlterTableCommand addDocOnlyCommand = new AlterTableCommand(null, Lists.newArrayList(addDocModeVariant)); + Assertions.assertDoesNotThrow(() -> invokeValidateVariantAlter(addDocOnlyCommand, tableWithoutFlexibleUpdate)); + Assertions.assertThrows(UserException.class, + () -> invokeValidateVariantAlter(addDocOnlyCommand, tableWithFlexibleUpdate)); + + EnableFeatureOp enableFlexibleUpdate = new EnableFeatureOp("UPDATE_FLEXIBLE_COLUMNS"); + enableFlexibleUpdate.validate(null); + AlterTableCommand addDocWithEnableCommand = new AlterTableCommand(null, + Lists.newArrayList(addDocModeVariant, enableFlexibleUpdate)); + Assertions.assertThrows(UserException.class, + () -> invokeValidateVariantAlter(addDocWithEnableCommand, tableWithoutFlexibleUpdate)); + + AddColumnsOp addColumnsOp = new AddColumnsOp(null, null, Lists.newArrayList( + new Column("plain", PrimitiveType.INT), docModeVariantColumn("doc_v2"))); + AlterTableCommand addColumnsCommand = new AlterTableCommand(null, Lists.newArrayList( + addColumnsOp, enableFlexibleUpdate)); + Assertions.assertThrows(UserException.class, + () -> invokeValidateVariantAlter(addColumnsCommand, tableWithoutFlexibleUpdate)); + + ModifyColumnOp modifyColumnOp = new ModifyColumnOp(intColumnDefinition("doc_v3"), + new ColumnPosition("plain"), null, null); + modifyColumnOp.setColumn(docModeVariantColumn("doc_v3")); + AlterTableCommand modifyCommand = new AlterTableCommand(null, Lists.newArrayList( + modifyColumnOp, enableFlexibleUpdate)); + Assertions.assertThrows(UserException.class, + () -> invokeValidateVariantAlter(modifyCommand, tableWithoutFlexibleUpdate)); + } + @Test void testAddPartitionFieldOp() { List ops = new ArrayList<>(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/persist/AlterRoutineLoadOperationLogTest.java b/fe/fe-core/src/test/java/org/apache/doris/persist/AlterRoutineLoadOperationLogTest.java index 8a1550d48f5d13..5cfb8047b99199 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/persist/AlterRoutineLoadOperationLogTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/persist/AlterRoutineLoadOperationLogTest.java @@ -17,12 +17,17 @@ package org.apache.doris.persist; +import org.apache.doris.analysis.ImportColumnDesc; +import 
org.apache.doris.analysis.Separator; import org.apache.doris.common.UserException; import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.load.RoutineLoadDesc; +import org.apache.doris.load.loadv2.LoadTask; import org.apache.doris.load.routineload.kafka.KafkaConfiguration; import org.apache.doris.load.routineload.kafka.KafkaDataSourceProperties; import org.apache.doris.nereids.trees.plans.commands.info.CreateRoutineLoadInfo; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.junit.Assert; import org.junit.Test; @@ -60,8 +65,12 @@ public void testSerializeAlterRoutineLoadOperationLog() throws IOException, User routineLoadDataSourceProperties.setTimezone(TimeUtils.DEFAULT_TIME_ZONE); routineLoadDataSourceProperties.analyze(); + RoutineLoadDesc routineLoadDesc = new RoutineLoadDesc(new Separator("|", "|"), null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, true, "seq"); AlterRoutineLoadJobOperationLog log = new AlterRoutineLoadJobOperationLog(jobId, - jobProperties, routineLoadDataSourceProperties); + jobProperties, routineLoadDataSourceProperties, routineLoadDesc); + jobProperties.put(CreateRoutineLoadInfo.DESIRED_CONCURRENT_NUMBER_PROPERTY, "7"); log.write(out); out.flush(); out.close(); @@ -72,7 +81,13 @@ public void testSerializeAlterRoutineLoadOperationLog() throws IOException, User AlterRoutineLoadJobOperationLog log2 = AlterRoutineLoadJobOperationLog.read(in); Assert.assertEquals(1, log2.getJobProperties().size()); Assert.assertEquals("5", log2.getJobProperties().get(CreateRoutineLoadInfo.DESIRED_CONCURRENT_NUMBER_PROPERTY)); - KafkaDataSourceProperties kafkaDataSourceProperties = (KafkaDataSourceProperties) log2.getDataSourceProperties(); + KafkaDataSourceProperties kafkaDataSourceProperties = + (KafkaDataSourceProperties) log2.getDataSourceProperties(); + Assert.assertEquals(1, log2.getColumnDescs().descs.size()); + Assert.assertEquals("id", log2.getColumnDescs().descs.get(0).getColumnName()); + Assert.assertEquals("|", log2.getRoutineLoadDesc().getColumnSeparator().getSeparator()); + Assert.assertTrue(log2.getRoutineLoadDesc().isMergeTypeSpecified()); + Assert.assertEquals("seq", log2.getRoutineLoadDesc().getSequenceColName()); Assert.assertEquals(null, kafkaDataSourceProperties.getBrokerList()); Assert.assertEquals(null, kafkaDataSourceProperties.getTopic()); Assert.assertEquals(1, kafkaDataSourceProperties.getCustomKafkaProperties().size()); diff --git a/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java b/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java index 033568017d90f3..afef92822922eb 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java @@ -101,7 +101,7 @@ public void testBackendHbResponseSerialization() throws IOException { System.out.println(Env.getCurrentEnvJournalVersion()); BackendHbResponse writeResponse = new BackendHbResponse(1L, 1234, 1234, 1234, 1234, 1234, "test", - Tag.VALUE_COMPUTATION, 10, 100, false, 1234); + Tag.VALUE_COMPUTATION, 10, 100, false, 1234, 2048, true); // Write objects to file File file1 = new File("./BackendHbResponseSerialization"); @@ -122,6 +122,7 @@ public void testBackendHbResponseSerialization() throws IOException { // Before meta version 121, nodeRole will not be read, so readResponse is not equal to writeResponse 
Assert.assertTrue(readResponse.toString().equals(writeResponse.toString())); Assert.assertTrue(Tag.VALUE_COMPUTATION.equals(readResponse.getNodeRole())); + Assert.assertTrue(readResponse.supportsVariantFlexiblePartialUpdate()); } catch (IOException e) { e.printStackTrace(); Assert.fail(); @@ -132,6 +133,32 @@ public void testBackendHbResponseSerialization() throws IOException { } } + @Test + public void testBackendVariantFlexiblePartialUpdateCapabilityFromHeartbeat() { + long oldToleranceCount = Config.max_backend_heartbeat_failure_tolerance_count; + Config.max_backend_heartbeat_failure_tolerance_count = 2; + try { + Backend backend = new Backend(1L, "127.0.0.1", 9050); + BackendHbResponse okResponse = new BackendHbResponse(1L, 9060, 8040, 8060, + 1234, 5678, "test", Tag.VALUE_MIX, 10, 100, false, 8070, 2048, true); + backend.handleHbResponse(okResponse, false); + Assert.assertTrue(backend.isAlive()); + Assert.assertTrue(backend.supportsVariantFlexiblePartialUpdate()); + + BackendHbResponse badResponse = new BackendHbResponse(1L, "127.0.0.1", + okResponse.getHbTime(), "bad heartbeat"); + backend.handleHbResponse(badResponse, false); + Assert.assertTrue(backend.isAlive()); + Assert.assertTrue(backend.supportsVariantFlexiblePartialUpdate()); + + backend.handleHbResponse(badResponse, false); + Assert.assertFalse(backend.isAlive()); + Assert.assertFalse(backend.supportsVariantFlexiblePartialUpdate()); + } finally { + Config.max_backend_heartbeat_failure_tolerance_count = oldToleranceCount; + } + } + @Test public void testSelectBackendIdsByPolicy() throws Exception { Config.disable_backend_black_list = true; diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 593a89d0aa992e..0e4fcf7cacc085 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -791,6 +791,7 @@ message PartialUpdateInfoPB { optional int32 nano_seconds = 13 [default = 0]; optional UniqueKeyUpdateModePB partial_update_mode = 14 [default = UPSERT]; optional PartialUpdateNewRowPolicyPB partial_update_new_key_policy = 15 [default = APPEND]; + optional int32 sequence_map_col_uid = 16 [default = -1]; } message FileEncryptionInfoPB { diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift index 07eb9333637721..300630ab220e16 100644 --- a/gensrc/thrift/FrontendService.thrift +++ b/gensrc/thrift/FrontendService.thrift @@ -587,6 +587,7 @@ struct TStreamLoadPutRequest { 58: optional Descriptors.TPartialUpdateNewRowPolicy partial_update_new_key_policy 59: optional bool empty_field_as_null 60: optional TCertBasedAuth cert_based_auth + 61: optional bool merge_type_specified // For cloud 1000: optional string cloud_cluster diff --git a/gensrc/thrift/HeartbeatService.thrift b/gensrc/thrift/HeartbeatService.thrift index ecf9727cf3e0db..ef22209d2bb47e 100644 --- a/gensrc/thrift/HeartbeatService.thrift +++ b/gensrc/thrift/HeartbeatService.thrift @@ -62,6 +62,7 @@ struct TBackendInfo { 8: optional bool is_shutdown 9: optional Types.TPort arrow_flight_sql_port 10: optional i64 be_mem // The physical memory available for use by BE. 
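+    // Advertised via BE heartbeat; FE requires every alive backend in the cluster to report this capability before accepting VARIANT flexible partial update loads.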
+ 11: optional bool supports_variant_flexible_partial_update // For cloud 1000: optional i64 fragment_executing_count 1001: optional i64 fragment_last_active_time diff --git a/regression-test/data/unique_with_mow_p0/flexible/variant_patch_merge.json b/regression-test/data/unique_with_mow_p0/flexible/variant_patch_merge.json new file mode 100644 index 00000000000000..f79265847946e5 --- /dev/null +++ b/regression-test/data/unique_with_mow_p0/flexible/variant_patch_merge.json @@ -0,0 +1,3 @@ +{"k":1,"v":{"a":10}} +{"k":1,"v":{"b":20}} +{"k":2,"c":22,"v":{"nested":{"y":2}}} diff --git a/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update.groovy b/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update.groovy index 510eeb0ecab73e..74561130b966d0 100644 --- a/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update.groovy +++ b/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update.groovy @@ -22,9 +22,29 @@ import org.apache.kafka.clients.producer.ProducerRecord suite("test_routine_load_flexible_partial_update", "nonConcurrent") { if (RoutineLoadTestUtils.isKafkaTestEnabled(context)) { + sql "set default_variant_enable_doc_mode = false" + def runSql = { String q -> sql q } def kafka_broker = RoutineLoadTestUtils.getKafkaBroker(context) def producer = RoutineLoadTestUtils.createKafkaProducer(kafka_broker) + def waitForLoadedRows = { String jobName, long expectedLoadedRows, int maxWait = 120 -> + def waited = 0 + while (waited < maxWait) { + def res = sql "show routine load for ${jobName}" + def state = res[0][8].toString() + def statJson = new groovy.json.JsonSlurper().parseText(res[0][14].toString()) + long loadedRows = statJson.loadedRows as long + logger.info("waitForLoadedRows: state=${state}, loadedRows=${loadedRows}, expected>=${expectedLoadedRows}") + if (state == "RUNNING" && loadedRows >= expectedLoadedRows) { + break + } + if (waited >= maxWait - 1) { + assertTrue("Timeout waiting for loadedRows >= ${expectedLoadedRows}, got ${loadedRows}", false) + } + sleep(1000) + waited++ + } + } // Test 1: Basic flexible partial update def kafkaJsonTopic1 = "test_routine_load_flexible_partial_update_basic" @@ -365,7 +385,7 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { exception "Flexible partial update does not support COLUMNS specification" } - // Test 7: Success case - WHERE clause works with flexible partial update + // Test 7: Error case - WHERE clause not supported with flexible partial update def kafkaJsonTopic7 = "test_routine_load_flexible_partial_update_where" def tableName7 = "test_routine_load_flex_update_where" def job7 = "test_flex_partial_update_job_where" @@ -389,18 +409,7 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { ); """ - // insert initial data - sql """ - INSERT INTO ${tableName7} VALUES - (1, 'alice', 100, 20), - (2, 'bob', 90, 21), - (3, 'charlie', 80, 22) - """ - - qt_select_initial7 "SELECT id, name, score, age FROM ${tableName7} ORDER BY id" - - try { - // create routine load with WHERE clause and flexible partial update + test { sql """ CREATE ROUTINE LOAD ${job7} ON ${tableName7} WHERE id > 1 @@ -417,37 +426,7 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { "property.kafka_default_offsets" = "OFFSET_BEGINNING" ); """ - - // send JSON data - WHERE clause filters id > 1, so id=1 row should NOT be processed - def data7 = [ - '{"id": 1, "score": 999}', - '{"id": 2, "score": 
95}', - '{"id": 3, "name": "chuck"}', - '{"id": 4, "name": "diana", "score": 70}' - ] - - data7.each { line -> - logger.info("Sending to Kafka: ${line}") - def record = new ProducerRecord<>(kafkaJsonTopic7, null, line) - producer.send(record).get() - } - producer.flush() - - // With skip_delete_bitmap=true and WHERE id > 1: - // - id=1: 1 version (not updated, filtered by WHERE) - // - id=2: 2 versions (original + partial update) - // - id=3: 2 versions (original + partial update) - // - id=4: 1 version (new row) - // Total: 6 rows, so expectedMinRows = 5 (waits for count > 5) - RoutineLoadTestUtils.waitForTaskFinishMoW(runSql, job7, tableName7, 5) - - // verify: id=1 should NOT be updated (filtered by WHERE), id=2,3,4 should be updated - qt_select_after_flex_where "SELECT id, name, score, age FROM ${tableName7} ORDER BY id" - } catch (Exception e) { - logger.error("Error during test: " + e.getMessage()) - throw e - } finally { - sql "STOP ROUTINE LOAD FOR ${job7}" + exception "where" } // Test 8: Error case - table without skip_bitmap column @@ -491,7 +470,7 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { exception "Flexible partial update can only support table with skip bitmap hidden column" } - // Test 9: Error case - table with variant column + // Test 9: table with variant column def kafkaJsonTopic9 = "test_routine_load_flexible_partial_update_variant" def tableName9 = "test_routine_load_flex_update_variant" def job9 = "test_flex_partial_update_job_variant" @@ -513,7 +492,8 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { ); """ - test { + sql """ INSERT INTO ${tableName9} VALUES (1, 'base', '{"a": 1, "b": 1}') """ + try { sql """ CREATE ROUTINE LOAD ${job9} ON ${tableName9} PROPERTIES @@ -529,7 +509,214 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { "property.kafka_default_offsets" = "OFFSET_BEGINNING" ); """ - exception "Flexible partial update can only support table without variant columns" + + def data9 = [ + '{"id": 1, "data": {"a": 10}}', + '{"id": 1, "data": {"c": 3}}', + '{"id": 2, "name": "new", "data": {"x": 5}}' + ] + data9.each { line -> + logger.info("Sending to Kafka: ${line}") + def record = new ProducerRecord<>(kafkaJsonTopic9, null, line) + producer.send(record).get() + } + producer.flush() + + RoutineLoadTestUtils.waitForTaskFinishMoW(runSql, job9, tableName9, 1) + def variantRows = sql """ + SELECT id, name, cast(data['a'] as int), cast(data['b'] as int), + cast(data['c'] as int), cast(data['x'] as int) + FROM ${tableName9} ORDER BY id + """ + assertEquals("[[1, base, 10, 1, 3, null], [2, new, null, null, null, 5]]", + variantRows.toString()) + } finally { + sql "STOP ROUTINE LOAD FOR ${job9}" + } + + def tableName9Doc = "test_routine_load_flex_update_variant_doc" + def job9Doc = "test_flex_partial_update_job_variant_doc" + + sql """ DROP TABLE IF EXISTS ${tableName9Doc} force;""" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName9Doc} ( + `id` int NOT NULL, + `data` variant NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true" + ); + """ + test { + sql """ + CREATE ROUTINE LOAD ${job9Doc} ON ${tableName9Doc} + PROPERTIES + ( + "max_batch_interval" = "10", + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + 
) + FROM KAFKA + ( + "kafka_broker_list" = "${kafka_broker}", + "kafka_topic" = "${kafkaJsonTopic9}_doc", + "property.kafka_default_offsets" = "OFFSET_BEGINNING" + ); + """ + exception "VARIANT flexible partial update does not support doc mode" + } + + def kafkaJsonTopic9Seq = "test_routine_load_flexible_partial_update_variant_seq" + def tableName9Seq = "test_routine_load_flex_update_variant_seq" + def job9Seq = "test_flex_partial_update_job_variant_seq" + + sql """ DROP TABLE IF EXISTS ${tableName9Seq} force;""" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName9Seq} ( + `id` int NOT NULL, + `seq` int NULL, + `data` variant NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "function_column.sequence_col" = "seq" + ); + """ + sql """ INSERT INTO ${tableName9Seq} VALUES (1, 10, '{"a": 1, "b": 1}') """ + try { + sql """ + CREATE ROUTINE LOAD ${job9Seq} ON ${tableName9Seq} + PROPERTIES + ( + "max_batch_interval" = "10", + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "${kafka_broker}", + "kafka_topic" = "${kafkaJsonTopic9Seq}", + "property.kafka_default_offsets" = "OFFSET_BEGINNING" + ); + """ + + def data9Seq = [ + '{"id": 1, "seq": 20, "data": {"a": 2}}', + '{"id": 1, "seq": 15, "data": {"b": 3}}', + '{"id": 1, "seq": 20, "data": {"b": 4}}' + ] + data9Seq.each { line -> + logger.info("Sending to Kafka: ${line}") + def record = new ProducerRecord<>(kafkaJsonTopic9Seq, null, line) + producer.send(record).get() + } + producer.flush() + + waitForLoadedRows(job9Seq, 3) + def seqVariantRows = sql """ + SELECT id, seq, __DORIS_SEQUENCE_COL__, cast(data['a'] as int), cast(data['b'] as int) + FROM ${tableName9Seq} ORDER BY id + """ + assertEquals("[[1, 20, 20, 2, 4]]", seqVariantRows.toString()) + } finally { + sql "STOP ROUTINE LOAD FOR ${job9Seq}" + } + + def kafkaJsonTopic9Order = "test_routine_load_flexible_partial_update_variant_order" + def tableName9Order = "test_routine_load_flex_update_variant_order" + def job9Order = "test_flex_partial_update_job_variant_order" + + sql """ DROP TABLE IF EXISTS ${tableName9Order} force;""" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName9Order} ( + `id` int NOT NULL, + `seq` int NULL, + `data` variant NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true" + ); + """ + test { + sql """ + CREATE ROUTINE LOAD ${job9Order} ON ${tableName9Order} + ORDER BY seq + PROPERTIES + ( + "max_batch_interval" = "10", + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "${kafka_broker}", + "kafka_topic" = "${kafkaJsonTopic9Order}", + "property.kafka_default_offsets" = "OFFSET_BEGINNING" + ); + """ + exception "function_column.sequence_col" + } + + def kafkaJsonTopic9Merge = "test_routine_load_flexible_partial_update_variant_merge" + def tableName9Merge = "test_routine_load_flex_update_variant_merge" + def job9Merge = "test_flex_partial_update_job_variant_merge" + + sql """ DROP TABLE IF EXISTS 
${tableName9Merge} force;""" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName9Merge} ( + `id` int NOT NULL, + `name` varchar(65533) NULL, + `is_delete` int NULL, + `data` variant NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true" + ); + """ + test { + sql """ + CREATE ROUTINE LOAD ${job9Merge} ON ${tableName9Merge} + WITH MERGE + DELETE ON is_delete = 1 + PROPERTIES + ( + "max_batch_interval" = "10", + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "${kafka_broker}", + "kafka_topic" = "${kafkaJsonTopic9Merge}", + "property.kafka_default_offsets" = "OFFSET_BEGINNING" + ); + """ + exception "merge_type" } // Test 10: Error case - invalid unique_key_update_mode value @@ -702,6 +889,18 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { // pause the job before altering sql "PAUSE ROUTINE LOAD FOR ${job12}" + test { + sql """ + ALTER ROUTINE LOAD FOR ${job12} + PROPERTIES + ( + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS", + "jsonpaths" = '[\"\$.id\"]' + ); + """ + exception "Flexible partial update does not support jsonpaths" + } + // alter to UPDATE_FLEXIBLE_COLUMNS mode sql """ ALTER ROUTINE LOAD FOR ${job12} @@ -717,6 +916,36 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { logger.info("Altered routine load job properties: ${jobProperties}") assertTrue(jobProperties.contains("UPDATE_FLEXIBLE_COLUMNS")) + test { + sql """ + ALTER ROUTINE LOAD FOR ${job12} + PROPERTIES + ( + "jsonpaths" = '[\"\$.id\"]' + ); + """ + exception "Flexible partial update does not support jsonpaths" + } + + test { + sql """ + ALTER ROUTINE LOAD FOR ${job12} + PROPERTIES + ( + "fuzzy_parse" = "true" + ); + """ + exception "Flexible partial update does not support fuzzy_parse" + } + + test { + sql """ + ALTER ROUTINE LOAD FOR ${job12} + COLUMNS(id, score); + """ + exception "Flexible partial update does not support COLUMNS specification" + } + // resume the job sql "RESUME ROUTINE LOAD FOR ${job12}" @@ -1050,7 +1279,7 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { sql "STOP ROUTINE LOAD FOR ${job17}" } - // Test 18: ALTER to flex mode succeeds with WHERE clause + // Test 18: ALTER to flex mode fails with WHERE clause def kafkaJsonTopic18 = "test_routine_load_alter_flex_where" def tableName18 = "test_routine_load_alter_flex_where" def job18 = "test_alter_flex_where_job" @@ -1074,15 +1303,6 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { ); """ - // insert initial data - sql """ - INSERT INTO ${tableName18} VALUES - (1, 'alice', 100, 20), - (2, 'bob', 90, 21) - """ - - qt_select_initial18 "SELECT id, name, score, age FROM ${tableName18} ORDER BY id" - try { // create routine load with WHERE clause (UPSERT mode) sql """ @@ -1103,46 +1323,16 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { sql "PAUSE ROUTINE LOAD FOR ${job18}" - // alter to UPDATE_FLEXIBLE_COLUMNS mode - should succeed - sql """ - ALTER ROUTINE LOAD FOR ${job18} - PROPERTIES - ( - "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" - ); - """ - - // verify the property was changed - def res = sql "SHOW ROUTINE LOAD FOR ${job18}" - def jobProperties = res[0][11].toString() - 
logger.info("Altered routine load job properties: ${jobProperties}") - assertTrue(jobProperties.contains("UPDATE_FLEXIBLE_COLUMNS")) - - sql "RESUME ROUTINE LOAD FOR ${job18}" - - // send JSON data - WHERE clause filters id > 1 - def data18 = [ - '{"id": 1, "score": 999}', - '{"id": 2, "score": 95}', - '{"id": 3, "name": "charlie", "score": 80}' - ] - - data18.each { line -> - logger.info("Sending to Kafka: ${line}") - def record = new ProducerRecord<>(kafkaJsonTopic18, null, line) - producer.send(record).get() + test { + sql """ + ALTER ROUTINE LOAD FOR ${job18} + PROPERTIES + ( + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ); + """ + exception "where" } - producer.flush() - - // With skip_delete_bitmap=true and WHERE id > 1: - // - id=1: 1 version (not updated, filtered by WHERE) - // - id=2: 2 versions (original + partial update) - // - id=3: 1 version (new row) - // Total: 4 rows, so expectedMinRows = 3 (waits for count > 3) - RoutineLoadTestUtils.waitForTaskFinishMoW(runSql, job18, tableName18, 3) - - // verify: id=1 should NOT be updated (filtered by WHERE), id=2,3 should be updated - qt_select_after_alter_flex_where "SELECT id, name, score, age FROM ${tableName18} ORDER BY id" } catch (Exception e) { logger.error("Error during test: " + e.getMessage()) throw e diff --git a/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update_validate.groovy b/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update_validate.groovy new file mode 100644 index 00000000000000..1c0877808dbf79 --- /dev/null +++ b/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update_validate.groovy @@ -0,0 +1,301 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_routine_load_flexible_partial_update_validate", "nonConcurrent") { + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_where_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_merge_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_append_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_order_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_where_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_merge_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_delete_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_order_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_table_seq_job; """ + + sql """ DROP TABLE IF EXISTS test_rl_flex_validate_where force; """ + sql """ + CREATE TABLE test_rl_flex_validate_where ( + `id` int NOT NULL, + `name` varchar(32) NULL, + `score` int NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true" + ); + """ + + test { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_where_job ON test_rl_flex_validate_where + WHERE id > 1 + PROPERTIES + ( + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_where" + ); + """ + exception "where" + } + + sql """ DROP TABLE IF EXISTS test_rl_flex_validate_merge force; """ + sql """ + CREATE TABLE test_rl_flex_validate_merge ( + `id` int NOT NULL, + `name` varchar(32) NULL, + `is_delete` int NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true" + ); + """ + + test { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_merge_job ON test_rl_flex_validate_merge + WITH MERGE + DELETE ON is_delete = 1 + PROPERTIES + ( + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_merge" + ); + """ + exception "merge_type" + } + + test { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_append_job ON test_rl_flex_validate_merge + WITH APPEND + PROPERTIES + ( + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_append" + ); + """ + exception "merge_type" + } + + sql """ DROP TABLE IF EXISTS test_rl_flex_validate_order force; """ + sql """ + CREATE TABLE test_rl_flex_validate_order ( + `id` int NOT NULL, + `seq` int NULL, + `score` int NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true" + ); + """ + + test { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_order_job ON test_rl_flex_validate_order + ORDER BY seq + PROPERTIES + ( + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( 
+ "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_order" + ); + """ + exception "function_column.sequence_col" + } + + try { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_alter_where_job ON test_rl_flex_validate_where + WHERE id > 1 + PROPERTIES + ( + "format" = "json" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_alter_where" + ); + """ + sql """ PAUSE ROUTINE LOAD FOR test_rl_flex_validate_alter_where_job; """ + test { + sql """ + ALTER ROUTINE LOAD FOR test_rl_flex_validate_alter_where_job + PROPERTIES + ( + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ); + """ + exception "where" + } + } finally { + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_where_job; """ + } + + try { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_alter_merge_job ON test_rl_flex_validate_merge + WITH MERGE + DELETE ON is_delete = 1 + PROPERTIES + ( + "format" = "json" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_alter_merge" + ); + """ + sql """ PAUSE ROUTINE LOAD FOR test_rl_flex_validate_alter_merge_job; """ + test { + sql """ + ALTER ROUTINE LOAD FOR test_rl_flex_validate_alter_merge_job + PROPERTIES + ( + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ); + """ + exception "merge_type" + } + } finally { + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_merge_job; """ + } + + try { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_alter_delete_job ON test_rl_flex_validate_merge + PROPERTIES + ( + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_alter_delete" + ); + """ + sql """ PAUSE ROUTINE LOAD FOR test_rl_flex_validate_alter_delete_job; """ + test { + sql """ + ALTER ROUTINE LOAD FOR test_rl_flex_validate_alter_delete_job + DELETE ON is_delete = 1; + """ + exception "delete" + } + } finally { + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_delete_job; """ + } + + try { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_alter_order_job ON test_rl_flex_validate_order + ORDER BY seq + PROPERTIES + ( + "format" = "json" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_alter_order" + ); + """ + sql """ PAUSE ROUTINE LOAD FOR test_rl_flex_validate_alter_order_job; """ + test { + sql """ + ALTER ROUTINE LOAD FOR test_rl_flex_validate_alter_order_job + PROPERTIES + ( + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ); + """ + exception "function_column.sequence_col" + } + } finally { + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_order_job; """ + } + + sql """ DROP TABLE IF EXISTS test_rl_flex_validate_table_seq force; """ + sql """ + CREATE TABLE test_rl_flex_validate_table_seq ( + `id` int NOT NULL, + `seq` int NULL, + `score` int NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "function_column.sequence_col" = "seq" + ); + """ + try { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_table_seq_job ON test_rl_flex_validate_table_seq + PROPERTIES + ( + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( 
+ "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_table_seq" + ); + """ + } finally { + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_table_seq_job; """ + } +} diff --git a/regression-test/suites/unique_with_mow_p0/flexible/publish/test_flexible_partial_update_variant_publish_conflict.groovy b/regression-test/suites/unique_with_mow_p0/flexible/publish/test_flexible_partial_update_variant_publish_conflict.groovy new file mode 100644 index 00000000000000..de8605abf71fe8 --- /dev/null +++ b/regression-test/suites/unique_with_mow_p0/flexible/publish/test_flexible_partial_update_variant_publish_conflict.groovy @@ -0,0 +1,303 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.junit.Assert + +suite("test_flexible_partial_update_variant_publish_conflict") { + if (isCloudMode()) { + logger.info("skip test_flexible_partial_update_variant_publish_conflict in cloud mode") + return + } + + sql "set default_variant_enable_doc_mode = false" + + def dbName = context.config.getDbNameByFile(context.file) + + def do_streamload_2pc_commit = { tableName, txnId -> + def command = "curl -X PUT --location-trusted -u ${context.config.feHttpUser}:${context.config.feHttpPassword}" + + " -H txn_id:${txnId}" + + " -H txn_operation:commit" + + " http://${context.config.feHttpAddress}/api/${dbName}/${tableName}/_stream_load_2pc" + log.info("http_stream execute 2pc: ${command}") + + def process = command.execute() + def code = process.waitFor() + def out = process.text + def json2pc = parseJson(out) + log.info("http_stream 2pc result: ${out}".toString()) + assertEquals(0, code) + assertEquals("success", json2pc.status.toLowerCase()) + } + + def wait_for_publish = { txnId, waitSecond -> + String st = "PREPARE" + while (!st.equalsIgnoreCase("VISIBLE") && !st.equalsIgnoreCase("ABORTED") && waitSecond > 0) { + Thread.sleep(1000) + waitSecond -= 1 + def result = sql_return_maparray "show transaction from ${dbName} where id = ${txnId}" + assertNotNull(result) + st = result[0].TransactionStatus + } + log.info("Stream load with txn ${txnId} is ${st}") + assertEquals("VISIBLE", st) + } + + def prepare_streamload = { tableName, loadBody -> + def txnId = null + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'two_phase_commit', 'true' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream(loadBody.getBytes("UTF-8")) + time 40000 + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + + def json = parseJson(result) + txnId = json.TxnId + assertEquals("success", json.Status.toLowerCase()) + } + } + return txnId + } + + def streamload = { tableName, loadBody -> + 
streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream(loadBody.getBytes("UTF-8")) + time 20000 + } + } + + for (def use_row_store : [false, true]) { + def tableName = "t_fpu_var_pub_conf_${use_row_store}" + sql """ DROP TABLE IF EXISTS ${tableName} FORCE """ + sql """ + CREATE TABLE ${tableName} ( + `k` int NOT NULL, + `c` int NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "store_row_column" = "${use_row_store}"); + """ + + sql """ INSERT INTO ${tableName} VALUES (1, 1, '{"a":1,"b":1}') """ + + def txnA = prepare_streamload(tableName, """{"k":1,"v":{"a":2}}\n""") + def txnB = prepare_streamload(tableName, """{"k":1,"c":9,"v":{"b":3}}\n""") + do_streamload_2pc_commit(tableName, txnB) + wait_for_publish(txnB, 60) + do_streamload_2pc_commit(tableName, txnA) + wait_for_publish(txnA, 60) + + def disjointPathRows = sql """ + SELECT k, cast(v['a'] as int), cast(v['b'] as int), c + FROM ${tableName} ORDER BY k + """ + assertEquals("[[1, 2, 3, 9]]", disjointPathRows.toString()) + + def txnC = prepare_streamload(tableName, """{"k":1,"v":{"a":4}}\n""") + def txnD = prepare_streamload(tableName, """{"k":1,"v":{"a":5}}\n""") + do_streamload_2pc_commit(tableName, txnD) + wait_for_publish(txnD, 60) + do_streamload_2pc_commit(tableName, txnC) + wait_for_publish(txnC, 60) + + def samePathRows = sql """ + SELECT k, cast(v['a'] as int), cast(v['b'] as int), c + FROM ${tableName} ORDER BY k + """ + assertEquals("[[1, 4, 3, 9]]", samePathRows.toString()) + + sql """ INSERT INTO ${tableName} VALUES (3, 3, '{"nested":{"x":1},"b":1}') """ + def txnEmptyObject = prepare_streamload(tableName, """{"k":3,"v":{"nested":{}}}\n""") + def txnOtherPath = prepare_streamload(tableName, """{"k":3,"v":{"b":3}}\n""") + do_streamload_2pc_commit(tableName, txnOtherPath) + wait_for_publish(txnOtherPath, 60) + do_streamload_2pc_commit(tableName, txnEmptyObject) + wait_for_publish(txnEmptyObject, 60) + + def emptyObjectRows = sql """ + SELECT k, v['nested']['x'] IS NULL, cast(v['b'] as int), c + FROM ${tableName} WHERE k = 3 ORDER BY k + """ + assertEquals("[[3, true, 3, 3]]", emptyObjectRows.toString()) + + sql """ INSERT INTO ${tableName} VALUES (4, 4, '{"a":{"c":0},"x":1}') """ + def txnParentThenChild = prepare_streamload(tableName, + """{"k":4,"v":{"a":{}}} +{"k":4,"v":{"a":{"b":1}}} +""") + def txnConcurrentChild = prepare_streamload(tableName, """{"k":4,"v":{"a":{"c":9}}}\n""") + do_streamload_2pc_commit(tableName, txnConcurrentChild) + wait_for_publish(txnConcurrentChild, 60) + do_streamload_2pc_commit(tableName, txnParentThenChild) + wait_for_publish(txnParentThenChild, 60) + + def parentThenChildRows = sql """ + SELECT k, cast(v['a']['b'] as int), v['a']['c'] IS NULL, cast(v['x'] as int) + FROM ${tableName} WHERE k = 4 ORDER BY k + """ + assertEquals("[[4, 1, true, 1]]", parentThenChildRows.toString()) + + def multiTableName = "t_fpu_var_pub_multi_${use_row_store}" + sql """ DROP TABLE IF EXISTS ${multiTableName} FORCE """ + sql """ + CREATE TABLE ${multiTableName} ( + `k` int NOT NULL, + `v1` variant NULL, + `v2` variant NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + 
"enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "store_row_column" = "${use_row_store}"); + """ + + sql """ INSERT INTO ${multiTableName} VALUES (1, '{"a":1,"b":1}', '{"a":1,"b":1}') """ + def txnMultiA = prepare_streamload(multiTableName, + """{"k":1,"v1":{"a":2},"v2":{"b":8}}\n""") + def txnMultiB = prepare_streamload(multiTableName, """{"k":1,"v1":{"b":9}}\n""") + do_streamload_2pc_commit(multiTableName, txnMultiB) + wait_for_publish(txnMultiB, 60) + do_streamload_2pc_commit(multiTableName, txnMultiA) + wait_for_publish(txnMultiA, 60) + + def multiVariantRows = sql """ + SELECT k, cast(v1['a'] as int), cast(v1['b'] as int), + cast(v2['a'] as int), cast(v2['b'] as int) + FROM ${multiTableName} ORDER BY k + """ + assertEquals("[[1, 2, 9, 1, 8]]", multiVariantRows.toString()) + + sql """ INSERT INTO ${tableName} VALUES (2, 1, '{"a":1,"b":1}') """ + def txnPatchAfterDelete = prepare_streamload(tableName, """{"k":2,"v":{"a":2}}\n""") + streamload(tableName, """{"k":2,"__DORIS_DELETE_SIGN__":1}\n""") + do_streamload_2pc_commit(tableName, txnPatchAfterDelete) + wait_for_publish(txnPatchAfterDelete, 60) + + def patchAfterDeleteRows = sql """ + SELECT k, cast(v['a'] as int), v['b'] IS NULL, c + FROM ${tableName} WHERE k = 2 ORDER BY k + """ + assertEquals("[[2, 2, true, null]]", patchAfterDeleteRows.toString()) + + def sparseTableName = "t_fpu_var_pub_sparse_${use_row_store}" + sql """ DROP TABLE IF EXISTS ${sparseTableName} FORCE """ + sql """ + CREATE TABLE ${sparseTableName} ( + `k` int NOT NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "store_row_column" = "${use_row_store}"); + """ + + sql """ INSERT INTO ${sparseTableName} VALUES (1, '{"a":1,"b":1,"c":1}') """ + def txnSparseA = prepare_streamload(sparseTableName, """{"k":1,"v":{"b":2}}\n""") + def txnSparseB = prepare_streamload(sparseTableName, """{"k":1,"v":{"c":3}}\n""") + do_streamload_2pc_commit(sparseTableName, txnSparseB) + wait_for_publish(txnSparseB, 60) + do_streamload_2pc_commit(sparseTableName, txnSparseA) + wait_for_publish(txnSparseA, 60) + + def sparseRows = sql """ + SELECT k, cast(v['a'] as int), cast(v['b'] as int), cast(v['c'] as int) + FROM ${sparseTableName} ORDER BY k + """ + assertEquals("[[1, 1, 2, 3]]", sparseRows.toString()) + } + + for (def use_row_store : [false, true]) { + def seqTableName = "t_fpu_var_pub_seq_${use_row_store}" + sql """ DROP TABLE IF EXISTS ${seqTableName} FORCE """ + sql """ + CREATE TABLE ${seqTableName} ( + `k` int NOT NULL, + `seq` int NULL, + `v` variant NULL, + `c` int NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "function_column.sequence_col" = "seq", + "store_row_column" = "${use_row_store}"); + """ + + sql """ INSERT INTO ${seqTableName} VALUES (1, 10, '{"a":1,"b":1}', 1) """ + + def txnLowSeq = prepare_streamload(seqTableName, """{"k":1,"seq":5,"v":{"a":2}}\n""") + def txnHighSeq = prepare_streamload(seqTableName, """{"k":1,"seq":20,"v":{"b":3}}\n""") + do_streamload_2pc_commit(seqTableName, txnHighSeq) + wait_for_publish(txnHighSeq, 60) + do_streamload_2pc_commit(seqTableName, txnLowSeq) + 
wait_for_publish(txnLowSeq, 60) + + def lowSeqDiscardRows = sql """ + SELECT k, seq, cast(v['a'] as int), cast(v['b'] as int), c + FROM ${seqTableName} ORDER BY k + """ + assertEquals("[[1, 20, 1, 3, 1]]", lowSeqDiscardRows.toString()) + + def txnMissingSeq = prepare_streamload(seqTableName, """{"k":1,"v":{"a":4}}\n""") + do_streamload_2pc_commit(seqTableName, txnMissingSeq) + wait_for_publish(txnMissingSeq, 60) + + def missingSeqRows = sql """ + SELECT k, seq, cast(v['a'] as int), cast(v['b'] as int), c + FROM ${seqTableName} ORDER BY k + """ + assertEquals("[[1, 20, 4, 3, 1]]", missingSeqRows.toString()) + + def txnHigherSeq = prepare_streamload(seqTableName, """{"k":1,"seq":30,"v":{"a":5}}\n""") + do_streamload_2pc_commit(seqTableName, txnHigherSeq) + wait_for_publish(txnHigherSeq, 60) + + def highSeqRows = sql """ + SELECT k, seq, cast(v['a'] as int), cast(v['b'] as int), c + FROM ${seqTableName} ORDER BY k + """ + assertEquals("[[1, 30, 5, 3, 1]]", highSeqRows.toString()) + } +} diff --git a/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_property.groovy b/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_property.groovy index 4e26a6a7de45ad..df7f2fd2d9671d 100644 --- a/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_property.groovy +++ b/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_property.groovy @@ -16,6 +16,7 @@ // under the License. suite('test_flexible_partial_update_property') { + sql "set default_variant_enable_doc_mode = false" def tableName = "test_flexible_partial_update_property" sql """ DROP TABLE IF EXISTS ${tableName} """ @@ -37,11 +38,85 @@ suite('test_flexible_partial_update_property') { def show_res = sql "show create table ${tableName}" assertTrue(show_res.toString().contains('"enable_unique_key_skip_bitmap_column" = "true"')) + def doSchemaChange = { cmd -> + sql cmd + waitForSchemaChangeDone { + sql """SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1""" + time 2000 + } + } + + def expect_flexible_streamload_fail = { targetTable, loadBody, expectedMessage -> + streamLoad { + table "${targetTable}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream(loadBody.getBytes("UTF-8")) + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + assertTrue(exception.getMessage().contains(expectedMessage)) + return + } + def json = parseJson(result) + assertEquals("fail", json.Status.toLowerCase()) + assertTrue(json.Message.contains(expectedMessage)) + } + } + } + test { sql """alter table ${tableName} enable feature "UPDATE_FLEXIBLE_COLUMNS"; """ - exception "table ${tableName} has enabled update flexible columns feature already." + exception "table ${tableName} has enabled update flexible columns feature already." 
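+ // re-running ENABLE FEATURE on a table that already has it should be a clean
+ // rejection; the duplicate-enable message asserted above comes from the alter handler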
+ }
+ doSchemaChange """alter table ${tableName} add column v_normal variant NULL;"""
+ // assumption: variant doc mode is taken from the session default at column/table
+ // creation time, so it is switched on only around the doc-mode rejection cases
+ sql "set default_variant_enable_doc_mode = true"
+ test {
+ sql """alter table ${tableName} add column v_doc variant NULL;"""
+ exception "VARIANT flexible partial update does not support doc mode"
+ }
+ test {
+ sql """alter table ${tableName} add column (v_doc_multi variant NULL);"""
+ exception "VARIANT flexible partial update does not support doc mode"
 }
 
+ tableName = "test_flexible_partial_update_property_doc_create"
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """ CREATE TABLE ${tableName} (
+ `k` int(11) NULL,
+ `v` variant NULL
+ ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES(
+ "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "enable_unique_key_skip_bitmap_column" = "true",
+ "store_row_column" = "false"); """
+ expect_flexible_streamload_fail(tableName, """{"k":1,"v":{"a":1}}\n""",
+ "VARIANT flexible partial update does not support doc mode")
+ sql "set default_variant_enable_doc_mode = false"
+
+ tableName = "test_flexible_partial_update_property_flatten_create"
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql "set enable_variant_flatten_nested = true"
+ sql """ CREATE TABLE ${tableName} (
+ `k` int(11) NULL,
+ `v` variant NULL
+ ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES(
+ "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "enable_unique_key_skip_bitmap_column" = "true",
+ "deprecated_variant_enable_flatten_nested" = "true",
+ "store_row_column" = "false"); """
+ sql "set enable_variant_flatten_nested = false"
+ expect_flexible_streamload_fail(tableName, """{"k":1,"v":{"a":1}}\n""",
+ "VARIANT flexible partial update does not support deprecated_variant_enable_flatten_nested")
+
 // the default value "enable_unique_key_skip_bitmap_column" is "false"
 tableName = "test_flexible_partial_update_property2"
 sql """ DROP TABLE IF EXISTS ${tableName} """
@@ -65,19 +140,101 @@ suite('test_flexible_partial_update_property') {
 sql """insert into ${tableName} select number, number, number, number, number, number from numbers("number" = "6"); """
 order_qt_sql "select k,v1,v2,v3,v4,v5 from ${tableName};"
- def doSchemaChange = { cmd ->
- sql cmd
- waitForSchemaChangeDone {
- sql """SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1"""
- time 2000
+ test {
+ sql """alter table ${tableName} set ("enable_unique_key_skip_bitmap_column"="true");"""
+ exception "You can not modify property 'enable_unique_key_skip_bitmap_column'."
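+ // the skip bitmap column backs flexible partial update state, so the property
+ // is expected to be create-time only and immutable afterwards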
+ }
+
+ if (!isCloudMode()) {
+ tableName = "test_flexible_partial_update_property_lsc_false"
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """ CREATE TABLE ${tableName} (
+ `k` int(11) NULL,
+ `v` BIGINT NULL
+ ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES(
+ "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "false",
+ "store_row_column" = "false"); """
+ test {
+ sql """alter table ${tableName} enable feature "UPDATE_FLEXIBLE_COLUMNS";"""
+ exception "light_schema_change"
 }
 }
+ tableName = "test_flexible_partial_update_property_cluster_key"
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """ CREATE TABLE ${tableName} (
+ `k` int(11) NULL,
+ `v` BIGINT NOT NULL
+ ) UNIQUE KEY(`k`) ORDER BY(`v`) DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES(
+ "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "store_row_column" = "false"); """
 test {
- sql """alter table ${tableName} set ("enable_unique_key_skip_bitmap_column"="true");"""
- exception "You can not modify property 'enable_unique_key_skip_bitmap_column'."
+ sql """alter table ${tableName} enable feature "UPDATE_FLEXIBLE_COLUMNS";"""
+ exception "cluster keys"
+ }
+
+ // assumption: doc mode follows the session default at create/alter time, so it
+ // is switched on only around the doc-mode rejection cases below
+ sql "set default_variant_enable_doc_mode = true"
+ tableName = "test_flexible_partial_update_property_doc"
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """ CREATE TABLE ${tableName} (
+ `k` int(11) NULL,
+ `v` variant NULL
+ ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES(
+ "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "store_row_column" = "false"); """
+ test {
+ sql """alter table ${tableName} enable feature "UPDATE_FLEXIBLE_COLUMNS";"""
+ exception "VARIANT flexible partial update does not support doc mode"
+ }
+ sql "set default_variant_enable_doc_mode = false"
+
+ sql "set enable_variant_flatten_nested = true"
+ tableName = "test_flexible_partial_update_property_flatten"
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """ CREATE TABLE ${tableName} (
+ `k` int(11) NULL,
+ `v` variant NULL
+ ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES(
+ "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "deprecated_variant_enable_flatten_nested" = "true",
+ "store_row_column" = "false"); """
+ test {
+ sql """alter table ${tableName} enable feature "UPDATE_FLEXIBLE_COLUMNS";"""
+ exception "VARIANT flexible partial update does not support deprecated_variant_enable_flatten_nested"
+ }
+ sql "set enable_variant_flatten_nested = false"
+
+ sql "set default_variant_enable_doc_mode = true"
+ tableName = "test_flexible_partial_update_property_doc_add_enable"
+ sql """ DROP TABLE IF EXISTS ${tableName} """
+ sql """ CREATE TABLE ${tableName} (
+ `k` int(11) NULL
+ ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES(
+ "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "store_row_column" = "false"); """
+ test {
+ sql """alter table ${tableName}
+ add column v variant NULL,
+ enable feature "UPDATE_FLEXIBLE_COLUMNS";"""
+ exception "VARIANT flexible partial update does not support doc mode"
 }
+ sql "set default_variant_enable_doc_mode = false"
+ tableName = "test_flexible_partial_update_property2"
+
 doSchemaChange """alter table ${tableName} enable feature "UPDATE_FLEXIBLE_COLUMNS";"""
 
 show_res = sql "show create table ${tableName}"
 assertTrue(show_res.toString().contains('"enable_unique_key_skip_bitmap_column" = "true"'))
@@ -95,4 +252,4 @@ suite('test_flexible_partial_update_property') {
 order_qt_sql "select 
k,v1,v2,v3,v4,v5,BITMAP_TO_STRING(__DORIS_SKIP_BITMAP_COL__) from ${tableName};" -} \ No newline at end of file +} diff --git a/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_restricts.groovy b/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_restricts.groovy index d82ae7e8336701..9e650bc0d3c618 100644 --- a/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_restricts.groovy +++ b/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_restricts.groovy @@ -16,6 +16,7 @@ // under the License. suite('test_flexible_partial_update_restricts') { + sql "set default_variant_enable_doc_mode = false" def tableName = "test_flexible_partial_update_restricts" sql """ DROP TABLE IF EXISTS ${tableName} """ @@ -178,6 +179,24 @@ suite('test_flexible_partial_update_restricts') { } } + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + set 'delete', 'v2 > 5' + file "test1.json" + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + def json = parseJson(result) + assertEquals("fail", json.Status.toLowerCase()) + assertTrue(json.Message.contains("Don't support flexible partial update when 'delete' is specified")); + } + } + if (!isCloudMode()) { // in cloud mode, all tables has light schema change on tableName = "test_flexible_partial_update_restricts2" @@ -264,13 +283,32 @@ suite('test_flexible_partial_update_restricts') { PROPERTIES( "replication_num" = "1", "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", "enable_unique_key_skip_bitmap_column" = "true", "store_row_column" = "false"); """ - + + sql """insert into ${tableName} select number, number, number, number, number, number, null from numbers("number" = "6"); """ streamLoad { table "${tableName}" set 'format', 'json' set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + file "test1.json" + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + } + } + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'fuzzy_parse', 'true' set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' file "test1.json" time 20000 @@ -280,7 +318,41 @@ suite('test_flexible_partial_update_restricts') { } def json = parseJson(result) assertEquals("fail", json.Status.toLowerCase()) - assertTrue(json.Message.contains("Flexible partial update can only support table without variant columns.")); + assertTrue(json.Message.contains("Don't support flexible partial update when 'fuzzy_parse' is enabled")); + } + } + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'columns', 'k,v1,v3,v5' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + file "test1.json" + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + def json = parseJson(result) + assertEquals("fail", json.Status.toLowerCase()) + assertTrue(json.Message.contains("Don't support flexible partial update when 'columns' is specified")); + } + } + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'jsonpaths', 
'["$.k","$.v1","$.v3"]' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + file "test1.json" + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + def json = parseJson(result) + assertEquals("fail", json.Status.toLowerCase()) + assertTrue(json.Message.contains("Don't support flexible partial update when 'jsonpaths' is specified")); } } @@ -404,4 +476,4 @@ suite('test_flexible_partial_update_restricts') { "store_row_column" = "false"); """ exception "Disable to create table column with name start with __DORIS_: __DORIS_SKIP_BITMAP_COL__" } -} \ No newline at end of file +} diff --git a/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_variant.groovy b/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_variant.groovy new file mode 100644 index 00000000000000..46a3a364e7aa06 --- /dev/null +++ b/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_variant.groovy @@ -0,0 +1,335 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_flexible_partial_update_variant") { + sql "set default_variant_enable_doc_mode = false" + + def docModeUnsupportedMsg = "VARIANT flexible partial update does not support doc mode" + def nullPatchUnsupportedMsg = "VARIANT flexible partial update does not support JSON null patch values" + def oldValueUnsupportedMsg = "VARIANT flexible partial update only supports patching JSON object old values" + def expect_streamload_fail = { tableName, loadBody, expectedMessage -> + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream(loadBody.getBytes("UTF-8")) + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + assertTrue(exception.getMessage().contains(expectedMessage)) + return + } + def json = parseJson(result) + assertEquals("fail", json.Status.toLowerCase()) + assertTrue(json.Message.contains(expectedMessage)) + } + } + } + def expect_streamload_filtered = { tableName, loadBody, expectedTotalRows, expectedLoadedRows, + expectedFilteredRows -> + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'max_filter_ratio', '1' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream(loadBody.getBytes("UTF-8")) + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(expectedTotalRows, json.NumberTotalRows) + assertEquals(expectedLoadedRows, json.NumberLoadedRows) + assertEquals(expectedFilteredRows, json.NumberFilteredRows) + } + } + } + + for (def use_row_store : [false, true]) { + def tableName = "test_flexible_partial_update_variant_${use_row_store}" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE ${tableName} ( + `k` int NOT NULL, + `c` int NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "store_row_column" = "${use_row_store}"); + """ + + sql """ + INSERT INTO ${tableName} VALUES + (1, 1, '{"a": 1, "c": 3, "nested": {"x": 1}}'), + (2, 2, '{"nested": {"x": 1}, "keep": 9}') + """ + + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + file "variant_patch_merge.json" + time 20000 + } + + def merged = sql """ + SELECT k, cast(v['a'] as int), cast(v['b'] as int), cast(v['c'] as int), + cast(v['nested']['x'] as int), cast(v['nested']['y'] as int), c + FROM ${tableName} ORDER BY k + """ + assertEquals("[[1, 10, 20, 3, 1, null, 1], [2, null, null, null, 1, 2, 22]]", + merged.toString()) + + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream("""{"k":4,"v":{"new_path":4}} +""".getBytes("UTF-8")) + time 20000 + } + + def newKeyRows = sql """ + SELECT k, cast(v['new_path'] as int), c + FROM ${tableName} WHERE k = 4 ORDER BY k + """ + assertEquals("[[4, 4, null]]", newKeyRows.toString()) + + 
expect_streamload_filtered(tableName, """{"k":1,"v":[1,2,3]} +{"k":1,"v":"{\\"b\\":2}"} +{"k":1,"v":null} +{"k":1,"v":{"b":30}} +""", 4, 1, 3) + expect_streamload_fail(tableName, """{"k":1,"v":{"a":null}}\n""", nullPatchUnsupportedMsg) + + def afterRejectedRootValues = sql """ + SELECT k, cast(v['a'] as int), cast(v['b'] as int), cast(v['c'] as int), + cast(v['nested']['x'] as int) + FROM ${tableName} WHERE k = 1 ORDER BY k + """ + assertEquals("[[1, 10, 30, 3, 1]]", afterRejectedRootValues.toString()) + + sql """ INSERT INTO ${tableName} VALUES (3, 3, '{"nested": {"x": 1}, "b": 1}') """ + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream("""{"k":3,"v":{"nested":{}}} +{"k":3,"v":{"b":2}} +""".getBytes("UTF-8")) + time 20000 + } + + def emptyObjectRows = sql """ + SELECT k, v['nested']['x'] IS NULL, cast(v['b'] as int), c + FROM ${tableName} WHERE k = 3 ORDER BY k + """ + assertEquals("[[3, true, 2, 3]]", emptyObjectRows.toString()) + + sql """ INSERT INTO ${tableName} VALUES (5, 5, '{"a": 1, "b": 1, "keep": 1}') """ + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream("""{"k":5,"v":{"a":2}} +{"k":5,"__DORIS_DELETE_SIGN__":1} +{"k":5,"v":{"b":3}} +""".getBytes("UTF-8")) + time 20000 + } + + def patchAfterDeleteRows = sql """ + SELECT k, cast(v['a'] as int), cast(v['b'] as int), cast(v['keep'] as int), c + FROM ${tableName} WHERE k = 5 ORDER BY k + """ + assertEquals("[[5, null, 3, null, null]]", patchAfterDeleteRows.toString()) + } + + def oldRootTable = "test_flexible_partial_update_variant_old_root" + sql """ DROP TABLE IF EXISTS ${oldRootTable} """ + sql """ + CREATE TABLE ${oldRootTable} ( + `k` int NOT NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true"); + """ + streamLoad { + table "${oldRootTable}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + inputStream new ByteArrayInputStream("""{"k":1,"v":[1,2]} +{"k":2,"v":"plain"} +""".getBytes("UTF-8")) + time 20000 + } + expect_streamload_fail(oldRootTable, """{"k":1,"v":{"a":1}}\n""", oldValueUnsupportedMsg) + expect_streamload_fail(oldRootTable, """{"k":2,"v":{"a":1}}\n""", oldValueUnsupportedMsg) + + def typedTable = "test_flexible_partial_update_variant_typed" + sql """ DROP TABLE IF EXISTS ${typedTable} """ + sql """ + CREATE TABLE ${typedTable} ( + `k` int NOT NULL, + `v` variant<'a' : int, 'b' : int, properties("variant_enable_typed_paths_to_sparse" = "false")> NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true"); + """ + sql """ INSERT INTO ${typedTable} VALUES (1, '{"a": 1, "c": 3}') """ + + streamLoad { + table "${typedTable}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + file "variant_patch_merge.json" + time 20000 + } + + def typedMerged = sql """ + SELECT k, cast(v['a'] as int), 
cast(v['b'] as int), cast(v['c'] as int)
+ FROM ${typedTable} WHERE k = 1 ORDER BY k
+ """
+ assertEquals("[[1, 10, 20, 3]]", typedMerged.toString())
+
+ // assumption: doc mode is inherited from the session default when the table is
+ // created, so it is enabled only for this rejection case and restored right after
+ sql "set default_variant_enable_doc_mode = true"
+ def docTable = "test_flexible_partial_update_variant_doc"
+ sql """ DROP TABLE IF EXISTS ${docTable} """
+ sql """
+ CREATE TABLE ${docTable} (
+ `k` int NOT NULL,
+ `v` variant NULL
+ ) UNIQUE KEY(`k`)
+ DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES(
+ "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "enable_unique_key_skip_bitmap_column" = "true");
+ """
+ expect_streamload_fail(docTable, """{"k":1,"v":{"a":1}}\n""", docModeUnsupportedMsg)
+ sql "set default_variant_enable_doc_mode = false"
+
+ def seqTable = "test_flexible_partial_update_variant_same_batch_seq"
+ sql """ DROP TABLE IF EXISTS ${seqTable} """
+ sql """
+ CREATE TABLE ${seqTable} (
+ `k` int NOT NULL,
+ `seq` int NULL,
+ `v` variant NULL
+ ) UNIQUE KEY(`k`)
+ DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES(
+ "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "enable_unique_key_skip_bitmap_column" = "true",
+ "function_column.sequence_col" = "seq");
+ """
+ sql """ INSERT INTO ${seqTable} VALUES (1, 10, '{"a":1,"b":1}') """
+
+ String seqLoad = """{"k":1,"seq":20,"v":{"a":2}}
+{"k":1,"seq":15,"v":{"b":3}}
+{"k":1,"seq":20,"v":{"b":4}}
+"""
+ streamLoad {
+ table "${seqTable}"
+ set 'format', 'json'
+ set 'read_json_by_line', 'true'
+ set 'strict_mode', 'false'
+ set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS'
+ inputStream new ByteArrayInputStream(seqLoad.getBytes("UTF-8"))
+ time 20000
+ }
+
+ def seqMerged = sql """
+ SELECT k, seq, cast(v['a'] as int), cast(v['b'] as int)
+ FROM ${seqTable} ORDER BY k
+ """
+ assertEquals("[[1, 20, 2, 4]]", seqMerged.toString())
+
+ def seqMapTable = "test_flexible_partial_update_variant_seq_map"
+ sql """ DROP TABLE IF EXISTS ${seqMapTable} """
+ sql """
+ CREATE TABLE ${seqMapTable} (
+ `k` int NOT NULL,
+ `seq_map` int NULL,
+ `v` variant NULL
+ ) UNIQUE KEY(`k`)
+ DISTRIBUTED BY HASH(`k`) BUCKETS 1
+ PROPERTIES(
+ "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true",
+ "light_schema_change" = "true",
+ "enable_unique_key_skip_bitmap_column" = "true",
+ "function_column.sequence_col" = "seq_map");
+ """
+ sql """
+ INSERT INTO ${seqMapTable} VALUES
+ (1, 10, '{"a":1,"b":1}'),
+ (2, 20, '{"a":2,"b":2}')
+ """
+
+ String seqMapLoad = """{"k":1,"v":{"a":3}}
+{"k":2,"seq_map":15,"v":{"a":4}}
+{"k":2,"seq_map":25,"v":{"b":5}}
+"""
+ streamLoad {
+ table "${seqMapTable}"
+ set 'format', 'json'
+ set 'read_json_by_line', 'true'
+ set 'strict_mode', 'false'
+ set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS'
+ inputStream new ByteArrayInputStream(seqMapLoad.getBytes("UTF-8"))
+ time 20000
+ }
+
+ def seqMapMerged = sql """
+ SELECT k, seq_map, __DORIS_SEQUENCE_COL__, cast(v['a'] as int), cast(v['b'] as int)
+ FROM ${seqMapTable} ORDER BY k
+ """
+ assertEquals("[[1, 10, 10, 3, 1], [2, 25, 25, 2, 5]]", seqMapMerged.toString())
+}
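+// Note: in the seq_map case, a patch row without an explicit sequence value
+// appears to keep the old row's sequence (k=1 stays at seq 10), while patches
+// below the current sequence (k=2 with seq_map=15) are discarded.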