diff --git a/be/src/agent/heartbeat_server.cpp b/be/src/agent/heartbeat_server.cpp
index 327b14cf669d02..dcdd9c56c19aeb 100644
--- a/be/src/agent/heartbeat_server.cpp
+++ b/be/src/agent/heartbeat_server.cpp
@@ -90,6 +90,7 @@ void HeartbeatServer::heartbeat(THeartbeatResult& heartbeat_result,
     heartbeat_result.backend_info.__set_be_node_role(config::be_node_role);
     // If be is gracefully stop, then k_doris_exist is set to true
     heartbeat_result.backend_info.__set_is_shutdown(doris::k_doris_exit);
+    heartbeat_result.backend_info.__set_supports_variant_flexible_partial_update(true);
     heartbeat_result.backend_info.__set_fragment_executing_count(
             get_fragment_executing_count());
     heartbeat_result.backend_info.__set_fragment_last_active_time(
diff --git a/be/src/cloud/cloud_rowset_builder.cpp b/be/src/cloud/cloud_rowset_builder.cpp
index abe36ed5790d71..e804a8425369ca 100644
--- a/be/src/cloud/cloud_rowset_builder.cpp
+++ b/be/src/cloud/cloud_rowset_builder.cpp
@@ -145,6 +145,7 @@ Status CloudRowsetBuilder::set_txn_related_info() {
                 _req.txn_expiration);
         return Status::OK();
     }
+    RETURN_IF_ERROR(_check_flexible_partial_update_single_segment());
     if (config::enable_merge_on_write_correctness_check && _rowset->num_rows() != 0) {
         auto st = _tablet->check_delete_bitmap_correctness(
                 _delete_bitmap, _rowset->end_version() - 1, _req.txn_id, *_rowset_ids);
diff --git a/be/src/exec/common/variant_util.cpp b/be/src/exec/common/variant_util.cpp
index 39e8f236ecd16e..45b5f124f8a9fb 100644
--- a/be/src/exec/common/variant_util.cpp
+++ b/be/src/exec/common/variant_util.cpp
@@ -17,7 +17,6 @@

 #include "exec/common/variant_util.h"

-#include
 #include
 #include
 #include
@@ -34,10 +33,12 @@
 #include
 #include
+#include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -76,6 +77,7 @@
 #include "core/field.h"
 #include "core/typeid_cast.h"
 #include "core/types.h"
+#include "core/value/bitmap_value.h"
 #include "exec/common/field_visitors.h"
 #include "exec/common/sip_hash.h"
 #include "exprs/function/function.h"
@@ -101,6 +103,7 @@
 #include "util/json/json_parser.h"
 #include "util/json/path_in_data.h"
 #include "util/json/simd_json_parser.h"
+#include "util/jsonb_utils.h"

 namespace doris::variant_util {
@@ -841,7 +844,9 @@ TabletColumn create_doc_value_column(const TabletColumn& variant, int bucket_ind
 }

 uint32_t variant_binary_shard_of(const StringRef& path, uint32_t bucket_num) {
-    if (bucket_num <= 1) return 0;
+    if (bucket_num <= 1) {
+        return 0;
+    }
     SipHash hash;
     hash.update(path.data, path.size);
     uint64_t h = hash.get64();
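Before reading the large hunk that follows, it helps to see how a single 64-bit skip-bitmap entry is carved into fields. Below is a minimal standalone sketch of the layout the new `VARIANT_PATCH_PATH_MARKER_*` constants describe; only the widths and shifts come from the patch, the helper name and the sample values are mine.

```cpp
#include <cstdint>
#include <cstdio>

// Field layout mirrored from the constants in the hunk below:
// [63] marker flag  [62] class (0 = length marker, 1 = byte marker)
// [61..31] column uid  [30..20] path index  [19..8] byte pos  [7..0] byte value
constexpr uint64_t kMarkerBit = 1ULL << 63;
constexpr uint64_t kClassBit = 1ULL << 62;

uint64_t make_byte_marker(uint64_t uid, uint64_t idx, uint64_t pos, uint8_t byte) {
    return kMarkerBit | kClassBit | (uid << 31) | (idx << 20) | (pos << 8) | byte;
}

int main() {
    uint64_t m = make_byte_marker(/*uid=*/5, /*idx=*/3, /*pos=*/2, /*byte=*/0x41);
    std::printf("uid=%llu idx=%llu pos=%llu byte=0x%02llx\n",
                (unsigned long long)((m >> 31) & ((1ULL << 31) - 1)),
                (unsigned long long)((m >> 20) & ((1ULL << 11) - 1)),
                (unsigned long long)((m >> 8) & ((1ULL << 12) - 1)),
                (unsigned long long)(m & 0xFF)); // -> uid=5 idx=3 pos=2 byte=0x41
    return 0;
}
```

The widths sum to 64 (1 + 1 + 31 + 11 + 12 + 8), which is exactly what the `static_assert` in the hunk checks.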
@@ -2141,6 +2146,659 @@ phmap::flat_hash_map materialize_doc
     return subcolumns;
 }

+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_MASK = 1ULL << 63;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT = 62;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_UID_BITS = 31;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_INDEX_BITS = 11;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_POS_BITS = 12;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_BYTE_BITS = 8;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_POS_SHIFT = VARIANT_PATCH_PATH_MARKER_BYTE_BITS;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT =
+        VARIANT_PATCH_PATH_MARKER_POS_SHIFT + VARIANT_PATCH_PATH_MARKER_POS_BITS;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_UID_SHIFT =
+        VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT + VARIANT_PATCH_PATH_MARKER_INDEX_BITS;
+static_assert(VARIANT_PATCH_PATH_MARKER_UID_SHIFT + VARIANT_PATCH_PATH_MARKER_UID_BITS ==
+              VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT);
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_UID_MASK =
+        (1ULL << VARIANT_PATCH_PATH_MARKER_UID_BITS) - 1;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_INDEX_MASK =
+        (1ULL << VARIANT_PATCH_PATH_MARKER_INDEX_BITS) - 1;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_POS_MASK =
+        (1ULL << VARIANT_PATCH_PATH_MARKER_POS_BITS) - 1;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_BYTE_MASK =
+        (1ULL << VARIANT_PATCH_PATH_MARKER_BYTE_BITS) - 1;
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_MAX_COUNT = 1ULL
+                                                         << VARIANT_PATCH_PATH_MARKER_INDEX_BITS;
+// Flexible VARIANT partial update keeps exact patch paths in skip bitmap markers.
+// The byte position field bounds the encoded path length at the feature level.
+constexpr uint64_t VARIANT_PATCH_PATH_MARKER_MAX_BYTES = 1ULL << VARIANT_PATCH_PATH_MARKER_POS_BITS;
+constexpr uint64_t VARIANT_PATCH_PATH_MAX_COUNT = 256;
+constexpr uint64_t VARIANT_PATCH_PATH_MAX_TOTAL_BYTES = 64 * 1024;
+
+// The hidden skip bitmap stores top-level column unique ids, so VARIANT patch metadata uses
+// values outside the int32 uid range. Each path is represented by exact, column-scoped byte
+// markers with the high marker bit set; this keeps publish-conflict merge deterministic.
+bool is_variant_patch_path_marker(uint64_t value) {
+    return (value & VARIANT_PATCH_PATH_MARKER_MASK) != 0;
+}
+
+namespace {
+
+struct VariantPatchPathEncoding {
+    std::optional<uint64_t> length;
+    std::vector<std::optional<uint8_t>> bytes;
+};
+
+using VariantPatchPathMap = std::map<std::string, PathInData>;
+
+void append_fixed_u32(uint32_t value, std::string* dst) {
+    dst->push_back(static_cast<char>(value & 0xFF));
+    dst->push_back(static_cast<char>((value >> 8) & 0xFF));
+    dst->push_back(static_cast<char>((value >> 16) & 0xFF));
+    dst->push_back(static_cast<char>((value >> 24) & 0xFF));
+}
+
+bool read_fixed_u32(std::string_view src, size_t* offset, uint32_t* value) {
+    if (*offset + sizeof(uint32_t) > src.size()) {
+        return false;
+    }
+    const auto* data = reinterpret_cast<const uint8_t*>(src.data() + *offset);
+    *value = static_cast<uint32_t>(data[0]) | (static_cast<uint32_t>(data[1]) << 8) |
+             (static_cast<uint32_t>(data[2]) << 16) | (static_cast<uint32_t>(data[3]) << 24);
+    *offset += sizeof(uint32_t);
+    return true;
+}
+
+std::string encode_variant_patch_path_key(const PathInData& path) {
+    const auto& parts = path.get_parts();
+    DCHECK(!parts.empty());
+    std::string encoded;
+    append_fixed_u32(static_cast<uint32_t>(parts.size()), &encoded);
+    for (const auto& part : parts) {
+        append_fixed_u32(static_cast<uint32_t>(part.key.size()), &encoded);
+        encoded.append(part.key.data(), part.key.size());
+        encoded.push_back(static_cast<char>(part.is_nested ? 1 : 0));
+        encoded.push_back(static_cast<char>(part.anonymous_array_level));
+    }
+    return encoded;
+}
+
+Status decode_variant_patch_path_key(std::string_view encoded, PathInData* path) {
+    size_t offset = 0;
+    uint32_t part_count = 0;
+    if (!read_fixed_u32(encoded, &offset, &part_count) || part_count == 0) {
+        return Status::InternalError("Invalid VARIANT patch path marker part count");
+    }
+
+    PathInData::Parts parts;
+    parts.reserve(part_count);
+    for (uint32_t i = 0; i < part_count; ++i) {
+        uint32_t key_size = 0;
+        if (!read_fixed_u32(encoded, &offset, &key_size) ||
+            offset + key_size + 2 > encoded.size()) {
+            return Status::InternalError("Invalid VARIANT patch path marker part payload");
+        }
+        PathInData::Part part;
+        part.key = std::string_view(encoded.data() + offset, key_size);
+        offset += key_size;
+        part.is_nested = encoded[offset++] != 0;
+        part.anonymous_array_level = static_cast<uint8_t>(encoded[offset++]);
+        parts.emplace_back(part);
+    }
+    if (offset != encoded.size()) {
+        return Status::InternalError("Trailing bytes in VARIANT patch path marker");
+    }
+
+    *path = PathInData(parts);
+    return Status::OK();
+}
+
+uint64_t variant_patch_path_max_bytes() {
+    return VARIANT_PATCH_PATH_MARKER_MAX_BYTES;
+}
+
+uint64_t normalized_variant_col_unique_id(int32_t variant_col_unique_id) {
+    CHECK_GE(variant_col_unique_id, 0);
+    CHECK_LE(static_cast<uint64_t>(variant_col_unique_id), VARIANT_PATCH_PATH_MARKER_UID_MASK);
+    return static_cast<uint64_t>(variant_col_unique_id);
+}
+
+uint64_t variant_patch_path_marker_uid(uint64_t marker) {
+    return (marker >> VARIANT_PATCH_PATH_MARKER_UID_SHIFT) & VARIANT_PATCH_PATH_MARKER_UID_MASK;
+}
+
+bool is_variant_patch_path_marker_for_column(uint64_t marker, int32_t variant_col_unique_id) {
+    return is_variant_patch_path_marker(marker) &&
+           variant_patch_path_marker_uid(marker) ==
+                   normalized_variant_col_unique_id(variant_col_unique_id);
+}
+
+uint64_t variant_patch_path_marker_index(uint64_t marker) {
+    return (marker >> VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) & VARIANT_PATCH_PATH_MARKER_INDEX_MASK;
+}
+
+bool variant_patch_path_marker_is_byte(uint64_t marker) {
+    return ((marker >> VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT) & 1ULL) != 0;
+}
+
+uint64_t variant_patch_path_length_marker(int32_t variant_col_unique_id, uint64_t path_index,
+                                          uint64_t length) {
+    DCHECK_LT(path_index, VARIANT_PATCH_PATH_MARKER_MAX_COUNT);
+    DCHECK_LE(length, VARIANT_PATCH_PATH_MARKER_MAX_BYTES);
+    return VARIANT_PATCH_PATH_MARKER_MASK |
+           (normalized_variant_col_unique_id(variant_col_unique_id)
+            << VARIANT_PATCH_PATH_MARKER_UID_SHIFT) |
+           (path_index << VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) | length;
+}
+
+uint64_t variant_patch_path_byte_marker(int32_t variant_col_unique_id, uint64_t path_index,
+                                        uint64_t byte_pos, uint8_t byte) {
+    DCHECK_LT(path_index, VARIANT_PATCH_PATH_MARKER_MAX_COUNT);
+    DCHECK_LT(byte_pos, VARIANT_PATCH_PATH_MARKER_MAX_BYTES);
+    return VARIANT_PATCH_PATH_MARKER_MASK | (1ULL << VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT) |
+           (normalized_variant_col_unique_id(variant_col_unique_id)
+            << VARIANT_PATCH_PATH_MARKER_UID_SHIFT) |
+           (path_index << VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) |
+           (byte_pos << VARIANT_PATCH_PATH_MARKER_POS_SHIFT) | byte;
+}
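To make the length-marker/byte-marker pairing concrete: each path contributes one length marker plus one byte marker per encoded byte, and the unordered marker set can be reassembled deterministically. A hypothetical round-trip over a plain `std::set<uint64_t>` standing in for `BitmapValue` (same shifts as above, uid and path index fixed for brevity):

```cpp
#include <cassert>
#include <cstdint>
#include <map>
#include <set>
#include <string>

constexpr uint64_t kMarker = 1ULL << 63, kByteClass = 1ULL << 62;

// Emit one length marker plus one byte marker per byte of the encoded key.
void add_path(uint64_t uid, uint64_t idx, const std::string& key, std::set<uint64_t>* bm) {
    bm->insert(kMarker | (uid << 31) | (idx << 20) | key.size());
    for (uint64_t pos = 0; pos < key.size(); ++pos) {
        bm->insert(kMarker | kByteClass | (uid << 31) | (idx << 20) | (pos << 8) |
                   static_cast<unsigned char>(key[pos]));
    }
}

// Rebuild every encoded key from the marker set, keyed by path index.
std::map<uint64_t, std::string> decode(const std::set<uint64_t>& bm) {
    std::map<uint64_t, uint64_t> lengths;
    std::map<uint64_t, std::map<uint64_t, char>> bytes;
    for (uint64_t m : bm) {
        if (!(m & kMarker)) continue;
        uint64_t idx = (m >> 20) & ((1ULL << 11) - 1);
        if (m & kByteClass) {
            bytes[idx][(m >> 8) & ((1ULL << 12) - 1)] = static_cast<char>(m & 0xFF);
        } else {
            lengths[idx] = m & ((1ULL << 20) - 1);
        }
    }
    std::map<uint64_t, std::string> decoded;
    for (auto& [idx, len] : lengths) {
        std::string key;
        for (uint64_t pos = 0; pos < len; ++pos) key.push_back(bytes[idx][pos]);
        decoded[idx] = key;
    }
    return decoded;
}

int main() {
    std::set<uint64_t> bm;
    add_path(/*uid=*/7, /*idx=*/0, "addr", &bm);
    assert(decode(bm)[0] == "addr");
    return 0;
}
```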
+
+void remove_variant_patch_path_markers_for_column(int32_t variant_col_unique_id,
+                                                  BitmapValue* bitmap) {
+    std::vector<uint64_t> markers_to_remove;
+    for (uint64_t marker : *bitmap) {
+        if (is_variant_patch_path_marker_for_column(marker, variant_col_unique_id)) {
+            markers_to_remove.push_back(marker);
+        }
+    }
+    for (uint64_t marker : markers_to_remove) {
+        bitmap->remove(marker);
+    }
+}
+
+void remove_all_variant_patch_path_markers(BitmapValue* bitmap) {
+    std::vector<uint64_t> markers_to_remove;
+    for (uint64_t marker : *bitmap) {
+        if (is_variant_patch_path_marker(marker)) {
+            markers_to_remove.push_back(marker);
+        }
+    }
+    for (uint64_t marker : markers_to_remove) {
+        bitmap->remove(marker);
+    }
+}
+
+Status decode_variant_patch_paths(const BitmapValue& bitmap, int32_t variant_col_unique_id,
+                                  VariantPatchPathMap* paths) {
+    paths->clear();
+    std::map<uint64_t, VariantPatchPathEncoding> encoded_paths;
+    for (uint64_t marker : bitmap) {
+        if (!is_variant_patch_path_marker_for_column(marker, variant_col_unique_id)) {
+            continue;
+        }
+        auto& encoded_path = encoded_paths[variant_patch_path_marker_index(marker)];
+        if (!variant_patch_path_marker_is_byte(marker)) {
+            const uint64_t length = marker & ((1ULL << VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) - 1);
+            if (length > VARIANT_PATCH_PATH_MARKER_MAX_BYTES) {
+                return Status::InternalError(
+                        "Invalid VARIANT patch path marker length {} for column {}", length,
+                        variant_col_unique_id);
+            }
+            if (encoded_path.length.has_value() && *encoded_path.length != length) {
+                return Status::InternalError(
+                        "Conflicting VARIANT patch path marker length for column {}",
+                        variant_col_unique_id);
+            }
+            encoded_path.length = length;
+            continue;
+        }
+
+        const uint64_t byte_pos = (marker >> VARIANT_PATCH_PATH_MARKER_POS_SHIFT) &
+                                  VARIANT_PATCH_PATH_MARKER_POS_MASK;
+        const uint8_t byte = marker & VARIANT_PATCH_PATH_MARKER_BYTE_MASK;
+        if (encoded_path.bytes.size() <= byte_pos) {
+            encoded_path.bytes.resize(byte_pos + 1);
+        }
+        if (encoded_path.bytes[byte_pos].has_value() && *encoded_path.bytes[byte_pos] != byte) {
+            return Status::InternalError("Conflicting VARIANT patch path marker byte for column {}",
+                                         variant_col_unique_id);
+        }
+        encoded_path.bytes[byte_pos] = byte;
+    }
+
+    for (const auto& [_, encoded_path] : encoded_paths) {
+        if (!encoded_path.length.has_value()) {
+            if (!encoded_path.bytes.empty()) {
+                return Status::InternalError(
+                        "VARIANT patch path marker byte without length for column {}",
+                        variant_col_unique_id);
+            }
+            continue;
+        }
+        if (encoded_path.bytes.size() > *encoded_path.length) {
+            return Status::InternalError(
+                    "VARIANT patch path marker byte exceeds length for column {}",
+                    variant_col_unique_id);
+        }
+        std::string encoded_path_key;
+        encoded_path_key.reserve(*encoded_path.length);
+        for (uint64_t i = 0; i < *encoded_path.length; ++i) {
+            if (i >= encoded_path.bytes.size() || !encoded_path.bytes[i].has_value()) {
+                return Status::InternalError("Incomplete VARIANT patch path marker for column {}",
                                             variant_col_unique_id);
+            }
+            encoded_path_key.push_back(static_cast<char>(*encoded_path.bytes[i]));
+        }
+        PathInData path;
+        RETURN_IF_ERROR(decode_variant_patch_path_key(encoded_path_key, &path));
+        paths->insert_or_assign(std::move(encoded_path_key), std::move(path));
+    }
+    return Status::OK();
+}
+
+Status encode_variant_patch_paths(int32_t variant_col_unique_id, const VariantPatchPathMap& paths,
+                                  BitmapValue* bitmap) {
+    if (paths.size() > VARIANT_PATCH_PATH_MAX_COUNT) {
+        return Status::NotSupported(
+                "VARIANT flexible partial update supports at most {} patch paths per row",
+                VARIANT_PATCH_PATH_MAX_COUNT);
+    }
+    const uint64_t max_encoded_bytes = variant_patch_path_max_bytes();
+    for (const auto& [encoded_path_key, _] : paths) {
+        if (encoded_path_key.size() > max_encoded_bytes) {
+            return Status::NotSupported(
+                    "VARIANT flexible partial update encoded patch path exceeds {} bytes, actual "
+                    "{} bytes",
+                    max_encoded_bytes, encoded_path_key.size());
+        }
+    }
+
+    BitmapValue encoded_bitmap = *bitmap;
+    remove_variant_patch_path_markers_for_column(variant_col_unique_id, &encoded_bitmap);
+
+    uint64_t path_index = 0;
+    for (const auto& [encoded_path_key, _] : paths) {
+        encoded_bitmap.add(variant_patch_path_length_marker(variant_col_unique_id, path_index,
+                                                            encoded_path_key.size()));
+        for (uint64_t byte_pos = 0; byte_pos < encoded_path_key.size(); ++byte_pos) {
+            encoded_bitmap.add(variant_patch_path_byte_marker(
+                    variant_col_unique_id, path_index, byte_pos,
+                    static_cast<uint8_t>(static_cast<unsigned char>(encoded_path_key[byte_pos]))));
+        }
+        ++path_index;
+    }
+    uint64_t row_total_encoded_bytes = 0;
+    for (uint64_t marker : encoded_bitmap) {
+        if (is_variant_patch_path_marker(marker) && !variant_patch_path_marker_is_byte(marker)) {
+            row_total_encoded_bytes +=
+                    marker & ((1ULL << VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) - 1);
+            if (row_total_encoded_bytes > VARIANT_PATCH_PATH_MAX_TOTAL_BYTES) {
+                return Status::NotSupported(
+                        "VARIANT flexible partial update encoded patch paths exceed {} bytes per "
+                        "row",
+                        VARIANT_PATCH_PATH_MAX_TOTAL_BYTES);
+            }
+        }
+    }
+    *bitmap = std::move(encoded_bitmap);
+    return Status::OK();
+}
+
+void collect_variant_patch_marker_column_uids(const BitmapValue& bitmap,
+                                              std::set<int32_t>* variant_col_unique_ids) {
+    for (uint64_t marker : bitmap) {
+        if (is_variant_patch_path_marker(marker)) {
+            variant_col_unique_ids->insert(
+                    static_cast<int32_t>(variant_patch_path_marker_uid(marker)));
+        }
+    }
+}
+
+Status variant_object_patch_required_status() {
+    return Status::NotSupported(
+            "VARIANT flexible partial update only supports JSON object patch values");
+}
+
+Status variant_object_base_required_status() {
+    return Status::NotSupported(
+            "VARIANT flexible partial update only supports patching JSON object old values");
+}
+
+Status variant_doc_mode_not_supported_status() {
+    return Status::NotSupported(
+            "VARIANT flexible partial update does not support doc mode in this version");
+}
+
+const ColumnVariant& get_variant_nested_column(const IColumn& column) {
+    if (column.is_nullable()) {
+        return assert_cast<const ColumnVariant&>(
+                assert_cast<const ColumnNullable&>(column).get_nested_column());
+    }
+    return assert_cast<const ColumnVariant&>(column);
+}
+
+ColumnVariant& get_variant_nested_column(IColumn& column) {
+    if (column.is_nullable()) {
+        return assert_cast<ColumnVariant&>(
+                assert_cast<ColumnNullable&>(column).get_nested_column());
+    }
+    return assert_cast<ColumnVariant&>(column);
+}
+
+bool is_path_prefix_of(const PathInData& prefix, const PathInData& path) {
+    const auto& prefix_parts = prefix.get_parts();
+    const auto& path_parts = path.get_parts();
+    if (prefix_parts.size() > path_parts.size()) {
+        return false;
+    }
+    return std::equal(prefix_parts.begin(), prefix_parts.end(), path_parts.begin());
+}
+
+bool paths_conflict(const PathInData& left, const PathInData& right) {
+    return is_path_prefix_of(left, right) || is_path_prefix_of(right, left);
+}
+
+bool path_or_prefix_is_variant_patch_path(const PathInData& path,
+                                          const VariantPatchPathMap& patch_paths) {
+    PathInData::Parts prefix_parts;
+    prefix_parts.reserve(path.get_parts().size());
+    for (const auto& part : path.get_parts()) {
+        prefix_parts.push_back(part);
+        if (patch_paths.contains(encode_variant_patch_path_key(PathInData(prefix_parts)))) {
+            return true;
+        }
+    }
+    return false;
+}
+
+bool path_conflicts_with_any_patch_path(const PathInData& path, const VariantMap& patch_object) {
+    return std::ranges::any_of(patch_object, [&](const auto& patch_item) {
+        return paths_conflict(patch_item.first, path);
+    });
+}
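The prefix/conflict helpers above decide which old subcolumns survive a patch. A minimal sketch of the same rule with paths reduced to vectors of part keys (an assumption for illustration; `PathInData` also carries nesting flags):

```cpp
#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

using Path = std::vector<std::string>;

bool is_prefix_of(const Path& prefix, const Path& path) {
    if (prefix.size() > path.size()) return false;
    return std::equal(prefix.begin(), prefix.end(), path.begin());
}

// Two paths conflict when either is a prefix of the other, i.e. they address
// overlapping subtrees of the same JSON object.
bool conflict(const Path& l, const Path& r) {
    return is_prefix_of(l, r) || is_prefix_of(r, l);
}

int main() {
    assert(conflict({"a"}, {"a", "b"}));       // patching "a" replaces the whole "a" subtree
    assert(conflict({"a", "b"}, {"a", "b"}));  // identical paths conflict trivially
    assert(!conflict({"a", "b"}, {"a", "c"})); // sibling leaves can coexist
    return 0;
}
```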
+
+bool starts_with_json_object(std::string_view text) {
+    auto it = std::ranges::find_if_not(text, [](unsigned char ch) { return std::isspace(ch); });
+    return it != text.end() && *it == '{';
+}
+
+bool root_jsonb_field_to_json_text(const Field& field, std::string* json_text) {
+    switch (field.get_type()) {
+    case PrimitiveType::TYPE_JSONB: {
+        const auto& jsonb = field.get<JsonbField>();
+        *json_text = JsonbToJson::jsonb_to_json_string(jsonb.get_value(), jsonb.get_size());
+        return true;
+    }
+    default:
+        return false;
+    }
+}
+
+bool collect_json_object_text_map(std::string_view json_text, bool reject_json_null_value,
+                                  VariantMap* object) {
+    if (!starts_with_json_object(json_text)) {
+        return false;
+    }
+
+    auto parsed = ColumnVariant::create(0, false);
+    ParseConfig config;
+    config.parse_to = ParseConfig::ParseTo::OnlySubcolumns;
+    config.reject_json_null_value = reject_json_null_value;
+    config.record_empty_object_path = true;
+    StringRef json_ref {json_text.data(), json_text.size()};
+    parse_json_to_variant(*parsed, json_ref, nullptr, config);
+    parsed->finalize();
+
+    Field parsed_field;
+    parsed->get(0, parsed_field);
+    if (parsed_field.get_type() != PrimitiveType::TYPE_VARIANT) {
+        return false;
+    }
+    const auto& parsed_object = parsed_field.get<VariantMap>();
+    if (parsed_object.contains(PathInData())) {
+        return false;
+    }
+    for (const auto& [path, value] : parsed_object) {
+        if (!path.empty()) {
+            object->insert_or_assign(path, value);
+        }
+    }
+    return true;
+}
+
+void collect_materialized_variant_map(const ColumnVariant& variant, size_t row, VariantMap* object,
+                                      FieldWithDataType* root_field) {
+    Field field;
+    variant.get(row, field);
+    if (field.get_type() == PrimitiveType::TYPE_VARIANT) {
+        for (const auto& [path, value] : field.get<VariantMap>()) {
+            if (path.get_path() == DOC_VALUE_COLUMN_PATH) {
+                continue;
+            }
+            if (path.empty()) {
+                *root_field = value;
+                continue;
+            }
+            object->insert_or_assign(path, value);
+        }
+    }
+
+    DCHECK(!variant.has_doc_value_column(row));
+}
+
+Status collect_variant_patch_map(const ColumnVariant& variant, size_t row, bool* is_object_patch,
+                                 VariantMap* object) {
+    object->clear();
+    FieldWithDataType root_field;
+    collect_materialized_variant_map(variant, row, object, &root_field);
+    if (root_field.field.get_type() == PrimitiveType::TYPE_NULL) {
+        *is_object_patch = true;
+        return Status::OK();
+    }
+
+    std::string json_text;
+    if (!root_jsonb_field_to_json_text(root_field.field, &json_text)) {
+        *is_object_patch = false;
+        return Status::OK();
+    }
+    object->clear();
+    *is_object_patch = collect_json_object_text_map(json_text, true, object);
+    return Status::OK();
+}
+
+Status collect_variant_base_map(const ColumnVariant& variant, size_t row, VariantMap* object) {
+    object->clear();
+    FieldWithDataType root_field;
+    collect_materialized_variant_map(variant, row, object, &root_field);
+    if (root_field.field.get_type() == PrimitiveType::TYPE_NULL) {
+        return Status::OK();
+    }
+
+    std::string json_text;
+    if (!root_jsonb_field_to_json_text(root_field.field, &json_text)) {
+        return variant_object_base_required_status();
+    }
+    object->clear();
+    if (!collect_json_object_text_map(json_text, false, object)) {
+        return variant_object_base_required_status();
+    }
+    return Status::OK();
+}
+
+Status insert_variant_field(IColumn& dst_column, const Field& field) {
+    DCHECK(!get_variant_nested_column(dst_column).enable_doc_mode());
+    dst_column.insert(field);
+    return Status::OK();
+}
+
+Status check_variant_object_patch_supported(const IColumn& column) {
+    if (get_variant_nested_column(column).enable_doc_mode()) {
+        return variant_doc_mode_not_supported_status();
+    }
+    return Status::OK();
+}
+
+Status merge_variant_object_patch(const IColumn& old_column, size_t old_row,
+                                  VariantMap&& patch_object, IColumn& dst_column) {
+    VariantMap merged_object;
+    if (!old_column.is_null_at(old_row)) {
+        RETURN_IF_ERROR(collect_variant_base_map(get_variant_nested_column(old_column), old_row,
+                                                 &merged_object));
+    }
+    for (const auto& [patch_path, _] : patch_object) {
+        for (auto it = merged_object.begin(); it != merged_object.end();) {
+            if (paths_conflict(patch_path, it->first)) {
+                it = merged_object.erase(it);
+            } else {
+                ++it;
+            }
+        }
+    }
+    for (auto& [patch_path, patch_value] : patch_object) {
+        merged_object.insert_or_assign(patch_path, std::move(patch_value));
+    }
+
+    Field merged_field = Field::create_field<TYPE_VARIANT>(std::move(merged_object));
+    return insert_variant_field(dst_column, merged_field);
+}
+
+Status insert_variant_object_patch(VariantMap&& patch_object, IColumn& dst_column) {
+    Field patch_field = Field::create_field<TYPE_VARIANT>(std::move(patch_object));
+    return insert_variant_field(dst_column, patch_field);
+}
+
+} // namespace
+
+Status mark_variant_patch_paths(const IColumn& patch_column, size_t patch_row,
+                                int32_t variant_col_unique_id, BitmapValue* patch_path_markers) {
+    RETURN_IF_CATCH_EXCEPTION({
+        if (patch_column.is_null_at(patch_row)) {
+            return variant_object_patch_required_status();
+        }
+        RETURN_IF_ERROR(check_variant_object_patch_supported(patch_column));
+
+        bool is_object_patch = false;
+        VariantMap patch_object;
+        RETURN_IF_ERROR(collect_variant_patch_map(get_variant_nested_column(patch_column),
+                                                  patch_row, &is_object_patch, &patch_object));
+        if (!is_object_patch) {
+            return variant_object_patch_required_status();
+        }
+
+        VariantPatchPathMap patch_paths;
+        RETURN_IF_ERROR(decode_variant_patch_paths(*patch_path_markers, variant_col_unique_id,
+                                                   &patch_paths));
+        for (const auto& [path, _] : patch_object) {
+            patch_paths.insert_or_assign(encode_variant_patch_path_key(path), path);
+        }
+        return encode_variant_patch_paths(variant_col_unique_id, patch_paths, patch_path_markers);
+    });
+    return Status::OK();
+}
+
+Status merge_variant_patch_path_markers(const BitmapValue& left, const BitmapValue& right,
+                                        BitmapValue* merged) {
+    RETURN_IF_CATCH_EXCEPTION({
+        *merged = left;
+        *merged &= right;
+        remove_all_variant_patch_path_markers(merged);
+
+        std::set<int32_t> variant_col_unique_ids;
+        collect_variant_patch_marker_column_uids(left, &variant_col_unique_ids);
+        collect_variant_patch_marker_column_uids(right, &variant_col_unique_ids);
+        for (int32_t variant_col_unique_id : variant_col_unique_ids) {
+            VariantPatchPathMap patch_paths;
+            RETURN_IF_ERROR(decode_variant_patch_paths(left, variant_col_unique_id, &patch_paths));
+            VariantPatchPathMap right_patch_paths;
+            RETURN_IF_ERROR(
+                    decode_variant_patch_paths(right, variant_col_unique_id, &right_patch_paths));
+            patch_paths.insert(right_patch_paths.begin(), right_patch_paths.end());
+            RETURN_IF_ERROR(encode_variant_patch_paths(variant_col_unique_id, patch_paths, merged));
+        }
+        return Status::OK();
+    });
+    return Status::OK();
+}
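The erase-then-insert rule in `merge_variant_object_patch` is easiest to see on concrete data: any old leaf whose path conflicts with a patch path is dropped, everything else survives, then the patch leaves are inserted. A sketch with dotted strings in place of `PathInData` (the split/prefix check approximates the part-wise comparison):

```cpp
#include <algorithm>
#include <cstdio>
#include <map>
#include <sstream>
#include <string>
#include <vector>

using Path = std::vector<std::string>;

Path split(const std::string& dotted) {
    Path parts;
    std::stringstream ss(dotted);
    for (std::string part; std::getline(ss, part, '.');) parts.push_back(part);
    return parts;
}

bool is_prefix_of(const Path& p, const Path& q) {
    return p.size() <= q.size() && std::equal(p.begin(), p.end(), q.begin());
}

int main() {
    // old row: {"a":{"x":1,"y":2},"b":3}; patch: {"a":{"x":9}} -> leaf path "a.x"
    std::map<std::string, int> merged = {{"a.x", 1}, {"a.y", 2}, {"b", 3}};
    std::map<std::string, int> patch = {{"a.x", 9}};
    for (const auto& [ppath, _] : patch) {
        for (auto it = merged.begin(); it != merged.end();) {
            Path p = split(ppath), q = split(it->first);
            if (is_prefix_of(p, q) || is_prefix_of(q, p)) {
                it = merged.erase(it); // old leaf conflicts with a patch path
            } else {
                ++it;
            }
        }
    }
    for (const auto& [ppath, v] : patch) merged[ppath] = v;
    for (const auto& [k, v] : merged) std::printf("%s=%d\n", k.c_str(), v);
    // prints: a.x=9  a.y=2  b=3 -- "a.y" survives because only leaf "a.x" was patched
    return 0;
}
```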
+
+Status merge_variant_patch(const IColumn& old_column, size_t old_row, const IColumn& patch_column,
+                           size_t patch_row, IColumn& dst_column) {
+    RETURN_IF_CATCH_EXCEPTION({
+        if (patch_column.is_null_at(patch_row)) {
+            return variant_object_patch_required_status();
+        }
+        RETURN_IF_ERROR(check_variant_object_patch_supported(old_column));
+        RETURN_IF_ERROR(check_variant_object_patch_supported(patch_column));
+        RETURN_IF_ERROR(check_variant_object_patch_supported(dst_column));
+
+        bool is_object_patch = false;
+        VariantMap patch_object;
+        RETURN_IF_ERROR(collect_variant_patch_map(get_variant_nested_column(patch_column),
+                                                  patch_row, &is_object_patch, &patch_object));
+        if (!is_object_patch) {
+            return variant_object_patch_required_status();
+        }
+
+        RETURN_IF_ERROR(merge_variant_object_patch(old_column, old_row, std::move(patch_object),
+                                                   dst_column));
+        return Status::OK();
+    });
+    return Status::OK();
+}
+
+Status merge_variant_patch_by_path_markers(const IColumn& old_column, size_t old_row,
+                                           const IColumn& patch_column, size_t patch_row,
+                                           int32_t variant_col_unique_id,
+                                           const BitmapValue& patch_path_markers,
+                                           bool old_row_deleted, IColumn& dst_column) {
+    RETURN_IF_CATCH_EXCEPTION({
+        if (patch_column.is_null_at(patch_row)) {
+            return variant_object_patch_required_status();
+        }
+        RETURN_IF_ERROR(check_variant_object_patch_supported(old_column));
+        RETURN_IF_ERROR(check_variant_object_patch_supported(patch_column));
+        RETURN_IF_ERROR(check_variant_object_patch_supported(dst_column));
+
+        VariantMap patch_object;
+        RETURN_IF_ERROR(collect_variant_base_map(get_variant_nested_column(patch_column), patch_row,
+                                                 &patch_object));
+        VariantPatchPathMap patch_paths;
+        RETURN_IF_ERROR(decode_variant_patch_paths(patch_path_markers, variant_col_unique_id,
+                                                   &patch_paths));
+        for (auto it = patch_object.begin(); it != patch_object.end();) {
+            if (patch_paths.contains(encode_variant_patch_path_key(it->first))) {
+                ++it;
+            } else {
+                it = patch_object.erase(it);
+            }
+        }
+        if (old_row_deleted) {
+            RETURN_IF_ERROR(insert_variant_object_patch(std::move(patch_object), dst_column));
+            return Status::OK();
+        }
+
+        VariantMap merged_object;
+        if (!old_column.is_null_at(old_row)) {
+            RETURN_IF_ERROR(collect_variant_base_map(get_variant_nested_column(old_column), old_row,
+                                                     &merged_object));
+        }
+        for (auto it = merged_object.begin(); it != merged_object.end();) {
+            if (path_or_prefix_is_variant_patch_path(it->first, patch_paths) ||
+                path_conflicts_with_any_patch_path(it->first, patch_object)) {
+                it = merged_object.erase(it);
+            } else {
+                ++it;
+            }
+        }
+        for (auto& [patch_path, patch_value] : patch_object) {
+            merged_object.insert_or_assign(patch_path, std::move(patch_value));
+        }
+
+        Field merged_field = Field::create_field<TYPE_VARIANT>(std::move(merged_object));
+        RETURN_IF_ERROR(insert_variant_field(dst_column, merged_field));
+        return Status::OK();
+    });
+    return Status::OK();
+}
+
 Status _parse_and_materialize_variant_columns(Block& block, const std::vector<int>& variant_pos,
                                               const std::vector<ParseConfig>& configs) {
@@ -2216,7 +2874,8 @@ Status parse_and_materialize_variant_columns(Block& block, const std::vector<int
 Status parse_and_materialize_variant_columns(Block& block, const TabletSchema& tablet_schema,
-                                             const std::vector<int>& column_pos) {
+                                             const std::vector<int>& column_pos,
+                                             bool reject_json_null_value) {
     std::vector<int> variant_column_pos;
     std::vector<int> variant_schema_pos;
     variant_column_pos.reserve(column_pos.size());
@@ -2240,6 +2899,8 @@ Status parse_and_materialize_variant_columns(Block& block, const TabletSchema& t
         configs[i].deprecated_enable_flatten_nested =
                 tablet_schema.deprecated_variant_flatten_nested();
         configs[i].check_duplicate_json_path = config::variant_enable_duplicate_json_path_check;
+        configs[i].reject_json_null_value = reject_json_null_value;
+        configs[i].record_empty_object_path = reject_json_null_value;
         const auto& column = tablet_schema.column(variant_schema_pos[i]);
         if (!column.is_variant_type()) {
             return Status::InternalError("column is not variant type, column name: {}",
column name: {}", diff --git a/be/src/exec/common/variant_util.h b/be/src/exec/common/variant_util.h index f4302146972e2c..d8826f7700607f 100644 --- a/be/src/exec/common/variant_util.h +++ b/be/src/exec/common/variant_util.h @@ -259,7 +259,23 @@ void parse_json_to_variant(IColumn& column, const StringRef& jsons, JsonParser* // Parse variant columns by picking variant positions from `column_pos` and generating ParseConfig // based on tablet schema settings (flatten nested / doc snapshot mode). Status parse_and_materialize_variant_columns(Block& block, const TabletSchema& tablet_schema, - const std::vector& column_pos); + const std::vector& column_pos, + bool reject_json_null_value = false); + +// Merge one VARIANT object patch row into an old VARIANT row and append the result to dst_column. +// Flexible VARIANT partial update only supports JSON object patches in this version. +Status merge_variant_patch(const IColumn& old_column, size_t old_row, const IColumn& patch_column, + size_t patch_row, IColumn& dst_column); +bool is_variant_patch_path_marker(uint64_t value); +Status mark_variant_patch_paths(const IColumn& patch_column, size_t patch_row, + int32_t variant_col_unique_id, BitmapValue* patch_path_markers); +Status merge_variant_patch_path_markers(const BitmapValue& left, const BitmapValue& right, + BitmapValue* merged); +Status merge_variant_patch_by_path_markers(const IColumn& old_column, size_t old_row, + const IColumn& patch_column, size_t patch_row, + int32_t variant_col_unique_id, + const BitmapValue& patch_path_markers, + bool old_row_deleted, IColumn& dst_column); // Parse doc snapshot column (paths/values/offsets stored in ColumnVariant) into per-path subcolumns. // NOTE: Returned map keys are `std::string_view` pointing into the underlying doc snapshot paths diff --git a/be/src/format/json/new_json_reader.cpp b/be/src/format/json/new_json_reader.cpp index da141437fcf200..c86ddee16b1238 100644 --- a/be/src/format/json/new_json_reader.cpp +++ b/be/src/format/json/new_json_reader.cpp @@ -39,6 +39,7 @@ #include "common/compiler_util.h" // IWYU pragma: keep #include "common/config.h" +#include "common/exception.h" #include "common/status.h" #include "core/assert_cast.h" #include "core/block/column_with_type_and_name.h" @@ -48,6 +49,7 @@ #include "core/column/column_nullable.h" #include "core/column/column_string.h" #include "core/column/column_struct.h" +#include "core/column/column_variant.h" #include "core/custom_allocator.h" #include "core/data_type/data_type_array.h" #include "core/data_type/data_type_factory.hpp" @@ -55,6 +57,7 @@ #include "core/data_type/data_type_number.h" // IWYU pragma: keep #include "core/data_type/data_type_struct.h" #include "core/data_type/define_primitive_type.h" +#include "exec/common/variant_util.h" #include "exec/scan/scanner.h" #include "exprs/json_functions.h" #include "format/file_reader/new_plain_text_line_reader.h" @@ -984,6 +987,16 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val // set _seen_columns.assign(block.columns(), false); size_t cur_row_count = block.rows(); + auto pop_current_row = [&]() { + for (size_t index = 0; index < block.columns(); ++index) { + auto column = block.get_by_position(index).column->assume_mutable(); + if (column->size() > cur_row_count) { + DCHECK(column->size() == cur_row_count + 1); + column->pop_back(column->size() - cur_row_count); + DCHECK(column->size() == cur_row_count); + } + } + }; bool has_valid_value = false; // iterate through object, simdjson::ondemond will parsing 
@@ -1002,7 +1015,7 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val
             // This key is not exist in slot desc, just ignore
             continue;
         }
-        if (column_index == skip_bitmap_col_idx) {
+        if (skip_bitmap_col_idx >= 0 && std::cmp_equal(column_index, skip_bitmap_col_idx)) {
             continue;
         }
         if (_seen_columns[column_index]) {
@@ -1018,8 +1031,10 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val
         auto* column_ptr = block.get_by_position(column_index).column->assume_mutable().get();
         RETURN_IF_ERROR(_simdjson_write_data_to_column(
                 val, slot_descs[column_index]->type(), column_ptr,
-                slot_descs[column_index]->col_name(), _serdes[column_index], valid));
+                slot_descs[column_index]->col_name(), _serdes[column_index], valid,
+                _is_flexible_variant_column(*slot_descs[column_index])));
         if (!(*valid)) {
+            pop_current_row();
             return Status::OK();
         }
         _seen_columns[column_index] = true;
@@ -1050,7 +1065,7 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val
         if (_seen_columns[i]) {
             continue;
         }
-        if (i == skip_bitmap_col_idx) {
+        if (skip_bitmap_col_idx >= 0 && std::cmp_equal(i, skip_bitmap_col_idx)) {
             continue;
         }
@@ -1074,15 +1089,7 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val
                                              "The key columns can not be ommited in flexible "
                                              "partial update, missing key column: {}",
                                              slot_desc->col_name(), valid));
-            // remove this line in block
-            for (size_t index = 0; index < block.columns(); ++index) {
-                auto column = block.get_by_position(index).column->assume_mutable();
-                if (column->size() != cur_row_count) {
-                    DCHECK(column->size() == cur_row_count + 1);
-                    column->pop_back(1);
-                    DCHECK(column->size() == cur_row_count);
-                }
-            }
+            pop_current_row();
             return Status::OK();
         }
         _set_skip_bitmap_mark(slot_desc, column_ptr, block, cur_row_count, valid);
@@ -1109,11 +1116,34 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value&
                                                      const DataTypePtr& type_desc,
                                                      IColumn* column_ptr,
                                                      const std::string& column_name,
-                                                     DataTypeSerDeSPtr serde, bool* valid) {
+                                                     DataTypeSerDeSPtr serde, bool* valid,
+                                                     bool is_flexible_variant_column) {
     ColumnNullable* nullable_column = nullptr;
     IColumn* data_column_ptr = column_ptr;
     DataTypeSerDeSPtr data_serde = serde;
+    auto primitive_type = remove_nullable(type_desc)->get_primitive_type();
+    const IColumn* nested_column_ptr =
+            column_ptr->is_nullable()
+                    ? assert_cast<const ColumnNullable&>(*column_ptr).get_nested_column_ptr().get()
+                    : column_ptr;
+    const bool is_flexible_variant_patch_column =
+            _should_process_skip_bitmap_col() &&
+            (primitive_type == TYPE_VARIANT ||
+             check_and_get_column<ColumnVariant>(nested_column_ptr) != nullptr ||
+             is_flexible_variant_column);
+    if (is_flexible_variant_patch_column && value.type() != simdjson::ondemand::json_type::object) {
+        if (_is_load) {
+            RETURN_IF_ERROR(_append_error_msg(
+                    nullptr,
+                    "VARIANT flexible partial update only supports JSON object patch values", "",
+                    valid));
+            return Status::OK();
+        }
+        return Status::NotSupported(
+                "VARIANT flexible partial update only supports JSON object patch values");
+    }
+
     if (column_ptr->is_nullable()) {
         nullable_column = reinterpret_cast<ColumnNullable*>(column_ptr);
@@ -1138,8 +1168,27 @@ Status NewJsonReader::_simdjson_write_data_to_column(simdjson::ondemand::value&
         }
     }

-    auto primitive_type = type_desc->get_primitive_type();
     if (_is_load || !is_complex_type(primitive_type)) {
+        if (is_flexible_variant_patch_column && primitive_type == TYPE_VARIANT) {
+            ParseConfig parse_config;
+            parse_config.check_duplicate_json_path =
+                    config::variant_enable_duplicate_json_path_check;
+            parse_config.reject_json_null_value = true;
+            parse_config.record_empty_object_path = true;
+            std::string_view json_str = simdjson::to_json_string(value);
+            StringRef json_ref {json_str.data(), json_str.size()};
+            try {
+                variant_util::parse_json_to_variant(*data_column_ptr, json_ref, nullptr,
+                                                    parse_config);
+            } catch (const Exception& e) {
+                return e.to_status();
+            }
+            if (nullable_column) {
+                nullable_column->get_null_map_data().push_back(0);
+            }
+            *valid = true;
+            return Status::OK();
+        }
         if (value.type() == simdjson::ondemand::json_type::string) {
             std::string_view value_string;
             if constexpr (use_string_cache) {
@@ -1610,7 +1659,26 @@ Status NewJsonReader::_fill_missing_column(SlotDescriptor* slot_desc, DataTypeSe
     return Status::OK();
 }

-void NewJsonReader::_append_empty_skip_bitmap_value(Block& block, size_t cur_row_count) {
+bool NewJsonReader::_is_flexible_variant_column(const SlotDescriptor& slot_desc) const {
+    if (!_should_process_skip_bitmap_col()) {
+        return false;
+    }
+    if (remove_nullable(slot_desc.type())->get_primitive_type() == TYPE_VARIANT) {
+        return true;
+    }
+    DORIS_CHECK(_state != nullptr);
+    DORIS_CHECK(_params.__isset.dest_tuple_id);
+    const auto* dest_tuple_desc = _state->desc_tbl().get_tuple_descriptor(_params.dest_tuple_id);
+    DORIS_CHECK(dest_tuple_desc != nullptr);
+    for (const auto* dest_slot_desc : dest_tuple_desc->slots()) {
+        if (dest_slot_desc->col_name() == slot_desc.col_name()) {
+            return remove_nullable(dest_slot_desc->type())->get_primitive_type() == TYPE_VARIANT;
+        }
+    }
+    return false;
+}
+
+void NewJsonReader::_append_empty_skip_bitmap_value(Block& block, size_t cur_row_count) const {
     auto* skip_bitmap_nullable_col_ptr = assert_cast<ColumnNullable*>(
             block.get_by_position(skip_bitmap_col_idx).column->assume_mutable().get());
     auto* skip_bitmap_col_ptr =
@@ -1623,7 +1691,7 @@ }

 void NewJsonReader::_set_skip_bitmap_mark(SlotDescriptor* slot_desc, IColumn* column_ptr,
-                                          Block& block, size_t cur_row_count, bool* valid) {
+                                          Block& block, size_t cur_row_count, bool* valid) const {
     // we record the missing column's column unique id in skip bitmap
     // to indicate which columns need to do the alignment process
     auto* skip_bitmap_nullable_col_ptr = assert_cast<ColumnNullable*>(
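The `pop_current_row` lambda factored out above preserves one invariant worth seeing in isolation: after a mid-row failure, every column that already received a value for the current row is truncated back, so all columns leave the call with identical sizes. A toy model with `int` vectors standing in for `IColumn`:

```cpp
#include <cassert>
#include <vector>

int main() {
    size_t cur_row_count = 2; // rows fully written so far
    std::vector<std::vector<int>> columns = {{1, 2}, {1, 2}, {1, 2}};

    // A new row is parsed key by key; column 0 gets its value, then column 1
    // fails validation, leaving the block ragged.
    columns[0].push_back(99);

    // pop_current_row: trim every column that ran ahead back to cur_row_count.
    for (auto& column : columns) {
        if (column.size() > cur_row_count) {
            assert(column.size() == cur_row_count + 1); // at most one extra cell per column
            column.resize(cur_row_count);
        }
    }
    for (const auto& column : columns) assert(column.size() == cur_row_count);
    return 0;
}
```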
diff --git a/be/src/format/json/new_json_reader.h b/be/src/format/json/new_json_reader.h
index e74607a0e6de56..b89bb2f7f6661e 100644
--- a/be/src/format/json/new_json_reader.h
+++ b/be/src/format/json/new_json_reader.h
@@ -165,7 +165,7 @@ class NewJsonReader : public TableFormatReader {
     Status _simdjson_write_data_to_column(simdjson::ondemand::value& value,
                                           const DataTypePtr& type_desc, IColumn* column_ptr,
                                           const std::string& column_name, DataTypeSerDeSPtr serde,
-                                          bool* valid);
+                                          bool* valid, bool is_flexible_variant_column = false);

     Status _simdjson_write_columns_by_jsonpath(simdjson::ondemand::object* value,
                                                const std::vector<SlotDescriptor*>& slot_descs,
@@ -190,9 +190,10 @@ class NewJsonReader : public TableFormatReader {
     // flexible partial update can not be used when user specify jsonpaths, so we just fill the skip bitmap
     // in `_simdjson_handle_simple_json` and `_vhandle_simple_json` (which will be used when jsonpaths is not specified)
     bool _should_process_skip_bitmap_col() const { return skip_bitmap_col_idx != -1; }
-    void _append_empty_skip_bitmap_value(Block& block, size_t cur_row_count);
+    bool _is_flexible_variant_column(const SlotDescriptor& slot_desc) const;
+    void _append_empty_skip_bitmap_value(Block& block, size_t cur_row_count) const;
     void _set_skip_bitmap_mark(SlotDescriptor* slot_desc, IColumn* column_ptr, Block& block,
-                               size_t cur_row_count, bool* valid);
+                               size_t cur_row_count, bool* valid) const;
     RuntimeState* _state = nullptr;
     RuntimeProfile* _profile = nullptr;
     ScannerCounter* _counter = nullptr;
diff --git a/be/src/load/delta_writer/delta_writer_v2.cpp b/be/src/load/delta_writer/delta_writer_v2.cpp
index 78271f2a48202e..d0d72d49d0e922 100644
--- a/be/src/load/delta_writer/delta_writer_v2.cpp
+++ b/be/src/load/delta_writer/delta_writer_v2.cpp
@@ -243,7 +243,8 @@ Status DeltaWriterV2::_build_current_tablet_schema(int64_t index_id,
             table_schema_param->partial_update_input_columns(),
             table_schema_param->is_strict_mode(), table_schema_param->timestamp_ms(),
             table_schema_param->nano_seconds(), table_schema_param->timezone(),
-            table_schema_param->auto_increment_coulumn()));
+            table_schema_param->auto_increment_coulumn(),
+            table_schema_param->sequence_map_col_uid()));
     return Status::OK();
 }
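The memtable change that follows skips in-memory pre-aggregation whenever any row carries a VARIANT patch. The detection rule: in flexible mode a row patches a variant column exactly when its skip bitmap does *not* contain that column's uid (a present uid means the column was skipped entirely). A reduced sketch of that predicate, with `std::set<uint64_t>` as the skip bitmap (helper name hypothetical):

```cpp
#include <cassert>
#include <cstdint>
#include <set>
#include <vector>

// Reduction of MemTable::_has_flexible_variant_patch_rows(): a row patches a
// variant column iff the column's uid is absent from the row's skip bitmap.
bool has_flexible_variant_patch_rows(const std::vector<std::set<uint64_t>>& skip_bitmaps,
                                     const std::vector<uint64_t>& variant_col_uids) {
    for (uint64_t uid : variant_col_uids) {
        for (const auto& skip_bitmap : skip_bitmaps) {
            if (!skip_bitmap.contains(uid)) {
                return true; // this row supplies a patch for the variant column
            }
        }
    }
    return false;
}

int main() {
    // Row 0 skips variant column 10 (uid present); row 1 patches it (uid absent).
    std::vector<std::set<uint64_t>> bitmaps = {{10, 11}, {11}};
    assert(has_flexible_variant_patch_rows(bitmaps, {10}));
    assert(!has_flexible_variant_patch_rows({{10, 11}}, {10}));
    return 0;
}
```

Pre-merging such rows in the memtable would require the full patch-merge machinery, so the cheaper choice here is to defer all merging to the segment writer.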
diff --git a/be/src/load/memtable/memtable.cpp b/be/src/load/memtable/memtable.cpp
index 588d8543d7b4b4..49a9470e43a0fd 100644
--- a/be/src/load/memtable/memtable.cpp
+++ b/be/src/load/memtable/memtable.cpp
@@ -29,6 +29,7 @@
 #include "bvar/bvar.h"
 #include "common/config.h"
 #include "core/column/column.h"
+#include "core/column/column_complex.h"
 #include "exprs/aggregate/aggregate_function_reader.h"
 #include "exprs/aggregate/aggregate_function_simple_factory.h"
 #include "load/memtable/memtable_memory_limiter.h"
@@ -689,6 +690,9 @@ void MemTable::shrink_memtable_by_agg() {
     if (_keys_type == KeysType::DUP_KEYS) {
         return;
     }
+    if (_has_flexible_variant_patch_rows()) {
+        return;
+    }
     size_t same_keys_num = _sort();
     if (same_keys_num != 0) {
         (_skip_bitmap_col_idx == -1) ? _aggregate<false>() : _aggregate<false, true>();
@@ -750,9 +754,34 @@ size_t MemTable::get_flush_reserve_memory_size() const {
     return static_cast<size_t>(static_cast<double>(_input_mutable_block.allocated_bytes()) * 1.2);
 }

+bool MemTable::_has_flexible_variant_patch_rows() const {
+    if (_partial_update_mode != UniqueKeyUpdateModePB::UPDATE_FLEXIBLE_COLUMNS ||
+        _tablet_schema->num_variant_columns() == 0 || _skip_bitmap_col_idx == -1) {
+        return false;
+    }
+    DCHECK_LT(_skip_bitmap_col_idx, _input_mutable_block.columns());
+    const auto& skip_bitmaps =
+            assert_cast<const ColumnBitmap&>(
+                    *_input_mutable_block.get_column_by_position(_skip_bitmap_col_idx))
+                    .get_data();
+    for (size_t cid = _tablet_schema->num_key_columns(); cid < _num_columns; ++cid) {
+        const auto& column = _tablet_schema->column(cid);
+        if (!column.is_variant_type()) {
+            continue;
+        }
+        for (const auto& skip_bitmap : skip_bitmaps) {
+            if (!skip_bitmap.contains(column.unique_id())) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
 Status MemTable::_to_block(std::unique_ptr<Block>* res) {
     size_t same_keys_num = _sort();
-    if (_keys_type == KeysType::DUP_KEYS || same_keys_num == 0) {
+    if (_keys_type == KeysType::DUP_KEYS || same_keys_num == 0 ||
+        _has_flexible_variant_patch_rows()) {
         if (_keys_type == KeysType::DUP_KEYS && _tablet_schema->num_key_columns() == 0) {
             _output_mutable_block.swap(_input_mutable_block);
         } else {
diff --git a/be/src/load/memtable/memtable.h b/be/src/load/memtable/memtable.h
index 42f96dd4f5f769..4620f541ef2ce8 100644
--- a/be/src/load/memtable/memtable.h
+++ b/be/src/load/memtable/memtable.h
@@ -280,6 +280,7 @@ class MemTable {
                            DorisVector<std::shared_ptr<RowInBlock>>& temp_row_in_blocks);
     Status _put_into_output(Block& in_block);
+    bool _has_flexible_variant_patch_rows() const;

     bool _is_first_insertion;
     void _init_agg_functions(const Block* block);
diff --git a/be/src/service/http/action/stream_load.cpp b/be/src/service/http/action/stream_load.cpp
index 5d9cf40aea7483..b5990f6aa424fe 100644
--- a/be/src/service/http/action/stream_load.cpp
+++ b/be/src/service/http/action/stream_load.cpp
@@ -635,7 +635,8 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req,
     StringCaseMap<TMergeType::type> merge_type_map = {{"APPEND", TMergeType::APPEND},
                                                       {"DELETE", TMergeType::DELETE},
                                                       {"MERGE", TMergeType::MERGE}};
-    if (!http_req->header(HTTP_MERGE_TYPE).empty()) {
+    bool merge_type_specified = !http_req->header(HTTP_MERGE_TYPE).empty();
+    if (merge_type_specified) {
         std::string merge_type_str = http_req->header(HTTP_MERGE_TYPE);
         auto iter = merge_type_map.find(merge_type_str);
         if (iter != merge_type_map.end()) {
@@ -652,6 +653,7 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req,
         }
     }
     request.__set_merge_type(merge_type);
+    request.__set_merge_type_specified(merge_type_specified);
     if (!http_req->header(HTTP_DELETE_CONDITION).empty()) {
         request.__set_delete_condition(http_req->header(HTTP_DELETE_CONDITION));
     }
diff --git a/be/src/storage/partial_update_info.cpp b/be/src/storage/partial_update_info.cpp
index 7b97ecfc081167..1cd80e628d2b4d 100644
--- a/be/src/storage/partial_update_info.cpp
+++ b/be/src/storage/partial_update_info.cpp
@@ -20,6 +20,7 @@
 #include
 #include
+#include

 #include "common/consts.h"
 #include "common/logging.h"
@@ -27,6 +28,7 @@
 #include "core/block/block.h"
 #include "core/data_type/data_type_number.h" // IWYU pragma: keep
 #include "core/value/bitmap_value.h"
+#include "exec/common/variant_util.h"
 #include "storage/iterator/olap_data_convertor.h"
 #include "storage/olap_common.h"
 #include "storage/rowset/rowset.h"
@@ -126,6 +128,7 @@ void PartialUpdateInfo::to_pb(PartialUpdateInfoPB* partial_update_info_pb) const
             is_input_columns_contains_auto_inc_column);
     partial_update_info_pb->set_is_schema_contains_auto_inc_column(
             is_schema_contains_auto_inc_column);
+    partial_update_info_pb->set_sequence_map_col_uid(sequence_map_col_unqiue_id);
     for (const auto& value : default_values) {
         partial_update_info_pb->add_default_values(value);
     }
@@ -169,6 +172,9 @@ void PartialUpdateInfo::from_pb(PartialUpdateInfoPB* partial_update_info_pb) {
             partial_update_info_pb->is_input_columns_contains_auto_inc_column();
     is_schema_contains_auto_inc_column =
             partial_update_info_pb->is_schema_contains_auto_inc_column();
+    sequence_map_col_unqiue_id = partial_update_info_pb->has_sequence_map_col_uid()
+                                         ? partial_update_info_pb->sequence_map_col_uid()
+                                         : -1;
     if (partial_update_info_pb->has_nano_seconds()) {
         nano_seconds = partial_update_info_pb->nano_seconds();
     }
@@ -464,6 +470,9 @@ void FlexibleReadPlan::prepare_to_read(const RowLocation& row_location, size_t p
                                        const BitmapValue& skip_bitmap) {
     if (!use_row_store) {
         for (uint64_t col_uid : skip_bitmap) {
+            if (variant_util::is_variant_patch_path_marker(col_uid)) {
+                continue;
+            }
             plan[row_location.rowset_id][row_location.segment_id][static_cast<uint32_t>(col_uid)]
                     .emplace_back(row_location.row_id, pos);
         }
@@ -567,26 +576,36 @@ Status FlexibleReadPlan::fill_non_primary_key_columns(
     return Status::OK();
 }

-static void fill_non_primary_key_cell_for_column_store(
+static bool old_row_has_delete_sign_for_column_store(
+        const signed char* delete_sign_column_data, const TabletSchema& tablet_schema,
+        std::map<uint32_t, std::map<uint32_t, uint32_t>>& read_index, uint32_t segment_pos) {
+    if (delete_sign_column_data == nullptr) {
+        return false;
+    }
+    if (auto it = read_index[tablet_schema.delete_sign_idx()].find(segment_pos);
+        it != read_index[tablet_schema.delete_sign_idx()].end()) {
+        return delete_sign_column_data[it->second] != 0;
+    }
+    return false;
+}
+
+static Status fill_non_primary_key_cell_for_column_store(
         const TabletColumn& tablet_column, uint32_t cid, MutableColumnPtr& new_col,
         const IColumn& default_value_col, const IColumn& old_value_col, const IColumn& cur_col,
         std::size_t block_pos, uint32_t segment_pos, bool skipped, bool row_has_sequence_col,
         bool use_default, const signed char* delete_sign_column_data,
         const TabletSchema& tablet_schema,
-        std::map<uint32_t, std::map<uint32_t, uint32_t>>& read_index,
-        const PartialUpdateInfo* info) {
+        std::map<uint32_t, std::map<uint32_t, uint32_t>>& read_index, const PartialUpdateInfo* info,
+        const BitmapValue& skip_bitmap) {
     if (skipped) {
-        DCHECK(cid != tablet_schema.skip_bitmap_col_idx());
-        DCHECK(cid != tablet_schema.version_col_idx());
+        DCHECK(std::cmp_not_equal(cid, tablet_schema.skip_bitmap_col_idx()));
+        DCHECK(std::cmp_not_equal(cid, tablet_schema.version_col_idx()));
         DCHECK(!tablet_column.is_row_store_column());

         if (!use_default) {
             if (delete_sign_column_data != nullptr) {
-                bool old_row_delete_sign = false;
-                if (auto it = read_index[tablet_schema.delete_sign_idx()].find(segment_pos);
-                    it != read_index[tablet_schema.delete_sign_idx()].end()) {
-                    old_row_delete_sign = (delete_sign_column_data[it->second] != 0);
-                }
+                bool old_row_delete_sign = old_row_has_delete_sign_for_column_store(
+                        delete_sign_column_data, tablet_schema, read_index, segment_pos);
                 if (old_row_delete_sign) {
                     if (!tablet_schema.has_sequence_col()) {
@@ -625,8 +644,18 @@ static void fill_non_primary_key_cell_for_column_store(
             new_col->insert_from(old_value_col, pos_in_old_block);
         }
     } else {
+        if (tablet_column.is_variant_type() && !use_default &&
+            !old_row_has_delete_sign_for_column_store(delete_sign_column_data, tablet_schema,
+                                                      read_index, segment_pos) &&
+            read_index.contains(cid) && read_index.at(cid).contains(segment_pos)) {
+            RETURN_IF_ERROR(variant_util::merge_variant_patch_by_path_markers(
+                    old_value_col, read_index.at(cid).at(segment_pos), cur_col, block_pos,
+                    tablet_column.unique_id(), skip_bitmap, false, *new_col));
+            return Status::OK();
+        }
         new_col->insert_from(cur_col, block_pos);
     }
+    return Status::OK();
 }

 Status FlexibleReadPlan::fill_non_primary_key_columns_for_column_store(
@@ -665,7 +694,7 @@ Status FlexibleReadPlan::fill_non_primary_key_columns_for_column_store(
             auto segment_pos = segment_start_pos + idx;
             auto block_pos = block_start_pos + idx;

-            fill_non_primary_key_cell_for_column_store(
+            RETURN_IF_ERROR(fill_non_primary_key_cell_for_column_store(
                     tablet_column, cid, mutable_full_columns[cid],
                     *default_value_block.get_by_position(i).column,
                     *old_value_block.get_by_position(i).column, *block->get_by_position(cid).column,
@@ -674,23 +703,25 @@ Status FlexibleReadPlan::fill_non_primary_key_columns_for_column_store(
                             ? !skip_bitmaps->at(block_pos).contains(seq_col_unique_id)
                             : false,
                     use_default_or_null_flag[idx], delete_sign_column_data, tablet_schema,
-                    read_index, info);
+                    read_index, info, skip_bitmaps->at(block_pos)));
         }
     }
     return Status::OK();
 }

-static void fill_non_primary_key_cell_for_row_store(
+static Status fill_non_primary_key_cell_for_row_store(
         const TabletColumn& tablet_column, uint32_t cid, MutableColumnPtr& new_col,
         const IColumn& default_value_col, const IColumn& old_value_col, const IColumn& cur_col,
         std::size_t block_pos, bool skipped, bool row_has_sequence_col, bool use_default,
-        const signed char* delete_sign_column_data, uint32_t pos_in_old_block,
-        const TabletSchema& tablet_schema, const PartialUpdateInfo* info) {
+        const signed char* delete_sign_column_data, bool old_row_exists, uint32_t pos_in_old_block,
+        const TabletSchema& tablet_schema, const PartialUpdateInfo* info,
+        const BitmapValue& skip_bitmap) {
     if (skipped) {
-        DCHECK(cid != tablet_schema.skip_bitmap_col_idx());
-        DCHECK(cid != tablet_schema.version_col_idx());
+        DCHECK(std::cmp_not_equal(cid, tablet_schema.skip_bitmap_col_idx()));
+        DCHECK(std::cmp_not_equal(cid, tablet_schema.version_col_idx()));
         DCHECK(!tablet_column.is_row_store_column());

         if (!use_default) {
+            DCHECK(old_row_exists);
             if (delete_sign_column_data != nullptr) {
                 bool old_row_delete_sign = (delete_sign_column_data[pos_in_old_block] != 0);
                 if (old_row_delete_sign) {
@@ -730,8 +761,18 @@ static void fill_non_primary_key_cell_for_row_store(
             new_col->insert_from(old_value_col, pos_in_old_block);
         }
     } else {
+        bool old_row_delete_sign = (old_row_exists && delete_sign_column_data != nullptr &&
+                                    delete_sign_column_data[pos_in_old_block] != 0);
+        if (tablet_column.is_variant_type() && old_row_exists && !use_default &&
+            !old_row_delete_sign) {
+            RETURN_IF_ERROR(variant_util::merge_variant_patch_by_path_markers(
+                    old_value_col, pos_in_old_block, cur_col, block_pos, tablet_column.unique_id(),
+                    skip_bitmap, false, *new_col));
+            return Status::OK();
+        }
         new_col->insert_from(cur_col, block_pos);
     }
+    return Status::OK();
 }

 Status FlexibleReadPlan::fill_non_primary_key_columns_for_row_store(
@@ -768,9 +809,11 @@ Status FlexibleReadPlan::fill_non_primary_key_columns_for_row_store(
     for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) {
         auto segment_pos = segment_start_pos + idx;
         auto block_pos = block_start_pos + idx;
-        auto pos_in_old_block = read_index[segment_pos];
+        auto read_index_iter = read_index.find(segment_pos);
+        bool old_row_exists = (read_index_iter != read_index.end());
+        uint32_t pos_in_old_block = old_row_exists ? read_index_iter->second : 0;

-        fill_non_primary_key_cell_for_row_store(
+        RETURN_IF_ERROR(fill_non_primary_key_cell_for_row_store(
                 tablet_column, cid, mutable_full_columns[cid],
                 *default_value_block.get_by_position(i).column,
                 *old_value_block.get_by_position(i).column, *block->get_by_position(cid).column,
@@ -778,8 +821,8 @@ Status FlexibleReadPlan::fill_non_primary_key_columns_for_row_store(
                 tablet_schema.has_sequence_col()
                         ? !skip_bitmaps->at(block_pos).contains(seq_col_unique_id)
                         : false,
-                use_default_or_null_flag[idx], delete_sign_column_data, pos_in_old_block,
-                tablet_schema, info);
+                use_default_or_null_flag[idx], delete_sign_column_data, old_row_exists,
+                pos_in_old_block, tablet_schema, info, skip_bitmaps->at(block_pos)));
     }
     return Status::OK();
 }
@@ -788,10 +831,10 @@ Status FlexibleReadPlan::fill_non_primary_key_columns_for_row_store(
 BlockAggregator::BlockAggregator(segment_v2::VerticalSegmentWriter& vertical_segment_writer)
         : _writer(vertical_segment_writer), _tablet_schema(*_writer._tablet_schema) {}

-void BlockAggregator::merge_one_row(MutableBlock& dst_block, Block* src_block, int rid,
-                                    BitmapValue& skip_bitmap) {
+Status BlockAggregator::merge_one_row(MutableBlock& dst_block, Block* src_block, int rid,
+                                      BitmapValue& skip_bitmap) {
     for (size_t cid {_tablet_schema.num_key_columns()}; cid < _tablet_schema.num_columns(); cid++) {
-        if (cid == _tablet_schema.skip_bitmap_col_idx()) {
+        if (std::cmp_equal(cid, _tablet_schema.skip_bitmap_col_idx())) {
             auto& cur_skip_bitmap =
                     assert_cast<ColumnBitmap*>(dst_block.mutable_columns()[cid].get())
                             ->get_data()
                             .back();
@@ -800,17 +843,31 @@ void BlockAggregator::merge_one_row(MutableBlock& dst_block, Block* src_block, i
             const auto& new_row_skip_bitmap =
                     assert_cast<ColumnBitmap*>(
                             src_block->get_by_position(cid).column->assume_mutable().get())
                             ->get_data()[rid];
-            cur_skip_bitmap &= new_row_skip_bitmap;
+            BitmapValue merged_skip_bitmap;
+            RETURN_IF_ERROR(variant_util::merge_variant_patch_path_markers(
+                    cur_skip_bitmap, new_row_skip_bitmap, &merged_skip_bitmap));
+            cur_skip_bitmap = std::move(merged_skip_bitmap);
             continue;
         }
         if (!skip_bitmap.contains(_tablet_schema.column(cid).unique_id())) {
-            dst_block.mutable_columns()[cid]->pop_back(1);
-            dst_block.mutable_columns()[cid]->insert_from(*src_block->get_by_position(cid).column,
-                                                          rid);
+            if (_tablet_schema.column(cid).is_variant_type()) {
+                auto merged_col = dst_block.mutable_columns()[cid]->clone_empty();
+                RETURN_IF_ERROR(variant_util::merge_variant_patch(
+                        *dst_block.mutable_columns()[cid],
+                        dst_block.mutable_columns()[cid]->size() - 1,
+                        *src_block->get_by_position(cid).column, rid, *merged_col));
+                dst_block.mutable_columns()[cid]->pop_back(1);
+                dst_block.mutable_columns()[cid]->insert_from(*merged_col, 0);
+            } else {
+                dst_block.mutable_columns()[cid]->pop_back(1);
+                dst_block.mutable_columns()[cid]->insert_from(
+                        *src_block->get_by_position(cid).column, rid);
+            }
         }
     }
     VLOG_DEBUG << fmt::format("merge a row, after merge, output_block.rows()={}, state: {}",
                               dst_block.rows(), _state.to_string());
+    return Status::OK();
 }

 void BlockAggregator::append_one_row(MutableBlock& dst_block, Block* src_block, int rid) {
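Note that `merge_one_row` now merges the two skip bitmaps in two halves instead of a single `&=`: plain column uids still intersect (a column stays skipped only if both duplicate rows skipped it), while patch-path markers must union, since each duplicate row may patch different paths. A reduced model of that intent; the real `merge_variant_patch_path_markers` also re-encodes markers with fresh path indexes, which a plain set union glosses over:

```cpp
#include <cassert>
#include <cstdint>
#include <set>

constexpr uint64_t kMarker = 1ULL << 63;

// Intersect plain uids, union path markers.
std::set<uint64_t> merge_skip_bitmaps(const std::set<uint64_t>& l, const std::set<uint64_t>& r) {
    std::set<uint64_t> out;
    for (uint64_t v : l) {
        if ((v & kMarker) != 0 || r.contains(v)) {
            out.insert(v);
        }
    }
    for (uint64_t v : r) {
        if ((v & kMarker) != 0) {
            out.insert(v);
        }
    }
    return out;
}

int main() {
    std::set<uint64_t> left = {1, 2, kMarker | 7};  // skips cols 1,2; patches one path
    std::set<uint64_t> right = {2, 3, kMarker | 9}; // skips cols 2,3; patches another
    assert(merge_skip_bitmaps(left, right) == (std::set<uint64_t> {2, kMarker | 7, kMarker | 9}));
    return 0;
}
```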
@@ -829,8 +886,8 @@ void BlockAggregator::remove_last_n_rows(MutableBlock& dst_block, int n) {
     }
 }

-void BlockAggregator::append_or_merge_row(MutableBlock& dst_block, Block* src_block, int rid,
-                                          BitmapValue& skip_bitmap, bool have_delete_sign) {
+Status BlockAggregator::append_or_merge_row(MutableBlock& dst_block, Block* src_block, int rid,
+                                            BitmapValue& skip_bitmap, bool have_delete_sign) {
     if (have_delete_sign) {
         // remove all the previous batched rows
         remove_last_n_rows(dst_block, _state.rows);
@@ -840,11 +897,12 @@ void BlockAggregator::append_or_merge_row(MutableBlock& dst_block, Block* src_bl
         append_one_row(dst_block, src_block, rid);
     } else {
         if (_state.should_merge()) {
-            merge_one_row(dst_block, src_block, rid, skip_bitmap);
+            RETURN_IF_ERROR(merge_one_row(dst_block, src_block, rid, skip_bitmap));
         } else {
             append_one_row(dst_block, src_block, rid);
         }
     }
+    return Status::OK();
 };

 Status BlockAggregator::aggregate_rows(
@@ -920,12 +978,14 @@ Status BlockAggregator::aggregate_rows(
         bool have_delete_sign =
                 (!skip_bitmap.contains(delete_sign_col_unique_id) && delete_signs[rid] != 0);
         if (!row_has_sequence_col) {
-            append_or_merge_row(output_block, block, rid, skip_bitmap, have_delete_sign);
+            RETURN_IF_ERROR(
+                    append_or_merge_row(output_block, block, rid, skip_bitmap, have_delete_sign));
         } else {
             std::string seq_val {};
             _writer._encode_seq_column(seq_column, rid, &seq_val);
             if (Slice {seq_val}.compare(Slice {cur_seq_val}) >= 0) {
-                append_or_merge_row(output_block, block, rid, skip_bitmap, have_delete_sign);
+                RETURN_IF_ERROR(append_or_merge_row(output_block, block, rid, skip_bitmap,
+                                                    have_delete_sign));
                 cur_seq_val = std::move(seq_val);
             } else {
                 VLOG_DEBUG << fmt::format(
@@ -980,6 +1040,63 @@ Status BlockAggregator::aggregate_for_sequence_column(
     return Status::OK();
 }

+Status BlockAggregator::aggregate_without_sequence_column(
+        Block* block, size_t num_rows, const std::vector<IOlapColumnDataAccessor*>& key_columns) {
+    DCHECK_EQ(block->columns(), _tablet_schema.num_columns());
+    std::vector<BitmapValue>* skip_bitmaps = &(
+            assert_cast<ColumnBitmap*>(block->get_by_position(_tablet_schema.skip_bitmap_col_idx())
+                                               .column->assume_mutable()
+                                               .get())
+                    ->get_data());
+    const auto* delete_signs = BaseTablet::get_delete_sign_column_data(*block, num_rows);
+    DCHECK(delete_signs != nullptr);
+    int32_t delete_sign_col_unique_id =
+            _tablet_schema.column(_tablet_schema.delete_sign_idx()).unique_id();
+
+    auto aggregated_block = _tablet_schema.create_block();
+    MutableBlock output_block = MutableBlock::build_mutable_block(&aggregated_block);
+
+    auto aggregate_range = [&](int start, int end) -> Status {
+        if (end - start == 1) {
+            output_block.add_row(block, start);
+            return Status::OK();
+        }
+        _state.reset();
+        for (int rid = start; rid < end; ++rid) {
+            auto& skip_bitmap = skip_bitmaps->at(rid);
+            bool have_delete_sign =
+                    (!skip_bitmap.contains(delete_sign_col_unique_id) && delete_signs[rid] != 0);
+            RETURN_IF_ERROR(
+                    append_or_merge_row(output_block, block, rid, skip_bitmap, have_delete_sign));
+        }
+        return Status::OK();
+    };
+
+    int same_key_rows {0};
+    std::string previous_key {};
+    const auto num_rows_int = static_cast<int>(num_rows);
+    for (int block_pos {0}; block_pos < num_rows_int; block_pos++) {
+        std::string key = _writer._full_encode_keys(key_columns, block_pos);
+        if (block_pos > 0 && previous_key == key) {
+            same_key_rows++;
+        } else {
+            if (same_key_rows > 0) {
+                RETURN_IF_ERROR(aggregate_range(block_pos - same_key_rows, block_pos));
+            }
+            same_key_rows = 1;
+        }
+        previous_key = std::move(key);
+    }
+    if (same_key_rows > 0) {
+        RETURN_IF_ERROR(aggregate_range(num_rows_int - same_key_rows, num_rows_int));
+    }
+
+    if (output_block.rows() != num_rows) {
+        block->swap(output_block.to_block());
+    }
+    return Status::OK();
+}
+
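The `same_key_rows` run detection in `aggregate_without_sequence_column` is easy to get off by one, so here is the same loop on plain strings; each flush covers exactly one run of equal keys, including the trailing run after the loop:

```cpp
#include <cassert>
#include <string>
#include <utility>
#include <vector>

int main() {
    // Sorted-by-key input, as BlockAggregator sees it.
    std::vector<std::string> keys = {"k1", "k1", "k2", "k3", "k3", "k3"};
    std::vector<std::pair<int, int>> ranges; // [start, end) runs of equal keys

    int same_key_rows = 0;
    std::string previous_key;
    const int n = static_cast<int>(keys.size());
    for (int pos = 0; pos < n; ++pos) {
        if (pos > 0 && previous_key == keys[pos]) {
            same_key_rows++;
        } else {
            if (same_key_rows > 0) {
                ranges.emplace_back(pos - same_key_rows, pos); // flush the previous run
            }
            same_key_rows = 1;
        }
        previous_key = keys[pos];
    }
    if (same_key_rows > 0) {
        ranges.emplace_back(n - same_key_rows, n); // flush the trailing run
    }
    assert((ranges == std::vector<std::pair<int, int>> {{0, 2}, {2, 3}, {3, 6}}));
    return 0;
}
```

Singleton runs take the cheap `add_row` path; only genuine duplicates go through `append_or_merge_row`, mirroring the sequence-column variant.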
+    if (output_block.rows() != num_rows) {
+        block->swap(output_block.to_block());
+    }
+    return Status::OK();
+}
+
 Status BlockAggregator::fill_sequence_column(Block* block, size_t num_rows,
                                              const FixedReadPlan& read_plan,
                                              std::vector<BitmapValue>& skip_bitmaps) {
@@ -1105,7 +1222,7 @@ Status BlockAggregator::filter_block(Block* block, size_t num_rows, MutableColum
     RETURN_IF_ERROR(Block::filter_block(block, num_cols, num_cols));
     DCHECK_EQ(num_cols, block->columns());
     size_t merged_rows = num_rows - block->rows();
-    if (duplicate_rows != merged_rows) {
+    if (std::cmp_not_equal(duplicate_rows, merged_rows)) {
         auto msg = fmt::format(
                 "filter_block_for_flexible_partial_update {}: duplicate_rows != merged_rows, "
                 "duplicate_keys={}, merged_rows={}, num_rows={}, mutable_block->rows()={}",
@@ -1164,6 +1281,8 @@ Status BlockAggregator::aggregate_for_flexible_partial_update(
         RETURN_IF_ERROR(aggregate_for_sequence_column(block, static_cast<int>(num_rows),
                                                       key_columns, seq_column, specified_rowsets,
                                                       segment_caches));
+    } else {
+        RETURN_IF_ERROR(aggregate_without_sequence_column(block, num_rows, key_columns));
     }
 
     // 2. merge duplicate rows and handle insert after delete
diff --git a/be/src/storage/partial_update_info.h b/be/src/storage/partial_update_info.h
index 6371a79fe71db9..5cc2f4c9d77340 100644
--- a/be/src/storage/partial_update_info.h
+++ b/be/src/storage/partial_update_info.h
@@ -209,16 +209,19 @@ class BlockAggregator {
             Block* block, size_t num_rows, const std::vector<IOlapColumnDataAccessor*>& key_columns,
             const std::vector<RowsetSharedPtr>& specified_rowsets,
             std::vector<std::unique_ptr<SegmentCacheHandle>>& segment_caches);
+    Status aggregate_without_sequence_column(
+            Block* block, size_t num_rows,
+            const std::vector<IOlapColumnDataAccessor*>& key_columns);
     Status filter_block(Block* block, size_t num_rows, MutableColumnPtr filter_column,
                         int duplicate_rows, std::string col_name);
     Status fill_sequence_column(Block* block, size_t num_rows, const FixedReadPlan& read_plan,
                                 std::vector<BitmapValue>& skip_bitmaps);
-    void append_or_merge_row(MutableBlock& dst_block, Block* src_block, int rid,
-                             BitmapValue& skip_bitmap, bool have_delete_sign);
-    void merge_one_row(MutableBlock& dst_block, Block* src_block, int rid,
-                       BitmapValue& skip_bitmap);
+    Status append_or_merge_row(MutableBlock& dst_block, Block* src_block, int rid,
+                               BitmapValue& skip_bitmap, bool have_delete_sign);
+    Status merge_one_row(MutableBlock& dst_block, Block* src_block, int rid,
+                         BitmapValue& skip_bitmap);
     void append_one_row(MutableBlock& dst_block, Block* src_block, int rid);
     void remove_last_n_rows(MutableBlock& dst_block, int n);
diff --git a/be/src/storage/rowset_builder.cpp b/be/src/storage/rowset_builder.cpp
index aa075f386c48c3..4b63050d48cfc7 100644
--- a/be/src/storage/rowset_builder.cpp
+++ b/be/src/storage/rowset_builder.cpp
@@ -303,6 +303,19 @@ Status GroupRowsetBuilder::build_rowset() {
     return _txn_rs_builder->build_rowset();
 }
 
+Status BaseRowsetBuilder::_check_flexible_partial_update_single_segment() const {
+    if (_partial_update_info && _partial_update_info->is_flexible_partial_update() &&
+        _rowset->num_segments() > 1) {
+        // in flexible partial update, when there is more than one segment in one load,
+        // we need to do alignment processing for the same keys between segments; we
+        // haven't implemented it yet and just report an error when encountering this
+        // situation
+        return Status::NotSupported(
+                "too much input data in flexible partial update, please "
+                "reduce the amount of data imported in a single load.");
+    }
+    return Status::OK();
+}
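+
+// NOTE: keeping this check in one shared helper makes every rowset builder that calls it
+// report the same NotSupported error for multi-segment flexible partial update loads.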
+
 Status BaseRowsetBuilder::submit_calc_delete_bitmap_task() {
     DCHECK(is_data_builder());
     if (!_tablet->enable_unique_key_merge_on_write() || _rowset->num_segments() == 0) {
@@ -310,16 +323,7 @@ Status BaseRowsetBuilder::submit_calc_delete_bitmap_task() {
     }
     std::lock_guard l(_lock);
     SCOPED_TIMER(_submit_delete_bitmap_timer);
-    if (_partial_update_info && _partial_update_info->is_flexible_partial_update()) {
-        if (_rowset->num_segments() > 1) {
-            // in flexible partial update, when there are more one segment in one load,
-            // we need to do alignment process for same keys between segments, we haven't
-            // implemented it yet and just report an error when encouter this situation
-            return Status::NotSupported(
-                    "too large input data in flexible partial update, Please "
-                    "reduce the amount of data imported in a single load.");
-        }
-    }
+    RETURN_IF_ERROR(_check_flexible_partial_update_single_segment());
 
     auto* beta_rowset = reinterpret_cast<BetaRowset*>(_rowset.get());
     std::vector<segment_v2::SegmentSharedPtr> segments;
diff --git a/be/src/storage/rowset_builder.h b/be/src/storage/rowset_builder.h
index edc97432eaf9c9..514f372b977a05 100644
--- a/be/src/storage/rowset_builder.h
+++ b/be/src/storage/rowset_builder.h
@@ -97,6 +97,8 @@ class BaseRowsetBuilder {
     Status init_mow_context(std::shared_ptr<MowContext>& mow_context);
 
 protected:
+    Status _check_flexible_partial_update_single_segment() const;
+
     Status _build_current_tablet_schema(int64_t index_id,
                                         const OlapTableSchemaParam* table_schema_param,
                                         const TabletSchema& ori_tablet_schema);
diff --git a/be/src/storage/segment/column_writer.cpp b/be/src/storage/segment/column_writer.cpp
index 1ba371233c4671..08424f47e2a8a2 100644
--- a/be/src/storage/segment/column_writer.cpp
+++ b/be/src/storage/segment/column_writer.cpp
@@ -1391,6 +1391,7 @@ Status VariantColumnWriter::write_bloom_filter_index() {
 
 Status VariantColumnWriter::append_nullable(const uint8_t* null_map, const uint8_t** ptr,
                                             size_t num_rows) {
+    _next_rowid += num_rows;
     return _impl->append_nullable(null_map, ptr, num_rows);
 }
diff --git a/be/src/storage/segment/variant/variant_column_writer_impl.cpp b/be/src/storage/segment/variant/variant_column_writer_impl.cpp
index 8ad08640ba19bd..d12365ef52b8db 100644
--- a/be/src/storage/segment/variant/variant_column_writer_impl.cpp
+++ b/be/src/storage/segment/variant/variant_column_writer_impl.cpp
@@ -1625,6 +1625,7 @@ Status VariantSubcolumnWriter::append_data(const uint8_t** ptr, size_t num_rows)
     const auto& src = *reinterpret_cast(column->column_data);
     // TODO: if direct write we could avoid copy
     _column->insert_range_from(src, column->row_pos, num_rows);
+    _next_rowid += num_rows;
     return Status::OK();
 }
 
@@ -1752,6 +1753,7 @@ Status VariantDocCompactWriter::append_data(const uint8_t** ptr, size_t num_rows
     auto* dst_ptr = assert_cast(_column.get());
     // TODO: if direct write we could avoid copy
     dst_ptr->insert_range_from(src, column->row_pos, num_rows);
+    _next_rowid += num_rows;
     return Status::OK();
 }
diff --git a/be/src/storage/segment/vertical_segment_writer.cpp b/be/src/storage/segment/vertical_segment_writer.cpp
index 78031d1f7d4429..5dc4d2f782b525 100644
--- a/be/src/storage/segment/vertical_segment_writer.cpp
+++ b/be/src/storage/segment/vertical_segment_writer.cpp
@@ -499,6 +499,12 @@ Status VerticalSegmentWriter::_partial_update_preconditions_check(size_t row_pos
     if (row_pos != 0) {
         auto msg = fmt::format("row_pos should be 0, but found {}, tablet_id={}", row_pos,
                                _tablet->tablet_id());
+        if (is_flexible_update) {
+            return Status::NotSupported(
+                    "{}. Flexible partial update currently relies on whole-block duplicate-key "
+                    "aggregation before writing VARIANT patches.",
+                    msg);
+        }
         DCHECK(false) << msg;
         return Status::InternalError(msg);
     }
@@ -735,6 +741,42 @@ Status VerticalSegmentWriter::_append_block_with_flexible_partial_content(RowsIn
         RETURN_IF_ERROR(_create_column_writer(cid, _tablet_schema->column(cid), _tablet_schema));
     }
+    std::vector<BitmapValue>* skip_bitmaps = &(
+            assert_cast<ColumnBitmap*>(
+                    data.block->get_by_position(skip_bitmap_col_idx).column->assume_mutable().get())
+                    ->get_data());
+    if (_tablet_schema->num_variant_columns() > 0) {
+        if (_tablet_schema->deprecated_variant_flatten_nested()) {
+            return Status::NotSupported(
+                    "VARIANT flexible partial update does not support "
+                    "deprecated_variant_enable_flatten_nested in this version");
+        }
+        std::vector<uint32_t> variant_cids;
+        variant_cids.reserve(_tablet_schema->num_variant_columns());
+        for (size_t cid = _tablet_schema->num_key_columns(); cid < _tablet_schema->num_columns();
+             ++cid) {
+            const auto& column = _tablet_schema->column(cid);
+            if (!column.is_variant_type()) {
+                continue;
+            }
+            variant_cids.push_back(cast_set<uint32_t>(cid));
+        }
+        RETURN_IF_ERROR(variant_util::parse_and_materialize_variant_columns(
+                *const_cast<Block*>(data.block), *_tablet_schema, variant_cids, true));
+        for (auto cid : variant_cids) {
+            const auto& column = _tablet_schema->column(cid);
+            for (size_t block_pos = data.row_pos; block_pos < data.row_pos + data.num_rows;
+                 ++block_pos) {
+                auto& skip_bitmap = skip_bitmaps->at(block_pos);
+                if (!skip_bitmap.contains(column.unique_id())) {
+                    RETURN_IF_ERROR(variant_util::mark_variant_patch_paths(
+                            *data.block->get_by_position(cid).column, block_pos, column.unique_id(),
+                            &skip_bitmap));
+                }
+            }
+        }
+    }
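+    // At this point every row that actually supplies a VARIANT value carries exact
+    // patch-path markers in its skip bitmap, so later merge steps can tell which
+    // subpaths this load wrote.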
+
     // 1. aggregate duplicate rows in block
     RETURN_IF_ERROR(_block_aggregator.aggregate_for_flexible_partial_update(
             const_cast<Block*>(data.block), data.num_rows, specified_rowsets, segment_caches));
@@ -742,6 +784,10 @@
         data.num_rows = data.block->rows();
         _olap_data_convertor->clear_source_content();
     }
+    skip_bitmaps = &(
+            assert_cast<ColumnBitmap*>(
+                    data.block->get_by_position(skip_bitmap_col_idx).column->assume_mutable().get())
+                    ->get_data());
 
     // 2. encode primary key columns
     // we can only encode primary key columns currently becasue all non-primary columns in flexible partial update
@@ -758,10 +804,6 @@
     RETURN_IF_ERROR(_block_aggregator.convert_seq_column(const_cast<Block*>(data.block),
                                                          data.row_pos, data.num_rows, seq_column));
-    std::vector<BitmapValue>* skip_bitmaps = &(
-            assert_cast<ColumnBitmap*>(
-                    data.block->get_by_position(skip_bitmap_col_idx).column->assume_mutable().get())
-                    ->get_data());
     const auto* delete_signs =
             BaseTablet::get_delete_sign_column_data(*data.block, data.row_pos + data.num_rows);
     DCHECK(delete_signs != nullptr);
@@ -845,12 +887,11 @@
     _num_rows_new_added += stats.num_rows_new_added;
     _num_rows_filtered += stats.num_rows_filtered;
-    if (_num_rows_written != data.row_pos ||
-        _primary_key_index_builder->num_rows() != _num_rows_written) {
+    if (_primary_key_index_builder->num_rows() != _num_rows_written) {
         return Status::InternalError(
-                "Correctness check failed, _num_rows_written: {}, row_pos: {}, primary key "
+                "Correctness check failed, _num_rows_written: {}, primary key "
                 "index builder num rows: {}",
-                _num_rows_written, data.row_pos, _primary_key_index_builder->num_rows());
+                _num_rows_written, _primary_key_index_builder->num_rows());
     }
 
     // 9. build primary key index
@@ -937,7 +978,22 @@ Status VerticalSegmentWriter::_generate_flexible_read_plan(
                                  &skip_bitmap);
     };
     auto update_read_plan = [&](const RowLocation& loc) {
-        read_plan.prepare_to_read(loc, segment_pos, skip_bitmap);
+        BitmapValue read_skip_bitmap(skip_bitmap);
+        if (!have_delete_sign) {
+            bool should_merge_variant = false;
+            for (size_t cid = _tablet_schema->num_key_columns();
+                 cid < _tablet_schema->num_columns(); ++cid) {
+                const auto& column = _tablet_schema->column(cid);
+                if (column.is_variant_type() && !skip_bitmap.contains(column.unique_id())) {
+                    read_skip_bitmap.add(column.unique_id());
+                    should_merge_variant = true;
+                }
+            }
+            if (should_merge_variant) {
+                read_skip_bitmap.add(delete_sign_col_unique_id);
+            }
+        }
+        read_plan.prepare_to_read(loc, segment_pos, read_skip_bitmap);
     };
 
     RETURN_IF_ERROR(_probe_key_for_mow(std::move(key), segment_pos, row_has_sequence_col,
diff --git a/be/src/storage/tablet/base_tablet.cpp b/be/src/storage/tablet/base_tablet.cpp
index eb59206165c96a..cfe6f3569d2521 100644
--- a/be/src/storage/tablet/base_tablet.cpp
+++ b/be/src/storage/tablet/base_tablet.cpp
@@ -25,8 +25,10 @@
 #include
 #include
 #include
+#include <limits>
 #include
 #include
+#include <utility>
 
 #include "cloud/cloud_tablet.h"
 #include "cloud/config.h"
@@ -36,6 +38,7 @@
 #include "common/status.h"
 #include "core/assert_cast.h"
 #include "core/data_type/data_type_factory.hpp"
+#include "exec/common/variant_util.h"
 #include "load/memtable/memtable.h"
 #include "service/point_query_executor.h"
 #include "storage/compaction/cumulative_compaction_time_series_policy.h"
@@ -997,8 +1000,9 @@ Status BaseTablet::generate_new_block_for_partial_update(
     RETURN_IF_ERROR(read_plan_update.read_columns_by_plan(
             *rowset_schema, update_cids, rsid_to_rowset, update_block, &read_index_update, false));
     size_t update_rows = read_index_update.size();
+    DCHECK_LE(update_rows, std::numeric_limits<uint32_t>::max());
     for (auto i = 0; i < update_cids.size(); ++i) {
-        for (auto idx = 0; idx < update_rows; ++idx) {
+        for (uint32_t idx = 0; std::cmp_less(idx, update_rows); ++idx) {
             full_mutable_columns[update_cids[i]]->insert_from(
                     *update_block.get_by_position(i).column,
                     read_index_update[idx]);
         }
     }
@@ -1032,7 +1036,7 @@ Status BaseTablet::generate_new_block_for_partial_update(
     for (auto i = 0; i < missing_cids.size(); ++i) {
         const auto& rs_column = rowset_schema->column(missing_cids[i]);
         auto& mutable_column = full_mutable_columns[missing_cids[i]];
-        for (auto idx = 0; idx < update_rows; ++idx) {
+        for (uint32_t idx = 0; std::cmp_less(idx, update_rows); ++idx) {
             // There are two cases we don't need to read values from old data:
             //     1. if the conflicting new row's delete sign is marked, which means the value columns
             //     of the row will not be read. So we don't need to read the missing values from the previous rows.
@@ -1084,13 +1088,14 @@ Status BaseTablet::generate_new_block_for_partial_update(
     return Status::OK();
 }
 
-static void fill_cell_for_flexible_partial_update(
+static Status fill_cell_for_flexible_partial_update(
         std::map<uint32_t, uint32_t>& read_index_old,
         std::map<uint32_t, uint32_t>& read_index_update, const TabletSchemaSPtr& rowset_schema,
         const PartialUpdateInfo* partial_update_info, const TabletColumn& tablet_column,
         std::size_t idx, MutableColumnPtr& new_col, const IColumn& default_value_col,
         const IColumn& old_value_col, const IColumn& cur_col, bool skipped,
-        bool row_has_sequence_col, const signed char* delete_sign_column_data) {
+        bool row_has_sequence_col, const signed char* delete_sign_column_data,
+        const BitmapValue& skip_bitmap) {
     if (skipped) {
         bool use_default = false;
         bool old_row_delete_sign =
@@ -1128,8 +1133,19 @@ static void fill_cell_for_flexible_partial_update(
             new_col->insert_from(old_value_col, read_index_old[cast_set<uint32_t>(idx)]);
         }
     } else {
+        bool old_row_delete_sign =
+                (delete_sign_column_data != nullptr &&
+                 delete_sign_column_data[read_index_old[cast_set<uint32_t>(idx)]] != 0);
+        if (tablet_column.is_variant_type()) {
+            RETURN_IF_ERROR(variant_util::merge_variant_patch_by_path_markers(
+                    old_value_col, read_index_old[cast_set<uint32_t>(idx)], cur_col,
+                    read_index_update[cast_set<uint32_t>(idx)], tablet_column.unique_id(),
+                    skip_bitmap, old_row_delete_sign, *new_col));
+            return Status::OK();
+        }
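+        // Non-VARIANT value columns keep the pre-existing behavior below: the newer
+        // (current) row's value simply replaces the old one.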
         new_col->insert_from(cur_col, read_index_update[cast_set<uint32_t>(idx)]);
     }
+    return Status::OK();
 }
 
 Status BaseTablet::generate_new_block_for_flexible_partial_update(
@@ -1211,7 +1227,7 @@ Status BaseTablet::generate_new_block_for_flexible_partial_update(
         const IColumn& cur_col = *update_block.get_by_position(cid).column;
         const auto& rs_column = rowset_schema->column(cid);
         auto col_uid = rs_column.unique_id();
-        for (auto idx = 0; idx < update_rows; ++idx) {
+        for (uint32_t idx = 0; std::cmp_less(idx, update_rows); ++idx) {
             if (cid < rowset_schema->num_key_columns()) {
                 new_col->insert_from(cur_col, read_index_update[idx]);
             } else {
@@ -1223,14 +1239,14 @@ Status BaseTablet::generate_new_block_for_flexible_partial_update(
                 if (rids_be_overwritten.contains(idx)) {
                     new_col->insert_from(old_value_col, read_index_old[idx]);
                 } else {
-                    fill_cell_for_flexible_partial_update(
+                    RETURN_IF_ERROR(fill_cell_for_flexible_partial_update(
                             read_index_old, read_index_update, rowset_schema, partial_update_info,
                             rs_column, idx, new_col, default_value_col, old_value_col, cur_col,
                             skip_bitmaps->at(idx).contains(col_uid),
                             rowset_schema->has_sequence_col()
                                     ? !skip_bitmaps->at(idx).contains(seq_col_unique_id)
                                     : false,
-                            old_block_delete_signs);
+                            old_block_delete_signs, skip_bitmaps->at(idx)));
                 }
             }
         }
diff --git a/be/src/util/json/json_parser.cpp b/be/src/util/json/json_parser.cpp
index 3df723c3849eac..d1cdffcab6f99c 100644
--- a/be/src/util/json/json_parser.cpp
+++ b/be/src/util/json/json_parser.cpp
@@ -48,6 +48,8 @@ std::optional<ParseResult> JSONDataParser<ParserImpl>::parse(const char* begin,
     // NestedGroup expansion is now handled at storage layer
     context.deprecated_enable_flatten_nested = config.deprecated_enable_flatten_nested;
     context.check_duplicate_json_path = config.check_duplicate_json_path;
+    context.reject_json_null_value = config.reject_json_null_value;
+    context.record_empty_object_path = config.record_empty_object_path;
     context.is_top_array = document.isArray();
     traverse(document, context);
     ParseResult result;
@@ -62,6 +64,11 @@ std::optional<ParseResult> JSONDataParser<ParserImpl>::parse(const char* begin,
 
 template <typename ParserImpl>
 void JSONDataParser<ParserImpl>::traverse(const Element& element, ParseContext& ctx) {
     // checkStackSize();
+    if (element.isNull() && ctx.reject_json_null_value) {
+        throw doris::Exception(
+                doris::ErrorCode::INVALID_ARGUMENT,
+                "VARIANT flexible partial update does not support JSON null patch values");
+    }
     if (element.isObject()) {
         traverseObject(element.getObject(), ctx);
     } else if (element.isArray()) {
@@ -73,7 +80,7 @@ void JSONDataParser<ParserImpl>::traverse(const Element& element, ParseContext&
         if (has_nested && !ctx.deprecated_enable_flatten_nested) {
             // Parse nested arrays to JsonbField
             JsonbWriter writer;
-            traverseArrayAsJsonb(element.getArray(), writer);
+            traverseArrayAsJsonb(element.getArray(), writer, ctx.reject_json_null_value);
             appendValueIfNotDuplicate(
                     ctx, ctx.builder.get_parts(),
                     Field::create_field<TYPE_JSONB>(JsonbField(writer.getOutput()->getBuffer(),
@@ -106,6 +113,16 @@ template <typename ParserImpl>
 void JSONDataParser<ParserImpl>::traverseObject(const JSONObject& object, ParseContext& ctx) {
     ctx.paths.reserve(ctx.paths.size() + object.size());
     ctx.values.reserve(ctx.values.size() + object.size());
+    if (object.size() == 0 && ctx.record_empty_object_path && !ctx.builder.get_parts().empty()) {
+        JsonbWriter writer;
+        writer.writeStartObject();
+        writer.writeEndObject();
+        appendValueIfNotDuplicate(
+                ctx, ctx.builder.get_parts(),
+                Field::create_field<TYPE_JSONB>(JsonbField(writer.getOutput()->getBuffer(),
+                                                           writer.getOutput()->getSize())));
+        return;
+    }
     auto check_key_length = [](const auto& key) {
         const size_t max_key_length = cast_set(config::variant_max_json_key_length);
         if (key.size() > max_key_length) {
@@ -142,11 +159,17 @@ void JSONDataParser<ParserImpl>::check_has_nested_object(const Element& element)
 }
 
 template <typename ParserImpl>
-void JSONDataParser<ParserImpl>::traverseAsJsonb(const Element& element, JsonbWriter& writer) {
+void JSONDataParser<ParserImpl>::traverseAsJsonb(const Element& element, JsonbWriter& writer,
+                                                 bool reject_json_null_value) {
+    if (element.isNull() && reject_json_null_value) {
+        throw doris::Exception(
+                doris::ErrorCode::INVALID_ARGUMENT,
+                "VARIANT flexible partial update does not support JSON null patch values");
+    }
     if (element.isObject()) {
-        traverseObjectAsJsonb(element.getObject(), writer);
+        traverseObjectAsJsonb(element.getObject(), writer, reject_json_null_value);
     } else if (element.isArray()) {
-        traverseArrayAsJsonb(element.getArray(), writer);
+        traverseArrayAsJsonb(element.getArray(), writer, reject_json_null_value);
     } else {
         writeValueAsJsonb(element, writer);
     }
@@ -154,7 +177,8 @@ void JSONDataParser<ParserImpl>::traverseAsJsonb(const Element& element, JsonbWr
 
 template <typename ParserImpl>
 void JSONDataParser<ParserImpl>::traverseObjectAsJsonb(const JSONObject& object,
-                                                       JsonbWriter& writer) {
+                                                       JsonbWriter& writer,
+                                                       bool reject_json_null_value) {
     writer.writeStartObject();
     for (auto it = object.begin(); it != object.end(); ++it) {
         const auto& [key, value] = *it;
@@ -166,16 +190,17 @@ void JSONDataParser<ParserImpl>::traverseObjectAsJsonb(const JSONObject& object,
                                     max_key_length));
         }
         writer.writeKey(key.data(), cast_set(key.size()));
-        traverseAsJsonb(value, writer);
+        traverseAsJsonb(value, writer, reject_json_null_value);
     }
     writer.writeEndObject();
 }
 
 template <typename ParserImpl>
-void JSONDataParser<ParserImpl>::traverseArrayAsJsonb(const JSONArray& array, JsonbWriter& writer) {
+void JSONDataParser<ParserImpl>::traverseArrayAsJsonb(const JSONArray& array, JsonbWriter& writer,
+                                                      bool reject_json_null_value) {
     writer.writeStartArray();
     for (auto it = array.begin(); it != array.end(); ++it) {
-        traverseAsJsonb(*it, writer);
+        traverseAsJsonb(*it, writer, reject_json_null_value);
     }
     writer.writeEndArray();
 }
@@ -201,6 +226,7 @@ void JSONDataParser<ParserImpl>::traverseArray(const JSONArray& array, ParseCont
     array_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten;
     array_ctx.is_top_array = ctx.is_top_array;
     array_ctx.check_duplicate_json_path = ctx.check_duplicate_json_path;
+    array_ctx.reject_json_null_value = ctx.reject_json_null_value;
     array_ctx.total_size = array.size();
     for (auto it = array.begin(); it != array.end(); ++it) {
         traverseArrayElement(*it, array_ctx);
@@ -231,6 +257,7 @@ void JSONDataParser<ParserImpl>::traverseArrayElement(const Element& element,
     element_ctx.has_nested_in_flatten = ctx.has_nested_in_flatten;
     element_ctx.is_top_array = ctx.is_top_array;
     element_ctx.check_duplicate_json_path = ctx.check_duplicate_json_path;
+    element_ctx.reject_json_null_value = ctx.reject_json_null_value;
     traverse(element, element_ctx);
     auto& paths = element_ctx.paths;
     auto& values = element_ctx.values;
diff --git a/be/src/util/json/json_parser.h b/be/src/util/json/json_parser.h
index c4a165e899546f..94c21b5a212414 100644
--- a/be/src/util/json/json_parser.h
+++ b/be/src/util/json/json_parser.h
@@ -102,6 +102,8 @@ void writeValueAsJsonb(const Element& element, JsonbWriter& writer) {
 struct ParseConfig {
     bool deprecated_enable_flatten_nested = false;
     bool check_duplicate_json_path = false;
+    bool reject_json_null_value = false;
+    bool record_empty_object_path = false;
     enum class ParseTo {
         OnlySubcolumns = 0,
         OnlyDocValueColumn = 1,
@@ -131,6 +133,8 @@ class JSONDataParser {
         phmap::flat_hash_set visited_path_names;
         bool deprecated_enable_flatten_nested = false;
         bool check_duplicate_json_path = false;
+        bool reject_json_null_value = false;
+        bool record_empty_object_path = false;
         bool has_nested_in_flatten = false;
         bool is_top_array = false;
     };
@@ -145,6 +149,7 @@ class JSONDataParser {
         bool has_nested_in_flatten = false;
         bool is_top_array = false;
         bool check_duplicate_json_path = false;
+        bool reject_json_null_value = false;
     };
     void traverse(const Element& element, ParseContext& ctx);
     void traverseObject(const JSONObject& object, ParseContext& ctx);
@@ -165,9 +170,11 @@ class JSONDataParser {
     bool has_nested = false;
     void check_has_nested_object(const Element& element);
 
-    void traverseAsJsonb(const Element& element, JsonbWriter& writer);
-    void traverseObjectAsJsonb(const JSONObject& object, JsonbWriter& writer);
-    void traverseArrayAsJsonb(const JSONArray& array, JsonbWriter& writer);
+    void traverseAsJsonb(const Element& element, JsonbWriter& writer, bool reject_json_null_value);
+    void traverseObjectAsJsonb(const JSONObject& object, JsonbWriter& writer,
+                               bool reject_json_null_value);
+    void traverseArrayAsJsonb(const JSONArray& array, JsonbWriter& writer,
+                              bool reject_json_null_value);
 
     ParserImpl parser;
 };
diff --git a/be/test/exec/common/schema_util_rowset_test.cpp b/be/test/exec/common/schema_util_rowset_test.cpp
index cf99c9824956c5..2f1aca621f537f 100644
--- a/be/test/exec/common/schema_util_rowset_test.cpp
+++ b/be/test/exec/common/schema_util_rowset_test.cpp
@@ -714,6 +714,7 @@ TEST_F(SchemaUtilRowsetTest, some_test_for_subcolumn_writer) {
     _variant_column_data->row_pos = 0;
     const uint8_t* data = (const uint8_t*)_variant_column_data.get();
     EXPECT_TRUE(variant_subcolumn_writer->append_data(&data, 1));
+    EXPECT_EQ(1, variant_subcolumn_writer->get_next_rowid());
     // write null data
     EXPECT_TRUE(variant_subcolumn_writer->write_data().ok());
 }
diff --git a/be/test/storage/partial_update_info_test.cpp b/be/test/storage/partial_update_info_test.cpp
new file mode 100644
index 00000000000000..9b397a2703f84d
--- /dev/null
+++ b/be/test/storage/partial_update_info_test.cpp
@@ -0,0 +1,186 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
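+
+// Covers flexible partial update metadata: PartialUpdateInfo must round-trip the
+// sequence-map column uid through PartialUpdateInfoPB, and VerticalSegmentWriter must
+// reject sliced (row_pos != 0) batches in flexible partial update mode.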
+
+#include "storage/partial_update_info.h"
+
+#include <gtest/gtest.h>
+
+#include "core/block/block.h"
+#include "io/fs/local_file_system.h"
+#include "storage/rowset/rowset_writer.h"
+#include "storage/rowset/rowset_writer_context.h"
+#include "storage/segment/vertical_segment_writer.h"
+#include "storage/tablet/base_tablet.h"
+#include "storage/tablet/tablet_meta.h"
+#include "storage/tablet/tablet_schema.h"
+
+namespace doris {
+
+namespace {
+
+TabletSchemaSPtr create_unique_key_schema() {
+    auto tablet_schema = std::make_shared<TabletSchema>();
+    TabletSchemaPB tablet_schema_pb;
+    tablet_schema_pb.set_keys_type(UNIQUE_KEYS);
+    tablet_schema_pb.set_num_short_key_columns(1);
+    tablet_schema_pb.set_num_rows_per_row_block(1024);
+    tablet_schema_pb.set_compress_kind(COMPRESS_NONE);
+    tablet_schema_pb.set_next_column_unique_id(4);
+
+    auto* key_column = tablet_schema_pb.add_column();
+    key_column->set_unique_id(1);
+    key_column->set_name("k");
+    key_column->set_type("INT");
+    key_column->set_is_key(true);
+    key_column->set_length(4);
+    key_column->set_index_length(4);
+    key_column->set_is_nullable(false);
+    key_column->set_is_bf_column(false);
+
+    auto* value_column = tablet_schema_pb.add_column();
+    value_column->set_unique_id(2);
+    value_column->set_name("v");
+    value_column->set_type("INT");
+    value_column->set_is_key(false);
+    value_column->set_length(4);
+    value_column->set_index_length(4);
+    value_column->set_is_nullable(false);
+    value_column->set_is_bf_column(false);
+
+    auto* delete_sign_column = tablet_schema_pb.add_column();
+    delete_sign_column->set_unique_id(3);
+    delete_sign_column->set_name(DELETE_SIGN);
+    delete_sign_column->set_type("TINYINT");
+    delete_sign_column->set_is_key(false);
+    delete_sign_column->set_length(1);
+    delete_sign_column->set_index_length(1);
+    delete_sign_column->set_is_nullable(false);
+    delete_sign_column->set_is_bf_column(false);
+
+    tablet_schema->init_from_pb(tablet_schema_pb);
+    return tablet_schema;
+}
+
+class FakeTablet : public BaseTablet {
+public:
+    explicit FakeTablet(TabletSchemaSPtr schema)
+            : BaseTablet(std::make_shared<TabletMeta>(std::move(schema))) {}
+
+    std::string tablet_path() const override { return ""; }
+
+    bool exceed_version_limit(int32_t /*limit*/) override { return false; }
+
+    Result<std::unique_ptr<RowsetWriter>> create_rowset_writer(RowsetWriterContext& /*context*/,
+                                                               bool /*vertical*/) override {
+        return ResultError(Status::NotSupported("fake tablet"));
+    }
+
+    Result<std::unique_ptr<RowsetWriter>> create_transient_rowset_writer(
+            const Rowset& /*rowset*/, std::shared_ptr<PartialUpdateInfo> /*partial_update_info*/,
+            int64_t /*txn_expiration*/ = 0) override {
+        return ResultError(Status::NotSupported("fake tablet"));
+    }
+
+    Status capture_rs_readers(const Version& /*spec_version*/,
+                              std::vector<RowSetSplits>* /*rs_splits*/,
+                              const CaptureRowsetOps& /*opts*/) override {
+        return Status::NotSupported("fake tablet");
+    }
+
+    Status save_delete_bitmap(const TabletTxnInfo* /*txn_info*/, int64_t /*txn_id*/,
+                              DeleteBitmapPtr /*delete_bitmap*/, RowsetWriter* /*rowset_writer*/,
+                              const RowsetIdUnorderedSet& /*cur_rowset_ids*/,
+                              int64_t /*lock_id*/ = -1,
+                              int64_t /*next_visible_version*/ = -1) override {
+        return Status::NotSupported("fake tablet");
+    }
+
+    CalcDeleteBitmapExecutor* calc_delete_bitmap_executor() override { return nullptr; }
+
+    void clear_cache() override {}
+
+    Versions calc_missed_versions(int64_t /*spec_version*/,
+                                  Versions /*existing_versions*/) const override {
+        return {};
+    }
+
+    size_t tablet_footprint() override { return 0; }
+};
+
+} // namespace
+
+TEST(PartialUpdateInfoTest, PersistsSequenceMapColumnUid) {
+    PartialUpdateInfo info;
+    info.partial_update_mode = UniqueKeyUpdateModePB::UPDATE_FLEXIBLE_COLUMNS;
+    info.sequence_map_col_unqiue_id = 123;
+
+    PartialUpdateInfoPB pb;
+    info.to_pb(&pb);
+
+    ASSERT_TRUE(pb.has_sequence_map_col_uid());
+    EXPECT_EQ(pb.sequence_map_col_uid(), 123);
+
+    PartialUpdateInfo decoded;
+    decoded.from_pb(&pb);
+    EXPECT_EQ(decoded.sequence_map_col_uid(), 123);
+}
+
+TEST(PartialUpdateInfoTest, DefaultsMissingSequenceMapColumnUid) {
+    PartialUpdateInfoPB pb;
+    pb.set_partial_update_mode(UniqueKeyUpdateModePB::UPDATE_FLEXIBLE_COLUMNS);
+
+    PartialUpdateInfo decoded;
+    decoded.from_pb(&pb);
+    EXPECT_EQ(decoded.sequence_map_col_uid(), -1);
+}
+
+TEST(PartialUpdateInfoTest, FlexiblePartialUpdateRejectsSlicedRowPos) {
+    auto schema = create_unique_key_schema();
+
+    RowsetWriterContext rowset_ctx;
+    rowset_ctx.tablet_schema = schema;
+    rowset_ctx.partial_update_info = std::make_shared<PartialUpdateInfo>();
+    rowset_ctx.partial_update_info->partial_update_mode =
+            UniqueKeyUpdateModePB::UPDATE_FLEXIBLE_COLUMNS;
+
+    segment_v2::VerticalSegmentWriterOptions opts;
+    opts.rowset_ctx = &rowset_ctx;
+    opts.write_type = DataWriteType::TYPE_DIRECT;
+    opts.enable_unique_key_merge_on_write = true;
+
+    auto fs = io::global_local_filesystem();
+    static_cast<void>(fs->create_directory("./ut_dir"));
+    static_cast<void>(
+            fs->delete_file("./ut_dir/flexible_partial_update_rejects_sliced_row_pos.dat"));
+    io::FileWriterPtr file_writer;
+    auto st = fs->create_file("./ut_dir/flexible_partial_update_rejects_sliced_row_pos.dat",
+                              &file_writer);
+    ASSERT_TRUE(st.ok()) << st;
+
+    auto tablet = std::make_shared<FakeTablet>(schema);
+    segment_v2::VerticalSegmentWriter writer(file_writer.get(), 0, schema, tablet, nullptr, opts,
+                                             nullptr);
+
+    Block block = schema->create_block();
+    ASSERT_TRUE(writer.batch_block(&block, 1, 1).ok());
+    st = writer.write_batch();
+    EXPECT_TRUE(st.is<ErrorCode::NOT_SUPPORTED>()) << st;
+    EXPECT_NE(st.to_string().find("whole-block duplicate-key aggregation"), std::string::npos)
+            << st;
+}
+
+} // namespace doris
diff --git a/be/test/storage/segment/variant_column_writer_reader_test.cpp b/be/test/storage/segment/variant_column_writer_reader_test.cpp
index 3a644cc373f896..dc67071692927e 100644
--- a/be/test/storage/segment/variant_column_writer_reader_test.cpp
+++ b/be/test/storage/segment/variant_column_writer_reader_test.cpp
@@ -1690,6 +1690,7 @@ TEST_F(VariantColumnWriterReaderTest, test_write_doc_compact_writer_and_read_doc
         bucket_data->row_pos = 0;
         const auto* data = reinterpret_cast<const uint8_t*>(bucket_data.get());
         EXPECT_TRUE(doc_compact_writer->append_data(&data, kRows).ok());
+        EXPECT_EQ(kRows, doc_compact_writer->get_next_rowid());
     }
 
     EXPECT_TRUE(root_writer->finish().ok());
@@ -1852,6 +1853,7 @@ TEST_F(VariantColumnWriterReaderTest, test_doc_compact_sparse_write_array_gap) {
     bucket_data->row_pos = 0;
     const auto* data = reinterpret_cast<const uint8_t*>(bucket_data.get());
     EXPECT_TRUE(doc_compact_writer->append_data(&data, kRows).ok());
+    EXPECT_EQ(kRows, doc_compact_writer->get_next_rowid());
 
     EXPECT_TRUE(doc_compact_writer->finish().ok());
     EXPECT_TRUE(doc_compact_writer->write_data().ok());
@@ -2660,6 +2662,7 @@ TEST_F(VariantColumnWriterReaderTest, test_write_data_nullable) {
     const auto* ptr = (const uint8_t*)accessor->get_data();
     st = vw->append_nullable(accessor->get_nullmap(), &ptr, 1000);
     EXPECT_TRUE(st.ok()) << st.msg();
+    EXPECT_EQ(1000, vw->get_next_rowid());
     st = vw->finish();
    EXPECT_TRUE(st.ok()) << st.msg();
     auto size = vw->estimate_buffer_size();
diff --git a/be/test/storage/segment/variant_util_test.cpp b/be/test/storage/segment/variant_util_test.cpp
index 902bf9c843b115..283cd43ddc8b9b 100644
--- a/be/test/storage/segment/variant_util_test.cpp
+++ b/be/test/storage/segment/variant_util_test.cpp
@@ -19,23 +19,100 @@
 
 #include
+#include
 #include
 #include
 
 #include "common/config.h"
 #include "core/block/block.h"
+#include "core/column/column_complex.h"
+#include "core/column/column_nullable.h"
 #include "core/column/column_string.h"
 #include "core/column/column_variant.h"
+#include "core/column/column_vector.h"
+#include "core/data_type/data_type_bitmap.h"
+#include "core/data_type/data_type_number.h"
 #include "core/data_type/data_type_variant.h"
 #include "core/field.h"
+#include "core/value/bitmap_value.h"
 #include "core/value/jsonb_value.h"
 #include "exec/common/variant_util.h"
 #include "gtest/gtest.h"
+#include "storage/segment/vertical_segment_writer.h"
 #include "storage/tablet/tablet_schema.h"
+#include "storage/utils.h"
 
 namespace doris::variant_util {
 
+namespace {
+
+constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_MASK = 1ULL << 63;
+constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT = 62;
+constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_BYTE_BITS = 8;
+constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_POS_BITS = 12;
+constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_INDEX_BITS = 11;
+constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_POS_SHIFT =
+        TEST_VARIANT_PATCH_PATH_MARKER_BYTE_BITS;
+constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT =
+        TEST_VARIANT_PATCH_PATH_MARKER_POS_SHIFT + TEST_VARIANT_PATCH_PATH_MARKER_POS_BITS;
+constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_UID_SHIFT =
+        TEST_VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT + TEST_VARIANT_PATCH_PATH_MARKER_INDEX_BITS;
+constexpr uint64_t TEST_VARIANT_PATCH_PATH_MARKER_BYTE_MASK =
+        (1ULL << TEST_VARIANT_PATCH_PATH_MARKER_BYTE_BITS) - 1;
+
+static uint64_t _test_variant_patch_path_length_marker(int32_t variant_col_unique_id,
+                                                       uint64_t path_index, uint64_t length) {
+    return TEST_VARIANT_PATCH_PATH_MARKER_MASK |
+           (static_cast<uint64_t>(variant_col_unique_id)
+            << TEST_VARIANT_PATCH_PATH_MARKER_UID_SHIFT) |
+           (path_index << TEST_VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) | length;
+}
+
+static uint64_t _test_variant_patch_path_byte_marker(int32_t variant_col_unique_id,
+                                                     uint64_t path_index, uint64_t byte_pos,
+                                                     uint8_t byte) {
+    return TEST_VARIANT_PATCH_PATH_MARKER_MASK |
+           (1ULL << TEST_VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT) |
+           (static_cast<uint64_t>(variant_col_unique_id)
+            << TEST_VARIANT_PATCH_PATH_MARKER_UID_SHIFT) |
+           (path_index << TEST_VARIANT_PATCH_PATH_MARKER_INDEX_SHIFT) |
+           (byte_pos << TEST_VARIANT_PATCH_PATH_MARKER_POS_SHIFT) | byte;
+}
+
+static void _add_test_encoded_patch_path(BitmapValue* markers, int32_t variant_col_unique_id,
+                                         uint64_t path_index, std::string_view encoded_path) {
+    markers->add(_test_variant_patch_path_length_marker(variant_col_unique_id, path_index,
+                                                        encoded_path.size()));
+    for (uint64_t i = 0; i < encoded_path.size(); ++i) {
+        markers->add(_test_variant_patch_path_byte_marker(
+                variant_col_unique_id, path_index, i,
+                static_cast<uint8_t>(static_cast<unsigned char>(encoded_path[i]))));
+    }
+}
+
+static std::string _test_encode_single_part_path(std::string_view key) {
+    std::string encoded;
+    auto append_u32 = [&encoded](uint32_t value) {
+        encoded.push_back(static_cast<char>(value & 0xFF));
+        encoded.push_back(static_cast<char>((value >> 8) & 0xFF));
+        encoded.push_back(static_cast<char>((value >> 16) & 0xFF));
+        encoded.push_back(static_cast<char>((value >> 24) & 0xFF));
+    };
+    append_u32(1);
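+    // One path part follows: a u32 key length, the raw key bytes, and two trailing
+    // single-byte part fields (both zero here for a plain key).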
+    append_u32(static_cast<uint32_t>(key.size()));
+    encoded.append(key.data(), key.size());
+    encoded.push_back(0);
+    encoded.push_back(0);
+    return encoded;
+}
+
+static ColumnPtr _make_nullable_variant_column(ColumnVariant::MutablePtr variant,
+                                               bool is_null = false) {
+    auto null_map = ColumnUInt8::create(variant->size(), is_null ? 1 : 0);
+    return ColumnNullable::create(std::move(variant), std::move(null_map));
+}
+
 static ColumnString::MutablePtr _make_json_column(const std::vector& rows) {
     auto col = ColumnString::create();
     for (const auto& s : rows) {
@@ -58,6 +135,118 @@ class ScopedDuplicateJsonPathCheck {
     bool _old_value;
 };
 
+static ColumnVariant::MutablePtr _make_variant_column(
+        const std::vector<std::string_view>& rows, bool doc_mode = false,
+        ParseConfig::ParseTo parse_to = ParseConfig::ParseTo::OnlySubcolumns,
+        bool record_empty_object_path = false) {
+    auto variant = ColumnVariant::create(0, doc_mode);
+    auto json_col = _make_json_column(rows);
+    ParseConfig cfg;
+    cfg.deprecated_enable_flatten_nested = false;
+    cfg.parse_to = parse_to;
+    cfg.record_empty_object_path = record_empty_object_path;
+    parse_json_to_variant(*variant, *json_col, cfg);
+    variant->finalize();
+    return variant;
+}
+
+static ColumnVariant::MutablePtr _make_raw_string_variant_column(std::string_view value) {
+    auto variant = ColumnVariant::create(0, false);
+    doris::VariantUtil::insert_root_scalar_field(*variant,
+                                                 Field::create_field<TYPE_STRING>(String(value)));
+    variant->finalize();
+    return variant;
+}
+
+static ColumnVariant::MutablePtr _make_root_array_variant_column() {
+    auto variant = ColumnVariant::create(0, false);
+    doris::VariantUtil::insert_root_scalar_field(*variant,
+                                                 doris::VariantUtil::get_field("array_int"));
+    variant->finalize();
+    return variant;
+}
+
+static ColumnVariant::MutablePtr _make_root_jsonb_variant_column(std::string_view value) {
+    JsonBinaryValue jsonb_value;
+    Status st = jsonb_value.from_json_string(value.data(), value.size());
+    EXPECT_TRUE(st.ok()) << st.to_string();
+
+    auto variant = ColumnVariant::create(0, false);
+    doris::VariantUtil::insert_root_scalar_field(
+            *variant,
+            Field::create_field<TYPE_JSONB>(JsonbField(jsonb_value.value(), jsonb_value.size())));
+    variant->finalize();
+    return variant;
+}
+
+static std::string _make_nested_json(std::string_view key, int depth, std::string_view leaf) {
+    std::string json;
+    for (int i = 0; i < depth; ++i) {
+        json += "{\"";
+        json += key;
+        json += "\":";
+    }
+    json += leaf;
+    for (int i = 0; i < depth; ++i) {
+        json += "}";
+    }
+    return json;
+}
+
+static FieldWithDataType _get_variant_field_at(const ColumnVariant& variant, size_t row_num,
+                                               std::string_view path) {
+    Field field;
+    variant.get(row_num, field);
+    EXPECT_EQ(field.get_type(), PrimitiveType::TYPE_VARIANT);
+    const auto& object = field.get();
+    auto it = path.empty() ? object.find(PathInData()) : object.find(PathInData(path));
+    EXPECT_NE(it, object.end()) << path;
+    if (it == object.end()) {
+        return {};
+    }
+    return it->second;
+}
+
+static PathInData _make_path(std::initializer_list<std::string_view> keys) {
+    PathInData::Parts parts;
+    parts.reserve(keys.size());
+    for (std::string_view key : keys) {
+        parts.emplace_back(key, false, 0);
+    }
+    return PathInData(parts);
+}
+
+} // namespace
+
+static FieldWithDataType _get_variant_field_by_path(const ColumnVariant& variant,
+                                                    const PathInData& path) {
+    Field field;
+    variant.get(0, field);
+    EXPECT_EQ(field.get_type(), PrimitiveType::TYPE_VARIANT);
+    const auto& object = field.get();
+    auto it = object.find(path);
+    EXPECT_NE(it, object.end()) << path.get_path();
+    if (it == object.end()) {
+        return {};
+    }
+    return it->second;
+}
+
+static FieldWithDataType _get_variant_field(const ColumnVariant& variant, std::string_view path) {
+    return _get_variant_field_at(variant, 0, path);
+}
+
+static void _expect_no_variant_path(const ColumnVariant& variant, std::string_view path) {
+    Field row;
+    variant.get(0, row);
+    if (row.get_type() == PrimitiveType::TYPE_NULL) {
+        return;
+    }
+    ASSERT_EQ(row.get_type(), PrimitiveType::TYPE_VARIANT);
+    const auto& object = row.get();
+    EXPECT_FALSE(object.contains(PathInData(path))) << path;
+}
+
 TEST(VariantUtilTest, ParseDocValueToSubcolumns_FillsDefaultsAndValues) {
     const std::vector jsons = {
             R"({"a":1,"b":"x"})", //
@@ -473,6 +662,160 @@ TEST(VariantUtilTest, ParseVariantColumns_ScalarJsonStringToSubcolumns) {
     EXPECT_EQ(f.field.get(), 2);
 }
 
+TEST(VariantUtilTest, ParseVariantColumns_RejectsJsonNullWhenConfigured) {
+    auto variant = ColumnVariant::create(0, false);
+    doris::VariantUtil::insert_root_scalar_field(
+            *variant, Field::create_field<TYPE_STRING>(String(R"({"a":null})")));
+
+    Block block;
+    block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0, false), "v"});
+
+    ParseConfig parse_cfg;
+    parse_cfg.deprecated_enable_flatten_nested = false;
+    parse_cfg.reject_json_null_value = true;
+    Status st =
+            parse_and_materialize_variant_columns(block, std::vector<uint32_t> {0}, {parse_cfg});
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find(
+                      "VARIANT flexible partial update does not support JSON null patch values"),
+              std::string::npos);
+}
+
+TEST(VariantUtilTest, ParseVariantColumns_RejectsJsonNullInsideNestedArrayJsonbWhenConfigured) {
+    auto variant = ColumnVariant::create(0, false);
+    doris::VariantUtil::insert_root_scalar_field(
+            *variant, Field::create_field<TYPE_STRING>(String(R"({"a":[{"b":null}]})")));
+
+    Block block;
+    block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0, false), "v"});
+
+    ParseConfig parse_cfg;
+    parse_cfg.deprecated_enable_flatten_nested = false;
+    parse_cfg.reject_json_null_value = true;
+    Status st =
+            parse_and_materialize_variant_columns(block, std::vector<uint32_t> {0}, {parse_cfg});
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find(
+                      "VARIANT flexible partial update does not support JSON null patch values"),
+              std::string::npos);
+}
+
+TEST(VariantUtilTest, ParseVariantColumns_RecordsEmptyObjectPathWhenConfigured) {
+    auto variant = _make_variant_column({R"({"a":{}})"}, false,
+                                        ParseConfig::ParseTo::OnlySubcolumns, true);
+
+    auto a = _get_variant_field(*variant, "a");
+    ASSERT_EQ(a.field.get_type(), PrimitiveType::TYPE_JSONB);
+    const auto& jsonb = a.field.get();
+    EXPECT_EQ(JsonbToJson::jsonb_to_json_string(jsonb.get_value(), jsonb.get_size()), "{}");
+}
+
+TEST(VariantUtilTest, ParseVariantColumns_FlexiblePatchRecordsEmptyObjectPath) {
+    TabletSchemaPB schema_pb;
+    schema_pb.set_keys_type(KeysType::DUP_KEYS);
+    auto* c = schema_pb.add_column();
+    c->set_unique_id(1);
+    c->set_name("v");
+    c->set_type("VARIANT");
+    c->set_is_key(false);
+    c->set_is_nullable(false);
+    c->set_variant_enable_doc_mode(false);
+
+    TabletSchema tablet_schema;
+    tablet_schema.init_from_pb(schema_pb);
+
+    auto variant = ColumnVariant::create(0, false);
+    doris::VariantUtil::insert_root_scalar_field(
+            *variant, Field::create_field<TYPE_STRING>(String(R"({"a":{}})")));
+
+    Block block;
+    block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0, false), "v"});
+
+    Status st = parse_and_materialize_variant_columns(block, tablet_schema,
+                                                      std::vector<uint32_t> {0}, true);
+    EXPECT_TRUE(st.ok()) << st.to_string();
+
+    const auto& out = assert_cast<const ColumnVariant&>(*block.get_by_position(0).column);
+    auto a = _get_variant_field(out, "a");
+    ASSERT_EQ(a.field.get_type(), PrimitiveType::TYPE_JSONB);
+    const auto& jsonb = a.field.get();
+    EXPECT_EQ(JsonbToJson::jsonb_to_json_string(jsonb.get_value(), jsonb.get_size()), "{}");
+}
+
+TEST(VariantUtilTest, ParseVariantColumns_TabletSchemaNoVariantIsNoop) {
+    TabletSchemaPB schema_pb;
+    schema_pb.set_keys_type(KeysType::DUP_KEYS);
+    auto* c = schema_pb.add_column();
+    c->set_unique_id(1);
+    c->set_name("k");
+    c->set_type("INT");
+    c->set_is_key(false);
+    c->set_is_nullable(false);
+
+    TabletSchema tablet_schema;
+    tablet_schema.init_from_pb(schema_pb);
+
+    auto int_col = ColumnInt32::create();
+    int_col->insert_value(7);
+    Block block;
+    block.insert({int_col->get_ptr(), std::make_shared<DataTypeInt32>(), "k"});
+
+    Status st = parse_and_materialize_variant_columns(block, tablet_schema,
+                                                      std::vector<uint32_t> {0}, true);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    const auto& out = assert_cast<const ColumnInt32&>(*block.get_by_position(0).column);
+    EXPECT_EQ(out.get_element(0), 7);
+}
+
+TEST(VariantUtilTest, ParseVariantColumns_NonStringScalarRootKeepsVariant) {
+    auto variant = ColumnVariant::create(0, false);
+    doris::VariantUtil::insert_root_scalar_field(*variant, Field::create_field<TYPE_INT>(7));
+
+    Block block;
+    block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0, false), "v"});
+
+    ParseConfig parse_cfg;
+    parse_cfg.deprecated_enable_flatten_nested = false;
+    Status st =
+            parse_and_materialize_variant_columns(block, std::vector<uint32_t> {0}, {parse_cfg});
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    const auto& out = assert_cast<const ColumnVariant&>(*block.get_by_position(0).column);
+    Field field;
+    out.get(0, field);
+    EXPECT_EQ(field.get_type(), PrimitiveType::TYPE_VARIANT);
+}
+
+TEST(VariantUtilTest, ParseVariantColumns_TabletSchemaDocModeUsesDocValueColumn) {
+    TabletSchemaPB schema_pb;
+    schema_pb.set_keys_type(KeysType::DUP_KEYS);
+    auto* c = schema_pb.add_column();
+    c->set_unique_id(1);
+    c->set_name("v");
+    c->set_type("VARIANT");
+    c->set_is_key(false);
+    c->set_is_nullable(false);
+    c->set_variant_enable_doc_mode(true);
+
+    TabletSchema tablet_schema;
+    tablet_schema.init_from_pb(schema_pb);
+
+    auto variant = ColumnVariant::create(0, true);
+    doris::VariantUtil::insert_root_scalar_field(
+            *variant, Field::create_field<TYPE_STRING>(String(R"({"a":1})")));
+
+    Block block;
+    block.insert({variant->get_ptr(), std::make_shared<DataTypeVariant>(0, true), "v"});
+
+    Status st = parse_and_materialize_variant_columns(block, tablet_schema,
+                                                      std::vector<uint32_t> {0}, false);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    const auto& out = assert_cast<const ColumnVariant&>(*block.get_by_position(0).column);
+    auto docs_subcolumns = materialize_docs_to_subcolumns_map(out);
+    ASSERT_TRUE(docs_subcolumns.contains("a"));
+}
+
 TEST(VariantUtilTest, ParseVariantColumns_DocModeBinaryToSubcolumns) {
     const std::vector jsons = {
             R"({"a":1,"b":"x"})", //
@@ -526,6 +869,761 @@ TEST(VariantUtilTest, ParseVariantColumns_DocModeBinaryToSubcolumns) {
     EXPECT_EQ(f.field.get(), "y");
 }
 
+TEST(VariantUtilTest, MergeVariantPatch_MergesObjectPaths) {
+    auto old_variant = _make_variant_column({R"({"a":1,"c":3,"nested":{"x":1}})"});
+    auto patch_variant = _make_variant_column({R"({"a":10,"b":20,"nested":{"y":2}})"});
+    auto merged_variant = ColumnVariant::create(0, false);
+
+    Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    merged_variant->finalize();
+
+    auto a = _get_variant_field(*merged_variant, "a");
+    EXPECT_EQ(a.field.get(), 10);
+    auto b = _get_variant_field(*merged_variant, "b");
+    EXPECT_EQ(b.field.get(), 20);
+    auto c = _get_variant_field(*merged_variant, "c");
+    EXPECT_EQ(c.field.get(), 3);
+    auto nested_x = _get_variant_field(*merged_variant, "nested.x");
+    EXPECT_EQ(nested_x.field.get(), 1);
+    auto nested_y = _get_variant_field(*merged_variant, "nested.y");
+    EXPECT_EQ(nested_y.field.get(), 2);
+}
+
+TEST(VariantUtilTest, MergeVariantPatch_RejectsRawStringRoot) {
+    auto old_variant = _make_variant_column({R"({"a":1})"});
+    auto patch_variant = _make_raw_string_variant_column(R"({"b":2})");
+    auto merged_variant = ColumnVariant::create(0, false);
+
+    Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant);
+    EXPECT_FALSE(st.ok());
+}
+
+TEST(VariantUtilTest, MergeVariantPatch_RejectsRawStringScalarRoot) {
+    auto old_variant = _make_variant_column({R"({"a":1})"});
+    auto patch_variant = _make_raw_string_variant_column("plain text");
+    auto merged_variant = ColumnVariant::create(0, false);
+
+    Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant);
+    EXPECT_FALSE(st.ok());
+}
+
+TEST(VariantUtilTest, MergeVariantPatch_MergesRootJsonbObjectBase) {
+    auto old_variant = _make_root_jsonb_variant_column(R"({"a":1})");
+    auto patch_variant = _make_variant_column({R"({"b":2})"});
+    auto merged_variant = ColumnVariant::create(0, false);
+
+    Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    merged_variant->finalize();
+
+    auto a = _get_variant_field(*merged_variant, "a");
+    EXPECT_EQ(a.field.get(), 1);
+    auto b = _get_variant_field(*merged_variant, "b");
+    EXPECT_EQ(b.field.get(), 2);
+}
+
+TEST(VariantUtilTest, MergeVariantPatch_MergesRootJsonbObjectPatch) {
+    auto old_variant = _make_variant_column({R"({"a":1})"});
+    auto patch_variant = _make_root_jsonb_variant_column(R"({"b":2})");
+    auto merged_variant = ColumnVariant::create(0, false);
+
+    Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    merged_variant->finalize();
+
+    auto a = _get_variant_field(*merged_variant, "a");
+    EXPECT_EQ(a.field.get(), 1);
+    auto b = _get_variant_field(*merged_variant, "b");
+    EXPECT_EQ(b.field.get(), 2);
+}
+
+TEST(VariantUtilTest, MergeVariantPatch_RejectsRootJsonbObjectPatchWithNull) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto old_variant = _make_variant_column({R"({"a":1})"});
+    auto patch_variant = _make_root_jsonb_variant_column(R"({"a":null})");
+    auto merged_variant = ColumnVariant::create(0, false);
+
+    BitmapValue patch_path_markers;
+    Status st =
+            mark_variant_patch_paths(*patch_variant, 0, variant_col_unique_id, &patch_path_markers);
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find(
"VARIANT flexible partial update does not support JSON null patch values"), + std::string::npos); + + st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + EXPECT_FALSE(st.ok()); + EXPECT_NE(st.to_string().find( + "VARIANT flexible partial update does not support JSON null patch values"), + std::string::npos); +} + +TEST(VariantUtilTest, MergeVariantPatch_ReplacesConflictingPaths) { + { + auto old_variant = _make_variant_column({R"({"a":{"x":1},"b":2})"}); + auto patch_variant = _make_variant_column({R"({"a":3})"}); + auto merged_variant = ColumnVariant::create(0, false); + + Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a = _get_variant_field(*merged_variant, "a"); + EXPECT_EQ(a.field.get(), 3); + auto b = _get_variant_field(*merged_variant, "b"); + EXPECT_EQ(b.field.get(), 2); + _expect_no_variant_path(*merged_variant, "a.x"); + } + { + auto old_variant = _make_variant_column({R"({"a":3,"b":2})"}); + auto patch_variant = _make_variant_column({R"({"a":{"y":4}})"}); + auto merged_variant = ColumnVariant::create(0, false); + + Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a_y = _get_variant_field(*merged_variant, "a.y"); + EXPECT_EQ(a_y.field.get(), 4); + auto b = _get_variant_field(*merged_variant, "b"); + EXPECT_EQ(b.field.get(), 2); + _expect_no_variant_path(*merged_variant, "a"); + } + { + auto old_variant = _make_variant_column({R"({"a":{"x":1},"b":2})"}); + auto patch_variant = _make_variant_column({R"({"a":{}})"}, false, + ParseConfig::ParseTo::OnlySubcolumns, true); + auto merged_variant = ColumnVariant::create(0, false); + + Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a = _get_variant_field(*merged_variant, "a"); + ASSERT_EQ(a.field.get_type(), PrimitiveType::TYPE_JSONB); + const auto& jsonb = a.field.get(); + EXPECT_EQ(JsonbToJson::jsonb_to_json_string(jsonb.get_value(), jsonb.get_size()), "{}"); + auto b = _get_variant_field(*merged_variant, "b"); + EXPECT_EQ(b.field.get(), 2); + _expect_no_variant_path(*merged_variant, "a.x"); + } +} + +TEST(VariantUtilTest, MergeVariantPatch_RejectsRootArray) { + auto old_variant = _make_variant_column({R"({"a":1})"}); + auto patch_variant = _make_root_array_variant_column(); + auto merged_variant = ColumnVariant::create(0, false); + + Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant); + EXPECT_FALSE(st.ok()); +} + +TEST(VariantUtilTest, MergeVariantPatch_RejectsNonObjectOldRootValues) { + constexpr int32_t variant_col_unique_id = 100; + auto patch_variant = _make_variant_column({R"({"a":2})"}); + BitmapValue patch_path_markers; + Status st = + mark_variant_patch_paths(*patch_variant, 0, variant_col_unique_id, &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + auto expect_reject_old_value = [&](const ColumnVariant& old_variant) { + auto merged_variant = ColumnVariant::create(0, false); + Status merge_st = merge_variant_patch(old_variant, 0, *patch_variant, 0, *merged_variant); + EXPECT_FALSE(merge_st.ok()); + EXPECT_NE(merge_st.to_string().find("VARIANT flexible partial update only supports " + "patching JSON object old values"), + std::string::npos); + + merged_variant = ColumnVariant::create(0, false); + 
+        merge_st = merge_variant_patch_by_path_markers(old_variant, 0, *patch_variant, 0,
+                                                       variant_col_unique_id, patch_path_markers,
+                                                       false, *merged_variant);
+        EXPECT_FALSE(merge_st.ok());
+        EXPECT_NE(merge_st.to_string().find("VARIANT flexible partial update only supports "
+                                            "patching JSON object old values"),
+                  std::string::npos);
+    };
+
+    expect_reject_old_value(*_make_raw_string_variant_column("plain text"));
+    expect_reject_old_value(*_make_root_array_variant_column());
+    expect_reject_old_value(*_make_root_jsonb_variant_column("null"));
+}
+
+TEST(VariantUtilTest, MergeVariantPatch_RejectsDocModeObjectPaths) {
+    auto old_variant =
+            _make_variant_column({R"({"a":1})"}, true, ParseConfig::ParseTo::OnlyDocValueColumn);
+    auto patch_variant =
+            _make_variant_column({R"({"b":2})"}, true, ParseConfig::ParseTo::OnlyDocValueColumn);
+    auto merged_variant = ColumnVariant::create(0, true);
+
+    Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *merged_variant);
+    EXPECT_FALSE(st.ok());
+}
+
+TEST(VariantUtilTest, MergeVariantPatch_HandlesNullableVariantColumns) {
+    auto old_variant = _make_nullable_variant_column(_make_variant_column({R"({"a":1,"b":3})"}));
+    auto patch_variant = _make_nullable_variant_column(_make_variant_column({R"({"a":2})"}));
+    auto dst_nested = ColumnVariant::create(0, false);
+    auto* dst_variant = dst_nested.get();
+    auto dst_variant_nullable =
+            ColumnNullable::create(std::move(dst_nested), ColumnUInt8::create());
+
+    Status st = merge_variant_patch(*old_variant, 0, *patch_variant, 0, *dst_variant_nullable);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    dst_variant->finalize();
+
+    auto a = _get_variant_field(*dst_variant, "a");
+    EXPECT_EQ(a.field.get(), 2);
+    auto b = _get_variant_field(*dst_variant, "b");
+    EXPECT_EQ(b.field.get(), 3);
+}
+
+TEST(VariantUtilTest, MergeVariantPatch_RejectsNullPatchRows) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto old_variant = _make_variant_column({R"({"a":1})"});
+    auto null_patch = _make_nullable_variant_column(_make_variant_column({R"({"a":2})"}), true);
+
+    BitmapValue patch_path_markers;
+    Status st =
+            mark_variant_patch_paths(*null_patch, 0, variant_col_unique_id, &patch_path_markers);
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find(
+                      "VARIANT flexible partial update only supports JSON object patch values"),
+              std::string::npos);
+
+    auto merged_variant = ColumnVariant::create(0, false);
+    st = merge_variant_patch(*old_variant, 0, *null_patch, 0, *merged_variant);
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find(
+                      "VARIANT flexible partial update only supports JSON object patch values"),
+              std::string::npos);
+
+    st = merge_variant_patch_by_path_markers(*old_variant, 0, *null_patch, 0, variant_col_unique_id,
+                                             patch_path_markers, false, *merged_variant);
+    EXPECT_FALSE(st.ok());
+    EXPECT_NE(st.to_string().find(
+                      "VARIANT flexible partial update only supports JSON object patch values"),
+              std::string::npos);
+}
+
+TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_PreservesConcurrentPaths) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto latest_old = _make_variant_column({R"({"a":1,"b":3})"});
+    auto flushed_full_value = _make_variant_column({R"({"a":2,"b":1})"});
+    auto original_patch = _make_variant_column({R"({"a":2})"});
+    BitmapValue patch_path_markers;
+    Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id,
+                                         &patch_path_markers);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+
+    auto merged_variant = ColumnVariant::create(0, false);
+    st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0,
+                                             variant_col_unique_id, patch_path_markers, false,
+                                             *merged_variant);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    merged_variant->finalize();
+
+    auto a = _get_variant_field(*merged_variant, "a");
+    EXPECT_EQ(a.field.get(), 2);
+    auto b = _get_variant_field(*merged_variant, "b");
+    EXPECT_EQ(b.field.get(), 3);
+}
+
+TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_EmptyMarkersDropFlushedPatchPaths) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto latest_old = _make_variant_column({R"({"a":1,"b":3})"});
+    auto flushed_full_value = _make_variant_column({R"({"a":2,"b":1})"});
+    BitmapValue patch_path_markers;
+
+    auto merged_variant = ColumnVariant::create(0, false);
+    Status st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0,
+                                                    variant_col_unique_id, patch_path_markers,
+                                                    false, *merged_variant);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    merged_variant->finalize();
+
+    auto a = _get_variant_field(*merged_variant, "a");
+    EXPECT_EQ(a.field.get(), 1);
+    auto b = _get_variant_field(*merged_variant, "b");
+    EXPECT_EQ(b.field.get(), 3);
+}
+
+TEST(VariantUtilTest, MergeVariantPatchPathMarkers_IntersectsNonVariantSkipBits) {
+    BitmapValue left;
+    left.add(1);
+    left.add(2);
+    BitmapValue right;
+    right.add(2);
+    right.add(3);
+
+    BitmapValue merged;
+    Status st = merge_variant_patch_path_markers(left, right, &merged);
+    ASSERT_TRUE(st.ok()) << st.to_string();
+    EXPECT_FALSE(merged.contains(1));
+    EXPECT_TRUE(merged.contains(2));
+    EXPECT_FALSE(merged.contains(3));
+}
+
+TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_RejectsCorruptMarkers) {
+    constexpr int32_t variant_col_unique_id = 100;
+    auto latest_old = _make_variant_column({R"({"a":1})"});
+    auto flushed_full_value = _make_variant_column({R"({"a":2})"});
+
+    auto expect_merge_fails = [&](const BitmapValue& patch_path_markers,
+                                  std::string_view expected) {
+        auto merged_variant = ColumnVariant::create(0, false);
+        Status st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0,
+                                                        variant_col_unique_id, patch_path_markers,
+                                                        false, *merged_variant);
+        EXPECT_FALSE(st.ok());
+        EXPECT_NE(st.to_string().find(expected), std::string::npos) << st.to_string();
+    };
+
+    {
+        BitmapValue patch_path_markers;
+        patch_path_markers.add(
+                _test_variant_patch_path_length_marker(variant_col_unique_id, 0, 4097));
+        expect_merge_fails(patch_path_markers, "Invalid VARIANT patch path marker length");
+    }
+    {
+        BitmapValue patch_path_markers;
+        patch_path_markers.add(_test_variant_patch_path_byte_marker(variant_col_unique_id, 0, 0,
+                                                                    static_cast<uint8_t>('a')));
+        expect_merge_fails(patch_path_markers, "VARIANT patch path marker byte without length");
+    }
+    {
+        BitmapValue patch_path_markers;
+        patch_path_markers.add(_test_variant_patch_path_length_marker(variant_col_unique_id, 0, 1));
+        patch_path_markers.add(_test_variant_patch_path_byte_marker(variant_col_unique_id, 0, 2,
+                                                                    static_cast<uint8_t>('a')));
+        expect_merge_fails(patch_path_markers, "VARIANT patch path marker byte exceeds length");
+    }
+    {
+        BitmapValue patch_path_markers;
+        std::string encoded_path(4, '\0');
+        _add_test_encoded_patch_path(&patch_path_markers, variant_col_unique_id, 0, encoded_path);
+        expect_merge_fails(patch_path_markers, "Invalid VARIANT patch path marker part count");
+    }
+    {
+        BitmapValue patch_path_markers;
+        std::string encoded_path;
+        encoded_path.push_back(1);
+        encoded_path.push_back(0);
+        encoded_path.push_back(0);
encoded_path.push_back(0); + _add_test_encoded_patch_path(&patch_path_markers, variant_col_unique_id, 0, encoded_path); + expect_merge_fails(patch_path_markers, "Invalid VARIANT patch path marker part payload"); + } + { + BitmapValue patch_path_markers; + std::string encoded_path = _test_encode_single_part_path("a"); + encoded_path.push_back('x'); + _add_test_encoded_patch_path(&patch_path_markers, variant_col_unique_id, 0, encoded_path); + expect_merge_fails(patch_path_markers, "Trailing bytes in VARIANT patch path marker"); + } + { + BitmapValue patch_path_markers; + const std::string encoded_path = _test_encode_single_part_path("a"); + _add_test_encoded_patch_path(&patch_path_markers, variant_col_unique_id, 0, encoded_path); + patch_path_markers.add(_test_variant_patch_path_length_marker(variant_col_unique_id, 0, + encoded_path.size() + 1)); + expect_merge_fails(patch_path_markers, "Conflicting VARIANT patch path marker length"); + } + { + BitmapValue patch_path_markers; + _add_test_encoded_patch_path(&patch_path_markers, variant_col_unique_id, 0, + _test_encode_single_part_path("a")); + for (uint64_t marker : patch_path_markers) { + if ((marker & (1ULL << TEST_VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT)) != 0) { + uint8_t byte = marker & TEST_VARIANT_PATCH_PATH_MARKER_BYTE_MASK; + patch_path_markers.add((marker & ~TEST_VARIANT_PATCH_PATH_MARKER_BYTE_MASK) | + static_cast<uint64_t>(byte + 1)); + break; + } + } + expect_merge_fails(patch_path_markers, "Conflicting VARIANT patch path marker byte"); + } + { + BitmapValue patch_path_markers; + _add_test_encoded_patch_path(&patch_path_markers, variant_col_unique_id, 0, + _test_encode_single_part_path("a")); + for (uint64_t marker : patch_path_markers) { + if ((marker & (1ULL << TEST_VARIANT_PATCH_PATH_MARKER_CLASS_SHIFT)) != 0) { + patch_path_markers.remove(marker); + break; + } + } + expect_merge_fails(patch_path_markers, "Incomplete VARIANT patch path marker"); + } +} + +TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsRootArray) { + constexpr int32_t variant_col_unique_id = 100; + auto original_patch = _make_root_array_variant_column(); + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + EXPECT_FALSE(st.ok()); +} + +TEST(VariantUtilTest, MarkVariantPatchPaths_MarksRootJsonbObjectPatch) { + constexpr int32_t variant_col_unique_id = 100; + auto original_patch = _make_root_jsonb_variant_column(R"({"a":2})"); + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + EXPECT_GT(patch_path_markers.cardinality(), 0); +} + +TEST(VariantUtilTest, MarkVariantPatchPaths_AllowsDeepPathWithinMarkerCapacity) { + constexpr int32_t variant_col_unique_id = 100; + const std::string key(200, 'a'); + const std::string json = _make_nested_json(key, 6, "1"); + auto original_patch = _make_variant_column({std::string_view(json)}); + + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + EXPECT_GT(patch_path_markers.cardinality(), 1020); +} + +TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsPathBeyondMarkerCapacity) { + constexpr int32_t variant_col_unique_id = 100; + const std::string key(250, 'a'); + const std::string json = _make_nested_json(key, 20, "1"); + auto original_patch = _make_variant_column({std::string_view(json)}); + + 
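// Sketch of the size math, assuming the 4-byte count/length framing of the path encoding: + // 20 parts with 250-byte keys encode to roughly 4 + 20 * (4 + 250 + 1) = 5104 bytes, + // past the 4096-byte per-path limit asserted below. +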
BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + EXPECT_FALSE(st.ok()); + EXPECT_NE(st.to_string().find( + "VARIANT flexible partial update encoded patch path exceeds 4096 bytes"), + std::string::npos); +} + +TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsTooManyPaths) { + constexpr int32_t variant_col_unique_id = 100; + std::string json = "{"; + for (int i = 0; i < 257; ++i) { + if (i != 0) { + json += ","; + } + json += "\"k"; + json += std::to_string(i); + json += "\":"; + json += std::to_string(i); + } + json += "}"; + auto original_patch = _make_variant_column({std::string_view(json)}); + + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + EXPECT_FALSE(st.ok()); + EXPECT_NE(st.to_string().find( + "VARIANT flexible partial update supports at most 256 patch paths per row"), + std::string::npos); +} + +TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsTotalEncodedPathBytesBeyondLimit) { + constexpr int32_t variant_col_unique_id = 100; + std::string json = "{"; + for (int i = 0; i < 256; ++i) { + if (i != 0) { + json += ","; + } + std::string key = "k" + std::to_string(i) + "_"; + key.append(255 - key.size(), 'a'); + json += "\""; + json += key; + json += "\":"; + json += std::to_string(i); + } + json += "}"; + auto original_patch = _make_variant_column({std::string_view(json)}); + + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + EXPECT_FALSE(st.ok()); + EXPECT_NE(st.to_string().find("VARIANT flexible partial update encoded patch paths exceed " + "65536 bytes per row"), + std::string::npos); +} + +TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsTotalEncodedPathBytesAcrossColumns) { + auto make_json = [] { + std::string json = "{"; + for (int i = 0; i < 128; ++i) { + if (i != 0) { + json += ","; + } + std::string key = "k" + std::to_string(i) + "_"; + key.append(255 - key.size(), 'a'); + json += "\""; + json += key; + json += "\":"; + json += std::to_string(i); + } + json += "}"; + return json; + }; + auto patch_v1 = _make_variant_column({make_json()}); + auto patch_v2 = _make_variant_column({make_json()}); + + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*patch_v1, 0, 100, &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + BitmapValue markers_before_v2 = patch_path_markers; + st = mark_variant_patch_paths(*patch_v2, 0, 101, &patch_path_markers); + EXPECT_FALSE(st.ok()); + EXPECT_NE(st.to_string().find("VARIANT flexible partial update encoded patch paths exceed " + "65536 bytes per row"), + std::string::npos); + EXPECT_EQ(patch_path_markers.cardinality(), markers_before_v2.cardinality()); +} + +TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsRootJsonbArray) { + constexpr int32_t variant_col_unique_id = 100; + auto original_patch = _make_root_jsonb_variant_column(R"([1,2,3])"); + + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + EXPECT_FALSE(st.ok()); + EXPECT_NE(st.to_string().find( + "VARIANT flexible partial update only supports JSON object patch values"), + std::string::npos); +} + +TEST(VariantUtilTest, MarkVariantPatchPaths_RejectsDocMode) { + constexpr int32_t variant_col_unique_id = 100; + auto original_patch = + _make_variant_column({R"({"a":2})"}, true, 
ParseConfig::ParseTo::OnlyDocValueColumn); + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + EXPECT_FALSE(st.ok()); +} + +TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_EmptyObjectKeepsLatestOld) { + constexpr int32_t variant_col_unique_id = 100; + auto latest_old = _make_variant_column({R"({"a":1,"b":3})"}); + auto flushed_full_value = _make_variant_column({R"({"a":1,"b":1})"}); + auto original_patch = _make_variant_column({R"({})"}); + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + auto merged_variant = ColumnVariant::create(0, false); + st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0, + variant_col_unique_id, patch_path_markers, false, + *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a = _get_variant_field(*merged_variant, "a"); + EXPECT_EQ(a.field.get<Int64>(), 1); + auto b = _get_variant_field(*merged_variant, "b"); + EXPECT_EQ(b.field.get<Int64>(), 3); +} + +TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_IsolatesVariantColumns) { + constexpr int32_t v1_unique_id = 100; + constexpr int32_t v2_unique_id = 101; + auto latest_old = _make_variant_column({R"({"a":1,"b":9})"}); + auto flushed_full_v1 = _make_variant_column({R"({"a":2,"b":1})"}); + auto original_patch_v1 = _make_variant_column({R"({"a":2})"}); + auto original_patch_v2 = _make_variant_column({R"({"b":8})"}); + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch_v1, 0, v1_unique_id, &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + st = mark_variant_patch_paths(*original_patch_v2, 0, v2_unique_id, &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + auto merged_variant = ColumnVariant::create(0, false); + st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_v1, 0, v1_unique_id, + patch_path_markers, false, *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a = _get_variant_field(*merged_variant, "a"); + EXPECT_EQ(a.field.get<Int64>(), 2); + auto b = _get_variant_field(*merged_variant, "b"); + EXPECT_EQ(b.field.get<Int64>(), 9); +} + +TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_DeletedOldUsesPatchOnly) { + constexpr int32_t variant_col_unique_id = 100; + auto deleted_old = _make_variant_column({R"({"a":1,"b":9})"}); + auto flushed_full_value = _make_variant_column({R"({"a":2,"b":1})"}); + auto original_patch = _make_variant_column({R"({"a":2})"}); + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + auto merged_variant = ColumnVariant::create(0, false); + st = merge_variant_patch_by_path_markers(*deleted_old, 0, *flushed_full_value, 0, + variant_col_unique_id, patch_path_markers, true, + *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a = _get_variant_field(*merged_variant, "a"); + EXPECT_EQ(a.field.get<Int64>(), 2); + _expect_no_variant_path(*merged_variant, "b"); +} + +TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_EmptyObjectRemovesStaleSubpaths) { + constexpr int32_t variant_col_unique_id = 100; + auto latest_old = _make_variant_column({R"({"a":{"x":9},"b":3})"}); + auto flushed_full_value = 
_make_variant_column({R"({"a":{},"b":1})"}, false, + ParseConfig::ParseTo::OnlySubcolumns, true); + auto original_patch = _make_variant_column({R"({"a":{}})"}, false, + ParseConfig::ParseTo::OnlySubcolumns, true); + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + auto merged_variant = ColumnVariant::create(0, false); + st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0, + variant_col_unique_id, patch_path_markers, false, + *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a = _get_variant_field(*merged_variant, "a"); + ASSERT_EQ(a.field.get_type(), PrimitiveType::TYPE_JSONB); + const auto& jsonb = a.field.get<JsonbField>(); + EXPECT_EQ(JsonbToJson::jsonb_to_json_string(jsonb.get_value(), jsonb.get_size()), "{}"); + auto b = _get_variant_field(*merged_variant, "b"); + EXPECT_EQ(b.field.get<Int64>(), 3); + _expect_no_variant_path(*merged_variant, "a.x"); +} + +TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_PreservesSiblingChildPatch) { + constexpr int32_t variant_col_unique_id = 100; + auto latest_old = _make_variant_column({R"({"a":{"c":9},"x":1})"}); + auto flushed_full_value = _make_variant_column({R"({"a":{"b":1}})"}); + auto original_patch = _make_variant_column({R"({"a":{"b":1}})"}); + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + auto merged_variant = ColumnVariant::create(0, false); + st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0, + variant_col_unique_id, patch_path_markers, false, + *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a_b = _get_variant_field(*merged_variant, "a.b"); + EXPECT_EQ(a_b.field.get<Int64>(), 1); + auto a_c = _get_variant_field(*merged_variant, "a.c"); + EXPECT_EQ(a_c.field.get<Int64>(), 9); + auto x = _get_variant_field(*merged_variant, "x"); + EXPECT_EQ(x.field.get<Int64>(), 1); +} + +TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_DistinguishesDottedKeyFromNestedPath) { + constexpr int32_t variant_col_unique_id = 100; + auto latest_old = _make_variant_column({R"({"a.b":7,"a":{"c":9},"x":1})"}); + auto flushed_full_value = _make_variant_column({R"({"a":{"b":1}})"}); + auto original_patch = _make_variant_column({R"({"a":{"b":1}})"}); + BitmapValue patch_path_markers; + Status st = mark_variant_patch_paths(*original_patch, 0, variant_col_unique_id, + &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + auto merged_variant = ColumnVariant::create(0, false); + st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0, + variant_col_unique_id, patch_path_markers, false, + *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto nested_a_b = _get_variant_field_by_path(*merged_variant, _make_path({"a", "b"})); + EXPECT_EQ(nested_a_b.field.get<Int64>(), 1); + auto dotted_a_b = _get_variant_field_by_path(*merged_variant, _make_path({"a.b"})); + EXPECT_EQ(dotted_a_b.field.get<Int64>(), 7); + auto a_c = _get_variant_field(*merged_variant, "a.c"); + EXPECT_EQ(a_c.field.get<Int64>(), 9); + auto x = _get_variant_field(*merged_variant, "x"); + EXPECT_EQ(x.field.get<Int64>(), 1); +} + +TEST(VariantUtilTest, MergeVariantPatchByPathMarkers_ParentMarkerRemovesStaleSubpaths) { + constexpr int32_t variant_col_unique_id = 100; + 
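// A parent path marked with an empty object means the whole subtree was patched, so the + // stale "a.c" under the newer old value must be dropped while the untouched "x" survives. +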
auto latest_old = _make_variant_column({R"({"a":{"c":9},"x":1})"}); + auto flushed_full_value = _make_variant_column({R"({"a":{"b":1}})"}); + auto parent_patch = _make_variant_column({R"({"a":{}})"}, false, + ParseConfig::ParseTo::OnlySubcolumns, true); + auto child_patch = _make_variant_column({R"({"a":{"b":1}})"}); + BitmapValue patch_path_markers; + Status st = + mark_variant_patch_paths(*parent_patch, 0, variant_col_unique_id, &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + st = mark_variant_patch_paths(*child_patch, 0, variant_col_unique_id, &patch_path_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + auto merged_variant = ColumnVariant::create(0, false); + st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0, + variant_col_unique_id, patch_path_markers, false, + *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a_b = _get_variant_field(*merged_variant, "a.b"); + EXPECT_EQ(a_b.field.get<Int64>(), 1); + auto x = _get_variant_field(*merged_variant, "x"); + EXPECT_EQ(x.field.get<Int64>(), 1); + _expect_no_variant_path(*merged_variant, "a.c"); +} + +TEST(VariantUtilTest, MergeVariantPatchPathMarkers_RebuildsExactMetadata) { + constexpr int32_t variant_col_unique_id = 100; + auto latest_old = _make_variant_column({R"({"a":{"c":9},"x":1})"}); + auto flushed_full_value = _make_variant_column({R"({"a":{"b":1}})"}); + auto parent_patch = _make_variant_column({R"({"a":{}})"}, false, + ParseConfig::ParseTo::OnlySubcolumns, true); + auto child_patch = _make_variant_column({R"({"a":{"b":1}})"}); + + BitmapValue parent_markers; + parent_markers.add(variant_col_unique_id); + Status st = mark_variant_patch_paths(*parent_patch, 0, variant_col_unique_id, &parent_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + BitmapValue child_markers; + st = mark_variant_patch_paths(*child_patch, 0, variant_col_unique_id, &child_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + + BitmapValue merged_markers; + st = merge_variant_patch_path_markers(parent_markers, child_markers, &merged_markers); + ASSERT_TRUE(st.ok()) << st.to_string(); + EXPECT_FALSE(merged_markers.contains(variant_col_unique_id)); + + auto merged_variant = ColumnVariant::create(0, false); + st = merge_variant_patch_by_path_markers(*latest_old, 0, *flushed_full_value, 0, + variant_col_unique_id, merged_markers, false, + *merged_variant); + ASSERT_TRUE(st.ok()) << st.to_string(); + merged_variant->finalize(); + + auto a_b = _get_variant_field(*merged_variant, "a.b"); + EXPECT_EQ(a_b.field.get<Int64>(), 1); + auto x = _get_variant_field(*merged_variant, "x"); + EXPECT_EQ(x.field.get<Int64>(), 1); + _expect_no_variant_path(*merged_variant, "a.c"); +} + TEST(VariantUtilTest, ParseVariantColumns_DocModeRejectOnlySubcolumnsConfig) { const std::vector<std::string> jsons = {R"({"a":1})"}; auto variant = ColumnVariant::create(0, true); diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index 1142b1645a978b..b8beb2be71b000 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -2650,6 +2650,11 @@ public int getAsInt() { throw new DdlException("only support light schema change operator when use table with binlog"); } + // Revalidate the final schema while holding the table write lock. 
Nereids validation + runs before this lock, so a concurrent ALTER can otherwise change whether the + pending VARIANT columns are allowed with flexible partial update. + validateVariantColumnsForFlexiblePartialUpdate(olapTable, indexSchemaMap); + if (lightSchemaChange) { long jobId = Env.getCurrentEnv().getNextId(); //for schema change add/drop value column optimize, direct modify table meta. @@ -2676,6 +2681,17 @@ public int getAsInt() { } } + private void validateVariantColumnsForFlexiblePartialUpdate( + OlapTable olapTable, Map<Long, LinkedList<Column>> indexSchemaMap) throws UserException { + List<Column> baseSchema = indexSchemaMap.get(olapTable.getBaseIndexId()); + Preconditions.checkNotNull(baseSchema); + if (olapTable.hasSkipBitmapColumn() + || baseSchema.stream().anyMatch(Column::isSkipBitmapColumn)) { + OlapTable.validateVariantColumnsForFlexiblePartialUpdate( + baseSchema, olapTable.variantEnableFlattenNested()); + } + } + @Override public void processForNereids(String rawSql, List<AlterCommand> alterCommands, Database db, OlapTable olapTable) throws UserException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 80a71ff25078ba..4caa1366aead69 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -3195,15 +3195,26 @@ public boolean getEnableUniqueKeySkipBitmap() { * Validate that the table supports flexible partial update. * Checks the following constraints: * 1. Must be MoW unique key table - * 2. Must have skip_bitmap column - * 3. Must have light_schema_change enabled - * 4. Cannot have variant columns + * 2. Must not have cluster keys + * 3. Must have skip_bitmap column + * 4. Must have light_schema_change enabled * @throws UserException if any constraint is not satisfied */ public void validateForFlexiblePartialUpdate() throws UserException { + validateForFlexiblePartialUpdate(true); + } + + /** + * Validate that the table supports flexible partial update. + */ + public void validateForFlexiblePartialUpdate(boolean validateBackendCapability) throws UserException { if (!getEnableUniqueKeyMergeOnWrite()) { throw new UserException("Flexible partial update is only supported in unique table MoW"); } + if (isUniqKeyMergeOnWriteWithClusterKeys()) { + throw new UserException( + "Flexible partial update does not support merge-on-write Unique table with cluster keys"); + } if (!hasSkipBitmapColumn()) { throw new UserException("Flexible partial update can only support table with skip bitmap hidden column." + " But table " + getName() + " doesn't have it. You can use `ALTER TABLE " + getName() @@ -3213,8 +3224,80 @@ public void validateForFlexiblePartialUpdate() throws UserException { throw new UserException("Flexible partial update can only support table with light_schema_change enabled." + " But table " + getName() + "'s property light_schema_change is false"); } - if (hasVariantColumns()) { - throw new UserException("Flexible partial update can only support table without variant columns."); + validateVariantColumnsForFlexiblePartialUpdate(validateBackendCapability); + } + + public void validateVariantColumnsForFlexiblePartialUpdate() throws UserException { + validateVariantColumnsForFlexiblePartialUpdate(true); + } + + /** + * Validate VARIANT columns for flexible partial update. 
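+ * The instance overloads delegate to the static form with the table's base schema, so ALTER + * paths can also validate a pending schema before it is applied. A minimal usage sketch (the + * pendingColumns list is a hypothetical caller-side name): + * <pre>{@code + * List<Column> pendingColumns = ...; // schema as it would look after the ALTER + * OlapTable.validateVariantColumnsForFlexiblePartialUpdate(pendingColumns, false, true); + * }</pre>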
+ */ + public void validateVariantColumnsForFlexiblePartialUpdate(boolean validateBackendCapability) + throws UserException { + validateVariantColumnsForFlexiblePartialUpdate( + getBaseSchema(), variantEnableFlattenNested(), validateBackendCapability); + } + + public static void validateVariantColumnsForFlexiblePartialUpdate(List<Column> columns) throws UserException { + validateVariantColumnsForFlexiblePartialUpdate(columns, false); + } + + public static void validateVariantColumnsForFlexiblePartialUpdate( + List<Column> columns, boolean deprecatedVariantFlattenNested) throws UserException { + validateVariantColumnsForFlexiblePartialUpdate(columns, deprecatedVariantFlattenNested, true); + } + + /** + * Validate VARIANT columns for flexible partial update. + */ + public static void validateVariantColumnsForFlexiblePartialUpdate( + List<Column> columns, boolean deprecatedVariantFlattenNested, boolean validateBackendCapability) + throws UserException { + boolean hasVariantColumn = false; + for (Column column : columns) { + validateVariantColumnForFlexiblePartialUpdate(column); + if (column.getType().isVariantType() && deprecatedVariantFlattenNested) { + throw new UserException( + "VARIANT flexible partial update does not support " + + "deprecated_variant_enable_flatten_nested in this version"); + } + hasVariantColumn |= column.getType().isVariantType(); + } + if (hasVariantColumn && validateBackendCapability) { + try { + validateBackendsSupportVariantFlexiblePartialUpdate( + Env.getCurrentSystemInfo().getBackendsByCurrentCluster().values()); + } catch (AnalysisException e) { + throw new UserException(e.getMessage(), e); + } + } + } + + public static void validateVariantColumnForFlexiblePartialUpdate(Column column) throws UserException { + if (column.getType().isVariantType() && column.getVariantEnableDocMode()) { + throw new UserException( + "VARIANT flexible partial update does not support doc mode in this version"); + } + } + + @VisibleForTesting + static void validateBackendsSupportVariantFlexiblePartialUpdate(Collection<Backend> backends) + throws UserException { + for (Backend backend : backends) { + if (!backend.isAlive()) { + throw new UserException("VARIANT flexible partial update requires all backends to be " + + "alive and advertise variant patch skip-bitmap marker support. Backend " + + backend.getId() + " (" + backend.getHost() + ") is not alive"); + } + if (backend.supportsVariantFlexiblePartialUpdate()) { + continue; + } + throw new UserException("VARIANT flexible partial update requires all backends to " + + "advertise variant patch skip-bitmap marker support. 
Backend " + + backend.getId() + " (" + backend.getHost() + ") is running version " + + backend.getVersion() + " without the required capability"); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/RoutineLoadDesc.java b/fe/fe-core/src/main/java/org/apache/doris/load/RoutineLoadDesc.java index 2c1ede0d13a352..b31f2e8ede18b0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/RoutineLoadDesc.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/RoutineLoadDesc.java @@ -37,6 +37,7 @@ public class RoutineLoadDesc { private final Expr filter; private final Expr deleteCondition; private LoadTask.MergeType mergeType; + private final boolean mergeTypeSpecified; // nullable private final PartitionNamesInfo partitionNamesInfo; private final String sequenceColName; @@ -45,6 +46,14 @@ public RoutineLoadDesc(Separator columnSeparator, Separator lineDelimiter, List< Expr precedingFilter, Expr filter, PartitionNamesInfo partitionNamesInfo, Expr deleteCondition, LoadTask.MergeType mergeType, String sequenceColName) { + this(columnSeparator, lineDelimiter, columnsInfo, precedingFilter, filter, partitionNamesInfo, deleteCondition, + mergeType, false, sequenceColName); + } + + public RoutineLoadDesc(Separator columnSeparator, Separator lineDelimiter, List columnsInfo, + Expr precedingFilter, Expr filter, + PartitionNamesInfo partitionNamesInfo, Expr deleteCondition, LoadTask.MergeType mergeType, + boolean mergeTypeSpecified, String sequenceColName) { this.columnSeparator = columnSeparator; this.lineDelimiter = lineDelimiter; this.columnsInfo = columnsInfo; @@ -53,6 +62,7 @@ public RoutineLoadDesc(Separator columnSeparator, Separator lineDelimiter, List< this.partitionNamesInfo = partitionNamesInfo; this.deleteCondition = deleteCondition; this.mergeType = mergeType; + this.mergeTypeSpecified = mergeTypeSpecified; this.sequenceColName = sequenceColName; } @@ -80,6 +90,10 @@ public LoadTask.MergeType getMergeType() { return mergeType; } + public boolean isMergeTypeSpecified() { + return mergeTypeSpecified; + } + // nullable public PartitionNamesInfo getPartitionNamesInfo() { return partitionNamesInfo; diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java index 82cd9325dfad1d..eeaff9e77781df 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java @@ -61,6 +61,7 @@ import org.apache.doris.persist.AlterRoutineLoadJobOperationLog; import org.apache.doris.persist.RoutineLoadOperation; import org.apache.doris.persist.gson.GsonPostProcessable; +import org.apache.doris.persist.gson.GsonPreProcessable; import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.OriginStatement; @@ -114,7 +115,7 @@ */ public abstract class RoutineLoadJob extends AbstractTxnStateChangeCallback - implements Writable, LoadTaskInfo, GsonPostProcessable { + implements Writable, LoadTaskInfo, GsonPostProcessable, GsonPreProcessable { private static final Logger LOG = LogManager.getLogger(RoutineLoadJob.class); public static final long DEFAULT_MAX_ERROR_NUM = 0; @@ -182,12 +183,24 @@ public boolean isFinalState() { // this code is used to verify be task request protected long authCode; // protected RoutineLoadDesc routineLoadDesc; // optional + @SerializedName(value = "pni", alternate = {"partitionNamesInfo"}) protected 
PartitionNamesInfo partitionNamesInfo; // optional + @SerializedName(value = "columnDescs", alternate = {"cd"}) protected ImportColumnDescs columnDescs; // optional + @SerializedName(value = "pf", alternate = {"precedingFilter"}) protected Expr precedingFilter; // optional + @SerializedName(value = "filter", alternate = {"whereExpr"}) protected Expr whereExpr; // optional protected Separator columnSeparator; // optional protected Separator lineDelimiter; + @SerializedName("cs") + private String serializedColumnSeparator; + @SerializedName("ocs") + private String serializedOriColumnSeparator; + @SerializedName("ld") + private String serializedLineDelimiter; + @SerializedName("old") + private String serializedOriLineDelimiter; @SerializedName("dtcn") protected int desireTaskConcurrentNum; // optional @SerializedName("st") @@ -230,6 +243,7 @@ public boolean isFinalState() { protected TPartialUpdateNewRowPolicy partialUpdateNewKeyPolicy = TPartialUpdateNewRowPolicy.APPEND; protected TUniqueKeyUpdateMode uniqueKeyUpdateMode = TUniqueKeyUpdateMode.UPSERT; + @SerializedName(value = "scn", alternate = {"sequenceCol"}) protected String sequenceCol; protected boolean memtableOnSinkNode = false; @@ -271,6 +285,11 @@ public boolean isFinalState() { protected ReentrantReadWriteLock lock = new ReentrantReadWriteLock(true); protected LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND; // default is all data is load no delete + @SerializedName("mts") + protected boolean mergeTypeSpecified = false; + @SerializedName("mt") + private LoadTask.MergeType serializedMergeType; + @SerializedName(value = "dc", alternate = {"deleteCondition"}) protected Expr deleteCondition; // TODO(ml): error sample @@ -438,11 +457,7 @@ protected void setOptional(CreateRoutineLoadInfo info) throws UserException { protected void setRoutineLoadDesc(RoutineLoadDesc routineLoadDesc) { if (routineLoadDesc != null) { - if (routineLoadDesc.getColumnsInfo() != null) { - columnDescs = new ImportColumnDescs(); - columnDescs.descs.addAll(routineLoadDesc.getColumnsInfo()); - - } + setColumnDescsFromRoutineLoadDesc(routineLoadDesc); if (routineLoadDesc.getPrecedingFilter() != null) { precedingFilter = routineLoadDesc.getPrecedingFilter(); } @@ -461,13 +476,25 @@ protected void setRoutineLoadDesc(RoutineLoadDesc routineLoadDesc) { if (routineLoadDesc.getDeleteCondition() != null) { deleteCondition = routineLoadDesc.getDeleteCondition(); } - mergeType = routineLoadDesc.getMergeType(); + if (routineLoadDesc.getMergeType() != null) { + mergeType = routineLoadDesc.getMergeType(); + } + if (routineLoadDesc.isMergeTypeSpecified()) { + mergeTypeSpecified = true; + } if (routineLoadDesc.hasSequenceCol()) { sequenceCol = routineLoadDesc.getSequenceColName(); } } } + protected void setColumnDescsFromRoutineLoadDesc(RoutineLoadDesc routineLoadDesc) { + if (routineLoadDesc != null && routineLoadDesc.getColumnsInfo() != null) { + columnDescs = new ImportColumnDescs(); + columnDescs.descs.addAll(routineLoadDesc.getColumnsInfo()); + } + } + @Override public long getId() { return id; @@ -1946,8 +1973,17 @@ public void write(DataOutput out) throws IOException { Text.writeString(out, GsonUtils.GSON.toJson(this)); } + @Override + public void gsonPreProcess() throws IOException { + syncSerializedSeparatorFields(columnSeparator, true); + syncSerializedSeparatorFields(lineDelimiter, false); + serializedMergeType = mergeType; + } + @Override public void gsonPostProcess() throws IOException { + restoreSerializedSeparators(); + RoutineLoadDesc 
persistedRoutineLoadDesc = currentRoutineLoadDesc(); if (tableId == 0) { isMultiTable = true; } @@ -2010,7 +2046,7 @@ public void gsonPostProcess() throws IOException { // fall through; let validate() surface the real error } } - createRoutineLoadInfo.validate(ctx); + createRoutineLoadInfo.validateForReplay(ctx); setRoutineLoadDesc(createRoutineLoadInfo.getRoutineLoadDesc()); } finally { ctx.cleanup(); @@ -2019,11 +2055,46 @@ public void gsonPostProcess() throws IOException { this.state = JobState.CANCELLED; LOG.warn("error happens when parsing create routine load stmt: " + origStmt.originStmt, e); } + setRoutineLoadDesc(persistedRoutineLoadDesc); if (userIdentity != null) { userIdentity.setIsAnalyzed(); } } + private RoutineLoadDesc currentRoutineLoadDesc() { + return new RoutineLoadDesc( + columnSeparator, lineDelimiter, + columnDescs == null ? null : new ArrayList<>(columnDescs.descs), precedingFilter, + whereExpr, partitionNamesInfo, deleteCondition, + serializedMergeType, mergeTypeSpecified, sequenceCol); + } + + private void syncSerializedSeparatorFields(Separator separator, boolean isColumnSeparator) { + if (isColumnSeparator) { + serializedColumnSeparator = separator == null ? null : separator.getSeparator(); + serializedOriColumnSeparator = separator == null ? null : separator.getOriSeparator(); + } else { + serializedLineDelimiter = separator == null ? null : separator.getSeparator(); + serializedOriLineDelimiter = separator == null ? null : separator.getOriSeparator(); + } + } + + private void restoreSerializedSeparators() { + if (serializedColumnSeparator != null || serializedOriColumnSeparator != null) { + columnSeparator = buildSeparator(serializedColumnSeparator, serializedOriColumnSeparator); + } + if (serializedLineDelimiter != null || serializedOriLineDelimiter != null) { + lineDelimiter = buildSeparator(serializedLineDelimiter, serializedOriLineDelimiter); + } + } + + private static Separator buildSeparator(String separator, String oriSeparator) { + if (separator == null && oriSeparator == null) { + return null; + } + return new Separator(separator, oriSeparator); + } + public abstract void modifyProperties(AlterRoutineLoadCommand command) throws UserException; public abstract void replayModifyProperties(AlterRoutineLoadJobOperationLog log); @@ -2066,10 +2137,6 @@ protected void modifyCommonJobProperties(Map<String, String> jobProperties) thro if (jobProperties.containsKey(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE)) { String modeStr = jobProperties.remove(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE); TUniqueKeyUpdateMode newMode = CreateRoutineLoadInfo.parseAndValidateUniqueKeyUpdateMode(modeStr); - // Validate flexible partial update constraints when changing to UPDATE_FLEXIBLE_COLUMNS - if (newMode == TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS) { - validateFlexiblePartialUpdateForAlter(); - } this.uniqueKeyUpdateMode = newMode; this.isPartialUpdate = (uniqueKeyUpdateMode == TUniqueKeyUpdateMode.UPDATE_FIXED_COLUMNS); this.jobProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, uniqueKeyUpdateMode.name()); @@ -2092,7 +2159,17 @@ protected void modifyCommonJobProperties(Map<String, String> jobProperties) thro /** * Validate flexible partial update constraints when altering routine load job. 
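+ * The new properties are overlaid on the job's current jobProperties and the checks run + * against that merged view, so an ALTER that only switches unique_key_update_mode still + * sees the job's existing format, fuzzy_parse and jsonpaths settings.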
*/ - private void validateFlexiblePartialUpdateForAlter() throws UserException { + protected void validateFlexiblePartialUpdateForAlter( + Map<String, String> newJobProperties, RoutineLoadDesc newRoutineLoadDesc) throws UserException { + TUniqueKeyUpdateMode newMode = uniqueKeyUpdateMode; + if (newJobProperties.containsKey(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE)) { + newMode = CreateRoutineLoadInfo.parseAndValidateUniqueKeyUpdateMode( + newJobProperties.get(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE)); + } + if (newMode != TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS) { + return; + } + // Multi-table load does not support flexible partial update if (isMultiTable) { throw new DdlException("Flexible partial update is not supported in multi-table load"); @@ -2112,29 +2189,57 @@ private void validateFlexiblePartialUpdateForAlter() throws UserException { } OlapTable olapTable = (OlapTable) table; - // Validate table-level constraints (MoW, skip_bitmap, light_schema_change, variant columns) + // Validate table-level constraints (MoW, skip_bitmap, light_schema_change) olapTable.validateForFlexiblePartialUpdate(); + Map<String, String> mergedJobProperties = Maps.newHashMap(this.jobProperties); + mergedJobProperties.putAll(newJobProperties); + // Routine load specific validations // Must use JSON format - String format = this.jobProperties.getOrDefault(FileFormatProperties.PROP_FORMAT, "csv"); + String format = mergedJobProperties.getOrDefault(FileFormatProperties.PROP_FORMAT, "csv"); if (!"json".equalsIgnoreCase(format)) { throw new DdlException("Flexible partial update only supports JSON format, but current job uses: " + format); } // Cannot use fuzzy_parse - if (Boolean.parseBoolean(this.jobProperties.getOrDefault( + if (Boolean.parseBoolean(mergedJobProperties.getOrDefault( JsonFileFormatProperties.PROP_FUZZY_PARSE, "false"))) { throw new DdlException("Flexible partial update does not support fuzzy_parse"); } // Cannot use jsonpaths - String jsonPaths = getJsonPaths(); + String jsonPaths = mergedJobProperties.get(JsonFileFormatProperties.PROP_JSON_PATHS); if (jsonPaths != null && !jsonPaths.isEmpty()) { throw new DdlException("Flexible partial update does not support jsonpaths"); } // Cannot specify COLUMNS mapping - if (columnDescs != null && !columnDescs.descs.isEmpty()) { + if ((columnDescs != null && !columnDescs.descs.isEmpty()) + || (newRoutineLoadDesc != null && newRoutineLoadDesc.getColumnsInfo() != null + && !newRoutineLoadDesc.getColumnsInfo().isEmpty())) { throw new DdlException("Flexible partial update does not support COLUMNS specification"); } + validateRoutineLoadDescForFlexiblePartialUpdate(newRoutineLoadDesc); + } + + private void validateRoutineLoadDescForFlexiblePartialUpdate(RoutineLoadDesc newRoutineLoadDesc) + throws DdlException { + boolean newDescHasExplicitMergeType = newRoutineLoadDesc != null + && (newRoutineLoadDesc.isMergeTypeSpecified() + || newRoutineLoadDesc.getMergeType() != LoadTask.MergeType.APPEND); + if (mergeTypeSpecified || mergeType != LoadTask.MergeType.APPEND || newDescHasExplicitMergeType) { + throw new DdlException("Don't support flexible partial update when 'merge_type' is specified"); + } + if (whereExpr != null || (newRoutineLoadDesc != null && newRoutineLoadDesc.getFilter() != null)) { + throw new DdlException("Don't support flexible partial update when 'where' is specified"); + } + if (deleteCondition != null + || (newRoutineLoadDesc != null && newRoutineLoadDesc.getDeleteCondition() != null)) { + throw new DdlException("Don't support flexible partial update when 
'delete' is specified"); + } + if (!Strings.isNullOrEmpty(sequenceCol) + || (newRoutineLoadDesc != null && newRoutineLoadDesc.hasSequenceCol())) { + throw new DdlException("Don't support flexible partial update when " + + "'function_column.sequence_col' is specified"); + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadManager.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadManager.java index f24b9ad2252dd5..2fc2927fa58ed4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadManager.java @@ -921,7 +921,6 @@ public void alterRoutineLoadJob(AlterRoutineLoadCommand command) throws UserExce + command.getDataSourceProperties().getDataSourceType()); } job.modifyProperties(command); - job.setRoutineLoadDesc(command.getRoutineLoadDesc()); } public void replayAlterRoutineLoadJob(AlterRoutineLoadJobOperationLog log) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kafka/KafkaRoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kafka/KafkaRoutineLoadJob.java index 9464be78d05858..655011b288e9ef 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kafka/KafkaRoutineLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kafka/KafkaRoutineLoadJob.java @@ -709,10 +709,12 @@ public void modifyProperties(AlterRoutineLoadCommand command) throws UserExcepti throw new DdlException("Only supports modification of PAUSED jobs"); } + validateFlexiblePartialUpdateForAlter(jobProperties, command.getRoutineLoadDesc()); modifyPropertiesInternal(jobProperties, dataSourceProperties); + setRoutineLoadDesc(command.getRoutineLoadDesc()); AlterRoutineLoadJobOperationLog log = new AlterRoutineLoadJobOperationLog(this.id, - jobProperties, dataSourceProperties); + jobProperties, dataSourceProperties, command.getRoutineLoadDesc()); Env.getCurrentEnv().getEditLog().logAlterRoutineLoadJob(log); } finally { writeUnlock(); @@ -836,6 +838,11 @@ private void resetCloudProgress(Cloud.ResetRLProgressRequest.Builder builder) th public void replayModifyProperties(AlterRoutineLoadJobOperationLog log) { try { modifyPropertiesInternal(log.getJobProperties(), (KafkaDataSourceProperties) log.getDataSourceProperties()); + if (log.getRoutineLoadDesc() != null) { + setRoutineLoadDesc(log.getRoutineLoadDesc()); + } else if (log.getColumnDescs() != null) { + columnDescs = log.getColumnDescs(); + } } catch (UserException e) { // should not happen LOG.error("failed to replay modify kafka routine load job: {}", id, e); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kinesis/KinesisRoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kinesis/KinesisRoutineLoadJob.java index 0c05889929924e..746f462b5f591a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kinesis/KinesisRoutineLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/kinesis/KinesisRoutineLoadJob.java @@ -688,10 +688,12 @@ public void modifyProperties(AlterRoutineLoadCommand command) throws UserExcepti throw new DdlException("Only supports modification of PAUSED jobs"); } + validateFlexiblePartialUpdateForAlter(jobProperties, command.getRoutineLoadDesc()); modifyPropertiesInternal(jobProperties, dataSourceProperties); + setRoutineLoadDesc(command.getRoutineLoadDesc()); AlterRoutineLoadJobOperationLog log = new 
AlterRoutineLoadJobOperationLog(this.id, - jobProperties, dataSourceProperties); + jobProperties, dataSourceProperties, command.getRoutineLoadDesc()); Env.getCurrentEnv().getEditLog().logAlterRoutineLoadJob(log); } finally { writeUnlock(); @@ -785,6 +787,11 @@ public void replayModifyProperties(AlterRoutineLoadJobOperationLog log) { try { modifyPropertiesInternal(log.getJobProperties(), (KinesisDataSourceProperties) log.getDataSourceProperties()); + if (log.getRoutineLoadDesc() != null) { + setRoutineLoadDesc(log.getRoutineLoadDesc()); + } else if (log.getColumnDescs() != null) { + columnDescs = log.getColumnDescs(); + } } catch (UserException e) { LOG.error("failed to replay modify kinesis routine load job: {}", id, e); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadTaskInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadTaskInfo.java index 2ece54c823ecfb..eebb11fd4e7872 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadTaskInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsLoadTaskInfo.java @@ -53,6 +53,10 @@ default void setTimeout(int timeout) { LoadTask.MergeType getMergeType(); + default boolean isMergeTypeSpecified() { + return false; + } + Expression getDeleteCondition(); boolean hasSequenceCol(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadPlanner.java index 8c3e0abd830f22..01c109716b623a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadPlanner.java @@ -45,6 +45,7 @@ import org.apache.doris.service.FrontendOptions; import org.apache.doris.thrift.PaloInternalServiceVersion; import org.apache.doris.thrift.TBrokerFileStatus; +import org.apache.doris.thrift.TFileFormatType; import org.apache.doris.thrift.TFileType; import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TPartialUpdateNewRowPolicy; @@ -150,6 +151,7 @@ public TPipelineFragmentParams plan(TUniqueId loadId, int fragmentInstanceIdInde if (uniquekeyUpdateMode == TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS) { // Validate table-level constraints for flexible partial update destTable.validateForFlexiblePartialUpdate(); + validateLoadTaskForFlexiblePartialUpdate(taskInfo); } HashSet<String> partialUpdateInputColumns = new HashSet<>(); if (uniquekeyUpdateMode == TUniqueKeyUpdateMode.UPDATE_FIXED_COLUMNS) { @@ -336,4 +338,35 @@ public TPipelineFragmentParams plan(TUniqueId loadId, int fragmentInstanceIdInde params.setIsMowTable(destTable.getEnableUniqueKeyMergeOnWrite()); return params; } + + static void validateLoadTaskForFlexiblePartialUpdate(NereidsLoadTaskInfo taskInfo) throws UserException { + if (taskInfo.getFormatType() != TFileFormatType.FORMAT_JSON) { + throw new UserException("flexible partial update only supports json format as input file currently"); + } + if (taskInfo.isFuzzyParse()) { + throw new UserException("Don't support flexible partial update when 'fuzzy_parse' is enabled"); + } + if (!taskInfo.getColumnExprDescs().descs.isEmpty()) { + throw new UserException("Don't support flexible partial update when 'columns' is specified"); + } + if (taskInfo.getJsonPaths() != null && !taskInfo.getJsonPaths().isEmpty()) { + throw new UserException("Don't support flexible partial update when 'jsonpaths' is specified"); + } + if 
(taskInfo.getHiddenColumns() != null && !taskInfo.getHiddenColumns().isEmpty()) { + throw new UserException("Don't support flexible partial update when 'hidden_columns' is specified"); + } + if (taskInfo.hasSequenceCol()) { + throw new UserException("Don't support flexible partial update when " + + "'function_column.sequence_col' is specified"); + } + if (taskInfo.isMergeTypeSpecified() || taskInfo.getMergeType() != LoadTask.MergeType.APPEND) { + throw new UserException("Don't support flexible partial update when 'merge_type' is specified"); + } + if (taskInfo.getWhereExpr() != null) { + throw new UserException("Don't support flexible partial update when 'where' is specified"); + } + if (taskInfo.getDeleteCondition() != null) { + throw new UserException("Don't support flexible partial update when 'delete' is specified"); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadTask.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadTask.java index f5ddca41f19a29..fb522ee2a03ee0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/load/NereidsStreamLoadTask.java @@ -75,6 +75,7 @@ public class NereidsStreamLoadTask implements NereidsLoadTaskInfo { private int timeout = Config.stream_load_default_timeout_second; private long execMemLimit = 2 * 1024 * 1024 * 1024L; // default is 2GB private LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND; // default is all data is load no delete + private boolean mergeTypeSpecified; private Expression deleteCondition; private String sequenceCol; private int sendBatchParallelism = 1; @@ -270,6 +271,11 @@ public LoadTask.MergeType getMergeType() { return mergeType; } + @Override + public boolean isMergeTypeSpecified() { + return mergeTypeSpecified; + } + public Expression getDeleteCondition() { return deleteCondition; } @@ -458,6 +464,7 @@ private void setOptionalFromTSLPutRequest(TStreamLoadPutRequest request) throws readJsonByLine = request.isReadJsonByLine(); } if (request.isSetMergeType()) { + mergeTypeSpecified = request.isSetMergeTypeSpecified() && request.isMergeTypeSpecified(); try { mergeType = LoadTask.MergeType.valueOf(request.getMergeType().toString()); } catch (IllegalArgumentException e) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 2938d9efb7b487..ab6dbccbe8ec0f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -2475,8 +2475,9 @@ public LogicalPlan visitCreateRoutineLoad(CreateRoutineLoadContext ctx) { // NOTICE: we should not generate immutable map here, because it will be modified when analyzing. ? 
Maps.newHashMap(visitPropertyItemList(ctx.customProperties)) : Maps.newHashMap(); + boolean mergeTypeSpecified = ctx.WITH() != null; LoadTask.MergeType mergeType = LoadTask.MergeType.APPEND; - if (ctx.WITH() != null) { + if (mergeTypeSpecified) { if (ctx.DELETE() != null) { mergeType = LoadTask.MergeType.DELETE; } else if (ctx.MERGE() != null) { @@ -2498,7 +2499,7 @@ public LogicalPlan visitCreateRoutineLoad(CreateRoutineLoadContext ctx) { } } CreateRoutineLoadInfo createRoutineLoadInfo = new CreateRoutineLoadInfo(jobLabelInfo, tableName, - loadPropertyMap, properties, type, customProperties, mergeType, comment); + loadPropertyMap, properties, type, customProperties, mergeType, mergeTypeSpecified, comment); return new CreateRoutineLoadCommand(createRoutineLoadInfo); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommand.java index 86b085740e2925..0aba4d44a69e96 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommand.java @@ -137,6 +137,7 @@ private void validate(ConnectContext ctx) throws UserException { } private void rewriteAlterOpForOlapTable(ConnectContext ctx, OlapTable table) throws UserException { + validateAlterVariantColumnsForFlexiblePartialUpdate(table); List<AlterTableOp> alterTableOps = new ArrayList<>(); for (AlterTableOp alterClause : ops) { if (alterClause instanceof EnableFeatureOp) { @@ -157,10 +158,19 @@ private void rewriteAlterOpForOlapTable(ConnectContext ctx, OlapTable table) thr throw new AnalysisException("Update flexible columns feature is only supported" + " on merge-on-write unique tables."); } + if (table.isUniqKeyMergeOnWriteWithClusterKeys()) { + throw new AnalysisException("Update flexible columns feature does not support" + + " merge-on-write Unique tables with cluster keys."); + } if (table.hasSkipBitmapColumn()) { throw new AnalysisException("table " + table.getName() + " has enabled update flexible columns feature already."); } + if (!table.getEnableLightSchemaChange()) { + throw new AnalysisException("Update flexible columns feature requires " + + "light_schema_change to be enabled."); + } + table.validateVariantColumnsForFlexiblePartialUpdate(); } // analyse sequence column Type sequenceColType = null; @@ -230,6 +240,33 @@ private void rewriteAlterOpForOlapTable(ConnectContext ctx, OlapTable table) thr ops = alterTableOps; } + private void validateAlterVariantColumnsForFlexiblePartialUpdate(OlapTable table) throws UserException { + boolean enableFlexiblePartialUpdate = false; + for (AlterTableOp alterClause : ops) { + if (alterClause instanceof EnableFeatureOp + && ((EnableFeatureOp) alterClause).getFeature() + == EnableFeatureOp.Features.UPDATE_FLEXIBLE_COLUMNS) { + enableFlexiblePartialUpdate = true; + } + } + if (!enableFlexiblePartialUpdate && !table.hasSkipBitmapColumn()) { + return; + } + for (AlterTableOp alterClause : ops) { + if (alterClause instanceof AddColumnOp) { + OlapTable.validateVariantColumnForFlexiblePartialUpdate( + ((AddColumnOp) alterClause).getColumn()); + } else if (alterClause instanceof AddColumnsOp) { + for (Column column : ((AddColumnsOp) alterClause).getColumns()) { + OlapTable.validateVariantColumnForFlexiblePartialUpdate(column); + } + } else if (alterClause instanceof ModifyColumnOp) { + OlapTable.validateVariantColumnForFlexiblePartialUpdate( + 
((ModifyColumnOp) alterClause).getColumn()); + } + } + } + + /** * checkExternalTableOperationAllow */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateRoutineLoadInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateRoutineLoadInfo.java index e8a75c0299b4d6..788105e653d090 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateRoutineLoadInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateRoutineLoadInfo.java @@ -175,6 +175,8 @@ public class CreateRoutineLoadInfo { private LoadTask.MergeType mergeType; + private boolean mergeTypeSpecified; + private boolean isMultiTable = false; private AbstractDataSourceProperties dataSourceProperties; @@ -187,6 +189,18 @@ public CreateRoutineLoadInfo(LabelNameInfo labelNameInfo, String tableName, Map<String, LoadProperty> loadPropertyMap, Map<String, String> jobProperties, String typeName, Map<String, String> dataSourceProperties, LoadTask.MergeType mergeType, String comment) { + this(labelNameInfo, tableName, loadPropertyMap, jobProperties, typeName, dataSourceProperties, + mergeType, false, comment); + } + + /** + * constructor for create routine load + */ + public CreateRoutineLoadInfo(LabelNameInfo labelNameInfo, String tableName, + Map<String, LoadProperty> loadPropertyMap, + Map<String, String> jobProperties, String typeName, + Map<String, String> dataSourceProperties, LoadTask.MergeType mergeType, + boolean mergeTypeSpecified, String comment) { this.labelNameInfo = labelNameInfo; if (StringUtils.isBlank(tableName)) { this.isMultiTable = true; } @@ -198,6 +212,7 @@ public CreateRoutineLoadInfo(LabelNameInfo labelNameInfo, String tableName, this.dataSourceProperties = RoutineLoadDataSourcePropertyFactory .createDataSource(typeName, dataSourceProperties, this.isMultiTable); this.mergeType = mergeType; + this.mergeTypeSpecified = mergeTypeSpecified; // Parse unique_key_update_mode first (takes precedence) if (this.jobProperties.containsKey(UNIQUE_KEY_UPDATE_MODE)) { String modeStr = this.jobProperties.get(UNIQUE_KEY_UPDATE_MODE); @@ -362,6 +377,10 @@ public LoadTask.MergeType getMergeType() { return mergeType; } + public boolean isMergeTypeSpecified() { + return mergeTypeSpecified; + } + public boolean isMultiTable() { return isMultiTable; } @@ -382,8 +401,12 @@ public String getWorkloadGroupName() { * analyze create table info */ public void validate(ConnectContext ctx) throws UserException { + validate(ctx, true); + } + + private void validate(ConnectContext ctx, boolean validateBackendCapability) throws UserException { // check dbName and tableName - checkDBTable(ctx); + checkDBTable(ctx, validateBackendCapability); // check name try { FeNameFormat.checkCommonName(NAME_TYPE, name); @@ -394,7 +417,8 @@ public void validate(ConnectContext ctx) throws UserException { + " Maybe routine load job name is longer than 64 or contains illegal characters"); } // check load properties include column separator etc. - routineLoadDesc = checkLoadProperties(ctx, loadPropertyMap, dbName, tableName, isMultiTable, mergeType); + routineLoadDesc = checkLoadProperties(ctx, loadPropertyMap, dbName, tableName, isMultiTable, mergeType, + mergeTypeSpecified); // check routine load job properties include desired concurrent number etc. checkJobProperties(); // check data source properties @@ -412,7 +436,14 @@ public void validate(ConnectContext ctx) throws UserException { } } - private void checkDBTable(ConnectContext ctx) throws AnalysisException { + /** + * Validate persisted routine-load metadata while restoring FE image. 
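+ * Unlike the create-time validate(ctx), this path passes validateBackendCapability = false, + * so restoring an image does not fail on backend-capability heartbeats that may not have + * arrived yet.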
+ */ + public void validateForReplay(ConnectContext ctx) throws UserException { + validate(ctx, false); + } + + private void checkDBTable(ConnectContext ctx, boolean validateBackendCapability) throws AnalysisException { labelNameInfo.validate(ctx); dbName = labelNameInfo.getDb(); name = labelNameInfo.getLabel(); @@ -442,14 +473,15 @@ private void checkDBTable(ConnectContext ctx) throws AnalysisException { } // Validate flexible partial update constraints if (uniqueKeyUpdateMode == TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS) { - validateFlexiblePartialUpdate((OlapTable) table); + validateFlexiblePartialUpdate((OlapTable) table, validateBackendCapability); } } - private void validateFlexiblePartialUpdate(OlapTable table) throws AnalysisException { - // Validate table-level constraints (MoW, skip_bitmap, light_schema_change, variant columns) + private void validateFlexiblePartialUpdate(OlapTable table, boolean validateBackendCapability) + throws AnalysisException { + // Validate table-level constraints (MoW, skip_bitmap, light_schema_change) try { - table.validateForFlexiblePartialUpdate(); + table.validateForFlexiblePartialUpdate(validateBackendCapability); } catch (UserException e) { throw new AnalysisException(e.getMessage(), e); } @@ -473,6 +505,19 @@ private void validateFlexiblePartialUpdate(OlapTable table) throws AnalysisExcep .anyMatch(p -> p instanceof LoadColumnClause)) { throw new AnalysisException("Flexible partial update does not support COLUMNS specification"); } + // Cannot specify merge/delete mode, WHERE filter, or load-level sequence column. + if (mergeTypeSpecified || mergeType != LoadTask.MergeType.APPEND) { + throw new AnalysisException("Don't support flexible partial update when 'merge_type' is specified"); + } + if (loadPropertyMap != null && loadPropertyMap.values().stream() + .anyMatch(p -> p instanceof LoadWhereClause)) { + throw new AnalysisException("Don't support flexible partial update when 'where' is specified"); + } + if (loadPropertyMap != null && loadPropertyMap.values().stream() + .anyMatch(p -> p instanceof LoadSequenceClause)) { + throw new AnalysisException("Don't support flexible partial update when " + + "'function_column.sequence_col' is specified"); + } } /** @@ -489,6 +534,12 @@ private void validateFlexiblePartialUpdate(OlapTable table) throws AnalysisExcep public static RoutineLoadDesc checkLoadProperties(ConnectContext ctx, Map<String, LoadProperty> loadPropertyMap, String dbName, String tableName, boolean isMultiTable, LoadTask.MergeType mergeType) throws UserException { + return checkLoadProperties(ctx, loadPropertyMap, dbName, tableName, isMultiTable, mergeType, false); + } + + private static RoutineLoadDesc checkLoadProperties(ConnectContext ctx, Map<String, LoadProperty> loadPropertyMap, + String dbName, String tableName, boolean isMultiTable, LoadTask.MergeType mergeType, + boolean mergeTypeSpecified) throws UserException { Separator columnSeparator = null; // TODO(yangzhengguo01): add line delimiter to properties Separator lineDelimiter = null; @@ -547,7 +598,7 @@ public static RoutineLoadDesc checkLoadProperties(ConnectContext ctx, Map jobProperties; @SerializedName(value = "dataSourceProperties") private AbstractDataSourceProperties dataSourceProperties; + @SerializedName(value = "columnDescs") + private ImportColumnDescs columnDescs; + @SerializedName(value = "hrld") + private boolean hasRoutineLoadDesc; + @SerializedName(value = "cs") + private String columnSeparator; + @SerializedName(value = "ocs") + private String oriColumnSeparator; + @SerializedName(value = "ld") + 
private String lineDelimiter; + @SerializedName(value = "old") + private String oriLineDelimiter; + @SerializedName(value = "pf") + private Expr precedingFilter; + @SerializedName(value = "filter") + private Expr filter; + @SerializedName(value = "dc") + private Expr deleteCondition; + @SerializedName(value = "pni") + private PartitionNamesInfo partitionNamesInfo; + @SerializedName(value = "mt") + private LoadTask.MergeType mergeType; + @SerializedName(value = "mts") + private boolean mergeTypeSpecified; + @SerializedName(value = "scn") + private String sequenceColName; public AlterRoutineLoadJobOperationLog(long jobId, Map jobProperties, AbstractDataSourceProperties dataSourceProperties) { + this(jobId, jobProperties, dataSourceProperties, null); + } + + public AlterRoutineLoadJobOperationLog(long jobId, Map jobProperties, + AbstractDataSourceProperties dataSourceProperties, RoutineLoadDesc routineLoadDesc) { this.jobId = jobId; - this.jobProperties = jobProperties; + this.jobProperties = new HashMap<>(jobProperties); this.dataSourceProperties = dataSourceProperties; + if (routineLoadDesc == null) { + return; + } + hasRoutineLoadDesc = true; + setSeparatorFields(routineLoadDesc.getColumnSeparator(), true); + setSeparatorFields(routineLoadDesc.getLineDelimiter(), false); + precedingFilter = routineLoadDesc.getPrecedingFilter(); + filter = routineLoadDesc.getFilter(); + deleteCondition = routineLoadDesc.getDeleteCondition(); + partitionNamesInfo = routineLoadDesc.getPartitionNamesInfo(); + mergeType = routineLoadDesc.getMergeType(); + mergeTypeSpecified = routineLoadDesc.isMergeTypeSpecified(); + sequenceColName = routineLoadDesc.getSequenceColName(); + if (routineLoadDesc.getColumnsInfo() != null) { + this.columnDescs = new ImportColumnDescs(); + this.columnDescs.descs.addAll(routineLoadDesc.getColumnsInfo()); + } } public long getJobId() { @@ -57,6 +113,41 @@ public AbstractDataSourceProperties getDataSourceProperties() { return dataSourceProperties; } + public ImportColumnDescs getColumnDescs() { + return columnDescs; + } + + public RoutineLoadDesc getRoutineLoadDesc() { + if (!hasRoutineLoadDesc) { + return null; + } + return new RoutineLoadDesc( + buildSeparator(columnSeparator, oriColumnSeparator), + buildSeparator(lineDelimiter, oriLineDelimiter), + columnDescs == null ? 
null : new ArrayList<>(columnDescs.descs), precedingFilter, + filter, partitionNamesInfo, deleteCondition, mergeType, mergeTypeSpecified, sequenceColName); + } + + private void setSeparatorFields(Separator separator, boolean isColumnSeparator) { + if (separator == null) { + return; + } + if (isColumnSeparator) { + columnSeparator = separator.getSeparator(); + oriColumnSeparator = separator.getOriSeparator(); + } else { + lineDelimiter = separator.getSeparator(); + oriLineDelimiter = separator.getOriSeparator(); + } + } + + private static Separator buildSeparator(String separator, String oriSeparator) { + if (separator == null && oriSeparator == null) { + return null; + } + return new Separator(separator, oriSeparator); + } + public static AlterRoutineLoadJobOperationLog read(DataInput in) throws IOException { String json = Text.readString(in); return GsonUtils.GSON.fromJson(json, AlterRoutineLoadJobOperationLog.class); diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java index d403c88732ea2d..f962f4be62c3ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/Backend.java @@ -148,6 +148,7 @@ public class Backend implements Writable { // from config::pipeline_executor_size , default equal cpuCores @SerializedName("pipelineExecutorSize") private int pipelineExecutorSize = 1; + private transient boolean supportsVariantFlexiblePartialUpdate = false; // Counter of heartbeat failure. // Once a heartbeat failed, increase this counter by one. @@ -911,6 +912,11 @@ public boolean handleHbResponse(BackendHbResponse hbResponse, boolean isReplay) isChanged = true; this.beMemory = hbResponse.getBeMemory(); } + if (this.supportsVariantFlexiblePartialUpdate + != hbResponse.supportsVariantFlexiblePartialUpdate()) { + isChanged = true; + this.supportsVariantFlexiblePartialUpdate = hbResponse.supportsVariantFlexiblePartialUpdate(); + } this.lastUpdateMs = hbResponse.getHbTime(); if (!isAlive.get()) { @@ -960,6 +966,10 @@ public boolean handleHbResponse(BackendHbResponse hbResponse, boolean isReplay) LOG.warn("{} is dead,", this.toString()); } } + if (!isAlive.get() && supportsVariantFlexiblePartialUpdate) { + isChanged = true; + supportsVariantFlexiblePartialUpdate = false; + } // still set error msg and missing time even if we may not mark this backend as dead, // for debug easily. @@ -979,6 +989,10 @@ public long getTabletMaxCompactionScore() { return tabletMaxCompactionScore; } + public boolean supportsVariantFlexiblePartialUpdate() { + return supportsVariantFlexiblePartialUpdate; + } + private long getDiskNumByStorageMedium(TStorageMedium storageMedium) { return disksRef.values().stream().filter(v -> v.getStorageMedium() == storageMedium).count(); } @@ -1138,4 +1152,3 @@ public static Backend fromThrift(TBackend backend) { } } - diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/BackendHbResponse.java b/fe/fe-core/src/main/java/org/apache/doris/system/BackendHbResponse.java index 41fad9961a4e94..0ce0375c2e1666 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/BackendHbResponse.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/BackendHbResponse.java @@ -51,6 +51,8 @@ public class BackendHbResponse extends HeartbeatResponse implements Writable { private boolean isShutDown = false; // The physical memory available for use by BE. 
private long beMemory = 0; + @SerializedName(value = "supportsVariantFlexiblePartialUpdate") + private boolean supportsVariantFlexiblePartialUpdate = false; public BackendHbResponse() { super(HeartbeatResponse.Type.BACKEND); @@ -59,25 +61,21 @@ public BackendHbResponse() { public BackendHbResponse(long beId, int bePort, int httpPort, int brpcPort, long hbTime, long beStartTime, String version, String nodeRole, long fragmentNum, long lastFragmentUpdateTime, boolean isShutDown, int arrowFlightSqlPort) { - super(HeartbeatResponse.Type.BACKEND); - this.beId = beId; - this.status = HbStatus.OK; - this.bePort = bePort; - this.httpPort = httpPort; - this.brpcPort = brpcPort; - this.hbTime = hbTime; - this.beStartTime = beStartTime; - this.version = version; - this.nodeRole = nodeRole; - this.fragmentNum = fragmentNum; - this.lastFragmentUpdateTime = lastFragmentUpdateTime; - this.isShutDown = isShutDown; - this.arrowFlightSqlPort = arrowFlightSqlPort; + this(beId, bePort, httpPort, brpcPort, hbTime, beStartTime, version, nodeRole, fragmentNum, + lastFragmentUpdateTime, isShutDown, arrowFlightSqlPort, 0, false); } public BackendHbResponse(long beId, int bePort, int httpPort, int brpcPort, long hbTime, long beStartTime, String version, String nodeRole, long fragmentNum, long lastFragmentUpdateTime, boolean isShutDown, int arrowFlightSqlPort, long beMemory) { + this(beId, bePort, httpPort, brpcPort, hbTime, beStartTime, version, nodeRole, fragmentNum, + lastFragmentUpdateTime, isShutDown, arrowFlightSqlPort, beMemory, false); + } + + public BackendHbResponse(long beId, int bePort, int httpPort, int brpcPort, long hbTime, long beStartTime, + String version, String nodeRole, long fragmentNum, long lastFragmentUpdateTime, + boolean isShutDown, int arrowFlightSqlPort, long beMemory, + boolean supportsVariantFlexiblePartialUpdate) { super(HeartbeatResponse.Type.BACKEND); this.beId = beId; this.status = HbStatus.OK; @@ -93,6 +91,7 @@ public BackendHbResponse(long beId, int bePort, int httpPort, int brpcPort, long this.isShutDown = isShutDown; this.arrowFlightSqlPort = arrowFlightSqlPort; this.beMemory = beMemory; + this.supportsVariantFlexiblePartialUpdate = supportsVariantFlexiblePartialUpdate; } public BackendHbResponse(long beId, String host, long lastHbTime, String errMsg) { @@ -152,6 +151,10 @@ public long getBeMemory() { return beMemory; } + public boolean supportsVariantFlexiblePartialUpdate() { + return supportsVariantFlexiblePartialUpdate; + } + @Override public String toString() { StringBuilder sb = new StringBuilder(); @@ -162,6 +165,7 @@ public String toString() { sb.append(", httpPort: ").append(httpPort); sb.append(", brpcPort: ").append(brpcPort); sb.append(", arrowFlightSqlPort: ").append(arrowFlightSqlPort); + sb.append(", supportsVariantFlexiblePartialUpdate: ").append(supportsVariantFlexiblePartialUpdate); return sb.toString(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatMgr.java b/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatMgr.java index 4500ce6da0aefd..ce984225790e8d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/HeartbeatMgr.java @@ -321,6 +321,7 @@ private HeartbeatResponse pingOnce() { backendInfo.setBrpcPort(4); backendInfo.setArrowFlightSqlPort(8); backendInfo.setVersion("test-1234"); + backendInfo.setSupportsVariantFlexiblePartialUpdate(true); result = new THeartbeatResult(); result.setStatus(new TStatus(TStatusCode.OK)); 
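+ // (Assumed intent: the capability flag set above keeps this mocked heartbeat consistent with what a real BE reports, so FE-side capability gating stays exercised under this test hook.)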
result.setBackendInfo(backendInfo); @@ -365,9 +366,13 @@ private HeartbeatResponse pingOnce() { isShutDown = tBackendInfo.isIsShutdown(); } long beMemory = tBackendInfo.isSetBeMem() ? tBackendInfo.getBeMem() : 0; + boolean supportsVariantFlexiblePartialUpdate = + tBackendInfo.isSetSupportsVariantFlexiblePartialUpdate() + && tBackendInfo.isSupportsVariantFlexiblePartialUpdate(); return new BackendHbResponse(backendId, bePort, httpPort, brpcPort, System.currentTimeMillis(), beStartTime, version, nodeRole, - fragmentNum, lastFragmentUpdateTime, isShutDown, arrowFlightSqlPort, beMemory); + fragmentNum, lastFragmentUpdateTime, isShutDown, arrowFlightSqlPort, beMemory, + supportsVariantFlexiblePartialUpdate); } else { return new BackendHbResponse(backendId, backend.getHost(), backend.getLastUpdateMs(), result.getStatus().getErrorMsgs().isEmpty() diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java index 0cefab3d91d6b8..e825237e3cba21 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/OlapTableTest.java @@ -17,19 +17,25 @@ package org.apache.doris.catalog; +import org.apache.doris.alter.SchemaChangeHandler; import org.apache.doris.catalog.TableIf.TableType; import org.apache.doris.catalog.info.IndexType; import org.apache.doris.cloud.proto.Cloud; import org.apache.doris.cloud.rpc.VersionHelper; +import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; import org.apache.doris.common.FeConstants; +import org.apache.doris.common.UserException; import org.apache.doris.common.io.FastByteArrayOutputStream; import org.apache.doris.common.util.PropertyAnalyzer; import org.apache.doris.common.util.UnitTestUtil; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.SessionVariable; +import org.apache.doris.system.Backend; +import org.apache.doris.system.SystemInfoService; import org.apache.doris.thrift.TStorageType; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.junit.Assert; @@ -40,8 +46,11 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Arrays; +import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; @@ -146,6 +155,170 @@ public void testBuildVariantEnableFlattenNestedWithLegacyPropertyKey() throws IO tableProperty.getProperties().containsKey(PropertyAnalyzer.LEGACY_PROPERTIES_VARIANT_ENABLE_FLATTEN_NESTED)); } + @Test + public void testValidateVariantColumnsForFlexiblePartialUpdate() throws UserException { + Column normalVariant = new Column("v", Type.VARIANT); + OlapTable.validateVariantColumnsForFlexiblePartialUpdate(Lists.newArrayList( + new Column("k", PrimitiveType.INT), normalVariant)); + + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean()); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate( + Mockito.anyBoolean()); + 
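+ // Stub the table-level prerequisites (MoW enabled, skip-bitmap column present, light schema change on) so only the variant-specific checks below are exercised.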
Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.isUniqKeyMergeOnWriteWithClusterKeys()).thenReturn(false); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList( + new Column("k", PrimitiveType.INT), normalVariant)); + Mockito.when(table.variantEnableFlattenNested()).thenReturn(false); + table.validateForFlexiblePartialUpdate(); + + VariantType docModeVariant = new VariantType(new ArrayList<>(), 0, false, 10000, 0, + true, 0L, 64, false); + Column docModeColumn = new Column("doc_v", docModeVariant); + UserException exception = Assert.assertThrows(UserException.class, + () -> OlapTable.validateVariantColumnsForFlexiblePartialUpdate( + Lists.newArrayList(docModeColumn))); + Assert.assertTrue(exception.getMessage().contains( + "VARIANT flexible partial update does not support doc mode in this version")); + + exception = Assert.assertThrows(UserException.class, + () -> OlapTable.validateVariantColumnsForFlexiblePartialUpdate( + Lists.newArrayList(normalVariant), true)); + Assert.assertTrue(exception.getMessage().contains( + "VARIANT flexible partial update does not support deprecated_variant_enable_flatten_nested")); + + Mockito.when(table.variantEnableFlattenNested()).thenReturn(true); + exception = Assert.assertThrows(UserException.class, table::validateForFlexiblePartialUpdate); + Assert.assertTrue(exception.getMessage().contains( + "VARIANT flexible partial update does not support deprecated_variant_enable_flatten_nested")); + + Mockito.when(table.variantEnableFlattenNested()).thenReturn(false); + Mockito.when(table.isUniqKeyMergeOnWriteWithClusterKeys()).thenReturn(true); + exception = Assert.assertThrows(UserException.class, table::validateForFlexiblePartialUpdate); + Assert.assertTrue(exception.getMessage().contains("cluster keys")); + } + + @Test + public void testValidateVariantFlexiblePartialUpdateRejectsUnsupportedBackendCapability() + throws UserException { + Backend supportedBackend = Mockito.mock(Backend.class); + Mockito.when(supportedBackend.isAlive()).thenReturn(true); + Mockito.when(supportedBackend.supportsVariantFlexiblePartialUpdate()).thenReturn(true); + OlapTable.validateBackendsSupportVariantFlexiblePartialUpdate(Lists.newArrayList(supportedBackend)); + + Backend oldBackend = Mockito.mock(Backend.class); + Mockito.when(oldBackend.isAlive()).thenReturn(true); + Mockito.when(oldBackend.getId()).thenReturn(2L); + Mockito.when(oldBackend.getHost()).thenReturn("127.0.0.2"); + Mockito.when(oldBackend.getVersion()).thenReturn("old-version"); + Mockito.when(oldBackend.supportsVariantFlexiblePartialUpdate()).thenReturn(false); + + UserException exception = Assert.assertThrows(UserException.class, + () -> OlapTable.validateBackendsSupportVariantFlexiblePartialUpdate( + Lists.newArrayList(supportedBackend, oldBackend))); + Assert.assertTrue(exception.getMessage().contains("variant patch skip-bitmap marker support")); + Assert.assertTrue(exception.getMessage().contains("old-version")); + + Mockito.when(oldBackend.isAlive()).thenReturn(false); + exception = Assert.assertThrows(UserException.class, + () -> OlapTable.validateBackendsSupportVariantFlexiblePartialUpdate( + Lists.newArrayList(supportedBackend, oldBackend))); + Assert.assertTrue(exception.getMessage().contains("not alive")); + } + + @Test + public void testValidateVariantFlexiblePartialUpdateUsesCurrentClusterBackends() + throws 
UserException, AnalysisException { + Backend supportedBackend = Mockito.mock(Backend.class); + Mockito.when(supportedBackend.isAlive()).thenReturn(true); + Mockito.when(supportedBackend.supportsVariantFlexiblePartialUpdate()).thenReturn(true); + + SystemInfoService systemInfoService = Mockito.mock(SystemInfoService.class); + Mockito.when(systemInfoService.getBackendsByCurrentCluster()) + .thenReturn(ImmutableMap.of(1L, supportedBackend)); + Mockito.when(systemInfoService.getAllBackendsByAllCluster()) + .thenThrow(new AnalysisException("unexpected all-backend validation")); + + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean()); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate( + Mockito.anyBoolean()); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.isUniqKeyMergeOnWriteWithClusterKeys()).thenReturn(false); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList( + new Column("k", PrimitiveType.INT), new Column("v", Type.VARIANT))); + Mockito.when(table.variantEnableFlattenNested()).thenReturn(false); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentSystemInfo).thenReturn(systemInfoService); + table.validateForFlexiblePartialUpdate(); + } + } + + @Test + public void testSchemaChangeHandlerValidatesVariantColumnsForFlexiblePartialUpdate() throws Throwable { + Column normalVariant = new Column("v", Type.VARIANT); + Column skipBitmap = new Column(Column.SKIP_BITMAP_COL, Type.BITMAP); + skipBitmap.setIsVisible(false); + LinkedList baseSchema = Lists.newLinkedList( + Lists.newArrayList(new Column("k", PrimitiveType.INT), normalVariant, skipBitmap)); + Map> indexSchemaMap = Maps.newHashMap(); + indexSchemaMap.put(1L, baseSchema); + + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getBaseIndexId()).thenReturn(1L); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(false); + Mockito.when(table.variantEnableFlattenNested()).thenReturn(false); + + Method method = SchemaChangeHandler.class.getDeclaredMethod( + "validateVariantColumnsForFlexiblePartialUpdate", OlapTable.class, Map.class); + method.setAccessible(true); + try { + method.invoke(new SchemaChangeHandler(), table, indexSchemaMap); + } catch (InvocationTargetException e) { + throw e.getCause(); + } + + Mockito.when(table.variantEnableFlattenNested()).thenReturn(true); + InvocationTargetException exception = Assert.assertThrows(InvocationTargetException.class, + () -> method.invoke(new SchemaChangeHandler(), table, indexSchemaMap)); + Assert.assertTrue(exception.getCause().getMessage().contains( + "VARIANT flexible partial update does not support deprecated_variant_enable_flatten_nested")); + + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.variantEnableFlattenNested()).thenReturn(false); + try { + method.invoke(new SchemaChangeHandler(), table, indexSchemaMap); + } catch (InvocationTargetException e) { + throw e.getCause(); + } + + Mockito.when(table.variantEnableFlattenNested()).thenReturn(true); + exception = 
Assert.assertThrows(InvocationTargetException.class, + () -> method.invoke(new SchemaChangeHandler(), table, indexSchemaMap)); + Assert.assertTrue(exception.getCause().getMessage().contains( + "VARIANT flexible partial update does not support deprecated_variant_enable_flatten_nested")); + + VariantType docModeVariant = new VariantType(new ArrayList<>(), 0, false, 10000, 0, + true, 0L, 64, false); + Map> docModeSchemaMap = Maps.newHashMap(); + docModeSchemaMap.put(1L, Lists.newLinkedList(Lists.newArrayList( + new Column("k", PrimitiveType.INT), new Column("doc_v", docModeVariant), skipBitmap))); + Mockito.when(table.variantEnableFlattenNested()).thenReturn(false); + exception = Assert.assertThrows(InvocationTargetException.class, + () -> method.invoke(new SchemaChangeHandler(), table, docModeSchemaMap)); + Assert.assertTrue(exception.getCause().getMessage().contains( + "VARIANT flexible partial update does not support doc mode in this version")); + } + @Test public void testGetPartitionRowCount() { OlapTable olapTable = new OlapTable(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/KinesisRoutineLoadJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/KinesisRoutineLoadJobTest.java index aa1dc052605f2b..0d7742e7f4dfdf 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/KinesisRoutineLoadJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/KinesisRoutineLoadJobTest.java @@ -17,20 +17,40 @@ package org.apache.doris.load.routineload; +import org.apache.doris.analysis.ImportColumnDesc; +import org.apache.doris.analysis.Separator; import org.apache.doris.analysis.UserIdentity; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.common.Config; +import org.apache.doris.common.UserException; import org.apache.doris.common.jmockit.Deencapsulation; +import org.apache.doris.datasource.InternalCatalog; +import org.apache.doris.datasource.property.fileformat.FileFormatProperties; +import org.apache.doris.load.RoutineLoadDesc; +import org.apache.doris.load.loadv2.LoadTask; import org.apache.doris.load.routineload.kinesis.KinesisConfiguration; import org.apache.doris.load.routineload.kinesis.KinesisDataSourceProperties; import org.apache.doris.load.routineload.kinesis.KinesisProgress; import org.apache.doris.load.routineload.kinesis.KinesisRoutineLoadJob; import org.apache.doris.load.routineload.kinesis.KinesisTaskInfo; +import org.apache.doris.nereids.trees.plans.commands.AlterRoutineLoadCommand; +import org.apache.doris.nereids.trees.plans.commands.info.CreateRoutineLoadInfo; +import org.apache.doris.nereids.trees.plans.commands.info.LabelNameInfo; +import org.apache.doris.persist.AlterRoutineLoadJobOperationLog; +import org.apache.doris.persist.EditLog; +import org.apache.doris.thrift.TUniqueKeyUpdateMode; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.gson.Gson; import org.junit.Assert; import org.junit.Test; +import org.mockito.MockedStatic; +import org.mockito.Mockito; import java.util.HashMap; import java.util.HashSet; @@ -229,6 +249,111 @@ public void testModifyPropertiesShouldReplaceCustomShardsWhenExplicitShardsProvi Assert.assertEquals("202", progress.getSequenceNumberByShard("shard-2")); } + @Test + public void testModifyPropertiesShouldApplyAndPersistRoutineLoadDesc() 
throws Exception { + KinesisRoutineLoadJob routineLoadJob = + new KinesisRoutineLoadJob(1L, "kinesis_routine_load_job", 1L, + 1L, "ap-southeast-1", "stream-1", UserIdentity.ADMIN); + Deencapsulation.setField(routineLoadJob, "state", RoutineLoadJob.JobState.PAUSED); + + RoutineLoadDesc routineLoadDesc = new RoutineLoadDesc(new Separator("|", "|"), null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, "seq"); + AlterRoutineLoadCommand command = new AlterRoutineLoadCommand( + new LabelNameInfo("db", "job"), Maps.newHashMap(), Maps.newHashMap()); + Deencapsulation.setField(command, "routineLoadDesc", routineLoadDesc); + + Env env = Mockito.mock(Env.class); + EditLog editLog = Mockito.mock(EditLog.class); + Mockito.when(env.getEditLog()).thenReturn(editLog); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + + routineLoadJob.modifyProperties(command); + } + + Assert.assertEquals(1, routineLoadJob.getColumnExprDescs().descs.size()); + Assert.assertEquals("|", routineLoadJob.getColumnSeparator().getSeparator()); + Assert.assertEquals("seq", routineLoadJob.getSequenceCol()); + Mockito.verify(editLog).logAlterRoutineLoadJob(Mockito.argThat(log -> + log.getRoutineLoadDesc() != null + && "|".equals(log.getRoutineLoadDesc().getColumnSeparator().getSeparator()) + && "seq".equals(log.getRoutineLoadDesc().getSequenceColName()))); + } + + @Test + public void testModifyPropertiesShouldValidateFlexibleAlterAgainstRoutineLoadDesc() throws Exception { + KinesisRoutineLoadJob routineLoadJob = + new KinesisRoutineLoadJob(1L, "kinesis_routine_load_job", 1L, + 1L, "ap-southeast-1", "stream-1", UserIdentity.ADMIN); + Deencapsulation.setField(routineLoadJob, "dbId", 1L); + Deencapsulation.setField(routineLoadJob, "tableId", 2L); + Deencapsulation.setField(routineLoadJob, "isMultiTable", false); + Deencapsulation.setField(routineLoadJob, "state", RoutineLoadJob.JobState.PAUSED); + Deencapsulation.setField(routineLoadJob, "uniqueKeyUpdateMode", TUniqueKeyUpdateMode.UPSERT); + + Map currentJobProperties = Maps.newHashMap(); + currentJobProperties.put(FileFormatProperties.PROP_FORMAT, "json"); + Deencapsulation.setField(routineLoadJob, "jobProperties", currentJobProperties); + + RoutineLoadDesc routineLoadDesc = new RoutineLoadDesc(null, null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, null); + AlterRoutineLoadCommand command = new AlterRoutineLoadCommand( + new LabelNameInfo("db", "job"), Maps.newHashMap(), Maps.newHashMap()); + Deencapsulation.setField(command, "routineLoadDesc", routineLoadDesc); + Map flexibleProperties = Maps.newHashMap(); + flexibleProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, "UPDATE_FLEXIBLE_COLUMNS"); + Deencapsulation.setField(command, "analyzedJobProperties", flexibleProperties); + + Env env = Mockito.mock(Env.class); + EditLog editLog = Mockito.mock(EditLog.class); + Mockito.when(env.getEditLog()).thenReturn(editLog); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(catalog.getDbNullable(1L)).thenReturn(db); + Mockito.when(db.getTableNullable(2L)).thenReturn(table); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean()); + 
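+ // Run the real validation implementations against the mock so the flexible-update restrictions are actually enforced.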
Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate( + Mockito.anyBoolean()); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList(new Column("k", PrimitiveType.INT))); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + UserException exception = Assert.assertThrows(UserException.class, + () -> routineLoadJob.modifyProperties(command)); + Assert.assertTrue(exception.getMessage().contains("COLUMNS specification")); + } + Mockito.verify(editLog, Mockito.never()).logAlterRoutineLoadJob(Mockito.any()); + } + + @Test + public void testReplayModifyPropertiesShouldRestoreRoutineLoadDesc() { + KinesisRoutineLoadJob routineLoadJob = + new KinesisRoutineLoadJob(1L, "kinesis_routine_load_job", 1L, + 1L, "ap-southeast-1", "stream-1", UserIdentity.ADMIN); + + RoutineLoadDesc routineLoadDesc = new RoutineLoadDesc(new Separator("|", "|"), null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, "seq"); + AlterRoutineLoadJobOperationLog log = new AlterRoutineLoadJobOperationLog( + 1L, Maps.newHashMap(), null, routineLoadDesc); + routineLoadJob.replayModifyProperties(log); + + Assert.assertEquals(1, routineLoadJob.getColumnExprDescs().descs.size()); + Assert.assertEquals("|", routineLoadJob.getColumnSeparator().getSeparator()); + Assert.assertEquals("seq", routineLoadJob.getSequenceCol()); + } + @Test public void testShardRefreshShouldMoveRetiredParentToClosedUntilConsumed() throws Exception { KinesisRoutineLoadJob routineLoadJob = diff --git a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java index 8876c6a8aea9bc..fdf36700b5da13 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/load/routineload/RoutineLoadJobTest.java @@ -17,21 +17,43 @@ package org.apache.doris.load.routineload; +import org.apache.doris.analysis.BinaryPredicate; +import org.apache.doris.analysis.ImportColumnDesc; +import org.apache.doris.analysis.IntLiteral; +import org.apache.doris.analysis.Separator; +import org.apache.doris.analysis.SlotRef; import org.apache.doris.analysis.UserIdentity; +import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Database; import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.catalog.Table; +import org.apache.doris.catalog.Type; +import org.apache.doris.catalog.info.PartitionNamesInfo; import org.apache.doris.common.InternalErrorCode; import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; import org.apache.doris.common.jmockit.Deencapsulation; +import org.apache.doris.datasource.CatalogMgr; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.datasource.kafka.KafkaUtil; +import org.apache.doris.datasource.property.fileformat.FileFormatProperties; +import 
org.apache.doris.datasource.property.fileformat.JsonFileFormatProperties; +import org.apache.doris.load.RoutineLoadDesc; +import org.apache.doris.load.loadv2.LoadTask; import org.apache.doris.load.routineload.kafka.KafkaProgress; import org.apache.doris.load.routineload.kafka.KafkaRoutineLoadJob; import org.apache.doris.load.routineload.kafka.KafkaTaskInfo; +import org.apache.doris.nereids.trees.plans.commands.AlterRoutineLoadCommand; import org.apache.doris.nereids.trees.plans.commands.info.CreateRoutineLoadInfo; +import org.apache.doris.nereids.trees.plans.commands.info.LabelNameInfo; +import org.apache.doris.persist.AlterRoutineLoadJobOperationLog; import org.apache.doris.persist.EditLog; +import org.apache.doris.persist.gson.GsonUtils; +import org.apache.doris.qe.OriginStatement; +import org.apache.doris.task.LoadTaskInfo.ImportColumnDescs; import org.apache.doris.thrift.TKafkaRLTaskProgress; import org.apache.doris.thrift.TUniqueKeyUpdateMode; import org.apache.doris.transaction.GlobalTransactionMgrIface; @@ -42,6 +64,7 @@ import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.gson.JsonObject; import org.apache.kafka.common.PartitionInfo; import org.junit.Assert; import org.junit.Test; @@ -51,6 +74,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Optional; public class RoutineLoadJobTest { @Test @@ -459,4 +483,509 @@ public void testUniqueKeyUpdateModeTakesPrecedenceOverPartialColumns() throws Ex Assert.assertFalse(isPartialUpdate); } + @Test + public void testValidateFlexiblePartialUpdateForAlterUsesAlteredProperties() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "uniqueKeyUpdateMode", TUniqueKeyUpdateMode.UPSERT); + + Map currentJobProperties = Maps.newHashMap(); + currentJobProperties.put(FileFormatProperties.PROP_FORMAT, "json"); + Deencapsulation.setField(job, "jobProperties", currentJobProperties); + + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(catalog.getDbNullable(1L)).thenReturn(db); + Mockito.when(db.getTableNullable(2L)).thenReturn(table); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean()); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate( + Mockito.anyBoolean()); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList(new Column("k", PrimitiveType.INT))); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + Map modeAndJsonPathsProperties = Maps.newHashMap(); + modeAndJsonPathsProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, "UPDATE_FLEXIBLE_COLUMNS"); + modeAndJsonPathsProperties.put(JsonFileFormatProperties.PROP_JSON_PATHS, "[\"$.id\"]"); + 
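+ // Switching the job to UPDATE_FLEXIBLE_COLUMNS while jsonpaths is configured should be rejected by the alter-time validation.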
UserException exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(modeAndJsonPathsProperties, null)); + Assert.assertTrue(exception.getMessage().contains("jsonpaths")); + + Deencapsulation.setField(job, "uniqueKeyUpdateMode", TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS); + + Map fuzzyParseProperties = Maps.newHashMap(); + fuzzyParseProperties.put(JsonFileFormatProperties.PROP_FUZZY_PARSE, "true"); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(fuzzyParseProperties, null)); + Assert.assertTrue(exception.getMessage().contains("fuzzy_parse")); + + RoutineLoadDesc routineLoadDesc = new RoutineLoadDesc(null, null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, null); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), routineLoadDesc)); + Assert.assertTrue(exception.getMessage().contains("COLUMNS specification")); + + Deencapsulation.setField(job, "mergeType", LoadTask.MergeType.MERGE); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), null)); + Assert.assertTrue(exception.getMessage().contains("merge_type")); + Deencapsulation.setField(job, "mergeType", LoadTask.MergeType.APPEND); + + Deencapsulation.setField(job, "mergeTypeSpecified", true); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), null)); + Assert.assertTrue(exception.getMessage().contains("merge_type")); + Deencapsulation.setField(job, "mergeTypeSpecified", false); + + RoutineLoadDesc explicitAppendDesc = new RoutineLoadDesc(null, null, null, null, null, null, null, + LoadTask.MergeType.APPEND, true, null); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), explicitAppendDesc)); + Assert.assertTrue(exception.getMessage().contains("merge_type")); + + Deencapsulation.setField(job, "whereExpr", + new BinaryPredicate(BinaryPredicate.Operator.GT, new SlotRef(null, "id"), new IntLiteral(1))); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), null)); + Assert.assertTrue(exception.getMessage().contains("where")); + Deencapsulation.setField(job, "whereExpr", null); + + RoutineLoadDesc whereDesc = new RoutineLoadDesc(null, null, null, null, + new BinaryPredicate(BinaryPredicate.Operator.GT, new SlotRef(null, "id"), new IntLiteral(1)), + null, null, LoadTask.MergeType.APPEND, null); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), whereDesc)); + Assert.assertTrue(exception.getMessage().contains("where")); + + RoutineLoadDesc deleteDesc = new RoutineLoadDesc(null, null, null, null, null, null, + new BinaryPredicate(BinaryPredicate.Operator.EQ, new SlotRef(null, "is_delete"), + new IntLiteral(1)), + LoadTask.MergeType.APPEND, null); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), deleteDesc)); + Assert.assertTrue(exception.getMessage().contains("delete")); + + Deencapsulation.setField(job, "sequenceCol", "seq"); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), null)); + 
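+ // The leftover job-level sequence column must surface in the rejection message as well.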
Assert.assertTrue(exception.getMessage().contains("function_column.sequence_col")); + Deencapsulation.setField(job, "sequenceCol", null); + + RoutineLoadDesc sequenceDesc = new RoutineLoadDesc(null, null, null, null, null, null, null, + LoadTask.MergeType.APPEND, "seq"); + exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(Maps.newHashMap(), sequenceDesc)); + Assert.assertTrue(exception.getMessage().contains("function_column.sequence_col")); + } + } + + @Test + public void testRoutineLoadDescIsInstalledBeforeFlexibleAlterValidation() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "state", RoutineLoadJob.JobState.PAUSED); + Deencapsulation.setField(job, "uniqueKeyUpdateMode", TUniqueKeyUpdateMode.UPSERT); + + Map currentJobProperties = Maps.newHashMap(); + currentJobProperties.put(FileFormatProperties.PROP_FORMAT, "json"); + Deencapsulation.setField(job, "jobProperties", currentJobProperties); + + RoutineLoadDesc columnsDesc = new RoutineLoadDesc(null, null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, null); + AlterRoutineLoadCommand columnsCommand = new AlterRoutineLoadCommand( + new LabelNameInfo("db", "job"), Maps.newHashMap(), Maps.newHashMap()); + Deencapsulation.setField(columnsCommand, "routineLoadDesc", columnsDesc); + + Map flexibleProperties = Maps.newHashMap(); + flexibleProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, "UPDATE_FLEXIBLE_COLUMNS"); + AlterRoutineLoadCommand flexibleCommand = new AlterRoutineLoadCommand( + new LabelNameInfo("db", "job"), Maps.newHashMap(), Maps.newHashMap()); + Deencapsulation.setField(flexibleCommand, "analyzedJobProperties", flexibleProperties); + + Env env = Mockito.mock(Env.class); + EditLog editLog = Mockito.mock(EditLog.class); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(env.getEditLog()).thenReturn(editLog); + Mockito.when(catalog.getDbNullable(1L)).thenReturn(db); + Mockito.when(db.getTableNullable(2L)).thenReturn(table); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean()); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate( + Mockito.anyBoolean()); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList(new Column("k", PrimitiveType.INT))); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + job.modifyProperties(columnsCommand); + Assert.assertEquals(1, job.getColumnExprDescs().descs.size()); + + UserException exception = Assert.assertThrows( + UserException.class, () -> job.modifyProperties(flexibleCommand)); + Assert.assertTrue(exception.getMessage().contains("COLUMNS 
specification")); + } + } + + @Test + public void testReplayModifyPropertiesRestoresRoutineLoadDescForFlexibleValidation() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "uniqueKeyUpdateMode", TUniqueKeyUpdateMode.UPSERT); + + Map currentJobProperties = Maps.newHashMap(); + currentJobProperties.put(FileFormatProperties.PROP_FORMAT, "json"); + Deencapsulation.setField(job, "jobProperties", currentJobProperties); + + RoutineLoadDesc columnsDesc = new RoutineLoadDesc(new Separator("|", "|"), null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, "seq"); + Map replayProperties = Maps.newHashMap(); + replayProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, "UPDATE_FLEXIBLE_COLUMNS"); + AlterRoutineLoadJobOperationLog log = new AlterRoutineLoadJobOperationLog( + 1L, replayProperties, null, columnsDesc); + job.replayModifyProperties(log); + Assert.assertEquals(1, job.getColumnExprDescs().descs.size()); + Assert.assertEquals("|", job.getColumnSeparator().getSeparator()); + Assert.assertEquals("seq", job.getSequenceCol()); + Assert.assertEquals(TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS, job.getUniqueKeyUpdateMode()); + Assert.assertEquals("UPDATE_FLEXIBLE_COLUMNS", + log.getJobProperties().get(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE)); + + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(catalog.getDbNullable(1L)).thenReturn(db); + Mockito.when(db.getTableNullable(2L)).thenReturn(table); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean()); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate(); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate( + Mockito.anyBoolean()); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList(new Column("k", PrimitiveType.INT))); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + Map flexibleProperties = Maps.newHashMap(); + flexibleProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, "UPDATE_FLEXIBLE_COLUMNS"); + UserException exception = Assert.assertThrows(UserException.class, + () -> job.validateFlexiblePartialUpdateForAlter(flexibleProperties, null)); + Assert.assertTrue(exception.getMessage().contains("COLUMNS specification")); + } + } + + @Test + public void testColumnDescsSerializedInRoutineLoadJobSnapshot() { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + ImportColumnDescs columnDescs = new ImportColumnDescs(); + columnDescs.descs.add(new ImportColumnDesc("id", null)); + Deencapsulation.setField(job, "columnDescs", columnDescs); + Deencapsulation.setField(job, "origStmt", new OriginStatement("INVALID", 0)); + + String json = GsonUtils.GSON.toJson(job, RoutineLoadJob.class); + JsonObject jsonObject = GsonUtils.GSON.fromJson(json, 
JsonObject.class); + + Assert.assertTrue(jsonObject.has("columnDescs")); + Assert.assertEquals("id", + jsonObject.getAsJsonObject("columnDescs").getAsJsonArray("des").get(0).getAsJsonObject().get("cn") + .getAsString()); + + RoutineLoadJob legacyKeyJob = GsonUtils.GSON.fromJson( + json.replace("\"columnDescs\"", "\"cd\""), RoutineLoadJob.class); + Assert.assertEquals("id", legacyKeyJob.getColumnExprDescs().descs.get(0).getColumnName()); + } + + @Test + public void testColumnDescsSnapshotOverridesOrigStmtAfterRead() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "origStmt", new OriginStatement( + "CREATE ROUTINE LOAD job ON tbl " + + "PROPERTIES (\"format\" = \"json\") " + + "FROM KAFKA (\"kafka_broker_list\" = \"127.0.0.1:9092\", " + + "\"kafka_topic\" = \"topic\")", + 0)); + + ImportColumnDescs columnDescs = new ImportColumnDescs(); + columnDescs.descs.add(new ImportColumnDesc("score", null)); + Deencapsulation.setField(job, "columnDescs", columnDescs); + Deencapsulation.setField(job, "columnSeparator", new Separator("|", "|")); + Deencapsulation.setField(job, "lineDelimiter", new Separator("\n", "\\n")); + Deencapsulation.setField(job, "partitionNamesInfo", + new PartitionNamesInfo(false, Lists.newArrayList("p2"))); + Deencapsulation.setField(job, "whereExpr", + new BinaryPredicate(BinaryPredicate.Operator.GT, new SlotRef(null, "score"), new IntLiteral(10))); + Deencapsulation.setField(job, "deleteCondition", + new BinaryPredicate(BinaryPredicate.Operator.EQ, new SlotRef(null, "deleted"), new IntLiteral(1))); + Deencapsulation.setField(job, "mergeType", LoadTask.MergeType.MERGE); + Deencapsulation.setField(job, "mergeTypeSpecified", true); + Deencapsulation.setField(job, "sequenceCol", "seq2"); + + Env env = Mockito.mock(Env.class); + CatalogMgr catalogMgr = Mockito.mock(CatalogMgr.class); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + Table table = Mockito.mock(Table.class); + Mockito.when(env.getCatalogMgr()).thenReturn(catalogMgr); + Mockito.when(catalogMgr.getCatalog(Mockito.anyString())).thenReturn(catalog); + Mockito.when(env.getInternalCatalog()).thenReturn(catalog); + Mockito.when(catalog.getDb("db")).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDb(1L)).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDbOrAnalysisException("db")).thenReturn(db); + Mockito.when(db.getName()).thenReturn("db"); + Mockito.when(db.getId()).thenReturn(1L); + Mockito.when(db.getTableOrAnalysisException("tbl")).thenReturn(table); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + String json = GsonUtils.GSON.toJson(job, RoutineLoadJob.class); + RoutineLoadJob restoredJob = GsonUtils.GSON.fromJson(json, RoutineLoadJob.class); + + Assert.assertNotEquals(RoutineLoadJob.JobState.CANCELLED, restoredJob.getState()); + Assert.assertEquals(1, restoredJob.getColumnExprDescs().descs.size()); + Assert.assertEquals("score", restoredJob.getColumnExprDescs().descs.get(0).getColumnName()); + Assert.assertEquals("|", restoredJob.getColumnSeparator().getSeparator()); + Assert.assertEquals("\n", restoredJob.getLineDelimiter().getSeparator()); + Assert.assertEquals(Lists.newArrayList("p2"), + 
restoredJob.getPartitionNamesInfo().getPartitionNames()); + Assert.assertNotNull(restoredJob.getWhereExpr()); + Assert.assertNotNull(restoredJob.getDeleteCondition()); + Assert.assertEquals(LoadTask.MergeType.MERGE, restoredJob.getMergeType()); + Assert.assertTrue(Deencapsulation.getField(restoredJob, "mergeTypeSpecified")); + Assert.assertEquals("seq2", restoredJob.getSequenceCol()); + } + } + + @Test + public void testLegacyRoutineLoadDescSnapshotOverridesOrigStmtAfterRead() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "origStmt", new OriginStatement( + "CREATE ROUTINE LOAD job ON tbl " + + "WITH MERGE " + + "DELETE ON stale_deleted = 1 " + + "PROPERTIES (\"format\" = \"json\") " + + "FROM KAFKA (\"kafka_broker_list\" = \"127.0.0.1:9092\", " + + "\"kafka_topic\" = \"topic\")", + 0)); + + ImportColumnDescs columnDescs = new ImportColumnDescs(); + columnDescs.descs.add(new ImportColumnDesc("score", null)); + Deencapsulation.setField(job, "columnDescs", columnDescs); + Deencapsulation.setField(job, "columnSeparator", new Separator("|", "|")); + Deencapsulation.setField(job, "lineDelimiter", new Separator("\n", "\\n")); + Deencapsulation.setField(job, "partitionNamesInfo", + new PartitionNamesInfo(false, Lists.newArrayList("p2"))); + Deencapsulation.setField(job, "precedingFilter", + new BinaryPredicate(BinaryPredicate.Operator.GE, new SlotRef(null, "id"), new IntLiteral(1))); + Deencapsulation.setField(job, "whereExpr", + new BinaryPredicate(BinaryPredicate.Operator.GT, new SlotRef(null, "score"), new IntLiteral(10))); + Deencapsulation.setField(job, "deleteCondition", + new BinaryPredicate(BinaryPredicate.Operator.EQ, new SlotRef(null, "deleted"), new IntLiteral(1))); + Deencapsulation.setField(job, "mergeType", LoadTask.MergeType.MERGE); + Deencapsulation.setField(job, "sequenceCol", "seq2"); + + Env env = Mockito.mock(Env.class); + CatalogMgr catalogMgr = Mockito.mock(CatalogMgr.class); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(env.getCatalogMgr()).thenReturn(catalogMgr); + Mockito.when(catalogMgr.getCatalog(Mockito.anyString())).thenReturn(catalog); + Mockito.when(env.getInternalCatalog()).thenReturn(catalog); + Mockito.when(catalog.getDb("db")).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDb(1L)).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDbOrAnalysisException("db")).thenReturn(db); + Mockito.when(db.getName()).thenReturn("db"); + Mockito.when(db.getId()).thenReturn(1L); + Mockito.when(db.getTableOrAnalysisException("tbl")).thenReturn(table); + Mockito.when(db.getTable(2L)).thenReturn(Optional.of(table)); + Mockito.when(table.getName()).thenReturn("tbl"); + Mockito.when(table.getType()).thenReturn(Table.TableType.OLAP); + Mockito.when(table.getKeysType()).thenReturn(KeysType.UNIQUE_KEYS); + Mockito.when(table.hasDeleteSign()).thenReturn(true); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + JsonObject legacyImage = GsonUtils.GSON.fromJson( + GsonUtils.GSON.toJson(job, RoutineLoadJob.class), JsonObject.class); + legacyImage.add("cd", legacyImage.remove("columnDescs")); + 
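+ // Rewrite the snapshot into the legacy key layout so it reads like an image written by an older FE build.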
legacyImage.add("partitionNamesInfo", legacyImage.remove("pni")); + legacyImage.add("precedingFilter", legacyImage.remove("pf")); + legacyImage.add("whereExpr", legacyImage.remove("filter")); + legacyImage.add("deleteCondition", legacyImage.remove("dc")); + legacyImage.add("sequenceCol", legacyImage.remove("scn")); + legacyImage.remove("cs"); + legacyImage.remove("ocs"); + legacyImage.remove("ld"); + legacyImage.remove("old"); + legacyImage.remove("mt"); + JsonObject legacyColumnSeparator = new JsonObject(); + legacyColumnSeparator.addProperty("separator", "|"); + legacyColumnSeparator.addProperty("oriSeparator", "|"); + legacyImage.add("columnSeparator", legacyColumnSeparator); + JsonObject legacyLineDelimiter = new JsonObject(); + legacyLineDelimiter.addProperty("separator", "\n"); + legacyLineDelimiter.addProperty("oriSeparator", "\\n"); + legacyImage.add("lineDelimiter", legacyLineDelimiter); + + RoutineLoadJob restoredJob = GsonUtils.GSON.fromJson(legacyImage, RoutineLoadJob.class); + + Assert.assertNotEquals(RoutineLoadJob.JobState.CANCELLED, restoredJob.getState()); + Assert.assertEquals(1, restoredJob.getColumnExprDescs().descs.size()); + Assert.assertEquals("score", restoredJob.getColumnExprDescs().descs.get(0).getColumnName()); + Assert.assertEquals("|", restoredJob.getColumnSeparator().getSeparator()); + Assert.assertEquals("\n", restoredJob.getLineDelimiter().getSeparator()); + Assert.assertEquals(Lists.newArrayList("p2"), + restoredJob.getPartitionNamesInfo().getPartitionNames()); + Assert.assertNotNull(restoredJob.getPrecedingFilter()); + Assert.assertNotNull(restoredJob.getWhereExpr()); + Assert.assertNotNull(restoredJob.getDeleteCondition()); + Assert.assertEquals(LoadTask.MergeType.MERGE, restoredJob.getMergeType()); + Assert.assertEquals("seq2", restoredJob.getSequenceCol()); + } + } + + @Test + public void testLegacyRoutineLoadImageKeepsMergeTypeFromOrigStmt() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "origStmt", new OriginStatement( + "CREATE ROUTINE LOAD job ON tbl " + + "WITH MERGE " + + "DELETE ON is_delete = 1 " + + "PROPERTIES (\"format\" = \"json\") " + + "FROM KAFKA (\"kafka_broker_list\" = \"127.0.0.1:9092\", " + + "\"kafka_topic\" = \"topic\")", + 0)); + + Env env = Mockito.mock(Env.class); + CatalogMgr catalogMgr = Mockito.mock(CatalogMgr.class); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(env.getCatalogMgr()).thenReturn(catalogMgr); + Mockito.when(catalogMgr.getCatalog(Mockito.anyString())).thenReturn(catalog); + Mockito.when(env.getInternalCatalog()).thenReturn(catalog); + Mockito.when(catalog.getDb("db")).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDb(1L)).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDbOrAnalysisException("db")).thenReturn(db); + Mockito.when(db.getName()).thenReturn("db"); + Mockito.when(db.getId()).thenReturn(1L); + Mockito.when(db.getTableOrAnalysisException("tbl")).thenReturn(table); + Mockito.when(db.getTable(2L)).thenReturn(Optional.of(table)); + Mockito.when(table.getName()).thenReturn("tbl"); + Mockito.when(table.getType()).thenReturn(Table.TableType.OLAP); + Mockito.when(table.getKeysType()).thenReturn(KeysType.UNIQUE_KEYS); + 
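+ // hasDeleteSign is stubbed true, presumably because re-analyzing the MERGE/DELETE ON origStmt requires a delete-sign column on the table.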
Mockito.when(table.hasDeleteSign()).thenReturn(true); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + String json = GsonUtils.GSON.toJson(job, RoutineLoadJob.class); + JsonObject legacyImage = GsonUtils.GSON.fromJson(json, JsonObject.class); + legacyImage.remove("mt"); + RoutineLoadJob restoredJob = GsonUtils.GSON.fromJson(legacyImage, RoutineLoadJob.class); + + Assert.assertEquals(LoadTask.MergeType.MERGE, restoredJob.getMergeType()); + Assert.assertNotNull(restoredJob.getDeleteCondition()); + } + } + + @Test + public void testFlexibleRoutineLoadImageRestoreSkipsBackendCapabilityCheck() throws Exception { + KafkaRoutineLoadJob job = new KafkaRoutineLoadJob(); + Deencapsulation.setField(job, "dbId", 1L); + Deencapsulation.setField(job, "tableId", 2L); + Deencapsulation.setField(job, "isMultiTable", false); + Deencapsulation.setField(job, "origStmt", new OriginStatement( + "CREATE ROUTINE LOAD job ON tbl " + + "PROPERTIES (\"format\" = \"json\", " + + "\"unique_key_update_mode\" = \"UPDATE_FLEXIBLE_COLUMNS\") " + + "FROM KAFKA (\"kafka_broker_list\" = \"127.0.0.1:9092\", " + + "\"kafka_topic\" = \"topic\")", + 0)); + Map jobProperties = Maps.newHashMap(); + jobProperties.put(FileFormatProperties.PROP_FORMAT, "json"); + jobProperties.put(CreateRoutineLoadInfo.UNIQUE_KEY_UPDATE_MODE, "UPDATE_FLEXIBLE_COLUMNS"); + Deencapsulation.setField(job, "jobProperties", jobProperties); + + Env env = Mockito.mock(Env.class); + CatalogMgr catalogMgr = Mockito.mock(CatalogMgr.class); + InternalCatalog catalog = Mockito.mock(InternalCatalog.class); + Database db = Mockito.mock(Database.class); + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(env.getCatalogMgr()).thenReturn(catalogMgr); + Mockito.when(catalogMgr.getCatalog(Mockito.anyString())).thenReturn(catalog); + Mockito.when(env.getInternalCatalog()).thenReturn(catalog); + Mockito.when(catalog.getDb("db")).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDb(1L)).thenReturn(Optional.of(db)); + Mockito.when(catalog.getDbOrAnalysisException("db")).thenReturn(db); + Mockito.when(db.getName()).thenReturn("db"); + Mockito.when(db.getId()).thenReturn(1L); + Mockito.when(db.getTableOrAnalysisException("tbl")).thenReturn(table); + Mockito.when(db.getTable(2L)).thenReturn(Optional.of(table)); + Mockito.when(table.getName()).thenReturn("tbl"); + Mockito.when(table.getType()).thenReturn(Table.TableType.OLAP); + Mockito.when(table.getKeysType()).thenReturn(KeysType.UNIQUE_KEYS); + Mockito.when(table.hasDeleteSign()).thenReturn(true); + Mockito.doCallRealMethod().when(table).validateForFlexiblePartialUpdate(Mockito.anyBoolean()); + Mockito.doCallRealMethod().when(table).validateVariantColumnsForFlexiblePartialUpdate( + Mockito.anyBoolean()); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(true); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getBaseSchema()).thenReturn(Lists.newArrayList( + new Column("k", PrimitiveType.INT), new Column("v", Type.VARIANT))); + Mockito.when(table.variantEnableFlattenNested()).thenReturn(false); + + try (MockedStatic envStatic = Mockito.mockStatic(Env.class)) { + envStatic.when(Env::getCurrentEnv).thenReturn(env); + envStatic.when(Env::getCurrentInternalCatalog).thenReturn(catalog); + + RoutineLoadJob restoredJob = GsonUtils.GSON.fromJson( + 
GsonUtils.GSON.toJson(job, RoutineLoadJob.class), RoutineLoadJob.class); + + Assert.assertNotEquals(RoutineLoadJob.JobState.CANCELLED, restoredJob.getState()); + Assert.assertEquals(TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS, + restoredJob.getUniqueKeyUpdateMode()); + } + } + } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/load/NereidsStreamLoadPlannerTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/load/NereidsStreamLoadPlannerTest.java new file mode 100644 index 00000000000000..f4cf3abf40194b --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/load/NereidsStreamLoadPlannerTest.java @@ -0,0 +1,160 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.load; + +import org.apache.doris.common.UserException; +import org.apache.doris.load.loadv2.LoadTask; +import org.apache.doris.nereids.analyzer.UnboundSlot; +import org.apache.doris.thrift.TFileCompressType; +import org.apache.doris.thrift.TFileFormatType; +import org.apache.doris.thrift.TFileType; +import org.apache.doris.thrift.TMergeType; +import org.apache.doris.thrift.TPartialUpdateNewRowPolicy; +import org.apache.doris.thrift.TStreamLoadPutRequest; +import org.apache.doris.thrift.TUniqueId; +import org.apache.doris.thrift.TUniqueKeyUpdateMode; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; +import org.mockito.Mockito; + +import java.util.Collections; + +public class NereidsStreamLoadPlannerTest { + @Test + public void testValidateFlexiblePartialUpdateStreamLoadOptions() throws Exception { + NereidsStreamLoadPlanner.validateLoadTaskForFlexiblePartialUpdate(newFlexibleTaskInfo()); + + NereidsLoadTaskInfo csvTaskInfo = newFlexibleTaskInfo(); + Mockito.when(csvTaskInfo.getFormatType()).thenReturn(TFileFormatType.FORMAT_CSV_PLAIN); + assertRejected(csvTaskInfo, "flexible partial update only support json format"); + + NereidsLoadTaskInfo fuzzyParseTaskInfo = newFlexibleTaskInfo(); + Mockito.when(fuzzyParseTaskInfo.isFuzzyParse()).thenReturn(true); + assertRejected(fuzzyParseTaskInfo, "Don't support flexible partial update when 'fuzzy_parse' is enabled"); + + NereidsLoadTaskInfo columnsTaskInfo = newFlexibleTaskInfo(); + NereidsLoadTaskInfo.NereidsImportColumnDescs columnDescs = + new NereidsLoadTaskInfo.NereidsImportColumnDescs(); + columnDescs.descs.add(new NereidsImportColumnDesc("k")); + Mockito.when(columnsTaskInfo.getColumnExprDescs()).thenReturn(columnDescs); + assertRejected(columnsTaskInfo, "Don't support flexible partial update when 'columns' is specified"); + + NereidsLoadTaskInfo jsonPathsTaskInfo = newFlexibleTaskInfo(); + Mockito.when(jsonPathsTaskInfo.getJsonPaths()).thenReturn("[\"$.k\",\"$.v\"]"); + assertRejected(jsonPathsTaskInfo, "Don't support flexible partial 
update when 'jsonpaths' is specified"); + + NereidsLoadTaskInfo hiddenColumnsTaskInfo = newFlexibleTaskInfo(); + Mockito.when(hiddenColumnsTaskInfo.getHiddenColumns()) + .thenReturn(Collections.singletonList("__DORIS_DELETE_SIGN__")); + assertRejected(hiddenColumnsTaskInfo, + "Don't support flexible partial update when 'hidden_columns' is specified"); + + NereidsLoadTaskInfo sequenceColTaskInfo = newFlexibleTaskInfo(); + Mockito.when(sequenceColTaskInfo.hasSequenceCol()).thenReturn(true); + assertRejected(sequenceColTaskInfo, + "Don't support flexible partial update when 'function_column.sequence_col' is specified"); + + NereidsLoadTaskInfo mergeTaskInfo = newFlexibleTaskInfo(); + Mockito.when(mergeTaskInfo.getMergeType()).thenReturn(LoadTask.MergeType.MERGE); + assertRejected(mergeTaskInfo, "Don't support flexible partial update when 'merge_type' is specified"); + + NereidsLoadTaskInfo explicitAppendTaskInfo = newFlexibleTaskInfo(); + Mockito.when(explicitAppendTaskInfo.isMergeTypeSpecified()).thenReturn(true); + assertRejected(explicitAppendTaskInfo, + "Don't support flexible partial update when 'merge_type' is specified"); + + NereidsLoadTaskInfo whereTaskInfo = newFlexibleTaskInfo(); + Mockito.when(whereTaskInfo.getWhereExpr()).thenReturn(new UnboundSlot("v")); + assertRejected(whereTaskInfo, "Don't support flexible partial update when 'where' is specified"); + + NereidsLoadTaskInfo deleteTaskInfo = newFlexibleTaskInfo(); + Mockito.when(deleteTaskInfo.getDeleteCondition()).thenReturn(new UnboundSlot("v")); + assertRejected(deleteTaskInfo, "Don't support flexible partial update when 'delete' is specified"); + } + + @Test + public void testValidateFlexiblePartialUpdateRoutineLoadOptions() throws Exception { + NereidsStreamLoadPlanner.validateLoadTaskForFlexiblePartialUpdate( + newFlexibleRoutineTaskInfo(LoadTask.MergeType.APPEND, null, null)); + + assertRejected(newFlexibleRoutineTaskInfo(LoadTask.MergeType.MERGE, null, null), + "Don't support flexible partial update when 'merge_type' is specified"); + assertRejected(newFlexibleRoutineTaskInfo(LoadTask.MergeType.APPEND, new UnboundSlot("v"), null), + "Don't support flexible partial update when 'where' is specified"); + assertRejected(newFlexibleRoutineTaskInfo(LoadTask.MergeType.APPEND, null, "seq"), + "Don't support flexible partial update when 'function_column.sequence_col' is specified"); + } + + @Test + public void testValidateFlexiblePartialUpdateStreamLoadDefaultMergeType() throws Exception { + TStreamLoadPutRequest request = newFlexibleStreamLoadRequest(); + request.setMergeType(TMergeType.APPEND); + NereidsStreamLoadTask defaultAppendTask = NereidsStreamLoadTask.fromTStreamLoadPutRequest(request); + Assertions.assertFalse(defaultAppendTask.isMergeTypeSpecified()); + NereidsStreamLoadPlanner.validateLoadTaskForFlexiblePartialUpdate(defaultAppendTask); + + request.setMergeTypeSpecified(true); + NereidsStreamLoadTask explicitAppendTask = NereidsStreamLoadTask.fromTStreamLoadPutRequest(request); + Assertions.assertTrue(explicitAppendTask.isMergeTypeSpecified()); + assertRejected(explicitAppendTask, + "Don't support flexible partial update when 'merge_type' is specified"); + + TStreamLoadPutRequest deleteRequest = newFlexibleStreamLoadRequest(); + deleteRequest.setMergeType(TMergeType.APPEND); + deleteRequest.setDeleteCondition("v = 1"); + NereidsStreamLoadTask deleteTask = NereidsStreamLoadTask.fromTStreamLoadPutRequest(deleteRequest); + Assertions.assertFalse(deleteTask.isMergeTypeSpecified()); + assertRejected(deleteTask, "Don't 
support flexible partial update when 'delete' is specified"); + } + + private NereidsLoadTaskInfo newFlexibleTaskInfo() { + NereidsLoadTaskInfo taskInfo = Mockito.mock(NereidsLoadTaskInfo.class); + Mockito.when(taskInfo.getFormatType()).thenReturn(TFileFormatType.FORMAT_JSON); + Mockito.when(taskInfo.getColumnExprDescs()).thenReturn(new NereidsLoadTaskInfo.NereidsImportColumnDescs()); + Mockito.when(taskInfo.getJsonPaths()).thenReturn(""); + Mockito.when(taskInfo.getMergeType()).thenReturn(LoadTask.MergeType.APPEND); + return taskInfo; + } + + private TStreamLoadPutRequest newFlexibleStreamLoadRequest() { + TStreamLoadPutRequest request = new TStreamLoadPutRequest(); + request.setLoadId(new TUniqueId(1, 2)); + request.setTxnId(1); + request.setFileType(TFileType.FILE_STREAM); + request.setFormatType(TFileFormatType.FORMAT_JSON); + request.setCompressType(TFileCompressType.UNKNOWN); + request.setReadJsonByLine(true); + return request; + } + + private NereidsRoutineLoadTaskInfo newFlexibleRoutineTaskInfo( + LoadTask.MergeType mergeType, UnboundSlot whereExpr, String sequenceCol) { + return new NereidsRoutineLoadTaskInfo( + 1024L, Collections.singletonMap("format", "json"), 10L, null, mergeType, null, sequenceCol, 1.0, + new NereidsLoadTaskInfo.NereidsImportColumnDescs(), null, whereExpr, null, null, + (byte) 0, (byte) 0, 1, false, TUniqueKeyUpdateMode.UPDATE_FLEXIBLE_COLUMNS, + TPartialUpdateNewRowPolicy.APPEND, false); + } + + private void assertRejected(NereidsLoadTaskInfo taskInfo, String expectedMessage) { + UserException exception = Assertions.assertThrows(UserException.class, + () -> NereidsStreamLoadPlanner.validateLoadTaskForFlexiblePartialUpdate(taskInfo)); + Assertions.assertTrue(exception.getMessage().contains(expectedMessage), exception.getMessage()); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java index 3530f3536197f4..ba176ab4a0acab 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/CreateTableCommandTest.java @@ -1114,6 +1114,42 @@ public void testVariantFieldPatternDictCompressionValidation() { Assertions.assertDoesNotThrow(() -> createTable(validSql)); } + @Test + public void testCreateTableAllowsUnsupportedFlexibleVariantModes() { + String docModeSql = "create table test.tbl_flexible_variant_doc\n" + + "(k1 int, v variant null)\n" + + "unique key(k1)\n" + + "distributed by hash(k1) buckets 1\n" + + "properties('replication_num' = '1'," + + "'enable_unique_key_merge_on_write' = 'true'," + + "'enable_unique_key_skip_bitmap_column' = 'true');"; + Assertions.assertDoesNotThrow(() -> createTable(docModeSql)); + + String flattenNestedSql = "create table test.tbl_flexible_variant_flatten\n" + + "(k1 int, v variant null)\n" + + "unique key(k1)\n" + + "distributed by hash(k1) buckets 1\n" + + "properties('replication_num' = '1'," + + "'enable_unique_key_merge_on_write' = 'true'," + + "'enable_unique_key_skip_bitmap_column' = 'true'," + + "'deprecated_variant_enable_flatten_nested' = 'true');"; + connectContext.getSessionVariable().setEnableVariantFlattenNested(true); + try { + Assertions.assertDoesNotThrow(() -> createTable(flattenNestedSql)); + } finally { + connectContext.getSessionVariable().setEnableVariantFlattenNested(false); + } + + String validSql = "create table test.tbl_flexible_variant_normal\n" + + 
"(k1 int, v variant null)\n" + + "unique key(k1)\n" + + "distributed by hash(k1) buckets 1\n" + + "properties('replication_num' = '1'," + + "'enable_unique_key_merge_on_write' = 'true'," + + "'enable_unique_key_skip_bitmap_column' = 'true');"; + Assertions.assertDoesNotThrow(() -> createTable(validSql)); + } + @Test public void testMTMVRejectVarbinary() throws Exception { String mv = "CREATE MATERIALIZED VIEW mv_vb\n" diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommandTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommandTest.java index db64fe354d5cc3..2dd7768fdc761d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommandTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/plans/commands/AlterTableCommandTest.java @@ -17,22 +17,78 @@ package org.apache.doris.nereids.trees.plans.commands; +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.KeysType; +import org.apache.doris.catalog.MaterializedIndex; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.Type; +import org.apache.doris.catalog.info.ColumnPosition; import org.apache.doris.catalog.info.TableNameInfo; +import org.apache.doris.common.UserException; +import org.apache.doris.nereids.trees.plans.commands.info.AddColumnOp; +import org.apache.doris.nereids.trees.plans.commands.info.AddColumnsOp; import org.apache.doris.nereids.trees.plans.commands.info.AddPartitionFieldOp; import org.apache.doris.nereids.trees.plans.commands.info.AlterTableOp; +import org.apache.doris.nereids.trees.plans.commands.info.ColumnDefinition; import org.apache.doris.nereids.trees.plans.commands.info.DropPartitionFieldOp; import org.apache.doris.nereids.trees.plans.commands.info.EnableFeatureOp; +import org.apache.doris.nereids.trees.plans.commands.info.ModifyColumnOp; import org.apache.doris.nereids.trees.plans.commands.info.ReplacePartitionFieldOp; +import org.apache.doris.nereids.types.IntegerType; +import com.google.common.collect.Lists; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import org.mockito.Mockito; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; public class AlterTableCommandTest { + private static Column normalVariantColumn(String name) { + return new Column(name, Type.VARIANT); + } + + private static Column docModeVariantColumn(String name) { + org.apache.doris.catalog.VariantType docModeVariant = new org.apache.doris.catalog.VariantType( + new ArrayList<>(), 0, false, 10000, 0, true, 0L, 64, false); + return new Column(name, docModeVariant); + } + + private static ColumnDefinition intColumnDefinition(String name) { + return new ColumnDefinition(name, IntegerType.INSTANCE, false); + } + + private static void invokeRewriteForOlapTable(AlterTableCommand command, OlapTable table) throws Exception { + invokePrivate(command, "rewriteAlterOpForOlapTable", + new Class[] {org.apache.doris.qe.ConnectContext.class, OlapTable.class}, + new Object[] {null, table}); + } + + private static void invokeValidateVariantAlter(AlterTableCommand command, OlapTable table) throws Exception { + invokePrivate(command, "validateAlterVariantColumnsForFlexiblePartialUpdate", + new Class[] {OlapTable.class}, new Object[] {table}); + } + + private static 
void invokePrivate(Object target, String methodName, Class[] parameterTypes, Object[] args) + throws Exception { + Method method = target.getClass().getDeclaredMethod(methodName, parameterTypes); + method.setAccessible(true); + try { + method.invoke(target, args); + } catch (InvocationTargetException e) { + if (e.getCause() instanceof Exception) { + throw (Exception) e.getCause(); + } + throw e; + } + } + @Test void testEnableFeatureOp() { List ops = new ArrayList<>(); @@ -56,6 +112,113 @@ void testEnableFeatureOp() { alterTableCommand.toSql()); } + @Test + void testRewriteEnableFlexiblePartialUpdateOnVariantTable() throws Exception { + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getKeysType()).thenReturn(KeysType.UNIQUE_KEYS); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.isUniqKeyMergeOnWriteWithClusterKeys()).thenReturn(false); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(false); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(true); + Mockito.when(table.getVisibleIndex()).thenReturn(Lists.newArrayList( + new MaterializedIndex(1L, MaterializedIndex.IndexState.NORMAL))); + Mockito.when(table.getBaseSchema(true)).thenReturn(Lists.newArrayList( + new Column("k", PrimitiveType.INT), normalVariantColumn("v"))); + + List ops = new ArrayList<>(); + EnableFeatureOp enableFlexibleUpdate = new EnableFeatureOp("UPDATE_FLEXIBLE_COLUMNS"); + enableFlexibleUpdate.validate(null); + ops.add(enableFlexibleUpdate); + AlterTableCommand alterTableCommand = new AlterTableCommand(null, ops); + + invokeRewriteForOlapTable(alterTableCommand, table); + + Assertions.assertEquals(1, alterTableCommand.getOps().size()); + Assertions.assertTrue(alterTableCommand.getOps().get(0) instanceof AddColumnOp); + AddColumnOp addColumnOp = (AddColumnOp) alterTableCommand.getOps().get(0); + Assertions.assertTrue(addColumnOp.getColumn().isSkipBitmapColumn()); + Assertions.assertEquals(Column.SKIP_BITMAP_COL, addColumnOp.getColumn().getName()); + Assertions.assertEquals("AFTER `v`", addColumnOp.getColPos().toSql()); + } + + @Test + void testEnableFlexiblePartialUpdateRequiresLightSchemaChange() throws Exception { + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getKeysType()).thenReturn(KeysType.UNIQUE_KEYS); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.isUniqKeyMergeOnWriteWithClusterKeys()).thenReturn(false); + Mockito.when(table.hasSkipBitmapColumn()).thenReturn(false); + Mockito.when(table.getEnableLightSchemaChange()).thenReturn(false); + + EnableFeatureOp enableFlexibleUpdate = new EnableFeatureOp("UPDATE_FLEXIBLE_COLUMNS"); + enableFlexibleUpdate.validate(null); + AlterTableCommand alterTableCommand = new AlterTableCommand(null, + Lists.newArrayList(enableFlexibleUpdate)); + + UserException exception = Assertions.assertThrows(UserException.class, + () -> invokeRewriteForOlapTable(alterTableCommand, table)); + Assertions.assertTrue(exception.getMessage().contains("light_schema_change")); + } + + @Test + void testEnableFlexiblePartialUpdateRejectsClusterKeyTable() throws Exception { + OlapTable table = Mockito.mock(OlapTable.class); + Mockito.when(table.getKeysType()).thenReturn(KeysType.UNIQUE_KEYS); + Mockito.when(table.getEnableUniqueKeyMergeOnWrite()).thenReturn(true); + Mockito.when(table.isUniqKeyMergeOnWriteWithClusterKeys()).thenReturn(true); + + EnableFeatureOp enableFlexibleUpdate = new EnableFeatureOp("UPDATE_FLEXIBLE_COLUMNS"); + 
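+        // The rewrite tests in this group pin down the preconditions for
+        // ENABLE FEATURE "UPDATE_FLEXIBLE_COLUMNS": cluster-key MoW tables and
+        // tables without light schema change are rejected, while on success
+        // the op is rewritten into an AddColumnOp that appends the hidden
+        // skip-bitmap column after the last visible column. Roughly (the
+        // method shape below is an assumption, not the actual rewrite code):
+        //
+        //     void rewriteEnableFlexibleColumns(OlapTable table,
+        //             List<AlterTableOp> ops, int i) throws UserException {
+        //         if (table.isUniqKeyMergeOnWriteWithClusterKeys()) {
+        //             throw new UserException("unsupported on tables with cluster keys");
+        //         }
+        //         if (!table.getEnableLightSchemaChange()) {
+        //             throw new UserException("requires light_schema_change");
+        //         }
+        //         List<Column> schema = table.getBaseSchema(true);
+        //         String lastVisible = schema.get(schema.size() - 1).getName();
+        //         ops.set(i, newSkipBitmapAddColumnOp(lastVisible)); // hypothetical helper
+        //     }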
enableFlexibleUpdate.validate(null); + AlterTableCommand alterTableCommand = new AlterTableCommand(null, + Lists.newArrayList(enableFlexibleUpdate)); + + UserException exception = Assertions.assertThrows(UserException.class, + () -> invokeRewriteForOlapTable(alterTableCommand, table)); + Assertions.assertTrue(exception.getMessage().contains("cluster keys")); + } + + @Test + void testValidateVariantAlterOnlyWhenEnablingFlexiblePartialUpdate() throws Exception { + OlapTable tableWithoutFlexibleUpdate = Mockito.mock(OlapTable.class); + Mockito.when(tableWithoutFlexibleUpdate.hasSkipBitmapColumn()).thenReturn(false); + OlapTable tableWithFlexibleUpdate = Mockito.mock(OlapTable.class); + Mockito.when(tableWithFlexibleUpdate.hasSkipBitmapColumn()).thenReturn(true); + + AddColumnOp addNormalVariant = new AddColumnOp(intColumnDefinition("normal_v"), null, null, null); + addNormalVariant.setColumn(normalVariantColumn("normal_v")); + AlterTableCommand normalVariantCommand = new AlterTableCommand(null, Lists.newArrayList(addNormalVariant)); + Assertions.assertDoesNotThrow(() -> invokeValidateVariantAlter(normalVariantCommand, tableWithFlexibleUpdate)); + + AddColumnOp addDocModeVariant = new AddColumnOp(intColumnDefinition("doc_v"), null, null, null); + addDocModeVariant.setColumn(docModeVariantColumn("doc_v")); + AlterTableCommand addDocOnlyCommand = new AlterTableCommand(null, Lists.newArrayList(addDocModeVariant)); + Assertions.assertDoesNotThrow(() -> invokeValidateVariantAlter(addDocOnlyCommand, tableWithoutFlexibleUpdate)); + Assertions.assertThrows(UserException.class, + () -> invokeValidateVariantAlter(addDocOnlyCommand, tableWithFlexibleUpdate)); + + EnableFeatureOp enableFlexibleUpdate = new EnableFeatureOp("UPDATE_FLEXIBLE_COLUMNS"); + enableFlexibleUpdate.validate(null); + AlterTableCommand addDocWithEnableCommand = new AlterTableCommand(null, + Lists.newArrayList(addDocModeVariant, enableFlexibleUpdate)); + Assertions.assertThrows(UserException.class, + () -> invokeValidateVariantAlter(addDocWithEnableCommand, tableWithoutFlexibleUpdate)); + + AddColumnsOp addColumnsOp = new AddColumnsOp(null, null, Lists.newArrayList( + new Column("plain", PrimitiveType.INT), docModeVariantColumn("doc_v2"))); + AlterTableCommand addColumnsCommand = new AlterTableCommand(null, Lists.newArrayList( + addColumnsOp, enableFlexibleUpdate)); + Assertions.assertThrows(UserException.class, + () -> invokeValidateVariantAlter(addColumnsCommand, tableWithoutFlexibleUpdate)); + + ModifyColumnOp modifyColumnOp = new ModifyColumnOp(intColumnDefinition("doc_v3"), + new ColumnPosition("plain"), null, null); + modifyColumnOp.setColumn(docModeVariantColumn("doc_v3")); + AlterTableCommand modifyCommand = new AlterTableCommand(null, Lists.newArrayList( + modifyColumnOp, enableFlexibleUpdate)); + Assertions.assertThrows(UserException.class, + () -> invokeValidateVariantAlter(modifyCommand, tableWithoutFlexibleUpdate)); + } + @Test void testAddPartitionFieldOp() { List ops = new ArrayList<>(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/persist/AlterRoutineLoadOperationLogTest.java b/fe/fe-core/src/test/java/org/apache/doris/persist/AlterRoutineLoadOperationLogTest.java index 8a1550d48f5d13..5cfb8047b99199 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/persist/AlterRoutineLoadOperationLogTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/persist/AlterRoutineLoadOperationLogTest.java @@ -17,12 +17,17 @@ package org.apache.doris.persist; +import org.apache.doris.analysis.ImportColumnDesc; +import 
org.apache.doris.analysis.Separator; import org.apache.doris.common.UserException; import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.load.RoutineLoadDesc; +import org.apache.doris.load.loadv2.LoadTask; import org.apache.doris.load.routineload.kafka.KafkaConfiguration; import org.apache.doris.load.routineload.kafka.KafkaDataSourceProperties; import org.apache.doris.nereids.trees.plans.commands.info.CreateRoutineLoadInfo; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.junit.Assert; import org.junit.Test; @@ -60,8 +65,12 @@ public void testSerializeAlterRoutineLoadOperationLog() throws IOException, User routineLoadDataSourceProperties.setTimezone(TimeUtils.DEFAULT_TIME_ZONE); routineLoadDataSourceProperties.analyze(); + RoutineLoadDesc routineLoadDesc = new RoutineLoadDesc(new Separator("|", "|"), null, + Lists.newArrayList(new ImportColumnDesc("id", null)), null, null, null, null, + LoadTask.MergeType.APPEND, true, "seq"); AlterRoutineLoadJobOperationLog log = new AlterRoutineLoadJobOperationLog(jobId, - jobProperties, routineLoadDataSourceProperties); + jobProperties, routineLoadDataSourceProperties, routineLoadDesc); + jobProperties.put(CreateRoutineLoadInfo.DESIRED_CONCURRENT_NUMBER_PROPERTY, "7"); log.write(out); out.flush(); out.close(); @@ -72,7 +81,13 @@ public void testSerializeAlterRoutineLoadOperationLog() throws IOException, User AlterRoutineLoadJobOperationLog log2 = AlterRoutineLoadJobOperationLog.read(in); Assert.assertEquals(1, log2.getJobProperties().size()); Assert.assertEquals("5", log2.getJobProperties().get(CreateRoutineLoadInfo.DESIRED_CONCURRENT_NUMBER_PROPERTY)); - KafkaDataSourceProperties kafkaDataSourceProperties = (KafkaDataSourceProperties) log2.getDataSourceProperties(); + KafkaDataSourceProperties kafkaDataSourceProperties = + (KafkaDataSourceProperties) log2.getDataSourceProperties(); + Assert.assertEquals(1, log2.getColumnDescs().descs.size()); + Assert.assertEquals("id", log2.getColumnDescs().descs.get(0).getColumnName()); + Assert.assertEquals("|", log2.getRoutineLoadDesc().getColumnSeparator().getSeparator()); + Assert.assertTrue(log2.getRoutineLoadDesc().isMergeTypeSpecified()); + Assert.assertEquals("seq", log2.getRoutineLoadDesc().getSequenceColName()); Assert.assertEquals(null, kafkaDataSourceProperties.getBrokerList()); Assert.assertEquals(null, kafkaDataSourceProperties.getTopic()); Assert.assertEquals(1, kafkaDataSourceProperties.getCustomKafkaProperties().size()); diff --git a/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java b/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java index 033568017d90f3..afef92822922eb 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/system/SystemInfoServiceTest.java @@ -101,7 +101,7 @@ public void testBackendHbResponseSerialization() throws IOException { System.out.println(Env.getCurrentEnvJournalVersion()); BackendHbResponse writeResponse = new BackendHbResponse(1L, 1234, 1234, 1234, 1234, 1234, "test", - Tag.VALUE_COMPUTATION, 10, 100, false, 1234); + Tag.VALUE_COMPUTATION, 10, 100, false, 1234, 2048, true); // Write objects to file File file1 = new File("./BackendHbResponseSerialization"); @@ -122,6 +122,7 @@ public void testBackendHbResponseSerialization() throws IOException { // Before meta version 121, nodeRole will not be read, so readResponse is not equal to writeResponse 
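+        // With the current meta version, the two trailing constructor
+        // arguments added above (be_mem = 2048 and the
+        // supports_variant_flexible_partial_update flag = true) are expected
+        // to survive the write/read round trip, which the string comparison
+        // below verifies. For manually serialized metadata the read side is
+        // typically version-gated, roughly as sketched here (the gating
+        // constant is an assumption, not an actual FeMetaVersion name):
+        //
+        //     if (metaVersion >= VERSION_WITH_CAPABILITY_FLAG) {
+        //         supportsVariantFlexiblePartialUpdate = in.readBoolean();
+        //     }
+        //
+        // With Gson-based images the new optional field simply falls back to
+        // its default when absent from an older image.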
Assert.assertTrue(readResponse.toString().equals(writeResponse.toString())); Assert.assertTrue(Tag.VALUE_COMPUTATION.equals(readResponse.getNodeRole())); + Assert.assertTrue(readResponse.supportsVariantFlexiblePartialUpdate()); } catch (IOException e) { e.printStackTrace(); Assert.fail(); @@ -132,6 +133,32 @@ public void testBackendHbResponseSerialization() throws IOException { } } + @Test + public void testBackendVariantFlexiblePartialUpdateCapabilityFromHeartbeat() { + long oldToleranceCount = Config.max_backend_heartbeat_failure_tolerance_count; + Config.max_backend_heartbeat_failure_tolerance_count = 2; + try { + Backend backend = new Backend(1L, "127.0.0.1", 9050); + BackendHbResponse okResponse = new BackendHbResponse(1L, 9060, 8040, 8060, + 1234, 5678, "test", Tag.VALUE_MIX, 10, 100, false, 8070, 2048, true); + backend.handleHbResponse(okResponse, false); + Assert.assertTrue(backend.isAlive()); + Assert.assertTrue(backend.supportsVariantFlexiblePartialUpdate()); + + BackendHbResponse badResponse = new BackendHbResponse(1L, "127.0.0.1", + okResponse.getHbTime(), "bad heartbeat"); + backend.handleHbResponse(badResponse, false); + Assert.assertTrue(backend.isAlive()); + Assert.assertTrue(backend.supportsVariantFlexiblePartialUpdate()); + + backend.handleHbResponse(badResponse, false); + Assert.assertFalse(backend.isAlive()); + Assert.assertFalse(backend.supportsVariantFlexiblePartialUpdate()); + } finally { + Config.max_backend_heartbeat_failure_tolerance_count = oldToleranceCount; + } + } + @Test public void testSelectBackendIdsByPolicy() throws Exception { Config.disable_backend_black_list = true; diff --git a/gensrc/proto/olap_file.proto b/gensrc/proto/olap_file.proto index 593a89d0aa992e..0e4fcf7cacc085 100644 --- a/gensrc/proto/olap_file.proto +++ b/gensrc/proto/olap_file.proto @@ -791,6 +791,7 @@ message PartialUpdateInfoPB { optional int32 nano_seconds = 13 [default = 0]; optional UniqueKeyUpdateModePB partial_update_mode = 14 [default = UPSERT]; optional PartialUpdateNewRowPolicyPB partial_update_new_key_policy = 15 [default = APPEND]; + optional int32 sequence_map_col_uid = 16 [default = -1]; } message FileEncryptionInfoPB { diff --git a/gensrc/thrift/FrontendService.thrift b/gensrc/thrift/FrontendService.thrift index 07eb9333637721..300630ab220e16 100644 --- a/gensrc/thrift/FrontendService.thrift +++ b/gensrc/thrift/FrontendService.thrift @@ -587,6 +587,7 @@ struct TStreamLoadPutRequest { 58: optional Descriptors.TPartialUpdateNewRowPolicy partial_update_new_key_policy 59: optional bool empty_field_as_null 60: optional TCertBasedAuth cert_based_auth + 61: optional bool merge_type_specified // For cloud 1000: optional string cloud_cluster diff --git a/gensrc/thrift/HeartbeatService.thrift b/gensrc/thrift/HeartbeatService.thrift index ecf9727cf3e0db..ef22209d2bb47e 100644 --- a/gensrc/thrift/HeartbeatService.thrift +++ b/gensrc/thrift/HeartbeatService.thrift @@ -62,6 +62,7 @@ struct TBackendInfo { 8: optional bool is_shutdown 9: optional Types.TPort arrow_flight_sql_port 10: optional i64 be_mem // The physical memory available for use by BE. 
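+    // Field 11 below is how a BE advertises, on every heartbeat, that it can
+    // execute flexible partial updates on VARIANT columns. The FE only trusts
+    // the bit while the backend is alive; a sketch of the consuming side
+    // (method names are assumptions, but the behavior matches the
+    // SystemInfoServiceTest expectations in this patch):
+    //
+    //     if (hbResponse.getStatus() == HbStatus.OK) {
+    //         supportsVariantFlexiblePartialUpdate =
+    //                 hbResponse.supportsVariantFlexiblePartialUpdate();
+    //     } else if (++heartbeatFailureCounter
+    //             >= Config.max_backend_heartbeat_failure_tolerance_count) {
+    //         isAlive = false;
+    //         supportsVariantFlexiblePartialUpdate = false; // dead BE: reset the bit
+    //     }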
+ 11: optional bool supports_variant_flexible_partial_update // For cloud 1000: optional i64 fragment_executing_count 1001: optional i64 fragment_last_active_time diff --git a/regression-test/data/unique_with_mow_p0/flexible/variant_patch_merge.json b/regression-test/data/unique_with_mow_p0/flexible/variant_patch_merge.json new file mode 100644 index 00000000000000..f79265847946e5 --- /dev/null +++ b/regression-test/data/unique_with_mow_p0/flexible/variant_patch_merge.json @@ -0,0 +1,3 @@ +{"k":1,"v":{"a":10}} +{"k":1,"v":{"b":20}} +{"k":2,"c":22,"v":{"nested":{"y":2}}} diff --git a/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update.groovy b/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update.groovy index 510eeb0ecab73e..74561130b966d0 100644 --- a/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update.groovy +++ b/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update.groovy @@ -22,9 +22,29 @@ import org.apache.kafka.clients.producer.ProducerRecord suite("test_routine_load_flexible_partial_update", "nonConcurrent") { if (RoutineLoadTestUtils.isKafkaTestEnabled(context)) { + sql "set default_variant_enable_doc_mode = false" + def runSql = { String q -> sql q } def kafka_broker = RoutineLoadTestUtils.getKafkaBroker(context) def producer = RoutineLoadTestUtils.createKafkaProducer(kafka_broker) + def waitForLoadedRows = { String jobName, long expectedLoadedRows, int maxWait = 120 -> + def waited = 0 + while (waited < maxWait) { + def res = sql "show routine load for ${jobName}" + def state = res[0][8].toString() + def statJson = new groovy.json.JsonSlurper().parseText(res[0][14].toString()) + long loadedRows = statJson.loadedRows as long + logger.info("waitForLoadedRows: state=${state}, loadedRows=${loadedRows}, expected>=${expectedLoadedRows}") + if (state == "RUNNING" && loadedRows >= expectedLoadedRows) { + break + } + if (waited >= maxWait - 1) { + assertTrue("Timeout waiting for loadedRows >= ${expectedLoadedRows}, got ${loadedRows}", false) + } + sleep(1000) + waited++ + } + } // Test 1: Basic flexible partial update def kafkaJsonTopic1 = "test_routine_load_flexible_partial_update_basic" @@ -365,7 +385,7 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { exception "Flexible partial update does not support COLUMNS specification" } - // Test 7: Success case - WHERE clause works with flexible partial update + // Test 7: Error case - WHERE clause not supported with flexible partial update def kafkaJsonTopic7 = "test_routine_load_flexible_partial_update_where" def tableName7 = "test_routine_load_flex_update_where" def job7 = "test_flex_partial_update_job_where" @@ -389,18 +409,7 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { ); """ - // insert initial data - sql """ - INSERT INTO ${tableName7} VALUES - (1, 'alice', 100, 20), - (2, 'bob', 90, 21), - (3, 'charlie', 80, 22) - """ - - qt_select_initial7 "SELECT id, name, score, age FROM ${tableName7} ORDER BY id" - - try { - // create routine load with WHERE clause and flexible partial update + test { sql """ CREATE ROUTINE LOAD ${job7} ON ${tableName7} WHERE id > 1 @@ -417,37 +426,7 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { "property.kafka_default_offsets" = "OFFSET_BEGINNING" ); """ - - // send JSON data - WHERE clause filters id > 1, so id=1 row should NOT be processed - def data7 = [ - '{"id": 1, "score": 999}', - '{"id": 2, "score": 
95}', - '{"id": 3, "name": "chuck"}', - '{"id": 4, "name": "diana", "score": 70}' - ] - - data7.each { line -> - logger.info("Sending to Kafka: ${line}") - def record = new ProducerRecord<>(kafkaJsonTopic7, null, line) - producer.send(record).get() - } - producer.flush() - - // With skip_delete_bitmap=true and WHERE id > 1: - // - id=1: 1 version (not updated, filtered by WHERE) - // - id=2: 2 versions (original + partial update) - // - id=3: 2 versions (original + partial update) - // - id=4: 1 version (new row) - // Total: 6 rows, so expectedMinRows = 5 (waits for count > 5) - RoutineLoadTestUtils.waitForTaskFinishMoW(runSql, job7, tableName7, 5) - - // verify: id=1 should NOT be updated (filtered by WHERE), id=2,3,4 should be updated - qt_select_after_flex_where "SELECT id, name, score, age FROM ${tableName7} ORDER BY id" - } catch (Exception e) { - logger.error("Error during test: " + e.getMessage()) - throw e - } finally { - sql "STOP ROUTINE LOAD FOR ${job7}" + exception "where" } // Test 8: Error case - table without skip_bitmap column @@ -491,7 +470,7 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { exception "Flexible partial update can only support table with skip bitmap hidden column" } - // Test 9: Error case - table with variant column + // Test 9: table with variant column def kafkaJsonTopic9 = "test_routine_load_flexible_partial_update_variant" def tableName9 = "test_routine_load_flex_update_variant" def job9 = "test_flex_partial_update_job_variant" @@ -513,7 +492,8 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { ); """ - test { + sql """ INSERT INTO ${tableName9} VALUES (1, 'base', '{"a": 1, "b": 1}') """ + try { sql """ CREATE ROUTINE LOAD ${job9} ON ${tableName9} PROPERTIES @@ -529,7 +509,214 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { "property.kafka_default_offsets" = "OFFSET_BEGINNING" ); """ - exception "Flexible partial update can only support table without variant columns" + + def data9 = [ + '{"id": 1, "data": {"a": 10}}', + '{"id": 1, "data": {"c": 3}}', + '{"id": 2, "name": "new", "data": {"x": 5}}' + ] + data9.each { line -> + logger.info("Sending to Kafka: ${line}") + def record = new ProducerRecord<>(kafkaJsonTopic9, null, line) + producer.send(record).get() + } + producer.flush() + + RoutineLoadTestUtils.waitForTaskFinishMoW(runSql, job9, tableName9, 1) + def variantRows = sql """ + SELECT id, name, cast(data['a'] as int), cast(data['b'] as int), + cast(data['c'] as int), cast(data['x'] as int) + FROM ${tableName9} ORDER BY id + """ + assertEquals("[[1, base, 10, 1, 3, null], [2, new, null, null, null, 5]]", + variantRows.toString()) + } finally { + sql "STOP ROUTINE LOAD FOR ${job9}" + } + + def tableName9Doc = "test_routine_load_flex_update_variant_doc" + def job9Doc = "test_flex_partial_update_job_variant_doc" + + sql """ DROP TABLE IF EXISTS ${tableName9Doc} force;""" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName9Doc} ( + `id` int NOT NULL, + `data` variant NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true" + ); + """ + test { + sql """ + CREATE ROUTINE LOAD ${job9Doc} ON ${tableName9Doc} + PROPERTIES + ( + "max_batch_interval" = "10", + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + 
) + FROM KAFKA + ( + "kafka_broker_list" = "${kafka_broker}", + "kafka_topic" = "${kafkaJsonTopic9}_doc", + "property.kafka_default_offsets" = "OFFSET_BEGINNING" + ); + """ + exception "VARIANT flexible partial update does not support doc mode" + } + + def kafkaJsonTopic9Seq = "test_routine_load_flexible_partial_update_variant_seq" + def tableName9Seq = "test_routine_load_flex_update_variant_seq" + def job9Seq = "test_flex_partial_update_job_variant_seq" + + sql """ DROP TABLE IF EXISTS ${tableName9Seq} force;""" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName9Seq} ( + `id` int NOT NULL, + `seq` int NULL, + `data` variant NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "function_column.sequence_col" = "seq" + ); + """ + sql """ INSERT INTO ${tableName9Seq} VALUES (1, 10, '{"a": 1, "b": 1}') """ + try { + sql """ + CREATE ROUTINE LOAD ${job9Seq} ON ${tableName9Seq} + PROPERTIES + ( + "max_batch_interval" = "10", + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "${kafka_broker}", + "kafka_topic" = "${kafkaJsonTopic9Seq}", + "property.kafka_default_offsets" = "OFFSET_BEGINNING" + ); + """ + + def data9Seq = [ + '{"id": 1, "seq": 20, "data": {"a": 2}}', + '{"id": 1, "seq": 15, "data": {"b": 3}}', + '{"id": 1, "seq": 20, "data": {"b": 4}}' + ] + data9Seq.each { line -> + logger.info("Sending to Kafka: ${line}") + def record = new ProducerRecord<>(kafkaJsonTopic9Seq, null, line) + producer.send(record).get() + } + producer.flush() + + waitForLoadedRows(job9Seq, 3) + def seqVariantRows = sql """ + SELECT id, seq, __DORIS_SEQUENCE_COL__, cast(data['a'] as int), cast(data['b'] as int) + FROM ${tableName9Seq} ORDER BY id + """ + assertEquals("[[1, 20, 20, 2, 4]]", seqVariantRows.toString()) + } finally { + sql "STOP ROUTINE LOAD FOR ${job9Seq}" + } + + def kafkaJsonTopic9Order = "test_routine_load_flexible_partial_update_variant_order" + def tableName9Order = "test_routine_load_flex_update_variant_order" + def job9Order = "test_flex_partial_update_job_variant_order" + + sql """ DROP TABLE IF EXISTS ${tableName9Order} force;""" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName9Order} ( + `id` int NOT NULL, + `seq` int NULL, + `data` variant NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true" + ); + """ + test { + sql """ + CREATE ROUTINE LOAD ${job9Order} ON ${tableName9Order} + ORDER BY seq + PROPERTIES + ( + "max_batch_interval" = "10", + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "${kafka_broker}", + "kafka_topic" = "${kafkaJsonTopic9Order}", + "property.kafka_default_offsets" = "OFFSET_BEGINNING" + ); + """ + exception "function_column.sequence_col" + } + + def kafkaJsonTopic9Merge = "test_routine_load_flexible_partial_update_variant_merge" + def tableName9Merge = "test_routine_load_flex_update_variant_merge" + def job9Merge = "test_flex_partial_update_job_variant_merge" + + sql """ DROP TABLE IF EXISTS 
${tableName9Merge} force;""" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName9Merge} ( + `id` int NOT NULL, + `name` varchar(65533) NULL, + `is_delete` int NULL, + `data` variant NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true" + ); + """ + test { + sql """ + CREATE ROUTINE LOAD ${job9Merge} ON ${tableName9Merge} + WITH MERGE + DELETE ON is_delete = 1 + PROPERTIES + ( + "max_batch_interval" = "10", + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "${kafka_broker}", + "kafka_topic" = "${kafkaJsonTopic9Merge}", + "property.kafka_default_offsets" = "OFFSET_BEGINNING" + ); + """ + exception "merge_type" } // Test 10: Error case - invalid unique_key_update_mode value @@ -702,6 +889,18 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { // pause the job before altering sql "PAUSE ROUTINE LOAD FOR ${job12}" + test { + sql """ + ALTER ROUTINE LOAD FOR ${job12} + PROPERTIES + ( + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS", + "jsonpaths" = '[\"\$.id\"]' + ); + """ + exception "Flexible partial update does not support jsonpaths" + } + // alter to UPDATE_FLEXIBLE_COLUMNS mode sql """ ALTER ROUTINE LOAD FOR ${job12} @@ -717,6 +916,36 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { logger.info("Altered routine load job properties: ${jobProperties}") assertTrue(jobProperties.contains("UPDATE_FLEXIBLE_COLUMNS")) + test { + sql """ + ALTER ROUTINE LOAD FOR ${job12} + PROPERTIES + ( + "jsonpaths" = '[\"\$.id\"]' + ); + """ + exception "Flexible partial update does not support jsonpaths" + } + + test { + sql """ + ALTER ROUTINE LOAD FOR ${job12} + PROPERTIES + ( + "fuzzy_parse" = "true" + ); + """ + exception "Flexible partial update does not support fuzzy_parse" + } + + test { + sql """ + ALTER ROUTINE LOAD FOR ${job12} + COLUMNS(id, score); + """ + exception "Flexible partial update does not support COLUMNS specification" + } + // resume the job sql "RESUME ROUTINE LOAD FOR ${job12}" @@ -1050,7 +1279,7 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { sql "STOP ROUTINE LOAD FOR ${job17}" } - // Test 18: ALTER to flex mode succeeds with WHERE clause + // Test 18: ALTER to flex mode fails with WHERE clause def kafkaJsonTopic18 = "test_routine_load_alter_flex_where" def tableName18 = "test_routine_load_alter_flex_where" def job18 = "test_alter_flex_where_job" @@ -1074,15 +1303,6 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { ); """ - // insert initial data - sql """ - INSERT INTO ${tableName18} VALUES - (1, 'alice', 100, 20), - (2, 'bob', 90, 21) - """ - - qt_select_initial18 "SELECT id, name, score, age FROM ${tableName18} ORDER BY id" - try { // create routine load with WHERE clause (UPSERT mode) sql """ @@ -1103,46 +1323,16 @@ suite("test_routine_load_flexible_partial_update", "nonConcurrent") { sql "PAUSE ROUTINE LOAD FOR ${job18}" - // alter to UPDATE_FLEXIBLE_COLUMNS mode - should succeed - sql """ - ALTER ROUTINE LOAD FOR ${job18} - PROPERTIES - ( - "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" - ); - """ - - // verify the property was changed - def res = sql "SHOW ROUTINE LOAD FOR ${job18}" - def jobProperties = res[0][11].toString() - 
logger.info("Altered routine load job properties: ${jobProperties}") - assertTrue(jobProperties.contains("UPDATE_FLEXIBLE_COLUMNS")) - - sql "RESUME ROUTINE LOAD FOR ${job18}" - - // send JSON data - WHERE clause filters id > 1 - def data18 = [ - '{"id": 1, "score": 999}', - '{"id": 2, "score": 95}', - '{"id": 3, "name": "charlie", "score": 80}' - ] - - data18.each { line -> - logger.info("Sending to Kafka: ${line}") - def record = new ProducerRecord<>(kafkaJsonTopic18, null, line) - producer.send(record).get() + test { + sql """ + ALTER ROUTINE LOAD FOR ${job18} + PROPERTIES + ( + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ); + """ + exception "where" } - producer.flush() - - // With skip_delete_bitmap=true and WHERE id > 1: - // - id=1: 1 version (not updated, filtered by WHERE) - // - id=2: 2 versions (original + partial update) - // - id=3: 1 version (new row) - // Total: 4 rows, so expectedMinRows = 3 (waits for count > 3) - RoutineLoadTestUtils.waitForTaskFinishMoW(runSql, job18, tableName18, 3) - - // verify: id=1 should NOT be updated (filtered by WHERE), id=2,3 should be updated - qt_select_after_alter_flex_where "SELECT id, name, score, age FROM ${tableName18} ORDER BY id" } catch (Exception e) { logger.error("Error during test: " + e.getMessage()) throw e diff --git a/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update_validate.groovy b/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update_validate.groovy new file mode 100644 index 00000000000000..1c0877808dbf79 --- /dev/null +++ b/regression-test/suites/load_p0/routine_load/test_routine_load_flexible_partial_update_validate.groovy @@ -0,0 +1,301 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_routine_load_flexible_partial_update_validate", "nonConcurrent") { + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_where_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_merge_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_append_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_order_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_where_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_merge_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_delete_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_order_job; """ + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_table_seq_job; """ + + sql """ DROP TABLE IF EXISTS test_rl_flex_validate_where force; """ + sql """ + CREATE TABLE test_rl_flex_validate_where ( + `id` int NOT NULL, + `name` varchar(32) NULL, + `score` int NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true" + ); + """ + + test { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_where_job ON test_rl_flex_validate_where + WHERE id > 1 + PROPERTIES + ( + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_where" + ); + """ + exception "where" + } + + sql """ DROP TABLE IF EXISTS test_rl_flex_validate_merge force; """ + sql """ + CREATE TABLE test_rl_flex_validate_merge ( + `id` int NOT NULL, + `name` varchar(32) NULL, + `is_delete` int NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true" + ); + """ + + test { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_merge_job ON test_rl_flex_validate_merge + WITH MERGE + DELETE ON is_delete = 1 + PROPERTIES + ( + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_merge" + ); + """ + exception "merge_type" + } + + test { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_append_job ON test_rl_flex_validate_merge + WITH APPEND + PROPERTIES + ( + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_append" + ); + """ + exception "merge_type" + } + + sql """ DROP TABLE IF EXISTS test_rl_flex_validate_order force; """ + sql """ + CREATE TABLE test_rl_flex_validate_order ( + `id` int NOT NULL, + `seq` int NULL, + `score` int NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true" + ); + """ + + test { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_order_job ON test_rl_flex_validate_order + ORDER BY seq + PROPERTIES + ( + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( 
+ "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_order" + ); + """ + exception "function_column.sequence_col" + } + + try { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_alter_where_job ON test_rl_flex_validate_where + WHERE id > 1 + PROPERTIES + ( + "format" = "json" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_alter_where" + ); + """ + sql """ PAUSE ROUTINE LOAD FOR test_rl_flex_validate_alter_where_job; """ + test { + sql """ + ALTER ROUTINE LOAD FOR test_rl_flex_validate_alter_where_job + PROPERTIES + ( + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ); + """ + exception "where" + } + } finally { + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_where_job; """ + } + + try { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_alter_merge_job ON test_rl_flex_validate_merge + WITH MERGE + DELETE ON is_delete = 1 + PROPERTIES + ( + "format" = "json" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_alter_merge" + ); + """ + sql """ PAUSE ROUTINE LOAD FOR test_rl_flex_validate_alter_merge_job; """ + test { + sql """ + ALTER ROUTINE LOAD FOR test_rl_flex_validate_alter_merge_job + PROPERTIES + ( + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ); + """ + exception "merge_type" + } + } finally { + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_merge_job; """ + } + + try { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_alter_delete_job ON test_rl_flex_validate_merge + PROPERTIES + ( + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_alter_delete" + ); + """ + sql """ PAUSE ROUTINE LOAD FOR test_rl_flex_validate_alter_delete_job; """ + test { + sql """ + ALTER ROUTINE LOAD FOR test_rl_flex_validate_alter_delete_job + DELETE ON is_delete = 1; + """ + exception "delete" + } + } finally { + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_delete_job; """ + } + + try { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_alter_order_job ON test_rl_flex_validate_order + ORDER BY seq + PROPERTIES + ( + "format" = "json" + ) + FROM KAFKA + ( + "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_alter_order" + ); + """ + sql """ PAUSE ROUTINE LOAD FOR test_rl_flex_validate_alter_order_job; """ + test { + sql """ + ALTER ROUTINE LOAD FOR test_rl_flex_validate_alter_order_job + PROPERTIES + ( + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ); + """ + exception "function_column.sequence_col" + } + } finally { + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_alter_order_job; """ + } + + sql """ DROP TABLE IF EXISTS test_rl_flex_validate_table_seq force; """ + sql """ + CREATE TABLE test_rl_flex_validate_table_seq ( + `id` int NOT NULL, + `seq` int NULL, + `score` int NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "function_column.sequence_col" = "seq" + ); + """ + try { + sql """ + CREATE ROUTINE LOAD test_rl_flex_validate_table_seq_job ON test_rl_flex_validate_table_seq + PROPERTIES + ( + "format" = "json", + "unique_key_update_mode" = "UPDATE_FLEXIBLE_COLUMNS" + ) + FROM KAFKA + ( 
+ "kafka_broker_list" = "127.0.0.1:9092", + "kafka_topic" = "test_rl_flex_validate_table_seq" + ); + """ + } finally { + try_sql """ STOP ROUTINE LOAD FOR test_rl_flex_validate_table_seq_job; """ + } +} diff --git a/regression-test/suites/unique_with_mow_p0/flexible/publish/test_flexible_partial_update_variant_publish_conflict.groovy b/regression-test/suites/unique_with_mow_p0/flexible/publish/test_flexible_partial_update_variant_publish_conflict.groovy new file mode 100644 index 00000000000000..de8605abf71fe8 --- /dev/null +++ b/regression-test/suites/unique_with_mow_p0/flexible/publish/test_flexible_partial_update_variant_publish_conflict.groovy @@ -0,0 +1,303 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.junit.Assert + +suite("test_flexible_partial_update_variant_publish_conflict") { + if (isCloudMode()) { + logger.info("skip test_flexible_partial_update_variant_publish_conflict in cloud mode") + return + } + + sql "set default_variant_enable_doc_mode = false" + + def dbName = context.config.getDbNameByFile(context.file) + + def do_streamload_2pc_commit = { tableName, txnId -> + def command = "curl -X PUT --location-trusted -u ${context.config.feHttpUser}:${context.config.feHttpPassword}" + + " -H txn_id:${txnId}" + + " -H txn_operation:commit" + + " http://${context.config.feHttpAddress}/api/${dbName}/${tableName}/_stream_load_2pc" + log.info("http_stream execute 2pc: ${command}") + + def process = command.execute() + def code = process.waitFor() + def out = process.text + def json2pc = parseJson(out) + log.info("http_stream 2pc result: ${out}".toString()) + assertEquals(0, code) + assertEquals("success", json2pc.status.toLowerCase()) + } + + def wait_for_publish = { txnId, waitSecond -> + String st = "PREPARE" + while (!st.equalsIgnoreCase("VISIBLE") && !st.equalsIgnoreCase("ABORTED") && waitSecond > 0) { + Thread.sleep(1000) + waitSecond -= 1 + def result = sql_return_maparray "show transaction from ${dbName} where id = ${txnId}" + assertNotNull(result) + st = result[0].TransactionStatus + } + log.info("Stream load with txn ${txnId} is ${st}") + assertEquals("VISIBLE", st) + } + + def prepare_streamload = { tableName, loadBody -> + def txnId = null + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'two_phase_commit', 'true' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream(loadBody.getBytes("UTF-8")) + time 40000 + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + + def json = parseJson(result) + txnId = json.TxnId + assertEquals("success", json.Status.toLowerCase()) + } + } + return txnId + } + + def streamload = { tableName, loadBody -> + 
streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream(loadBody.getBytes("UTF-8")) + time 20000 + } + } + + for (def use_row_store : [false, true]) { + def tableName = "t_fpu_var_pub_conf_${use_row_store}" + sql """ DROP TABLE IF EXISTS ${tableName} FORCE """ + sql """ + CREATE TABLE ${tableName} ( + `k` int NOT NULL, + `c` int NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "store_row_column" = "${use_row_store}"); + """ + + sql """ INSERT INTO ${tableName} VALUES (1, 1, '{"a":1,"b":1}') """ + + def txnA = prepare_streamload(tableName, """{"k":1,"v":{"a":2}}\n""") + def txnB = prepare_streamload(tableName, """{"k":1,"c":9,"v":{"b":3}}\n""") + do_streamload_2pc_commit(tableName, txnB) + wait_for_publish(txnB, 60) + do_streamload_2pc_commit(tableName, txnA) + wait_for_publish(txnA, 60) + + def disjointPathRows = sql """ + SELECT k, cast(v['a'] as int), cast(v['b'] as int), c + FROM ${tableName} ORDER BY k + """ + assertEquals("[[1, 2, 3, 9]]", disjointPathRows.toString()) + + def txnC = prepare_streamload(tableName, """{"k":1,"v":{"a":4}}\n""") + def txnD = prepare_streamload(tableName, """{"k":1,"v":{"a":5}}\n""") + do_streamload_2pc_commit(tableName, txnD) + wait_for_publish(txnD, 60) + do_streamload_2pc_commit(tableName, txnC) + wait_for_publish(txnC, 60) + + def samePathRows = sql """ + SELECT k, cast(v['a'] as int), cast(v['b'] as int), c + FROM ${tableName} ORDER BY k + """ + assertEquals("[[1, 4, 3, 9]]", samePathRows.toString()) + + sql """ INSERT INTO ${tableName} VALUES (3, 3, '{"nested":{"x":1},"b":1}') """ + def txnEmptyObject = prepare_streamload(tableName, """{"k":3,"v":{"nested":{}}}\n""") + def txnOtherPath = prepare_streamload(tableName, """{"k":3,"v":{"b":3}}\n""") + do_streamload_2pc_commit(tableName, txnOtherPath) + wait_for_publish(txnOtherPath, 60) + do_streamload_2pc_commit(tableName, txnEmptyObject) + wait_for_publish(txnEmptyObject, 60) + + def emptyObjectRows = sql """ + SELECT k, v['nested']['x'] IS NULL, cast(v['b'] as int), c + FROM ${tableName} WHERE k = 3 ORDER BY k + """ + assertEquals("[[3, true, 3, 3]]", emptyObjectRows.toString()) + + sql """ INSERT INTO ${tableName} VALUES (4, 4, '{"a":{"c":0},"x":1}') """ + def txnParentThenChild = prepare_streamload(tableName, + """{"k":4,"v":{"a":{}}} +{"k":4,"v":{"a":{"b":1}}} +""") + def txnConcurrentChild = prepare_streamload(tableName, """{"k":4,"v":{"a":{"c":9}}}\n""") + do_streamload_2pc_commit(tableName, txnConcurrentChild) + wait_for_publish(txnConcurrentChild, 60) + do_streamload_2pc_commit(tableName, txnParentThenChild) + wait_for_publish(txnParentThenChild, 60) + + def parentThenChildRows = sql """ + SELECT k, cast(v['a']['b'] as int), v['a']['c'] IS NULL, cast(v['x'] as int) + FROM ${tableName} WHERE k = 4 ORDER BY k + """ + assertEquals("[[4, 1, true, 1]]", parentThenChildRows.toString()) + + def multiTableName = "t_fpu_var_pub_multi_${use_row_store}" + sql """ DROP TABLE IF EXISTS ${multiTableName} FORCE """ + sql """ + CREATE TABLE ${multiTableName} ( + `k` int NOT NULL, + `v1` variant NULL, + `v2` variant NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + 
"enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "store_row_column" = "${use_row_store}"); + """ + + sql """ INSERT INTO ${multiTableName} VALUES (1, '{"a":1,"b":1}', '{"a":1,"b":1}') """ + def txnMultiA = prepare_streamload(multiTableName, + """{"k":1,"v1":{"a":2},"v2":{"b":8}}\n""") + def txnMultiB = prepare_streamload(multiTableName, """{"k":1,"v1":{"b":9}}\n""") + do_streamload_2pc_commit(multiTableName, txnMultiB) + wait_for_publish(txnMultiB, 60) + do_streamload_2pc_commit(multiTableName, txnMultiA) + wait_for_publish(txnMultiA, 60) + + def multiVariantRows = sql """ + SELECT k, cast(v1['a'] as int), cast(v1['b'] as int), + cast(v2['a'] as int), cast(v2['b'] as int) + FROM ${multiTableName} ORDER BY k + """ + assertEquals("[[1, 2, 9, 1, 8]]", multiVariantRows.toString()) + + sql """ INSERT INTO ${tableName} VALUES (2, 1, '{"a":1,"b":1}') """ + def txnPatchAfterDelete = prepare_streamload(tableName, """{"k":2,"v":{"a":2}}\n""") + streamload(tableName, """{"k":2,"__DORIS_DELETE_SIGN__":1}\n""") + do_streamload_2pc_commit(tableName, txnPatchAfterDelete) + wait_for_publish(txnPatchAfterDelete, 60) + + def patchAfterDeleteRows = sql """ + SELECT k, cast(v['a'] as int), v['b'] IS NULL, c + FROM ${tableName} WHERE k = 2 ORDER BY k + """ + assertEquals("[[2, 2, true, null]]", patchAfterDeleteRows.toString()) + + def sparseTableName = "t_fpu_var_pub_sparse_${use_row_store}" + sql """ DROP TABLE IF EXISTS ${sparseTableName} FORCE """ + sql """ + CREATE TABLE ${sparseTableName} ( + `k` int NOT NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "store_row_column" = "${use_row_store}"); + """ + + sql """ INSERT INTO ${sparseTableName} VALUES (1, '{"a":1,"b":1,"c":1}') """ + def txnSparseA = prepare_streamload(sparseTableName, """{"k":1,"v":{"b":2}}\n""") + def txnSparseB = prepare_streamload(sparseTableName, """{"k":1,"v":{"c":3}}\n""") + do_streamload_2pc_commit(sparseTableName, txnSparseB) + wait_for_publish(txnSparseB, 60) + do_streamload_2pc_commit(sparseTableName, txnSparseA) + wait_for_publish(txnSparseA, 60) + + def sparseRows = sql """ + SELECT k, cast(v['a'] as int), cast(v['b'] as int), cast(v['c'] as int) + FROM ${sparseTableName} ORDER BY k + """ + assertEquals("[[1, 1, 2, 3]]", sparseRows.toString()) + } + + for (def use_row_store : [false, true]) { + def seqTableName = "t_fpu_var_pub_seq_${use_row_store}" + sql """ DROP TABLE IF EXISTS ${seqTableName} FORCE """ + sql """ + CREATE TABLE ${seqTableName} ( + `k` int NOT NULL, + `seq` int NULL, + `v` variant NULL, + `c` int NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "function_column.sequence_col" = "seq", + "store_row_column" = "${use_row_store}"); + """ + + sql """ INSERT INTO ${seqTableName} VALUES (1, 10, '{"a":1,"b":1}', 1) """ + + def txnLowSeq = prepare_streamload(seqTableName, """{"k":1,"seq":5,"v":{"a":2}}\n""") + def txnHighSeq = prepare_streamload(seqTableName, """{"k":1,"seq":20,"v":{"b":3}}\n""") + do_streamload_2pc_commit(seqTableName, txnHighSeq) + wait_for_publish(txnHighSeq, 60) + do_streamload_2pc_commit(seqTableName, txnLowSeq) + 
wait_for_publish(txnLowSeq, 60) + + def lowSeqDiscardRows = sql """ + SELECT k, seq, cast(v['a'] as int), cast(v['b'] as int), c + FROM ${seqTableName} ORDER BY k + """ + assertEquals("[[1, 20, 1, 3, 1]]", lowSeqDiscardRows.toString()) + + def txnMissingSeq = prepare_streamload(seqTableName, """{"k":1,"v":{"a":4}}\n""") + do_streamload_2pc_commit(seqTableName, txnMissingSeq) + wait_for_publish(txnMissingSeq, 60) + + def missingSeqRows = sql """ + SELECT k, seq, cast(v['a'] as int), cast(v['b'] as int), c + FROM ${seqTableName} ORDER BY k + """ + assertEquals("[[1, 20, 4, 3, 1]]", missingSeqRows.toString()) + + def txnHigherSeq = prepare_streamload(seqTableName, """{"k":1,"seq":30,"v":{"a":5}}\n""") + do_streamload_2pc_commit(seqTableName, txnHigherSeq) + wait_for_publish(txnHigherSeq, 60) + + def highSeqRows = sql """ + SELECT k, seq, cast(v['a'] as int), cast(v['b'] as int), c + FROM ${seqTableName} ORDER BY k + """ + assertEquals("[[1, 30, 5, 3, 1]]", highSeqRows.toString()) + } +} diff --git a/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_property.groovy b/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_property.groovy index 4e26a6a7de45ad..df7f2fd2d9671d 100644 --- a/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_property.groovy +++ b/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_property.groovy @@ -16,6 +16,7 @@ // under the License. suite('test_flexible_partial_update_property') { + sql "set default_variant_enable_doc_mode = false" def tableName = "test_flexible_partial_update_property" sql """ DROP TABLE IF EXISTS ${tableName} """ @@ -37,11 +38,85 @@ suite('test_flexible_partial_update_property') { def show_res = sql "show create table ${tableName}" assertTrue(show_res.toString().contains('"enable_unique_key_skip_bitmap_column" = "true"')) + def doSchemaChange = { cmd -> + sql cmd + waitForSchemaChangeDone { + sql """SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1""" + time 2000 + } + } + + def expect_flexible_streamload_fail = { targetTable, loadBody, expectedMessage -> + streamLoad { + table "${targetTable}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream(loadBody.getBytes("UTF-8")) + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + assertTrue(exception.getMessage().contains(expectedMessage)) + return + } + def json = parseJson(result) + assertEquals("fail", json.Status.toLowerCase()) + assertTrue(json.Message.contains(expectedMessage)) + } + } + } + test { sql """alter table ${tableName} enable feature "UPDATE_FLEXIBLE_COLUMNS"; """ - exception "table ${tableName} has enabled update flexible columns feature already." + exception "table ${tableName} has enabled update flexible columns feature already." 
+ } + doSchemaChange """alter table ${tableName} add column v_normal variant NULL;""" + test { + sql """alter table ${tableName} add column v_doc variant NULL;""" + exception "VARIANT flexible partial update does not support doc mode" + } + test { + sql """alter table ${tableName} add column (v_doc_multi variant NULL);""" + exception "VARIANT flexible partial update does not support doc mode" } + tableName = "test_flexible_partial_update_property_doc_create" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE ${tableName} ( + `k` int(11) NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "store_row_column" = "false"); """ + expect_flexible_streamload_fail(tableName, """{"k":1,"v":{"a":1}}\n""", + "VARIANT flexible partial update does not support doc mode") + + tableName = "test_flexible_partial_update_property_flatten_create" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql "set enable_variant_flatten_nested = true" + sql """ CREATE TABLE ${tableName} ( + `k` int(11) NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "deprecated_variant_enable_flatten_nested" = "true", + "store_row_column" = "false"); """ + sql "set enable_variant_flatten_nested = false" + expect_flexible_streamload_fail(tableName, """{"k":1,"v":{"a":1}}\n""", + "VARIANT flexible partial update does not support deprecated_variant_enable_flatten_nested") + // the default value "enable_unique_key_skip_bitmap_column" is "false" tableName = "test_flexible_partial_update_property2" sql """ DROP TABLE IF EXISTS ${tableName} """ @@ -65,19 +140,101 @@ suite('test_flexible_partial_update_property') { sql """insert into ${tableName} select number, number, number, number, number, number from numbers("number" = "6"); """ order_qt_sql "select k,v1,v2,v3,v4,v5 from ${tableName};" - def doSchemaChange = { cmd -> - sql cmd - waitForSchemaChangeDone { - sql """SHOW ALTER TABLE COLUMN WHERE IndexName='${tableName}' ORDER BY createtime DESC LIMIT 1""" - time 2000 + test { + sql """alter table ${tableName} set ("enable_unique_key_skip_bitmap_column"="true");""" + exception "You can not modify property 'enable_unique_key_skip_bitmap_column'." 
+ } + + if (!isCloudMode()) { + tableName = "test_flexible_partial_update_property_lsc_false" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE ${tableName} ( + `k` int(11) NULL, + `v` BIGINT NULL + ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "false", + "store_row_column" = "false"); """ + test { + sql """alter table ${tableName} enable feature "UPDATE_FLEXIBLE_COLUMNS";""" + exception "light_schema_change" } } + tableName = "test_flexible_partial_update_property_cluster_key" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE ${tableName} ( + `k` int(11) NULL, + `v` BIGINT NOT NULL + ) UNIQUE KEY(`k`) ORDER BY(`v`) DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "store_row_column" = "false"); """ test { - sql """alter table ${tableName} set ("enable_unique_key_skip_bitmap_column"="true");""" - exception "You can not modify property 'enable_unique_key_skip_bitmap_column'." + sql """alter table ${tableName} enable feature "UPDATE_FLEXIBLE_COLUMNS";""" + exception "cluster keys" + } + + tableName = "test_flexible_partial_update_property_doc" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE ${tableName} ( + `k` int(11) NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "store_row_column" = "false"); """ + test { + sql """alter table ${tableName} enable feature "UPDATE_FLEXIBLE_COLUMNS";""" + exception "VARIANT flexible partial update does not support doc mode" + } + + sql "set enable_variant_flatten_nested = true" + tableName = "test_flexible_partial_update_property_flatten" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE ${tableName} ( + `k` int(11) NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "deprecated_variant_enable_flatten_nested" = "true", + "store_row_column" = "false"); """ + test { + sql """alter table ${tableName} enable feature "UPDATE_FLEXIBLE_COLUMNS";""" + exception "VARIANT flexible partial update does not support deprecated_variant_enable_flatten_nested" + } + sql "set enable_variant_flatten_nested = false" + + tableName = "test_flexible_partial_update_property_doc_add_enable" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ CREATE TABLE ${tableName} ( + `k` int(11) NULL + ) UNIQUE KEY(`k`) DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "store_row_column" = "false"); """ + test { + sql """alter table ${tableName} + add column v variant NULL, + enable feature "UPDATE_FLEXIBLE_COLUMNS";""" + exception "VARIANT flexible partial update does not support doc mode" } + tableName = "test_flexible_partial_update_property2" + doSchemaChange """alter table ${tableName} enable feature "UPDATE_FLEXIBLE_COLUMNS";""" show_res = sql "show create table ${tableName}" assertTrue(show_res.toString().contains('"enable_unique_key_skip_bitmap_column" = "true"')) @@ -95,4 +252,4 @@ suite('test_flexible_partial_update_property') { order_qt_sql "select 
k,v1,v2,v3,v4,v5,BITMAP_TO_STRING(__DORIS_SKIP_BITMAP_COL__) from ${tableName};" -} \ No newline at end of file +} diff --git a/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_restricts.groovy b/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_restricts.groovy index d82ae7e8336701..9e650bc0d3c618 100644 --- a/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_restricts.groovy +++ b/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_restricts.groovy @@ -16,6 +16,7 @@ // under the License. suite('test_flexible_partial_update_restricts') { + sql "set default_variant_enable_doc_mode = false" def tableName = "test_flexible_partial_update_restricts" sql """ DROP TABLE IF EXISTS ${tableName} """ @@ -178,6 +179,24 @@ suite('test_flexible_partial_update_restricts') { } } + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + set 'delete', 'v2 > 5' + file "test1.json" + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + def json = parseJson(result) + assertEquals("fail", json.Status.toLowerCase()) + assertTrue(json.Message.contains("Don't support flexible partial update when 'delete' is specified")); + } + } + if (!isCloudMode()) { // in cloud mode, all tables has light schema change on tableName = "test_flexible_partial_update_restricts2" @@ -264,13 +283,32 @@ suite('test_flexible_partial_update_restricts') { PROPERTIES( "replication_num" = "1", "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", "enable_unique_key_skip_bitmap_column" = "true", "store_row_column" = "false"); """ - + + sql """insert into ${tableName} select number, number, number, number, number, number, null from numbers("number" = "6"); """ streamLoad { table "${tableName}" set 'format', 'json' set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + file "test1.json" + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + } + } + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'fuzzy_parse', 'true' set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' file "test1.json" time 20000 @@ -280,7 +318,41 @@ suite('test_flexible_partial_update_restricts') { } def json = parseJson(result) assertEquals("fail", json.Status.toLowerCase()) - assertTrue(json.Message.contains("Flexible partial update can only support table without variant columns.")); + assertTrue(json.Message.contains("Don't support flexible partial update when 'fuzzy_parse' is enabled")); + } + } + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'columns', 'k,v1,v3,v5' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + file "test1.json" + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + def json = parseJson(result) + assertEquals("fail", json.Status.toLowerCase()) + assertTrue(json.Message.contains("Don't support flexible partial update when 'columns' is specified")); + } + } + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'jsonpaths', 
'["$.k","$.v1","$.v3"]' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + file "test1.json" + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + def json = parseJson(result) + assertEquals("fail", json.Status.toLowerCase()) + assertTrue(json.Message.contains("Don't support flexible partial update when 'jsonpaths' is specified")); } } @@ -404,4 +476,4 @@ suite('test_flexible_partial_update_restricts') { "store_row_column" = "false"); """ exception "Disable to create table column with name start with __DORIS_: __DORIS_SKIP_BITMAP_COL__" } -} \ No newline at end of file +} diff --git a/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_variant.groovy b/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_variant.groovy new file mode 100644 index 00000000000000..46a3a364e7aa06 --- /dev/null +++ b/regression-test/suites/unique_with_mow_p0/flexible/test_flexible_partial_update_variant.groovy @@ -0,0 +1,335 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_flexible_partial_update_variant") { + sql "set default_variant_enable_doc_mode = false" + + def docModeUnsupportedMsg = "VARIANT flexible partial update does not support doc mode" + def nullPatchUnsupportedMsg = "VARIANT flexible partial update does not support JSON null patch values" + def oldValueUnsupportedMsg = "VARIANT flexible partial update only supports patching JSON object old values" + def expect_streamload_fail = { tableName, loadBody, expectedMessage -> + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream(loadBody.getBytes("UTF-8")) + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + assertTrue(exception.getMessage().contains(expectedMessage)) + return + } + def json = parseJson(result) + assertEquals("fail", json.Status.toLowerCase()) + assertTrue(json.Message.contains(expectedMessage)) + } + } + } + def expect_streamload_filtered = { tableName, loadBody, expectedTotalRows, expectedLoadedRows, + expectedFilteredRows -> + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'max_filter_ratio', '1' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream(loadBody.getBytes("UTF-8")) + time 20000 + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(expectedTotalRows, json.NumberTotalRows) + assertEquals(expectedLoadedRows, json.NumberLoadedRows) + assertEquals(expectedFilteredRows, json.NumberFilteredRows) + } + } + } + + for (def use_row_store : [false, true]) { + def tableName = "test_flexible_partial_update_variant_${use_row_store}" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE ${tableName} ( + `k` int NOT NULL, + `c` int NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "store_row_column" = "${use_row_store}"); + """ + + sql """ + INSERT INTO ${tableName} VALUES + (1, 1, '{"a": 1, "c": 3, "nested": {"x": 1}}'), + (2, 2, '{"nested": {"x": 1}, "keep": 9}') + """ + + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + file "variant_patch_merge.json" + time 20000 + } + + def merged = sql """ + SELECT k, cast(v['a'] as int), cast(v['b'] as int), cast(v['c'] as int), + cast(v['nested']['x'] as int), cast(v['nested']['y'] as int), c + FROM ${tableName} ORDER BY k + """ + assertEquals("[[1, 10, 20, 3, 1, null, 1], [2, null, null, null, 1, 2, 22]]", + merged.toString()) + + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream("""{"k":4,"v":{"new_path":4}} +""".getBytes("UTF-8")) + time 20000 + } + + def newKeyRows = sql """ + SELECT k, cast(v['new_path'] as int), c + FROM ${tableName} WHERE k = 4 ORDER BY k + """ + assertEquals("[[4, 4, null]]", newKeyRows.toString()) + + 
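+        // root patch values that are not JSON objects (array, string literal, null)
+        // are filtered row by row; the valid object patch in the same batch still applies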
expect_streamload_filtered(tableName, """{"k":1,"v":[1,2,3]} +{"k":1,"v":"{\\"b\\":2}"} +{"k":1,"v":null} +{"k":1,"v":{"b":30}} +""", 4, 1, 3) + expect_streamload_fail(tableName, """{"k":1,"v":{"a":null}}\n""", nullPatchUnsupportedMsg) + + def afterRejectedRootValues = sql """ + SELECT k, cast(v['a'] as int), cast(v['b'] as int), cast(v['c'] as int), + cast(v['nested']['x'] as int) + FROM ${tableName} WHERE k = 1 ORDER BY k + """ + assertEquals("[[1, 10, 30, 3, 1]]", afterRejectedRootValues.toString()) + + sql """ INSERT INTO ${tableName} VALUES (3, 3, '{"nested": {"x": 1}, "b": 1}') """ + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream("""{"k":3,"v":{"nested":{}}} +{"k":3,"v":{"b":2}} +""".getBytes("UTF-8")) + time 20000 + } + + def emptyObjectRows = sql """ + SELECT k, v['nested']['x'] IS NULL, cast(v['b'] as int), c + FROM ${tableName} WHERE k = 3 ORDER BY k + """ + assertEquals("[[3, true, 2, 3]]", emptyObjectRows.toString()) + + sql """ INSERT INTO ${tableName} VALUES (5, 5, '{"a": 1, "b": 1, "keep": 1}') """ + streamLoad { + table "${tableName}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream("""{"k":5,"v":{"a":2}} +{"k":5,"__DORIS_DELETE_SIGN__":1} +{"k":5,"v":{"b":3}} +""".getBytes("UTF-8")) + time 20000 + } + + def patchAfterDeleteRows = sql """ + SELECT k, cast(v['a'] as int), cast(v['b'] as int), cast(v['keep'] as int), c + FROM ${tableName} WHERE k = 5 ORDER BY k + """ + assertEquals("[[5, null, 3, null, null]]", patchAfterDeleteRows.toString()) + } + + def oldRootTable = "test_flexible_partial_update_variant_old_root" + sql """ DROP TABLE IF EXISTS ${oldRootTable} """ + sql """ + CREATE TABLE ${oldRootTable} ( + `k` int NOT NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true"); + """ + streamLoad { + table "${oldRootTable}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + inputStream new ByteArrayInputStream("""{"k":1,"v":[1,2]} +{"k":2,"v":"plain"} +""".getBytes("UTF-8")) + time 20000 + } + expect_streamload_fail(oldRootTable, """{"k":1,"v":{"a":1}}\n""", oldValueUnsupportedMsg) + expect_streamload_fail(oldRootTable, """{"k":2,"v":{"a":1}}\n""", oldValueUnsupportedMsg) + + def typedTable = "test_flexible_partial_update_variant_typed" + sql """ DROP TABLE IF EXISTS ${typedTable} """ + sql """ + CREATE TABLE ${typedTable} ( + `k` int NOT NULL, + `v` variant<'a' : int, 'b' : int, properties("variant_enable_typed_paths_to_sparse" = "false")> NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true"); + """ + sql """ INSERT INTO ${typedTable} VALUES (1, '{"a": 1, "c": 3}') """ + + streamLoad { + table "${typedTable}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + file "variant_patch_merge.json" + time 20000 + } + + def typedMerged = sql """ + SELECT k, cast(v['a'] as int), 
cast(v['b'] as int), cast(v['c'] as int) + FROM ${typedTable} WHERE k = 1 ORDER BY k + """ + assertEquals("[[1, 10, 20, 3]]", typedMerged.toString()) + + def docTable = "test_flexible_partial_update_variant_doc" + sql """ DROP TABLE IF EXISTS ${docTable} """ + sql """ + CREATE TABLE ${docTable} ( + `k` int NOT NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true"); + """ + expect_streamload_fail(docTable, """{"k":1,"v":{"a":1}}\n""", docModeUnsupportedMsg) + + def seqTable = "test_flexible_partial_update_variant_same_batch_seq" + sql """ DROP TABLE IF EXISTS ${seqTable} """ + sql """ + CREATE TABLE ${seqTable} ( + `k` int NOT NULL, + `seq` int NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "function_column.sequence_col" = "seq"); + """ + sql """ INSERT INTO ${seqTable} VALUES (1, 10, '{"a":1,"b":1}') """ + + String seqLoad = """{"k":1,"seq":20,"v":{"a":2}} +{"k":1,"seq":15,"v":{"b":3}} +{"k":1,"seq":20,"v":{"b":4}} +""" + streamLoad { + table "${seqTable}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream(seqLoad.getBytes("UTF-8")) + time 20000 + } + + def seqMerged = sql """ + SELECT k, seq, cast(v['a'] as int), cast(v['b'] as int) + FROM ${seqTable} ORDER BY k + """ + assertEquals("[[1, 20, 2, 4]]", seqMerged.toString()) + + def seqMapTable = "test_flexible_partial_update_variant_seq_map" + sql """ DROP TABLE IF EXISTS ${seqMapTable} """ + sql """ + CREATE TABLE ${seqMapTable} ( + `k` int NOT NULL, + `seq_map` int NULL, + `v` variant NULL + ) UNIQUE KEY(`k`) + DISTRIBUTED BY HASH(`k`) BUCKETS 1 + PROPERTIES( + "replication_num" = "1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "enable_unique_key_skip_bitmap_column" = "true", + "function_column.sequence_col" = "seq_map"); + """ + sql """ + INSERT INTO ${seqMapTable} VALUES + (1, 10, '{"a":1,"b":1}'), + (2, 20, '{"a":2,"b":2}') + """ + + String seqMapLoad = """{"k":1,"v":{"a":3}} +{"k":2,"seq_map":15,"v":{"a":4}} +{"k":2,"seq_map":25,"v":{"b":5}} +""" + streamLoad { + table "${seqMapTable}" + set 'format', 'json' + set 'read_json_by_line', 'true' + set 'strict_mode', 'false' + set 'unique_key_update_mode', 'UPDATE_FLEXIBLE_COLUMNS' + inputStream new ByteArrayInputStream(seqMapLoad.getBytes("UTF-8")) + time 20000 + } + + def seqMapMerged = sql """ + SELECT k, seq_map, __DORIS_SEQUENCE_COL__, cast(v['a'] as int), cast(v['b'] as int) + FROM ${seqMapTable} ORDER BY k + """ + assertEquals("[[1, 10, 10, 3, 1], [2, 25, 25, 2, 5]]", seqMapMerged.toString()) +}