From 2fb170769c6af38fe25a6a3cfc6da410c0cbca61 Mon Sep 17 00:00:00 2001 From: csun5285 Date: Fri, 15 May 2026 14:37:14 +0800 Subject: [PATCH] [refactor](be) push CHAR padding strip down to page decoder Previously CHAR(N) was stored padded with '\0' to N on disk and unpadded at the top of every read path by Block::shrink_char_type_column_suffix_zero, plus various query-time paths re-padded predicate values to match. That meant every CHAR read paid for an extra column scan, and the padding contract leaked across many layers. This commit pushes the strip down so segments hold unpadded CHAR slices natively: - Binary*PageDecoder strnlen CHAR slices on decode (dict pool + plain pages), so column reads emit unpadded data directly. - OlapColumnDataConvertorChar no longer pads on write; segments written by the new code contain natural-length slices. - ZoneMap from_olap_string strnlens CHAR min/max on read. - Predicate creators (comparison / in-list / not-in) and delete_handler no longer pad CHAR predicate values. - segment_iterator drops _char_type_idx / _has_char_type and the three shrink_char_type_column_suffix_zero calls. - Block / ColumnArray / ColumnMap / ColumnStruct lose the now-unused shrink_padding_chars overrides; ColumnDictionary drops get_shrink_value. - RowCursor::pad_char_fields() removed. Index byte-format stability is preserved by keeping the pad inside the KeyCoder. KeyCoderTraits::encode_ascending / full_encode_ascending pad to schema_length internally (new schema_length parameter, default 0, only consulted by CHAR). Short-key index, PK index and segment min-max keys therefore remain byte-identical to old BE writes, so cross-version lookups keep working. BloomFilter requires a format flag: old segments hashed the zero-padded CHAR, new segments hash the unpadded value, so the reader honours BloomFilterIndexPB.unpadded_char_filter and only probes when the predicate hashing matches the segment hashing. Old segments fall back to skipping BF pruning for CHAR -- safe (no false negatives), just slower. Tests updated: zone_map_index_test CharColumnPadding now expects unpadded min/max; key_coder_test passes schema_length=0; segment_writer_full_encode_keys_test passes per-column key_index_sizes; char_type_padding_test rewritten around the new contract. Co-Authored-By: Claude Opus 4.7 (1M context) --- be/src/core/block/block.cpp | 9 --- be/src/core/block/block.h | 3 - be/src/core/column/column_array.cpp | 4 -- be/src/core/column/column_array.h | 2 - be/src/core/column/column_dictionary.h | 27 +------ be/src/core/column/column_map.cpp | 5 -- be/src/core/column/column_map.h | 1 - be/src/core/column/column_struct.cpp | 6 -- be/src/core/column/column_struct.h | 2 - be/src/core/column/predicate_column.h | 14 ++-- .../data_type_string_serde.cpp | 16 +---- be/src/exec/rowid_fetcher.cpp | 46 ------------ be/src/service/point_query_executor.cpp | 5 -- be/src/storage/delete/delete_handler.cpp | 20 +----- be/src/storage/field.h | 4 +- .../storage/index/bloom_filter/bloom_filter.h | 10 +++ .../bloom_filter_index_reader.cpp | 1 + .../bloom_filter_index_writer.cpp | 6 ++ .../storage/index/indexed_column_reader.cpp | 4 +- .../storage/index/indexed_column_writer.cpp | 2 +- be/src/storage/index/ordinal_page_index.cpp | 4 +- .../storage/iterator/olap_data_convertor.cpp | 10 +-- be/src/storage/iterator/olap_data_convertor.h | 31 -------- be/src/storage/key_coder.h | 70 +++++++++++-------- .../storage/predicate/comparison_predicate.h | 8 ++- be/src/storage/predicate/in_list_predicate.h | 24 +++---- .../storage/predicate/like_column_predicate.h | 2 +- .../predicate_creator_comparison.cpp | 18 +---- .../predicate_creator_in_list_in.cpp | 35 ++++------ .../predicate_creator_in_list_not_in.cpp | 33 ++++----- be/src/storage/row_cursor.cpp | 36 ++-------- be/src/storage/row_cursor.h | 5 -- be/src/storage/segment/binary_plain_page.h | 13 ++++ be/src/storage/segment/binary_prefix_page.cpp | 4 ++ be/src/storage/segment/binary_prefix_page.h | 3 +- be/src/storage/segment/column_reader.cpp | 32 +++++---- be/src/storage/segment/options.h | 5 ++ be/src/storage/segment/segment_iterator.cpp | 48 ++----------- be/src/storage/segment/segment_iterator.h | 8 +-- be/src/storage/segment/segment_writer.cpp | 10 ++- be/src/storage/segment/segment_writer.h | 6 ++ .../segment/vertical_segment_writer.cpp | 10 ++- .../storage/segment/vertical_segment_writer.h | 4 ++ be/test/core/block/block_test.cpp | 2 - be/test/storage/key_coder_test.cpp | 2 +- .../segment_writer_full_encode_keys_test.cpp | 9 ++- .../storage/segment/zone_map_index_test.cpp | 47 ++++++------- be/test/util/char_type_padding_test.cpp | 53 ++++++-------- gensrc/proto/segment_v2.proto | 6 ++ 49 files changed, 265 insertions(+), 460 deletions(-) diff --git a/be/src/core/block/block.cpp b/be/src/core/block/block.cpp index 2bb156325443e3..4d0b67b93052e9 100644 --- a/be/src/core/block/block.cpp +++ b/be/src/core/block/block.cpp @@ -1081,15 +1081,6 @@ std::unique_ptr Block::create_same_struct_block(size_t size, bool is_rese return temp_block; } -void Block::shrink_char_type_column_suffix_zero(const std::vector& char_type_idx) { - for (auto idx : char_type_idx) { - if (idx < data.size()) { - auto& col_and_name = this->get_by_position(idx); - col_and_name.column->assume_mutable()->shrink_padding_chars(); - } - } -} - size_t MutableBlock::allocated_bytes() const { size_t res = 0; for (const auto& col : _columns) { diff --git a/be/src/core/block/block.h b/be/src/core/block/block.h index 62186b36cced7e..c7dced80c60224 100644 --- a/be/src/core/block/block.h +++ b/be/src/core/block/block.h @@ -346,9 +346,6 @@ class Block { return res; } - // for String type or Array type - void shrink_char_type_column_suffix_zero(const std::vector& char_type_idx); - void clear_column_mem_not_keep(const std::vector& column_keep_flags, bool need_keep_first); diff --git a/be/src/core/column/column_array.cpp b/be/src/core/column/column_array.cpp index 6de4d96cc326f7..35e56b2d900302 100644 --- a/be/src/core/column/column_array.cpp +++ b/be/src/core/column/column_array.cpp @@ -98,10 +98,6 @@ ColumnArray::ColumnArray(MutableColumnPtr&& nested_column) : data(std::move(nest offsets = ColumnOffsets::create(); } -void ColumnArray::shrink_padding_chars() { - data->shrink_padding_chars(); -} - std::string ColumnArray::get_name() const { return "Array(" + get_data().get_name() + ")"; } diff --git a/be/src/core/column/column_array.h b/be/src/core/column/column_array.h index c11547bdbf5e2d..5c5518d42fa1e4 100644 --- a/be/src/core/column/column_array.h +++ b/be/src/core/column/column_array.h @@ -117,8 +117,6 @@ class ColumnArray final : public COWHelper { offsets->sanity_check(); } - void shrink_padding_chars() override; - /** On the index i there is an offset to the beginning of the i + 1 -th element. */ using ColumnOffsets = ColumnOffset64; diff --git a/be/src/core/column/column_dictionary.h b/be/src/core/column/column_dictionary.h index a1835b817a7233..7ac780e2bde4d8 100644 --- a/be/src/core/column/column_dictionary.h +++ b/be/src/core/column/column_dictionary.h @@ -231,7 +231,7 @@ class ColumnDictI32 final : public COWHelper { _dict.initialize_hash_values_for_runtime_filter(); } - uint32_t get_hash_value(uint32_t idx) const { return _dict.get_hash_value(_codes[idx], _type); } + uint32_t get_hash_value(uint32_t idx) const { return _dict.get_hash_value(_codes[idx]); } template void find_codes(const HybridSetType* values, std::vector& selected) const { @@ -278,14 +278,6 @@ class ColumnDictI32 final : public COWHelper { inline const StringRef& get_value(value_type code) const { return _dict.get_value(code); } - inline StringRef get_shrink_value(value_type code) const { - StringRef result = _dict.get_value(code); - if (_type == FieldType::OLAP_FIELD_TYPE_CHAR) { - result.size = strnlen(result.data, result.size); - } - return result; - } - size_t dict_size() const { return _dict.size(); } std::string dict_debug_string() const { return _dict.debug_string(); } @@ -326,26 +318,13 @@ class ColumnDictI32 final : public COWHelper { } } - inline uint32_t get_hash_value(Int32 code, FieldType type) const { + inline uint32_t get_hash_value(Int32 code) const { if (_compute_hash_value_flags[code]) { return _hash_values[code]; } else { auto& sv = (*_dict_data)[code]; - // The char data is stored in the disk with the schema length, - // and zeros are filled if the length is insufficient - - // When reading data, use shrink_char_type_column_suffix_zero(_char_type_idx) - // Remove the suffix 0 - // When writing data, use the CharField::consume function to fill in the trailing 0. - - // For dictionary data of char type, sv.size is the schema length, - // so use strnlen to remove the 0 at the end to get the actual length. - size_t len = sv.size; - if (type == FieldType::OLAP_FIELD_TYPE_CHAR) { - len = strnlen(sv.data, sv.size); - } uint32_t hash_val = - crc32c::Extend(0, (const uint8_t*)sv.data, static_cast(len)); + crc32c::Extend(0, (const uint8_t*)sv.data, static_cast(sv.size)); _hash_values[code] = hash_val; _compute_hash_value_flags[code] = 1; return _hash_values[code]; diff --git a/be/src/core/column/column_map.cpp b/be/src/core/column/column_map.cpp index 48db377d888b75..bf7406c87af05e 100644 --- a/be/src/core/column/column_map.cpp +++ b/be/src/core/column/column_map.cpp @@ -643,11 +643,6 @@ Status ColumnMap::deduplicate_keys(bool recursive) { return Status::OK(); } -void ColumnMap::shrink_padding_chars() { - keys_column->shrink_padding_chars(); - values_column->shrink_padding_chars(); -} - void ColumnMap::reserve(size_t n) { get_offsets().reserve(n); keys_column->reserve(n); diff --git a/be/src/core/column/column_map.h b/be/src/core/column/column_map.h index 12f8fe4f8184ab..b1fb4bb31764a0 100644 --- a/be/src/core/column/column_map.h +++ b/be/src/core/column/column_map.h @@ -115,7 +115,6 @@ class ColumnMap final : public COWHelper { const char* deserialize_and_insert_from_arena(const char* pos) override; void update_hash_with_value(size_t n, SipHash& hash) const override; - void shrink_padding_chars() override; ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; size_t filter(const Filter& filter) override; MutableColumnPtr permute(const Permutation& perm, size_t limit) const override; diff --git a/be/src/core/column/column_struct.cpp b/be/src/core/column/column_struct.cpp index 5cf8f390634627..57463f6c435717 100644 --- a/be/src/core/column/column_struct.cpp +++ b/be/src/core/column/column_struct.cpp @@ -338,12 +338,6 @@ MutableColumnPtr ColumnStruct::permute(const Permutation& perm, size_t limit) co return ColumnStruct::create(new_columns); } -void ColumnStruct::shrink_padding_chars() { - for (auto& column : columns) { - column->shrink_padding_chars(); - } -} - void ColumnStruct::reserve(size_t n) { const size_t tuple_size = columns.size(); for (size_t i = 0; i < tuple_size; ++i) { diff --git a/be/src/core/column/column_struct.h b/be/src/core/column/column_struct.h index ca77868d056125..b602376f561590 100644 --- a/be/src/core/column/column_struct.h +++ b/be/src/core/column/column_struct.h @@ -150,8 +150,6 @@ class ColumnStruct final : public COWHelper { int compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const override; - void shrink_padding_chars() override; - void reserve(size_t n) override; void resize(size_t n) override; size_t byte_size() const override; diff --git a/be/src/core/column/predicate_column.h b/be/src/core/column/predicate_column.h index d98743500dbb1e..964cb548dc5d5a 100644 --- a/be/src/core/column/predicate_column.h +++ b/be/src/core/column/predicate_column.h @@ -104,11 +104,7 @@ class PredicateColumnType final : public COWHelper) { - auto res = reinterpret_cast(data[n]); - if constexpr (Type == TYPE_CHAR) { - res.size = strnlen(res.data, res.size); - } - return res; + return reinterpret_cast(data[n]); } else { throw doris::Exception( ErrorCode::INTERNAL_ERROR, @@ -116,6 +112,14 @@ class PredicateColumnType final : public COWHelper::from_string(StringRef& str, IColumn& // Deserializes a STRING/VARCHAR/CHAR value from its OLAP string representation // (e.g. from ZoneMap protobuf). This is the inverse of to_olap_string(). -// -// For CHAR type: if the string is shorter than the declared column length (_len), -// pads with '\0' bytes to reach _len. This preserves CHAR's fixed-length semantics. -// For STRING/VARCHAR: stores the string as-is. -// -// Examples: -// CHAR(10), str="hello" => field = "hello\0\0\0\0\0" (10 bytes) -// VARCHAR, str="hello" => field = "hello" (5 bytes) template Status DataTypeStringSerDeBase::from_olap_string(const std::string& str, Field& field, const FormatOptions& options) const { - if (cast_set(str.size()) < _len) { - DCHECK_EQ(_type, TYPE_CHAR); - std::string tmp(_len, '\0'); - memcpy(tmp.data(), str.data(), str.size()); - field = Field::create_field(std::move(tmp)); + if (_type == TYPE_CHAR) { + size_t real_len = strnlen(str.data(), str.size()); + field = Field::create_field(std::string(str.data(), real_len)); } else { field = Field::create_field(str); } diff --git a/be/src/exec/rowid_fetcher.cpp b/be/src/exec/rowid_fetcher.cpp index f97bce17a8c6a4..cc1ecf139dca0b 100644 --- a/be/src/exec/rowid_fetcher.cpp +++ b/be/src/exec/rowid_fetcher.cpp @@ -206,30 +206,6 @@ Status RowIDFetcher::_merge_rpc_results(const PMultiGetRequest& request, return Status::OK(); } -bool _has_char_type(const DataTypePtr& type) { - switch (type->get_primitive_type()) { - case TYPE_CHAR: { - return true; - } - case TYPE_ARRAY: { - const auto* arr_type = assert_cast(remove_nullable(type).get()); - return _has_char_type(arr_type->get_nested_type()); - } - case TYPE_MAP: { - const auto* map_type = assert_cast(remove_nullable(type).get()); - return _has_char_type(map_type->get_key_type()) || - _has_char_type(map_type->get_value_type()); - } - case TYPE_STRUCT: { - const auto* struct_type = assert_cast(remove_nullable(type).get()); - return std::any_of(struct_type->get_elements().begin(), struct_type->get_elements().end(), - [&](const DataTypePtr& dt) -> bool { return _has_char_type(dt); }); - } - default: - return false; - } -} - Status RowIDFetcher::fetch(const ColumnPtr& column_row_ids, Block* res_block) { CHECK(!_stubs.empty()); PMultiGetRequest mget_req = _init_fetch_request( @@ -279,16 +255,6 @@ Status RowIDFetcher::fetch(const ColumnPtr& column_row_ids, Block* res_block) { } // Check row consistency RETURN_IF_CATCH_EXCEPTION(res_block->check_number_of_rows()); - // shrink for char type - std::vector char_type_idx; - for (size_t i = 0; i < _fetch_option.desc->slots().size(); i++) { - const auto& column_desc = _fetch_option.desc->slots()[i]; - const auto type = column_desc->type(); - if (_has_char_type(type)) { - char_type_idx.push_back(i); - } - } - res_block->shrink_char_type_column_suffix_zero(char_type_idx); VLOG_DEBUG << "dump block:" << res_block->dump_data(0, 10); return Status::OK(); } @@ -561,15 +527,6 @@ Status RowIdStorageReader::read_by_rowids(const PMultiGetRequestV2& request, for (const auto& pslot : request_block_desc.slots()) { slots.push_back(SlotDescriptor(pslot)); } - // prepare block char vector shrink for char type - std::vector char_type_idx; - for (int j = 0; j < slots.size(); ++j) { - auto slot = slots[j]; - if (_has_char_type(slot.type())) { - char_type_idx.push_back(j); - } - } - try { if (first_file_mapping->type == FileMappingType::INTERNAL) { RETURN_IF_ERROR(read_batch_doris_format_row( @@ -587,9 +544,6 @@ Status RowIdStorageReader::read_by_rowids(const PMultiGetRequestV2& request, return Status::Error(e.code(), "Row id fetch failed because {}", e.what()); } - - // after read the block, shrink char type block - result_blocks[i].shrink_char_type_column_suffix_zero(char_type_idx); } [[maybe_unused]] size_t compressed_size = 0; diff --git a/be/src/service/point_query_executor.cpp b/be/src/service/point_query_executor.cpp index 441284a251b3c8..10ffe67a753640 100644 --- a/be/src/service/point_query_executor.cpp +++ b/be/src/service/point_query_executor.cpp @@ -567,11 +567,6 @@ Status PointQueryExecutor::_lookup_row_data() { RETURN_IF_ERROR(segment->seek_and_read_by_rowid(*_tablet->tablet_schema(), slot, row_id, column, storage_read_options, iter)); - if (_tablet->tablet_schema() - ->column_by_uid(slot->col_unique_id()) - .has_char_type()) { - column->shrink_padding_chars(); - } } } } diff --git a/be/src/storage/delete/delete_handler.cpp b/be/src/storage/delete/delete_handler.cpp index 8aab3c422966fd..1781c7734de7d0 100644 --- a/be/src/storage/delete/delete_handler.cpp +++ b/be/src/storage/delete/delete_handler.cpp @@ -112,9 +112,6 @@ Status convert(const DataTypePtr& data_type, const std::list& str, // Parses a single condition value string into a Field and creates a comparison predicate. // Uses serde->from_fe_string to do the parsing, which handles all type-specific // conversions (including decimal scale, etc.). -// For CHAR type, the value is padded with '\0' to the declared column length, consistent -// with the IN list path in convert() above. -// For VARCHAR/STRING, the Field is created directly from the raw string. Status parse_to_predicate(const uint32_t index, const std::string col_name, const DataTypePtr& type, DeleteHandler::ConditionParseResult& res, Arena& arena, std::shared_ptr& predicate) { @@ -128,22 +125,7 @@ Status parse_to_predicate(const uint32_t index, const std::string col_name, cons } Field v; - if (type->get_primitive_type() == TYPE_CHAR) { - // CHAR type: create Field and pad with '\0' to the declared column length, - // consistent with IN list path (convert() above) and create_comparison_predicate. - const auto& str = res.value_str.front(); - auto char_len = cast_set( - assert_cast(remove_nullable(type).get())->len()); - auto target = std::max(char_len, str.size()); - if (target > str.size()) { - std::string padded(target, '\0'); - memcpy(padded.data(), str.data(), str.size()); - v = Field::create_field(std::move(padded)); - } else { - v = Field::create_field(str); - } - } else if (is_string_type(type->get_primitive_type())) { - // VARCHAR/STRING: create Field directly from the raw string, no padding needed. + if (is_string_type(type->get_primitive_type())) { v = Field::create_field(res.value_str.front()); } else { auto serde = type->get_serde(); diff --git a/be/src/storage/field.h b/be/src/storage/field.h index 3fa84c36f7b42d..7c4c3ff1fdeee8 100644 --- a/be/src/storage/field.h +++ b/be/src/storage/field.h @@ -80,8 +80,8 @@ class StorageField { } // encode the provided `value` into `buf`. - void full_encode_ascending(const void* value, std::string* buf) const { - _key_coder->full_encode_ascending(value, buf); + void full_encode_ascending(const void* value, std::string* buf, size_t char_len = 0) const { + _key_coder->full_encode_ascending(value, buf, char_len); } const KeyCoder* key_coder() const { return _key_coder; } diff --git a/be/src/storage/index/bloom_filter/bloom_filter.h b/be/src/storage/index/bloom_filter/bloom_filter.h index ff08561644e66f..f1e8376ce09830 100644 --- a/be/src/storage/index/bloom_filter/bloom_filter.h +++ b/be/src/storage/index/bloom_filter/bloom_filter.h @@ -203,6 +203,13 @@ class BloomFilter { virtual bool has_null() const { return *_has_null; } + // Set by the BF index reader to mirror BloomFilterIndexPB.unpadded_char_filter. + // Predicate evaluation uses this to decide whether a CHAR BF can be safely + // probed with an unpadded predicate value (true) or must be skipped to avoid + // false negatives against an old padded-hash BF (false / unset). + void set_unpadded_char_filter(bool v) { _unpadded_char_filter = v; } + bool unpadded_char_filter() const { return _unpadded_char_filter; } + virtual void add_hash(uint64_t hash) = 0; virtual bool test_hash(uint64_t hash) const = 0; @@ -237,6 +244,9 @@ class BloomFilter { bool* _has_null = nullptr; // is this bf used for write bool _is_write = false; + // mirrors BloomFilterIndexPB.unpadded_char_filter; only meaningful for CHAR + // columns and only set by the BF index reader after deserialization. + bool _unpadded_char_filter = false; std::function _hash_func; }; diff --git a/be/src/storage/index/bloom_filter/bloom_filter_index_reader.cpp b/be/src/storage/index/bloom_filter/bloom_filter_index_reader.cpp index b8c1c9b37440ef..ec71044b4e98ed 100644 --- a/be/src/storage/index/bloom_filter/bloom_filter_index_reader.cpp +++ b/be/src/storage/index/bloom_filter/bloom_filter_index_reader.cpp @@ -80,6 +80,7 @@ Status BloomFilterIndexIterator::read_bloom_filter(rowid_t ordinal, BloomFilter::create(_reader->_bloom_filter_index_meta->algorithm(), bf, value.size)); RETURN_IF_ERROR((*bf)->init(value.data, value.size, _reader->_bloom_filter_index_meta->hash_strategy())); + (*bf)->set_unpadded_char_filter(_reader->_bloom_filter_index_meta->unpadded_char_filter()); return Status::OK(); } diff --git a/be/src/storage/index/bloom_filter/bloom_filter_index_writer.cpp b/be/src/storage/index/bloom_filter/bloom_filter_index_writer.cpp index 785e889a831eee..b51360a0f64a32 100644 --- a/be/src/storage/index/bloom_filter/bloom_filter_index_writer.cpp +++ b/be/src/storage/index/bloom_filter/bloom_filter_index_writer.cpp @@ -126,6 +126,12 @@ class BloomFilterIndexWriterImpl : public BloomFilterIndexWriter { BloomFilterIndexPB* meta = index_meta->mutable_bloom_filter_index(); meta->set_hash_strategy(_bf_options.strategy); meta->set_algorithm(BLOCK_BLOOM_FILTER); + if constexpr (field_type == FieldType::OLAP_FIELD_TYPE_CHAR) { + // Mark this BF was built from unpadded CHAR bytes so the reader can + // safely probe with the (also unpadded) predicate value. Old segments + // lack this flag and will be skipped at probe time. + meta->set_unpadded_char_filter(true); + } // write bloom filters IndexedColumnWriterOptions options; diff --git a/be/src/storage/index/indexed_column_reader.cpp b/be/src/storage/index/indexed_column_reader.cpp index b8fe9a57541a2e..17e42c00bef212 100644 --- a/be/src/storage/index/indexed_column_reader.cpp +++ b/be/src/storage/index/indexed_column_reader.cpp @@ -194,8 +194,8 @@ Status IndexedColumnIterator::seek_to_ordinal(ordinal_t idx) { // need to read the data page containing row at idx if (_reader->_has_index_page) { std::string key; - KeyCoderTraits::full_encode_ascending(&idx, - &key); + KeyCoderTraits::full_encode_ascending( + &idx, &key, /*char_len=*/0); RETURN_IF_ERROR(_ordinal_iter.seek_at_or_before(key)); RETURN_IF_ERROR(_read_data_page(_ordinal_iter.current_page_pointer())); _current_iter = &_ordinal_iter; diff --git a/be/src/storage/index/indexed_column_writer.cpp b/be/src/storage/index/indexed_column_writer.cpp index 87a055792959cc..3902ae7e19f190 100644 --- a/be/src/storage/index/indexed_column_writer.cpp +++ b/be/src/storage/index/indexed_column_writer.cpp @@ -137,7 +137,7 @@ Status IndexedColumnWriter::_finish_current_data_page(size_t& num_val) { if (_options.write_ordinal_index) { std::string key; KeyCoderTraits::full_encode_ascending( - &first_ordinal, &key); + &first_ordinal, &key, /*char_len=*/0); _ordinal_index_builder->add(key, _last_data_page); } diff --git a/be/src/storage/index/ordinal_page_index.cpp b/be/src/storage/index/ordinal_page_index.cpp index 054f30f67f2445..855dbcd7da2b89 100644 --- a/be/src/storage/index/ordinal_page_index.cpp +++ b/be/src/storage/index/ordinal_page_index.cpp @@ -39,8 +39,8 @@ namespace segment_v2 { void OrdinalIndexWriter::append_entry(ordinal_t ordinal, const PagePointer& data_pp) { std::string key; - KeyCoderTraits::full_encode_ascending(&ordinal, - &key); + KeyCoderTraits::full_encode_ascending( + &ordinal, &key, /*char_len=*/0); _page_builder->add(key, data_pp); _last_pp = data_pp; } diff --git a/be/src/storage/iterator/olap_data_convertor.cpp b/be/src/storage/iterator/olap_data_convertor.cpp index 3f56e91e6f3bee..4bd7dc78f41357 100644 --- a/be/src/storage/iterator/olap_data_convertor.cpp +++ b/be/src/storage/iterator/olap_data_convertor.cpp @@ -546,17 +546,11 @@ Status OlapBlockDataConvertor::OlapColumnDataConvertorChar::convert_to_olap() { column_string = assert_cast(_typed_column.column.get()); } - // If column_string is not padded to full, we should do padding here. - if (should_padding(column_string, _length)) { - _column = clone_and_padding(column_string, _length); - column_string = assert_cast(_column.get()); - } - for (size_t i = 0; i < _num_rows; i++) { if (!_nullmap || !_nullmap[i + _row_pos]) { _slice[i] = column_string->get_data_at(i + _row_pos).to_slice(); - DCHECK(_slice[i].size == _length) - << "char type data length not equal to schema, schema=" << _length + DCHECK(_slice[i].size <= _length) + << "char type data length exceeds declared schema length, schema=" << _length << ", real=" << _slice[i].size; } } diff --git a/be/src/storage/iterator/olap_data_convertor.h b/be/src/storage/iterator/olap_data_convertor.h index 409f73618e5820..c9c7a0911a690a 100644 --- a/be/src/storage/iterator/olap_data_convertor.h +++ b/be/src/storage/iterator/olap_data_convertor.h @@ -174,39 +174,8 @@ class OlapBlockDataConvertor { Status convert_to_olap() override; private: - static bool should_padding(const ColumnString* column, size_t padding_length) { - // Check sum of data length, including terminating zero. - return column->size() * padding_length != column->chars.size(); - } - - static ColumnPtr clone_and_padding(const ColumnString* input, size_t padding_length) { - auto column = ColumnString::create(); - auto padded_column = assert_cast(column->assume_mutable().get()); - - column->offsets.resize(input->size()); - column->chars.resize(input->size() * padding_length); - memset(padded_column->chars.data(), 0, input->size() * padding_length); - - for (size_t i = 0; i < input->size(); i++) { - column->offsets[i] = cast_set((i + 1) * padding_length); - - auto str = input->get_data_at(i); - - DCHECK(str.size <= padding_length) - << "char type data length over limit, padding_length=" << padding_length - << ", real=" << str.size; - - if (str.size) { - memcpy(padded_column->chars.data() + i * padding_length, str.data, str.size); - } - } - - return column; - } - size_t _length; PaddedPODArray _slice; - ColumnPtr _column = nullptr; }; class OlapColumnDataConvertorVarChar : public OlapColumnDataConvertorBase { diff --git a/be/src/storage/key_coder.h b/be/src/storage/key_coder.h index 0c4bcf08d171e5..4aa2b2bfb86982 100644 --- a/be/src/storage/key_coder.h +++ b/be/src/storage/key_coder.h @@ -41,7 +41,11 @@ namespace doris { -using FullEncodeAscendingFunc = void (*)(const void* value, std::string* buf); +// `char_len` is only used by CHAR — the declared CHAR(N) column length. +// CHAR full-encoding zero-pads the slice up to char_len so the encoded +// bytes are the fixed-width canonical form used by PK and segment min/max +// indexes. All other types ignore this argument. +using FullEncodeAscendingFunc = void (*)(const void* value, std::string* buf, size_t char_len); using EncodeAscendingFunc = void (*)(const void* value, size_t index_size, std::string* buf); using DecodeAscendingFunc = Status (*)(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr); @@ -55,8 +59,8 @@ class KeyCoder { KeyCoder(TraitsType traits); // encode the provided `value` into `buf`. - void full_encode_ascending(const void* value, std::string* buf) const { - _full_encode_ascending(value, buf); + void full_encode_ascending(const void* value, std::string* buf, size_t char_len = 0) const { + _full_encode_ascending(value, buf, char_len); } // similar to `full_encode_ascending`, but only encode part (the first `index_size` bytes) of the value. @@ -90,7 +94,7 @@ class KeyCoderTraits::CppType; using UnsignedCppType = typename CppTypeTraits::UnsignedCppType; - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { UnsignedCppType unsigned_val; memcpy(&unsigned_val, value, sizeof(unsigned_val)); // swap MSB to encode integer @@ -105,7 +109,7 @@ class KeyCoderTraits { typename CppTypeTraits::UnsignedCppType; public: - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { UnsignedCppType unsigned_val; memcpy(&unsigned_val, value, sizeof(unsigned_val)); // make it bigendian @@ -145,7 +149,7 @@ class KeyCoderTraits { } static void encode_ascending(const void* value, size_t index_size, std::string* buf) { - full_encode_ascending(value, buf); + full_encode_ascending(value, buf, /*char_len=*/0); } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { @@ -170,7 +174,7 @@ class KeyCoderTraits { typename CppTypeTraits::UnsignedCppType; public: - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { UnsignedCppType unsigned_val; memcpy(&unsigned_val, value, sizeof(unsigned_val)); // make it bigendian @@ -179,7 +183,7 @@ class KeyCoderTraits { } static void encode_ascending(const void* value, size_t index_size, std::string* buf) { - full_encode_ascending(value, buf); + full_encode_ascending(value, buf, /*char_len=*/0); } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { @@ -205,7 +209,7 @@ class KeyCoderTraits { typename CppTypeTraits::UnsignedCppType; public: - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { UnsignedCppType unsigned_val; memcpy(&unsigned_val, value, sizeof(unsigned_val)); // make it bigendian @@ -214,7 +218,7 @@ class KeyCoderTraits { } static void encode_ascending(const void* value, size_t index_size, std::string* buf) { - full_encode_ascending(value, buf); + full_encode_ascending(value, buf, /*char_len=*/0); } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { @@ -240,7 +244,7 @@ class KeyCoderTraits { typename CppTypeTraits::UnsignedCppType; public: - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { UnsignedCppType unsigned_val; memcpy(&unsigned_val, value, sizeof(unsigned_val)); // make it bigendian @@ -249,7 +253,7 @@ class KeyCoderTraits { } static void encode_ascending(const void* value, size_t index_size, std::string* buf) { - full_encode_ascending(value, buf); + full_encode_ascending(value, buf, /*char_len=*/0); } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { @@ -270,17 +274,17 @@ class KeyCoderTraits { template <> class KeyCoderTraits { public: - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { decimal12_t decimal_val; memcpy(&decimal_val, value, sizeof(decimal12_t)); KeyCoderTraits::full_encode_ascending( - &decimal_val.integer, buf); + &decimal_val.integer, buf, /*char_len=*/0); KeyCoderTraits::full_encode_ascending(&decimal_val.fraction, - buf); + buf, /*char_len=*/0); } // namespace doris static void encode_ascending(const void* value, size_t index_size, std::string* buf) { - full_encode_ascending(value, buf); + full_encode_ascending(value, buf, /*char_len=*/0); } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { @@ -297,18 +301,26 @@ class KeyCoderTraits { template <> class KeyCoderTraits { public: - NO_SANITIZE_UNDEFINED static void full_encode_ascending(const void* value, std::string* buf) { - auto slice = reinterpret_cast(value); - buf->append(slice->get_data(), slice->get_size()); + // Pad to char_len so the encoded bytes are the canonical fixed-width + // form used by short-key / PK / segment min-max indexes. + NO_SANITIZE_UNDEFINED static void full_encode_ascending(const void* value, std::string* buf, + size_t char_len) { + const Slice* slice = reinterpret_cast(value); + size_t copy_size = std::min(slice->size, char_len); + buf->append(slice->data, copy_size); + if (copy_size < char_len) { + buf->append(char_len - copy_size, '\0'); + } } NO_SANITIZE_UNDEFINED static void encode_ascending(const void* value, size_t index_size, std::string* buf) { const Slice* slice = (const Slice*)value; - CHECK(index_size <= slice->size) - << "index size is larger than char size, index=" << index_size - << ", char=" << slice->size; - buf->append(slice->data, index_size); + size_t copy_size = std::min(index_size, slice->size); + buf->append(slice->data, copy_size); + if (copy_size < index_size) { + buf->append(index_size - copy_size, '\0'); + } } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { @@ -319,7 +331,8 @@ class KeyCoderTraits { template <> class KeyCoderTraits { public: - NO_SANITIZE_UNDEFINED static void full_encode_ascending(const void* value, std::string* buf) { + NO_SANITIZE_UNDEFINED static void full_encode_ascending(const void* value, std::string* buf, + size_t /*char_len*/) { auto slice = reinterpret_cast(value); buf->append(slice->get_data(), slice->get_size()); } @@ -339,7 +352,8 @@ class KeyCoderTraits { template <> class KeyCoderTraits { public: - NO_SANITIZE_UNDEFINED static void full_encode_ascending(const void* value, std::string* buf) { + NO_SANITIZE_UNDEFINED static void full_encode_ascending(const void* value, std::string* buf, + size_t /*char_len*/) { auto slice = reinterpret_cast(value); buf->append(slice->get_data(), slice->get_size()); } @@ -371,7 +385,7 @@ class KeyCoderTraitsForFloat { } // -infinity < -100.0 < -1.0 < -0.0 < 0.0 < 1.0 < 100.0 < infinity < NaN - static void full_encode_ascending(const void* value, std::string* buf) { + static void full_encode_ascending(const void* value, std::string* buf, size_t /*char_len*/) { CppType val; std::memcpy(&val, value, sizeof(CppType)); UnsignedCppType sortable_val = encode_float(val); @@ -383,7 +397,7 @@ class KeyCoderTraitsForFloat { } static void encode_ascending(const void* value, size_t index_size, std::string* buf) { - full_encode_ascending(value, buf); + full_encode_ascending(value, buf, /*char_len=*/0); } static Status decode_ascending(Slice* encoded_key, size_t index_size, uint8_t* cell_ptr) { diff --git a/be/src/storage/predicate/comparison_predicate.h b/be/src/storage/predicate/comparison_predicate.h index e1ebae39f8f9d4..587a50e8914011 100644 --- a/be/src/storage/predicate/comparison_predicate.h +++ b/be/src/storage/predicate/comparison_predicate.h @@ -280,7 +280,13 @@ class ComparisonPredicateBase final : public ColumnPredicate { if (bf->is_ngram_bf()) { return true; } - if constexpr (is_string_type(Type)) { + if constexpr (Type == TYPE_CHAR) { + // Old CHAR BFs hashed padded bytes; skip probing them. + if (!bf->unpadded_char_filter()) { + return true; + } + return bf->test_bytes(_value.data(), _value.size()); + } else if constexpr (is_string_type(Type)) { return bf->test_bytes(_value.data(), _value.size()); } else { // DecimalV2 using decimal12_t in bloom filter, should convert value to decimal12_t diff --git a/be/src/storage/predicate/in_list_predicate.h b/be/src/storage/predicate/in_list_predicate.h index 6c92290e5be009..a4be6ede6566e3 100644 --- a/be/src/storage/predicate/in_list_predicate.h +++ b/be/src/storage/predicate/in_list_predicate.h @@ -68,18 +68,16 @@ class InListPredicateBase final : public ColumnPredicate { ENABLE_FACTORY_CREATOR(InListPredicateBase); using T = typename PrimitiveTypeTraits::CppType; InListPredicateBase(uint32_t column_id, std::string col_name, - const std::shared_ptr& hybrid_set, bool is_opposite, - size_t char_length = 0) + const std::shared_ptr& hybrid_set, bool is_opposite) : ColumnPredicate(column_id, col_name, Type, is_opposite), _min_value(type_limit::max()), _max_value(type_limit::min()) { CHECK(hybrid_set != nullptr); // String types need a copy because: - // 1. The caller's set is StringSet>, but here we want - // StringSet> for small-set optimization — different - // C++ types, cannot share the pointer. - // 2. CHAR type additionally needs padding to char_length. + // The caller's set is StringSet>, but here we want + // StringSet> for small-set optimization — different + // C++ types, cannot share the pointer. // // Date/DECIMALV2 types do NOT need a copy: their ElementType (CppType) is identical // between the caller's HybridSet and InListPredicateBase's, and InListPredicateBase @@ -93,13 +91,7 @@ class InListPredicateBase final : public ColumnPredicate { HybridSetBase::IteratorBase* iter = hybrid_set->begin(); while (iter->has_next()) { const auto* value = (const StringRef*)(iter->get_value()); - if constexpr (Type == TYPE_CHAR) { - std::string padded(std::max(char_length, value->size), '\0'); - memcpy(padded.data(), value->data, value->size); - _values->insert((void*)padded.data(), padded.length()); - } else { - _values->insert((void*)value->data, value->size); - } + _values->insert((void*)value->data, value->size); iter->next(); } } else { @@ -350,6 +342,12 @@ class InListPredicateBase final : public ColumnPredicate { if (bf->is_ngram_bf()) { return true; } + if constexpr (Type == TYPE_CHAR) { + // Old CHAR BFs hashed padded bytes; skip probing them. + if (!bf->unpadded_char_filter()) { + return true; + } + } HybridSetBase::IteratorBase* iter = _values->begin(); while (iter->has_next()) { if constexpr (is_string_type(Type)) { diff --git a/be/src/storage/predicate/like_column_predicate.h b/be/src/storage/predicate/like_column_predicate.h index 24f129b0fb7dc6..152518450517b9 100644 --- a/be/src/storage/predicate/like_column_predicate.h +++ b/be/src/storage/predicate/like_column_predicate.h @@ -155,7 +155,7 @@ class LikeColumnPredicate final : public ColumnPredicate { std::vector tmp_res(column.dict_size(), false); for (int i = 0; i < column.dict_size(); i++) { - StringRef cell_value = column.get_shrink_value(i); + StringRef cell_value = column.get_value(i); unsigned char flag = 0; THROW_IF_ERROR((_state->scalar_function)( &_like_state, StringRef(cell_value.data, cell_value.size), pattern, &flag)); diff --git a/be/src/storage/predicate/predicate_creator_comparison.cpp b/be/src/storage/predicate/predicate_creator_comparison.cpp index bfec1262cfc1a5..b10a175b016592 100644 --- a/be/src/storage/predicate/predicate_creator_comparison.cpp +++ b/be/src/storage/predicate/predicate_creator_comparison.cpp @@ -77,21 +77,9 @@ std::shared_ptr create_comparison_predicate(const uint32_t cid, opposite); } case TYPE_CHAR: { - auto target = std::max(cast_set(assert_cast( - remove_nullable(data_type).get()) - ->len()), - value.template get().size()); - if (target > value.template get().size()) { - std::string tmp(target, '\0'); - memcpy(tmp.data(), value.template get().data(), - value.template get().size()); - return ComparisonPredicateBase::create_shared( - cid, col_name, Field::create_field(std::move(tmp)), opposite); - } else { - return ComparisonPredicateBase::create_shared( - cid, col_name, Field::create_field(value.template get()), - opposite); - } + return ComparisonPredicateBase::create_shared( + cid, col_name, Field::create_field(value.template get()), + opposite); } case TYPE_VARCHAR: case TYPE_STRING: { diff --git a/be/src/storage/predicate/predicate_creator_in_list_in.cpp b/be/src/storage/predicate/predicate_creator_in_list_in.cpp index e720da632a8ad4..f1ad365b4f523e 100644 --- a/be/src/storage/predicate/predicate_creator_in_list_in.cpp +++ b/be/src/storage/predicate/predicate_creator_in_list_in.cpp @@ -26,42 +26,34 @@ namespace doris { template static std::shared_ptr create_in_list_predicate_impl( const uint32_t cid, const std::string col_name, const std::shared_ptr& set, - bool is_opposite, size_t char_length = 0) { + bool is_opposite) { // Only string types construct their own HybridSetType in the constructor (to convert // from DynamicContainer to FixedContainer), so N dispatch is only needed // for them. All other types directly share the caller's hybrid_set. if constexpr (!is_string_type(TYPE)) { return InListPredicateBase::create_shared( - cid, col_name, set, is_opposite, char_length); + cid, col_name, set, is_opposite); } else { auto set_size = set->size(); if (set_size == 1) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 2) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 3) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 4) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 5) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 6) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 7) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == FIXED_CONTAINER_MAX_SIZE) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else { return InListPredicateBase::create_shared( - cid, col_name, set, is_opposite, char_length); + cid, col_name, set, is_opposite); } } } @@ -120,9 +112,8 @@ std::shared_ptr create_in_list_predicate( - cid, col_name, set, is_opposite, - assert_cast(remove_nullable(data_type).get())->len()); + return create_in_list_predicate_impl(cid, col_name, set, + is_opposite); } case TYPE_VARCHAR: { return create_in_list_predicate_impl( diff --git a/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp b/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp index 63e8eb37b186a3..e4cb4731a57095 100644 --- a/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp +++ b/be/src/storage/predicate/predicate_creator_in_list_not_in.cpp @@ -26,42 +26,34 @@ namespace doris { template static std::shared_ptr create_in_list_predicate_impl( const uint32_t cid, const std::string col_name, const std::shared_ptr& set, - bool is_opposite, size_t char_length = 0) { + bool is_opposite) { // Only string types construct their own HybridSetType in the constructor (to convert // from DynamicContainer to FixedContainer), so N dispatch is only needed // for them. All other types directly share the caller's hybrid_set. if constexpr (!is_string_type(TYPE)) { return InListPredicateBase::create_shared( - cid, col_name, set, is_opposite, char_length); + cid, col_name, set, is_opposite); } else { auto set_size = set->size(); if (set_size == 1) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 2) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 3) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 4) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 5) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 6) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == 7) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else if (set_size == FIXED_CONTAINER_MAX_SIZE) { - return InListPredicateBase::create_shared(cid, col_name, set, is_opposite, - char_length); + return InListPredicateBase::create_shared(cid, col_name, set, is_opposite); } else { return InListPredicateBase::create_shared( - cid, col_name, set, is_opposite, char_length); + cid, col_name, set, is_opposite); } } } @@ -121,8 +113,7 @@ std::shared_ptr create_in_list_predicate( - cid, col_name, set, is_opposite, - assert_cast(remove_nullable(data_type).get())->len()); + cid, col_name, set, is_opposite); } case TYPE_VARCHAR: { return create_in_list_predicate_impl( diff --git a/be/src/storage/row_cursor.cpp b/be/src/storage/row_cursor.cpp index ef649a6a092979..900b7d8f728065 100644 --- a/be/src/storage/row_cursor.cpp +++ b/be/src/storage/row_cursor.cpp @@ -123,17 +123,6 @@ RowCursor RowCursor::clone() const { return result; } -void RowCursor::pad_char_fields() { - for (size_t i = 0; i < _fields.size(); ++i) { - const StorageField* col = _schema->column(cast_set(i)); - if (col->type() == FieldType::OLAP_FIELD_TYPE_CHAR && !_fields[i].is_null()) { - String padded = _fields[i].get(); - padded.resize(col->length(), '\0'); - _fields[i] = Field::create_field(std::move(padded)); - } - } -} - std::string RowCursor::to_string() const { std::string result; for (size_t i = 0; i < _fields.size(); ++i) { @@ -156,29 +145,12 @@ void RowCursor::_encode_field(const StorageField* storage_field, const Field& f, FieldType ft = storage_field->type(); if (field_is_slice_type(ft)) { - // String types: CHAR, VARCHAR, STRING — all stored as String in Field. const String& str = f.get(); - - if (ft == FieldType::OLAP_FIELD_TYPE_CHAR) { - // CHAR type: must pad with \0 to the declared column length - size_t col_len = storage_field->length(); - String padded(col_len, '\0'); - memcpy(padded.data(), str.data(), std::min(str.size(), col_len)); - - Slice slice(padded.data(), col_len); - if (full_encode) { - storage_field->full_encode_ascending(&slice, buf); - } else { - storage_field->encode_ascending(&slice, buf); - } + Slice slice(str.data(), str.size()); + if (full_encode) { + storage_field->full_encode_ascending(&slice, buf, storage_field->length()); } else { - // VARCHAR / STRING: use actual length - Slice slice(str.data(), str.size()); - if (full_encode) { - storage_field->full_encode_ascending(&slice, buf); - } else { - storage_field->encode_ascending(&slice, buf); - } + storage_field->encode_ascending(&slice, buf); } return; } diff --git a/be/src/storage/row_cursor.h b/be/src/storage/row_cursor.h index 58a07d9ce8c874..30d045e3761552 100644 --- a/be/src/storage/row_cursor.h +++ b/be/src/storage/row_cursor.h @@ -71,11 +71,6 @@ class RowCursor { // Returns a deep copy of this RowCursor with the same schema and field values. RowCursor clone() const; - // Pad all CHAR-type fields in-place to their declared column length using '\0'. - // RowCursor holds CHAR values in compute format (unpadded). Call this before - // comparing against storage-format data (e.g. _seek_block) where CHAR is padded. - void pad_char_fields(); - // Output row cursor content in string format std::string to_string() const; diff --git a/be/src/storage/segment/binary_plain_page.h b/be/src/storage/segment/binary_plain_page.h index e41d3515d21b7e..f62103142e404b 100644 --- a/be/src/storage/segment/binary_plain_page.h +++ b/be/src/storage/segment/binary_plain_page.h @@ -237,6 +237,10 @@ class BinaryPlainPageDecoder : public PageDecoder { } dst->insert_many_continuous_binary_data(_data.data, _offsets.data(), max_fetch); + if (_options.strip_padding) { + dst->shrink_padding_chars(); + } + *n = max_fetch; return Status::OK(); } @@ -289,6 +293,9 @@ class BinaryPlainPageDecoder : public PageDecoder { if (LIKELY(read_count > 0)) { dst->insert_many_strings(_binary_data.data(), read_count); + if (_options.strip_padding) { + dst->shrink_padding_chars(); + } } *n = read_count; @@ -332,6 +339,12 @@ class BinaryPlainPageDecoder : public PageDecoder { dict_word_info[_num_elems - 1].size = (data_begin + _offsets_pos) - (char*)dict_word_info[_num_elems - 1].data; + + if (_options.strip_padding) { + for (uint32_t i = 0; i < _num_elems; ++i) { + dict_word_info[i].size = strnlen(dict_word_info[i].data, dict_word_info[i].size); + } + } return Status::OK(); } diff --git a/be/src/storage/segment/binary_prefix_page.cpp b/be/src/storage/segment/binary_prefix_page.cpp index ed11c818f4e6da..92aed1ae675837 100644 --- a/be/src/storage/segment/binary_prefix_page.cpp +++ b/be/src/storage/segment/binary_prefix_page.cpp @@ -235,6 +235,10 @@ Status BinaryPrefixPageDecoder::next_batch(size_t* n, MutableColumnPtr& dst) { } } + if (_options.strip_padding) { + dst->shrink_padding_chars(); + } + *n = max_fetch; return Status::OK(); } diff --git a/be/src/storage/segment/binary_prefix_page.h b/be/src/storage/segment/binary_prefix_page.h index ce2b363e934635..e45991efd8affd 100644 --- a/be/src/storage/segment/binary_prefix_page.h +++ b/be/src/storage/segment/binary_prefix_page.h @@ -95,7 +95,7 @@ class BinaryPrefixPageBuilder : public PageBuilderHelperget_meta_type() == FieldType::OLAP_FIELD_TYPE_CHAR) { return Status::InternalError( - "OFFSET_ONLY access path is not supported on CHAR column '{}': CHAR is stored " - "padded so the per-row length information available without reading the chars " - "buffer is always the padded length, not the logical length. The FE planner " - "must not emit an OFFSET access path for CHAR columns.", + "OFFSET_ONLY access path is not supported on CHAR column '{}': legacy CHAR " + "segments stored padded bytes, and the per-row length information available " + "without reading the chars buffer is the padded length, not the logical " + "length. The FE planner must not emit an OFFSET access path for CHAR columns.", _column_name); } if (read_offset_only()) { @@ -2393,6 +2393,7 @@ Status FileColumnIterator::_read_data_page(const OrdinalPageIndexIterator& iter) _opts.type = DATA_PAGE; PageDecoderOptions decoder_opts; decoder_opts.only_read_offsets = _opts.only_read_offsets; + decoder_opts.strip_padding = (_reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_CHAR); RETURN_IF_ERROR( _reader->read_page(_opts, iter.page(), &handle, &page_body, &footer, _compress_codec)); // parse data page @@ -2439,7 +2440,10 @@ Status FileColumnIterator::_read_dict_data() { RETURN_IF_ERROR(EncodingInfo::get(FieldType::OLAP_FIELD_TYPE_VARCHAR, dict_footer.dict_page_footer().encoding(), {}, &encoding_info)); - RETURN_IF_ERROR(encoding_info->create_page_decoder(dict_data, {}, _dict_decoder)); + PageDecoderOptions dict_decoder_opts; + dict_decoder_opts.strip_padding = (_reader->get_meta_type() == FieldType::OLAP_FIELD_TYPE_CHAR); + RETURN_IF_ERROR( + encoding_info->create_page_decoder(dict_data, dict_decoder_opts, _dict_decoder)); RETURN_IF_ERROR(_dict_decoder->init()); _dict_word_info.reset(new StringRef[_dict_decoder->count()]); diff --git a/be/src/storage/segment/options.h b/be/src/storage/segment/options.h index 4ff94c0a8e3585..a1695d89655d8f 100644 --- a/be/src/storage/segment/options.h +++ b/be/src/storage/segment/options.h @@ -21,6 +21,8 @@ #include +#include "storage/olap_common.h" + namespace doris { namespace segment_v2 { @@ -50,6 +52,9 @@ struct PageBuilderOptions { struct PageDecoderOptions { bool need_check_bitmap = true; bool only_read_offsets = false; + // Whether to strip CHAR zero-padding inline so callers receive + // unpadded strings without a separate shrink pass. + bool strip_padding = false; }; } // namespace segment_v2 diff --git a/be/src/storage/segment/segment_iterator.cpp b/be/src/storage/segment/segment_iterator.cpp index f7bd652ccf0745..40343085d9c673 100644 --- a/be/src/storage/segment/segment_iterator.cpp +++ b/be/src/storage/segment/segment_iterator.cpp @@ -538,7 +538,7 @@ Status SegmentIterator::_lazy_init(Block* block) { } _current_return_columns.resize(_schema->columns().size()); - _vec_init_char_column_id(block); + _vec_init_char_column_id(); for (size_t i = 0; i < _schema->column_ids().size(); i++) { ColumnId cid = _schema->column_ids()[i]; const auto* column_desc = _schema->column(cid); @@ -1684,13 +1684,6 @@ Status SegmentIterator::_lookup_ordinal_from_sk_index(const RowCursor& key, bool const auto& key_col_ids = key.schema()->column_ids(); - // Clone the key once and pad CHAR fields to storage format before the binary search. - // _seek_block holds storage-format data where CHAR is zero-padded to column length, - // while RowCursor holds CHAR in compute format (unpadded). Padding once here avoids - // repeated allocation inside the comparison loop. - RowCursor padded_key = key.clone(); - padded_key.pad_char_fields(); - ssize_t start_block_id = 0; auto start_iter = sk_index_decoder->lower_bound(index_key); if (start_iter.valid()) { @@ -1718,7 +1711,7 @@ Status SegmentIterator::_lookup_ordinal_from_sk_index(const RowCursor& key, bool while (start < end) { rowid_t mid = (start + end) / 2; RETURN_IF_ERROR(_seek_and_peek(mid)); - int cmp = _compare_short_key_with_seek_block(padded_key, key_col_ids); + int cmp = _compare_short_key_with_seek_block(key, key_col_ids); if (cmp > 0) { start = mid + 1; } else if (cmp == 0) { @@ -2074,29 +2067,8 @@ bool SegmentIterator::_can_evaluated_by_vectorized(std::shared_ptrcolumns().size(), false); @@ -2104,14 +2076,6 @@ void SegmentIterator::_vec_init_char_column_id(Block* block) { auto cid = _schema->column_id(i); const StorageField* column_desc = _schema->column(cid); - // The additional deleted filter condition will be in the materialized column at the end of the block. - // After _output_column_by_sel_idx, it will be erased, so we do not need to shrink it. - if (i < block->columns()) { - if (_has_char_type(*column_desc)) { - _char_type_idx.emplace_back(i); - } - } - if (column_desc->type() == FieldType::OLAP_FIELD_TYPE_CHAR) { _is_char_type[cid] = true; } @@ -2871,8 +2835,6 @@ Status SegmentIterator::_next_batch_internal(Block* block) { _output_index_result_column(vir_ctxs, sel_rowid_idx, _selected_size, block); } RETURN_IF_ERROR(_materialization_of_virtual_column(block)); - // shrink char_type suffix zero data - block->shrink_char_type_column_suffix_zero(_char_type_idx); if (_opts.read_limit > 0) { _rows_returned += block->rows(); } @@ -2982,7 +2944,6 @@ Status SegmentIterator::_process_common_expr(uint16_t* sel_rowid_idx, uint16_t& common_ctxs.push_back(ctx.get()); } _output_index_result_column(common_ctxs, _sel_rowid_idx.data(), _selected_size, block); - block->shrink_char_type_column_suffix_zero(_char_type_idx); RETURN_IF_ERROR(_execute_common_expr(_sel_rowid_idx.data(), _selected_size, block)); if (need_mock_col) { @@ -3383,7 +3344,6 @@ Status SegmentIterator::_materialization_of_virtual_column(Block* block) { idx_in_block, block->columns(), _vir_cid_to_idx_in_block.size(), column_expr->root()->debug_string()); } - block->shrink_char_type_column_suffix_zero(_char_type_idx); if (check_and_get_column( block->get_by_position(idx_in_block).column.get())) { VLOG_DEBUG << fmt::format("Virtual column is doing materialization, cid {}, col idx {}", diff --git a/be/src/storage/segment/segment_iterator.h b/be/src/storage/segment/segment_iterator.h index 4fff2de331844f..b0f297629d7157 100644 --- a/be/src/storage/segment/segment_iterator.h +++ b/be/src/storage/segment/segment_iterator.h @@ -205,11 +205,7 @@ class SegmentIterator : public RowwiseIterator { bool _is_literal_node(const TExprNodeType::type& node_type); Status _vec_init_lazy_materialization(); - // TODO: Fix Me - // CHAR type in storage layer padding the 0 in length. But query engine need ignore the padding 0. - // so segment iterator need to shrink char column before output it. only use in vec query engine. - void _vec_init_char_column_id(Block* block); - bool _has_char_type(const StorageField& column_desc); + void _vec_init_char_column_id(); uint32_t segment_id() const { return _segment->id(); } uint32_t num_rows() const { return _segment->num_rows(); } @@ -433,8 +429,6 @@ class SegmentIterator : public RowwiseIterator { io::FileReaderSPtr _file_reader; - // char_type or array type columns cid - std::vector _char_type_idx; std::vector _is_char_type; // used for compaction, record selectd rowids of current batch diff --git a/be/src/storage/segment/segment_writer.cpp b/be/src/storage/segment/segment_writer.cpp index b0e929d8ef8463..edc3699d0bbaf1 100644 --- a/be/src/storage/segment/segment_writer.cpp +++ b/be/src/storage/segment/segment_writer.cpp @@ -127,6 +127,7 @@ SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, _rowid_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT); // primary keys _primary_key_coders.swap(_key_coders); + _primary_key_index_size = _key_index_size; // cluster keys _key_coders.clear(); _key_index_size.clear(); @@ -829,13 +830,15 @@ std::string SegmentWriter::_full_encode_keys( assert(_key_index_size.size() == _num_sort_key_columns); assert(key_columns.size() == _num_sort_key_columns && _key_coders.size() == _num_sort_key_columns); - return _full_encode_keys(_key_coders, key_columns, pos, null_first); + return _full_encode_keys(_key_coders, _key_index_size, key_columns, pos, null_first); } std::string SegmentWriter::_full_encode_keys( const std::vector& key_coders, + const std::vector& key_index_sizes, const std::vector& key_columns, size_t pos, bool null_first) { assert(key_columns.size() == key_coders.size()); + assert(key_columns.size() == key_index_sizes.size()); std::string encoded_keys; size_t cid = 0; @@ -852,7 +855,7 @@ std::string SegmentWriter::_full_encode_keys( } encoded_keys.push_back(KEY_NORMAL_MARKER); DCHECK(key_coders[cid] != nullptr); - key_coders[cid]->full_encode_ascending(field, &encoded_keys); + key_coders[cid]->full_encode_ascending(field, &encoded_keys, key_index_sizes[cid]); ++cid; } return encoded_keys; @@ -1213,7 +1216,8 @@ Status SegmentWriter::_generate_primary_key_index( } else { // mow table with cluster key // generate primary keys in memory for (uint32_t pos = 0; pos < num_rows; pos++) { - std::string key = _full_encode_keys(primary_key_coders, primary_key_columns, pos); + std::string key = _full_encode_keys(primary_key_coders, _primary_key_index_size, + primary_key_columns, pos); _maybe_invalid_row_cache(key); if (_tablet_schema->has_sequence_col()) { _encode_seq_column(seq_column, pos, &key); diff --git a/be/src/storage/segment/segment_writer.h b/be/src/storage/segment/segment_writer.h index 37b4e996448d76..59b7eafa3b471b 100644 --- a/be/src/storage/segment/segment_writer.h +++ b/be/src/storage/segment/segment_writer.h @@ -172,6 +172,7 @@ class SegmentWriter { size_t pos, bool null_first = true); static std::string _full_encode_keys(const std::vector& key_coders, + const std::vector& key_index_sizes, const std::vector& key_columns, size_t pos, bool null_first = true); @@ -223,7 +224,12 @@ class SegmentWriter { std::vector _primary_key_coders; const KeyCoder* _seq_coder = nullptr; const KeyCoder* _rowid_coder = nullptr; + // Per-column index length, parallel to _key_coders. Forwarded as + // char_len to KeyCoder::full_encode_ascending. std::vector _key_index_size; + // Mirror of _key_index_size captured before the coder swap in mow-with- + // cluster-key tables, used by PK encoding. + std::vector _primary_key_index_size; size_t _short_key_row_pos = 0; std::vector _column_ids; diff --git a/be/src/storage/segment/vertical_segment_writer.cpp b/be/src/storage/segment/vertical_segment_writer.cpp index 78031d1f7d4429..ea3e27aaf26c2d 100644 --- a/be/src/storage/segment/vertical_segment_writer.cpp +++ b/be/src/storage/segment/vertical_segment_writer.cpp @@ -133,6 +133,7 @@ VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32 _rowid_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_UNSIGNED_INT); // primary keys _primary_key_coders.swap(_key_coders); + _primary_key_index_size = _key_index_size; // cluster keys _key_coders.clear(); _key_index_size.clear(); @@ -1127,7 +1128,8 @@ Status VerticalSegmentWriter::_generate_primary_key_index( // 1. generate primary keys in memory std::vector primary_keys; for (uint32_t pos = 0; pos < num_rows; pos++) { - std::string key = _full_encode_keys(primary_key_coders, primary_key_columns, pos); + std::string key = _full_encode_keys(primary_key_coders, _primary_key_index_size, + primary_key_columns, pos); _maybe_invalid_row_cache(key); if (_tablet_schema->has_sequence_col()) { _encode_seq_column(seq_column, pos, &key); @@ -1187,13 +1189,15 @@ std::string VerticalSegmentWriter::_full_encode_keys( } assert(key_columns.size() == _num_sort_key_columns && _key_coders.size() == _num_sort_key_columns); - return _full_encode_keys(_key_coders, key_columns, pos); + return _full_encode_keys(_key_coders, _key_index_size, key_columns, pos); } std::string VerticalSegmentWriter::_full_encode_keys( const std::vector& key_coders, + const std::vector& key_index_sizes, const std::vector& key_columns, size_t pos) { assert(key_columns.size() == key_coders.size()); + assert(key_columns.size() == key_index_sizes.size()); std::string encoded_keys; size_t cid = 0; @@ -1206,7 +1210,7 @@ std::string VerticalSegmentWriter::_full_encode_keys( } encoded_keys.push_back(KEY_NORMAL_MARKER); DCHECK(key_coders[cid] != nullptr); - key_coders[cid]->full_encode_ascending(field, &encoded_keys); + key_coders[cid]->full_encode_ascending(field, &encoded_keys, key_index_sizes[cid]); ++cid; } return encoded_keys; diff --git a/be/src/storage/segment/vertical_segment_writer.h b/be/src/storage/segment/vertical_segment_writer.h index 5c0ec0930e522d..2e4d1d3cde6e4c 100644 --- a/be/src/storage/segment/vertical_segment_writer.h +++ b/be/src/storage/segment/vertical_segment_writer.h @@ -148,6 +148,7 @@ class VerticalSegmentWriter { std::string _full_encode_keys(const std::vector& key_columns, size_t pos); std::string _full_encode_keys(const std::vector& key_coders, + const std::vector& key_index_sizes, const std::vector& key_columns, size_t pos); // used for unique-key with merge on write @@ -231,6 +232,9 @@ class VerticalSegmentWriter { const KeyCoder* _seq_coder = nullptr; const KeyCoder* _rowid_coder = nullptr; std::vector _key_index_size; + // Mirror of _key_index_size captured before the coder swap in mow-with- + // cluster-key tables, used by PK encoding. + std::vector _primary_key_index_size; size_t _short_key_row_pos = 0; // _num_rows_written means row count already written in this current column group diff --git a/be/test/core/block/block_test.cpp b/be/test/core/block/block_test.cpp index 1bb930bb15a6de..40a6d2ee99ebfa 100644 --- a/be/test/core/block/block_test.cpp +++ b/be/test/core/block/block_test.cpp @@ -1241,8 +1241,6 @@ TEST(BlockTest, others) { auto block = ColumnHelper::create_block({1, 2, 3}); block.insert(ColumnHelper::create_column_with_name({"abc", "efg", "hij"})); - block.shrink_char_type_column_suffix_zero({1, 2}); - SipHash hash; block.update_hash(hash); diff --git a/be/test/storage/key_coder_test.cpp b/be/test/storage/key_coder_test.cpp index 5362645078677c..6eb0dabe7df816 100644 --- a/be/test/storage/key_coder_test.cpp +++ b/be/test/storage/key_coder_test.cpp @@ -123,7 +123,7 @@ typename CppTypeTraits::CppType decode_float(const std::string& enco template std::string encode_float(typename CppTypeTraits::CppType value) { std::string buf; - KeyCoderTraits::full_encode_ascending(&value, &buf); + KeyCoderTraits::full_encode_ascending(&value, &buf, /*char_len=*/0); return buf; } diff --git a/be/test/storage/segment/segment_writer_full_encode_keys_test.cpp b/be/test/storage/segment/segment_writer_full_encode_keys_test.cpp index c2748a43fb9eac..8848bed4e44012 100644 --- a/be/test/storage/segment/segment_writer_full_encode_keys_test.cpp +++ b/be/test/storage/segment/segment_writer_full_encode_keys_test.cpp @@ -87,9 +87,14 @@ TEST(SegmentWriterFullEncodeKeysTest, TestSegmentWriterKeyEncoding) { auto int_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_INT); auto str_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_VARCHAR); std::vector key_coders = {int_coder, str_coder, str_coder}; + // Per-column index sizes. Only CHAR's KeyCoder consults this; the columns + // here are INT and VARCHAR, which ignore it — zeros are fine. + std::vector key_index_sizes = {0, 0, 0}; //////////////////////////////////////////////////////////////////////////// - std::string encoded0 = SegmentWriter::_full_encode_keys(key_coders, key_columns, 0); - std::string encoded1 = SegmentWriter::_full_encode_keys(key_coders, key_columns, 1); + std::string encoded0 = + SegmentWriter::_full_encode_keys(key_coders, key_index_sizes, key_columns, 0); + std::string encoded1 = + SegmentWriter::_full_encode_keys(key_coders, key_index_sizes, key_columns, 1); //////////////////////////////////////////////////////////////////////////// std::cout << StringView(encoded0).dump_hex() << std::endl; // X'02850505050261026262' std::cout << StringView(encoded1).dump_hex() << std::endl; // X'0285050505026101026363' diff --git a/be/test/storage/segment/zone_map_index_test.cpp b/be/test/storage/segment/zone_map_index_test.cpp index 2a8c90594670b3..39bcb27844a8f1 100644 --- a/be/test/storage/segment/zone_map_index_test.cpp +++ b/be/test/storage/segment/zone_map_index_test.cpp @@ -250,16 +250,15 @@ class ColumnZoneMapTest : public testing::Test { DataTypeFactory::instance().create_data_type(TYPE_CHAR, true, 0, 0, length); auto tab_col = create_char_key(0, true, length); auto field = std::unique_ptr(StorageFieldFactory::create(*tab_col)); - std::string s_less_than_schema_length1(length - 1, 'a'); - std::string s_less_than_schema_length1_expect(length, 'a'); - s_less_than_schema_length1_expect[length - 1] = '\0'; - std::string s_less_than_schema_length2(length - 2, 'b'); - std::string s_less_than_schema_length2_expect(length, 'b'); - s_less_than_schema_length2_expect[length - 1] = '\0'; - s_less_than_schema_length2_expect[length - 2] = '\0'; + // CHAR ZoneMap min/max are now stored and surfaced unpadded — writers + // see real character lengths from OlapColumnDataConvertorChar, and + // from_olap_string strnlens any trailing '\0' from legacy padded + // segments so the Field always reflects the real character length. + std::string s_less_than_char_len1(length - 1, 'a'); + std::string s_less_than_char_len2(length - 2, 'b'); std::unique_ptr writer; ASSERT_TRUE(ZoneMapIndexWriter::create(data_type, field.get(), writer).ok()); - Slice slices[] = {Slice(s_less_than_schema_length1), Slice(s_less_than_schema_length2)}; + Slice slices[] = {Slice(s_less_than_char_len1), Slice(s_less_than_char_len2)}; writer->add_values(&slices, 2); if (pass_all) { writer->invalid_page_zone_map(); @@ -275,13 +274,13 @@ class ColumnZoneMapTest : public testing::Test { ASSERT_TRUE(file_writer->close().ok()); const auto& seg_zm = index_meta.zone_map_index().segment_zone_map(); - // Min/Max should be truncated to MAX_ZONE_MAP_INDEX_SIZE and last byte of Max is incremented - EXPECT_EQ(seg_zm.min().size(), s_less_than_schema_length1.size()); - EXPECT_EQ(seg_zm.min(), s_less_than_schema_length1); - EXPECT_EQ(seg_zm.max().size(), s_less_than_schema_length2.size()); - EXPECT_EQ(seg_zm.max(), s_less_than_schema_length2); + // On-disk min/max preserve the raw (unpadded) bytes. + EXPECT_EQ(seg_zm.min().size(), s_less_than_char_len1.size()); + EXPECT_EQ(seg_zm.min(), s_less_than_char_len1); + EXPECT_EQ(seg_zm.max().size(), s_less_than_char_len2.size()); + EXPECT_EQ(seg_zm.max(), s_less_than_char_len2); - // Verify ZoneMap::from_proto can correctly parse the truncated zone map + // Verify ZoneMap::from_proto materializes the unpadded Field. ZoneMap seg_zone_map; ASSERT_TRUE(ZoneMap::from_proto(seg_zm, data_type, seg_zone_map).ok()); EXPECT_EQ(seg_zone_map.has_null, false); @@ -290,10 +289,10 @@ class ColumnZoneMapTest : public testing::Test { EXPECT_EQ(seg_zone_map.has_positive_inf, false); EXPECT_EQ(seg_zone_map.has_negative_inf, false); EXPECT_EQ(seg_zone_map.has_nan, false); - EXPECT_EQ(seg_zone_map.min_value.get().size(), length); - EXPECT_EQ(seg_zone_map.min_value.get(), s_less_than_schema_length1_expect); - EXPECT_EQ(seg_zone_map.max_value.get().size(), length); - EXPECT_EQ(seg_zone_map.max_value.get(), s_less_than_schema_length2_expect); + EXPECT_EQ(seg_zone_map.min_value.get().size(), s_less_than_char_len1.size()); + EXPECT_EQ(seg_zone_map.min_value.get(), s_less_than_char_len1); + EXPECT_EQ(seg_zone_map.max_value.get().size(), s_less_than_char_len2.size()); + EXPECT_EQ(seg_zone_map.max_value.get(), s_less_than_char_len2); io::FileReaderSPtr file_reader; EXPECT_TRUE(_fs->open_file(file_path, &file_reader).ok()); @@ -316,12 +315,12 @@ class ColumnZoneMapTest : public testing::Test { EXPECT_EQ(page_zone_map.has_negative_inf, false); EXPECT_EQ(page_zone_map.has_nan, false); if (!pass_all) { - EXPECT_EQ(page_zone_map.min_value.get().size(), length); - EXPECT_EQ(page_zone_map.min_value.get(), - s_less_than_schema_length1_expect); - EXPECT_EQ(page_zone_map.max_value.get().size(), length); - EXPECT_EQ(page_zone_map.max_value.get(), - s_less_than_schema_length2_expect); + EXPECT_EQ(page_zone_map.min_value.get().size(), + s_less_than_char_len1.size()); + EXPECT_EQ(page_zone_map.min_value.get(), s_less_than_char_len1); + EXPECT_EQ(page_zone_map.max_value.get().size(), + s_less_than_char_len2.size()); + EXPECT_EQ(page_zone_map.max_value.get(), s_less_than_char_len2); } } } diff --git a/be/test/util/char_type_padding_test.cpp b/be/test/util/char_type_padding_test.cpp index 869bd35ad7d003..c379e0db214bea 100644 --- a/be/test/util/char_type_padding_test.cpp +++ b/be/test/util/char_type_padding_test.cpp @@ -21,54 +21,45 @@ #include +#include "core/block/column_with_type_and_name.h" #include "core/column/column.h" #include "core/column/column_string.h" +#include "core/data_type/data_type_string.h" #include "core/string_ref.h" #include "gtest/gtest_pred_impl.h" #include "storage/iterator/olap_data_convertor.h" +#include "util/slice.h" namespace doris { -using ConvertorChar = OlapBlockDataConvertor::OlapColumnDataConvertorChar; +// OlapColumnDataConvertorChar no longer pads CHAR to the declared schema +// length — downstream consumers (page writers, ZoneMap, BF) see slices with +// their real character length, and KeyCoder pads to fixed width on its own +// when encoding short-key / PK entries. These tests verify the convertor +// passes through varying-length CHAR rows without injecting trailing '\0'. -TEST(CharTypePaddingTest, CharTypePaddingFullTest) { +TEST(CharTypePaddingTest, ConvertorSurfacesUnpaddedSlices) { auto input = ColumnString::create(); - - std::string str = "Allemande"; - size_t rows = 10; - - for (size_t i = 0; i < rows; i++) { - input->insert_data(str.data(), str.length()); - } - EXPECT_FALSE(ConvertorChar::should_padding(input.get(), str.length())); - - input->insert_data(str.data(), str.length() - 1); - EXPECT_TRUE(ConvertorChar::should_padding(input.get(), str.length())); -} - -TEST(CharTypePaddingTest, CharTypePaddingDataTest) { - auto input = ColumnString::create(); - std::string str = "Allemande"; + // Insert rows with progressively shorter CHAR values (within CHAR(N=str.length())) size_t rows = str.length(); - for (int i = 0; i < rows; i++) { + for (size_t i = 0; i < rows; i++) { input->insert_data(str.data(), str.length() - i); } - auto output = ConvertorChar::clone_and_padding(input.get(), str.length()); + OlapBlockDataConvertor::OlapColumnDataConvertorChar convertor(str.length()); + auto data_type = std::make_shared(); + ColumnWithTypeAndName ctn(input->get_ptr(), data_type, "c"); + convertor.set_source_column(ctn, 0, rows); + ASSERT_TRUE(convertor.convert_to_olap().ok()); - for (int i = 0; i < rows; i++) { - auto cell = output->get_data_at(i).to_string(); - EXPECT_EQ(cell.length(), str.length()); - - auto str_real = std::string(cell.data(), str.length() - i); - auto str_expect = str.substr(0, str.length() - i); - EXPECT_EQ(str_real, str_expect); - - for (int j = str.length() - i; j < str.length(); j++) { - EXPECT_EQ(cell[j], 0); - } + const auto* slices = static_cast(convertor.get_data()); + for (size_t i = 0; i < rows; i++) { + // Each slice carries the row's real character length, not the padded + // schema length. + EXPECT_EQ(slices[i].size, str.length() - i); + EXPECT_EQ(std::string(slices[i].data, slices[i].size), str.substr(0, str.length() - i)); } } diff --git a/gensrc/proto/segment_v2.proto b/gensrc/proto/segment_v2.proto index 3c8e646acd9b2f..394bc45812fdcb 100644 --- a/gensrc/proto/segment_v2.proto +++ b/gensrc/proto/segment_v2.proto @@ -388,4 +388,10 @@ message BloomFilterIndexPB { optional BloomFilterAlgorithmPB algorithm = 2; // required: meta for bloom filters optional IndexedColumnMetaPB bloom_filter = 3; + // When true, the bloom filter for a CHAR column was built from unpadded + // (real-character-length) bytes — strnlen applied at write time. Allows + // CHAR predicates (now unpadded after read-side strip) to probe the BF. + // Absent / false on old segments where CHAR bytes were hashed padded; + // readers must skip BF probe for CHAR in that case to avoid false negatives. + optional bool unpadded_char_filter = 4 [default = false]; }