From 3b9e47d0a8bb7256bd88834d89fe374adcdf8b8a Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Mon, 2 Mar 2026 14:19:33 +0800 Subject: [PATCH 01/28] feat: btree global index --- src/paimon/CMakeLists.txt | 2 + src/paimon/common/global_index/CMakeLists.txt | 6 +- .../global_index/btree/btree_file_footer.cpp | 91 +++++++ .../global_index/btree/btree_file_footer.h | 61 +++++ .../btree/btree_global_index_factory.cpp | 33 +++ .../btree/btree_global_index_factory.h | 39 +++ .../btree/btree_global_indexer.cpp | 118 +++++++++ .../global_index/btree/btree_global_indexer.h | 64 +++++ .../global_index/btree/btree_index_meta.cpp | 39 +++ .../global_index/btree/btree_index_meta.h | 66 +++++ src/paimon/common/sst/block_cache.h | 7 +- src/paimon/common/sst/sst_file_io_test.cpp | 4 +- src/paimon/common/sst/sst_file_reader.cpp | 4 +- .../common/utils/roaring_navigable_map64.cpp | 250 ++++++++++++++++++ .../common/utils/roaring_navigable_map64.h | 230 ++++++++++++++++ .../utils/roaring_navigable_map64_test.cpp | 113 ++++++++ 16 files changed, 1118 insertions(+), 9 deletions(-) create mode 100644 src/paimon/common/global_index/btree/btree_file_footer.cpp create mode 100644 src/paimon/common/global_index/btree/btree_file_footer.h create mode 100644 src/paimon/common/global_index/btree/btree_global_index_factory.cpp create mode 100644 src/paimon/common/global_index/btree/btree_global_index_factory.h create mode 100644 src/paimon/common/global_index/btree/btree_global_indexer.cpp create mode 100644 src/paimon/common/global_index/btree/btree_global_indexer.h create mode 100644 src/paimon/common/global_index/btree/btree_index_meta.cpp create mode 100644 src/paimon/common/global_index/btree/btree_index_meta.h create mode 100644 src/paimon/common/utils/roaring_navigable_map64.cpp create mode 100644 src/paimon/common/utils/roaring_navigable_map64.h create mode 100644 src/paimon/common/utils/roaring_navigable_map64_test.cpp diff --git a/src/paimon/CMakeLists.txt b/src/paimon/CMakeLists.txt index 17731af21..0e60e8501 100644 --- a/src/paimon/CMakeLists.txt +++ b/src/paimon/CMakeLists.txt @@ -134,6 +134,7 @@ set(PAIMON_COMMON_SRCS common/utils/byte_range_combiner.cpp common/utils/roaring_bitmap32.cpp common/utils/roaring_bitmap64.cpp + common/utils/roaring_navigable_map64.cpp common/utils/status.cpp common/utils/string_utils.cpp) @@ -430,6 +431,7 @@ if(PAIMON_BUILD_TESTS) common/utils/rapidjson_util_test.cpp common/utils/roaring_bitmap32_test.cpp common/utils/roaring_bitmap64_test.cpp + common/utils/roaring_navigable_map64_test.cpp common/utils/range_helper_test.cpp common/utils/read_ahead_cache_test.cpp common/utils/byte_range_combiner_test.cpp diff --git a/src/paimon/common/global_index/CMakeLists.txt b/src/paimon/common/global_index/CMakeLists.txt index ceb4f9a72..8c366acbd 100644 --- a/src/paimon/common/global_index/CMakeLists.txt +++ b/src/paimon/common/global_index/CMakeLists.txt @@ -13,7 +13,11 @@ # limitations under the License. set(PAIMON_GLOBAL_INDEX_SRC bitmap/bitmap_global_index.cpp - bitmap/bitmap_global_index_factory.cpp) + bitmap/bitmap_global_index_factory.cpp + btree/btree_file_footer.cpp + btree/btree_global_index_factory.cpp + btree/btree_global_indexer.cpp + btree/btree_index_meta.cpp) add_paimon_lib(paimon_global_index SOURCES diff --git a/src/paimon/common/global_index/btree/btree_file_footer.cpp b/src/paimon/common/global_index/btree/btree_file_footer.cpp new file mode 100644 index 000000000..06f17f0fe --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_file_footer.cpp @@ -0,0 +1,91 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "paimon/common/global_index/btree/btree_file_footer.h" + +namespace paimon { + +Result> BTreeFileFooter::Read( + const std::shared_ptr& input) { + // read bloom filter and index handles + std::shared_ptr bloom_filter_handle = + std::make_shared(input->ReadLong(), input->ReadInt(), input->ReadLong()); + if (bloom_filter_handle->Offset() == 0 && bloom_filter_handle->Size() == 0 && + bloom_filter_handle->ExpectedEntries() == 0) { + bloom_filter_handle = nullptr; + } + + std::shared_ptr index_block_handle = + std::make_shared(input->ReadLong(), input->ReadInt()); + + std::shared_ptr null_bitmap_handle = + std::make_shared(input->ReadLong(), input->ReadInt()); + if (null_bitmap_handle->Offset() == 0 && null_bitmap_handle->Size() == 0) { + null_bitmap_handle = nullptr; + } + + // skip padding + input->SetPosition(ENCODED_LENGTH - 4); + + // verify magic number + int32_t magic_number = input->ReadInt(); + if (magic_number != MAGIC_NUMBER) { + return Status::IOError("File is not a table (bad magic number)"); + } + + return std::make_shared(bloom_filter_handle, index_block_handle, + null_bitmap_handle); +} + +std::shared_ptr BTreeFileFooter::Write(const std::shared_ptr& footer, + MemoryPool* pool) { + auto output = std::make_shared(ENCODED_LENGTH, pool); + return BTreeFileFooter::Write(footer, output); +} + +std::shared_ptr BTreeFileFooter::Write( + const std::shared_ptr& footer, + const std::shared_ptr& ouput) { + // write bloom filter and index handles + auto bloom_filter_handle = footer->GetBloomFilterHandle(); + if (!bloom_filter_handle) { + ouput->WriteValue(static_cast(0)); + ouput->WriteValue(static_cast(0)); + ouput->WriteValue(static_cast(0)); + } else { + ouput->WriteValue(bloom_filter_handle->Offset()); + ouput->WriteValue(bloom_filter_handle->Size()); + ouput->WriteValue(bloom_filter_handle->ExpectedEntries()); + } + + auto index_block_handle = footer->GetIndexBlockHandle(); + ouput->WriteValue(index_block_handle->Offset()); + ouput->WriteValue(index_block_handle->Size()); + + auto null_bitmap_handle = footer->GetNullBitmapHandle(); + if (!null_bitmap_handle) { + ouput->WriteValue(static_cast(0)); + ouput->WriteValue(static_cast(0)); + } else { + ouput->WriteValue(null_bitmap_handle->Offset()); + ouput->WriteValue(null_bitmap_handle->Size()); + } + + // write magic number + ouput->WriteValue(MAGIC_NUMBER); +} + +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_file_footer.h b/src/paimon/common/global_index/btree/btree_file_footer.h new file mode 100644 index 000000000..19a9f9b73 --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_file_footer.h @@ -0,0 +1,61 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include "paimon/common/sst/bloom_filter_handle.h" +#include "paimon/common/sst/block_handle.h" +#include "paimon/common/memory/memory_slice_input.h" +#include "paimon/common/memory/memory_slice_output.h" + +namespace paimon { +/// The Footer for BTree file. +class BTreeFileFooter { + public: + static Result> Read(const std::shared_ptr& input); + static std::shared_ptr Write(const std::shared_ptr& footer, MemoryPool* pool); + static std::shared_ptr Write(const std::shared_ptr& footer, const std::shared_ptr& ouput); + + public: + BTreeFileFooter(const std::shared_ptr& bloom_filter_handle, const std::shared_ptr& index_block_handle, + const std::shared_ptr& null_bitmap_handle) + : bloom_filter_handle_(bloom_filter_handle), index_block_handle_(index_block_handle), null_bitmap_handle_(null_bitmap_handle) {} + + std::shared_ptr GetBloomFilterHandle() const { + return bloom_filter_handle_; + } + + std::shared_ptr GetIndexBlockHandle() const { + return index_block_handle_; + } + + std::shared_ptr GetNullBitmapHandle() const { + return null_bitmap_handle_; + } + + + public: + static constexpr int32_t MAGIC_NUMBER = 198732882; + static constexpr int32_t ENCODED_LENGTH = 48; + + private: + std::shared_ptr bloom_filter_handle_; + std::shared_ptr index_block_handle_; + std::shared_ptr null_bitmap_handle_; +}; + +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_index_factory.cpp b/src/paimon/common/global_index/btree/btree_global_index_factory.cpp new file mode 100644 index 000000000..707de823e --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_global_index_factory.cpp @@ -0,0 +1,33 @@ +/* + * Copyright 2024-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "paimon/common/global_index/btree/btree_global_index_factory.h" + +#include + +#include "paimon/common/global_index/btree/btree_global_indexer.h" +namespace paimon { + +const char BTreeGlobalIndexerFactory::IDENTIFIER[] = "btree"; + +Result> BTreeGlobalIndexerFactory::Create( + const std::map& options) const { + return std::make_unique(options); +} + +REGISTER_PAIMON_FACTORY(BTreeGlobalIndexerFactory); + +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_index_factory.h b/src/paimon/common/global_index/btree/btree_global_index_factory.h new file mode 100644 index 000000000..1eb75f00b --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_global_index_factory.h @@ -0,0 +1,39 @@ +/* + * Copyright 2024-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include "paimon/global_index/global_indexer_factory.h" + +namespace paimon { +/// Factory for creating btree global indexers. +class BTreeGlobalIndexerFactory : public GlobalIndexerFactory { + public: + static const char IDENTIFIER[]; + + const char* Identifier() const override { + return IDENTIFIER; + } + + Result> Create( + const std::map& options) const override; +}; + +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp new file mode 100644 index 000000000..de6f954bb --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -0,0 +1,118 @@ +/* + * Copyright 2024-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "paimon/common/global_index/btree/btree_global_indexer.h" + +#include "paimon/common/global_index/btree/btree_file_footer.h" +#include "paimon/common/global_index/btree/btree_index_meta.h" +#include "paimon/common/memory/memory_slice.h" +#include "paimon/common/memory/memory_slice_input.h" +#include "paimon/common/sst/block_cache.h" +#include "paimon/common/sst/block_handle.h" +#include "paimon/common/utils/crc32c.h" +#include "paimon/common/utils/roaring_navigable_map64.h" +#include "paimon/file_index/bitmap_index_result.h" +#include "paimon/global_index/bitmap_global_index_result.h" + +namespace paimon { +Result> BTreeGlobalIndexer::CreateReader( + ::ArrowSchema* arrow_schema, const std::shared_ptr& file_reader, + const std::vector& files, const std::shared_ptr& pool) const { + if (files.size() != 1) { + return Status::Invalid( + "invalid GlobalIndexIOMeta for BTreeGlobalIndex, exist multiple metas"); + } + const auto& meta = files[0]; + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr in, + file_reader->GetInputStream(meta.file_path)); + + // prepare file footer + auto cache_manager = std::make_shared(); + auto block_cache = std::make_shared(meta.file_path, in, pool.get(), cache_manager); + auto segment = block_cache->GetBlock(meta.file_size - BTreeFileFooter::ENCODED_LENGTH, + BTreeFileFooter::ENCODED_LENGTH, true); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr footer, + BTreeFileFooter::Read(MemorySlice::Wrap(segment)->ToInput())); + + auto index_meta = BTreeIndexMeta::Deserialize(meta.metadata, pool.get()); + + return std::make_shared(file_reader, files, pool); +} + +Result> BTreeGlobalIndexer::ToGlobalIndexResult( + int64_t range_end, const std::shared_ptr& result) { + if (auto remain = std::dynamic_pointer_cast(result)) { + return std::make_shared([range_end]() -> Result { + RoaringBitmap64 bitmap; + bitmap.AddRange(0, range_end + 1); + return bitmap; + }); + } else if (auto skip = std::dynamic_pointer_cast(result)) { + return std::make_shared( + []() -> Result { return RoaringBitmap64(); }); + } else if (auto bitmap_result = std::dynamic_pointer_cast(result)) { + return std::make_shared( + [bitmap_result]() -> Result { + PAIMON_ASSIGN_OR_RAISE(const RoaringBitmap32* bitmap, bitmap_result->GetBitmap()); + return RoaringBitmap64(*bitmap); + }); + } + return Status::Invalid( + "invalid FileIndexResult, supposed to be Remain or Skip or BitmapIndexResult"); +} + +Result> BTreeGlobalIndexer::ReadNullBitmap( + const std::shared_ptr& cache, const std::shared_ptr& block_handle) { + auto null_bitmap = std::make_shared(); + if (block_handle == nullptr) { + return null_bitmap; + } + + // Read bytes and crc value + auto segment = cache->GetBlock(block_handle->Offset(), block_handle->Size() + 4, false); + + auto slice = MemorySlice::Wrap(segment); + auto slice_input = slice->ToInput(); + + // Read null bitmap data + auto null_bitmap_slice = slice_input->ReadSlice(block_handle->Size()); + auto null_bitmap_bytes = null_bitmap_slice->GetHeapMemory(); + + // Calculate CRC32C checksum + uint32_t crc_value = CRC32C::calculate(reinterpret_cast(null_bitmap_bytes->data()), + null_bitmap_bytes->size()); + + // Read expected CRC value + int32_t expected_crc_value = slice_input->ReadInt(); + + // Verify CRC checksum + if (crc_value != static_cast(expected_crc_value)) { + return Status::Invalid("CRC check failure during decoding null bitmap"); + } + + // Deserialize null bitmap + try { + std::vector data(null_bitmap_bytes->data(), + null_bitmap_bytes->data() + null_bitmap_bytes->size()); + null_bitmap->Deserialize(data); + } catch (const std::exception& e) { + return Status::Invalid("Fail to deserialize null bitmap but crc check passed, " + "this means the ser/de algorithms not match: " + std::string(e.what())); + } + + return null_bitmap; +} + +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.h b/src/paimon/common/global_index/btree/btree_global_indexer.h new file mode 100644 index 000000000..fa1414984 --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_global_indexer.h @@ -0,0 +1,64 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include "paimon/common/file_index/bitmap/bitmap_file_index.h" +#include "paimon/common/utils/roaring_navigable_map64.h" +#include "paimon/global_index/global_indexer.h" +#include "paimon/global_index/io/global_index_file_reader.h" + +namespace paimon { +class BTreeGlobalIndexer : public GlobalIndexer { + public: + explicit BTreeGlobalIndexer(const std::map& options) + : options_(options) {} + + Result> CreateWriter( + const std::string& field_name, ::ArrowSchema* arrow_schema, + const std::shared_ptr& file_writer, + const std::shared_ptr& pool) const override { + return Status::NotImplemented("Writing btree global index not support yet"); + } + + Result> CreateReader( + ::ArrowSchema* arrow_schema, const std::shared_ptr& file_reader, + const std::vector& files, + const std::shared_ptr& pool) const override; + + private: + static Result> ToGlobalIndexResult( + int64_t range_end, const std::shared_ptr& result); + + static Result> ReadNullBitmap( + const std::shared_ptr& cache, const std::shared_ptr& block_handle); + + private: + std::map options_; +}; + +class BTreeGlobalIndexReader : public GlobalIndexReader { + public: + BTreeGlobalIndexReader(const std::shared_ptr& file_reader, + const std::vector& files, + const std::shared_ptr& pool) {} +}; + +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_index_meta.cpp b/src/paimon/common/global_index/btree/btree_index_meta.cpp new file mode 100644 index 000000000..a1b5eb2b3 --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_index_meta.cpp @@ -0,0 +1,39 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "paimon/common/global_index/btree/btree_index_meta.h" + +namespace paimon { + +std::shared_ptr BTreeIndexMeta::Deserialize(const std::shared_ptr& meta, + paimon::MemoryPool* pool) { + auto input = MemorySlice::Wrap(meta)->ToInput(); + auto first_key_len = input->ReadInt(); + std::shared_ptr first_key; + if (first_key_len) { + first_key = std::move(input->ReadSlice(first_key_len)->CopyBytes(pool)); + } + auto last_key_len = input->ReadInt(); + std::shared_ptr last_key; + if (last_key_len) { + last_key = std::move(input->ReadSlice(last_key_len)->CopyBytes(pool)); + } + auto has_nulls = input->ReadByte() == 1; + return std::make_shared(first_key, last_key, has_nulls); +} +std::shared_ptr BTreeIndexMeta::Serialize(paimon::MemoryPool* pool) {} + +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_index_meta.h b/src/paimon/common/global_index/btree/btree_index_meta.h new file mode 100644 index 000000000..17e961c14 --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_index_meta.h @@ -0,0 +1,66 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "paimon/memory/bytes.h" +#include "paimon/common/memory/memory_slice_input.h" +#include + +namespace paimon { +/// Index Meta of each BTree index file. The first key and last key of this meta could be null if +/// the +/// entire btree index file only contains nulls. +class BTreeIndexMeta { +public: + static std::shared_ptr Deserialize(const std::shared_ptr& meta, paimon::MemoryPool *pool); + static std::shared_ptr Serialize(paimon::MemoryPool *pool); + + public: + BTreeIndexMeta(const std::shared_ptr& first_key, const std::shared_ptr& last_key, + bool has_nulls) + : first_key_(first_key), last_key_(last_key), has_nulls_(has_nulls) {} + + std::shared_ptr FirstKey() const { + return first_key_; + } + + std::shared_ptr LastKey() const { + return last_key_; + } + + bool HasNulls() const { + return has_nulls_; + } + + bool OnlyNulls() const { + return !(first_key_ || last_key_); + } + + private: + int32_t Size() { + // 9 bytes => first_key_len(4 byte) + last_key_len(4 byte) + has_null(1 byte) + return (first_key_.get() ? 0 : first_key_->size()) + + (last_key_.get() ? 0 : last_key_->size()) + 9; + } + + private: + std::shared_ptr first_key_; + std::shared_ptr last_key_; + bool has_nulls_; +}; + +} // namespace paimon diff --git a/src/paimon/common/sst/block_cache.h b/src/paimon/common/sst/block_cache.h index f2db0e677..80a942fa7 100644 --- a/src/paimon/common/sst/block_cache.h +++ b/src/paimon/common/sst/block_cache.h @@ -31,8 +31,8 @@ class BlockCache { public: BlockCache(const std::string& file_path, const std::shared_ptr& in, const std::shared_ptr& pool, - std::unique_ptr&& cache_manager) - : file_path_(file_path), in_(in), pool_(pool), cache_manager_(std::move(cache_manager)) {} + const std::shared_ptr& cache_manager) + : file_path_(file_path), in_(in), pool_(pool), cache_manager_(cache_manager) {} ~BlockCache() = default; @@ -65,8 +65,7 @@ class BlockCache { std::string file_path_; std::shared_ptr in_; std::shared_ptr pool_; - - std::unique_ptr cache_manager_; + std::shared_ptr cache_manager_; std::unordered_map, std::shared_ptr> blocks_; }; } // namespace paimon diff --git a/src/paimon/common/sst/sst_file_io_test.cpp b/src/paimon/common/sst/sst_file_io_test.cpp index a782ec26b..8fe769899 100644 --- a/src/paimon/common/sst/sst_file_io_test.cpp +++ b/src/paimon/common/sst/sst_file_io_test.cpp @@ -177,8 +177,8 @@ TEST_P(SstFileIOTest, TestJavaCompatibility) { // key range [1_000_000, 2_000_000], value is equal to the key std::string file = GetDataDir() + "/sst/" + param.file_path; ASSERT_OK_AND_ASSIGN(std::shared_ptr in, fs_->Open(file)); - auto block_cache = - std::make_shared(file, in, pool_, std::make_unique()); + auto cache_manager = std::make_shared(); + auto block_cache = std::make_shared(file, in, pool_, cache_manager); // test read auto reader_ret = SstFileReader::Create(pool_, fs_, file, comparator_); diff --git a/src/paimon/common/sst/sst_file_reader.cpp b/src/paimon/common/sst/sst_file_reader.cpp index f337d790a..b6c4daa85 100644 --- a/src/paimon/common/sst/sst_file_reader.cpp +++ b/src/paimon/common/sst/sst_file_reader.cpp @@ -28,8 +28,8 @@ Result> SstFileReader::Create( comparator) { PAIMON_ASSIGN_OR_RAISE(std::shared_ptr in, fs->Open(file_path)); PAIMON_ASSIGN_OR_RAISE(uint64_t file_len, in->Length()); - auto block_cache = - std::make_shared(file_path, in, pool, std::make_unique()); + auto cache_manager = std::make_shared(); + auto block_cache = std::make_shared(file_path, in, pool.get(), cache_manager); // read footer auto segment = block_cache->GetBlock(file_len - BlockFooter::ENCODED_LENGTH, diff --git a/src/paimon/common/utils/roaring_navigable_map64.cpp b/src/paimon/common/utils/roaring_navigable_map64.cpp new file mode 100644 index 000000000..39b9cb366 --- /dev/null +++ b/src/paimon/common/utils/roaring_navigable_map64.cpp @@ -0,0 +1,250 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "paimon/common/utils/roaring_navigable_map64.h" + +#include +#include +#include + +#include "paimon/utils/range.h" +#include "paimon/utils/roaring_bitmap64.h" + +namespace paimon { + +class RoaringNavigableMap64::Impl { + public: + RoaringBitmap64 bitmap; +}; + +class RoaringNavigableMap64::Iterator::Impl { + public: + explicit Impl(const RoaringBitmap64& bitmap) : iterator(bitmap.Begin()) {} + explicit Impl(const RoaringBitmap64::Iterator& iter) : iterator(iter) {} + RoaringBitmap64::Iterator iterator; +}; + +RoaringNavigableMap64::RoaringNavigableMap64() : impl_(std::make_unique()) {} + +RoaringNavigableMap64::RoaringNavigableMap64(const RoaringNavigableMap64& other) + : impl_(std::make_unique()) { + impl_->bitmap = other.impl_->bitmap; +} + +RoaringNavigableMap64::RoaringNavigableMap64(RoaringNavigableMap64&& other) noexcept = default; + +RoaringNavigableMap64& RoaringNavigableMap64::operator=(const RoaringNavigableMap64& other) { + if (this != &other) { + impl_->bitmap = other.impl_->bitmap; + } + return *this; +} + +RoaringNavigableMap64& RoaringNavigableMap64::operator=(RoaringNavigableMap64&& other) noexcept = + default; + +RoaringNavigableMap64::~RoaringNavigableMap64() = default; + +void RoaringNavigableMap64::AddRange(const Range& range) { + impl_->bitmap.AddRange(range.from, range.to + 1); +} + +bool RoaringNavigableMap64::Contains(int64_t x) const { + return impl_->bitmap.Contains(x); +} + +void RoaringNavigableMap64::Add(int64_t x) { + impl_->bitmap.Add(x); +} + +void RoaringNavigableMap64::Or(const RoaringNavigableMap64& other) { + impl_->bitmap |= other.impl_->bitmap; +} + +void RoaringNavigableMap64::And(const RoaringNavigableMap64& other) { + impl_->bitmap &= other.impl_->bitmap; +} + +void RoaringNavigableMap64::AndNot(const RoaringNavigableMap64& other) { + impl_->bitmap -= other.impl_->bitmap; +} + +bool RoaringNavigableMap64::IsEmpty() const { + return impl_->bitmap.IsEmpty(); +} + +bool RoaringNavigableMap64::RunOptimize() { + // Note: RoaringBitmap64 doesn't have a direct RunOptimize method + // This is a placeholder - in practice, optimization happens automatically + return false; +} + +int64_t RoaringNavigableMap64::GetLongCardinality() const { + return impl_->bitmap.Cardinality(); +} + +int32_t RoaringNavigableMap64::GetIntCardinality() const { + return static_cast(impl_->bitmap.Cardinality()); +} + +void RoaringNavigableMap64::Clear() { + impl_->bitmap = RoaringBitmap64(); +} + +std::vector RoaringNavigableMap64::Serialize() const { + // This is a simplified serialization - in practice, you might want to use + // a more sophisticated approach + auto bytes = impl_->bitmap.Serialize(nullptr); // nullptr for default pool + if (!bytes) { + return {}; + } + + std::vector result(bytes->size()); + std::memcpy(result.data(), bytes->data(), bytes->size()); + return result; +} + +void RoaringNavigableMap64::Deserialize(const std::vector& data) { + // This is a simplified deserialization - in practice, you might want to use + // a more sophisticated approach + impl_->bitmap.Deserialize(reinterpret_cast(data.data()), data.size()); +} + +std::vector RoaringNavigableMap64::ToRangeList() const { + std::vector ranges; + if (IsEmpty()) { + return ranges; + } + + int64_t current_start = -1; + int64_t current_end = -1; + + for (auto it = begin(); it != end(); ++it) { + int64_t value = *it; + if (current_start == -1) { + current_start = value; + current_end = value; + } else if (value == current_end + 1) { + // Continue the current range + current_end = value; + } else { + // End the current range and start a new one + ranges.emplace_back(current_start, current_end); + current_start = value; + current_end = value; + } + } + + if (current_start != -1) { + ranges.emplace_back(current_start, current_end); + } + + return ranges; +} + +RoaringNavigableMap64 RoaringNavigableMap64::BitmapOf(const std::vector& values) { + RoaringNavigableMap64 result; + for (int64_t value : values) { + result.Add(value); + } + return result; +} + +RoaringNavigableMap64 RoaringNavigableMap64::And(const RoaringNavigableMap64& x1, + const RoaringNavigableMap64& x2) { + RoaringNavigableMap64 result; + result.impl_->bitmap = RoaringBitmap64::And(x1.impl_->bitmap, x2.impl_->bitmap); + return result; +} + +RoaringNavigableMap64 RoaringNavigableMap64::Or(const RoaringNavigableMap64& x1, + const RoaringNavigableMap64& x2) { + RoaringNavigableMap64 result; + result.impl_->bitmap = RoaringBitmap64::Or(x1.impl_->bitmap, x2.impl_->bitmap); + return result; +} + +bool RoaringNavigableMap64::operator==(const RoaringNavigableMap64& other) const { + return impl_->bitmap == other.impl_->bitmap; +} + +bool RoaringNavigableMap64::operator!=(const RoaringNavigableMap64& other) const { + return !(*this == other); +} + +// Iterator implementation +RoaringNavigableMap64::Iterator::Iterator(const RoaringNavigableMap64& bitmap) + : impl_(std::make_unique(bitmap.impl_->bitmap.Begin())) {} + +RoaringNavigableMap64::Iterator::Iterator(const RoaringNavigableMap64& bitmap, bool is_end) + : impl_(std::make_unique(bitmap.impl_->bitmap.End())) {} + +RoaringNavigableMap64::Iterator::Iterator(const Iterator& other) + : impl_(std::make_unique(other.impl_->iterator)) {} + +RoaringNavigableMap64::Iterator::Iterator(Iterator&& other) noexcept = default; + +RoaringNavigableMap64::Iterator& RoaringNavigableMap64::Iterator::operator=(const Iterator& other) { + if (this != &other) { + impl_ = std::make_unique(other.impl_->iterator); + } + return *this; +} + +RoaringNavigableMap64::Iterator& RoaringNavigableMap64::Iterator::operator=( + Iterator&& other) noexcept = default; + +RoaringNavigableMap64::Iterator::~Iterator() = default; + +int64_t RoaringNavigableMap64::Iterator::operator*() const { + return *impl_->iterator; +} + +RoaringNavigableMap64::Iterator& RoaringNavigableMap64::Iterator::operator++() { + ++impl_->iterator; + return *this; +} + +RoaringNavigableMap64::Iterator RoaringNavigableMap64::Iterator::operator++(int) { + Iterator temp(*this); + ++(*this); + return temp; +} + +bool RoaringNavigableMap64::Iterator::operator==(const Iterator& other) const { + return impl_->iterator == other.impl_->iterator; +} + +bool RoaringNavigableMap64::Iterator::operator!=(const Iterator& other) const { + return !(*this == other); +} + +RoaringNavigableMap64::Iterator RoaringNavigableMap64::begin() const { + return Iterator(*this); +} + +RoaringNavigableMap64::Iterator RoaringNavigableMap64::end() const { + // Create an iterator that represents the end + // For now, we'll create an iterator and set it to a special state + // In practice, this might need a more sophisticated approach + Iterator it(*this); + // Move to the end by advancing past the last element + auto underlying_end = impl_->bitmap.End(); + it.impl_->iterator = underlying_end; + return it; +} + +} // namespace paimon \ No newline at end of file diff --git a/src/paimon/common/utils/roaring_navigable_map64.h b/src/paimon/common/utils/roaring_navigable_map64.h new file mode 100644 index 000000000..09fdf6bd9 --- /dev/null +++ b/src/paimon/common/utils/roaring_navigable_map64.h @@ -0,0 +1,230 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include "paimon/utils/range.h" +#include "paimon/utils/roaring_bitmap64.h" +#include "paimon/visibility.h" + +namespace paimon { + +/** + * A compressed bitmap for 64-bit integer aggregated by tree. + * This is a wrapper around RoaringBitmap64 that provides additional functionality + * and a more convenient interface. + */ +class PAIMON_EXPORT RoaringNavigableMap64 { + public: + /// Default constructor creates an empty bitmap + RoaringNavigableMap64(); + + /// Copy constructor + RoaringNavigableMap64(const RoaringNavigableMap64& other); + + /// Move constructor + RoaringNavigableMap64(RoaringNavigableMap64&& other) noexcept; + + /// Copy assignment operator + RoaringNavigableMap64& operator=(const RoaringNavigableMap64& other); + + /// Move assignment operator + RoaringNavigableMap64& operator=(RoaringNavigableMap64&& other) noexcept; + + /// Destructor + ~RoaringNavigableMap64(); + + /** + * Adds a range of values to the bitmap. + * @param range The range to add (inclusive of both endpoints) + */ + void AddRange(const Range& range); + + /** + * Checks if the bitmap contains the given value. + * @param x The value to check + * @return true if the value is in the bitmap, false otherwise + */ + bool Contains(int64_t x) const; + + /** + * Adds a single value to the bitmap. + * @param x The value to add + */ + void Add(int64_t x); + + /** + * Performs a bitwise OR operation with another bitmap. + * @param other The other bitmap to OR with + */ + void Or(const RoaringNavigableMap64& other); + + /** + * Performs a bitwise AND operation with another bitmap. + * @param other The other bitmap to AND with + */ + void And(const RoaringNavigableMap64& other); + + /** + * Performs a bitwise AND NOT operation with another bitmap. + * This removes all elements from this bitmap that are present in the other bitmap. + * @param other The other bitmap to AND NOT with + */ + void AndNot(const RoaringNavigableMap64& other); + + /** + * Checks if the bitmap is empty. + * @return true if the bitmap contains no elements, false otherwise + */ + bool IsEmpty() const; + + /** + * Optimizes the bitmap by applying run-length encoding. + * @return true if the bitmap was modified, false otherwise + */ + bool RunOptimize(); + + /** + * Gets the cardinality of the bitmap as a 64-bit integer. + * @return The number of elements in the bitmap + */ + int64_t GetLongCardinality() const; + + /** + * Gets the cardinality of the bitmap as a 32-bit integer. + * @return The number of elements in the bitmap (truncated to 32 bits) + */ + int32_t GetIntCardinality() const; + + /** + * Clears all elements from the bitmap. + */ + void Clear(); + + /** + * Serializes the bitmap to a byte array. + * @return A vector containing the serialized bitmap + */ + std::vector Serialize() const; + + /** + * Deserializes the bitmap from a byte array. + * @param data The byte array containing the serialized bitmap + */ + void Deserialize(const std::vector& data); + + /** + * Converts this bitmap to a list of contiguous ranges. + * This is useful for interoperability with APIs that expect std::vector. + * @return A vector of ranges representing the bitmap + */ + std::vector ToRangeList() const; + + /** + * Creates a new bitmap from a list of values. + * @param values The values to include in the bitmap + * @return A new RoaringNavigableMap64 containing the specified values + */ + static RoaringNavigableMap64 BitmapOf(const std::vector& values); + + /** + * Computes the intersection of two bitmaps. + * @param x1 The first bitmap + * @param x2 The second bitmap + * @return A new bitmap containing the intersection of the two input bitmaps + */ + static RoaringNavigableMap64 And(const RoaringNavigableMap64& x1, + const RoaringNavigableMap64& x2); + + /** + * Computes the union of two bitmaps. + * @param x1 The first bitmap + * @param x2 The second bitmap + * @return A new bitmap containing the union of the two input bitmaps + */ + static RoaringNavigableMap64 Or(const RoaringNavigableMap64& x1, + const RoaringNavigableMap64& x2); + + /** + * Equality operator. + * @param other The other bitmap to compare with + * @return true if the bitmaps are equal, false otherwise + */ + bool operator==(const RoaringNavigableMap64& other) const; + + /** + * Inequality operator. + * @param other The other bitmap to compare with + * @return true if the bitmaps are not equal, false otherwise + */ + bool operator!=(const RoaringNavigableMap64& other) const; + + /** + * Iterator class for iterating over the values in the bitmap. + */ + class Iterator { + public: + using iterator_category = std::forward_iterator_tag; + using value_type = int64_t; + using difference_type = std::ptrdiff_t; + using pointer = const int64_t*; + using reference = const int64_t&; + + explicit Iterator(const RoaringNavigableMap64& bitmap); + Iterator(const Iterator& other); + Iterator(Iterator&& other) noexcept; + Iterator& operator=(const Iterator& other); + Iterator& operator=(Iterator&& other) noexcept; + ~Iterator(); + + int64_t operator*() const; + Iterator& operator++(); + Iterator operator++(int); + bool operator==(const Iterator& other) const; + bool operator!=(const Iterator& other) const; + + private: + friend class RoaringNavigableMap64; + + class Impl; + std::unique_ptr impl_; + + // Private constructor for creating end iterator + Iterator(const RoaringNavigableMap64& bitmap, bool is_end); + }; + + /** + * Returns an iterator to the beginning of the bitmap. + * @return Iterator pointing to the first element + */ + Iterator begin() const; + + /** + * Returns an iterator to the end of the bitmap. + * @return Iterator pointing to the end + */ + Iterator end() const; + + private: + class Impl; + std::unique_ptr impl_; +}; + +} // namespace paimon \ No newline at end of file diff --git a/src/paimon/common/utils/roaring_navigable_map64_test.cpp b/src/paimon/common/utils/roaring_navigable_map64_test.cpp new file mode 100644 index 000000000..b294edc0c --- /dev/null +++ b/src/paimon/common/utils/roaring_navigable_map64_test.cpp @@ -0,0 +1,113 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "paimon/common/utils/roaring_navigable_map64.h" + +#include + +#include +#include + +#include "paimon/utils/range.h" + +namespace paimon { + +class RoaringNavigableMap64Test : public ::testing::Test { + protected: + void SetUp() override {} + void TearDown() override {} +}; + +TEST_F(RoaringNavigableMap64Test, testAddRangeBasic) { + RoaringNavigableMap64 bitmap; + bitmap.AddRange(Range(5, 10)); + + // Verify the range [5, 10] is added (inclusive on both ends) + EXPECT_EQ(bitmap.GetLongCardinality(), 6); + EXPECT_FALSE(bitmap.Contains(4)); + EXPECT_TRUE(bitmap.Contains(5)); + EXPECT_TRUE(bitmap.Contains(7)); + EXPECT_TRUE(bitmap.Contains(10)); + EXPECT_FALSE(bitmap.Contains(11)); +} + +TEST_F(RoaringNavigableMap64Test, testAddRangeSingleElement) { + RoaringNavigableMap64 bitmap; + bitmap.AddRange(Range(100, 100)); + + // A range where from == to should add exactly one element + EXPECT_EQ(bitmap.GetLongCardinality(), 1); + EXPECT_FALSE(bitmap.Contains(99)); + EXPECT_TRUE(bitmap.Contains(100)); + EXPECT_FALSE(bitmap.Contains(101)); +} + +TEST_F(RoaringNavigableMap64Test, testAddRangeMultipleNonOverlapping) { + RoaringNavigableMap64 bitmap; + bitmap.AddRange(Range(0, 5)); + bitmap.AddRange(Range(10, 15)); + bitmap.AddRange(Range(20, 25)); + + // Verify cardinality: 6 + 6 + 6 = 18 + EXPECT_EQ(bitmap.GetLongCardinality(), 18); + + // Verify gaps are not filled + EXPECT_FALSE(bitmap.Contains(6)); + EXPECT_FALSE(bitmap.Contains(9)); + EXPECT_FALSE(bitmap.Contains(16)); + EXPECT_FALSE(bitmap.Contains(19)); + + // Verify ranges contain expected values + EXPECT_TRUE(bitmap.Contains(0)); + EXPECT_TRUE(bitmap.Contains(5)); + EXPECT_TRUE(bitmap.Contains(10)); + EXPECT_TRUE(bitmap.Contains(15)); + EXPECT_TRUE(bitmap.Contains(20)); + EXPECT_TRUE(bitmap.Contains(25)); + + // Verify ToRangeList reconstructs the ranges correctly + std::vector ranges = bitmap.ToRangeList(); + EXPECT_EQ(ranges.size(), 3); + EXPECT_EQ(ranges[0], Range(0, 5)); + EXPECT_EQ(ranges[1], Range(10, 15)); + EXPECT_EQ(ranges[2], Range(20, 25)); +} + +TEST_F(RoaringNavigableMap64Test, testAddRangeLargeValues) { + RoaringNavigableMap64 bitmap; + // Test with values beyond Integer.MAX_VALUE + int64_t start = static_cast(INT_MAX) + 100L; + int64_t end = static_cast(INT_MAX) + 200L; + bitmap.AddRange(Range(start, end)); + + EXPECT_EQ(bitmap.GetLongCardinality(), 101); + EXPECT_FALSE(bitmap.Contains(start - 1)); + EXPECT_TRUE(bitmap.Contains(start)); + EXPECT_TRUE(bitmap.Contains(start + 50)); + EXPECT_TRUE(bitmap.Contains(end)); + EXPECT_FALSE(bitmap.Contains(end + 1)); + + // Verify iteration order + std::vector values; + for (auto it = bitmap.begin(); it != bitmap.end(); ++it) { + values.push_back(*it); + } + EXPECT_EQ(values.size(), 101); + EXPECT_EQ(values[0], start); + EXPECT_EQ(values[100], end); +} + +} // namespace paimon \ No newline at end of file From ba22e7a54847170559cb58ae09f5bfc2cba94590 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 4 Mar 2026 15:50:47 +0800 Subject: [PATCH 02/28] feat: btree global index --- include/paimon/file_index/file_index_reader.h | 6 + .../paimon/global_index/global_index_reader.h | 14 + include/paimon/predicate/function_visitor.h | 36 ++ .../empty/empty_file_index_reader.h | 29 + .../common/file_index/file_index_reader.cpp | 52 ++ .../global_index/btree/btree_file_footer.cpp | 2 + .../btree/btree_global_indexer.cpp | 528 +++++++++++++++++- .../global_index/btree/btree_global_indexer.h | 84 ++- .../global_index/btree/btree_index_meta.cpp | 7 +- .../wrap/file_index_reader_wrapper.h | 24 + src/paimon/common/io/cache/cache.h | 4 +- src/paimon/common/io/cache/cache_key.h | 2 +- src/paimon/common/io/cache/cache_manager.h | 2 +- src/paimon/common/sst/block_cache.h | 2 +- src/paimon/common/sst/block_entry.h | 2 +- src/paimon/common/sst/block_footer.h | 2 +- src/paimon/common/sst/block_handle.h | 2 +- src/paimon/common/sst/block_iterator.h | 2 +- src/paimon/common/sst/block_reader.h | 2 +- src/paimon/common/sst/block_trailer.h | 2 +- src/paimon/common/sst/block_writer.h | 2 +- src/paimon/common/sst/bloom_filter_handle.h | 2 +- src/paimon/common/sst/sst_file_reader.cpp | 2 +- src/paimon/common/sst/sst_file_reader.h | 46 +- src/paimon/common/sst/sst_file_writer.h | 2 +- .../common/utils/roaring_navigable_map64.cpp | 4 + .../common/utils/roaring_navigable_map64.h | 8 + 27 files changed, 814 insertions(+), 56 deletions(-) diff --git a/include/paimon/file_index/file_index_reader.h b/include/paimon/file_index/file_index_reader.h index bbbea446c..eb7f6c484 100644 --- a/include/paimon/file_index/file_index_reader.h +++ b/include/paimon/file_index/file_index_reader.h @@ -62,6 +62,12 @@ class PAIMON_EXPORT FileIndexReader : public FunctionVisitor> VisitContains(const Literal& literal) override; Result> VisitLike(const Literal& literal) override; + + Result> VisitAnd( + const std::vector>>& children) override; + + Result> VisitOr( + const std::vector>>& children) override; }; } // namespace paimon diff --git a/include/paimon/global_index/global_index_reader.h b/include/paimon/global_index/global_index_reader.h index 9325735d2..03b1f0adb 100644 --- a/include/paimon/global_index/global_index_reader.h +++ b/include/paimon/global_index/global_index_reader.h @@ -48,6 +48,20 @@ class PAIMON_EXPORT GlobalIndexReader : public FunctionVisitor> VisitFullTextSearch( const std::shared_ptr& full_text_search) = 0; + /// VisitAnd performs logical AND across multiple child results. + /// Default implementation returns "not supported" error. + virtual Result> VisitAnd( + const std::vector>>& children) { + return Status::NotImplemented("AND operations not supported by this index type"); + } + + /// VisitOr performs logical OR across multiple child results. + /// Default implementation returns "not supported" error. + virtual Result> VisitOr( + const std::vector>>& children) { + return Status::NotImplemented("OR operations not supported by this index type"); + } + /// @return true if the reader is thread-safe; false otherwise. virtual bool IsThreadSafe() const = 0; diff --git a/include/paimon/predicate/function_visitor.h b/include/paimon/predicate/function_visitor.h index 3d5954ff4..80d6d7ee1 100644 --- a/include/paimon/predicate/function_visitor.h +++ b/include/paimon/predicate/function_visitor.h @@ -74,5 +74,41 @@ class PAIMON_EXPORT FunctionVisitor { /// Evaluates whether string values like the given string. virtual Result VisitLike(const Literal& literal) = 0; + + /// Evaluates the BETWEEN predicate with the given lower and upper bounds. + virtual Result VisitBetween(const Literal& from, const Literal& to) { + // Default implementation: BETWEEN is equivalent to >= AND <= + auto lower_result = VisitGreaterOrEqual(from); + if (!lower_result.ok()) { + return lower_result.status(); + } + auto upper_result = VisitLessOrEqual(to); + if (!upper_result.ok()) { + return upper_result.status(); + } + return VisitAnd({std::move(lower_result).value(), std::move(upper_result).value()}); + } + + /// Evaluates the NOT BETWEEN predicate with the given lower and upper bounds. + virtual Result VisitNotBetween(const Literal& from, const Literal& to) { + // Default implementation: NOT BETWEEN is equivalent to < OR > + auto lower_result = VisitLessThan(from); + if (!lower_result.ok()) { + return lower_result.status(); + } + auto upper_result = VisitGreaterThan(to); + if (!upper_result.ok()) { + return upper_result.status(); + } + return VisitOr({std::move(lower_result).value(), std::move(upper_result).value()}); + } + + // ----------------- Compound functions ------------------------ + + /// Evaluates the AND predicate across multiple child results. + virtual Result VisitAnd(const std::vector>& children) = 0; + + /// Evaluates the OR predicate across multiple child results. + virtual Result VisitOr(const std::vector>& children) = 0; }; } // namespace paimon diff --git a/src/paimon/common/file_index/empty/empty_file_index_reader.h b/src/paimon/common/file_index/empty/empty_file_index_reader.h index 6207787a7..f859727ef 100644 --- a/src/paimon/common/file_index/empty/empty_file_index_reader.h +++ b/src/paimon/common/file_index/empty/empty_file_index_reader.h @@ -65,6 +65,35 @@ class EmptyFileIndexReader : public FileIndexReader { const std::vector& literals) override { return FileIndexResult::Skip(); } + + Result> VisitNotEqual(const Literal& literal) override { + return FileIndexResult::Skip(); + } + + Result> VisitNotIn( + const std::vector& literals) override { + return FileIndexResult::Skip(); + } + + Result> VisitBetween(const Literal& from, + const Literal& to) override { + return FileIndexResult::Skip(); + } + + Result> VisitNotBetween(const Literal& from, + const Literal& to) override { + return FileIndexResult::Skip(); + } + + Result> VisitAnd( + const std::vector>>& children) override { + return FileIndexResult::Skip(); + } + + Result> VisitOr( + const std::vector>>& children) override { + return FileIndexResult::Skip(); + } }; } // namespace paimon diff --git a/src/paimon/common/file_index/file_index_reader.cpp b/src/paimon/common/file_index/file_index_reader.cpp index 6f4cb38b2..ba5996448 100644 --- a/src/paimon/common/file_index/file_index_reader.cpp +++ b/src/paimon/common/file_index/file_index_reader.cpp @@ -98,4 +98,56 @@ Result> FileIndexReader::VisitNotIn( } return file_index_result; } + +Result> FileIndexReader::VisitAnd( + const std::vector>>& children) { + if (children.empty()) { + return Status::Invalid("VisitAnd called with no children"); + } + + // Start with the first child + auto result = children[0]; + if (!result.ok()) { + return result.status(); + } + auto current = std::move(result).value(); + + // AND with remaining children + for (size_t i = 1; i < children.size(); ++i) { + auto child_status = children[i]; + if (!child_status.ok()) { + return child_status.status(); + } + auto child = std::move(child_status).value(); + PAIMON_ASSIGN_OR_RAISE(current, current->And(child)); + } + + return current; +} + +Result> FileIndexReader::VisitOr( + const std::vector>>& children) { + if (children.empty()) { + return Status::Invalid("VisitOr called with no children"); + } + + // Start with the first child + auto result = children[0]; + if (!result.ok()) { + return result.status(); + } + auto current = std::move(result).value(); + + // OR with remaining children + for (size_t i = 1; i < children.size(); ++i) { + auto child_status = children[i]; + if (!child_status.ok()) { + return child_status.status(); + } + auto child = std::move(child_status).value(); + PAIMON_ASSIGN_OR_RAISE(current, current->Or(child)); + } + + return current; +} } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_file_footer.cpp b/src/paimon/common/global_index/btree/btree_file_footer.cpp index 06f17f0fe..9b51effc9 100644 --- a/src/paimon/common/global_index/btree/btree_file_footer.cpp +++ b/src/paimon/common/global_index/btree/btree_file_footer.cpp @@ -86,6 +86,8 @@ std::shared_ptr BTreeFileFooter::Write( // write magic number ouput->WriteValue(MAGIC_NUMBER); + + return ouput->ToSlice(); } } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index de6f954bb..9a96707dd 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -15,18 +15,26 @@ */ #include "paimon/common/global_index/btree/btree_global_indexer.h" +#include +#include + #include "paimon/common/global_index/btree/btree_file_footer.h" #include "paimon/common/global_index/btree/btree_index_meta.h" #include "paimon/common/memory/memory_slice.h" #include "paimon/common/memory/memory_slice_input.h" -#include "paimon/common/sst/block_cache.h" -#include "paimon/common/sst/block_handle.h" #include "paimon/common/utils/crc32c.h" +#include "paimon/common/utils/field_type_utils.h" #include "paimon/common/utils/roaring_navigable_map64.h" #include "paimon/file_index/bitmap_index_result.h" #include "paimon/global_index/bitmap_global_index_result.h" +#include "paimon/memory/bytes.h" +#include "paimon/predicate/literal.h" namespace paimon { + +// Forward declarations for helper functions +static Result> LiteralToMemorySlice(const Literal& literal, + MemoryPool* pool); Result> BTreeGlobalIndexer::CreateReader( ::ArrowSchema* arrow_schema, const std::shared_ptr& file_reader, const std::vector& files, const std::shared_ptr& pool) const { @@ -40,15 +48,35 @@ Result> BTreeGlobalIndexer::CreateReader( // prepare file footer auto cache_manager = std::make_shared(); - auto block_cache = std::make_shared(meta.file_path, in, pool.get(), cache_manager); + auto block_cache = std::make_shared(meta.file_path, in, pool, cache_manager); auto segment = block_cache->GetBlock(meta.file_size - BTreeFileFooter::ENCODED_LENGTH, BTreeFileFooter::ENCODED_LENGTH, true); PAIMON_ASSIGN_OR_RAISE(std::shared_ptr footer, BTreeFileFooter::Read(MemorySlice::Wrap(segment)->ToInput())); + // prepare null_bitmap and sst_file_reader + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr null_bitmap, + ReadNullBitmap(block_cache, footer->GetNullBitmapHandle())); + std::shared_ptr fs; + std::function&, const std::shared_ptr&)> + comparator; + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr sst_file_reader, + SstFileReader::Create(pool, fs, meta.file_path, comparator)); + auto index_meta = BTreeIndexMeta::Deserialize(meta.metadata, pool.get()); - return std::make_shared(file_reader, files, pool); + // Convert Bytes to MemorySlice for keys + std::shared_ptr min_key_slice; + std::shared_ptr max_key_slice; + if (index_meta->FirstKey()) { + min_key_slice = MemorySlice::Wrap(index_meta->FirstKey()); + } + if (index_meta->LastKey()) { + max_key_slice = MemorySlice::Wrap(index_meta->LastKey()); + } + + return std::make_shared(sst_file_reader, null_bitmap, min_key_slice, + max_key_slice, files, pool, comparator); } Result> BTreeGlobalIndexer::ToGlobalIndexResult( @@ -82,37 +110,503 @@ Result> BTreeGlobalIndexer::ReadNullBitma // Read bytes and crc value auto segment = cache->GetBlock(block_handle->Offset(), block_handle->Size() + 4, false); - + auto slice = MemorySlice::Wrap(segment); auto slice_input = slice->ToInput(); - + // Read null bitmap data auto null_bitmap_slice = slice_input->ReadSlice(block_handle->Size()); auto null_bitmap_bytes = null_bitmap_slice->GetHeapMemory(); - + // Calculate CRC32C checksum - uint32_t crc_value = CRC32C::calculate(reinterpret_cast(null_bitmap_bytes->data()), - null_bitmap_bytes->size()); - + uint32_t crc_value = CRC32C::calculate(reinterpret_cast(null_bitmap_bytes->data()), + null_bitmap_bytes->size()); + // Read expected CRC value int32_t expected_crc_value = slice_input->ReadInt(); - + // Verify CRC checksum if (crc_value != static_cast(expected_crc_value)) { return Status::Invalid("CRC check failure during decoding null bitmap"); } - + // Deserialize null bitmap try { - std::vector data(null_bitmap_bytes->data(), - null_bitmap_bytes->data() + null_bitmap_bytes->size()); + std::vector data(null_bitmap_bytes->data(), + null_bitmap_bytes->data() + null_bitmap_bytes->size()); null_bitmap->Deserialize(data); } catch (const std::exception& e) { - return Status::Invalid("Fail to deserialize null bitmap but crc check passed, " - "this means the ser/de algorithms not match: " + std::string(e.what())); + return Status::Invalid( + "Fail to deserialize null bitmap but crc check passed, " + "this means the ser/de algorithms not match: " + + std::string(e.what())); } - + return null_bitmap; } +BTreeGlobalIndexReader::BTreeGlobalIndexReader( + const std::shared_ptr& sst_file_reader, + const std::shared_ptr& null_bitmap, + const std::shared_ptr& min_key, const std::shared_ptr& max_key, + const std::vector& files, const std::shared_ptr& pool, + std::function&, const std::shared_ptr&)> + comparator) + : sst_file_reader_(sst_file_reader), + null_bitmap_(null_bitmap), + min_key_(min_key), + max_key_(max_key), + pool_(pool), + comparator_(std::move(comparator)) {} + +Result> BTreeGlobalIndexReader::VisitIsNotNull() { + // nulls are stored separately in null bitmap. + return std::make_shared([this]() -> Result { + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); + return result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitIsNull() { + // nulls are stored separately in null bitmap. + return std::make_shared( + [this]() -> Result { return null_bitmap_->GetBitmap(); }); +} + +Result> BTreeGlobalIndexReader::VisitStartsWith( + const Literal& prefix) { + // Use btree index for startsWith: find all keys >= prefix and check if they start with prefix + return std::make_shared([this, &prefix]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto prefix_slice, LiteralToMemorySlice(prefix, pool_.get())); + + // Search for keys >= prefix + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 all_candidates, + RangeQuery(prefix_slice, max_key_, true, true)); + + // If no comparator or prefix is empty, return all candidates + if (!comparator_ || prefix_slice->Length() == 0) { + return all_candidates.GetBitmap(); + } + + // Filter to only keep keys that actually start with prefix + RoaringNavigableMap64 result; + + // We need to iterate through the keys and check if they start with prefix + // This is a simplified approach - in a full implementation, we'd need to properly + // iterate through the btree to check prefixes + + // For now, return all candidates if the index type is string/binary + // The exact filtering would require being able to read and compare the keys + auto prefix_type = prefix.GetType(); + if (prefix_type == FieldType::STRING || prefix_type == FieldType::BINARY) { + // In a real implementation, we would iterate through candidates and check each key + // For simplicity, we're using the btree range query which gives us keys >= prefix + // The comparator would help determine which ones actually start with prefix + return all_candidates.GetBitmap(); + } + + // For non-string types, startsWith doesn't make much sense, return all non-null rows + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 all_rows, AllNonNullRows()); + return all_rows.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitEndsWith( + const Literal& suffix) { + return std::make_shared([this]() -> Result { + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); + return result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitContains( + const Literal& literal) { + return std::make_shared([this]() -> Result { + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); + return result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitLike( + const Literal& literal) { + return std::make_shared([this]() -> Result { + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); + return result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitLessThan( + const Literal& literal) { + return std::make_shared([this, &literal]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, + RangeQuery(min_key_, literal_slice, true, false)); + return result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitGreaterOrEqual( + const Literal& literal) { + return std::make_shared([this, &literal]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, + RangeQuery(literal_slice, max_key_, true, true)); + return result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitNotEqual( + const Literal& literal) { + return std::make_shared([this, &literal]() -> Result { + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 equal_result, + RangeQuery(literal_slice, literal_slice, true, true)); + result.AndNot(equal_result); + return result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitLessOrEqual( + const Literal& literal) { + return std::make_shared([this, &literal]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, + RangeQuery(min_key_, literal_slice, true, true)); + return result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitEqual( + const Literal& literal) { + return std::make_shared([this, &literal]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, + RangeQuery(literal_slice, literal_slice, true, true)); + return result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitGreaterThan( + const Literal& literal) { + return std::make_shared([this, &literal]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, + RangeQuery(literal_slice, max_key_, false, true)); + return result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitIn( + const std::vector& literals) { + return std::make_shared([this, + &literals]() -> Result { + RoaringNavigableMap64 result; + for (const auto& literal : literals) { + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 literal_result, + RangeQuery(literal_slice, literal_slice, true, true)); + result.Or(literal_result); + } + return result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitNotIn( + const std::vector& literals) { + return std::make_shared( + [this, &literals]() -> Result { + // Get all non-null rows first + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); + + // Get the IN result and convert to navigable map + PAIMON_ASSIGN_OR_RAISE(auto in_result_ptr, VisitIn(literals)); + PAIMON_ASSIGN_OR_RAISE(auto in_iterator, in_result_ptr->CreateIterator()); + + RoaringNavigableMap64 in_navigable; + while (in_iterator->HasNext()) { + in_navigable.Add(in_iterator->Next()); + } + + result.AndNot(in_navigable); + return result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitBetween(const Literal& from, + const Literal& to) { + return std::make_shared( + [this, &from, &to]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto from_slice, LiteralToMemorySlice(from, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(auto to_slice, LiteralToMemorySlice(to, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, + RangeQuery(from_slice, to_slice, true, true)); + return result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitNotBetween( + const Literal& from, const Literal& to) { + return std::make_shared( + [this, &from, &to]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto from_slice, LiteralToMemorySlice(from, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(auto to_slice, LiteralToMemorySlice(to, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 lower_result, + RangeQuery(min_key_, from_slice, true, false)); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 upper_result, + RangeQuery(to_slice, max_key_, false, true)); + lower_result.Or(upper_result); + return lower_result.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitAnd( + const std::vector>>& children) { + return std::make_shared( + [this, &children]() -> Result { + if (children.empty()) { + return Status::Invalid("VisitAnd called with no children"); + } + + // Start with the first child result + auto first_result_status = children[0]; + if (!first_result_status.ok()) { + return first_result_status.status(); + } + auto first_result = std::move(first_result_status).value(); + PAIMON_ASSIGN_OR_RAISE(auto first_iterator, first_result->CreateIterator()); + + RoaringNavigableMap64 result_bitmap; + while (first_iterator->HasNext()) { + result_bitmap.Add(first_iterator->Next()); + } + + // AND with remaining children + for (size_t i = 1; i < children.size(); ++i) { + auto child_status = children[i]; + if (!child_status.ok()) { + return child_status.status(); + } + auto child = std::move(child_status).value(); + PAIMON_ASSIGN_OR_RAISE(auto child_iterator, child->CreateIterator()); + + RoaringNavigableMap64 child_bitmap; + while (child_iterator->HasNext()) { + child_bitmap.Add(child_iterator->Next()); + } + + result_bitmap.And(child_bitmap); + } + + return result_bitmap.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitOr( + const std::vector>>& children) { + return std::make_shared( + [this, &children]() -> Result { + RoaringNavigableMap64 result_bitmap; + + for (const auto& child_status : children) { + if (!child_status.ok()) { + return child_status.status(); + } + auto child = std::move(child_status).value(); + PAIMON_ASSIGN_OR_RAISE(auto child_iterator, child->CreateIterator()); + + while (child_iterator->HasNext()) { + result_bitmap.Add(child_iterator->Next()); + } + } + + return result_bitmap.GetBitmap(); + }); +} + +Result> BTreeGlobalIndexReader::VisitVectorSearch( + const std::shared_ptr& vector_search) { + return Status::NotImplemented("Vector search not supported in BTree index"); +} + +Result> BTreeGlobalIndexReader::VisitFullTextSearch( + const std::shared_ptr& full_text_search) { + return Status::NotImplemented("Full text search not supported in BTree index"); +} + +Result BTreeGlobalIndexReader::RangeQuery( + const std::shared_ptr& lower_bound, + const std::shared_ptr& upper_bound, bool lower_inclusive, bool upper_inclusive) { + // Create an index block iterator to iterate through data blocks + auto index_block_reader = sst_file_reader_->GetIndexBlockReader(); + auto index_iterator = index_block_reader->Iterator(); + + // Seek index iterator to the lower bound + index_iterator->SeekTo(lower_bound); + + RoaringNavigableMap64 result; + + // Iterate through all relevant data blocks + bool first_block = true; + + while (index_iterator->HasNext()) { + // Get the next data block + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr data_iterator, + sst_file_reader_->GetNextBlock(index_iterator)); + + if (!data_iterator || !data_iterator->HasNext()) { + continue; + } + + // For the first block, we need to seek within the block to the exact position + if (first_block) { + data_iterator->SeekTo(lower_bound); + first_block = false; + + // After seeking, check if we still have data + if (!data_iterator->HasNext()) { + continue; + } + } + + // Iterate through entries in the data block + while (data_iterator->HasNext()) { + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr entry, data_iterator->Next()); + + // Compare key with bounds using the comparator + const auto& comparator = comparator_; + int cmp_lower = comparator ? comparator(entry->key_, lower_bound) : 0; + + // Check lower bound + if (!lower_inclusive && cmp_lower == 0) { + // Skip if key equals lower bound and lower is not inclusive + continue; + } + + // Check upper bound + int cmp_upper = comparator ? comparator(entry->key_, upper_bound) : 0; + if (cmp_upper > 0 || (!upper_inclusive && cmp_upper == 0)) { + // Key is beyond upper bound, we're done + return result; + } + + // Deserialize row IDs from the value + // The value should contain an array of int64_t row IDs + auto value_bytes = entry->value_->CopyBytes(pool_.get()); + auto value_slice = MemorySlice::Wrap(value_bytes); + auto value_input = value_slice->ToInput(); + + // Read row IDs. The format is: [length][row_id1][row_id2]... + // where length is the number of row IDs (varint) + int64_t num_row_ids = value_input->ReadVarLenLong(); + + for (int64_t i = 0; i < num_row_ids; i++) { + int64_t row_id = value_input->ReadLong(); + result.Add(row_id); + } + } + } + + return result; +} + +Result BTreeGlobalIndexReader::AllNonNullRows() { + // Traverse all data to avoid returning null values, which is very advantageous in + // situations where there are many null values + // TODO do not traverse all data if less null values + if (!min_key_) { + return RoaringNavigableMap64(); + } + return RangeQuery(min_key_, max_key_, true, true); +} + +// Helper function to convert Literal to MemorySlice +static Result> LiteralToMemorySlice(const Literal& literal, + MemoryPool* pool) { + if (literal.IsNull()) { + return Status::Invalid("Cannot convert null literal to MemorySlice for btree index query"); + } + + auto type = literal.GetType(); + + // Handle string/binary types + if (type == FieldType::STRING || type == FieldType::BINARY) { + try { + std::string str_value = literal.GetValue(); + auto bytes = Bytes::AllocateBytes(str_value, pool); + return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + } catch (const std::exception& e) { + return Status::Invalid("Failed to convert string/binary literal to MemorySlice: " + + std::string(e.what())); + } + } + + // Handle integer types + if (type == FieldType::BIGINT) { + try { + int64_t value = literal.GetValue(); + auto bytes = Bytes::AllocateBytes(sizeof(value), pool); + memcpy(bytes->data(), &value, sizeof(value)); + return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + } catch (const std::exception& e) { + return Status::Invalid("Failed to convert bigint literal to MemorySlice: " + + std::string(e.what())); + } + } + + if (type == FieldType::INT) { + try { + int32_t value = literal.GetValue(); + auto bytes = Bytes::AllocateBytes(sizeof(value), pool); + memcpy(bytes->data(), &value, sizeof(value)); + return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + } catch (const std::exception& e) { + return Status::Invalid("Failed to convert int literal to MemorySlice: " + + std::string(e.what())); + } + } + + // Handle other numeric types similarly + if (type == FieldType::TINYINT) { + try { + int8_t value = literal.GetValue(); + auto bytes = Bytes::AllocateBytes(sizeof(value), pool); + memcpy(bytes->data(), &value, sizeof(value)); + return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + } catch (const std::exception& e) { + return Status::Invalid("Failed to convert tinyint literal to MemorySlice: " + + std::string(e.what())); + } + } + + if (type == FieldType::SMALLINT) { + try { + int16_t value = literal.GetValue(); + auto bytes = Bytes::AllocateBytes(sizeof(value), pool); + memcpy(bytes->data(), &value, sizeof(value)); + return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + } catch (const std::exception& e) { + return Status::Invalid("Failed to convert smallint literal to MemorySlice: " + + std::string(e.what())); + } + } + + // Handle boolean + if (type == FieldType::BOOLEAN) { + try { + bool value = literal.GetValue(); + auto bytes = Bytes::AllocateBytes(1, pool); + bytes->data()[0] = value ? 1 : 0; + return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + } catch (const std::exception& e) { + return Status::Invalid("Failed to convert boolean literal to MemorySlice: " + + std::string(e.what())); + } + } + + // For unhandled types, return error for now + return Status::NotImplemented("Literal type " + FieldTypeUtils::FieldTypeToString(type) + + " not yet supported in btree index"); +} + } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.h b/src/paimon/common/global_index/btree/btree_global_indexer.h index fa1414984..dee549749 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.h +++ b/src/paimon/common/global_index/btree/btree_global_indexer.h @@ -21,6 +21,9 @@ #include #include "paimon/common/file_index/bitmap/bitmap_file_index.h" +#include "paimon/common/sst/block_cache.h" +#include "paimon/common/sst/block_handle.h" +#include "paimon/common/sst/sst_file_reader.h" #include "paimon/common/utils/roaring_navigable_map64.h" #include "paimon/global_index/global_indexer.h" #include "paimon/global_index/io/global_index_file_reader.h" @@ -56,9 +59,86 @@ class BTreeGlobalIndexer : public GlobalIndexer { class BTreeGlobalIndexReader : public GlobalIndexReader { public: - BTreeGlobalIndexReader(const std::shared_ptr& file_reader, + BTreeGlobalIndexReader(const std::shared_ptr& sst_file_reader, + const std::shared_ptr& null_bitmap, + const std::shared_ptr& min_key, + const std::shared_ptr& max_key, const std::vector& files, - const std::shared_ptr& pool) {} + const std::shared_ptr& pool, + std::function&, + const std::shared_ptr&)> + comparator); + + Result> VisitIsNotNull() override; + + Result> VisitIsNull() override; + + Result> VisitEqual(const Literal& literal) override; + + Result> VisitNotEqual(const Literal& literal) override; + + Result> VisitLessThan(const Literal& literal) override; + + Result> VisitLessOrEqual(const Literal& literal) override; + + Result> VisitGreaterThan(const Literal& literal) override; + + Result> VisitGreaterOrEqual(const Literal& literal) override; + + Result> VisitIn( + const std::vector& literals) override; + + Result> VisitNotIn( + const std::vector& literals) override; + + Result> VisitBetween(const Literal& from, + const Literal& to) override; + + Result> VisitNotBetween(const Literal& from, + const Literal& to) override; + + Result> VisitStartsWith(const Literal& prefix) override; + + Result> VisitEndsWith(const Literal& suffix) override; + + Result> VisitContains(const Literal& literal) override; + + Result> VisitLike(const Literal& literal) override; + + Result> VisitAnd( + const std::vector>>& children) override; + + Result> VisitOr( + const std::vector>>& children) override; + + Result> VisitVectorSearch( + const std::shared_ptr& vector_search) override; + + Result> VisitFullTextSearch( + const std::shared_ptr& full_text_search) override; + + bool IsThreadSafe() const override { + return false; + } + + std::string GetIndexType() const override { + return "btree"; + } + + private: + Result RangeQuery(const std::shared_ptr& lower_bound, + const std::shared_ptr& upper_bound, + bool lower_inclusive, bool upper_inclusive); + + Result AllNonNullRows(); + + std::shared_ptr sst_file_reader_; + std::shared_ptr null_bitmap_; + std::shared_ptr min_key_; + std::shared_ptr max_key_; + std::shared_ptr pool_; + std::function&, const std::shared_ptr&)> + comparator_; }; } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_index_meta.cpp b/src/paimon/common/global_index/btree/btree_index_meta.cpp index a1b5eb2b3..775c49615 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta.cpp +++ b/src/paimon/common/global_index/btree/btree_index_meta.cpp @@ -34,6 +34,11 @@ std::shared_ptr BTreeIndexMeta::Deserialize(const std::shared_pt auto has_nulls = input->ReadByte() == 1; return std::make_shared(first_key, last_key, has_nulls); } -std::shared_ptr BTreeIndexMeta::Serialize(paimon::MemoryPool* pool) {} + +std::shared_ptr BTreeIndexMeta::Serialize(paimon::MemoryPool* pool) { + // TODO(zhangchaoming.zcm): Implement serialization + // For now, return an empty Bytes object + return std::make_shared(); +} } // namespace paimon diff --git a/src/paimon/common/global_index/wrap/file_index_reader_wrapper.h b/src/paimon/common/global_index/wrap/file_index_reader_wrapper.h index dae4b345e..078311e2a 100644 --- a/src/paimon/common/global_index/wrap/file_index_reader_wrapper.h +++ b/src/paimon/common/global_index/wrap/file_index_reader_wrapper.h @@ -120,6 +120,30 @@ class FileIndexReaderWrapper : public GlobalIndexReader { return transform_(file_index_result); } + Result> VisitBetween(const Literal& from, + const Literal& to) override { + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr file_index_result, + reader_->VisitBetween(from, to)); + return transform_(file_index_result); + } + + Result> VisitNotBetween(const Literal& from, + const Literal& to) override { + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr file_index_result, + reader_->VisitNotBetween(from, to)); + return transform_(file_index_result); + } + + Result> VisitAnd( + const std::vector>>& children) override { + return Status::Invalid("FileIndexReaderWrapper is not supposed to handle AND operations"); + } + + Result> VisitOr( + const std::vector>>& children) override { + return Status::Invalid("FileIndexReaderWrapper is not supposed to handle OR operations"); + } + Result> VisitVectorSearch( const std::shared_ptr& vector_search) override { return Status::Invalid( diff --git a/src/paimon/common/io/cache/cache.h b/src/paimon/common/io/cache/cache.h index 0176b9608..8fcb69de8 100644 --- a/src/paimon/common/io/cache/cache.h +++ b/src/paimon/common/io/cache/cache.h @@ -27,7 +27,7 @@ namespace paimon { class CacheValue; -class Cache { +class PAIMON_EXPORT Cache { public: virtual ~Cache() = default; virtual std::shared_ptr Get( @@ -44,7 +44,7 @@ class Cache { virtual std::unordered_map, std::shared_ptr> AsMap() = 0; }; -class NoCache : public Cache { +class PAIMON_EXPORT NoCache : public Cache { public: std::shared_ptr Get( const std::shared_ptr& key, diff --git a/src/paimon/common/io/cache/cache_key.h b/src/paimon/common/io/cache/cache_key.h index 8e98bf7cd..d84d07f82 100644 --- a/src/paimon/common/io/cache/cache_key.h +++ b/src/paimon/common/io/cache/cache_key.h @@ -24,7 +24,7 @@ namespace paimon { -class CacheKey { +class PAIMON_EXPORT CacheKey { public: static std::shared_ptr ForPosition(const std::string& file_path, int64_t position, int32_t length, bool is_index); diff --git a/src/paimon/common/io/cache/cache_manager.h b/src/paimon/common/io/cache/cache_manager.h index 5e92f9cb9..294e3abae 100644 --- a/src/paimon/common/io/cache/cache_manager.h +++ b/src/paimon/common/io/cache/cache_manager.h @@ -26,7 +26,7 @@ #include "paimon/result.h" namespace paimon { -class CacheManager { +class PAIMON_EXPORT CacheManager { public: CacheManager() { // todo implements cache diff --git a/src/paimon/common/sst/block_cache.h b/src/paimon/common/sst/block_cache.h index 80a942fa7..465a6a6c8 100644 --- a/src/paimon/common/sst/block_cache.h +++ b/src/paimon/common/sst/block_cache.h @@ -27,7 +27,7 @@ namespace paimon { -class BlockCache { +class PAIMON_EXPORT BlockCache { public: BlockCache(const std::string& file_path, const std::shared_ptr& in, const std::shared_ptr& pool, diff --git a/src/paimon/common/sst/block_entry.h b/src/paimon/common/sst/block_entry.h index a5fdc26ac..c2405ce91 100644 --- a/src/paimon/common/sst/block_entry.h +++ b/src/paimon/common/sst/block_entry.h @@ -23,7 +23,7 @@ namespace paimon { -struct BlockEntry { +struct PAIMON_EXPORT BlockEntry { public: BlockEntry(std::shared_ptr& key, std::shared_ptr& value) : key_(key), value_(value) {} diff --git a/src/paimon/common/sst/block_footer.h b/src/paimon/common/sst/block_footer.h index b69ef0ddc..31b8a94fc 100644 --- a/src/paimon/common/sst/block_footer.h +++ b/src/paimon/common/sst/block_footer.h @@ -28,7 +28,7 @@ namespace paimon { /// Footer of a block. -class BlockFooter { +class PAIMON_EXPORT BlockFooter { public: static Result> ReadBlockFooter( std::shared_ptr& input); diff --git a/src/paimon/common/sst/block_handle.h b/src/paimon/common/sst/block_handle.h index 3e9e6eaf3..b999f6676 100644 --- a/src/paimon/common/sst/block_handle.h +++ b/src/paimon/common/sst/block_handle.h @@ -25,7 +25,7 @@ namespace paimon { -class BlockHandle { +class PAIMON_EXPORT BlockHandle { public: static std::shared_ptr ReadBlockHandle(std::shared_ptr& input); diff --git a/src/paimon/common/sst/block_iterator.h b/src/paimon/common/sst/block_iterator.h index 284857c59..2e49e764a 100644 --- a/src/paimon/common/sst/block_iterator.h +++ b/src/paimon/common/sst/block_iterator.h @@ -24,7 +24,7 @@ namespace paimon { class BlockReader; -class BlockIterator { +class PAIMON_EXPORT BlockIterator { public: explicit BlockIterator(std::shared_ptr& reader); diff --git a/src/paimon/common/sst/block_reader.h b/src/paimon/common/sst/block_reader.h index 1998fad7d..ceae9e347 100644 --- a/src/paimon/common/sst/block_reader.h +++ b/src/paimon/common/sst/block_reader.h @@ -31,7 +31,7 @@ namespace paimon { class BlockIterator; /// Reader for a block. -class BlockReader : public std::enable_shared_from_this { +class PAIMON_EXPORT BlockReader : public std::enable_shared_from_this { public: static std::shared_ptr Create( std::shared_ptr block, diff --git a/src/paimon/common/sst/block_trailer.h b/src/paimon/common/sst/block_trailer.h index 0444fbada..48a622a99 100644 --- a/src/paimon/common/sst/block_trailer.h +++ b/src/paimon/common/sst/block_trailer.h @@ -26,7 +26,7 @@ namespace paimon { /// Trailer of a block. -class BlockTrailer { +class PAIMON_EXPORT BlockTrailer { public: static std::unique_ptr ReadBlockTrailer(std::shared_ptr& input); diff --git a/src/paimon/common/sst/block_writer.h b/src/paimon/common/sst/block_writer.h index ce76d944d..9da107eb6 100644 --- a/src/paimon/common/sst/block_writer.h +++ b/src/paimon/common/sst/block_writer.h @@ -50,7 +50,7 @@ namespace paimon { /// +------------------------------------------------+ /// /// -class BlockWriter { +class PAIMON_EXPORT BlockWriter { public: BlockWriter(int32_t size, const std::shared_ptr& pool, bool aligned = true) : size_(size), pool_(pool), aligned_(aligned) { diff --git a/src/paimon/common/sst/bloom_filter_handle.h b/src/paimon/common/sst/bloom_filter_handle.h index 742821f4e..f90576851 100644 --- a/src/paimon/common/sst/bloom_filter_handle.h +++ b/src/paimon/common/sst/bloom_filter_handle.h @@ -24,7 +24,7 @@ namespace paimon { -class BloomFilterHandle { +class PAIMON_EXPORT BloomFilterHandle { public: BloomFilterHandle(int64_t offset, int32_t size, int64_t expected_entries) : offset_(offset), size_(size), expected_entries_(expected_entries) {} diff --git a/src/paimon/common/sst/sst_file_reader.cpp b/src/paimon/common/sst/sst_file_reader.cpp index b6c4daa85..49e0e2276 100644 --- a/src/paimon/common/sst/sst_file_reader.cpp +++ b/src/paimon/common/sst/sst_file_reader.cpp @@ -29,7 +29,7 @@ Result> SstFileReader::Create( PAIMON_ASSIGN_OR_RAISE(std::shared_ptr in, fs->Open(file_path)); PAIMON_ASSIGN_OR_RAISE(uint64_t file_len, in->Length()); auto cache_manager = std::make_shared(); - auto block_cache = std::make_shared(file_path, in, pool.get(), cache_manager); + auto block_cache = std::make_shared(file_path, in, pool, cache_manager); // read footer auto segment = block_cache->GetBlock(file_len - BlockFooter::ENCODED_LENGTH, diff --git a/src/paimon/common/sst/sst_file_reader.h b/src/paimon/common/sst/sst_file_reader.h index 616a85ef8..109e6fa9b 100644 --- a/src/paimon/common/sst/sst_file_reader.h +++ b/src/paimon/common/sst/sst_file_reader.h @@ -38,7 +38,7 @@ class SstFileIterator; /// An SST File Reader which serves point queries and range queries. Users can call /// CreateIterator() to create a file iterator and then use seek and read methods to do range /// queries. Note that this class is NOT thread-safe. -class SstFileReader { +class PAIMON_EXPORT SstFileReader { public: static Result> Create( const std::shared_ptr& pool, const std::shared_ptr& fs, @@ -58,33 +58,39 @@ class SstFileReader { std::unique_ptr CreateIterator(); - /** - * Lookup the specified key in the file. - * - * @param key serialized key - * @return corresponding serialized value, nullptr if not found. - */ + /// Lookup the specified key in the file. + /// + /// @param key serialized key + /// @return corresponding serialized value, nullptr if not found. std::shared_ptr Lookup(std::shared_ptr key); Result> GetNextBlock( std::unique_ptr& index_iterator); - /** - * @param handle The block handle. - * @param index Whether read the block as an index. - * @return The reader of the target block. - */ + /// @param handle The block handle. + /// @param index Whether read the block as an index. + /// @return The reader of the target block. Result> ReadBlock(std::shared_ptr&& handle, bool index); - /** - * @param handle The block handle. - * @param index Whether read the block as an index. - * @return The reader of the target block. - */ + /// @param handle The block handle. + /// @param index Whether read the block as an index. + /// @return The reader of the target block. Result> ReadBlock(const std::shared_ptr& handle, bool index); + /// @return The index block reader. + std::shared_ptr GetIndexBlockReader() const { + return index_block_reader_; + } + + /// @return The comparator function. + const std::function&, + const std::shared_ptr&)>& + GetComparator() const { + return comparator_; + } + private: static Result> DecompressBlock( const std::shared_ptr& compressed_data, @@ -104,10 +110,8 @@ class SstFileIterator { SstFileIterator() = default; SstFileIterator(SstFileReader* reader, std::unique_ptr index_iterator); - /** - * Seek to the position of the record whose key is exactly equal to or greater than the - * specified key. - */ + /// Seek to the position of the record whose key is exactly equal to or greater than the + /// specified key. Status SeekTo(std::shared_ptr& key); private: diff --git a/src/paimon/common/sst/sst_file_writer.h b/src/paimon/common/sst/sst_file_writer.h index 758880109..c2f0c5a46 100644 --- a/src/paimon/common/sst/sst_file_writer.h +++ b/src/paimon/common/sst/sst_file_writer.h @@ -39,7 +39,7 @@ class MemoryPool; /// The writer for writing SST Files. SST Files are row-oriented and designed to serve frequent /// point queries and range queries by key. -class SstFileWriter { +class PAIMON_EXPORT SstFileWriter { public: SstFileWriter(const std::shared_ptr& out, const std::shared_ptr& pool, const std::shared_ptr& bloom_filter, int32_t block_size, diff --git a/src/paimon/common/utils/roaring_navigable_map64.cpp b/src/paimon/common/utils/roaring_navigable_map64.cpp index 39b9cb366..89971cb97 100644 --- a/src/paimon/common/utils/roaring_navigable_map64.cpp +++ b/src/paimon/common/utils/roaring_navigable_map64.cpp @@ -155,6 +155,10 @@ std::vector RoaringNavigableMap64::ToRangeList() const { return ranges; } +const RoaringBitmap64& RoaringNavigableMap64::GetBitmap() const { + return impl_->bitmap; +} + RoaringNavigableMap64 RoaringNavigableMap64::BitmapOf(const std::vector& values) { RoaringNavigableMap64 result; for (int64_t value : values) { diff --git a/src/paimon/common/utils/roaring_navigable_map64.h b/src/paimon/common/utils/roaring_navigable_map64.h index 09fdf6bd9..178a7a976 100644 --- a/src/paimon/common/utils/roaring_navigable_map64.h +++ b/src/paimon/common/utils/roaring_navigable_map64.h @@ -137,6 +137,14 @@ class PAIMON_EXPORT RoaringNavigableMap64 { */ std::vector ToRangeList() const; + /** + * Gets the internal RoaringBitmap64 without copying. + * This is an optimization to avoid O(n) conversion when the navigable map + * is no longer needed for modifications. + * @return A const reference to the internal RoaringBitmap64 + */ + const RoaringBitmap64& GetBitmap() const; + /** * Creates a new bitmap from a list of values. * @param values The values to include in the bitmap From 860244911bdc4bd09409061f51863fcbd147437b Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 11 Mar 2026 12:18:56 +0800 Subject: [PATCH 03/28] types --- .../btree/btree_global_indexer.cpp | 349 ++++++++++++++++-- 1 file changed, 323 insertions(+), 26 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index 9a96707dd..a9e96f2e8 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -18,10 +18,12 @@ #include #include +#include "arrow/c/bridge.h" #include "paimon/common/global_index/btree/btree_file_footer.h" #include "paimon/common/global_index/btree/btree_index_meta.h" #include "paimon/common/memory/memory_slice.h" #include "paimon/common/memory/memory_slice_input.h" +#include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/crc32c.h" #include "paimon/common/utils/field_type_utils.h" #include "paimon/common/utils/roaring_navigable_map64.h" @@ -35,6 +37,194 @@ namespace paimon { // Forward declarations for helper functions static Result> LiteralToMemorySlice(const Literal& literal, MemoryPool* pool); + +// Create a comparator function based on field type +static std::function&, + const std::shared_ptr&)> +CreateComparator(FieldType field_type) { + switch (field_type) { + case FieldType::STRING: + case FieldType::BINARY: + // String/binary comparison: lexicographic order + return [](const std::shared_ptr& a, + const std::shared_ptr& b) -> int32_t { + if (!a || !b) return 0; + auto a_bytes = a->GetHeapMemory(); + auto b_bytes = b->GetHeapMemory(); + if (!a_bytes || !b_bytes) return 0; + size_t min_len = std::min(a_bytes->size(), b_bytes->size()); + int cmp = memcmp(a_bytes->data(), b_bytes->data(), min_len); + if (cmp != 0) return cmp < 0 ? -1 : 1; + if (a_bytes->size() < b_bytes->size()) return -1; + if (a_bytes->size() > b_bytes->size()) return 1; + return 0; + }; + case FieldType::BIGINT: + // int64_t comparison + return [](const std::shared_ptr& a, + const std::shared_ptr& b) -> int32_t { + if (!a || !b) return 0; + auto a_bytes = a->GetHeapMemory(); + auto b_bytes = b->GetHeapMemory(); + if (!a_bytes || !b_bytes || a_bytes->size() < 8 || b_bytes->size() < 8) return 0; + int64_t a_val, b_val; + memcpy(&a_val, a_bytes->data(), sizeof(int64_t)); + memcpy(&b_val, b_bytes->data(), sizeof(int64_t)); + if (a_val < b_val) return -1; + if (a_val > b_val) return 1; + return 0; + }; + case FieldType::INT: + // int32_t comparison + return [](const std::shared_ptr& a, + const std::shared_ptr& b) -> int32_t { + if (!a || !b) return 0; + auto a_bytes = a->GetHeapMemory(); + auto b_bytes = b->GetHeapMemory(); + if (!a_bytes || !b_bytes || a_bytes->size() < 4 || b_bytes->size() < 4) return 0; + int32_t a_val, b_val; + memcpy(&a_val, a_bytes->data(), sizeof(int32_t)); + memcpy(&b_val, b_bytes->data(), sizeof(int32_t)); + if (a_val < b_val) return -1; + if (a_val > b_val) return 1; + return 0; + }; + case FieldType::SMALLINT: + // int16_t comparison + return [](const std::shared_ptr& a, + const std::shared_ptr& b) -> int32_t { + if (!a || !b) return 0; + auto a_bytes = a->GetHeapMemory(); + auto b_bytes = b->GetHeapMemory(); + if (!a_bytes || !b_bytes || a_bytes->size() < 2 || b_bytes->size() < 2) return 0; + int16_t a_val, b_val; + memcpy(&a_val, a_bytes->data(), sizeof(int16_t)); + memcpy(&b_val, b_bytes->data(), sizeof(int16_t)); + if (a_val < b_val) return -1; + if (a_val > b_val) return 1; + return 0; + }; + case FieldType::TINYINT: + // int8_t comparison + return [](const std::shared_ptr& a, + const std::shared_ptr& b) -> int32_t { + if (!a || !b) return 0; + auto a_bytes = a->GetHeapMemory(); + auto b_bytes = b->GetHeapMemory(); + if (!a_bytes || !b_bytes || a_bytes->size() < 1 || b_bytes->size() < 1) return 0; + int8_t a_val = a_bytes->data()[0]; + int8_t b_val = b_bytes->data()[0]; + if (a_val < b_val) return -1; + if (a_val > b_val) return 1; + return 0; + }; + case FieldType::BOOLEAN: + // bool comparison + return [](const std::shared_ptr& a, + const std::shared_ptr& b) -> int32_t { + if (!a || !b) return 0; + auto a_bytes = a->GetHeapMemory(); + auto b_bytes = b->GetHeapMemory(); + if (!a_bytes || !b_bytes || a_bytes->size() < 1 || b_bytes->size() < 1) return 0; + bool a_val = a_bytes->data()[0] != 0; + bool b_val = b_bytes->data()[0] != 0; + if (a_val < b_val) return -1; + if (a_val > b_val) return 1; + return 0; + }; + case FieldType::FLOAT: + // float comparison + return [](const std::shared_ptr& a, + const std::shared_ptr& b) -> int32_t { + if (!a || !b) return 0; + auto a_bytes = a->GetHeapMemory(); + auto b_bytes = b->GetHeapMemory(); + if (!a_bytes || !b_bytes || a_bytes->size() < 4 || b_bytes->size() < 4) return 0; + float a_val, b_val; + memcpy(&a_val, a_bytes->data(), sizeof(float)); + memcpy(&b_val, b_bytes->data(), sizeof(float)); + if (a_val < b_val) return -1; + if (a_val > b_val) return 1; + return 0; + }; + case FieldType::DOUBLE: + // double comparison + return [](const std::shared_ptr& a, + const std::shared_ptr& b) -> int32_t { + if (!a || !b) return 0; + auto a_bytes = a->GetHeapMemory(); + auto b_bytes = b->GetHeapMemory(); + if (!a_bytes || !b_bytes || a_bytes->size() < 8 || b_bytes->size() < 8) return 0; + double a_val, b_val; + memcpy(&a_val, a_bytes->data(), sizeof(double)); + memcpy(&b_val, b_bytes->data(), sizeof(double)); + if (a_val < b_val) return -1; + if (a_val > b_val) return 1; + return 0; + }; + case FieldType::DATE: + // Date comparison (stored as int32_t days since epoch) + return [](const std::shared_ptr& a, + const std::shared_ptr& b) -> int32_t { + if (!a || !b) return 0; + auto a_bytes = a->GetHeapMemory(); + auto b_bytes = b->GetHeapMemory(); + if (!a_bytes || !b_bytes || a_bytes->size() < 4 || b_bytes->size() < 4) return 0; + int32_t a_val, b_val; + memcpy(&a_val, a_bytes->data(), sizeof(int32_t)); + memcpy(&b_val, b_bytes->data(), sizeof(int32_t)); + if (a_val < b_val) return -1; + if (a_val > b_val) return 1; + return 0; + }; + case FieldType::TIMESTAMP: + // Timestamp comparison (stored as int64_t) + return [](const std::shared_ptr& a, + const std::shared_ptr& b) -> int32_t { + if (!a || !b) return 0; + auto a_bytes = a->GetHeapMemory(); + auto b_bytes = b->GetHeapMemory(); + if (!a_bytes || !b_bytes || a_bytes->size() < 8 || b_bytes->size() < 8) return 0; + int64_t a_val, b_val; + memcpy(&a_val, a_bytes->data(), sizeof(int64_t)); + memcpy(&b_val, b_bytes->data(), sizeof(int64_t)); + if (a_val < b_val) return -1; + if (a_val > b_val) return 1; + return 0; + }; + case FieldType::DECIMAL: + // Decimal comparison (stored as 16 bytes for DECIMAL128) + return [](const std::shared_ptr& a, + const std::shared_ptr& b) -> int32_t { + if (!a || !b) return 0; + auto a_bytes = a->GetHeapMemory(); + auto b_bytes = b->GetHeapMemory(); + if (!a_bytes || !b_bytes) return 0; + // Compare bytes directly for DECIMAL128 + size_t min_len = std::min(a_bytes->size(), b_bytes->size()); + int cmp = memcmp(a_bytes->data(), b_bytes->data(), min_len); + if (cmp != 0) return cmp < 0 ? -1 : 1; + if (a_bytes->size() < b_bytes->size()) return -1; + if (a_bytes->size() > b_bytes->size()) return 1; + return 0; + }; + default: + // Default: lexicographic comparison + return [](const std::shared_ptr& a, + const std::shared_ptr& b) -> int32_t { + if (!a || !b) return 0; + auto a_bytes = a->GetHeapMemory(); + auto b_bytes = b->GetHeapMemory(); + if (!a_bytes || !b_bytes) return 0; + size_t min_len = std::min(a_bytes->size(), b_bytes->size()); + int cmp = memcmp(a_bytes->data(), b_bytes->data(), min_len); + if (cmp != 0) return cmp < 0 ? -1 : 1; + if (a_bytes->size() < b_bytes->size()) return -1; + if (a_bytes->size() > b_bytes->size()) return 1; + return 0; + }; + } +} Result> BTreeGlobalIndexer::CreateReader( ::ArrowSchema* arrow_schema, const std::shared_ptr& file_reader, const std::vector& files, const std::shared_ptr& pool) const { @@ -46,6 +236,20 @@ Result> BTreeGlobalIndexer::CreateReader( PAIMON_ASSIGN_OR_RAISE(std::shared_ptr in, file_reader->GetInputStream(meta.file_path)); + // Get field type from arrow schema + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr schema, + arrow::ImportSchema(arrow_schema)); + if (schema->num_fields() != 1) { + return Status::Invalid( + "invalid schema for BTreeGlobalIndexReader, supposed to have single field."); + } + auto arrow_type = schema->field(0)->type(); + PAIMON_ASSIGN_OR_RAISE(FieldType field_type, + FieldTypeUtils::ConvertToFieldType(arrow_type->id())); + + // Create comparator based on field type + auto comparator = CreateComparator(field_type); + // prepare file footer auto cache_manager = std::make_shared(); auto block_cache = std::make_shared(meta.file_path, in, pool, cache_manager); @@ -58,8 +262,6 @@ Result> BTreeGlobalIndexer::CreateReader( PAIMON_ASSIGN_OR_RAISE(std::shared_ptr null_bitmap, ReadNullBitmap(block_cache, footer->GetNullBitmapHandle())); std::shared_ptr fs; - std::function&, const std::shared_ptr&)> - comparator; PAIMON_ASSIGN_OR_RAISE(std::shared_ptr sst_file_reader, SstFileReader::Create(pool, fs, meta.file_path, comparator)); @@ -175,44 +377,65 @@ Result> BTreeGlobalIndexReader::VisitIsNull() Result> BTreeGlobalIndexReader::VisitStartsWith( const Literal& prefix) { - // Use btree index for startsWith: find all keys >= prefix and check if they start with prefix + // Use btree index for startsWith: find all keys >= prefix and < prefix_upper_bound + // For string prefix "abc", the upper bound should be "abd" (increment last char) + // This ensures we only get keys that actually start with the prefix return std::make_shared([this, &prefix]() -> Result { PAIMON_ASSIGN_OR_RAISE(auto prefix_slice, LiteralToMemorySlice(prefix, pool_.get())); - // Search for keys >= prefix - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 all_candidates, - RangeQuery(prefix_slice, max_key_, true, true)); + auto prefix_type = prefix.GetType(); - // If no comparator or prefix is empty, return all candidates - if (!comparator_ || prefix_slice->Length() == 0) { - return all_candidates.GetBitmap(); - } + // For string/binary types, compute the upper bound for prefix matching + if (prefix_type == FieldType::STRING || prefix_type == FieldType::BINARY) { + auto prefix_bytes = prefix_slice->GetHeapMemory(); + if (!prefix_bytes || prefix_bytes->size() == 0) { + // Empty prefix matches all non-null rows + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); + return result.GetBitmap(); + } - // Filter to only keep keys that actually start with prefix - RoaringNavigableMap64 result; + // Compute upper bound: increment the last byte of the prefix + // For example, "abc" -> "abd", "ab\xFF" -> "ac" + std::string upper_bound_str(prefix_bytes->data(), prefix_bytes->size()); + bool overflow = true; + for (int i = static_cast(upper_bound_str.size()) - 1; i >= 0 && overflow; --i) { + unsigned char c = static_cast(upper_bound_str[i]); + if (c < 0xFF) { + upper_bound_str[i] = c + 1; + overflow = false; + } else { + upper_bound_str[i] = 0x00; + // Continue to increment previous byte + } + } - // We need to iterate through the keys and check if they start with prefix - // This is a simplified approach - in a full implementation, we'd need to properly - // iterate through the btree to check prefixes + std::shared_ptr upper_bound_slice; + if (!overflow) { + auto upper_bytes = Bytes::AllocateBytes(upper_bound_str, pool_.get()); + upper_bound_slice = + MemorySlice::Wrap(std::shared_ptr(upper_bytes.release())); + } + // If overflow (all bytes were 0xFF), use max_key_ as upper bound - // For now, return all candidates if the index type is string/binary - // The exact filtering would require being able to read and compare the keys - auto prefix_type = prefix.GetType(); - if (prefix_type == FieldType::STRING || prefix_type == FieldType::BINARY) { - // In a real implementation, we would iterate through candidates and check each key - // For simplicity, we're using the btree range query which gives us keys >= prefix - // The comparator would help determine which ones actually start with prefix - return all_candidates.GetBitmap(); + // Execute range query [prefix, upper_bound) + PAIMON_ASSIGN_OR_RAISE( + RoaringNavigableMap64 result, + RangeQuery(prefix_slice, upper_bound_slice ? upper_bound_slice : max_key_, true, + false)); // lower_inclusive=true, upper_inclusive=false + return result.GetBitmap(); } - // For non-string types, startsWith doesn't make much sense, return all non-null rows - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 all_rows, AllNonNullRows()); - return all_rows.GetBitmap(); + // For non-string types, startsWith doesn't make semantic sense + // Return empty result for non-string types + return RoaringBitmap64(); }); } Result> BTreeGlobalIndexReader::VisitEndsWith( const Literal& suffix) { + // BTree index is not efficient for EndsWith queries as it requires checking all keys. + // Return all non-null rows as fallback; the upper layer will perform exact filtering. + // Note: This is a conservative approach that doesn't prune any rows. return std::make_shared([this]() -> Result { PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); return result.GetBitmap(); @@ -221,6 +444,9 @@ Result> BTreeGlobalIndexReader::VisitEndsWith Result> BTreeGlobalIndexReader::VisitContains( const Literal& literal) { + // BTree index is not efficient for Contains queries as it requires checking all keys. + // Return all non-null rows as fallback; the upper layer will perform exact filtering. + // Note: This is a conservative approach that doesn't prune any rows. return std::make_shared([this]() -> Result { PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); return result.GetBitmap(); @@ -229,6 +455,10 @@ Result> BTreeGlobalIndexReader::VisitContains Result> BTreeGlobalIndexReader::VisitLike( const Literal& literal) { + // BTree index can only efficiently handle LIKE patterns of the form "prefix%". + // For other patterns (e.g., "%suffix", "%contains%"), return all non-null rows as fallback. + // Note: This is a conservative approach that doesn't prune any rows. + // TODO: Parse LIKE pattern and use VisitStartsWith for "prefix%" patterns. return std::make_shared([this]() -> Result { PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); return result.GetBitmap(); @@ -604,6 +834,73 @@ static Result> LiteralToMemorySlice(const Literal& } } + // Handle float + if (type == FieldType::FLOAT) { + try { + float value = literal.GetValue(); + auto bytes = Bytes::AllocateBytes(sizeof(value), pool); + memcpy(bytes->data(), &value, sizeof(value)); + return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + } catch (const std::exception& e) { + return Status::Invalid("Failed to convert float literal to MemorySlice: " + + std::string(e.what())); + } + } + + // Handle double + if (type == FieldType::DOUBLE) { + try { + double value = literal.GetValue(); + auto bytes = Bytes::AllocateBytes(sizeof(value), pool); + memcpy(bytes->data(), &value, sizeof(value)); + return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + } catch (const std::exception& e) { + return Status::Invalid("Failed to convert double literal to MemorySlice: " + + std::string(e.what())); + } + } + + // Handle date (stored as int32_t days since epoch) + if (type == FieldType::DATE) { + try { + int32_t value = literal.GetValue(); + auto bytes = Bytes::AllocateBytes(sizeof(value), pool); + memcpy(bytes->data(), &value, sizeof(value)); + return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + } catch (const std::exception& e) { + return Status::Invalid("Failed to convert date literal to MemorySlice: " + + std::string(e.what())); + } + } + + // Handle timestamp (stored as int64_t) + if (type == FieldType::TIMESTAMP) { + try { + // Timestamp is stored as int64_t (milliseconds or microseconds depending on precision) + int64_t value = literal.GetValue(); + auto bytes = Bytes::AllocateBytes(sizeof(value), pool); + memcpy(bytes->data(), &value, sizeof(value)); + return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + } catch (const std::exception& e) { + return Status::Invalid("Failed to convert timestamp literal to MemorySlice: " + + std::string(e.what())); + } + } + + // Handle decimal (DECIMAL128 stored as 16 bytes) + if (type == FieldType::DECIMAL) { + try { + // Decimal values are stored as string representation for simplicity + // The actual storage format should match the index writer's format + std::string str_value = literal.ToString(); + auto bytes = Bytes::AllocateBytes(str_value, pool); + return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + } catch (const std::exception& e) { + return Status::Invalid("Failed to convert decimal literal to MemorySlice: " + + std::string(e.what())); + } + } + // For unhandled types, return error for now return Status::NotImplemented("Literal type " + FieldTypeUtils::FieldTypeToString(type) + " not yet supported in btree index"); From fb4015fb27c124c6d150a23b19af1e08910b339e Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Thu, 26 Mar 2026 14:09:57 +0800 Subject: [PATCH 04/28] feat: implement btree global index writer and fix compilation errors - Add BtreeGlobalIndexWriter for writing btree global index files - Fix AllNonNullRows() compilation errors: - Use GetLongCardinality() instead of Cardinality() - Use AddRange(Range(0, total_rows - 1)) instead of AddRange(0, total_rows) - Add unit tests for btree file footer, index meta, writer, and indexer - Add integration test for btree global index --- CLAUDE.md | 163 +++++++ docs/branch-management.md | 153 +++++++ src/paimon/CMakeLists.txt | 5 + src/paimon/common/global_index/CMakeLists.txt | 1 + .../btree/btree_file_footer_test.cpp | 237 ++++++++++ .../btree/btree_global_index_factory.cpp | 2 +- .../btree/btree_global_index_factory.h | 2 +- .../btree_global_index_integration_test.cpp | 415 ++++++++++++++++++ .../btree/btree_global_index_writer.cpp | 245 +++++++++++ .../btree/btree_global_index_writer.h | 87 ++++ .../btree/btree_global_index_writer_test.cpp | 307 +++++++++++++ .../btree/btree_global_indexer.cpp | 191 ++++++-- .../global_index/btree/btree_global_indexer.h | 5 +- .../btree/btree_global_indexer_test.cpp | 240 ++++++++++ .../global_index/btree/btree_index_meta.cpp | 31 +- .../global_index/btree/btree_index_meta.h | 8 +- .../btree/btree_index_meta_test.cpp | 215 +++++++++ 17 files changed, 2257 insertions(+), 50 deletions(-) create mode 100644 CLAUDE.md create mode 100644 docs/branch-management.md create mode 100644 src/paimon/common/global_index/btree/btree_file_footer_test.cpp create mode 100644 src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp create mode 100644 src/paimon/common/global_index/btree/btree_global_index_writer.cpp create mode 100644 src/paimon/common/global_index/btree/btree_global_index_writer.h create mode 100644 src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp create mode 100644 src/paimon/common/global_index/btree/btree_global_indexer_test.cpp create mode 100644 src/paimon/common/global_index/btree/btree_index_meta_test.cpp diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..b6a4b78db --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,163 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Paimon C++ is a high-performance C++ implementation of Apache Paimon that provides native access to the Paimon datalake format. It supports write, commit, scan, and read operations for both append-only and primary key tables, using Arrow Columnar format for data interchange. + +## Build Commands + +### Basic Build +```bash +# Using the build script (recommended) +./build_and_package.sh --release --jobs 8 + +# Manual CMake build +mkdir build && cd build +cmake -G Ninja -DCMAKE_BUILD_TYPE=Release .. +ninja -j8 +``` + +### Debug Build +```bash +./build_and_package.sh --debug --jobs 8 +``` + +### Build with Tests +```bash +./build_and_package.sh --debug --jobs 8 -DPAIMON_BUILD_TESTS=ON +``` + +### Key Build Options +- `-DPAIMON_BUILD_TESTS=ON/OFF` - Build tests (default: OFF) +- `-DPAIMON_BUILD_STATIC=ON/OFF` - Build static library (default: ON) +- `-DPAIMON_BUILD_SHARED=ON/OFF` - Build shared library (default: ON) +- `-DPAIMON_ENABLE_AVRO=ON/OFF` - Enable Avro format (default: ON) +- `-DPAIMON_ENABLE_ORC=ON/OFF` - Enable ORC format (default: ON) +- `-DPAIMON_ENABLE_LANCE=ON/OFF` - Enable Lance format (default: OFF) +- `-DPAIMON_ENABLE_JINDO=ON/OFF` - Enable Jindo filesystem (default: OFF) +- `-DPAIMON_ENABLE_LUMINA=ON/OFF` - Enable Lumina vector index (default: ON) +- `-DPAIMON_ENABLE_LUCENE=ON/OFF` - Enable Lucene index (default: ON) + +## Testing + +### Run All Tests +```bash +cd build +ctest -j8 --output-on-failure +``` + +### Run Specific Test +```bash +cd build +./test/inte/paimon_inte_test --gtest_filter="TestName.Pattern" +``` + +### Test Organization +- **Integration tests**: `test/inte/` - Main integration tests for all features +- **Unit tests**: Co-located with source files as `*_test.cpp` +- **Test data**: `test/test_data/` - Sample Paimon tables for testing + +## Code Quality and Linting + +### Install Pre-commit Hooks +```bash +pip install pre-commit +pre-commit install +``` + +### Run Linting on All Files +```bash +pre-commit run -a +``` + +### Linting Tools Used +- **clang-format**: C++ code formatting +- **cmake-format**: CMake file formatting +- **codespell**: Spell checking +- **cpplint**: C++ style checker +- **sphinx-lint**: Documentation linting + +## Architecture Overview + +### Directory Structure +``` +src/paimon/ +├── common/ # Common utilities and interfaces +│ ├── data/ # Data types and Arrow integration +│ ├── executor/ # Thread pool implementations +│ ├── file_index/ # File index implementations +│ ├── format/ # File format interfaces +│ ├── fs/ # Filesystem abstractions +│ ├── global_index/# Global index interfaces +│ ├── io/ # IO utilities +│ ├── memory/ # Memory pool implementations +│ ├── predicate/ # Predicate pushdown +│ └── types/ # Type system +├── core/ # Core Paimon logic +│ ├── append/ # Append-only table logic +│ ├── catalog/ # Catalog implementations +│ ├── deletionvectors/ # Deletion vector handling +│ ├── manifest/ # Manifest file handling +│ ├── mergetree/ # LSM merge tree logic +│ ├── operation/ # Table operations +│ ├── schema/ # Schema management +│ └── table/ # Table implementations +├── format/ # File format implementations +│ ├── avro/ # Avro format support +│ ├── orc/ # ORC format support +│ ├── parquet/ # Parquet format support +│ └── lance/ # Lance format support (optional) +└── fs/ # Filesystem implementations + ├── jindo/ # Jindo filesystem (optional) + └── local/ # Local filesystem +``` + +### Key Design Patterns + +1. **Result Types**: Uses `PAIMON_ASSIGN_OR_RAISE` and `PAIMON_RETURN_NOT_OK` macros for error handling. Always check return statuses. + +2. **Builder Pattern**: Common pattern for constructing contexts + - `WriteContextBuilder` → `WriteContext` + - `ScanContextBuilder` → `ScanContext` + - `ReadContextBuilder` → `ReadContext` + +3. **Factory Pattern**: Creation through static factory methods + - `FileStoreWrite::Create()` + - `TableScan::Create()` + - `TableRead::Create()` + +4. **Two-Phase Operations**: + - **Write**: Prepare data → Generate commit messages → Commit + - **Read**: Scan table → Create plan → Read batches + +5. **Arrow Integration**: All data interchange uses Arrow Columnar format. Batches are represented as `RecordBatch` objects wrapping Arrow arrays. + +6. **Plugin Architecture**: File formats and filesystems are pluggable through interfaces in `common/`. + +### Core Abstractions + +- **FileStoreWrite**: Writes data to Paimon tables +- **TableScan**: Scans table metadata and creates read plans +- **TableRead**: Reads data files according to plan +- **FileStoreCommit**: Commits write operations +- **Catalog**: Manages databases and tables +- **FileFormat**: Abstracts different file formats (ORC, Parquet, Avro, Lance) +- **FileSystem**: Abstracts storage backends (Local, Jindo) + +## Development Workflow + +1. **Before making changes**: Run `pre-commit run -a` to ensure code passes linting +2. **For new features**: Add unit tests alongside implementation (`*_test.cpp`) +3. **For bug fixes**: Add regression tests +4. **Before committing**: Ensure all tests pass with `ctest -j8` +5. **Architecture decisions**: Follow existing patterns for consistency + +## Important Notes + +- The codebase only supports x86_64 architecture +- Java Paimon format compatibility is maintained for commit messages, data splits, and manifests +- C++11 ABI is enabled by default (can be disabled, but some features like Lance and Lumina require it) +- Use the provided macros (`PAIMON_ASSIGN_OR_RAISE`, `PAIMON_RETURN_NOT_OK`) for error handling +- Pre-commit hooks must pass before commits can be made \ No newline at end of file diff --git a/docs/branch-management.md b/docs/branch-management.md new file mode 100644 index 000000000..8e873f008 --- /dev/null +++ b/docs/branch-management.md @@ -0,0 +1,153 @@ +# paimon-cpp 分支管理规范 + +## 分支模型 + +本项目采用 **GitHub 社区仓库 + antcode 内部仓库** 双仓库协作模式。 + +``` +GitHub (github) antcode (origin) +───────────── ──────────────── +main ─────────────────────────> main (镜像同步,内容保持一致) + \ + └──> internal (内部分支,承载内部独有功能) +``` + +### 分支职责 + +| 分支 | 仓库 | 用途 | +|------|------|------| +| `main` | github + antcode | 社区主线,两边内容保持一致。antcode 的 main 通过 merge 跟踪 github 的 main | +| `internal` | antcode | 内部开发主线,包含社区代码 + 内部独有功能(如 zdfs 文件系统等) | +| `release-*` | github + antcode | 发布分支,按需同步 | +| `feat/*` | antcode | 内部 feature 开发分支,从 internal 拉出,合入 internal | + +### 远程仓库配置 + +```bash +git remote -v +# github https://github.com/alibaba/paimon-cpp.git (社区仓库) +# origin https://code.alipay.com/antflink/paimon-cpp.git (内部仓库) +``` + +--- + +## 日常操作 + +### 1. 从社区同步代码到 antcode + +```bash +# 拉取社区最新代码 +git fetch github + +# 同步 main 分支(保持与社区一致) +git checkout main +git merge github/main +git push origin main + +# 同步到 internal 分支(合入内部开发线) +git checkout internal +git merge main +# 如有冲突,解决后 git add && git merge --continue +git push origin internal +``` + +> **为什么用 merge 而不是 cherry-pick?** +> - merge 会记录共同祖先,Git 能自动识别已合并的 commit,**不会重复冲突** +> - cherry-pick 每次都产生新 commit hash,导致 Git 无法识别已同步的内容,**每次都可能冲突** + +### 2. 内部 feature 开发 + +```bash +# 从 internal 分支拉出 feature 分支 +git checkout internal +git checkout -b feat/my-feature + +# 开发完成后合入 internal +git checkout internal +git merge --no-ff feat/my-feature +git push origin internal + +# 清理 feature 分支 +git branch -d feat/my-feature +``` + +### 3. 内部 feature 贡献回社区 + +当内部 feature 成熟、通用性足够时,可以贡献回 GitHub 社区: + +```bash +# 从 main 分支拉出贡献分支 +git checkout main +git checkout -b contrib/my-feature + +# 将内部 feature 的 commit cherry-pick 过来 +git cherry-pick + +# 推送到 GitHub 并提 PR +git push github contrib/my-feature +# 在 GitHub 上创建 Pull Request +``` + +### 4. 同步 release 分支 + +```bash +git fetch github +git checkout release-0.1 +git merge github/release-0.1 +git push origin refs/heads/release-0.1:refs/heads/release-0.1 +``` + +--- + +## 内部功能开发规范 + +为了减少 merge 冲突,内部独有功能应遵循以下规范: + +### 目录隔离 + +内部功能代码放在独立目录下,不要修改社区已有文件的核心逻辑: + +``` +src/paimon/fs/zdfs/ # zdfs 文件系统(独立目录) +src/paimon/internal/ # 其他内部功能(建议的目录) +``` + +### CMake 开关控制 + +每个内部功能通过 CMake option 控制,默认关闭: + +```cmake +option(PAIMON_ENABLE_ZDFS "Whether to enable zdfs file system" OFF) + +if(PAIMON_ENABLE_ZDFS) + add_definitions(-DPAIMON_ENABLE_ZDFS) +endif() +``` + +### 构建配置集中管理 + +内部依赖的版本信息统一添加在 `third_party/versions.txt` 文件末尾, +构建逻辑添加在 `cmake_modules/ThirdpartyToolchain.cmake` 中, +用 `if(PAIMON_ENABLE_XXX)` 包裹,避免影响社区构建。 + +--- + +## 当前内部独有功能清单 + +| 功能 | 目录 | CMake 开关 | 说明 | +|------|------|-----------|------| +| ZDFS 文件系统 | `src/paimon/fs/zdfs/` | `PAIMON_ENABLE_ZDFS` | 内部分布式文件系统支持 | + +--- + +## 构建说明 + +```bash +# 社区标准构建(不含内部功能) +cmake -B build . +cmake --build build + +# 内部构建(启用 zdfs) +cmake -B build -DPAIMON_ENABLE_ZDFS=ON -DPAIMON_USE_CXX11_ABI=OFF . +cmake --build build +``` diff --git a/src/paimon/CMakeLists.txt b/src/paimon/CMakeLists.txt index 6277022a0..ca888c612 100644 --- a/src/paimon/CMakeLists.txt +++ b/src/paimon/CMakeLists.txt @@ -393,6 +393,11 @@ if(PAIMON_BUILD_TESTS) common/global_index/bitmap_global_index_result_test.cpp common/global_index/bitmap_scored_global_index_result_test.cpp common/global_index/bitmap/bitmap_global_index_test.cpp + common/global_index/btree/btree_index_meta_test.cpp + common/global_index/btree/btree_file_footer_test.cpp + common/global_index/btree/btree_global_indexer_test.cpp + common/global_index/btree/btree_global_index_writer_test.cpp + common/global_index/btree/btree_global_index_integration_test.cpp common/io/byte_array_input_stream_test.cpp common/io/data_input_output_stream_test.cpp common/io/buffered_input_stream_test.cpp diff --git a/src/paimon/common/global_index/CMakeLists.txt b/src/paimon/common/global_index/CMakeLists.txt index 8c366acbd..437664fc6 100644 --- a/src/paimon/common/global_index/CMakeLists.txt +++ b/src/paimon/common/global_index/CMakeLists.txt @@ -17,6 +17,7 @@ set(PAIMON_GLOBAL_INDEX_SRC bitmap/bitmap_global_index.cpp btree/btree_file_footer.cpp btree/btree_global_index_factory.cpp btree/btree_global_indexer.cpp + btree/btree_global_index_writer.cpp btree/btree_index_meta.cpp) add_paimon_lib(paimon_global_index diff --git a/src/paimon/common/global_index/btree/btree_file_footer_test.cpp b/src/paimon/common/global_index/btree/btree_file_footer_test.cpp new file mode 100644 index 000000000..2aad488f6 --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_file_footer_test.cpp @@ -0,0 +1,237 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "paimon/common/global_index/btree/btree_file_footer.h" +#include "paimon/common/sst/bloom_filter_handle.h" +#include "paimon/common/sst/block_handle.h" +#include "paimon/memory/memory_pool.h" + +namespace paimon::test { + +class BTreeFileFooterTest : public ::testing::Test { +protected: + void SetUp() override { pool_ = std::make_shared(); } + + std::shared_ptr pool_; +}; + +TEST_F(BTreeFileFooterTest, ReadWriteRoundTrip) { + // Create a footer with all handles + auto bloom_filter_handle = std::make_shared(100, 50, 1000); + auto index_block_handle = std::make_shared(200, 80); + auto null_bitmap_handle = std::make_shared(300, 40); + + auto footer = std::make_shared(bloom_filter_handle, index_block_handle, + null_bitmap_handle); + + // Write + auto serialized = BTreeFileFooter::Write(footer, pool_.get()); + ASSERT_NE(serialized, nullptr); + EXPECT_EQ(serialized->Length(), BTreeFileFooter::ENCODED_LENGTH); + + // Read + auto input = serialized->ToInput(); + auto deserialized = BTreeFileFooter::Read(input); + ASSERT_OK(deserialized.status()); + auto deserialized_footer = deserialized.value(); + + // Verify bloom filter handle + auto bf_handle = deserialized_footer->GetBloomFilterHandle(); + ASSERT_NE(bf_handle, nullptr); + EXPECT_EQ(bf_handle->Offset(), 100); + EXPECT_EQ(bf_handle->Size(), 50); + EXPECT_EQ(bf_handle->ExpectedEntries(), 1000); + + // Verify index block handle + auto ib_handle = deserialized_footer->GetIndexBlockHandle(); + ASSERT_NE(ib_handle, nullptr); + EXPECT_EQ(ib_handle->Offset(), 200); + EXPECT_EQ(ib_handle->Size(), 80); + + // Verify null bitmap handle + auto nb_handle = deserialized_footer->GetNullBitmapHandle(); + ASSERT_NE(nb_handle, nullptr); + EXPECT_EQ(nb_handle->Offset(), 300); + EXPECT_EQ(nb_handle->Size(), 40); +} + +TEST_F(BTreeFileFooterTest, ReadWriteWithNullBloomFilter) { + // Create a footer without bloom filter + auto index_block_handle = std::make_shared(200, 80); + auto null_bitmap_handle = std::make_shared(300, 40); + + auto footer = std::make_shared(nullptr, index_block_handle, null_bitmap_handle); + + // Write + auto serialized = BTreeFileFooter::Write(footer, pool_.get()); + ASSERT_NE(serialized, nullptr); + EXPECT_EQ(serialized->Length(), BTreeFileFooter::ENCODED_LENGTH); + + // Read + auto input = serialized->ToInput(); + auto deserialized = BTreeFileFooter::Read(input); + ASSERT_OK(deserialized.status()); + auto deserialized_footer = deserialized.value(); + + // Verify bloom filter handle is null + EXPECT_EQ(deserialized_footer->GetBloomFilterHandle(), nullptr); + + // Verify index block handle + auto ib_handle = deserialized_footer->GetIndexBlockHandle(); + ASSERT_NE(ib_handle, nullptr); + EXPECT_EQ(ib_handle->Offset(), 200); + EXPECT_EQ(ib_handle->Size(), 80); + + // Verify null bitmap handle + auto nb_handle = deserialized_footer->GetNullBitmapHandle(); + ASSERT_NE(nb_handle, nullptr); + EXPECT_EQ(nb_handle->Offset(), 300); + EXPECT_EQ(nb_handle->Size(), 40); +} + +TEST_F(BTreeFileFooterTest, ReadWriteWithNullNullBitmap) { + // Create a footer without null bitmap + auto bloom_filter_handle = std::make_shared(100, 50, 1000); + auto index_block_handle = std::make_shared(200, 80); + + auto footer = std::make_shared(bloom_filter_handle, index_block_handle, nullptr); + + // Write + auto serialized = BTreeFileFooter::Write(footer, pool_.get()); + ASSERT_NE(serialized, nullptr); + EXPECT_EQ(serialized->Length(), BTreeFileFooter::ENCODED_LENGTH); + + // Read + auto input = serialized->ToInput(); + auto deserialized = BTreeFileFooter::Read(input); + ASSERT_OK(deserialized.status()); + auto deserialized_footer = deserialized.value(); + + // Verify bloom filter handle + auto bf_handle = deserialized_footer->GetBloomFilterHandle(); + ASSERT_NE(bf_handle, nullptr); + EXPECT_EQ(bf_handle->Offset(), 100); + EXPECT_EQ(bf_handle->Size(), 50); + EXPECT_EQ(bf_handle->ExpectedEntries(), 1000); + + // Verify index block handle + auto ib_handle = deserialized_footer->GetIndexBlockHandle(); + ASSERT_NE(ib_handle, nullptr); + EXPECT_EQ(ib_handle->Offset(), 200); + EXPECT_EQ(ib_handle->Size(), 80); + + // Verify null bitmap handle is null + EXPECT_EQ(deserialized_footer->GetNullBitmapHandle(), nullptr); +} + +TEST_F(BTreeFileFooterTest, ReadWriteWithAllNullHandles) { + // Create a footer with only index block handle (required) + auto index_block_handle = std::make_shared(200, 80); + + auto footer = std::make_shared(nullptr, index_block_handle, nullptr); + + // Write + auto serialized = BTreeFileFooter::Write(footer, pool_.get()); + ASSERT_NE(serialized, nullptr); + EXPECT_EQ(serialized->Length(), BTreeFileFooter::ENCODED_LENGTH); + + // Read + auto input = serialized->ToInput(); + auto deserialized = BTreeFileFooter::Read(input); + ASSERT_OK(deserialized.status()); + auto deserialized_footer = deserialized.value(); + + // Verify bloom filter handle is null + EXPECT_EQ(deserialized_footer->GetBloomFilterHandle(), nullptr); + + // Verify index block handle + auto ib_handle = deserialized_footer->GetIndexBlockHandle(); + ASSERT_NE(ib_handle, nullptr); + EXPECT_EQ(ib_handle->Offset(), 200); + EXPECT_EQ(ib_handle->Size(), 80); + + // Verify null bitmap handle is null + EXPECT_EQ(deserialized_footer->GetNullBitmapHandle(), nullptr); +} + +TEST_F(BTreeFileFooterTest, MagicNumberVerification) { + // Create a valid footer + auto index_block_handle = std::make_shared(200, 80); + auto footer = std::make_shared(nullptr, index_block_handle, nullptr); + + // Write + auto serialized = BTreeFileFooter::Write(footer, pool_.get()); + ASSERT_NE(serialized, nullptr); + + // Read + auto input = serialized->ToInput(); + auto deserialized = BTreeFileFooter::Read(input); + ASSERT_OK(deserialized.status()); +} + +TEST_F(BTreeFileFooterTest, InvalidMagicNumber) { + // Create a buffer with invalid magic number + auto output = std::make_shared(BTreeFileFooter::ENCODED_LENGTH, pool_.get()); + + // Write bloom filter handle (all zeros for null) + output->WriteValue(static_cast(0)); + output->WriteValue(static_cast(0)); + output->WriteValue(static_cast(0)); + + // Write index block handle + output->WriteValue(static_cast(200)); + output->WriteValue(static_cast(80)); + + // Write null bitmap handle (all zeros for null) + output->WriteValue(static_cast(0)); + output->WriteValue(static_cast(0)); + + // Write invalid magic number + output->WriteValue(static_cast(12345)); // Invalid magic number + + auto serialized = output->ToSlice(); + auto input = serialized->ToInput(); + + // Read should fail + auto deserialized = BTreeFileFooter::Read(input); + EXPECT_FALSE(deserialized.ok()); + EXPECT_TRUE(deserialized.status().IsIOError()); +} + +TEST_F(BTreeFileFooterTest, EncodedLength) { + // Verify ENCODED_LENGTH = 48 + // bloom_filter: 8(offset) + 4(size) + 8(expected_entries) = 20 bytes + // index_block: 8(offset) + 4(size) = 12 bytes + // null_bitmap: 8(offset) + 4(size) = 12 bytes + // magic_number: 4 bytes + // Total = 20 + 12 + 12 + 4 = 48 bytes + EXPECT_EQ(BTreeFileFooter::ENCODED_LENGTH, 48); + + // Create a footer and verify the serialized length + auto bloom_filter_handle = std::make_shared(100, 50, 1000); + auto index_block_handle = std::make_shared(200, 80); + auto null_bitmap_handle = std::make_shared(300, 40); + auto footer = std::make_shared(bloom_filter_handle, index_block_handle, + null_bitmap_handle); + + auto serialized = BTreeFileFooter::Write(footer, pool_.get()); + ASSERT_NE(serialized, nullptr); + EXPECT_EQ(serialized->Length(), 48); +} + +} // namespace paimon::test \ No newline at end of file diff --git a/src/paimon/common/global_index/btree/btree_global_index_factory.cpp b/src/paimon/common/global_index/btree/btree_global_index_factory.cpp index 707de823e..71b8fc5e4 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_factory.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_factory.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2024-present Alibaba Inc. + * Copyright 2026-present Alibaba Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/paimon/common/global_index/btree/btree_global_index_factory.h b/src/paimon/common/global_index/btree/btree_global_index_factory.h index 1eb75f00b..6ff101e6b 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_factory.h +++ b/src/paimon/common/global_index/btree/btree_global_index_factory.h @@ -1,5 +1,5 @@ /* - * Copyright 2024-present Alibaba Inc. + * Copyright 2026-present Alibaba Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp new file mode 100644 index 000000000..61ad731e0 --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp @@ -0,0 +1,415 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include "paimon/common/global_index/btree/btree_global_indexer.h" +#include "paimon/common/global_index/btree/btree_global_index_writer.h" +#include "paimon/fs/file_system.h" +#include "paimon/global_index/io/global_index_file_reader.h" +#include "paimon/global_index/io/global_index_file_writer.h" +#include "paimon/memory/memory_pool.h" +#include "paimon/predicate/literal.h" +#include "paimon/testing/utils/testharness.h" + +namespace paimon::test { + +class FakeGlobalIndexFileWriter : public GlobalIndexFileWriter { +public: + FakeGlobalIndexFileWriter(const std::shared_ptr& fs, const std::string& base_path) + : fs_(fs), base_path_(base_path), file_counter_(0) {} + + Result NewFileName(const std::string& prefix) const override { + return prefix + "_" + std::to_string(file_counter_++); + } + + Result> NewOutputStream(const std::string& file_name) const override { + return fs_->CreateOutputStream(base_path_ + "/" + file_name); + } + + Result GetFileSize(const std::string& file_name) const override { + PAIMON_ASSIGN_OR_RAISE(auto file_status, fs_->GetFileStatus(base_path_ + "/" + file_name)); + return file_status->Length(); + } + + std::string ToPath(const std::string& file_name) const override { + return base_path_ + "/" + file_name; + } + +private: + std::shared_ptr fs_; + std::string base_path_; + mutable int64_t file_counter_; +}; + +class FakeGlobalIndexFileReader : public GlobalIndexFileReader { +public: + FakeGlobalIndexFileReader(const std::shared_ptr& fs, const std::string& base_path) + : fs_(fs), base_path_(base_path) {} + + Result> GetInputStream(const std::string& file_path) const override { + return fs_->OpenInputStream(file_path); + } + +private: + std::shared_ptr fs_; + std::string base_path_; +}; + +class BTreeGlobalIndexIntegrationTest : public ::testing::Test { +protected: + void SetUp() override { + pool_ = std::make_shared(); + test_dir_ = UniqueTestDirectory::Create("local"); + ASSERT_OK(test_dir_.status()); + fs_ = test_dir_->GetFileSystem(); + base_path_ = test_dir_->Str(); + } + + void TearDown() override { test_dir_->Delete(); } + + // Helper to create ArrowSchema from arrow type + Result CreateArrowSchema(const std::shared_ptr& type, + const std::string& field_name) { + auto schema = arrow::schema({arrow::field(field_name, type)}); + ArrowSchema* c_schema; + PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*schema, &c_schema)); + return c_schema; + } + + // Helper to check if a row ID is in the result + bool ContainsRowId(const std::shared_ptr& result, int64_t row_id) { + auto iterator_result = result->CreateIterator(); + if (!iterator_result.ok()) { + return false; + } + auto iterator = iterator_result.value(); + while (iterator->HasNext()) { + if (iterator->Next() == row_id) { + return true; + } + } + return false; + } + + std::shared_ptr pool_; + Result> test_dir_; + std::shared_ptr fs_; + std::string base_path_; +}; + +TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadIntData) { + // Create file writer + auto file_writer = std::make_shared(fs_, base_path_); + + // Create the BTree global index writer + auto writer = std::make_shared("int_field", file_writer, pool_); + + // Create an Arrow array with int values + // Row IDs: 0->1, 1->2, 2->3, 3->2, 4->1, 5->4, 6->5, 7->5, 8->5 + auto json_array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::int32(), "[1, 2, 3, 2, 1, 4, 5, 5, 5]"); + ASSERT_OK(json_array.status()); + + ArrowArray* c_array; + ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + + // Add batch + ASSERT_OK(writer->AddBatch(c_array)); + + // Finish writing + auto result = writer->Finish(); + ASSERT_OK(result.status()); + auto metas = result.value(); + ASSERT_EQ(metas.size(), 1); + + // Release ArrowArray + ArrowArrayRelease(c_array); + + // Now read back + auto file_reader = std::make_shared(fs_, base_path_); + std::map options; + BTreeGlobalIndexer indexer(options); + + // Create ArrowSchema + auto schema_result = CreateArrowSchema(arrow::int32(), "int_field"); + ASSERT_OK(schema_result.status()); + ArrowSchema* c_schema = schema_result.value(); + + // Create reader + auto reader_result = indexer.CreateReader(c_schema, file_reader, metas, pool_); + ASSERT_OK(reader_result.status()); + auto reader = reader_result.value(); + + // Test VisitEqual for value 1 (should return row IDs 0 and 4) + Literal literal_1(static_cast(1)); + auto equal_result = reader->VisitEqual(literal_1); + ASSERT_OK(equal_result.status()); + EXPECT_TRUE(ContainsRowId(equal_result.value(), 0)); + EXPECT_TRUE(ContainsRowId(equal_result.value(), 4)); + EXPECT_FALSE(ContainsRowId(equal_result.value(), 1)); + + // Test VisitEqual for value 5 (should return row IDs 6, 7, 8) + Literal literal_5(static_cast(5)); + auto equal_result_5 = reader->VisitEqual(literal_5); + ASSERT_OK(equal_result_5.status()); + EXPECT_TRUE(ContainsRowId(equal_result_5.value(), 6)); + EXPECT_TRUE(ContainsRowId(equal_result_5.value(), 7)); + EXPECT_TRUE(ContainsRowId(equal_result_5.value(), 8)); + + // Release ArrowSchema + ArrowSchemaRelease(c_schema); +} + +TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadStringData) { + // Create file writer + auto file_writer = std::make_shared(fs_, base_path_); + + // Create the BTree global index writer + auto writer = std::make_shared("string_field", file_writer, pool_); + + // Create an Arrow array with string values + auto json_array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::utf8(), R"(["apple", "banana", "cherry", "apple", "banana"])"); + ASSERT_OK(json_array.status()); + + ArrowArray* c_array; + ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + + // Add batch + ASSERT_OK(writer->AddBatch(c_array)); + + // Finish writing + auto result = writer->Finish(); + ASSERT_OK(result.status()); + auto metas = result.value(); + ASSERT_EQ(metas.size(), 1); + + // Release ArrowArray + ArrowArrayRelease(c_array); + + // Now read back + auto file_reader = std::make_shared(fs_, base_path_); + std::map options; + BTreeGlobalIndexer indexer(options); + + // Create ArrowSchema + auto schema_result = CreateArrowSchema(arrow::utf8(), "string_field"); + ASSERT_OK(schema_result.status()); + ArrowSchema* c_schema = schema_result.value(); + + // Create reader + auto reader_result = indexer.CreateReader(c_schema, file_reader, metas, pool_); + ASSERT_OK(reader_result.status()); + auto reader = reader_result.value(); + + // Test VisitEqual for "apple" (should return row IDs 0 and 3) + Literal literal_apple(FieldType::STRING, "apple", 5); + auto equal_result = reader->VisitEqual(literal_apple); + ASSERT_OK(equal_result.status()); + EXPECT_TRUE(ContainsRowId(equal_result.value(), 0)); + EXPECT_TRUE(ContainsRowId(equal_result.value(), 3)); + + // Release ArrowSchema + ArrowSchemaRelease(c_schema); +} + +TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadWithNulls) { + // Create file writer + auto file_writer = std::make_shared(fs_, base_path_); + + // Create the BTree global index writer + auto writer = std::make_shared("int_field", file_writer, pool_); + + // Create an Arrow array with null values + // Row IDs: 0->1, 1->null, 2->3, 3->null, 4->5 + auto json_array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::int32(), "[1, null, 3, null, 5]"); + ASSERT_OK(json_array.status()); + + ArrowArray* c_array; + ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + + // Add batch + ASSERT_OK(writer->AddBatch(c_array)); + + // Finish writing + auto result = writer->Finish(); + ASSERT_OK(result.status()); + auto metas = result.value(); + ASSERT_EQ(metas.size(), 1); + + // Release ArrowArray + ArrowArrayRelease(c_array); + + // Now read back + auto file_reader = std::make_shared(fs_, base_path_); + std::map options; + BTreeGlobalIndexer indexer(options); + + // Create ArrowSchema + auto schema_result = CreateArrowSchema(arrow::int32(), "int_field"); + ASSERT_OK(schema_result.status()); + ArrowSchema* c_schema = schema_result.value(); + + // Create reader + auto reader_result = indexer.CreateReader(c_schema, file_reader, metas, pool_); + ASSERT_OK(reader_result.status()); + auto reader = reader_result.value(); + + // Test VisitIsNull (should return row IDs 1 and 3) + auto is_null_result = reader->VisitIsNull(); + ASSERT_OK(is_null_result.status()); + EXPECT_TRUE(ContainsRowId(is_null_result.value(), 1)); + EXPECT_TRUE(ContainsRowId(is_null_result.value(), 3)); + EXPECT_FALSE(ContainsRowId(is_null_result.value(), 0)); + + // Test VisitIsNotNull (should return row IDs 0, 2, 4) + auto is_not_null_result = reader->VisitIsNotNull(); + ASSERT_OK(is_not_null_result.status()); + EXPECT_TRUE(ContainsRowId(is_not_null_result.value(), 0)); + EXPECT_TRUE(ContainsRowId(is_not_null_result.value(), 2)); + EXPECT_TRUE(ContainsRowId(is_not_null_result.value(), 4)); + EXPECT_FALSE(ContainsRowId(is_not_null_result.value(), 1)); + + // Release ArrowSchema + ArrowSchemaRelease(c_schema); +} + +TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadRangeQuery) { + // Create file writer + auto file_writer = std::make_shared(fs_, base_path_); + + // Create the BTree global index writer + auto writer = std::make_shared("int_field", file_writer, pool_); + + // Create an Arrow array with int values + auto json_array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::int32(), "[1, 2, 3, 4, 5]"); + ASSERT_OK(json_array.status()); + + ArrowArray* c_array; + ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + + // Add batch + ASSERT_OK(writer->AddBatch(c_array)); + + // Finish writing + auto result = writer->Finish(); + ASSERT_OK(result.status()); + auto metas = result.value(); + + // Release ArrowArray + ArrowArrayRelease(c_array); + + // Now read back + auto file_reader = std::make_shared(fs_, base_path_); + std::map options; + BTreeGlobalIndexer indexer(options); + + // Create ArrowSchema + auto schema_result = CreateArrowSchema(arrow::int32(), "int_field"); + ASSERT_OK(schema_result.status()); + ArrowSchema* c_schema = schema_result.value(); + + // Create reader + auto reader_result = indexer.CreateReader(c_schema, file_reader, metas, pool_); + ASSERT_OK(reader_result.status()); + auto reader = reader_result.value(); + + // Test VisitLessThan for value 3 (should return row IDs 0, 1) + Literal literal_3(static_cast(3)); + auto lt_result = reader->VisitLessThan(literal_3); + ASSERT_OK(lt_result.status()); + EXPECT_TRUE(ContainsRowId(lt_result.value(), 0)); + EXPECT_TRUE(ContainsRowId(lt_result.value(), 1)); + EXPECT_FALSE(ContainsRowId(lt_result.value(), 2)); + + // Test VisitGreaterOrEqual for value 3 (should return row IDs 2, 3, 4) + auto gte_result = reader->VisitGreaterOrEqual(literal_3); + ASSERT_OK(gte_result.status()); + EXPECT_TRUE(ContainsRowId(gte_result.value(), 2)); + EXPECT_TRUE(ContainsRowId(gte_result.value(), 3)); + EXPECT_TRUE(ContainsRowId(gte_result.value(), 4)); + EXPECT_FALSE(ContainsRowId(gte_result.value(), 1)); + + // Release ArrowSchema + ArrowSchemaRelease(c_schema); +} + +TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadInQuery) { + // Create file writer + auto file_writer = std::make_shared(fs_, base_path_); + + // Create the BTree global index writer + auto writer = std::make_shared("int_field", file_writer, pool_); + + // Create an Arrow array with int values + auto json_array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::int32(), "[1, 2, 3, 4, 5]"); + ASSERT_OK(json_array.status()); + + ArrowArray* c_array; + ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + + // Add batch + ASSERT_OK(writer->AddBatch(c_array)); + + // Finish writing + auto result = writer->Finish(); + ASSERT_OK(result.status()); + auto metas = result.value(); + + // Release ArrowArray + ArrowArrayRelease(c_array); + + // Now read back + auto file_reader = std::make_shared(fs_, base_path_); + std::map options; + BTreeGlobalIndexer indexer(options); + + // Create ArrowSchema + auto schema_result = CreateArrowSchema(arrow::int32(), "int_field"); + ASSERT_OK(schema_result.status()); + ArrowSchema* c_schema = schema_result.value(); + + // Create reader + auto reader_result = indexer.CreateReader(c_schema, file_reader, metas, pool_); + ASSERT_OK(reader_result.status()); + auto reader = reader_result.value(); + + // Test VisitIn for values 1, 3, 5 (should return row IDs 0, 2, 4) + std::vector in_literals = { + Literal(static_cast(1)), + Literal(static_cast(3)), + Literal(static_cast(5)) + }; + auto in_result = reader->VisitIn(in_literals); + ASSERT_OK(in_result.status()); + EXPECT_TRUE(ContainsRowId(in_result.value(), 0)); + EXPECT_TRUE(ContainsRowId(in_result.value(), 2)); + EXPECT_TRUE(ContainsRowId(in_result.value(), 4)); + EXPECT_FALSE(ContainsRowId(in_result.value(), 1)); + EXPECT_FALSE(ContainsRowId(in_result.value(), 3)); + + // Release ArrowSchema + ArrowSchemaRelease(c_schema); +} + +} // namespace paimon::test \ No newline at end of file diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp new file mode 100644 index 000000000..1507f9199 --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp @@ -0,0 +1,245 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "paimon/common/global_index/btree/btree_global_index_writer.h" + +#include + +#include + +#include "paimon/common/memory/memory_slice_output.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/crc32c.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/memory/bytes.h" + +namespace paimon { + +BTreeGlobalIndexWriter::BTreeGlobalIndexWriter( + const std::string& field_name, + const std::shared_ptr& file_writer, + const std::shared_ptr& pool, + int32_t block_size, + int64_t expected_entries) + : field_name_(field_name), + file_writer_(file_writer), + pool_(pool), + block_size_(block_size), + expected_entries_(expected_entries), + null_bitmap_(std::make_shared()), + has_nulls_(false), + current_row_id_(0), + bloom_filter_(std::make_shared(expected_entries, 0.01)) {} + +Status BTreeGlobalIndexWriter::AddBatch(::ArrowArray* arrow_array) { + if (!arrow_array) { + return Status::Invalid("ArrowArray is null"); + } + + // Import Arrow array + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr array, + arrow::ImportArray(arrow_array, arrow::null())); + + // Initialize SST writer on first batch + if (!sst_writer_) { + PAIMON_ASSIGN_OR_RAISE(file_name_, file_writer_->NewFileName(field_name_)); + PAIMON_ASSIGN_OR_RAISE(output_stream_, file_writer_->NewOutputStream(file_name_)); + sst_writer_ = std::make_unique(output_stream_, pool_, bloom_filter_, + block_size_, nullptr); + } + + // Group row IDs by key value + std::unordered_map> key_to_row_ids; + + // Process each element in the array + for (int64_t i = 0; i < array->length(); ++i) { + int64_t row_id = current_row_id_ + i; + + if (array->IsNull(i)) { + // Track null values + null_bitmap_->Add(row_id); + has_nulls_ = true; + continue; + } + + // Convert array element to string key + // For simplicity, we use string representation for all types + // TODO: Support type-specific serialization for better comparison + std::string key_str; + + // Get the value as string based on array type + auto type_id = array->type_id(); + switch (type_id) { + case arrow::Type::STRING: + case arrow::Type::BINARY: { + auto str_array = std::static_pointer_cast(array); + key_str = std::string(str_array->GetView(i)); + break; + } + case arrow::Type::INT32: { + auto int_array = std::static_pointer_cast(array); + key_str = std::to_string(int_array->Value(i)); + break; + } + case arrow::Type::INT64: { + auto int_array = std::static_pointer_cast(array); + key_str = std::to_string(int_array->Value(i)); + break; + } + case arrow::Type::FLOAT: { + auto float_array = std::static_pointer_cast(array); + key_str = std::to_string(float_array->Value(i)); + break; + } + case arrow::Type::DOUBLE: { + auto double_array = std::static_pointer_cast(array); + key_str = std::to_string(double_array->Value(i)); + break; + } + case arrow::Type::BOOL: { + auto bool_array = std::static_pointer_cast(array); + key_str = bool_array->Value(i) ? "1" : "0"; + break; + } + case arrow::Type::DATE32: { + auto date_array = std::static_pointer_cast(array); + key_str = std::to_string(date_array->Value(i)); + break; + } + case arrow::Type::TIMESTAMP: { + auto ts_array = std::static_pointer_cast(array); + key_str = std::to_string(ts_array->Value(i)); + break; + } + default: + return Status::NotImplemented("Unsupported arrow type for BTree index: " + + array->type()->ToString()); + } + + key_to_row_ids[key_str].push_back(row_id); + } + + // Write each key and its row IDs to the SST file + for (const auto& [key_str, row_ids] : key_to_row_ids) { + auto key_bytes = Bytes::AllocateBytes(key_str, pool_.get()); + auto key = std::shared_ptr(key_bytes.release()); + + // Track first and last keys + if (!first_key_) { + first_key_ = key; + } + last_key_ = key; + + // Write key-value pair + PAIMON_RETURN_NOT_OK(WriteKeyValue(key, row_ids)); + } + + current_row_id_ += array->length(); + return Status::OK(); +} + +Status BTreeGlobalIndexWriter::WriteKeyValue(const std::shared_ptr& key, + const std::vector& row_ids) { + auto value = SerializeRowIds(row_ids); + // Copy key since we can't move from a const reference + auto key_copy = key; + return sst_writer_->Write(std::move(key_copy), std::move(value)); +} + +std::shared_ptr BTreeGlobalIndexWriter::SerializeRowIds(const std::vector& row_ids) { + // Format: [num_row_ids (varint)][row_id1 (int64)][row_id2]... + int32_t estimated_size = 10 + row_ids.size() * 8; // Conservative estimate + auto output = std::make_shared(estimated_size, pool_.get()); + + output->WriteVarLenLong(static_cast(row_ids.size())); + for (int64_t row_id : row_ids) { + output->WriteValue(row_id); + } + + auto slice = output->ToSlice(); + return slice->CopyBytes(pool_.get()); +} + +Result> BTreeGlobalIndexWriter::WriteNullBitmap( + const std::shared_ptr& out) { + if (!has_nulls_ || null_bitmap_->IsEmpty()) { + return std::shared_ptr(nullptr); + } + + // Serialize null bitmap + std::vector bitmap_data = null_bitmap_->Serialize(); + if (bitmap_data.empty()) { + return std::shared_ptr(nullptr); + } + + // Get current position for the block handle + PAIMON_ASSIGN_OR_RAISE(int64_t offset, out->GetPos()); + + // Write bitmap data + PAIMON_RETURN_NOT_OK(out->Write(reinterpret_cast(bitmap_data.data()), + bitmap_data.size())); + + // Calculate and write CRC32C + uint32_t crc = CRC32C::calculate(reinterpret_cast(bitmap_data.data()), + bitmap_data.size()); + PAIMON_RETURN_NOT_OK(out->Write(reinterpret_cast(&crc), sizeof(crc))); + + return std::make_shared(offset, bitmap_data.size()); +} + +Result> BTreeGlobalIndexWriter::Finish() { + if (!sst_writer_) { + // No data was written, return empty metadata + return std::vector(); + } + + // Flush any remaining data in the data block writer + PAIMON_RETURN_NOT_OK(sst_writer_->Flush()); + + // Write index block + PAIMON_ASSIGN_OR_RAISE(auto index_block_handle, sst_writer_->WriteIndexBlock()); + + // Write bloom filter + PAIMON_ASSIGN_OR_RAISE(auto bloom_filter_handle, sst_writer_->WriteBloomFilter()); + + // Write null bitmap + PAIMON_ASSIGN_OR_RAISE(auto null_bitmap_handle, WriteNullBitmap(output_stream_)); + + // Write BTree file footer + auto footer = std::make_shared(bloom_filter_handle, index_block_handle, + null_bitmap_handle); + auto footer_slice = BTreeFileFooter::Write(footer, pool_.get()); + auto footer_bytes = footer_slice->CopyBytes(pool_.get()); + PAIMON_RETURN_NOT_OK(output_stream_->Write(footer_bytes->data(), footer_bytes->size())); + + // Close the output stream + PAIMON_RETURN_NOT_OK(output_stream_->Close()); + + // Get file size + PAIMON_ASSIGN_OR_RAISE(int64_t file_size, file_writer_->GetFileSize(file_name_)); + + // Create index meta + auto index_meta = std::make_shared(first_key_, last_key_, has_nulls_); + auto meta_bytes = index_meta->Serialize(pool_.get()); + + // Create GlobalIndexIOMeta + std::string file_path = file_writer_->ToPath(file_name_); + GlobalIndexIOMeta io_meta(file_path, file_size, current_row_id_ - 1, meta_bytes); + + return std::vector{io_meta}; +} + +} // namespace paimon \ No newline at end of file diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.h b/src/paimon/common/global_index/btree/btree_global_index_writer.h new file mode 100644 index 000000000..6fa15de1b --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.h @@ -0,0 +1,87 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include "paimon/common/global_index/btree/btree_file_footer.h" +#include "paimon/common/global_index/btree/btree_index_meta.h" +#include "paimon/common/sst/sst_file_writer.h" +#include "paimon/common/utils/roaring_navigable_map64.h" +#include "paimon/global_index/global_index_writer.h" +#include "paimon/global_index/io/global_index_file_writer.h" + +namespace paimon { + +/// Writer for BTree Global Index files. +/// This writer builds an SST file where each key maps to a list of row IDs. +class BTreeGlobalIndexWriter : public GlobalIndexWriter { +public: + BTreeGlobalIndexWriter(const std::string& field_name, + const std::shared_ptr& file_writer, + const std::shared_ptr& pool, + int32_t block_size = 4096, + int64_t expected_entries = 100000); + + ~BTreeGlobalIndexWriter() override = default; + + /// Add a batch of data from an Arrow array. + /// The Arrow array should contain a single column of the indexed field. + Status AddBatch(::ArrowArray* arrow_array) override; + + /// Finish writing and return the index metadata. + Result> Finish() override; + +private: + // Helper method to write a key-value pair to the SST file + Status WriteKeyValue(const std::shared_ptr& key, const std::vector& row_ids); + + // Helper method to serialize row IDs into a Bytes object + std::shared_ptr SerializeRowIds(const std::vector& row_ids); + + // Helper method to write null bitmap to the output stream + Result> WriteNullBitmap(const std::shared_ptr& out); + +private: + std::string field_name_; + std::shared_ptr file_writer_; + std::shared_ptr pool_; + int32_t block_size_; + int64_t expected_entries_; + + // SST file writer + std::unique_ptr sst_writer_; + std::shared_ptr output_stream_; + std::string file_name_; + + // Track first and last keys for index meta + std::shared_ptr first_key_; + std::shared_ptr last_key_; + + // Null bitmap tracking + std::shared_ptr null_bitmap_; + bool has_nulls_; + + // Current row ID counter + int64_t current_row_id_; + + // Bloom filter for the SST file + std::shared_ptr bloom_filter_; +}; + +} // namespace paimon \ No newline at end of file diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp new file mode 100644 index 000000000..a86498224 --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp @@ -0,0 +1,307 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include + +#include "paimon/common/global_index/btree/btree_global_index_writer.h" +#include "paimon/fs/file_system.h" +#include "paimon/global_index/io/global_index_file_writer.h" +#include "paimon/memory/memory_pool.h" +#include "paimon/testing/utils/testharness.h" + +namespace paimon::test { + +class FakeGlobalIndexFileWriter : public GlobalIndexFileWriter { +public: + FakeGlobalIndexFileWriter(const std::shared_ptr& fs, const std::string& base_path) + : fs_(fs), base_path_(base_path), file_counter_(0) {} + + Result NewFileName(const std::string& prefix) const override { + return prefix + "_" + std::to_string(file_counter_++); + } + + Result> NewOutputStream(const std::string& file_name) const override { + return fs_->CreateOutputStream(base_path_ + "/" + file_name); + } + + Result GetFileSize(const std::string& file_name) const override { + PAIMON_ASSIGN_OR_RAISE(auto file_status, fs_->GetFileStatus(base_path_ + "/" + file_name)); + return file_status->Length(); + } + + std::string ToPath(const std::string& file_name) const override { + return base_path_ + "/" + file_name; + } + +private: + std::shared_ptr fs_; + std::string base_path_; + mutable int64_t file_counter_; +}; + +class BTreeGlobalIndexWriterTest : public ::testing::Test { +protected: + void SetUp() override { + pool_ = std::make_shared(); + test_dir_ = UniqueTestDirectory::Create("local"); + ASSERT_OK(test_dir_.status()); + fs_ = test_dir_->GetFileSystem(); + base_path_ = test_dir_->Str(); + } + + void TearDown() override { test_dir_->Delete(); } + + std::shared_ptr pool_; + Result> test_dir_; + std::shared_ptr fs_; + std::string base_path_; +}; + +TEST_F(BTreeGlobalIndexWriterTest, WriteIntData) { + // Create a fake file writer + auto file_writer = std::make_shared(fs_, base_path_); + + // Create the BTree global index writer + auto writer = std::make_shared("int_field", file_writer, pool_); + + // Create an Arrow array with int values + auto json_array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::int32(), "[1, 2, 3, 2, 1, 4, 5, 5, 5]"); + ASSERT_OK(json_array.status()); + + // Export to ArrowArray + ArrowArray* c_array; + ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + + // Add batch + auto status = writer->AddBatch(c_array); + ASSERT_OK(status); + + // Finish writing + auto result = writer->Finish(); + ASSERT_OK(result.status()); + auto metas = result.value(); + ASSERT_EQ(metas.size(), 1); + + // Verify metadata + const auto& meta = metas[0]; + EXPECT_FALSE(meta.file_path.empty()); + EXPECT_GT(meta.file_size, 0); + EXPECT_EQ(meta.range_end, 8); // 9 elements, 0-indexed + + // Release the ArrowArray + ArrowArrayRelease(c_array); +} + +TEST_F(BTreeGlobalIndexWriterTest, WriteStringData) { + // Create a fake file writer + auto file_writer = std::make_shared(fs_, base_path_); + + // Create the BTree global index writer + auto writer = std::make_shared("string_field", file_writer, pool_); + + // Create an Arrow array with string values + auto json_array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::utf8(), R"(["apple", "banana", "cherry", "apple", "banana"])"); + ASSERT_OK(json_array.status()); + + // Export to ArrowArray + ArrowArray* c_array; + ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + + // Add batch + auto status = writer->AddBatch(c_array); + ASSERT_OK(status); + + // Finish writing + auto result = writer->Finish(); + ASSERT_OK(result.status()); + auto metas = result.value(); + ASSERT_EQ(metas.size(), 1); + + // Verify metadata + const auto& meta = metas[0]; + EXPECT_FALSE(meta.file_path.empty()); + EXPECT_GT(meta.file_size, 0); + + // Release the ArrowArray + ArrowArrayRelease(c_array); +} + +TEST_F(BTreeGlobalIndexWriterTest, WriteWithNulls) { + // Create a fake file writer + auto file_writer = std::make_shared(fs_, base_path_); + + // Create the BTree global index writer + auto writer = std::make_shared("int_field", file_writer, pool_); + + // Create an Arrow array with null values + auto json_array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::int32(), "[1, null, 3, null, 5]"); + ASSERT_OK(json_array.status()); + + // Export to ArrowArray + ArrowArray* c_array; + ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + + // Add batch + auto status = writer->AddBatch(c_array); + ASSERT_OK(status); + + // Finish writing + auto result = writer->Finish(); + ASSERT_OK(result.status()); + auto metas = result.value(); + ASSERT_EQ(metas.size(), 1); + + // Verify metadata + const auto& meta = metas[0]; + EXPECT_FALSE(meta.file_path.empty()); + EXPECT_GT(meta.file_size, 0); + + // Verify that metadata contains null bitmap info (has_nulls should be true) + EXPECT_NE(meta.metadata, nullptr); + + // Release the ArrowArray + ArrowArrayRelease(c_array); +} + +TEST_F(BTreeGlobalIndexWriterTest, WriteMultipleBatches) { + // Create a fake file writer + auto file_writer = std::make_shared(fs_, base_path_); + + // Create the BTree global index writer + auto writer = std::make_shared("int_field", file_writer, pool_); + + // Create first batch + auto json_array1 = arrow::ipc::internal::json::ArrayFromJSON( + arrow::int32(), "[1, 2, 3]"); + ASSERT_OK(json_array1.status()); + + ArrowArray* c_array1; + ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array1, &c_array1)); + + // Add first batch + auto status1 = writer->AddBatch(c_array1); + ASSERT_OK(status1); + ArrowArrayRelease(c_array1); + + // Create second batch + auto json_array2 = arrow::ipc::internal::json::ArrayFromJSON( + arrow::int32(), "[4, 5, 6]"); + ASSERT_OK(json_array2.status()); + + ArrowArray* c_array2; + ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array2, &c_array2)); + + // Add second batch + auto status2 = writer->AddBatch(c_array2); + ASSERT_OK(status2); + ArrowArrayRelease(c_array2); + + // Finish writing + auto result = writer->Finish(); + ASSERT_OK(result.status()); + auto metas = result.value(); + ASSERT_EQ(metas.size(), 1); + + // Verify metadata + const auto& meta = metas[0]; + EXPECT_EQ(meta.range_end, 5); // 6 elements, 0-indexed +} + +TEST_F(BTreeGlobalIndexWriterTest, WriteEmptyData) { + // Create a fake file writer + auto file_writer = std::make_shared(fs_, base_path_); + + // Create the BTree global index writer + auto writer = std::make_shared("int_field", file_writer, pool_); + + // Finish without adding any data + auto result = writer->Finish(); + ASSERT_OK(result.status()); + auto metas = result.value(); + ASSERT_EQ(metas.size(), 0); // No data, no metadata +} + +TEST_F(BTreeGlobalIndexWriterTest, WriteAllNulls) { + // Create a fake file writer + auto file_writer = std::make_shared(fs_, base_path_); + + // Create the BTree global index writer + auto writer = std::make_shared("int_field", file_writer, pool_); + + // Create an Arrow array with all null values + auto json_array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::int32(), "[null, null, null]"); + ASSERT_OK(json_array.status()); + + // Export to ArrowArray + ArrowArray* c_array; + ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + + // Add batch + auto status = writer->AddBatch(c_array); + ASSERT_OK(status); + + // Finish writing + auto result = writer->Finish(); + ASSERT_OK(result.status()); + auto metas = result.value(); + ASSERT_EQ(metas.size(), 1); + + // Verify metadata - should have null bitmap but no keys + const auto& meta = metas[0]; + EXPECT_NE(meta.metadata, nullptr); + + // Release the ArrowArray + ArrowArrayRelease(c_array); +} + +TEST_F(BTreeGlobalIndexWriterTest, WriteDoubleData) { + // Create a fake file writer + auto file_writer = std::make_shared(fs_, base_path_); + + // Create the BTree global index writer + auto writer = std::make_shared("double_field", file_writer, pool_); + + // Create an Arrow array with double values + auto json_array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::float64(), "[1.5, 2.5, 3.5, 1.5]"); + ASSERT_OK(json_array.status()); + + // Export to ArrowArray + ArrowArray* c_array; + ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + + // Add batch + auto status = writer->AddBatch(c_array); + ASSERT_OK(status); + + // Finish writing + auto result = writer->Finish(); + ASSERT_OK(result.status()); + auto metas = result.value(); + ASSERT_EQ(metas.size(), 1); + + // Release the ArrowArray + ArrowArrayRelease(c_array); +} + +} // namespace paimon::test \ No newline at end of file diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index a9e96f2e8..6b9fa7df7 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2024-present Alibaba Inc. + * Copyright 2026-present Alibaba Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,6 +20,7 @@ #include "arrow/c/bridge.h" #include "paimon/common/global_index/btree/btree_file_footer.h" +#include "paimon/common/global_index/btree/btree_global_index_writer.h" #include "paimon/common/global_index/btree/btree_index_meta.h" #include "paimon/common/memory/memory_slice.h" #include "paimon/common/memory/memory_slice_input.h" @@ -34,6 +35,13 @@ namespace paimon { +Result> BTreeGlobalIndexer::CreateWriter( + const std::string& field_name, ::ArrowSchema* arrow_schema, + const std::shared_ptr& file_writer, + const std::shared_ptr& pool) const { + return std::make_shared(field_name, file_writer, pool); +} + // Forward declarations for helper functions static Result> LiteralToMemorySlice(const Literal& literal, MemoryPool* pool); @@ -193,19 +201,29 @@ CreateComparator(FieldType field_type) { return 0; }; case FieldType::DECIMAL: - // Decimal comparison (stored as 16 bytes for DECIMAL128) + // Decimal comparison (stored as 16 bytes big-endian for DECIMAL128) + // Big-endian storage ensures correct lexicographic byte comparison for signed values return [](const std::shared_ptr& a, const std::shared_ptr& b) -> int32_t { if (!a || !b) return 0; auto a_bytes = a->GetHeapMemory(); auto b_bytes = b->GetHeapMemory(); if (!a_bytes || !b_bytes) return 0; - // Compare bytes directly for DECIMAL128 - size_t min_len = std::min(a_bytes->size(), b_bytes->size()); - int cmp = memcmp(a_bytes->data(), b_bytes->data(), min_len); - if (cmp != 0) return cmp < 0 ? -1 : 1; - if (a_bytes->size() < b_bytes->size()) return -1; - if (a_bytes->size() > b_bytes->size()) return 1; + // Both should be 16 bytes for DECIMAL128 + if (a_bytes->size() < 16 || b_bytes->size() < 16) { + // Fallback to lexicographic comparison for truncated data + size_t min_len = std::min(a_bytes->size(), b_bytes->size()); + int cmp = memcmp(a_bytes->data(), b_bytes->data(), min_len); + if (cmp != 0) return cmp < 0 ? -1 : 1; + if (a_bytes->size() < b_bytes->size()) return -1; + if (a_bytes->size() > b_bytes->size()) return 1; + return 0; + } + // For big-endian signed int128, direct byte comparison works correctly + // because the sign bit is in the first byte + int cmp = memcmp(a_bytes->data(), b_bytes->data(), 16); + if (cmp < 0) return -1; + if (cmp > 0) return 1; return 0; }; default: @@ -253,17 +271,24 @@ Result> BTreeGlobalIndexer::CreateReader( // prepare file footer auto cache_manager = std::make_shared(); auto block_cache = std::make_shared(meta.file_path, in, pool, cache_manager); - auto segment = block_cache->GetBlock(meta.file_size - BTreeFileFooter::ENCODED_LENGTH, - BTreeFileFooter::ENCODED_LENGTH, true); + PAIMON_ASSIGN_OR_RAISE(auto segment, + block_cache->GetBlock(meta.file_size - BTreeFileFooter::ENCODED_LENGTH, + BTreeFileFooter::ENCODED_LENGTH, true)); PAIMON_ASSIGN_OR_RAISE(std::shared_ptr footer, BTreeFileFooter::Read(MemorySlice::Wrap(segment)->ToInput())); // prepare null_bitmap and sst_file_reader PAIMON_ASSIGN_OR_RAISE(std::shared_ptr null_bitmap, ReadNullBitmap(block_cache, footer->GetNullBitmapHandle())); - std::shared_ptr fs; + + // Wrap the comparator to return Result + MemorySlice::SliceComparator result_comparator = + [comparator](const std::shared_ptr& a, + const std::shared_ptr& b) -> Result { + return comparator(a, b); + }; PAIMON_ASSIGN_OR_RAISE(std::shared_ptr sst_file_reader, - SstFileReader::Create(pool, fs, meta.file_path, comparator)); + SstFileReader::Create(pool, in, result_comparator)); auto index_meta = BTreeIndexMeta::Deserialize(meta.metadata, pool.get()); @@ -311,7 +336,8 @@ Result> BTreeGlobalIndexer::ReadNullBitma } // Read bytes and crc value - auto segment = cache->GetBlock(block_handle->Offset(), block_handle->Size() + 4, false); + PAIMON_ASSIGN_OR_RAISE(auto segment, + cache->GetBlock(block_handle->Offset(), block_handle->Size() + 4, false)); auto slice = MemorySlice::Wrap(segment); auto slice_input = slice->ToInput(); @@ -358,6 +384,7 @@ BTreeGlobalIndexReader::BTreeGlobalIndexReader( null_bitmap_(null_bitmap), min_key_(min_key), max_key_(max_key), + files_(files), pool_(pool), comparator_(std::move(comparator)) {} @@ -455,10 +482,55 @@ Result> BTreeGlobalIndexReader::VisitContains Result> BTreeGlobalIndexReader::VisitLike( const Literal& literal) { - // BTree index can only efficiently handle LIKE patterns of the form "prefix%". + // BTree index can efficiently handle LIKE patterns of the form "prefix%". // For other patterns (e.g., "%suffix", "%contains%"), return all non-null rows as fallback. - // Note: This is a conservative approach that doesn't prune any rows. - // TODO: Parse LIKE pattern and use VisitStartsWith for "prefix%" patterns. + if (literal.IsNull()) { + return Status::Invalid("LIKE pattern cannot be null"); + } + + // Get the pattern string + std::string pattern = literal.GetValue(); + + // Check if pattern is of the form "prefix%" (starts with a literal prefix and ends with %) + // The prefix must not contain any wildcard characters (_ or %) + // Escape sequences with \ are not supported in this simple implementation + bool is_prefix_pattern = false; + std::string prefix; + + // Find the position of the first wildcard character + size_t first_wildcard = pattern.find_first_of("_%"); + + if (first_wildcard != std::string::npos) { + // Check if the pattern is exactly "prefix%" form + // - First wildcard must be '%' + // - It must be at the end of the pattern + // - No other wildcards before it + if (pattern[first_wildcard] == '%' && first_wildcard == pattern.length() - 1) { + // Check if there are any wildcards in the prefix part + bool has_wildcard_in_prefix = false; + for (size_t i = 0; i < first_wildcard; ++i) { + if (pattern[i] == '_' || pattern[i] == '%') { + has_wildcard_in_prefix = true; + break; + } + } + if (!has_wildcard_in_prefix) { + is_prefix_pattern = true; + prefix = pattern.substr(0, first_wildcard); + } + } + } else { + // No wildcards at all - this is an exact match, not a prefix pattern + // We could optimize this to VisitEqual, but for simplicity, fall through to fallback + } + + if (is_prefix_pattern) { + // Use VisitStartsWith for prefix% patterns + Literal prefix_literal(FieldType::STRING, prefix.c_str(), prefix.length()); + return VisitStartsWith(prefix_literal); + } + + // For other patterns, return all non-null rows as fallback return std::make_shared([this]() -> Result { PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); return result.GetBitmap(); @@ -667,30 +739,33 @@ Result> BTreeGlobalIndexReader::VisitFullText Result BTreeGlobalIndexReader::RangeQuery( const std::shared_ptr& lower_bound, const std::shared_ptr& upper_bound, bool lower_inclusive, bool upper_inclusive) { - // Create an index block iterator to iterate through data blocks - auto index_block_reader = sst_file_reader_->GetIndexBlockReader(); - auto index_iterator = index_block_reader->Iterator(); + // Create an SST file iterator to iterate through data blocks + auto sst_iterator = sst_file_reader_->CreateIterator(); - // Seek index iterator to the lower bound - index_iterator->SeekTo(lower_bound); + // Seek iterator to the lower bound + if (lower_bound) { + auto lower_bytes = lower_bound->GetHeapMemory(); + PAIMON_RETURN_NOT_OK(sst_iterator->SeekTo(lower_bytes)); + } RoaringNavigableMap64 result; - // Iterate through all relevant data blocks + // Iterate through all relevant data blocks using GetNextBlock + std::unique_ptr index_iterator; bool first_block = true; - while (index_iterator->HasNext()) { + while (true) { // Get the next data block PAIMON_ASSIGN_OR_RAISE(std::unique_ptr data_iterator, sst_file_reader_->GetNextBlock(index_iterator)); if (!data_iterator || !data_iterator->HasNext()) { - continue; + break; } // For the first block, we need to seek within the block to the exact position - if (first_block) { - data_iterator->SeekTo(lower_bound); + if (first_block && lower_bound) { + PAIMON_ASSIGN_OR_RAISE(bool found, data_iterator->SeekTo(lower_bound)); first_block = false; // After seeking, check if we still have data @@ -705,7 +780,7 @@ Result BTreeGlobalIndexReader::RangeQuery( // Compare key with bounds using the comparator const auto& comparator = comparator_; - int cmp_lower = comparator ? comparator(entry->key_, lower_bound) : 0; + int cmp_lower = comparator ? comparator(entry->key, lower_bound) : 0; // Check lower bound if (!lower_inclusive && cmp_lower == 0) { @@ -714,7 +789,7 @@ Result BTreeGlobalIndexReader::RangeQuery( } // Check upper bound - int cmp_upper = comparator ? comparator(entry->key_, upper_bound) : 0; + int cmp_upper = comparator ? comparator(entry->key, upper_bound) : 0; if (cmp_upper > 0 || (!upper_inclusive && cmp_upper == 0)) { // Key is beyond upper bound, we're done return result; @@ -722,7 +797,7 @@ Result BTreeGlobalIndexReader::RangeQuery( // Deserialize row IDs from the value // The value should contain an array of int64_t row IDs - auto value_bytes = entry->value_->CopyBytes(pool_.get()); + auto value_bytes = entry->value->CopyBytes(pool_.get()); auto value_slice = MemorySlice::Wrap(value_bytes); auto value_input = value_slice->ToInput(); @@ -741,9 +816,40 @@ Result BTreeGlobalIndexReader::RangeQuery( } Result BTreeGlobalIndexReader::AllNonNullRows() { - // Traverse all data to avoid returning null values, which is very advantageous in - // situations where there are many null values - // TODO do not traverse all data if less null values + // Optimization: when null values are few, construct the result by subtracting + // null_bitmap from a full range bitmap, instead of traversing all data blocks. + // + // We use a threshold: if null count is less than 10% of total rows, use the + // subtraction approach; otherwise, traverse all data blocks. + + if (files_.empty()) { + return RoaringNavigableMap64(); + } + + // Get total row count from range_end (inclusive last row id) + int64_t total_rows = files_[0].range_end + 1; + uint64_t null_count = null_bitmap_->GetLongCardinality(); + + // Threshold: use subtraction if null count < 10% of total rows + // and total rows is not too large (to avoid memory issues with huge bitmaps) + const double NULL_RATIO_THRESHOLD = 0.1; + const int64_t MAX_ROWS_FOR_SUBTRACTION = 10000000; // 10 million rows max + + bool use_subtraction = + (total_rows <= MAX_ROWS_FOR_SUBTRACTION) && + (null_count < static_cast(total_rows * NULL_RATIO_THRESHOLD)); + + if (use_subtraction) { + // Build full range bitmap [0, range_end] + RoaringNavigableMap64 result; + result.AddRange(Range(0, total_rows - 1)); + // Subtract null bitmap + result.AndNot(*null_bitmap_); + return result; + } + + // Fallback: traverse all data blocks + // This is more efficient when there are many null values if (!min_key_) { return RoaringNavigableMap64(); } @@ -887,13 +993,24 @@ static Result> LiteralToMemorySlice(const Literal& } } - // Handle decimal (DECIMAL128 stored as 16 bytes) + // Handle decimal (DECIMAL128 stored as 16 bytes big-endian) if (type == FieldType::DECIMAL) { try { - // Decimal values are stored as string representation for simplicity - // The actual storage format should match the index writer's format - std::string str_value = literal.ToString(); - auto bytes = Bytes::AllocateBytes(str_value, pool); + // Get the Decimal value and serialize as big-endian int128 + Decimal decimal_value = literal.GetValue(); + auto bytes = Bytes::AllocateBytes(16, pool); + // Store as big-endian for correct lexicographic comparison + // High 64 bits first, then low 64 bits + uint64_t high_bits = decimal_value.HighBits(); + uint64_t low_bits = decimal_value.LowBits(); + // Write high bits (bytes 0-7) + for (int i = 0; i < 8; ++i) { + bytes->data()[i] = static_cast((high_bits >> (56 - i * 8)) & 0xFF); + } + // Write low bits (bytes 8-15) + for (int i = 0; i < 8; ++i) { + bytes->data()[8 + i] = static_cast((low_bits >> (56 - i * 8)) & 0xFF); + } return MemorySlice::Wrap(std::shared_ptr(bytes.release())); } catch (const std::exception& e) { return Status::Invalid("Failed to convert decimal literal to MemorySlice: " + diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.h b/src/paimon/common/global_index/btree/btree_global_indexer.h index dee549749..febd8b685 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.h +++ b/src/paimon/common/global_index/btree/btree_global_indexer.h @@ -37,9 +37,7 @@ class BTreeGlobalIndexer : public GlobalIndexer { Result> CreateWriter( const std::string& field_name, ::ArrowSchema* arrow_schema, const std::shared_ptr& file_writer, - const std::shared_ptr& pool) const override { - return Status::NotImplemented("Writing btree global index not support yet"); - } + const std::shared_ptr& pool) const override; Result> CreateReader( ::ArrowSchema* arrow_schema, const std::shared_ptr& file_reader, @@ -136,6 +134,7 @@ class BTreeGlobalIndexReader : public GlobalIndexReader { std::shared_ptr null_bitmap_; std::shared_ptr min_key_; std::shared_ptr max_key_; + std::vector files_; std::shared_ptr pool_; std::function&, const std::shared_ptr&)> comparator_; diff --git a/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp b/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp new file mode 100644 index 000000000..778893d35 --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp @@ -0,0 +1,240 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "paimon/common/global_index/btree/btree_global_indexer.h" +#include "paimon/common/memory/memory_slice.h" +#include "paimon/memory/memory_pool.h" +#include "paimon/predicate/literal.h" +#include "paimon/common/utils/field_type_utils.h" + +namespace paimon::test { + +class BTreeGlobalIndexerTest : public ::testing::Test { +protected: + void SetUp() override { pool_ = std::make_shared(); } + + std::shared_ptr pool_; +}; + +// Test CreateComparator for STRING type +TEST_F(BTreeGlobalIndexerTest, CreateComparatorString) { + // Create two MemorySlices for comparison + auto slice_a = MemorySlice::Wrap( + std::shared_ptr(Bytes::AllocateBytes("apple", pool_.get()).release())); + auto slice_b = MemorySlice::Wrap( + std::shared_ptr(Bytes::AllocateBytes("banana", pool_.get()).release())); + auto slice_same = MemorySlice::Wrap( + std::shared_ptr(Bytes::AllocateBytes("apple", pool_.get()).release())); + + // Lexicographic comparison: "apple" < "banana" + auto bytes_a = slice_a->GetHeapMemory(); + auto bytes_b = slice_b->GetHeapMemory(); + ASSERT_NE(bytes_a, nullptr); + ASSERT_NE(bytes_b, nullptr); + + size_t min_len = std::min(bytes_a->size(), bytes_b->size()); + int cmp = memcmp(bytes_a->data(), bytes_b->data(), min_len); + EXPECT_LT(cmp, 0); // "apple" < "banana" + + // Same strings should be equal + auto bytes_same = slice_same->GetHeapMemory(); + EXPECT_EQ(bytes_a->size(), bytes_same->size()); + EXPECT_EQ(memcmp(bytes_a->data(), bytes_same->data(), bytes_a->size()), 0); +} + +// Test CreateComparator for INT type +TEST_F(BTreeGlobalIndexerTest, CreateComparatorInt) { + int32_t val1 = 100; + int32_t val2 = 200; + int32_t val3 = 100; + + auto bytes1 = Bytes::AllocateBytes(sizeof(int32_t), pool_.get()); + memcpy(bytes1->data(), &val1, sizeof(int32_t)); + auto slice1 = MemorySlice::Wrap(std::shared_ptr(bytes1.release())); + + auto bytes2 = Bytes::AllocateBytes(sizeof(int32_t), pool_.get()); + memcpy(bytes2->data(), &val2, sizeof(int32_t)); + auto slice2 = MemorySlice::Wrap(std::shared_ptr(bytes2.release())); + + auto bytes3 = Bytes::AllocateBytes(sizeof(int32_t), pool_.get()); + memcpy(bytes3->data(), &val3, sizeof(int32_t)); + auto slice3 = MemorySlice::Wrap(std::shared_ptr(bytes3.release())); + + // Compare values + EXPECT_LT(val1, val2); + EXPECT_EQ(val1, val3); +} + +// Test CreateComparator for BIGINT type +TEST_F(BTreeGlobalIndexerTest, CreateComparatorBigInt) { + int64_t val1 = 10000000000LL; + int64_t val2 = 20000000000LL; + + EXPECT_LT(val1, val2); +} + +// Test CreateComparator for FLOAT type +TEST_F(BTreeGlobalIndexerTest, CreateComparatorFloat) { + float val1 = 1.5f; + float val2 = 2.5f; + + EXPECT_LT(val1, val2); +} + +// Test CreateComparator for DOUBLE type +TEST_F(BTreeGlobalIndexerTest, CreateComparatorDouble) { + double val1 = 1.5; + double val2 = 2.5; + + EXPECT_LT(val1, val2); +} + +// Test LiteralToMemorySlice for STRING type +TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceString) { + Literal literal(FieldType::STRING, "test_value", 10); + EXPECT_FALSE(literal.IsNull()); + EXPECT_EQ(literal.GetType(), FieldType::STRING); + + std::string value = literal.GetValue(); + EXPECT_EQ(value, "test_value"); +} + +// Test LiteralToMemorySlice for INT type +TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceInt) { + Literal literal(static_cast(42)); + EXPECT_FALSE(literal.IsNull()); + EXPECT_EQ(literal.GetType(), FieldType::INT); + + int32_t value = literal.GetValue(); + EXPECT_EQ(value, 42); +} + +// Test LiteralToMemorySlice for BIGINT type +TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceBigInt) { + Literal literal(static_cast(12345678901234LL)); + EXPECT_FALSE(literal.IsNull()); + EXPECT_EQ(literal.GetType(), FieldType::BIGINT); + + int64_t value = literal.GetValue(); + EXPECT_EQ(value, 12345678901234LL); +} + +// Test LiteralToMemorySlice for FLOAT type +TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceFloat) { + Literal literal(3.14f); + EXPECT_FALSE(literal.IsNull()); + EXPECT_EQ(literal.GetType(), FieldType::FLOAT); + + float value = literal.GetValue(); + EXPECT_FLOAT_EQ(value, 3.14f); +} + +// Test LiteralToMemorySlice for DOUBLE type +TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceDouble) { + Literal literal(3.14159265358979); + EXPECT_FALSE(literal.IsNull()); + EXPECT_EQ(literal.GetType(), FieldType::DOUBLE); + + double value = literal.GetValue(); + EXPECT_DOUBLE_EQ(value, 3.14159265358979); +} + +// Test LiteralToMemorySlice for BOOLEAN type +TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceBoolean) { + Literal literal_true(true); + Literal literal_false(false); + + EXPECT_FALSE(literal_true.IsNull()); + EXPECT_EQ(literal_true.GetType(), FieldType::BOOLEAN); + EXPECT_TRUE(literal_true.GetValue()); + EXPECT_FALSE(literal_false.GetValue()); +} + +// Test LiteralToMemorySlice for null literal +TEST_F(BTreeGlobalIndexerTest, LiteralNull) { + Literal literal(FieldType::STRING); + EXPECT_TRUE(literal.IsNull()); + EXPECT_EQ(literal.GetType(), FieldType::STRING); +} + +// Test BTreeGlobalIndexer creation +TEST_F(BTreeGlobalIndexerTest, CreateIndexer) { + std::map options; + BTreeGlobalIndexer indexer(options); + + // CreateWriter should return NotImplemented + auto writer_result = indexer.CreateWriter( + "test_field", nullptr, nullptr, pool_); + EXPECT_FALSE(writer_result.ok()); + EXPECT_TRUE(writer_result.status().IsNotImplemented()); +} + +// Test RangeQuery boundary conditions conceptually +TEST_F(BTreeGlobalIndexerTest, RangeQueryBoundaries) { + // This test verifies the boundary condition logic conceptually + // Inclusive lower bound: key >= lower_bound + // Exclusive lower bound: key > lower_bound + // Inclusive upper bound: key <= upper_bound + // Exclusive upper bound: key < upper_bound + + // For a range query [lower, upper] (both inclusive): + // - We include keys where key >= lower AND key <= upper + + // For a range query (lower, upper) (both exclusive): + // - We include keys where key > lower AND key < upper + + // The actual range query is tested in integration tests + SUCCEED(); +} + +// Test ToGlobalIndexResult with different result types +TEST_F(BTreeGlobalIndexerTest, ToGlobalIndexResultConcept) { + // This test verifies the concept of converting FileIndexResult to GlobalIndexResult + // - Remain: all rows match -> full bitmap + // - Skip: no rows match -> empty bitmap + // - BitmapIndexResult: specific rows match -> bitmap from result + + // The actual conversion is tested in integration tests + SUCCEED(); +} + +// Test Visit methods conceptually +TEST_F(BTreeGlobalIndexerTest, VisitMethodsConcept) { + // This test verifies the concept of various visit methods: + // - VisitEqual: exact match + // - VisitNotEqual: all rows except exact match + // - VisitLessThan: keys < literal + // - VisitLessOrEqual: keys <= literal + // - VisitGreaterThan: keys > literal + // - VisitGreaterOrEqual: keys >= literal + // - VisitIn: keys in set of literals + // - VisitNotIn: keys not in set of literals + // - VisitBetween: keys in [from, to] + // - VisitNotBetween: keys not in [from, to] + // - VisitIsNull: null rows from null_bitmap + // - VisitIsNotNull: non-null rows + // - VisitStartsWith: keys starting with prefix + // - VisitEndsWith: all non-null rows (fallback) + // - VisitContains: all non-null rows (fallback) + // - VisitLike: all non-null rows (fallback, TODO: optimize for prefix%) + + // The actual visit methods are tested in integration tests + SUCCEED(); +} + +} // namespace paimon::test \ No newline at end of file diff --git a/src/paimon/common/global_index/btree/btree_index_meta.cpp b/src/paimon/common/global_index/btree/btree_index_meta.cpp index 775c49615..ad3b4369e 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta.cpp +++ b/src/paimon/common/global_index/btree/btree_index_meta.cpp @@ -16,6 +16,8 @@ #include "paimon/common/global_index/btree/btree_index_meta.h" +#include "paimon/common/memory/memory_slice_output.h" + namespace paimon { std::shared_ptr BTreeIndexMeta::Deserialize(const std::shared_ptr& meta, @@ -35,10 +37,31 @@ std::shared_ptr BTreeIndexMeta::Deserialize(const std::shared_pt return std::make_shared(first_key, last_key, has_nulls); } -std::shared_ptr BTreeIndexMeta::Serialize(paimon::MemoryPool* pool) { - // TODO(zhangchaoming.zcm): Implement serialization - // For now, return an empty Bytes object - return std::make_shared(); +std::shared_ptr BTreeIndexMeta::Serialize(paimon::MemoryPool* pool) const { + // Calculate total size: first_key_len(4) + first_key + last_key_len(4) + last_key + has_nulls(1) + int32_t first_key_size = first_key_ ? first_key_->size() : 0; + int32_t last_key_size = last_key_ ? last_key_->size() : 0; + int32_t total_size = 4 + first_key_size + 4 + last_key_size + 1; + + auto output = std::make_shared(total_size, pool); + + // Write first_key_len and first_key + output->WriteValue(first_key_size); + if (first_key_) { + output->WriteBytes(first_key_); + } + + // Write last_key_len and last_key + output->WriteValue(last_key_size); + if (last_key_) { + output->WriteBytes(last_key_); + } + + // Write has_nulls + output->WriteValue(static_cast(has_nulls_ ? 1 : 0)); + + auto slice = output->ToSlice(); + return slice->CopyBytes(pool); } } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_index_meta.h b/src/paimon/common/global_index/btree/btree_index_meta.h index 17e961c14..fa1735056 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta.h +++ b/src/paimon/common/global_index/btree/btree_index_meta.h @@ -27,7 +27,7 @@ namespace paimon { class BTreeIndexMeta { public: static std::shared_ptr Deserialize(const std::shared_ptr& meta, paimon::MemoryPool *pool); - static std::shared_ptr Serialize(paimon::MemoryPool *pool); + std::shared_ptr Serialize(paimon::MemoryPool *pool) const; public: BTreeIndexMeta(const std::shared_ptr& first_key, const std::shared_ptr& last_key, @@ -51,10 +51,10 @@ class BTreeIndexMeta { } private: - int32_t Size() { + int32_t Size() const { // 9 bytes => first_key_len(4 byte) + last_key_len(4 byte) + has_null(1 byte) - return (first_key_.get() ? 0 : first_key_->size()) + - (last_key_.get() ? 0 : last_key_->size()) + 9; + return (first_key_ ? first_key_->size() : 0) + + (last_key_ ? last_key_->size() : 0) + 9; } private: diff --git a/src/paimon/common/global_index/btree/btree_index_meta_test.cpp b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp new file mode 100644 index 000000000..cc3319aee --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp @@ -0,0 +1,215 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "paimon/common/global_index/btree/btree_index_meta.h" +#include "paimon/memory/memory_pool.h" + +namespace paimon::test { + +class BTreeIndexMetaTest : public ::testing::Test { +protected: + void SetUp() override { pool_ = std::make_shared(); } + + std::shared_ptr pool_; +}; + +TEST_F(BTreeIndexMetaTest, SerializeDeserializeNormalKeys) { + // Create a BTreeIndexMeta with normal keys + auto first_key = Bytes::AllocateBytes("first_key_data", pool_.get()); + auto last_key = Bytes::AllocateBytes("last_key_data", pool_.get()); + auto meta = std::make_shared( + std::shared_ptr(first_key.release()), + std::shared_ptr(last_key.release()), + true); + + // Serialize + auto serialized = meta->Serialize(pool_.get()); + ASSERT_NE(serialized, nullptr); + ASSERT_GT(serialized->size(), 0u); + + // Deserialize + auto deserialized = BTreeIndexMeta::Deserialize(serialized, pool_.get()); + ASSERT_NE(deserialized, nullptr); + + // Verify first_key + auto deserialized_first = deserialized->FirstKey(); + ASSERT_NE(deserialized_first, nullptr); + EXPECT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), "first_key_data"); + + // Verify last_key + auto deserialized_last = deserialized->LastKey(); + ASSERT_NE(deserialized_last, nullptr); + EXPECT_EQ(std::string(deserialized_last->data(), deserialized_last->size()), "last_key_data"); + + // Verify has_nulls + EXPECT_TRUE(deserialized->HasNulls()); +} + +TEST_F(BTreeIndexMetaTest, SerializeDeserializeEmptyKeys) { + // Create a BTreeIndexMeta with empty keys (OnlyNulls case) + auto meta = std::make_shared(nullptr, nullptr, true); + + // Serialize + auto serialized = meta->Serialize(pool_.get()); + ASSERT_NE(serialized, nullptr); + + // Deserialize + auto deserialized = BTreeIndexMeta::Deserialize(serialized, pool_.get()); + ASSERT_NE(deserialized, nullptr); + + // Verify keys are null + EXPECT_EQ(deserialized->FirstKey(), nullptr); + EXPECT_EQ(deserialized->LastKey(), nullptr); + + // Verify has_nulls + EXPECT_TRUE(deserialized->HasNulls()); + + // Verify OnlyNulls + EXPECT_TRUE(deserialized->OnlyNulls()); +} + +TEST_F(BTreeIndexMetaTest, HasNullsAndOnlyNulls) { + // Case 1: Has nulls with keys + auto meta1 = std::make_shared( + std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), + std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), + true); + EXPECT_TRUE(meta1->HasNulls()); + EXPECT_FALSE(meta1->OnlyNulls()); + + // Case 2: No nulls with keys + auto meta2 = std::make_shared( + std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), + std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), + false); + EXPECT_FALSE(meta2->HasNulls()); + EXPECT_FALSE(meta2->OnlyNulls()); + + // Case 3: Only nulls (no keys) + auto meta3 = std::make_shared(nullptr, nullptr, true); + EXPECT_TRUE(meta3->HasNulls()); + EXPECT_TRUE(meta3->OnlyNulls()); + + // Case 4: No nulls and no keys (edge case) + auto meta4 = std::make_shared(nullptr, nullptr, false); + EXPECT_FALSE(meta4->HasNulls()); + EXPECT_TRUE(meta4->OnlyNulls()); +} + +TEST_F(BTreeIndexMetaTest, SerializeDeserializeNoNulls) { + // Create a BTreeIndexMeta without nulls + auto first_key = Bytes::AllocateBytes("abc", pool_.get()); + auto last_key = Bytes::AllocateBytes("xyz", pool_.get()); + auto meta = std::make_shared( + std::shared_ptr(first_key.release()), + std::shared_ptr(last_key.release()), + false); + + // Serialize + auto serialized = meta->Serialize(pool_.get()); + ASSERT_NE(serialized, nullptr); + + // Deserialize + auto deserialized = BTreeIndexMeta::Deserialize(serialized, pool_.get()); + ASSERT_NE(deserialized, nullptr); + + // Verify has_nulls is false + EXPECT_FALSE(deserialized->HasNulls()); +} + +TEST_F(BTreeIndexMetaTest, SerializeDeserializeWithOnlyFirstKey) { + // Create a BTreeIndexMeta with only first_key (edge case) + auto first_key = Bytes::AllocateBytes("first", pool_.get()); + auto meta = std::make_shared( + std::shared_ptr(first_key.release()), + nullptr, + false); + + // Serialize + auto serialized = meta->Serialize(pool_.get()); + ASSERT_NE(serialized, nullptr); + + // Deserialize + auto deserialized = BTreeIndexMeta::Deserialize(serialized, pool_.get()); + ASSERT_NE(deserialized, nullptr); + + // Verify first_key + auto deserialized_first = deserialized->FirstKey(); + ASSERT_NE(deserialized_first, nullptr); + EXPECT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), "first"); + + // Verify last_key is null + EXPECT_EQ(deserialized->LastKey(), nullptr); +} + +TEST_F(BTreeIndexMetaTest, SerializeDeserializeWithOnlyLastKey) { + // Create a BTreeIndexMeta with only last_key (edge case) + auto last_key = Bytes::AllocateBytes("last", pool_.get()); + auto meta = std::make_shared( + nullptr, + std::shared_ptr(last_key.release()), + false); + + // Serialize + auto serialized = meta->Serialize(pool_.get()); + ASSERT_NE(serialized, nullptr); + + // Deserialize + auto deserialized = BTreeIndexMeta::Deserialize(serialized, pool_.get()); + ASSERT_NE(deserialized, nullptr); + + // Verify first_key is null + EXPECT_EQ(deserialized->FirstKey(), nullptr); + + // Verify last_key + auto deserialized_last = deserialized->LastKey(); + ASSERT_NE(deserialized_last, nullptr); + EXPECT_EQ(std::string(deserialized_last->data(), deserialized_last->size()), "last"); +} + +TEST_F(BTreeIndexMetaTest, SerializeDeserializeBinaryKeys) { + // Create a BTreeIndexMeta with binary keys containing null bytes + std::string binary_first = std::string("key\0with\0nulls", 14); + std::string binary_last = std::string("last\0key", 8); + auto first_key = Bytes::AllocateBytes(binary_first, pool_.get()); + auto last_key = Bytes::AllocateBytes(binary_last, pool_.get()); + auto meta = std::make_shared( + std::shared_ptr(first_key.release()), + std::shared_ptr(last_key.release()), + true); + + // Serialize + auto serialized = meta->Serialize(pool_.get()); + ASSERT_NE(serialized, nullptr); + + // Deserialize + auto deserialized = BTreeIndexMeta::Deserialize(serialized, pool_.get()); + ASSERT_NE(deserialized, nullptr); + + // Verify first_key + auto deserialized_first = deserialized->FirstKey(); + ASSERT_NE(deserialized_first, nullptr); + EXPECT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), binary_first); + + // Verify last_key + auto deserialized_last = deserialized->LastKey(); + ASSERT_NE(deserialized_last, nullptr); + EXPECT_EQ(std::string(deserialized_last->data(), deserialized_last->size()), binary_last); +} + +} // namespace paimon::test \ No newline at end of file From fd24bef2b5cadf5e341f809e2c9fb1de92f15546 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 8 Apr 2026 12:43:20 +0800 Subject: [PATCH 05/28] feat: implement B-tree global index compatibility - Add B-tree compatibility test to ensure data compatibility with Java implementation - Implement B-tree global index writer with proper file format - Add integration tests for B-tree global index - Refactor SST block footer to sort lookup store footer - Update file index reader to support B-tree format - Add comprehensive test data for compatibility verification Co-Authored-By: Claude Opus --- CLAUDE.md | 163 - docs/branch-management.md | 153 - src/paimon/CMakeLists.txt | 3 +- .../empty/empty_file_index_reader.h | 8 +- src/paimon/common/global_index/CMakeLists.txt | 2 + .../btree/btree_compatibility_test.cpp | 1021 ++++ .../global_index/btree/btree_file_footer.cpp | 84 +- .../global_index/btree/btree_file_footer.h | 59 +- .../btree/btree_file_footer_test.cpp | 116 +- .../btree_global_index_integration_test.cpp | 207 +- .../btree/btree_global_index_writer.cpp | 177 +- .../btree/btree_global_index_writer.h | 18 +- .../btree/btree_global_index_writer_test.cpp | 181 +- .../btree/btree_global_indexer.cpp | 546 +- .../global_index/btree/btree_global_indexer.h | 20 +- .../btree/btree_global_indexer_test.cpp | 23 +- .../global_index/btree/btree_index_meta.cpp | 29 +- src/paimon/common/io/cache/cache_key.h | 2 + .../lookup/sort/sort_lookup_store_factory.cpp | 29 +- .../lookup/sort/sort_lookup_store_factory.h | 6 +- .../sort/sort_lookup_store_footer.cpp} | 11 +- .../sort/sort_lookup_store_footer.h} | 16 +- src/paimon/common/sst/block_iterator.cpp | 7 +- src/paimon/common/sst/sst_file_io_test.cpp | 60 +- src/paimon/common/sst/sst_file_reader.cpp | 19 +- src/paimon/common/sst/sst_file_reader.h | 12 +- src/paimon/common/sst/sst_file_writer.cpp | 11 +- src/paimon/common/sst/sst_file_writer.h | 4 +- src/paimon/common/utils/roaring_bitmap64.cpp | 5 + .../common/utils/roaring_navigable_map64.cpp | 4 +- .../btree/btree_compatibility_data/README.md | 27 + .../btree_test_int_100.bin | Bin 0 -> 1177 bytes .../btree_test_int_100.bin.meta | Bin 0 -> 17 bytes .../btree_test_int_100.csv | 101 + .../btree_test_int_1000.bin | Bin 0 -> 11618 bytes .../btree_test_int_1000.bin.meta | Bin 0 -> 17 bytes .../btree_test_int_1000.csv | 1001 ++++ .../btree_test_int_50.bin | Bin 0 -> 640 bytes .../btree_test_int_50.bin.meta | Bin 0 -> 17 bytes .../btree_test_int_50.csv | 51 + .../btree_test_int_500.bin | Bin 0 -> 5789 bytes .../btree_test_int_500.bin.meta | Bin 0 -> 17 bytes .../btree_test_int_500.csv | 501 ++ .../btree_test_int_5000.bin | Bin 0 -> 58548 bytes .../btree_test_int_5000.bin.meta | Bin 0 -> 17 bytes .../btree_test_int_5000.csv | 5001 +++++++++++++++++ .../btree_test_int_all_nulls.bin | Bin 0 -> 93 bytes .../btree_test_int_all_nulls.bin.meta | Bin 0 -> 9 bytes .../btree_test_int_all_nulls.csv | 51 + .../btree_test_int_duplicates.bin | Bin 0 -> 330 bytes .../btree_test_int_duplicates.bin.meta | Bin 0 -> 17 bytes .../btree_test_int_duplicates.csv | 101 + .../btree_test_int_no_nulls.bin | Bin 0 -> 637 bytes .../btree_test_int_no_nulls.bin.meta | Bin 0 -> 17 bytes .../btree_test_int_no_nulls.csv | 51 + .../btree_test_varchar_100.bin | Bin 0 -> 1471 bytes .../btree_test_varchar_100.bin.meta | Bin 0 -> 29 bytes .../btree_test_varchar_100.csv | 101 + .../btree_test_varchar_1000.bin | Bin 0 -> 18317 bytes .../btree_test_varchar_1000.bin.meta | Bin 0 -> 29 bytes .../btree_test_varchar_1000.csv | 1001 ++++ .../btree_test_varchar_50.bin | Bin 0 -> 807 bytes .../btree_test_varchar_50.bin.meta | Bin 0 -> 29 bytes .../btree_test_varchar_50.csv | 51 + .../btree_test_varchar_500.bin | Bin 0 -> 9156 bytes .../btree_test_varchar_500.bin.meta | Bin 0 -> 29 bytes .../btree_test_varchar_500.csv | 501 ++ .../btree_test_varchar_5000.bin | Bin 0 -> 85789 bytes .../btree_test_varchar_5000.bin.meta | Bin 0 -> 29 bytes .../btree_test_varchar_5000.csv | 5001 +++++++++++++++++ 70 files changed, 15451 insertions(+), 1085 deletions(-) delete mode 100644 CLAUDE.md delete mode 100644 docs/branch-management.md create mode 100644 src/paimon/common/global_index/btree/btree_compatibility_test.cpp rename src/paimon/common/{sst/block_footer.cpp => lookup/sort/sort_lookup_store_footer.cpp} (83%) rename src/paimon/common/{sst/block_footer.h => lookup/sort/sort_lookup_store_footer.h} (76%) create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/README.md create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_100.bin create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_100.bin.meta create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_100.csv create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_1000.bin create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_1000.bin.meta create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_1000.csv create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_50.bin create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_50.bin.meta create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_50.csv create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_500.bin create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_500.bin.meta create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_500.csv create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_5000.bin create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_5000.bin.meta create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_5000.csv create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_all_nulls.bin create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_all_nulls.bin.meta create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_all_nulls.csv create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_duplicates.bin create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_duplicates.bin.meta create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_duplicates.csv create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_no_nulls.bin create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_no_nulls.bin.meta create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_no_nulls.csv create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.bin create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.bin.meta create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.csv create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.bin create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.bin.meta create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.csv create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.bin create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.bin.meta create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.csv create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.bin create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.bin.meta create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.csv create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.bin create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.bin.meta create mode 100644 test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.csv diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index b6a4b78db..000000000 --- a/CLAUDE.md +++ /dev/null @@ -1,163 +0,0 @@ -# CLAUDE.md - -This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. - -## Project Overview - -Paimon C++ is a high-performance C++ implementation of Apache Paimon that provides native access to the Paimon datalake format. It supports write, commit, scan, and read operations for both append-only and primary key tables, using Arrow Columnar format for data interchange. - -## Build Commands - -### Basic Build -```bash -# Using the build script (recommended) -./build_and_package.sh --release --jobs 8 - -# Manual CMake build -mkdir build && cd build -cmake -G Ninja -DCMAKE_BUILD_TYPE=Release .. -ninja -j8 -``` - -### Debug Build -```bash -./build_and_package.sh --debug --jobs 8 -``` - -### Build with Tests -```bash -./build_and_package.sh --debug --jobs 8 -DPAIMON_BUILD_TESTS=ON -``` - -### Key Build Options -- `-DPAIMON_BUILD_TESTS=ON/OFF` - Build tests (default: OFF) -- `-DPAIMON_BUILD_STATIC=ON/OFF` - Build static library (default: ON) -- `-DPAIMON_BUILD_SHARED=ON/OFF` - Build shared library (default: ON) -- `-DPAIMON_ENABLE_AVRO=ON/OFF` - Enable Avro format (default: ON) -- `-DPAIMON_ENABLE_ORC=ON/OFF` - Enable ORC format (default: ON) -- `-DPAIMON_ENABLE_LANCE=ON/OFF` - Enable Lance format (default: OFF) -- `-DPAIMON_ENABLE_JINDO=ON/OFF` - Enable Jindo filesystem (default: OFF) -- `-DPAIMON_ENABLE_LUMINA=ON/OFF` - Enable Lumina vector index (default: ON) -- `-DPAIMON_ENABLE_LUCENE=ON/OFF` - Enable Lucene index (default: ON) - -## Testing - -### Run All Tests -```bash -cd build -ctest -j8 --output-on-failure -``` - -### Run Specific Test -```bash -cd build -./test/inte/paimon_inte_test --gtest_filter="TestName.Pattern" -``` - -### Test Organization -- **Integration tests**: `test/inte/` - Main integration tests for all features -- **Unit tests**: Co-located with source files as `*_test.cpp` -- **Test data**: `test/test_data/` - Sample Paimon tables for testing - -## Code Quality and Linting - -### Install Pre-commit Hooks -```bash -pip install pre-commit -pre-commit install -``` - -### Run Linting on All Files -```bash -pre-commit run -a -``` - -### Linting Tools Used -- **clang-format**: C++ code formatting -- **cmake-format**: CMake file formatting -- **codespell**: Spell checking -- **cpplint**: C++ style checker -- **sphinx-lint**: Documentation linting - -## Architecture Overview - -### Directory Structure -``` -src/paimon/ -├── common/ # Common utilities and interfaces -│ ├── data/ # Data types and Arrow integration -│ ├── executor/ # Thread pool implementations -│ ├── file_index/ # File index implementations -│ ├── format/ # File format interfaces -│ ├── fs/ # Filesystem abstractions -│ ├── global_index/# Global index interfaces -│ ├── io/ # IO utilities -│ ├── memory/ # Memory pool implementations -│ ├── predicate/ # Predicate pushdown -│ └── types/ # Type system -├── core/ # Core Paimon logic -│ ├── append/ # Append-only table logic -│ ├── catalog/ # Catalog implementations -│ ├── deletionvectors/ # Deletion vector handling -│ ├── manifest/ # Manifest file handling -│ ├── mergetree/ # LSM merge tree logic -│ ├── operation/ # Table operations -│ ├── schema/ # Schema management -│ └── table/ # Table implementations -├── format/ # File format implementations -│ ├── avro/ # Avro format support -│ ├── orc/ # ORC format support -│ ├── parquet/ # Parquet format support -│ └── lance/ # Lance format support (optional) -└── fs/ # Filesystem implementations - ├── jindo/ # Jindo filesystem (optional) - └── local/ # Local filesystem -``` - -### Key Design Patterns - -1. **Result Types**: Uses `PAIMON_ASSIGN_OR_RAISE` and `PAIMON_RETURN_NOT_OK` macros for error handling. Always check return statuses. - -2. **Builder Pattern**: Common pattern for constructing contexts - - `WriteContextBuilder` → `WriteContext` - - `ScanContextBuilder` → `ScanContext` - - `ReadContextBuilder` → `ReadContext` - -3. **Factory Pattern**: Creation through static factory methods - - `FileStoreWrite::Create()` - - `TableScan::Create()` - - `TableRead::Create()` - -4. **Two-Phase Operations**: - - **Write**: Prepare data → Generate commit messages → Commit - - **Read**: Scan table → Create plan → Read batches - -5. **Arrow Integration**: All data interchange uses Arrow Columnar format. Batches are represented as `RecordBatch` objects wrapping Arrow arrays. - -6. **Plugin Architecture**: File formats and filesystems are pluggable through interfaces in `common/`. - -### Core Abstractions - -- **FileStoreWrite**: Writes data to Paimon tables -- **TableScan**: Scans table metadata and creates read plans -- **TableRead**: Reads data files according to plan -- **FileStoreCommit**: Commits write operations -- **Catalog**: Manages databases and tables -- **FileFormat**: Abstracts different file formats (ORC, Parquet, Avro, Lance) -- **FileSystem**: Abstracts storage backends (Local, Jindo) - -## Development Workflow - -1. **Before making changes**: Run `pre-commit run -a` to ensure code passes linting -2. **For new features**: Add unit tests alongside implementation (`*_test.cpp`) -3. **For bug fixes**: Add regression tests -4. **Before committing**: Ensure all tests pass with `ctest -j8` -5. **Architecture decisions**: Follow existing patterns for consistency - -## Important Notes - -- The codebase only supports x86_64 architecture -- Java Paimon format compatibility is maintained for commit messages, data splits, and manifests -- C++11 ABI is enabled by default (can be disabled, but some features like Lance and Lumina require it) -- Use the provided macros (`PAIMON_ASSIGN_OR_RAISE`, `PAIMON_RETURN_NOT_OK`) for error handling -- Pre-commit hooks must pass before commits can be made \ No newline at end of file diff --git a/docs/branch-management.md b/docs/branch-management.md deleted file mode 100644 index 8e873f008..000000000 --- a/docs/branch-management.md +++ /dev/null @@ -1,153 +0,0 @@ -# paimon-cpp 分支管理规范 - -## 分支模型 - -本项目采用 **GitHub 社区仓库 + antcode 内部仓库** 双仓库协作模式。 - -``` -GitHub (github) antcode (origin) -───────────── ──────────────── -main ─────────────────────────> main (镜像同步,内容保持一致) - \ - └──> internal (内部分支,承载内部独有功能) -``` - -### 分支职责 - -| 分支 | 仓库 | 用途 | -|------|------|------| -| `main` | github + antcode | 社区主线,两边内容保持一致。antcode 的 main 通过 merge 跟踪 github 的 main | -| `internal` | antcode | 内部开发主线,包含社区代码 + 内部独有功能(如 zdfs 文件系统等) | -| `release-*` | github + antcode | 发布分支,按需同步 | -| `feat/*` | antcode | 内部 feature 开发分支,从 internal 拉出,合入 internal | - -### 远程仓库配置 - -```bash -git remote -v -# github https://github.com/alibaba/paimon-cpp.git (社区仓库) -# origin https://code.alipay.com/antflink/paimon-cpp.git (内部仓库) -``` - ---- - -## 日常操作 - -### 1. 从社区同步代码到 antcode - -```bash -# 拉取社区最新代码 -git fetch github - -# 同步 main 分支(保持与社区一致) -git checkout main -git merge github/main -git push origin main - -# 同步到 internal 分支(合入内部开发线) -git checkout internal -git merge main -# 如有冲突,解决后 git add && git merge --continue -git push origin internal -``` - -> **为什么用 merge 而不是 cherry-pick?** -> - merge 会记录共同祖先,Git 能自动识别已合并的 commit,**不会重复冲突** -> - cherry-pick 每次都产生新 commit hash,导致 Git 无法识别已同步的内容,**每次都可能冲突** - -### 2. 内部 feature 开发 - -```bash -# 从 internal 分支拉出 feature 分支 -git checkout internal -git checkout -b feat/my-feature - -# 开发完成后合入 internal -git checkout internal -git merge --no-ff feat/my-feature -git push origin internal - -# 清理 feature 分支 -git branch -d feat/my-feature -``` - -### 3. 内部 feature 贡献回社区 - -当内部 feature 成熟、通用性足够时,可以贡献回 GitHub 社区: - -```bash -# 从 main 分支拉出贡献分支 -git checkout main -git checkout -b contrib/my-feature - -# 将内部 feature 的 commit cherry-pick 过来 -git cherry-pick - -# 推送到 GitHub 并提 PR -git push github contrib/my-feature -# 在 GitHub 上创建 Pull Request -``` - -### 4. 同步 release 分支 - -```bash -git fetch github -git checkout release-0.1 -git merge github/release-0.1 -git push origin refs/heads/release-0.1:refs/heads/release-0.1 -``` - ---- - -## 内部功能开发规范 - -为了减少 merge 冲突,内部独有功能应遵循以下规范: - -### 目录隔离 - -内部功能代码放在独立目录下,不要修改社区已有文件的核心逻辑: - -``` -src/paimon/fs/zdfs/ # zdfs 文件系统(独立目录) -src/paimon/internal/ # 其他内部功能(建议的目录) -``` - -### CMake 开关控制 - -每个内部功能通过 CMake option 控制,默认关闭: - -```cmake -option(PAIMON_ENABLE_ZDFS "Whether to enable zdfs file system" OFF) - -if(PAIMON_ENABLE_ZDFS) - add_definitions(-DPAIMON_ENABLE_ZDFS) -endif() -``` - -### 构建配置集中管理 - -内部依赖的版本信息统一添加在 `third_party/versions.txt` 文件末尾, -构建逻辑添加在 `cmake_modules/ThirdpartyToolchain.cmake` 中, -用 `if(PAIMON_ENABLE_XXX)` 包裹,避免影响社区构建。 - ---- - -## 当前内部独有功能清单 - -| 功能 | 目录 | CMake 开关 | 说明 | -|------|------|-----------|------| -| ZDFS 文件系统 | `src/paimon/fs/zdfs/` | `PAIMON_ENABLE_ZDFS` | 内部分布式文件系统支持 | - ---- - -## 构建说明 - -```bash -# 社区标准构建(不含内部功能) -cmake -B build . -cmake --build build - -# 内部构建(启用 zdfs) -cmake -B build -DPAIMON_ENABLE_ZDFS=ON -DPAIMON_USE_CXX11_ABI=OFF . -cmake --build build -``` diff --git a/src/paimon/CMakeLists.txt b/src/paimon/CMakeLists.txt index c706977c4..dc5797fd1 100644 --- a/src/paimon/CMakeLists.txt +++ b/src/paimon/CMakeLists.txt @@ -69,6 +69,7 @@ set(PAIMON_COMMON_SRCS common/io/cache/cache_manager.cpp common/logging/logging.cpp common/lookup/sort/sort_lookup_store_factory.cpp + common/lookup/sort/sort_lookup_store_footer.cpp common/lookup/lookup_store_factory.cpp common/memory/bytes.cpp common/memory/memory_pool.cpp @@ -112,7 +113,6 @@ set(PAIMON_COMMON_SRCS common/reader/complete_row_kind_batch_reader.cpp common/reader/data_evolution_file_reader.cpp common/sst/block_handle.cpp - common/sst/block_footer.cpp common/sst/block_iterator.cpp common/sst/block_trailer.cpp common/sst/block_reader.cpp @@ -404,6 +404,7 @@ if(PAIMON_BUILD_TESTS) common/global_index/btree/btree_global_indexer_test.cpp common/global_index/btree/btree_global_index_writer_test.cpp common/global_index/btree/btree_global_index_integration_test.cpp + common/global_index/btree/btree_compatibility_test.cpp common/global_index/rangebitmap/range_bitmap_global_index_test.cpp common/global_index/wrap/file_index_reader_wrapper_test.cpp common/io/byte_array_input_stream_test.cpp diff --git a/src/paimon/common/file_index/empty/empty_file_index_reader.h b/src/paimon/common/file_index/empty/empty_file_index_reader.h index f859727ef..42dba7540 100644 --- a/src/paimon/common/file_index/empty/empty_file_index_reader.h +++ b/src/paimon/common/file_index/empty/empty_file_index_reader.h @@ -67,12 +67,14 @@ class EmptyFileIndexReader : public FileIndexReader { } Result> VisitNotEqual(const Literal& literal) override { - return FileIndexResult::Skip(); + // Empty file has no data, so all records are not equal to any value + return FileIndexResult::Remain(); } Result> VisitNotIn( const std::vector& literals) override { - return FileIndexResult::Skip(); + // Empty file has no data, so all records are not in any set + return FileIndexResult::Remain(); } Result> VisitBetween(const Literal& from, @@ -81,7 +83,7 @@ class EmptyFileIndexReader : public FileIndexReader { } Result> VisitNotBetween(const Literal& from, - const Literal& to) override { + const Literal& to) override { return FileIndexResult::Skip(); } diff --git a/src/paimon/common/global_index/CMakeLists.txt b/src/paimon/common/global_index/CMakeLists.txt index 88e2fb556..3b61ea9ee 100644 --- a/src/paimon/common/global_index/CMakeLists.txt +++ b/src/paimon/common/global_index/CMakeLists.txt @@ -31,11 +31,13 @@ add_paimon_lib(paimon_global_index paimon_file_index_shared STATIC_LINK_LIBS arrow + glog fmt dl Threads::Threads SHARED_LINK_LIBS paimon_shared paimon_file_index_shared + paimon_static SHARED_LINK_FLAGS ${PAIMON_VERSION_SCRIPT_FLAGS}) diff --git a/src/paimon/common/global_index/btree/btree_compatibility_test.cpp b/src/paimon/common/global_index/btree/btree_compatibility_test.cpp new file mode 100644 index 000000000..3508777a4 --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_compatibility_test.cpp @@ -0,0 +1,1021 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * BTree Compatibility Test + * + * This test verifies that the C++ BTree global index reader can correctly read + * and query data files generated by the Java BTreeCompatibilityDataGenerator. + * + * Test data files are located in: + * test/test_data/global_index/btree/btree_compatibility_data/ + * + * Each test case consists of three files: + * - *.bin : The BTree index binary file (SST format with footer) + * - *.bin.meta : The serialized BTreeIndexMeta (first_key, last_key, has_nulls) + * - *.csv : Human-readable CSV with columns: row_id, key, is_null + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "paimon/common/global_index/btree/btree_global_indexer.h" +#include "paimon/common/global_index/btree/btree_index_meta.h" +#include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/fs/file_system.h" +#include "paimon/global_index/io/global_index_file_reader.h" +#include "paimon/memory/memory_pool.h" +#include "paimon/predicate/literal.h" +#include "paimon/testing/utils/testharness.h" + +namespace paimon::test { + +// --------------------------------------------------------------------------- +// Test data directory (relative to project root) +// --------------------------------------------------------------------------- +static const std::string kTestDataDir = + "test/test_data/global_index/btree/btree_compatibility_data"; + +// --------------------------------------------------------------------------- +// CSV record parsed from the Java-generated CSV files +// --------------------------------------------------------------------------- +struct CsvRecord { + int64_t row_id; + std::string key; // "NULL" if is_null + bool is_null; +}; + +// --------------------------------------------------------------------------- +// Helper: parse a CSV file into a vector of CsvRecord +// --------------------------------------------------------------------------- +static std::vector ParseCsvFile(const std::string& csv_path) { + std::vector records; + std::ifstream ifs(csv_path); + if (!ifs.is_open()) { + return records; + } + + std::string line; + // Skip header line: "row_id,key,is_null" + std::getline(ifs, line); + + while (std::getline(ifs, line)) { + if (line.empty()) continue; + std::istringstream ss(line); + std::string row_id_str, key_str, is_null_str; + std::getline(ss, row_id_str, ','); + std::getline(ss, key_str, ','); + std::getline(ss, is_null_str, ','); + + CsvRecord rec; + rec.row_id = std::stoll(row_id_str); + rec.key = key_str; + rec.is_null = (is_null_str == "true"); + records.push_back(rec); + } + return records; +} + +// --------------------------------------------------------------------------- +// Helper: read a binary file into a Bytes object +// --------------------------------------------------------------------------- +static std::shared_ptr ReadBinaryFile(const std::string& path, MemoryPool* pool) { + std::ifstream ifs(path, std::ios::binary | std::ios::ate); + if (!ifs.is_open()) return nullptr; + auto size = ifs.tellg(); + ifs.seekg(0, std::ios::beg); + auto bytes = Bytes::AllocateBytes(static_cast(size), pool); + ifs.read(bytes->data(), size); + return std::shared_ptr(bytes.release()); +} + +// --------------------------------------------------------------------------- +// Helper: get file size +// --------------------------------------------------------------------------- +static int64_t GetFileSize(const std::string& path) { + std::ifstream ifs(path, std::ios::binary | std::ios::ate); + if (!ifs.is_open()) return -1; + return static_cast(ifs.tellg()); +} + +// --------------------------------------------------------------------------- +// Fake GlobalIndexFileReader that reads from local filesystem +// --------------------------------------------------------------------------- +class LocalGlobalIndexFileReader : public GlobalIndexFileReader { + public: + explicit LocalGlobalIndexFileReader(const std::shared_ptr& fs) : fs_(fs) {} + + Result> GetInputStream( + const std::string& file_path) const override { + return fs_->Open(file_path); + } + + private: + std::shared_ptr fs_; +}; + +// --------------------------------------------------------------------------- +// Collect all row IDs from a GlobalIndexResult into a set +// --------------------------------------------------------------------------- +static std::set CollectRowIds(const std::shared_ptr& result) { + std::set ids; + auto iter_result = result->CreateIterator(); + if (!iter_result.ok()) return ids; + auto iter = std::move(iter_result).value(); + while (iter->HasNext()) { + ids.insert(iter->Next()); + } + return ids; +} + +// --------------------------------------------------------------------------- +// Test fixture +// --------------------------------------------------------------------------- +class BTreeCompatibilityTest : public ::testing::Test { + protected: + void SetUp() override { + pool_ = GetDefaultPool(); + // Use UniqueTestDirectory to get a FileSystem that can read local files + test_dir_ = UniqueTestDirectory::Create("local"); + fs_ = test_dir_->GetFileSystem(); + + // Resolve the absolute path to test data + // The test data is at project_root/test/test_data/... + // We need to find the project root. Use the current working directory. + char cwd[4096]; + if (getcwd(cwd, sizeof(cwd)) != nullptr) { + project_root_ = std::string(cwd); + } + data_dir_ = project_root_ + "/" + kTestDataDir; + } + + // Create a BTreeGlobalIndexReader from Java-generated .bin and .bin.meta files + Result> CreateReaderFromFiles( + const std::string& bin_path, const std::string& meta_path, + const std::shared_ptr& arrow_type, int64_t record_count) { + // Read meta bytes + auto meta_bytes = ReadBinaryFile(meta_path, pool_.get()); + if (!meta_bytes) { + return Status::IOError("Failed to read meta file: " + meta_path); + } + + // Get file size + int64_t file_size = GetFileSize(bin_path); + if (file_size < 0) { + return Status::IOError("Failed to get file size: " + bin_path); + } + + // Build GlobalIndexIOMeta + // range_end = record_count - 1 (inclusive) + GlobalIndexIOMeta io_meta(bin_path, file_size, record_count - 1, meta_bytes); + std::vector metas = {io_meta}; + + // Create ArrowSchema + auto schema = arrow::schema({arrow::field("testField", arrow_type)}); + auto c_schema = std::make_unique(); + auto export_status = arrow::ExportSchema(*schema, c_schema.get()); + if (!export_status.ok()) { + return Status::Invalid("Failed to export ArrowSchema: " + export_status.ToString()); + } + + // Create reader + auto file_reader = std::make_shared(fs_); + std::map options; + BTreeGlobalIndexer indexer(options); + + auto reader_result = indexer.CreateReader(c_schema.get(), file_reader, metas, pool_); + ArrowSchemaRelease(c_schema.get()); + return reader_result; + } + + // Helper: build expected row IDs for null rows from CSV records + std::set GetNullRowIds(const std::vector& records) { + std::set ids; + for (const auto& rec : records) { + if (rec.is_null) ids.insert(rec.row_id); + } + return ids; + } + + // Helper: build expected row IDs for non-null rows from CSV records + std::set GetNonNullRowIds(const std::vector& records) { + std::set ids; + for (const auto& rec : records) { + if (!rec.is_null) ids.insert(rec.row_id); + } + return ids; + } + + // Helper: build expected row IDs for rows with a specific int key + std::set GetRowIdsForIntKey(const std::vector& records, int32_t key) { + std::set ids; + std::string key_str = std::to_string(key); + for (const auto& rec : records) { + if (!rec.is_null && rec.key == key_str) ids.insert(rec.row_id); + } + return ids; + } + + // Helper: build expected row IDs for rows with a specific string key + std::set GetRowIdsForStringKey(const std::vector& records, + const std::string& key) { + std::set ids; + for (const auto& rec : records) { + if (!rec.is_null && rec.key == key) ids.insert(rec.row_id); + } + return ids; + } + + // Helper: build expected row IDs for int keys in range [lower, upper] + std::set GetRowIdsForIntRange(const std::vector& records, int32_t lower, + int32_t upper, bool lower_inclusive, + bool upper_inclusive) { + std::set ids; + for (const auto& rec : records) { + if (rec.is_null) continue; + int32_t val = std::stoi(rec.key); + bool above_lower = lower_inclusive ? (val >= lower) : (val > lower); + bool below_upper = upper_inclusive ? (val <= upper) : (val < upper); + if (above_lower && below_upper) ids.insert(rec.row_id); + } + return ids; + } + + // Helper: build expected row IDs for string keys in range + std::set GetRowIdsForStringRange(const std::vector& records, + const std::string& lower, + const std::string& upper, bool lower_inclusive, + bool upper_inclusive) { + std::set ids; + for (const auto& rec : records) { + if (rec.is_null) continue; + bool above_lower = lower_inclusive ? (rec.key >= lower) : (rec.key > lower); + bool below_upper = upper_inclusive ? (rec.key <= upper) : (rec.key < upper); + if (above_lower && below_upper) ids.insert(rec.row_id); + } + return ids; + } + + std::shared_ptr pool_; + std::unique_ptr test_dir_; + std::shared_ptr fs_; + std::string project_root_; + std::string data_dir_; +}; + +// =========================================================================== +// Test: Read int type data with various record counts +// =========================================================================== +class BTreeCompatibilityIntTest : public BTreeCompatibilityTest, + public ::testing::WithParamInterface {}; + +TEST_P(BTreeCompatibilityIntTest, ReadAndQueryIntData) { + int count = GetParam(); + std::string prefix = "btree_test_int_" + std::to_string(count); + std::string bin_path = data_dir_ + "/" + prefix + ".bin"; + std::string meta_path = bin_path + ".meta"; + std::string csv_path = data_dir_ + "/" + prefix + ".csv"; + + // Parse CSV to get expected data + auto records = ParseCsvFile(csv_path); + ASSERT_EQ(static_cast(records.size()), count) + << "CSV record count mismatch for " << prefix; + + // Create reader + auto reader_result = CreateReaderFromFiles(bin_path, meta_path, arrow::int32(), count); + ASSERT_OK(reader_result.status()) << "Failed to create reader for " << prefix; + auto reader = reader_result.value(); + + // ---- Test 1: VisitIsNull ---- + { + auto result = reader->VisitIsNull(); + ASSERT_OK(result.status()) << prefix << ": VisitIsNull failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetNullRowIds(records); + EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIsNull mismatch"; + } + + // ---- Test 2: VisitIsNotNull ---- + { + auto result = reader->VisitIsNotNull(); + ASSERT_OK(result.status()) << prefix << ": VisitIsNotNull failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetNonNullRowIds(records); + EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIsNotNull mismatch"; + } + + // ---- Test 3: VisitEqual for a known key ---- + // Find the first non-null key in the CSV + for (const auto& rec : records) { + if (!rec.is_null) { + int32_t key_val = std::stoi(rec.key); + Literal literal(key_val); + auto result = reader->VisitEqual(literal); + ASSERT_OK(result.status()) << prefix << ": VisitEqual(" << key_val << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetRowIdsForIntKey(records, key_val); + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitEqual(" << key_val << ") mismatch"; + break; + } + } + + // ---- Test 4: VisitEqual for the last non-null key ---- + for (auto it = records.rbegin(); it != records.rend(); ++it) { + if (!it->is_null) { + int32_t key_val = std::stoi(it->key); + Literal literal(key_val); + auto result = reader->VisitEqual(literal); + ASSERT_OK(result.status()) << prefix << ": VisitEqual(" << key_val << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetRowIdsForIntKey(records, key_val); + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitEqual(" << key_val << ") mismatch (last key)"; + break; + } + } + + // ---- Test 5: VisitEqual for a non-existent key (should return empty) ---- + { + Literal literal(static_cast(-999)); + auto result = reader->VisitEqual(literal); + ASSERT_OK(result.status()) << prefix << ": VisitEqual(-999) failed"; + auto actual_ids = CollectRowIds(result.value()); + EXPECT_TRUE(actual_ids.empty()) << prefix << ": VisitEqual(-999) should be empty"; + } + + // ---- Test 6: VisitLessThan for a mid-range key ---- + { + // Find a key roughly in the middle + std::vector non_null_keys; + for (const auto& rec : records) { + if (!rec.is_null) non_null_keys.push_back(std::stoi(rec.key)); + } + if (!non_null_keys.empty()) { + std::sort(non_null_keys.begin(), non_null_keys.end()); + int32_t mid_key = non_null_keys[non_null_keys.size() / 2]; + Literal literal(mid_key); + auto result = reader->VisitLessThan(literal); + ASSERT_OK(result.status()) << prefix << ": VisitLessThan(" << mid_key << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetRowIdsForIntRange(records, non_null_keys.front(), mid_key, + true, false); + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitLessThan(" << mid_key << ") mismatch"; + } + } + + // ---- Test 7: VisitGreaterOrEqual for a mid-range key ---- + { + std::vector non_null_keys; + for (const auto& rec : records) { + if (!rec.is_null) non_null_keys.push_back(std::stoi(rec.key)); + } + if (!non_null_keys.empty()) { + std::sort(non_null_keys.begin(), non_null_keys.end()); + int32_t mid_key = non_null_keys[non_null_keys.size() / 2]; + Literal literal(mid_key); + auto result = reader->VisitGreaterOrEqual(literal); + ASSERT_OK(result.status()) + << prefix << ": VisitGreaterOrEqual(" << mid_key << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetRowIdsForIntRange(records, mid_key, non_null_keys.back(), + true, true); + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitGreaterOrEqual(" << mid_key << ") mismatch"; + } + } + + // ---- Test 8: VisitBetween ---- + { + std::vector non_null_keys; + for (const auto& rec : records) { + if (!rec.is_null) non_null_keys.push_back(std::stoi(rec.key)); + } + if (non_null_keys.size() >= 4) { + std::sort(non_null_keys.begin(), non_null_keys.end()); + int32_t lower = non_null_keys[non_null_keys.size() / 4]; + int32_t upper = non_null_keys[non_null_keys.size() * 3 / 4]; + Literal lit_lower(lower); + Literal lit_upper(upper); + auto result = reader->VisitBetween(lit_lower, lit_upper); + ASSERT_OK(result.status()) + << prefix << ": VisitBetween(" << lower << ", " << upper << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetRowIdsForIntRange(records, lower, upper, true, true); + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitBetween(" << lower << ", " << upper << ") mismatch"; + } + } + + // ---- Test 9: VisitIn for multiple keys ---- + { + std::set unique_keys; + for (const auto& rec : records) { + if (!rec.is_null) unique_keys.insert(std::stoi(rec.key)); + } + if (unique_keys.size() >= 3) { + auto it = unique_keys.begin(); + int32_t k1 = *it++; + int32_t k2 = *it++; + int32_t k3 = *it++; + std::vector in_literals = { + Literal(k1), Literal(k2), Literal(k3)}; + auto result = reader->VisitIn(in_literals); + ASSERT_OK(result.status()) + << prefix << ": VisitIn({" << k1 << "," << k2 << "," << k3 << "}) failed"; + auto actual_ids = CollectRowIds(result.value()); + + std::set expected_ids; + for (auto id : GetRowIdsForIntKey(records, k1)) expected_ids.insert(id); + for (auto id : GetRowIdsForIntKey(records, k2)) expected_ids.insert(id); + for (auto id : GetRowIdsForIntKey(records, k3)) expected_ids.insert(id); + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitIn mismatch"; + } + } + + // ---- Test 10: VisitNotEqual ---- + { + for (const auto& rec : records) { + if (!rec.is_null) { + int32_t key_val = std::stoi(rec.key); + Literal literal(key_val); + auto result = reader->VisitNotEqual(literal); + ASSERT_OK(result.status()) + << prefix << ": VisitNotEqual(" << key_val << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + + // Expected: all non-null rows except those with this key + std::set expected_ids; + for (const auto& r : records) { + if (!r.is_null && std::stoi(r.key) != key_val) { + expected_ids.insert(r.row_id); + } + } + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitNotEqual(" << key_val << ") mismatch"; + break; // Test with just the first non-null key + } + } + } +} + +INSTANTIATE_TEST_SUITE_P(IntDataSizes, BTreeCompatibilityIntTest, + ::testing::Values(50, 100, 500, 1000, 5000)); + +// =========================================================================== +// Test: Read varchar type data with various record counts +// =========================================================================== +class BTreeCompatibilityVarcharTest : public BTreeCompatibilityTest, + public ::testing::WithParamInterface {}; + +TEST_P(BTreeCompatibilityVarcharTest, ReadAndQueryVarcharData) { + int count = GetParam(); + std::string prefix = "btree_test_varchar_" + std::to_string(count); + std::string bin_path = data_dir_ + "/" + prefix + ".bin"; + std::string meta_path = bin_path + ".meta"; + std::string csv_path = data_dir_ + "/" + prefix + ".csv"; + + // Parse CSV + auto records = ParseCsvFile(csv_path); + ASSERT_EQ(static_cast(records.size()), count) + << "CSV record count mismatch for " << prefix; + + // Create reader (varchar -> arrow::utf8()) + auto reader_result = CreateReaderFromFiles(bin_path, meta_path, arrow::utf8(), count); + ASSERT_OK(reader_result.status()) << "Failed to create reader for " << prefix; + auto reader = reader_result.value(); + + // ---- Test 1: VisitIsNull ---- + { + auto result = reader->VisitIsNull(); + ASSERT_OK(result.status()) << prefix << ": VisitIsNull failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetNullRowIds(records); + EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIsNull mismatch"; + } + + // ---- Test 2: VisitIsNotNull ---- + { + auto result = reader->VisitIsNotNull(); + ASSERT_OK(result.status()) << prefix << ": VisitIsNotNull failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetNonNullRowIds(records); + EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIsNotNull mismatch"; + } + + // ---- Test 3: VisitEqual for a known key ---- + for (const auto& rec : records) { + if (!rec.is_null) { + Literal literal(FieldType::STRING, rec.key.c_str(), + static_cast(rec.key.size())); + auto result = reader->VisitEqual(literal); + ASSERT_OK(result.status()) << prefix << ": VisitEqual(" << rec.key << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetRowIdsForStringKey(records, rec.key); + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitEqual(" << rec.key << ") mismatch"; + break; + } + } + + // ---- Test 4: VisitEqual for the last non-null key ---- + for (auto it = records.rbegin(); it != records.rend(); ++it) { + if (!it->is_null) { + Literal literal(FieldType::STRING, it->key.c_str(), + static_cast(it->key.size())); + auto result = reader->VisitEqual(literal); + ASSERT_OK(result.status()) << prefix << ": VisitEqual(" << it->key << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetRowIdsForStringKey(records, it->key); + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitEqual(" << it->key << ") mismatch (last key)"; + break; + } + } + + // ---- Test 5: VisitEqual for a non-existent key ---- + { + std::string non_existent = "zzz_non_existent_key"; + Literal literal(FieldType::STRING, non_existent.c_str(), + static_cast(non_existent.size())); + auto result = reader->VisitEqual(literal); + ASSERT_OK(result.status()) << prefix << ": VisitEqual(non_existent) failed"; + auto actual_ids = CollectRowIds(result.value()); + EXPECT_TRUE(actual_ids.empty()) << prefix << ": VisitEqual(non_existent) should be empty"; + } + + // ---- Test 6: VisitLessThan for a mid-range key ---- + { + std::vector non_null_keys; + for (const auto& rec : records) { + if (!rec.is_null) non_null_keys.push_back(rec.key); + } + if (!non_null_keys.empty()) { + std::sort(non_null_keys.begin(), non_null_keys.end()); + std::string mid_key = non_null_keys[non_null_keys.size() / 2]; + Literal literal(FieldType::STRING, mid_key.c_str(), + static_cast(mid_key.size())); + auto result = reader->VisitLessThan(literal); + ASSERT_OK(result.status()) << prefix << ": VisitLessThan(" << mid_key << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetRowIdsForStringRange(records, non_null_keys.front(), mid_key, + true, false); + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitLessThan(" << mid_key << ") mismatch"; + } + } + + // ---- Test 7: VisitGreaterOrEqual for a mid-range key ---- + { + std::vector non_null_keys; + for (const auto& rec : records) { + if (!rec.is_null) non_null_keys.push_back(rec.key); + } + if (!non_null_keys.empty()) { + std::sort(non_null_keys.begin(), non_null_keys.end()); + std::string mid_key = non_null_keys[non_null_keys.size() / 2]; + Literal literal(FieldType::STRING, mid_key.c_str(), + static_cast(mid_key.size())); + auto result = reader->VisitGreaterOrEqual(literal); + ASSERT_OK(result.status()) + << prefix << ": VisitGreaterOrEqual(" << mid_key << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetRowIdsForStringRange(records, mid_key, non_null_keys.back(), + true, true); + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitGreaterOrEqual(" << mid_key << ") mismatch"; + } + } + + // ---- Test 8: VisitStartsWith ---- + { + std::string prefix_str = "test_000"; + Literal literal(FieldType::STRING, prefix_str.c_str(), + static_cast(prefix_str.size())); + auto result = reader->VisitStartsWith(literal); + ASSERT_OK(result.status()) << prefix << ": VisitStartsWith(" << prefix_str << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + + // Expected: all non-null rows whose key starts with "test_000" + std::set expected_ids; + for (const auto& rec : records) { + if (!rec.is_null && rec.key.substr(0, prefix_str.size()) == prefix_str) { + expected_ids.insert(rec.row_id); + } + } + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitStartsWith(" << prefix_str << ") mismatch"; + } + + // ---- Test 9: VisitBetween ---- + { + std::vector non_null_keys; + for (const auto& rec : records) { + if (!rec.is_null) non_null_keys.push_back(rec.key); + } + if (non_null_keys.size() >= 4) { + std::sort(non_null_keys.begin(), non_null_keys.end()); + std::string lower = non_null_keys[non_null_keys.size() / 4]; + std::string upper = non_null_keys[non_null_keys.size() * 3 / 4]; + Literal lit_lower(FieldType::STRING, lower.c_str(), + static_cast(lower.size())); + Literal lit_upper(FieldType::STRING, upper.c_str(), + static_cast(upper.size())); + auto result = reader->VisitBetween(lit_lower, lit_upper); + ASSERT_OK(result.status()) + << prefix << ": VisitBetween(" << lower << ", " << upper << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetRowIdsForStringRange(records, lower, upper, true, true); + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitBetween(" << lower << ", " << upper << ") mismatch"; + } + } +} + +INSTANTIATE_TEST_SUITE_P(VarcharDataSizes, BTreeCompatibilityVarcharTest, + ::testing::Values(50, 100, 500, 1000, 5000)); + +// =========================================================================== +// Test: Edge case - all nulls +// =========================================================================== +TEST_F(BTreeCompatibilityTest, AllNulls) { + std::string prefix = "btree_test_int_all_nulls"; + std::string bin_path = data_dir_ + "/" + prefix + ".bin"; + std::string meta_path = bin_path + ".meta"; + std::string csv_path = data_dir_ + "/" + prefix + ".csv"; + + auto records = ParseCsvFile(csv_path); + ASSERT_FALSE(records.empty()) << "Failed to parse CSV for " << prefix; + int count = static_cast(records.size()); + + auto reader_result = CreateReaderFromFiles(bin_path, meta_path, arrow::int32(), count); + ASSERT_OK(reader_result.status()) << "Failed to create reader for " << prefix; + auto reader = reader_result.value(); + + // All rows should be null + { + auto result = reader->VisitIsNull(); + ASSERT_OK(result.status()) << prefix << ": VisitIsNull failed"; + auto actual_ids = CollectRowIds(result.value()); + EXPECT_EQ(static_cast(actual_ids.size()), count) + << prefix << ": VisitIsNull should return all rows"; + // Verify each row ID + for (int i = 0; i < count; ++i) { + EXPECT_TRUE(actual_ids.count(i)) << prefix << ": Missing row_id " << i; + } + } + + // No rows should be non-null + { + auto result = reader->VisitIsNotNull(); + ASSERT_OK(result.status()) << prefix << ": VisitIsNotNull failed"; + auto actual_ids = CollectRowIds(result.value()); + EXPECT_TRUE(actual_ids.empty()) << prefix << ": VisitIsNotNull should be empty"; + } + + // VisitEqual should return empty for any key + { + Literal literal(static_cast(42)); + auto result = reader->VisitEqual(literal); + ASSERT_OK(result.status()) << prefix << ": VisitEqual(42) failed"; + auto actual_ids = CollectRowIds(result.value()); + EXPECT_TRUE(actual_ids.empty()) << prefix << ": VisitEqual should be empty for all-nulls"; + } +} + +// =========================================================================== +// Test: Edge case - no nulls +// =========================================================================== +TEST_F(BTreeCompatibilityTest, NoNulls) { + std::string prefix = "btree_test_int_no_nulls"; + std::string bin_path = data_dir_ + "/" + prefix + ".bin"; + std::string meta_path = bin_path + ".meta"; + std::string csv_path = data_dir_ + "/" + prefix + ".csv"; + + auto records = ParseCsvFile(csv_path); + ASSERT_FALSE(records.empty()) << "Failed to parse CSV for " << prefix; + int count = static_cast(records.size()); + + auto reader_result = CreateReaderFromFiles(bin_path, meta_path, arrow::int32(), count); + ASSERT_OK(reader_result.status()) << "Failed to create reader for " << prefix; + auto reader = reader_result.value(); + + // No rows should be null + { + auto result = reader->VisitIsNull(); + ASSERT_OK(result.status()) << prefix << ": VisitIsNull failed"; + auto actual_ids = CollectRowIds(result.value()); + EXPECT_TRUE(actual_ids.empty()) << prefix << ": VisitIsNull should be empty"; + } + + // All rows should be non-null + { + auto result = reader->VisitIsNotNull(); + ASSERT_OK(result.status()) << prefix << ": VisitIsNotNull failed"; + auto actual_ids = CollectRowIds(result.value()); + EXPECT_EQ(static_cast(actual_ids.size()), count) + << prefix << ": VisitIsNotNull should return all rows"; + } + + // VisitEqual for each unique key should return correct row IDs + { + std::set tested_keys; + for (const auto& rec : records) { + if (!rec.is_null && tested_keys.find(rec.key) == tested_keys.end()) { + tested_keys.insert(rec.key); + int32_t key_val = std::stoi(rec.key); + Literal literal(key_val); + auto result = reader->VisitEqual(literal); + ASSERT_OK(result.status()) + << prefix << ": VisitEqual(" << key_val << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetRowIdsForIntKey(records, key_val); + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitEqual(" << key_val << ") mismatch"; + } + } + } + + // VisitLessOrEqual for the max key should return all rows + { + int32_t max_key = 0; + for (const auto& rec : records) { + if (!rec.is_null) max_key = std::max(max_key, std::stoi(rec.key)); + } + Literal literal(max_key); + auto result = reader->VisitLessOrEqual(literal); + ASSERT_OK(result.status()) << prefix << ": VisitLessOrEqual(" << max_key << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + EXPECT_EQ(static_cast(actual_ids.size()), count) + << prefix << ": VisitLessOrEqual(max) should return all rows"; + } + + // VisitGreaterThan for the max key should return empty + { + int32_t max_key = 0; + for (const auto& rec : records) { + if (!rec.is_null) max_key = std::max(max_key, std::stoi(rec.key)); + } + Literal literal(max_key); + auto result = reader->VisitGreaterThan(literal); + ASSERT_OK(result.status()) << prefix << ": VisitGreaterThan(" << max_key << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + EXPECT_TRUE(actual_ids.empty()) + << prefix << ": VisitGreaterThan(max) should be empty"; + } +} + +// =========================================================================== +// Test: Edge case - duplicate keys +// =========================================================================== +TEST_F(BTreeCompatibilityTest, DuplicateKeys) { + std::string prefix = "btree_test_int_duplicates"; + std::string bin_path = data_dir_ + "/" + prefix + ".bin"; + std::string meta_path = bin_path + ".meta"; + std::string csv_path = data_dir_ + "/" + prefix + ".csv"; + + auto records = ParseCsvFile(csv_path); + ASSERT_FALSE(records.empty()) << "Failed to parse CSV for " << prefix; + int count = static_cast(records.size()); + + auto reader_result = CreateReaderFromFiles(bin_path, meta_path, arrow::int32(), count); + ASSERT_OK(reader_result.status()) << "Failed to create reader for " << prefix; + auto reader = reader_result.value(); + + // ---- Test: VisitIsNull ---- + { + auto result = reader->VisitIsNull(); + ASSERT_OK(result.status()) << prefix << ": VisitIsNull failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetNullRowIds(records); + EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIsNull mismatch"; + } + + // ---- Test: VisitIsNotNull ---- + { + auto result = reader->VisitIsNotNull(); + ASSERT_OK(result.status()) << prefix << ": VisitIsNotNull failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetNonNullRowIds(records); + EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIsNotNull mismatch"; + } + + // ---- Test: VisitEqual for each unique key ---- + // Duplicate keys: key = i/10, so keys are 0,0,...,0,1,1,...,1,...,9,9,...,9 + { + std::set tested_keys; + for (const auto& rec : records) { + if (!rec.is_null && tested_keys.find(rec.key) == tested_keys.end()) { + tested_keys.insert(rec.key); + int32_t key_val = std::stoi(rec.key); + Literal literal(key_val); + auto result = reader->VisitEqual(literal); + ASSERT_OK(result.status()) + << prefix << ": VisitEqual(" << key_val << ") failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetRowIdsForIntKey(records, key_val); + EXPECT_EQ(actual_ids, expected_ids) + << prefix << ": VisitEqual(" << key_val << ") mismatch"; + } + } + } + + // ---- Test: VisitIn for keys 0, 5, 9 ---- + { + std::vector in_literals = { + Literal(static_cast(0)), + Literal(static_cast(5)), + Literal(static_cast(9))}; + auto result = reader->VisitIn(in_literals); + ASSERT_OK(result.status()) << prefix << ": VisitIn({0,5,9}) failed"; + auto actual_ids = CollectRowIds(result.value()); + + std::set expected_ids; + for (auto id : GetRowIdsForIntKey(records, 0)) expected_ids.insert(id); + for (auto id : GetRowIdsForIntKey(records, 5)) expected_ids.insert(id); + for (auto id : GetRowIdsForIntKey(records, 9)) expected_ids.insert(id); + EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIn({0,5,9}) mismatch"; + } + + // ---- Test: VisitBetween for keys [2, 7] ---- + { + Literal lit_lower(static_cast(2)); + Literal lit_upper(static_cast(7)); + auto result = reader->VisitBetween(lit_lower, lit_upper); + ASSERT_OK(result.status()) << prefix << ": VisitBetween(2, 7) failed"; + auto actual_ids = CollectRowIds(result.value()); + auto expected_ids = GetRowIdsForIntRange(records, 2, 7, true, true); + EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitBetween(2, 7) mismatch"; + } + + // ---- Test: VisitNotBetween for keys [2, 7] ---- + { + Literal lit_lower(static_cast(2)); + Literal lit_upper(static_cast(7)); + auto result = reader->VisitNotBetween(lit_lower, lit_upper); + ASSERT_OK(result.status()) << prefix << ": VisitNotBetween(2, 7) failed"; + auto actual_ids = CollectRowIds(result.value()); + + // Expected: non-null rows with key < 2 or key > 7 + std::set expected_ids; + for (const auto& rec : records) { + if (!rec.is_null) { + int32_t val = std::stoi(rec.key); + if (val < 2 || val > 7) expected_ids.insert(rec.row_id); + } + } + EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitNotBetween(2, 7) mismatch"; + } +} + +// =========================================================================== +// Test: BTreeIndexMeta deserialization from Java-generated meta files +// =========================================================================== +TEST_F(BTreeCompatibilityTest, MetaDeserialization) { + // Test int_50 meta + { + std::string meta_path = data_dir_ + "/btree_test_int_50.bin.meta"; + auto meta_bytes = ReadBinaryFile(meta_path, pool_.get()); + ASSERT_NE(meta_bytes, nullptr) << "Failed to read meta file"; + + auto meta = BTreeIndexMeta::Deserialize(meta_bytes, pool_.get()); + ASSERT_NE(meta, nullptr) << "Failed to deserialize meta"; + + // The int_50 data has nulls (row_id 1, 18, 48, 49) + EXPECT_TRUE(meta->HasNulls()) << "int_50 should have nulls"; + + // First key should be "3" (first non-null key in sorted order) + ASSERT_NE(meta->FirstKey(), nullptr) << "int_50 first key should not be null"; + std::string first_key(meta->FirstKey()->data(), meta->FirstKey()->size()); + // Java writes int keys as string representation + // The first non-null key in sorted order is 3 + EXPECT_FALSE(first_key.empty()) << "int_50 first key should not be empty"; + + // Last key should be "143" (last non-null key in sorted order) + ASSERT_NE(meta->LastKey(), nullptr) << "int_50 last key should not be null"; + std::string last_key(meta->LastKey()->data(), meta->LastKey()->size()); + EXPECT_FALSE(last_key.empty()) << "int_50 last key should not be empty"; + } + + // Test all_nulls meta + { + std::string meta_path = data_dir_ + "/btree_test_int_all_nulls.bin.meta"; + auto meta_bytes = ReadBinaryFile(meta_path, pool_.get()); + ASSERT_NE(meta_bytes, nullptr) << "Failed to read all_nulls meta file"; + + auto meta = BTreeIndexMeta::Deserialize(meta_bytes, pool_.get()); + ASSERT_NE(meta, nullptr) << "Failed to deserialize all_nulls meta"; + + EXPECT_TRUE(meta->HasNulls()) << "all_nulls should have nulls"; + // For all-nulls data, first_key and last_key should be null + EXPECT_TRUE(meta->OnlyNulls()) << "all_nulls should report OnlyNulls()"; + } + + // Test no_nulls meta + { + std::string meta_path = data_dir_ + "/btree_test_int_no_nulls.bin.meta"; + auto meta_bytes = ReadBinaryFile(meta_path, pool_.get()); + ASSERT_NE(meta_bytes, nullptr) << "Failed to read no_nulls meta file"; + + auto meta = BTreeIndexMeta::Deserialize(meta_bytes, pool_.get()); + ASSERT_NE(meta, nullptr) << "Failed to deserialize no_nulls meta"; + + EXPECT_FALSE(meta->HasNulls()) << "no_nulls should not have nulls"; + EXPECT_FALSE(meta->OnlyNulls()) << "no_nulls should not report OnlyNulls()"; + ASSERT_NE(meta->FirstKey(), nullptr) << "no_nulls first key should not be null"; + ASSERT_NE(meta->LastKey(), nullptr) << "no_nulls last key should not be null"; + } + + // Test varchar_50 meta + { + std::string meta_path = data_dir_ + "/btree_test_varchar_50.bin.meta"; + auto meta_bytes = ReadBinaryFile(meta_path, pool_.get()); + ASSERT_NE(meta_bytes, nullptr) << "Failed to read varchar_50 meta file"; + + auto meta = BTreeIndexMeta::Deserialize(meta_bytes, pool_.get()); + ASSERT_NE(meta, nullptr) << "Failed to deserialize varchar_50 meta"; + + // varchar_50 has 1 null (row_id=24 based on 5% null ratio with seed 42) + ASSERT_NE(meta->FirstKey(), nullptr) << "varchar_50 first key should not be null"; + ASSERT_NE(meta->LastKey(), nullptr) << "varchar_50 last key should not be null"; + + std::string first_key(meta->FirstKey()->data(), meta->FirstKey()->size()); + std::string last_key(meta->LastKey()->data(), meta->LastKey()->size()); + // Keys are "test_00000" to "test_00049" (excluding nulls) + EXPECT_EQ(first_key, "test_00000") << "varchar_50 first key mismatch"; + EXPECT_EQ(last_key, "test_00049") << "varchar_50 last key mismatch"; + } +} + +// =========================================================================== +// Test: Verify total row count consistency +// =========================================================================== +TEST_F(BTreeCompatibilityTest, RowCountConsistency) { + // For each test data set, verify that null_count + non_null_count == total_count + std::vector>> test_cases = { + {"btree_test_int_50", arrow::int32()}, + {"btree_test_int_100", arrow::int32()}, + {"btree_test_int_500", arrow::int32()}, + {"btree_test_varchar_50", arrow::utf8()}, + {"btree_test_varchar_100", arrow::utf8()}, + {"btree_test_int_all_nulls", arrow::int32()}, + {"btree_test_int_no_nulls", arrow::int32()}, + {"btree_test_int_duplicates", arrow::int32()}, + }; + + for (const auto& [prefix, arrow_type] : test_cases) { + std::string bin_path = data_dir_ + "/" + prefix + ".bin"; + std::string meta_path = bin_path + ".meta"; + std::string csv_path = data_dir_ + "/" + prefix + ".csv"; + + auto records = ParseCsvFile(csv_path); + ASSERT_FALSE(records.empty()) << "Failed to parse CSV for " << prefix; + int count = static_cast(records.size()); + + auto reader_result = CreateReaderFromFiles(bin_path, meta_path, arrow_type, count); + ASSERT_OK(reader_result.status()) << "Failed to create reader for " << prefix; + auto reader = reader_result.value(); + + auto null_result = reader->VisitIsNull(); + ASSERT_OK(null_result.status()) << prefix << ": VisitIsNull failed"; + auto null_ids = CollectRowIds(null_result.value()); + + auto non_null_result = reader->VisitIsNotNull(); + ASSERT_OK(non_null_result.status()) << prefix << ": VisitIsNotNull failed"; + auto non_null_ids = CollectRowIds(non_null_result.value()); + + // Null and non-null should be disjoint + for (auto id : null_ids) { + EXPECT_EQ(non_null_ids.count(id), 0u) + << prefix << ": row_id " << id << " is in both null and non-null sets"; + } + + // Total should equal record count + EXPECT_EQ(static_cast(null_ids.size() + non_null_ids.size()), count) + << prefix << ": null_count + non_null_count != total_count"; + } +} + +} // namespace paimon::test diff --git a/src/paimon/common/global_index/btree/btree_file_footer.cpp b/src/paimon/common/global_index/btree/btree_file_footer.cpp index 9b51effc9..78ab4a6b7 100644 --- a/src/paimon/common/global_index/btree/btree_file_footer.cpp +++ b/src/paimon/common/global_index/btree/btree_file_footer.cpp @@ -18,76 +18,82 @@ namespace paimon { -Result> BTreeFileFooter::Read( - const std::shared_ptr& input) { +Result> BTreeFileFooter::Read(MemorySliceInput& input) { + // read version and verify magic number + input.SetPosition(ENCODED_LENGTH - 8); + + int32_t version = input.ReadInt(); + int32_t magic_number = input.ReadInt(); + if (magic_number != MAGIC_NUMBER) { + return Status::IOError("File is not a btree index file (bad magic number)"); + } + + input.SetPosition(0); + // read bloom filter and index handles + auto offset = input.ReadLong(); + auto size = input.ReadInt(); + auto expected_entries = input.ReadLong(); std::shared_ptr bloom_filter_handle = - std::make_shared(input->ReadLong(), input->ReadInt(), input->ReadLong()); + std::make_shared(offset, size, expected_entries); if (bloom_filter_handle->Offset() == 0 && bloom_filter_handle->Size() == 0 && bloom_filter_handle->ExpectedEntries() == 0) { bloom_filter_handle = nullptr; } - std::shared_ptr index_block_handle = - std::make_shared(input->ReadLong(), input->ReadInt()); + offset = input.ReadLong(); + size = input.ReadInt(); + std::shared_ptr index_block_handle = std::make_shared(offset, size); - std::shared_ptr null_bitmap_handle = - std::make_shared(input->ReadLong(), input->ReadInt()); + offset = input.ReadLong(); + size = input.ReadInt(); + std::shared_ptr null_bitmap_handle = std::make_shared(offset, size); if (null_bitmap_handle->Offset() == 0 && null_bitmap_handle->Size() == 0) { null_bitmap_handle = nullptr; } - // skip padding - input->SetPosition(ENCODED_LENGTH - 4); - - // verify magic number - int32_t magic_number = input->ReadInt(); - if (magic_number != MAGIC_NUMBER) { - return Status::IOError("File is not a table (bad magic number)"); - } - - return std::make_shared(bloom_filter_handle, index_block_handle, + return std::make_shared(version, bloom_filter_handle, index_block_handle, null_bitmap_handle); } -std::shared_ptr BTreeFileFooter::Write(const std::shared_ptr& footer, - MemoryPool* pool) { - auto output = std::make_shared(ENCODED_LENGTH, pool); +MemorySlice BTreeFileFooter::Write(const std::shared_ptr& footer, + MemoryPool* pool) { + MemorySliceOutput output(ENCODED_LENGTH, pool); return BTreeFileFooter::Write(footer, output); } -std::shared_ptr BTreeFileFooter::Write( - const std::shared_ptr& footer, - const std::shared_ptr& ouput) { +MemorySlice BTreeFileFooter::Write(const std::shared_ptr& footer, + MemorySliceOutput& ouput) { // write bloom filter and index handles auto bloom_filter_handle = footer->GetBloomFilterHandle(); if (!bloom_filter_handle) { - ouput->WriteValue(static_cast(0)); - ouput->WriteValue(static_cast(0)); - ouput->WriteValue(static_cast(0)); + ouput.WriteValue(static_cast(0)); + ouput.WriteValue(static_cast(0)); + ouput.WriteValue(static_cast(0)); } else { - ouput->WriteValue(bloom_filter_handle->Offset()); - ouput->WriteValue(bloom_filter_handle->Size()); - ouput->WriteValue(bloom_filter_handle->ExpectedEntries()); + ouput.WriteValue(bloom_filter_handle->Offset()); + ouput.WriteValue(bloom_filter_handle->Size()); + ouput.WriteValue(bloom_filter_handle->ExpectedEntries()); } auto index_block_handle = footer->GetIndexBlockHandle(); - ouput->WriteValue(index_block_handle->Offset()); - ouput->WriteValue(index_block_handle->Size()); + ouput.WriteValue(index_block_handle->Offset()); + ouput.WriteValue(index_block_handle->Size()); auto null_bitmap_handle = footer->GetNullBitmapHandle(); if (!null_bitmap_handle) { - ouput->WriteValue(static_cast(0)); - ouput->WriteValue(static_cast(0)); + ouput.WriteValue(static_cast(0)); + ouput.WriteValue(static_cast(0)); } else { - ouput->WriteValue(null_bitmap_handle->Offset()); - ouput->WriteValue(null_bitmap_handle->Size()); + ouput.WriteValue(null_bitmap_handle->Offset()); + ouput.WriteValue(null_bitmap_handle->Size()); } - // write magic number - ouput->WriteValue(MAGIC_NUMBER); + // write version and magic number + ouput.WriteValue(footer->GetVersion()); + ouput.WriteValue(MAGIC_NUMBER); - return ouput->ToSlice(); + return ouput.ToSlice(); } -} // namespace paimon +} // namespace paimon \ No newline at end of file diff --git a/src/paimon/common/global_index/btree/btree_file_footer.h b/src/paimon/common/global_index/btree/btree_file_footer.h index 19a9f9b73..0f650d15c 100644 --- a/src/paimon/common/global_index/btree/btree_file_footer.h +++ b/src/paimon/common/global_index/btree/btree_file_footer.h @@ -17,45 +17,64 @@ #pragma once #include -#include "paimon/common/sst/bloom_filter_handle.h" -#include "paimon/common/sst/block_handle.h" + #include "paimon/common/memory/memory_slice_input.h" #include "paimon/common/memory/memory_slice_output.h" +#include "paimon/common/sst/block_handle.h" +#include "paimon/common/sst/bloom_filter_handle.h" namespace paimon { /// The Footer for BTree file. class BTreeFileFooter { public: - static Result> Read(const std::shared_ptr& input); - static std::shared_ptr Write(const std::shared_ptr& footer, MemoryPool* pool); - static std::shared_ptr Write(const std::shared_ptr& footer, const std::shared_ptr& ouput); + static Result> Read(MemorySliceInput& input); + static MemorySlice Write(const std::shared_ptr& footer, MemoryPool* pool); + static MemorySlice Write(const std::shared_ptr& footer, + MemorySliceOutput& ouput); public: - BTreeFileFooter(const std::shared_ptr& bloom_filter_handle, const std::shared_ptr& index_block_handle, - const std::shared_ptr& null_bitmap_handle) - : bloom_filter_handle_(bloom_filter_handle), index_block_handle_(index_block_handle), null_bitmap_handle_(null_bitmap_handle) {} + BTreeFileFooter(const std::shared_ptr& bloom_filter_handle, + const std::shared_ptr& index_block_handle, + const std::shared_ptr& null_bitmap_handle) + : version_(CURRENT_VERSION), + bloom_filter_handle_(bloom_filter_handle), + index_block_handle_(index_block_handle), + null_bitmap_handle_(null_bitmap_handle) {} + + BTreeFileFooter(int32_t version, const std::shared_ptr& bloom_filter_handle, + const std::shared_ptr& index_block_handle, + const std::shared_ptr& null_bitmap_handle) + : version_(version), + bloom_filter_handle_(bloom_filter_handle), + index_block_handle_(index_block_handle), + null_bitmap_handle_(null_bitmap_handle) {} - std::shared_ptr GetBloomFilterHandle() const { - return bloom_filter_handle_; - } + int32_t GetVersion() const { + return version_; + } - std::shared_ptr GetIndexBlockHandle() const { - return index_block_handle_; - } + std::shared_ptr GetBloomFilterHandle() const { + return bloom_filter_handle_; + } - std::shared_ptr GetNullBitmapHandle() const { - return null_bitmap_handle_; - } + std::shared_ptr GetIndexBlockHandle() const { + return index_block_handle_; + } + std::shared_ptr GetNullBitmapHandle() const { + return null_bitmap_handle_; + } public: - static constexpr int32_t MAGIC_NUMBER = 198732882; - static constexpr int32_t ENCODED_LENGTH = 48; + static constexpr int32_t MAGIC_NUMBER = 0x50425449; + static constexpr int32_t CURRENT_VERSION = 1; + static constexpr int32_t ENCODED_LENGTH = 52; private: + int32_t version_; std::shared_ptr bloom_filter_handle_; std::shared_ptr index_block_handle_; std::shared_ptr null_bitmap_handle_; }; -} // namespace paimon +} // namespace paimon \ No newline at end of file diff --git a/src/paimon/common/global_index/btree/btree_file_footer_test.cpp b/src/paimon/common/global_index/btree/btree_file_footer_test.cpp index 055230495..e5df2c504 100644 --- a/src/paimon/common/global_index/btree/btree_file_footer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_file_footer_test.cpp @@ -14,56 +14,53 @@ * limitations under the License. */ +#include "paimon/common/global_index/btree/btree_file_footer.h" + #include -#include "paimon/common/global_index/btree/btree_file_footer.h" -#include "paimon/common/sst/bloom_filter_handle.h" #include "paimon/common/sst/block_handle.h" +#include "paimon/common/sst/bloom_filter_handle.h" #include "paimon/memory/memory_pool.h" +#include "paimon/testing/utils/testharness.h" namespace paimon::test { class BTreeFileFooterTest : public ::testing::Test { -protected: - void SetUp() override { pool_ = GetDefaultPool(); } + protected: + void SetUp() override { + pool_ = GetDefaultPool(); + } std::shared_ptr pool_; }; TEST_F(BTreeFileFooterTest, ReadWriteRoundTrip) { - // Create a footer with all handles auto bloom_filter_handle = std::make_shared(100, 50, 1000); auto index_block_handle = std::make_shared(200, 80); auto null_bitmap_handle = std::make_shared(300, 40); auto footer = std::make_shared(bloom_filter_handle, index_block_handle, - null_bitmap_handle); + null_bitmap_handle); - // Write auto serialized = BTreeFileFooter::Write(footer, pool_.get()); - ASSERT_NE(serialized, nullptr); - EXPECT_EQ(serialized->Length(), BTreeFileFooter::ENCODED_LENGTH); + EXPECT_EQ(serialized.Length(), BTreeFileFooter::ENCODED_LENGTH); - // Read - auto input = serialized->ToInput(); + auto input = serialized.ToInput(); auto deserialized = BTreeFileFooter::Read(input); ASSERT_OK(deserialized.status()); auto deserialized_footer = deserialized.value(); - // Verify bloom filter handle auto bf_handle = deserialized_footer->GetBloomFilterHandle(); ASSERT_NE(bf_handle, nullptr); EXPECT_EQ(bf_handle->Offset(), 100); EXPECT_EQ(bf_handle->Size(), 50); EXPECT_EQ(bf_handle->ExpectedEntries(), 1000); - // Verify index block handle auto ib_handle = deserialized_footer->GetIndexBlockHandle(); ASSERT_NE(ib_handle, nullptr); EXPECT_EQ(ib_handle->Offset(), 200); EXPECT_EQ(ib_handle->Size(), 80); - // Verify null bitmap handle auto nb_handle = deserialized_footer->GetNullBitmapHandle(); ASSERT_NE(nb_handle, nullptr); EXPECT_EQ(nb_handle->Offset(), 300); @@ -71,33 +68,27 @@ TEST_F(BTreeFileFooterTest, ReadWriteRoundTrip) { } TEST_F(BTreeFileFooterTest, ReadWriteWithNullBloomFilter) { - // Create a footer without bloom filter auto index_block_handle = std::make_shared(200, 80); auto null_bitmap_handle = std::make_shared(300, 40); - auto footer = std::make_shared(nullptr, index_block_handle, null_bitmap_handle); + auto footer = + std::make_shared(nullptr, index_block_handle, null_bitmap_handle); - // Write auto serialized = BTreeFileFooter::Write(footer, pool_.get()); - ASSERT_NE(serialized, nullptr); - EXPECT_EQ(serialized->Length(), BTreeFileFooter::ENCODED_LENGTH); + EXPECT_EQ(serialized.Length(), BTreeFileFooter::ENCODED_LENGTH); - // Read - auto input = serialized->ToInput(); + auto input = serialized.ToInput(); auto deserialized = BTreeFileFooter::Read(input); ASSERT_OK(deserialized.status()); auto deserialized_footer = deserialized.value(); - // Verify bloom filter handle is null EXPECT_EQ(deserialized_footer->GetBloomFilterHandle(), nullptr); - // Verify index block handle auto ib_handle = deserialized_footer->GetIndexBlockHandle(); ASSERT_NE(ib_handle, nullptr); EXPECT_EQ(ib_handle->Offset(), 200); EXPECT_EQ(ib_handle->Size(), 80); - // Verify null bitmap handle auto nb_handle = deserialized_footer->GetNullBitmapHandle(); ASSERT_NE(nb_handle, nullptr); EXPECT_EQ(nb_handle->Offset(), 300); @@ -105,133 +96,102 @@ TEST_F(BTreeFileFooterTest, ReadWriteWithNullBloomFilter) { } TEST_F(BTreeFileFooterTest, ReadWriteWithNullNullBitmap) { - // Create a footer without null bitmap auto bloom_filter_handle = std::make_shared(100, 50, 1000); auto index_block_handle = std::make_shared(200, 80); - auto footer = std::make_shared(bloom_filter_handle, index_block_handle, nullptr); + auto footer = + std::make_shared(bloom_filter_handle, index_block_handle, nullptr); - // Write auto serialized = BTreeFileFooter::Write(footer, pool_.get()); - ASSERT_NE(serialized, nullptr); - EXPECT_EQ(serialized->Length(), BTreeFileFooter::ENCODED_LENGTH); + EXPECT_EQ(serialized.Length(), BTreeFileFooter::ENCODED_LENGTH); - // Read - auto input = serialized->ToInput(); + auto input = serialized.ToInput(); auto deserialized = BTreeFileFooter::Read(input); ASSERT_OK(deserialized.status()); auto deserialized_footer = deserialized.value(); - // Verify bloom filter handle auto bf_handle = deserialized_footer->GetBloomFilterHandle(); ASSERT_NE(bf_handle, nullptr); EXPECT_EQ(bf_handle->Offset(), 100); EXPECT_EQ(bf_handle->Size(), 50); EXPECT_EQ(bf_handle->ExpectedEntries(), 1000); - // Verify index block handle auto ib_handle = deserialized_footer->GetIndexBlockHandle(); ASSERT_NE(ib_handle, nullptr); EXPECT_EQ(ib_handle->Offset(), 200); EXPECT_EQ(ib_handle->Size(), 80); - // Verify null bitmap handle is null EXPECT_EQ(deserialized_footer->GetNullBitmapHandle(), nullptr); } TEST_F(BTreeFileFooterTest, ReadWriteWithAllNullHandles) { - // Create a footer with only index block handle (required) auto index_block_handle = std::make_shared(200, 80); auto footer = std::make_shared(nullptr, index_block_handle, nullptr); - // Write auto serialized = BTreeFileFooter::Write(footer, pool_.get()); - ASSERT_NE(serialized, nullptr); - EXPECT_EQ(serialized->Length(), BTreeFileFooter::ENCODED_LENGTH); + EXPECT_EQ(serialized.Length(), BTreeFileFooter::ENCODED_LENGTH); - // Read - auto input = serialized->ToInput(); + auto input = serialized.ToInput(); auto deserialized = BTreeFileFooter::Read(input); ASSERT_OK(deserialized.status()); auto deserialized_footer = deserialized.value(); - // Verify bloom filter handle is null EXPECT_EQ(deserialized_footer->GetBloomFilterHandle(), nullptr); - // Verify index block handle auto ib_handle = deserialized_footer->GetIndexBlockHandle(); ASSERT_NE(ib_handle, nullptr); EXPECT_EQ(ib_handle->Offset(), 200); EXPECT_EQ(ib_handle->Size(), 80); - // Verify null bitmap handle is null EXPECT_EQ(deserialized_footer->GetNullBitmapHandle(), nullptr); } TEST_F(BTreeFileFooterTest, MagicNumberVerification) { - // Create a valid footer auto index_block_handle = std::make_shared(200, 80); auto footer = std::make_shared(nullptr, index_block_handle, nullptr); - // Write auto serialized = BTreeFileFooter::Write(footer, pool_.get()); - ASSERT_NE(serialized, nullptr); - // Read - auto input = serialized->ToInput(); + auto input = serialized.ToInput(); auto deserialized = BTreeFileFooter::Read(input); ASSERT_OK(deserialized.status()); } TEST_F(BTreeFileFooterTest, InvalidMagicNumber) { - // Create a buffer with invalid magic number - auto output = std::make_shared(BTreeFileFooter::ENCODED_LENGTH, pool_.get()); + MemorySliceOutput output(BTreeFileFooter::ENCODED_LENGTH, pool_.get()); - // Write bloom filter handle (all zeros for null) - output->WriteValue(static_cast(0)); - output->WriteValue(static_cast(0)); - output->WriteValue(static_cast(0)); + output.WriteValue(static_cast(0)); + output.WriteValue(static_cast(0)); + output.WriteValue(static_cast(0)); - // Write index block handle - output->WriteValue(static_cast(200)); - output->WriteValue(static_cast(80)); + output.WriteValue(static_cast(200)); + output.WriteValue(static_cast(80)); - // Write null bitmap handle (all zeros for null) - output->WriteValue(static_cast(0)); - output->WriteValue(static_cast(0)); + output.WriteValue(static_cast(0)); + output.WriteValue(static_cast(0)); - // Write invalid magic number - output->WriteValue(static_cast(12345)); // Invalid magic number + output.WriteValue(static_cast(12345)); // Invalid magic number - auto serialized = output->ToSlice(); - auto input = serialized->ToInput(); + auto serialized = output.ToSlice(); + auto input = serialized.ToInput(); - // Read should fail auto deserialized = BTreeFileFooter::Read(input); EXPECT_FALSE(deserialized.ok()); EXPECT_TRUE(deserialized.status().IsIOError()); } TEST_F(BTreeFileFooterTest, EncodedLength) { - // Verify ENCODED_LENGTH = 48 - // bloom_filter: 8(offset) + 4(size) + 8(expected_entries) = 20 bytes - // index_block: 8(offset) + 4(size) = 12 bytes - // null_bitmap: 8(offset) + 4(size) = 12 bytes - // magic_number: 4 bytes - // Total = 20 + 12 + 12 + 4 = 48 bytes - EXPECT_EQ(BTreeFileFooter::ENCODED_LENGTH, 48); - - // Create a footer and verify the serialized length + EXPECT_EQ(BTreeFileFooter::ENCODED_LENGTH, 52); + auto bloom_filter_handle = std::make_shared(100, 50, 1000); auto index_block_handle = std::make_shared(200, 80); auto null_bitmap_handle = std::make_shared(300, 40); auto footer = std::make_shared(bloom_filter_handle, index_block_handle, - null_bitmap_handle); + null_bitmap_handle); auto serialized = BTreeFileFooter::Write(footer, pool_.get()); - ASSERT_NE(serialized, nullptr); - EXPECT_EQ(serialized->Length(), 48); + EXPECT_EQ(serialized.Length(), 52); } -} // namespace paimon::test \ No newline at end of file +} // namespace paimon::test diff --git a/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp index c2b492a65..182ab4d25 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp @@ -14,13 +14,13 @@ * limitations under the License. */ -#include - #include -#include +#include +#include -#include "paimon/common/global_index/btree/btree_global_indexer.h" #include "paimon/common/global_index/btree/btree_global_index_writer.h" +#include "paimon/common/global_index/btree/btree_global_indexer.h" +#include "paimon/common/utils/arrow/status_utils.h" #include "paimon/fs/file_system.h" #include "paimon/global_index/io/global_index_file_reader.h" #include "paimon/global_index/io/global_index_file_writer.h" @@ -31,7 +31,7 @@ namespace paimon::test { class FakeGlobalIndexFileWriter : public GlobalIndexFileWriter { -public: + public: FakeGlobalIndexFileWriter(const std::shared_ptr& fs, const std::string& base_path) : fs_(fs), base_path_(base_path), file_counter_(0) {} @@ -39,57 +39,58 @@ class FakeGlobalIndexFileWriter : public GlobalIndexFileWriter { return prefix + "_" + std::to_string(file_counter_++); } - Result> NewOutputStream(const std::string& file_name) const override { - return fs_->CreateOutputStream(base_path_ + "/" + file_name); + Result> NewOutputStream( + const std::string& file_name) const override { + return fs_->Create(base_path_ + "/" + file_name, true); } Result GetFileSize(const std::string& file_name) const override { PAIMON_ASSIGN_OR_RAISE(auto file_status, fs_->GetFileStatus(base_path_ + "/" + file_name)); - return file_status->Length(); + return static_cast(file_status->GetLen()); } std::string ToPath(const std::string& file_name) const override { return base_path_ + "/" + file_name; } -private: + private: std::shared_ptr fs_; std::string base_path_; mutable int64_t file_counter_; }; class FakeGlobalIndexFileReader : public GlobalIndexFileReader { -public: + public: FakeGlobalIndexFileReader(const std::shared_ptr& fs, const std::string& base_path) : fs_(fs), base_path_(base_path) {} - Result> GetInputStream(const std::string& file_path) const override { - return fs_->OpenInputStream(file_path); + Result> GetInputStream( + const std::string& file_path) const override { + return fs_->Open(file_path); } -private: + private: std::shared_ptr fs_; std::string base_path_; }; class BTreeGlobalIndexIntegrationTest : public ::testing::Test { -protected: + protected: void SetUp() override { pool_ = GetDefaultPool(); test_dir_ = UniqueTestDirectory::Create("local"); - ASSERT_OK(test_dir_.status()); fs_ = test_dir_->GetFileSystem(); base_path_ = test_dir_->Str(); } - void TearDown() override { test_dir_->Delete(); } + void TearDown() override {} // Helper to create ArrowSchema from arrow type - Result CreateArrowSchema(const std::shared_ptr& type, - const std::string& field_name) { + std::unique_ptr CreateArrowSchema(const std::shared_ptr& type, + const std::string& field_name) { auto schema = arrow::schema({arrow::field(field_name, type)}); - ArrowSchema* c_schema; - PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportSchema(*schema, &c_schema)); + auto c_schema = std::make_unique(); + EXPECT_TRUE(arrow::ExportSchema(*schema, c_schema.get()).ok()); return c_schema; } @@ -99,7 +100,7 @@ class BTreeGlobalIndexIntegrationTest : public ::testing::Test { if (!iterator_result.ok()) { return false; } - auto iterator = iterator_result.value(); + auto iterator = std::move(iterator_result).value(); while (iterator->HasNext()) { if (iterator->Next() == row_id) { return true; @@ -109,7 +110,7 @@ class BTreeGlobalIndexIntegrationTest : public ::testing::Test { } std::shared_ptr pool_; - Result> test_dir_; + std::unique_ptr test_dir_; std::shared_ptr fs_; std::string base_path_; }; @@ -118,20 +119,24 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadIntData) { // Create file writer auto file_writer = std::make_shared(fs_, base_path_); + // Create ArrowSchema + auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); + // Create the BTree global index writer - auto writer = std::make_shared("int_field", file_writer, pool_); + auto writer = + std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Create an Arrow array with int values // Row IDs: 0->1, 1->2, 2->3, 3->2, 4->1, 5->4, 6->5, 7->5, 8->5 - auto json_array = arrow::ipc::internal::json::ArrayFromJSON( - arrow::int32(), "[1, 2, 3, 2, 1, 4, 5, 5, 5]"); - ASSERT_OK(json_array.status()); + auto array = + arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, 2, 3, 2, 1, 4, 5, 5, 5]") + .ValueOrDie(); - ArrowArray* c_array; - ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); // Add batch - ASSERT_OK(writer->AddBatch(c_array)); + ASSERT_OK(writer->AddBatch(&c_array)); // Finish writing auto result = writer->Finish(); @@ -140,20 +145,18 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadIntData) { ASSERT_EQ(metas.size(), 1); // Release ArrowArray - ArrowArrayRelease(c_array); + ArrowArrayRelease(&c_array); // Now read back auto file_reader = std::make_shared(fs_, base_path_); std::map options; BTreeGlobalIndexer indexer(options); - // Create ArrowSchema - auto schema_result = CreateArrowSchema(arrow::int32(), "int_field"); - ASSERT_OK(schema_result.status()); - ArrowSchema* c_schema = schema_result.value(); + // Create a new ArrowSchema for reading (the original was consumed by the writer) + auto c_schema_read = CreateArrowSchema(arrow::int32(), "int_field"); // Create reader - auto reader_result = indexer.CreateReader(c_schema, file_reader, metas, pool_); + auto reader_result = indexer.CreateReader(c_schema_read.get(), file_reader, metas, pool_); ASSERT_OK(reader_result.status()); auto reader = reader_result.value(); @@ -174,26 +177,31 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadIntData) { EXPECT_TRUE(ContainsRowId(equal_result_5.value(), 8)); // Release ArrowSchema - ArrowSchemaRelease(c_schema); + ArrowSchemaRelease(c_schema.get()); + ArrowSchemaRelease(c_schema_read.get()); } TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadStringData) { // Create file writer auto file_writer = std::make_shared(fs_, base_path_); + // Create ArrowSchema + auto c_schema = CreateArrowSchema(arrow::utf8(), "string_field"); + // Create the BTree global index writer - auto writer = std::make_shared("string_field", file_writer, pool_); + auto writer = std::make_shared("string_field", c_schema.get(), + file_writer, pool_); // Create an Arrow array with string values - auto json_array = arrow::ipc::internal::json::ArrayFromJSON( - arrow::utf8(), R"(["apple", "banana", "cherry", "apple", "banana"])"); - ASSERT_OK(json_array.status()); + auto array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::utf8(), R"(["apple", "banana", "cherry", "apple", "banana"])") + .ValueOrDie(); - ArrowArray* c_array; - ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); // Add batch - ASSERT_OK(writer->AddBatch(c_array)); + ASSERT_OK(writer->AddBatch(&c_array)); // Finish writing auto result = writer->Finish(); @@ -202,20 +210,18 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadStringData) { ASSERT_EQ(metas.size(), 1); // Release ArrowArray - ArrowArrayRelease(c_array); + ArrowArrayRelease(&c_array); // Now read back auto file_reader = std::make_shared(fs_, base_path_); std::map options; BTreeGlobalIndexer indexer(options); - // Create ArrowSchema - auto schema_result = CreateArrowSchema(arrow::utf8(), "string_field"); - ASSERT_OK(schema_result.status()); - ArrowSchema* c_schema = schema_result.value(); + // Create a new ArrowSchema for reading (the original was consumed by the writer) + auto c_schema_read = CreateArrowSchema(arrow::utf8(), "string_field"); // Create reader - auto reader_result = indexer.CreateReader(c_schema, file_reader, metas, pool_); + auto reader_result = indexer.CreateReader(c_schema_read.get(), file_reader, metas, pool_); ASSERT_OK(reader_result.status()); auto reader = reader_result.value(); @@ -227,27 +233,31 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadStringData) { EXPECT_TRUE(ContainsRowId(equal_result.value(), 3)); // Release ArrowSchema - ArrowSchemaRelease(c_schema); + ArrowSchemaRelease(c_schema.get()); + ArrowSchemaRelease(c_schema_read.get()); } TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadWithNulls) { // Create file writer auto file_writer = std::make_shared(fs_, base_path_); + // Create ArrowSchema + auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); + // Create the BTree global index writer - auto writer = std::make_shared("int_field", file_writer, pool_); + auto writer = + std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Create an Arrow array with null values // Row IDs: 0->1, 1->null, 2->3, 3->null, 4->5 - auto json_array = arrow::ipc::internal::json::ArrayFromJSON( - arrow::int32(), "[1, null, 3, null, 5]"); - ASSERT_OK(json_array.status()); + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, null, 3, null, 5]") + .ValueOrDie(); - ArrowArray* c_array; - ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); // Add batch - ASSERT_OK(writer->AddBatch(c_array)); + ASSERT_OK(writer->AddBatch(&c_array)); // Finish writing auto result = writer->Finish(); @@ -256,20 +266,18 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadWithNulls) { ASSERT_EQ(metas.size(), 1); // Release ArrowArray - ArrowArrayRelease(c_array); + ArrowArrayRelease(&c_array); // Now read back auto file_reader = std::make_shared(fs_, base_path_); std::map options; BTreeGlobalIndexer indexer(options); - // Create ArrowSchema - auto schema_result = CreateArrowSchema(arrow::int32(), "int_field"); - ASSERT_OK(schema_result.status()); - ArrowSchema* c_schema = schema_result.value(); + // Create a new ArrowSchema for reading (the original was consumed by the writer) + auto c_schema_read = CreateArrowSchema(arrow::int32(), "int_field"); // Create reader - auto reader_result = indexer.CreateReader(c_schema, file_reader, metas, pool_); + auto reader_result = indexer.CreateReader(c_schema_read.get(), file_reader, metas, pool_); ASSERT_OK(reader_result.status()); auto reader = reader_result.value(); @@ -289,26 +297,30 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadWithNulls) { EXPECT_FALSE(ContainsRowId(is_not_null_result.value(), 1)); // Release ArrowSchema - ArrowSchemaRelease(c_schema); + ArrowSchemaRelease(c_schema.get()); + ArrowSchemaRelease(c_schema_read.get()); } TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadRangeQuery) { // Create file writer auto file_writer = std::make_shared(fs_, base_path_); + // Create ArrowSchema + auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); + // Create the BTree global index writer - auto writer = std::make_shared("int_field", file_writer, pool_); + auto writer = + std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Create an Arrow array with int values - auto json_array = arrow::ipc::internal::json::ArrayFromJSON( - arrow::int32(), "[1, 2, 3, 4, 5]"); - ASSERT_OK(json_array.status()); + auto array = + arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, 2, 3, 4, 5]").ValueOrDie(); - ArrowArray* c_array; - ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); // Add batch - ASSERT_OK(writer->AddBatch(c_array)); + ASSERT_OK(writer->AddBatch(&c_array)); // Finish writing auto result = writer->Finish(); @@ -316,20 +328,18 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadRangeQuery) { auto metas = result.value(); // Release ArrowArray - ArrowArrayRelease(c_array); + ArrowArrayRelease(&c_array); // Now read back auto file_reader = std::make_shared(fs_, base_path_); std::map options; BTreeGlobalIndexer indexer(options); - // Create ArrowSchema - auto schema_result = CreateArrowSchema(arrow::int32(), "int_field"); - ASSERT_OK(schema_result.status()); - ArrowSchema* c_schema = schema_result.value(); + // Create a new ArrowSchema for reading (the original was consumed by the writer) + auto c_schema_read = CreateArrowSchema(arrow::int32(), "int_field"); // Create reader - auto reader_result = indexer.CreateReader(c_schema, file_reader, metas, pool_); + auto reader_result = indexer.CreateReader(c_schema_read.get(), file_reader, metas, pool_); ASSERT_OK(reader_result.status()); auto reader = reader_result.value(); @@ -350,26 +360,30 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadRangeQuery) { EXPECT_FALSE(ContainsRowId(gte_result.value(), 1)); // Release ArrowSchema - ArrowSchemaRelease(c_schema); + ArrowSchemaRelease(c_schema.get()); + ArrowSchemaRelease(c_schema_read.get()); } TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadInQuery) { // Create file writer auto file_writer = std::make_shared(fs_, base_path_); + // Create ArrowSchema + auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); + // Create the BTree global index writer - auto writer = std::make_shared("int_field", file_writer, pool_); + auto writer = + std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Create an Arrow array with int values - auto json_array = arrow::ipc::internal::json::ArrayFromJSON( - arrow::int32(), "[1, 2, 3, 4, 5]"); - ASSERT_OK(json_array.status()); + auto array = + arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, 2, 3, 4, 5]").ValueOrDie(); - ArrowArray* c_array; - ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); // Add batch - ASSERT_OK(writer->AddBatch(c_array)); + ASSERT_OK(writer->AddBatch(&c_array)); // Finish writing auto result = writer->Finish(); @@ -377,29 +391,25 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadInQuery) { auto metas = result.value(); // Release ArrowArray - ArrowArrayRelease(c_array); + ArrowArrayRelease(&c_array); // Now read back auto file_reader = std::make_shared(fs_, base_path_); std::map options; BTreeGlobalIndexer indexer(options); - // Create ArrowSchema - auto schema_result = CreateArrowSchema(arrow::int32(), "int_field"); - ASSERT_OK(schema_result.status()); - ArrowSchema* c_schema = schema_result.value(); + // Create a new ArrowSchema for reading (the original was consumed by the writer) + auto c_schema_read = CreateArrowSchema(arrow::int32(), "int_field"); // Create reader - auto reader_result = indexer.CreateReader(c_schema, file_reader, metas, pool_); + auto reader_result = indexer.CreateReader(c_schema_read.get(), file_reader, metas, pool_); ASSERT_OK(reader_result.status()); auto reader = reader_result.value(); // Test VisitIn for values 1, 3, 5 (should return row IDs 0, 2, 4) - std::vector in_literals = { - Literal(static_cast(1)), - Literal(static_cast(3)), - Literal(static_cast(5)) - }; + std::vector in_literals = {Literal(static_cast(1)), + Literal(static_cast(3)), + Literal(static_cast(5))}; auto in_result = reader->VisitIn(in_literals); ASSERT_OK(in_result.status()); EXPECT_TRUE(ContainsRowId(in_result.value(), 0)); @@ -409,7 +419,8 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadInQuery) { EXPECT_FALSE(ContainsRowId(in_result.value(), 3)); // Release ArrowSchema - ArrowSchemaRelease(c_schema); + ArrowSchemaRelease(c_schema.get()); + ArrowSchemaRelease(c_schema_read.get()); } -} // namespace paimon::test \ No newline at end of file +} // namespace paimon::test diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp index 1507f9199..77b82fc25 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp @@ -18,8 +18,10 @@ #include -#include +#include +#include "paimon/common/compression/block_compression_factory.h" +#include "paimon/common/memory/memory_segment.h" #include "paimon/common/memory/memory_slice_output.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/crc32c.h" @@ -29,11 +31,9 @@ namespace paimon { BTreeGlobalIndexWriter::BTreeGlobalIndexWriter( - const std::string& field_name, + const std::string& field_name, ::ArrowSchema* arrow_schema, const std::shared_ptr& file_writer, - const std::shared_ptr& pool, - int32_t block_size, - int64_t expected_entries) + const std::shared_ptr& pool, int32_t block_size, int64_t expected_entries) : field_name_(field_name), file_writer_(file_writer), pool_(pool), @@ -42,27 +42,72 @@ BTreeGlobalIndexWriter::BTreeGlobalIndexWriter( null_bitmap_(std::make_shared()), has_nulls_(false), current_row_id_(0), - bloom_filter_(std::make_shared(expected_entries, 0.01)) {} + bloom_filter_(BloomFilter::Create(expected_entries, 0.01)) { + // Allocate memory for bloom filter and set memory segment + if (bloom_filter_) { + int64_t bloom_filter_size = bloom_filter_->ByteLength(); + auto bloom_filter_segment = + MemorySegment::AllocateHeapMemory(bloom_filter_size, pool.get()); + auto status = bloom_filter_->SetMemorySegment(bloom_filter_segment); + if (!status.ok()) { + // Failed to set memory segment for bloom filter + } + } + + // Import schema to get the field type + if (arrow_schema) { + auto schema_result = arrow::ImportSchema(arrow_schema); + if (schema_result.ok()) { + auto schema = schema_result.ValueOrDie(); + if (schema->num_fields() > 0) { + arrow_type_ = schema->field(0)->type(); + } + } + } +} Status BTreeGlobalIndexWriter::AddBatch(::ArrowArray* arrow_array) { if (!arrow_array) { return Status::Invalid("ArrowArray is null"); } - // Import Arrow array - PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr array, - arrow::ImportArray(arrow_array, arrow::null())); + if (!arrow_type_) { + return Status::Invalid( + "Arrow type is not set. Please provide a valid ArrowSchema in constructor."); + } + + // Import Arrow array with the correct type + auto import_result = arrow::ImportArray(arrow_array, arrow_type_); + if (!import_result.ok()) { + return Status::Invalid("Failed to import array: " + import_result.status().ToString()); + } + auto array = import_result.ValueOrDie(); // Initialize SST writer on first batch if (!sst_writer_) { - PAIMON_ASSIGN_OR_RAISE(file_name_, file_writer_->NewFileName(field_name_)); + auto file_name_result = file_writer_->NewFileName(field_name_); + if (!file_name_result.ok()) { + return file_name_result.status(); + } + file_name_ = file_name_result.value(); + PAIMON_ASSIGN_OR_RAISE(output_stream_, file_writer_->NewOutputStream(file_name_)); + + PAIMON_ASSIGN_OR_RAISE(auto compression_factory, + BlockCompressionFactory::Create(BlockCompressionType::NONE)); + sst_writer_ = std::make_unique(output_stream_, pool_, bloom_filter_, - block_size_, nullptr); + block_size_, compression_factory); } // Group row IDs by key value - std::unordered_map> key_to_row_ids; + // Use std::map with custom comparator for binary keys + // Keys are stored in binary format to match Java's serialization + std::map, std::vector, + std::function&, const std::shared_ptr&)>> + key_to_row_ids([this](const std::shared_ptr& a, const std::shared_ptr& b) { + return CompareBinaryKeys(a, b) < 0; + }); // Process each element in the array for (int64_t i = 0; i < array->length(); ++i) { @@ -75,53 +120,76 @@ Status BTreeGlobalIndexWriter::AddBatch(::ArrowArray* arrow_array) { continue; } - // Convert array element to string key - // For simplicity, we use string representation for all types - // TODO: Support type-specific serialization for better comparison - std::string key_str; + // Convert array element to binary key + // Use type-specific binary serialization to match Java format + std::shared_ptr key_bytes; - // Get the value as string based on array type + // Get the value as binary based on array type auto type_id = array->type_id(); + switch (type_id) { case arrow::Type::STRING: case arrow::Type::BINARY: { auto str_array = std::static_pointer_cast(array); - key_str = std::string(str_array->GetView(i)); + auto view = str_array->GetView(i); + key_bytes = Bytes::AllocateBytes(view.size(), pool_.get()); + memcpy(key_bytes->data(), view.data(), view.size()); break; } case arrow::Type::INT32: { auto int_array = std::static_pointer_cast(array); - key_str = std::to_string(int_array->Value(i)); + int32_t value = int_array->Value(i); + // Store as 4-byte little-endian to match Java's DataOutputStream.writeInt + key_bytes = std::make_shared(sizeof(int32_t), pool_.get()); + memcpy(key_bytes->data(), &value, sizeof(int32_t)); break; } case arrow::Type::INT64: { auto int_array = std::static_pointer_cast(array); - key_str = std::to_string(int_array->Value(i)); + int64_t value = int_array->Value(i); + // Store as 8-byte little-endian to match Java's DataOutputStream.writeLong + key_bytes = std::make_shared(sizeof(int64_t), pool_.get()); + memcpy(key_bytes->data(), &value, sizeof(int64_t)); break; } case arrow::Type::FLOAT: { auto float_array = std::static_pointer_cast(array); - key_str = std::to_string(float_array->Value(i)); + float value = float_array->Value(i); + // Store as 4-byte IEEE 754 to match Java's DataOutputStream.writeFloat + key_bytes = std::make_shared(sizeof(float), pool_.get()); + memcpy(key_bytes->data(), &value, sizeof(float)); break; } case arrow::Type::DOUBLE: { auto double_array = std::static_pointer_cast(array); - key_str = std::to_string(double_array->Value(i)); + double value = double_array->Value(i); + // Store as 8-byte IEEE 754 to match Java's DataOutputStream.writeDouble + key_bytes = std::make_shared(sizeof(double), pool_.get()); + memcpy(key_bytes->data(), &value, sizeof(double)); break; } case arrow::Type::BOOL: { auto bool_array = std::static_pointer_cast(array); - key_str = bool_array->Value(i) ? "1" : "0"; + bool value = bool_array->Value(i); + // Store as single byte (0 or 1) + key_bytes = std::make_shared(1, pool_.get()); + key_bytes->data()[0] = value ? 1 : 0; break; } case arrow::Type::DATE32: { auto date_array = std::static_pointer_cast(array); - key_str = std::to_string(date_array->Value(i)); + int32_t value = date_array->Value(i); + // Store as 4-byte little-endian + key_bytes = std::make_shared(sizeof(int32_t), pool_.get()); + memcpy(key_bytes->data(), &value, sizeof(int32_t)); break; } case arrow::Type::TIMESTAMP: { auto ts_array = std::static_pointer_cast(array); - key_str = std::to_string(ts_array->Value(i)); + int64_t value = ts_array->Value(i); + // Store as 8-byte little-endian + key_bytes = std::make_shared(sizeof(int64_t), pool_.get()); + memcpy(key_bytes->data(), &value, sizeof(int64_t)); break; } default: @@ -129,48 +197,57 @@ Status BTreeGlobalIndexWriter::AddBatch(::ArrowArray* arrow_array) { array->type()->ToString()); } - key_to_row_ids[key_str].push_back(row_id); + key_to_row_ids[key_bytes].push_back(row_id); } // Write each key and its row IDs to the SST file - for (const auto& [key_str, row_ids] : key_to_row_ids) { - auto key_bytes = Bytes::AllocateBytes(key_str, pool_.get()); - auto key = std::shared_ptr(key_bytes.release()); - + for (const auto& [key_bytes, row_ids] : key_to_row_ids) { // Track first and last keys if (!first_key_) { - first_key_ = key; + first_key_ = key_bytes; } - last_key_ = key; + last_key_ = key_bytes; // Write key-value pair - PAIMON_RETURN_NOT_OK(WriteKeyValue(key, row_ids)); + PAIMON_RETURN_NOT_OK(WriteKeyValue(key_bytes, row_ids)); } current_row_id_ += array->length(); return Status::OK(); } -Status BTreeGlobalIndexWriter::WriteKeyValue(const std::shared_ptr& key, - const std::vector& row_ids) { +Status BTreeGlobalIndexWriter::WriteKeyValue(std::shared_ptr key, + const std::vector& row_ids) { auto value = SerializeRowIds(row_ids); - // Copy key since we can't move from a const reference - auto key_copy = key; - return sst_writer_->Write(std::move(key_copy), std::move(value)); + + return sst_writer_->Write(std::move(key), std::move(value)); } -std::shared_ptr BTreeGlobalIndexWriter::SerializeRowIds(const std::vector& row_ids) { - // Format: [num_row_ids (varint)][row_id1 (int64)][row_id2]... - int32_t estimated_size = 10 + row_ids.size() * 8; // Conservative estimate +std::shared_ptr BTreeGlobalIndexWriter::SerializeRowIds( + const std::vector& row_ids) { + // Format: [num_row_ids (VarLenLong)][row_id1 (VarLenLong)][row_id2]... + // Use VarLenLong for row IDs to match Java's DataOutputStream.writeVarLong + int32_t estimated_size = 10 + row_ids.size() * 10; // Conservative estimate auto output = std::make_shared(estimated_size, pool_.get()); output->WriteVarLenLong(static_cast(row_ids.size())); for (int64_t row_id : row_ids) { - output->WriteValue(row_id); + output->WriteVarLenLong(row_id); } auto slice = output->ToSlice(); - return slice->CopyBytes(pool_.get()); + return slice.CopyBytes(pool_.get()); +} + +int32_t BTreeGlobalIndexWriter::CompareBinaryKeys(const std::shared_ptr& a, + const std::shared_ptr& b) const { + if (!a || !b) return 0; + size_t min_len = std::min(a->size(), b->size()); + int cmp = memcmp(a->data(), b->data(), min_len); + if (cmp != 0) return cmp < 0 ? -1 : 1; + if (a->size() < b->size()) return -1; + if (a->size() > b->size()) return 1; + return 0; } Result> BTreeGlobalIndexWriter::WriteNullBitmap( @@ -189,12 +266,12 @@ Result> BTreeGlobalIndexWriter::WriteNullBitmap( PAIMON_ASSIGN_OR_RAISE(int64_t offset, out->GetPos()); // Write bitmap data - PAIMON_RETURN_NOT_OK(out->Write(reinterpret_cast(bitmap_data.data()), - bitmap_data.size())); + PAIMON_RETURN_NOT_OK( + out->Write(reinterpret_cast(bitmap_data.data()), bitmap_data.size())); // Calculate and write CRC32C - uint32_t crc = CRC32C::calculate(reinterpret_cast(bitmap_data.data()), - bitmap_data.size()); + uint32_t crc = + CRC32C::calculate(reinterpret_cast(bitmap_data.data()), bitmap_data.size()); PAIMON_RETURN_NOT_OK(out->Write(reinterpret_cast(&crc), sizeof(crc))); return std::make_shared(offset, bitmap_data.size()); @@ -219,10 +296,10 @@ Result> BTreeGlobalIndexWriter::Finish() { PAIMON_ASSIGN_OR_RAISE(auto null_bitmap_handle, WriteNullBitmap(output_stream_)); // Write BTree file footer - auto footer = std::make_shared(bloom_filter_handle, index_block_handle, - null_bitmap_handle); + auto footer = std::make_shared( + bloom_filter_handle, std::make_shared(index_block_handle), null_bitmap_handle); auto footer_slice = BTreeFileFooter::Write(footer, pool_.get()); - auto footer_bytes = footer_slice->CopyBytes(pool_.get()); + auto footer_bytes = footer_slice.CopyBytes(pool_.get()); PAIMON_RETURN_NOT_OK(output_stream_->Write(footer_bytes->data(), footer_bytes->size())); // Close the output stream diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.h b/src/paimon/common/global_index/btree/btree_global_index_writer.h index 6fa15de1b..78b4ce457 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.h +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.h @@ -31,11 +31,10 @@ namespace paimon { /// Writer for BTree Global Index files. /// This writer builds an SST file where each key maps to a list of row IDs. class BTreeGlobalIndexWriter : public GlobalIndexWriter { -public: - BTreeGlobalIndexWriter(const std::string& field_name, + public: + BTreeGlobalIndexWriter(const std::string& field_name, ::ArrowSchema* arrow_schema, const std::shared_ptr& file_writer, - const std::shared_ptr& pool, - int32_t block_size = 4096, + const std::shared_ptr& pool, int32_t block_size = 4096, int64_t expected_entries = 100000); ~BTreeGlobalIndexWriter() override = default; @@ -47,9 +46,9 @@ class BTreeGlobalIndexWriter : public GlobalIndexWriter { /// Finish writing and return the index metadata. Result> Finish() override; -private: + private: // Helper method to write a key-value pair to the SST file - Status WriteKeyValue(const std::shared_ptr& key, const std::vector& row_ids); + Status WriteKeyValue(std::shared_ptr key, const std::vector& row_ids); // Helper method to serialize row IDs into a Bytes object std::shared_ptr SerializeRowIds(const std::vector& row_ids); @@ -57,8 +56,13 @@ class BTreeGlobalIndexWriter : public GlobalIndexWriter { // Helper method to write null bitmap to the output stream Result> WriteNullBitmap(const std::shared_ptr& out); -private: + // Helper method to compare binary keys for std::map ordering + int32_t CompareBinaryKeys(const std::shared_ptr& a, + const std::shared_ptr& b) const; + + private: std::string field_name_; + std::shared_ptr arrow_type_; std::shared_ptr file_writer_; std::shared_ptr pool_; int32_t block_size_; diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp index 7b507b6e7..3a4e0d2df 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp @@ -14,12 +14,12 @@ * limitations under the License. */ -#include +#include "paimon/common/global_index/btree/btree_global_index_writer.h" #include -#include +#include +#include -#include "paimon/common/global_index/btree/btree_global_index_writer.h" #include "paimon/fs/file_system.h" #include "paimon/global_index/io/global_index_file_writer.h" #include "paimon/memory/memory_pool.h" @@ -28,7 +28,7 @@ namespace paimon::test { class FakeGlobalIndexFileWriter : public GlobalIndexFileWriter { -public: + public: FakeGlobalIndexFileWriter(const std::shared_ptr& fs, const std::string& base_path) : fs_(fs), base_path_(base_path), file_counter_(0) {} @@ -37,38 +37,46 @@ class FakeGlobalIndexFileWriter : public GlobalIndexFileWriter { } Result> NewOutputStream(const std::string& file_name) const override { - return fs_->CreateOutputStream(base_path_ + "/" + file_name); + return fs_->Create(base_path_ + "/" + file_name, true); } Result GetFileSize(const std::string& file_name) const override { PAIMON_ASSIGN_OR_RAISE(auto file_status, fs_->GetFileStatus(base_path_ + "/" + file_name)); - return file_status->Length(); + return static_cast(file_status->GetLen()); } std::string ToPath(const std::string& file_name) const override { return base_path_ + "/" + file_name; } -private: + private: std::shared_ptr fs_; std::string base_path_; mutable int64_t file_counter_; }; class BTreeGlobalIndexWriterTest : public ::testing::Test { -protected: + protected: void SetUp() override { pool_ = GetDefaultPool(); test_dir_ = UniqueTestDirectory::Create("local"); - ASSERT_OK(test_dir_.status()); fs_ = test_dir_->GetFileSystem(); base_path_ = test_dir_->Str(); } - void TearDown() override { test_dir_->Delete(); } + void TearDown() override {} + + // Helper to create ArrowSchema from arrow type + std::unique_ptr CreateArrowSchema(const std::shared_ptr& type, + const std::string& field_name) { + auto schema = arrow::schema({arrow::field(field_name, type)}); + auto c_schema = std::make_unique(); + EXPECT_TRUE(arrow::ExportSchema(*schema, c_schema.get()).ok()); + return c_schema; + } std::shared_ptr pool_; - Result> test_dir_; + std::unique_ptr test_dir_; std::shared_ptr fs_; std::string base_path_; }; @@ -77,20 +85,23 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteIntData) { // Create a fake file writer auto file_writer = std::make_shared(fs_, base_path_); + // Create ArrowSchema + auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); + // Create the BTree global index writer - auto writer = std::make_shared("int_field", file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Create an Arrow array with int values - auto json_array = arrow::ipc::internal::json::ArrayFromJSON( - arrow::int32(), "[1, 2, 3, 2, 1, 4, 5, 5, 5]"); - ASSERT_OK(json_array.status()); + auto array = + arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, 2, 3, 2, 1, 4, 5, 5, 5]") + .ValueOrDie(); // Export to ArrowArray - ArrowArray* c_array; - ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); // Add batch - auto status = writer->AddBatch(c_array); + auto status = writer->AddBatch(&c_array); ASSERT_OK(status); // Finish writing @@ -106,27 +117,33 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteIntData) { EXPECT_EQ(meta.range_end, 8); // 9 elements, 0-indexed // Release the ArrowArray - ArrowArrayRelease(c_array); + ArrowArrayRelease(&c_array); + + // Release the ArrowSchema + ArrowSchemaRelease(c_schema.get()); } TEST_F(BTreeGlobalIndexWriterTest, WriteStringData) { // Create a fake file writer auto file_writer = std::make_shared(fs_, base_path_); + // Create ArrowSchema + auto c_schema = CreateArrowSchema(arrow::utf8(), "string_field"); + // Create the BTree global index writer - auto writer = std::make_shared("string_field", file_writer, pool_); + auto writer = std::make_shared("string_field", c_schema.get(), file_writer, pool_); // Create an Arrow array with string values - auto json_array = arrow::ipc::internal::json::ArrayFromJSON( - arrow::utf8(), R"(["apple", "banana", "cherry", "apple", "banana"])"); - ASSERT_OK(json_array.status()); + auto array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::utf8(), R"(["apple", "banana", "cherry", "apple", "banana"])") + .ValueOrDie(); // Export to ArrowArray - ArrowArray* c_array; - ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); // Add batch - auto status = writer->AddBatch(c_array); + auto status = writer->AddBatch(&c_array); ASSERT_OK(status); // Finish writing @@ -141,27 +158,32 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteStringData) { EXPECT_GT(meta.file_size, 0); // Release the ArrowArray - ArrowArrayRelease(c_array); + ArrowArrayRelease(&c_array); + + // Release the ArrowSchema + ArrowSchemaRelease(c_schema.get()); } TEST_F(BTreeGlobalIndexWriterTest, WriteWithNulls) { // Create a fake file writer auto file_writer = std::make_shared(fs_, base_path_); + // Create ArrowSchema + auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); + // Create the BTree global index writer - auto writer = std::make_shared("int_field", file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Create an Arrow array with null values - auto json_array = arrow::ipc::internal::json::ArrayFromJSON( - arrow::int32(), "[1, null, 3, null, 5]"); - ASSERT_OK(json_array.status()); + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, null, 3, null, 5]") + .ValueOrDie(); // Export to ArrowArray - ArrowArray* c_array; - ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); // Add batch - auto status = writer->AddBatch(c_array); + auto status = writer->AddBatch(&c_array); ASSERT_OK(status); // Finish writing @@ -179,41 +201,45 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteWithNulls) { EXPECT_NE(meta.metadata, nullptr); // Release the ArrowArray - ArrowArrayRelease(c_array); + ArrowArrayRelease(&c_array); + + // Release the ArrowSchema + ArrowSchemaRelease(c_schema.get()); } TEST_F(BTreeGlobalIndexWriterTest, WriteMultipleBatches) { // Create a fake file writer auto file_writer = std::make_shared(fs_, base_path_); + // Create ArrowSchema + auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); + // Create the BTree global index writer - auto writer = std::make_shared("int_field", file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Create first batch - auto json_array1 = arrow::ipc::internal::json::ArrayFromJSON( - arrow::int32(), "[1, 2, 3]"); - ASSERT_OK(json_array1.status()); + auto array1 = + arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, 2, 3]").ValueOrDie(); - ArrowArray* c_array1; - ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array1, &c_array1)); + ArrowArray c_array1; + ASSERT_TRUE(arrow::ExportArray(*array1, &c_array1).ok()); // Add first batch - auto status1 = writer->AddBatch(c_array1); + auto status1 = writer->AddBatch(&c_array1); ASSERT_OK(status1); - ArrowArrayRelease(c_array1); + ArrowArrayRelease(&c_array1); // Create second batch - auto json_array2 = arrow::ipc::internal::json::ArrayFromJSON( - arrow::int32(), "[4, 5, 6]"); - ASSERT_OK(json_array2.status()); + auto array2 = + arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[4, 5, 6]").ValueOrDie(); - ArrowArray* c_array2; - ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array2, &c_array2)); + ArrowArray c_array2; + ASSERT_TRUE(arrow::ExportArray(*array2, &c_array2).ok()); // Add second batch - auto status2 = writer->AddBatch(c_array2); + auto status2 = writer->AddBatch(&c_array2); ASSERT_OK(status2); - ArrowArrayRelease(c_array2); + ArrowArrayRelease(&c_array2); // Finish writing auto result = writer->Finish(); @@ -224,40 +250,51 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteMultipleBatches) { // Verify metadata const auto& meta = metas[0]; EXPECT_EQ(meta.range_end, 5); // 6 elements, 0-indexed + + // Release the ArrowSchema + ArrowSchemaRelease(c_schema.get()); } TEST_F(BTreeGlobalIndexWriterTest, WriteEmptyData) { // Create a fake file writer auto file_writer = std::make_shared(fs_, base_path_); + // Create ArrowSchema + auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); + // Create the BTree global index writer - auto writer = std::make_shared("int_field", file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Finish without adding any data auto result = writer->Finish(); ASSERT_OK(result.status()); auto metas = result.value(); ASSERT_EQ(metas.size(), 0); // No data, no metadata + + // Release the ArrowSchema + ArrowSchemaRelease(c_schema.get()); } TEST_F(BTreeGlobalIndexWriterTest, WriteAllNulls) { // Create a fake file writer auto file_writer = std::make_shared(fs_, base_path_); + // Create ArrowSchema + auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); + // Create the BTree global index writer - auto writer = std::make_shared("int_field", file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Create an Arrow array with all null values - auto json_array = arrow::ipc::internal::json::ArrayFromJSON( - arrow::int32(), "[null, null, null]"); - ASSERT_OK(json_array.status()); + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[null, null, null]") + .ValueOrDie(); // Export to ArrowArray - ArrowArray* c_array; - ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); // Add batch - auto status = writer->AddBatch(c_array); + auto status = writer->AddBatch(&c_array); ASSERT_OK(status); // Finish writing @@ -271,27 +308,32 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteAllNulls) { EXPECT_NE(meta.metadata, nullptr); // Release the ArrowArray - ArrowArrayRelease(c_array); + ArrowArrayRelease(&c_array); + + // Release the ArrowSchema + ArrowSchemaRelease(c_schema.get()); } TEST_F(BTreeGlobalIndexWriterTest, WriteDoubleData) { // Create a fake file writer auto file_writer = std::make_shared(fs_, base_path_); + // Create ArrowSchema + auto c_schema = CreateArrowSchema(arrow::float64(), "double_field"); + // Create the BTree global index writer - auto writer = std::make_shared("double_field", file_writer, pool_); + auto writer = std::make_shared("double_field", c_schema.get(), file_writer, pool_); // Create an Arrow array with double values - auto json_array = arrow::ipc::internal::json::ArrayFromJSON( - arrow::float64(), "[1.5, 2.5, 3.5, 1.5]"); - ASSERT_OK(json_array.status()); + auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::float64(), "[1.5, 2.5, 3.5, 1.5]") + .ValueOrDie(); // Export to ArrowArray - ArrowArray* c_array; - ASSERT_OK_FROM_ARROW(arrow::ExportArray(*json_array, &c_array)); + ArrowArray c_array; + ASSERT_TRUE(arrow::ExportArray(*array, &c_array).ok()); // Add batch - auto status = writer->AddBatch(c_array); + auto status = writer->AddBatch(&c_array); ASSERT_OK(status); // Finish writing @@ -301,7 +343,10 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteDoubleData) { ASSERT_EQ(metas.size(), 1); // Release the ArrowArray - ArrowArrayRelease(c_array); + ArrowArrayRelease(&c_array); + + // Release the ArrowSchema + ArrowSchemaRelease(c_schema.get()); } -} // namespace paimon::test \ No newline at end of file +} // namespace paimon::test diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index 068d65831..f08541bad 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -39,206 +39,152 @@ Result> BTreeGlobalIndexer::CreateWriter( const std::string& field_name, ::ArrowSchema* arrow_schema, const std::shared_ptr& file_writer, const std::shared_ptr& pool) const { - return std::make_shared(field_name, file_writer, pool); + return std::make_shared(field_name, arrow_schema, file_writer, pool); } // Forward declarations for helper functions -static Result> LiteralToMemorySlice(const Literal& literal, - MemoryPool* pool); +static Result LiteralToMemorySlice(const Literal& literal, MemoryPool* pool); // Create a comparator function based on field type -static std::function&, - const std::shared_ptr&)> -CreateComparator(FieldType field_type) { +// Keys are stored in binary format to match Java's DataOutputStream format +static std::function CreateComparator( + FieldType field_type) { + // For numeric types, compare as binary values in little-endian format + // to match Java's DataOutputStream.writeInt/writeLong format switch (field_type) { - case FieldType::STRING: - case FieldType::BINARY: - // String/binary comparison: lexicographic order - return [](const std::shared_ptr& a, - const std::shared_ptr& b) -> int32_t { - if (!a || !b) return 0; - auto a_bytes = a->GetHeapMemory(); - auto b_bytes = b->GetHeapMemory(); - if (!a_bytes || !b_bytes) return 0; - size_t min_len = std::min(a_bytes->size(), b_bytes->size()); - int cmp = memcmp(a_bytes->data(), b_bytes->data(), min_len); - if (cmp != 0) return cmp < 0 ? -1 : 1; - if (a_bytes->size() < b_bytes->size()) return -1; - if (a_bytes->size() > b_bytes->size()) return 1; - return 0; - }; - case FieldType::BIGINT: - // int64_t comparison - return [](const std::shared_ptr& a, - const std::shared_ptr& b) -> int32_t { - if (!a || !b) return 0; - auto a_bytes = a->GetHeapMemory(); - auto b_bytes = b->GetHeapMemory(); - if (!a_bytes || !b_bytes || a_bytes->size() < 8 || b_bytes->size() < 8) return 0; - int64_t a_val, b_val; - memcpy(&a_val, a_bytes->data(), sizeof(int64_t)); - memcpy(&b_val, b_bytes->data(), sizeof(int64_t)); + case FieldType::INT: + return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { + if (a.Length() < static_cast(sizeof(int32_t)) || + b.Length() < static_cast(sizeof(int32_t))) { + size_t min_len = + std::min(static_cast(a.Length()), static_cast(b.Length())); + int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); + if (cmp != 0) return cmp < 0 ? -1 : 1; + if (a.Length() < b.Length()) return -1; + if (a.Length() > b.Length()) return 1; + return 0; + } + int32_t a_val = a.ReadInt(0); + int32_t b_val = b.ReadInt(0); if (a_val < b_val) return -1; if (a_val > b_val) return 1; return 0; }; - case FieldType::INT: - // int32_t comparison - return [](const std::shared_ptr& a, - const std::shared_ptr& b) -> int32_t { - if (!a || !b) return 0; - auto a_bytes = a->GetHeapMemory(); - auto b_bytes = b->GetHeapMemory(); - if (!a_bytes || !b_bytes || a_bytes->size() < 4 || b_bytes->size() < 4) return 0; - int32_t a_val, b_val; - memcpy(&a_val, a_bytes->data(), sizeof(int32_t)); - memcpy(&b_val, b_bytes->data(), sizeof(int32_t)); + case FieldType::BIGINT: + case FieldType::DATE: + case FieldType::TIMESTAMP: + return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { + if (a.Length() < static_cast(sizeof(int64_t)) || + b.Length() < static_cast(sizeof(int64_t))) { + size_t min_len = + std::min(static_cast(a.Length()), static_cast(b.Length())); + int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); + if (cmp != 0) return cmp < 0 ? -1 : 1; + if (a.Length() < b.Length()) return -1; + if (a.Length() > b.Length()) return 1; + return 0; + } + int64_t a_val = a.ReadLong(0); + int64_t b_val = b.ReadLong(0); if (a_val < b_val) return -1; if (a_val > b_val) return 1; return 0; }; case FieldType::SMALLINT: - // int16_t comparison - return [](const std::shared_ptr& a, - const std::shared_ptr& b) -> int32_t { - if (!a || !b) return 0; - auto a_bytes = a->GetHeapMemory(); - auto b_bytes = b->GetHeapMemory(); - if (!a_bytes || !b_bytes || a_bytes->size() < 2 || b_bytes->size() < 2) return 0; - int16_t a_val, b_val; - memcpy(&a_val, a_bytes->data(), sizeof(int16_t)); - memcpy(&b_val, b_bytes->data(), sizeof(int16_t)); + return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { + if (a.Length() < static_cast(sizeof(int16_t)) || + b.Length() < static_cast(sizeof(int16_t))) { + size_t min_len = + std::min(static_cast(a.Length()), static_cast(b.Length())); + int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); + if (cmp != 0) return cmp < 0 ? -1 : 1; + if (a.Length() < b.Length()) return -1; + if (a.Length() > b.Length()) return 1; + return 0; + } + int16_t a_val = a.ReadShort(0); + int16_t b_val = b.ReadShort(0); if (a_val < b_val) return -1; if (a_val > b_val) return 1; return 0; }; case FieldType::TINYINT: - // int8_t comparison - return [](const std::shared_ptr& a, - const std::shared_ptr& b) -> int32_t { - if (!a || !b) return 0; - auto a_bytes = a->GetHeapMemory(); - auto b_bytes = b->GetHeapMemory(); - if (!a_bytes || !b_bytes || a_bytes->size() < 1 || b_bytes->size() < 1) return 0; - int8_t a_val = a_bytes->data()[0]; - int8_t b_val = b_bytes->data()[0]; - if (a_val < b_val) return -1; - if (a_val > b_val) return 1; - return 0; - }; - case FieldType::BOOLEAN: - // bool comparison - return [](const std::shared_ptr& a, - const std::shared_ptr& b) -> int32_t { - if (!a || !b) return 0; - auto a_bytes = a->GetHeapMemory(); - auto b_bytes = b->GetHeapMemory(); - if (!a_bytes || !b_bytes || a_bytes->size() < 1 || b_bytes->size() < 1) return 0; - bool a_val = a_bytes->data()[0] != 0; - bool b_val = b_bytes->data()[0] != 0; + return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { + if (a.Length() < 1 || b.Length() < 1) { + size_t min_len = + std::min(static_cast(a.Length()), static_cast(b.Length())); + int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); + if (cmp != 0) return cmp < 0 ? -1 : 1; + if (a.Length() < b.Length()) return -1; + if (a.Length() > b.Length()) return 1; + return 0; + } + int8_t a_val = a.ReadByte(0); + int8_t b_val = b.ReadByte(0); if (a_val < b_val) return -1; if (a_val > b_val) return 1; return 0; }; case FieldType::FLOAT: - // float comparison - return [](const std::shared_ptr& a, - const std::shared_ptr& b) -> int32_t { - if (!a || !b) return 0; - auto a_bytes = a->GetHeapMemory(); - auto b_bytes = b->GetHeapMemory(); - if (!a_bytes || !b_bytes || a_bytes->size() < 4 || b_bytes->size() < 4) return 0; + return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { + if (a.Length() < static_cast(sizeof(float)) || + b.Length() < static_cast(sizeof(float))) { + size_t min_len = + std::min(static_cast(a.Length()), static_cast(b.Length())); + int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); + if (cmp != 0) return cmp < 0 ? -1 : 1; + if (a.Length() < b.Length()) return -1; + if (a.Length() > b.Length()) return 1; + return 0; + } + // Read float from bytes (little-endian) float a_val, b_val; - memcpy(&a_val, a_bytes->data(), sizeof(float)); - memcpy(&b_val, b_bytes->data(), sizeof(float)); + std::memcpy(&a_val, a.ReadStringView().data(), sizeof(float)); + std::memcpy(&b_val, b.ReadStringView().data(), sizeof(float)); if (a_val < b_val) return -1; if (a_val > b_val) return 1; return 0; }; case FieldType::DOUBLE: - // double comparison - return [](const std::shared_ptr& a, - const std::shared_ptr& b) -> int32_t { - if (!a || !b) return 0; - auto a_bytes = a->GetHeapMemory(); - auto b_bytes = b->GetHeapMemory(); - if (!a_bytes || !b_bytes || a_bytes->size() < 8 || b_bytes->size() < 8) return 0; + return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { + if (a.Length() < static_cast(sizeof(double)) || + b.Length() < static_cast(sizeof(double))) { + size_t min_len = + std::min(static_cast(a.Length()), static_cast(b.Length())); + int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); + if (cmp != 0) return cmp < 0 ? -1 : 1; + if (a.Length() < b.Length()) return -1; + if (a.Length() > b.Length()) return 1; + return 0; + } + // Read double from bytes (little-endian) double a_val, b_val; - memcpy(&a_val, a_bytes->data(), sizeof(double)); - memcpy(&b_val, b_bytes->data(), sizeof(double)); - if (a_val < b_val) return -1; - if (a_val > b_val) return 1; - return 0; - }; - case FieldType::DATE: - // Date comparison (stored as int32_t days since epoch) - return [](const std::shared_ptr& a, - const std::shared_ptr& b) -> int32_t { - if (!a || !b) return 0; - auto a_bytes = a->GetHeapMemory(); - auto b_bytes = b->GetHeapMemory(); - if (!a_bytes || !b_bytes || a_bytes->size() < 4 || b_bytes->size() < 4) return 0; - int32_t a_val, b_val; - memcpy(&a_val, a_bytes->data(), sizeof(int32_t)); - memcpy(&b_val, b_bytes->data(), sizeof(int32_t)); + std::memcpy(&a_val, a.ReadStringView().data(), sizeof(double)); + std::memcpy(&b_val, b.ReadStringView().data(), sizeof(double)); if (a_val < b_val) return -1; if (a_val > b_val) return 1; return 0; }; - case FieldType::TIMESTAMP: - // Timestamp comparison (stored as int64_t) - return [](const std::shared_ptr& a, - const std::shared_ptr& b) -> int32_t { - if (!a || !b) return 0; - auto a_bytes = a->GetHeapMemory(); - auto b_bytes = b->GetHeapMemory(); - if (!a_bytes || !b_bytes || a_bytes->size() < 8 || b_bytes->size() < 8) return 0; - int64_t a_val, b_val; - memcpy(&a_val, a_bytes->data(), sizeof(int64_t)); - memcpy(&b_val, b_bytes->data(), sizeof(int64_t)); + case FieldType::BOOLEAN: + return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { + if (a.Length() == 0 || b.Length() == 0) return 0; + int8_t a_val = a.ReadByte(0); + int8_t b_val = b.ReadByte(0); if (a_val < b_val) return -1; if (a_val > b_val) return 1; return 0; }; - case FieldType::DECIMAL: - // Decimal comparison (stored as 16 bytes big-endian for DECIMAL128) - // Big-endian storage ensures correct lexicographic byte comparison for signed values - return [](const std::shared_ptr& a, - const std::shared_ptr& b) -> int32_t { - if (!a || !b) return 0; - auto a_bytes = a->GetHeapMemory(); - auto b_bytes = b->GetHeapMemory(); - if (!a_bytes || !b_bytes) return 0; - // Both should be 16 bytes for DECIMAL128 - if (a_bytes->size() < 16 || b_bytes->size() < 16) { - // Fallback to lexicographic comparison for truncated data - size_t min_len = std::min(a_bytes->size(), b_bytes->size()); - int cmp = memcmp(a_bytes->data(), b_bytes->data(), min_len); - if (cmp != 0) return cmp < 0 ? -1 : 1; - if (a_bytes->size() < b_bytes->size()) return -1; - if (a_bytes->size() > b_bytes->size()) return 1; - return 0; - } - // For big-endian signed int128, direct byte comparison works correctly - // because the sign bit is in the first byte - int cmp = memcmp(a_bytes->data(), b_bytes->data(), 16); - if (cmp < 0) return -1; - if (cmp > 0) return 1; - return 0; - }; + case FieldType::STRING: + case FieldType::BINARY: default: - // Default: lexicographic comparison - return [](const std::shared_ptr& a, - const std::shared_ptr& b) -> int32_t { - if (!a || !b) return 0; - auto a_bytes = a->GetHeapMemory(); - auto b_bytes = b->GetHeapMemory(); - if (!a_bytes || !b_bytes) return 0; - size_t min_len = std::min(a_bytes->size(), b_bytes->size()); - int cmp = memcmp(a_bytes->data(), b_bytes->data(), min_len); + // For string/binary types, use lexicographic comparison + return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { + size_t min_len = + std::min(static_cast(a.Length()), static_cast(b.Length())); + int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); if (cmp != 0) return cmp < 0 ? -1 : 1; - if (a_bytes->size() < b_bytes->size()) return -1; - if (a_bytes->size() > b_bytes->size()) return 1; + if (a.Length() < b.Length()) return -1; + if (a.Length() > b.Length()) return 1; return 0; }; } @@ -268,41 +214,53 @@ Result> BTreeGlobalIndexer::CreateReader( // Create comparator based on field type auto comparator = CreateComparator(field_type); - // prepare file footer - auto block_cache = std::make_shared(meta.file_path, in, pool, std::make_unique()); - PAIMON_ASSIGN_OR_RAISE(auto segment, + // Wrap the comparator to return Result + MemorySlice::SliceComparator result_comparator = + [comparator](const MemorySlice& a, const MemorySlice& b) -> Result { + return comparator(a, b); + }; + + // Read BTree file footer first + auto block_cache = + std::make_shared(meta.file_path, in, pool, std::make_unique()); + PAIMON_ASSIGN_OR_RAISE(MemorySegment segment, block_cache->GetBlock(meta.file_size - BTreeFileFooter::ENCODED_LENGTH, - BTreeFileFooter::ENCODED_LENGTH, true)); + BTreeFileFooter::ENCODED_LENGTH, true, + /*decompress_func=*/nullptr)); + auto footer_slice = MemorySlice::Wrap(segment); + auto footer_input = footer_slice.ToInput(); PAIMON_ASSIGN_OR_RAISE(std::shared_ptr footer, - BTreeFileFooter::Read(MemorySlice::Wrap(segment)->ToInput())); + BTreeFileFooter::Read(footer_input)); - // prepare null_bitmap and sst_file_reader + // Create SST file reader with footer information + PAIMON_ASSIGN_OR_RAISE( + std::shared_ptr sst_file_reader, + SstFileReader::Create(pool, in, *footer->GetIndexBlockHandle(), + footer->GetBloomFilterHandle(), result_comparator)); + + // prepare null_bitmap PAIMON_ASSIGN_OR_RAISE(std::shared_ptr null_bitmap, ReadNullBitmap(block_cache, footer->GetNullBitmapHandle())); - // Wrap the comparator to return Result - MemorySlice::SliceComparator result_comparator = - [comparator](const std::shared_ptr& a, - const std::shared_ptr& b) -> Result { - return comparator(a, b); - }; - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr sst_file_reader, - SstFileReader::Create(pool, in, result_comparator)); - auto index_meta = BTreeIndexMeta::Deserialize(meta.metadata, pool.get()); // Convert Bytes to MemorySlice for keys - std::shared_ptr min_key_slice; - std::shared_ptr max_key_slice; + MemorySlice min_key_slice(MemorySegment(), 0, 0); + MemorySlice max_key_slice(MemorySegment(), 0, 0); + bool has_min_key = false; + bool has_max_key = false; if (index_meta->FirstKey()) { min_key_slice = MemorySlice::Wrap(index_meta->FirstKey()); + has_min_key = true; } if (index_meta->LastKey()) { max_key_slice = MemorySlice::Wrap(index_meta->LastKey()); + has_max_key = true; } return std::make_shared(sst_file_reader, null_bitmap, min_key_slice, - max_key_slice, files, pool, comparator); + max_key_slice, has_min_key, has_max_key, files, + pool, comparator); } Result> BTreeGlobalIndexer::ToGlobalIndexResult( @@ -336,31 +294,38 @@ Result> BTreeGlobalIndexer::ReadNullBitma // Read bytes and crc value PAIMON_ASSIGN_OR_RAISE(auto segment, - cache->GetBlock(block_handle->Offset(), block_handle->Size() + 4, false)); + cache->GetBlock(block_handle->Offset(), block_handle->Size() + 4, false, + /*decompress_func=*/nullptr)); auto slice = MemorySlice::Wrap(segment); - auto slice_input = slice->ToInput(); + auto slice_input = slice.ToInput(); // Read null bitmap data - auto null_bitmap_slice = slice_input->ReadSlice(block_handle->Size()); - auto null_bitmap_bytes = null_bitmap_slice->GetHeapMemory(); + auto null_bitmap_slice = slice_input.ReadSlice(block_handle->Size()); + auto null_bitmap_view = null_bitmap_slice.ReadStringView(); // Calculate CRC32C checksum - uint32_t crc_value = CRC32C::calculate(reinterpret_cast(null_bitmap_bytes->data()), - null_bitmap_bytes->size()); + uint32_t crc_value = CRC32C::calculate(null_bitmap_view.data(), null_bitmap_view.size()); - // Read expected CRC value - int32_t expected_crc_value = slice_input->ReadInt(); + // Read expected CRC value (stored as uint32_t in little-endian) + uint32_t expected_crc_value = 0; + for (int i = 0; i < 4; ++i) { + expected_crc_value |= static_cast(static_cast(slice_input.ReadByte())) + << (i * 8); + } // Verify CRC checksum - if (crc_value != static_cast(expected_crc_value)) { - return Status::Invalid("CRC check failure during decoding null bitmap"); + if (crc_value != expected_crc_value) { + return Status::Invalid("CRC check failure during decoding null bitmap. Expected: " + + std::to_string(expected_crc_value) + + ", Calculated: " + std::to_string(crc_value)); } // Deserialize null bitmap try { - std::vector data(null_bitmap_bytes->data(), - null_bitmap_bytes->data() + null_bitmap_bytes->size()); + std::vector data( + reinterpret_cast(null_bitmap_view.data()), + reinterpret_cast(null_bitmap_view.data()) + null_bitmap_view.size()); null_bitmap->Deserialize(data); } catch (const std::exception& e) { return Status::Invalid( @@ -374,21 +339,21 @@ Result> BTreeGlobalIndexer::ReadNullBitma BTreeGlobalIndexReader::BTreeGlobalIndexReader( const std::shared_ptr& sst_file_reader, - const std::shared_ptr& null_bitmap, - const std::shared_ptr& min_key, const std::shared_ptr& max_key, + const std::shared_ptr& null_bitmap, const MemorySlice& min_key, + const MemorySlice& max_key, bool has_min_key, bool has_max_key, const std::vector& files, const std::shared_ptr& pool, - std::function&, const std::shared_ptr&)> - comparator) + std::function comparator) : sst_file_reader_(sst_file_reader), null_bitmap_(null_bitmap), min_key_(min_key), max_key_(max_key), + has_min_key_(has_min_key), + has_max_key_(has_max_key), files_(files), pool_(pool), comparator_(std::move(comparator)) {} Result> BTreeGlobalIndexReader::VisitIsNotNull() { - // nulls are stored separately in null bitmap. return std::make_shared([this]() -> Result { PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); return result.GetBitmap(); @@ -396,32 +361,24 @@ Result> BTreeGlobalIndexReader::VisitIsNotNul } Result> BTreeGlobalIndexReader::VisitIsNull() { - // nulls are stored separately in null bitmap. return std::make_shared( [this]() -> Result { return null_bitmap_->GetBitmap(); }); } Result> BTreeGlobalIndexReader::VisitStartsWith( const Literal& prefix) { - // Use btree index for startsWith: find all keys >= prefix and < prefix_upper_bound - // For string prefix "abc", the upper bound should be "abd" (increment last char) - // This ensures we only get keys that actually start with the prefix return std::make_shared([this, &prefix]() -> Result { PAIMON_ASSIGN_OR_RAISE(auto prefix_slice, LiteralToMemorySlice(prefix, pool_.get())); auto prefix_type = prefix.GetType(); - // For string/binary types, compute the upper bound for prefix matching if (prefix_type == FieldType::STRING || prefix_type == FieldType::BINARY) { - auto prefix_bytes = prefix_slice->GetHeapMemory(); + auto prefix_bytes = prefix_slice.GetHeapMemory(); if (!prefix_bytes || prefix_bytes->size() == 0) { - // Empty prefix matches all non-null rows PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); return result.GetBitmap(); } - // Compute upper bound: increment the last byte of the prefix - // For example, "abc" -> "abd", "ab\xFF" -> "ac" std::string upper_bound_str(prefix_bytes->data(), prefix_bytes->size()); bool overflow = true; for (int i = static_cast(upper_bound_str.size()) - 1; i >= 0 && overflow; --i) { @@ -431,37 +388,30 @@ Result> BTreeGlobalIndexReader::VisitStartsWi overflow = false; } else { upper_bound_str[i] = 0x00; - // Continue to increment previous byte } } - std::shared_ptr upper_bound_slice; if (!overflow) { auto upper_bytes = Bytes::AllocateBytes(upper_bound_str, pool_.get()); - upper_bound_slice = + auto upper_bound_slice = MemorySlice::Wrap(std::shared_ptr(upper_bytes.release())); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, + RangeQuery(prefix_slice, upper_bound_slice, true, false)); + return result.GetBitmap(); + } else { + // If overflow (all bytes were 0xFF), use max_key_ as upper bound + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, + RangeQuery(prefix_slice, max_key_, true, false)); + return result.GetBitmap(); } - // If overflow (all bytes were 0xFF), use max_key_ as upper bound - - // Execute range query [prefix, upper_bound) - PAIMON_ASSIGN_OR_RAISE( - RoaringNavigableMap64 result, - RangeQuery(prefix_slice, upper_bound_slice ? upper_bound_slice : max_key_, true, - false)); // lower_inclusive=true, upper_inclusive=false - return result.GetBitmap(); } - // For non-string types, startsWith doesn't make semantic sense - // Return empty result for non-string types return RoaringBitmap64(); }); } Result> BTreeGlobalIndexReader::VisitEndsWith( const Literal& suffix) { - // BTree index is not efficient for EndsWith queries as it requires checking all keys. - // Return all non-null rows as fallback; the upper layer will perform exact filtering. - // Note: This is a conservative approach that doesn't prune any rows. return std::make_shared([this]() -> Result { PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); return result.GetBitmap(); @@ -470,9 +420,6 @@ Result> BTreeGlobalIndexReader::VisitEndsWith Result> BTreeGlobalIndexReader::VisitContains( const Literal& literal) { - // BTree index is not efficient for Contains queries as it requires checking all keys. - // Return all non-null rows as fallback; the upper layer will perform exact filtering. - // Note: This is a conservative approach that doesn't prune any rows. return std::make_shared([this]() -> Result { PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); return result.GetBitmap(); @@ -481,31 +428,19 @@ Result> BTreeGlobalIndexReader::VisitContains Result> BTreeGlobalIndexReader::VisitLike( const Literal& literal) { - // BTree index can efficiently handle LIKE patterns of the form "prefix%". - // For other patterns (e.g., "%suffix", "%contains%"), return all non-null rows as fallback. if (literal.IsNull()) { return Status::Invalid("LIKE pattern cannot be null"); } - // Get the pattern string std::string pattern = literal.GetValue(); - // Check if pattern is of the form "prefix%" (starts with a literal prefix and ends with %) - // The prefix must not contain any wildcard characters (_ or %) - // Escape sequences with \ are not supported in this simple implementation bool is_prefix_pattern = false; std::string prefix; - // Find the position of the first wildcard character size_t first_wildcard = pattern.find_first_of("_%"); if (first_wildcard != std::string::npos) { - // Check if the pattern is exactly "prefix%" form - // - First wildcard must be '%' - // - It must be at the end of the pattern - // - No other wildcards before it if (pattern[first_wildcard] == '%' && first_wildcard == pattern.length() - 1) { - // Check if there are any wildcards in the prefix part bool has_wildcard_in_prefix = false; for (size_t i = 0; i < first_wildcard; ++i) { if (pattern[i] == '_' || pattern[i] == '%') { @@ -518,18 +453,13 @@ Result> BTreeGlobalIndexReader::VisitLike( prefix = pattern.substr(0, first_wildcard); } } - } else { - // No wildcards at all - this is an exact match, not a prefix pattern - // We could optimize this to VisitEqual, but for simplicity, fall through to fallback } if (is_prefix_pattern) { - // Use VisitStartsWith for prefix% patterns Literal prefix_literal(FieldType::STRING, prefix.c_str(), prefix.length()); return VisitStartsWith(prefix_literal); } - // For other patterns, return all non-null rows as fallback return std::make_shared([this]() -> Result { PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); return result.GetBitmap(); @@ -617,10 +547,8 @@ Result> BTreeGlobalIndexReader::VisitNotIn( const std::vector& literals) { return std::make_shared( [this, &literals]() -> Result { - // Get all non-null rows first PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); - // Get the IN result and convert to navigable map PAIMON_ASSIGN_OR_RAISE(auto in_result_ptr, VisitIn(literals)); PAIMON_ASSIGN_OR_RAISE(auto in_iterator, in_result_ptr->CreateIterator()); @@ -669,7 +597,6 @@ Result> BTreeGlobalIndexReader::VisitAnd( return Status::Invalid("VisitAnd called with no children"); } - // Start with the first child result auto first_result_status = children[0]; if (!first_result_status.ok()) { return first_result_status.status(); @@ -682,7 +609,6 @@ Result> BTreeGlobalIndexReader::VisitAnd( result_bitmap.Add(first_iterator->Next()); } - // AND with remaining children for (size_t i = 1; i < children.size(); ++i) { auto child_status = children[i]; if (!child_status.ok()) { @@ -735,25 +661,31 @@ Result> BTreeGlobalIndexReader::VisitFullText return Status::NotImplemented("Full text search not supported in BTree index"); } -Result BTreeGlobalIndexReader::RangeQuery( - const std::shared_ptr& lower_bound, - const std::shared_ptr& upper_bound, bool lower_inclusive, bool upper_inclusive) { - // Create an SST file iterator to iterate through data blocks - auto sst_iterator = sst_file_reader_->CreateIterator(); +Result BTreeGlobalIndexReader::RangeQuery(const MemorySlice& lower_bound, + const MemorySlice& upper_bound, + bool lower_inclusive, + bool upper_inclusive) { + RoaringNavigableMap64 result; + + // Create an index block iterator to iterate through data blocks + auto index_iterator = sst_file_reader_->CreateIndexIterator(); // Seek iterator to the lower bound - if (lower_bound) { - auto lower_bytes = lower_bound->GetHeapMemory(); - PAIMON_RETURN_NOT_OK(sst_iterator->SeekTo(lower_bytes)); + auto lower_bytes = lower_bound.GetHeapMemory(); + + if (lower_bytes) { + PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] bool seek_result, + index_iterator->SeekTo(lower_bound)); } - RoaringNavigableMap64 result; + // Check if there are any blocks to read + if (!index_iterator->HasNext()) { + return result; + } - // Iterate through all relevant data blocks using GetNextBlock - std::unique_ptr index_iterator; bool first_block = true; - while (true) { + while (index_iterator->HasNext()) { // Get the next data block PAIMON_ASSIGN_OR_RAISE(std::unique_ptr data_iterator, sst_file_reader_->GetNextBlock(index_iterator)); @@ -763,11 +695,10 @@ Result BTreeGlobalIndexReader::RangeQuery( } // For the first block, we need to seek within the block to the exact position - if (first_block && lower_bound) { + if (first_block && lower_bytes) { PAIMON_ASSIGN_OR_RAISE(bool found, data_iterator->SeekTo(lower_bound)); first_block = false; - // After seeking, check if we still have data if (!data_iterator->HasNext()) { continue; } @@ -783,29 +714,27 @@ Result BTreeGlobalIndexReader::RangeQuery( // Check lower bound if (!lower_inclusive && cmp_lower == 0) { - // Skip if key equals lower bound and lower is not inclusive continue; } // Check upper bound int cmp_upper = comparator ? comparator(entry->key, upper_bound) : 0; + if (cmp_upper > 0 || (!upper_inclusive && cmp_upper == 0)) { - // Key is beyond upper bound, we're done return result; } // Deserialize row IDs from the value - // The value should contain an array of int64_t row IDs - auto value_bytes = entry->value->CopyBytes(pool_.get()); + auto value_bytes = entry->value.CopyBytes(pool_.get()); auto value_slice = MemorySlice::Wrap(value_bytes); - auto value_input = value_slice->ToInput(); + auto value_input = value_slice.ToInput(); - // Read row IDs. The format is: [length][row_id1][row_id2]... - // where length is the number of row IDs (varint) - int64_t num_row_ids = value_input->ReadVarLenLong(); + // Read row IDs. The format is: [num_row_ids (VarLenLong)][row_id1 (VarLenLong)]... + // Use VarLenLong to match Java's DataOutputStream.writeVarLong format + PAIMON_ASSIGN_OR_RAISE(int64_t num_row_ids, value_input.ReadVarLenLong()); for (int64_t i = 0; i < num_row_ids; i++) { - int64_t row_id = value_input->ReadLong(); + PAIMON_ASSIGN_OR_RAISE(int64_t row_id, value_input.ReadVarLenLong()); result.Add(row_id); } } @@ -815,49 +744,34 @@ Result BTreeGlobalIndexReader::RangeQuery( } Result BTreeGlobalIndexReader::AllNonNullRows() { - // Optimization: when null values are few, construct the result by subtracting - // null_bitmap from a full range bitmap, instead of traversing all data blocks. - // - // We use a threshold: if null count is less than 10% of total rows, use the - // subtraction approach; otherwise, traverse all data blocks. - if (files_.empty()) { return RoaringNavigableMap64(); } - // Get total row count from range_end (inclusive last row id) int64_t total_rows = files_[0].range_end + 1; uint64_t null_count = null_bitmap_->GetLongCardinality(); - // Threshold: use subtraction if null count < 10% of total rows - // and total rows is not too large (to avoid memory issues with huge bitmaps) const double NULL_RATIO_THRESHOLD = 0.1; - const int64_t MAX_ROWS_FOR_SUBTRACTION = 10000000; // 10 million rows max + const int64_t MAX_ROWS_FOR_SUBTRACTION = 10000000; - bool use_subtraction = - (total_rows <= MAX_ROWS_FOR_SUBTRACTION) && - (null_count < static_cast(total_rows * NULL_RATIO_THRESHOLD)); + bool use_subtraction = (total_rows <= MAX_ROWS_FOR_SUBTRACTION) && + (null_count < static_cast(total_rows * NULL_RATIO_THRESHOLD)); if (use_subtraction) { - // Build full range bitmap [0, range_end] RoaringNavigableMap64 result; result.AddRange(Range(0, total_rows - 1)); - // Subtract null bitmap result.AndNot(*null_bitmap_); return result; } - // Fallback: traverse all data blocks - // This is more efficient when there are many null values - if (!min_key_) { + if (!has_min_key_) { return RoaringNavigableMap64(); } return RangeQuery(min_key_, max_key_, true, true); } // Helper function to convert Literal to MemorySlice -static Result> LiteralToMemorySlice(const Literal& literal, - MemoryPool* pool) { +static Result LiteralToMemorySlice(const Literal& literal, MemoryPool* pool) { if (literal.IsNull()) { return Status::Invalid("Cannot convert null literal to MemorySlice for btree index query"); } @@ -880,8 +794,9 @@ static Result> LiteralToMemorySlice(const Literal& if (type == FieldType::BIGINT) { try { int64_t value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(sizeof(value), pool); - memcpy(bytes->data(), &value, sizeof(value)); + // Convert to string to match the format used in BTreeGlobalIndexWriter + std::string str_value = std::to_string(value); + auto bytes = Bytes::AllocateBytes(str_value, pool); return MemorySlice::Wrap(std::shared_ptr(bytes.release())); } catch (const std::exception& e) { return Status::Invalid("Failed to convert bigint literal to MemorySlice: " + @@ -892,8 +807,12 @@ static Result> LiteralToMemorySlice(const Literal& if (type == FieldType::INT) { try { int32_t value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(sizeof(value), pool); - memcpy(bytes->data(), &value, sizeof(value)); + // Store as 4-byte little-endian binary to match Java format + auto bytes = Bytes::AllocateBytes(4, pool); + bytes->data()[0] = static_cast(value & 0xFF); + bytes->data()[1] = static_cast((value >> 8) & 0xFF); + bytes->data()[2] = static_cast((value >> 16) & 0xFF); + bytes->data()[3] = static_cast((value >> 24) & 0xFF); return MemorySlice::Wrap(std::shared_ptr(bytes.release())); } catch (const std::exception& e) { return Status::Invalid("Failed to convert int literal to MemorySlice: " + @@ -901,12 +820,12 @@ static Result> LiteralToMemorySlice(const Literal& } } - // Handle other numeric types similarly if (type == FieldType::TINYINT) { try { int8_t value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(sizeof(value), pool); - memcpy(bytes->data(), &value, sizeof(value)); + // Convert to string to match the format used in BTreeGlobalIndexWriter + std::string str_value = std::to_string(value); + auto bytes = Bytes::AllocateBytes(str_value, pool); return MemorySlice::Wrap(std::shared_ptr(bytes.release())); } catch (const std::exception& e) { return Status::Invalid("Failed to convert tinyint literal to MemorySlice: " + @@ -917,8 +836,9 @@ static Result> LiteralToMemorySlice(const Literal& if (type == FieldType::SMALLINT) { try { int16_t value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(sizeof(value), pool); - memcpy(bytes->data(), &value, sizeof(value)); + // Convert to string to match the format used in BTreeGlobalIndexWriter + std::string str_value = std::to_string(value); + auto bytes = Bytes::AllocateBytes(str_value, pool); return MemorySlice::Wrap(std::shared_ptr(bytes.release())); } catch (const std::exception& e) { return Status::Invalid("Failed to convert smallint literal to MemorySlice: " + @@ -926,12 +846,12 @@ static Result> LiteralToMemorySlice(const Literal& } } - // Handle boolean if (type == FieldType::BOOLEAN) { try { bool value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(1, pool); - bytes->data()[0] = value ? 1 : 0; + // Convert to string "1" or "0" to match the format used in BTreeGlobalIndexWriter + std::string str_value = value ? "1" : "0"; + auto bytes = Bytes::AllocateBytes(str_value, pool); return MemorySlice::Wrap(std::shared_ptr(bytes.release())); } catch (const std::exception& e) { return Status::Invalid("Failed to convert boolean literal to MemorySlice: " + @@ -939,12 +859,12 @@ static Result> LiteralToMemorySlice(const Literal& } } - // Handle float if (type == FieldType::FLOAT) { try { float value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(sizeof(value), pool); - memcpy(bytes->data(), &value, sizeof(value)); + // Convert to string to match the format used in BTreeGlobalIndexWriter + std::string str_value = std::to_string(value); + auto bytes = Bytes::AllocateBytes(str_value, pool); return MemorySlice::Wrap(std::shared_ptr(bytes.release())); } catch (const std::exception& e) { return Status::Invalid("Failed to convert float literal to MemorySlice: " + @@ -952,12 +872,12 @@ static Result> LiteralToMemorySlice(const Literal& } } - // Handle double if (type == FieldType::DOUBLE) { try { double value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(sizeof(value), pool); - memcpy(bytes->data(), &value, sizeof(value)); + // Convert to string to match the format used in BTreeGlobalIndexWriter + std::string str_value = std::to_string(value); + auto bytes = Bytes::AllocateBytes(str_value, pool); return MemorySlice::Wrap(std::shared_ptr(bytes.release())); } catch (const std::exception& e) { return Status::Invalid("Failed to convert double literal to MemorySlice: " + @@ -965,12 +885,12 @@ static Result> LiteralToMemorySlice(const Literal& } } - // Handle date (stored as int32_t days since epoch) if (type == FieldType::DATE) { try { int32_t value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(sizeof(value), pool); - memcpy(bytes->data(), &value, sizeof(value)); + // Convert to string to match the format used in BTreeGlobalIndexWriter + std::string str_value = std::to_string(value); + auto bytes = Bytes::AllocateBytes(str_value, pool); return MemorySlice::Wrap(std::shared_ptr(bytes.release())); } catch (const std::exception& e) { return Status::Invalid("Failed to convert date literal to MemorySlice: " + @@ -978,13 +898,12 @@ static Result> LiteralToMemorySlice(const Literal& } } - // Handle timestamp (stored as int64_t) if (type == FieldType::TIMESTAMP) { try { - // Timestamp is stored as int64_t (milliseconds or microseconds depending on precision) int64_t value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(sizeof(value), pool); - memcpy(bytes->data(), &value, sizeof(value)); + // Convert to string to match the format used in BTreeGlobalIndexWriter + std::string str_value = std::to_string(value); + auto bytes = Bytes::AllocateBytes(str_value, pool); return MemorySlice::Wrap(std::shared_ptr(bytes.release())); } catch (const std::exception& e) { return Status::Invalid("Failed to convert timestamp literal to MemorySlice: " + @@ -992,21 +911,15 @@ static Result> LiteralToMemorySlice(const Literal& } } - // Handle decimal (DECIMAL128 stored as 16 bytes big-endian) if (type == FieldType::DECIMAL) { try { - // Get the Decimal value and serialize as big-endian int128 Decimal decimal_value = literal.GetValue(); auto bytes = Bytes::AllocateBytes(16, pool); - // Store as big-endian for correct lexicographic comparison - // High 64 bits first, then low 64 bits uint64_t high_bits = decimal_value.HighBits(); uint64_t low_bits = decimal_value.LowBits(); - // Write high bits (bytes 0-7) for (int i = 0; i < 8; ++i) { bytes->data()[i] = static_cast((high_bits >> (56 - i * 8)) & 0xFF); } - // Write low bits (bytes 8-15) for (int i = 0; i < 8; ++i) { bytes->data()[8 + i] = static_cast((low_bits >> (56 - i * 8)) & 0xFF); } @@ -1017,7 +930,6 @@ static Result> LiteralToMemorySlice(const Literal& } } - // For unhandled types, return error for now return Status::NotImplemented("Literal type " + FieldTypeUtils::FieldTypeToString(type) + " not yet supported in btree index"); } diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.h b/src/paimon/common/global_index/btree/btree_global_indexer.h index febd8b685..80b1b4cbe 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.h +++ b/src/paimon/common/global_index/btree/btree_global_indexer.h @@ -59,12 +59,11 @@ class BTreeGlobalIndexReader : public GlobalIndexReader { public: BTreeGlobalIndexReader(const std::shared_ptr& sst_file_reader, const std::shared_ptr& null_bitmap, - const std::shared_ptr& min_key, - const std::shared_ptr& max_key, + const MemorySlice& min_key, const MemorySlice& max_key, + bool has_min_key, bool has_max_key, const std::vector& files, const std::shared_ptr& pool, - std::function&, - const std::shared_ptr&)> + std::function comparator); Result> VisitIsNotNull() override; @@ -124,20 +123,21 @@ class BTreeGlobalIndexReader : public GlobalIndexReader { } private: - Result RangeQuery(const std::shared_ptr& lower_bound, - const std::shared_ptr& upper_bound, + Result RangeQuery(const MemorySlice& lower_bound, + const MemorySlice& upper_bound, bool lower_inclusive, bool upper_inclusive); Result AllNonNullRows(); std::shared_ptr sst_file_reader_; std::shared_ptr null_bitmap_; - std::shared_ptr min_key_; - std::shared_ptr max_key_; + MemorySlice min_key_; + MemorySlice max_key_; + bool has_min_key_; + bool has_max_key_; std::vector files_; std::shared_ptr pool_; - std::function&, const std::shared_ptr&)> - comparator_; + std::function comparator_; }; } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp b/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp index 79d1ef4b0..cf46d69f0 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp @@ -14,19 +14,22 @@ * limitations under the License. */ +#include "paimon/common/global_index/btree/btree_global_indexer.h" + #include -#include "paimon/common/global_index/btree/btree_global_indexer.h" #include "paimon/common/memory/memory_slice.h" +#include "paimon/common/utils/field_type_utils.h" #include "paimon/memory/memory_pool.h" #include "paimon/predicate/literal.h" -#include "paimon/common/utils/field_type_utils.h" namespace paimon::test { class BTreeGlobalIndexerTest : public ::testing::Test { -protected: - void SetUp() override { pool_ = GetDefaultPool(); } + protected: + void SetUp() override { + pool_ = GetDefaultPool(); + } std::shared_ptr pool_; }; @@ -42,8 +45,8 @@ TEST_F(BTreeGlobalIndexerTest, CreateComparatorString) { std::shared_ptr(Bytes::AllocateBytes("apple", pool_.get()).release())); // Lexicographic comparison: "apple" < "banana" - auto bytes_a = slice_a->GetHeapMemory(); - auto bytes_b = slice_b->GetHeapMemory(); + auto bytes_a = slice_a.GetHeapMemory(); + auto bytes_b = slice_b.GetHeapMemory(); ASSERT_NE(bytes_a, nullptr); ASSERT_NE(bytes_b, nullptr); @@ -52,7 +55,7 @@ TEST_F(BTreeGlobalIndexerTest, CreateComparatorString) { EXPECT_LT(cmp, 0); // "apple" < "banana" // Same strings should be equal - auto bytes_same = slice_same->GetHeapMemory(); + auto bytes_same = slice_same.GetHeapMemory(); EXPECT_EQ(bytes_a->size(), bytes_same->size()); EXPECT_EQ(memcmp(bytes_a->data(), bytes_same->data(), bytes_a->size()), 0); } @@ -178,10 +181,8 @@ TEST_F(BTreeGlobalIndexerTest, CreateIndexer) { BTreeGlobalIndexer indexer(options); // CreateWriter should return NotImplemented - auto writer_result = indexer.CreateWriter( - "test_field", nullptr, nullptr, pool_); - EXPECT_FALSE(writer_result.ok()); - EXPECT_TRUE(writer_result.status().IsNotImplemented()); + auto writer_result = indexer.CreateWriter("test_field", nullptr, nullptr, pool_); + EXPECT_TRUE(writer_result.ok()); } // Test RangeQuery boundary conditions conceptually diff --git a/src/paimon/common/global_index/btree/btree_index_meta.cpp b/src/paimon/common/global_index/btree/btree_index_meta.cpp index ad3b4369e..6d3e6135e 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta.cpp +++ b/src/paimon/common/global_index/btree/btree_index_meta.cpp @@ -22,18 +22,19 @@ namespace paimon { std::shared_ptr BTreeIndexMeta::Deserialize(const std::shared_ptr& meta, paimon::MemoryPool* pool) { - auto input = MemorySlice::Wrap(meta)->ToInput(); - auto first_key_len = input->ReadInt(); + auto slice = MemorySlice::Wrap(meta); + auto input = slice.ToInput(); + auto first_key_len = input.ReadInt(); std::shared_ptr first_key; if (first_key_len) { - first_key = std::move(input->ReadSlice(first_key_len)->CopyBytes(pool)); + first_key = std::move(input.ReadSlice(first_key_len).CopyBytes(pool)); } - auto last_key_len = input->ReadInt(); + auto last_key_len = input.ReadInt(); std::shared_ptr last_key; if (last_key_len) { - last_key = std::move(input->ReadSlice(last_key_len)->CopyBytes(pool)); + last_key = std::move(input.ReadSlice(last_key_len).CopyBytes(pool)); } - auto has_nulls = input->ReadByte() == 1; + auto has_nulls = input.ReadByte() == 1; return std::make_shared(first_key, last_key, has_nulls); } @@ -43,25 +44,25 @@ std::shared_ptr BTreeIndexMeta::Serialize(paimon::MemoryPool* pool) const int32_t last_key_size = last_key_ ? last_key_->size() : 0; int32_t total_size = 4 + first_key_size + 4 + last_key_size + 1; - auto output = std::make_shared(total_size, pool); + MemorySliceOutput output(total_size, pool); // Write first_key_len and first_key - output->WriteValue(first_key_size); + output.WriteValue(first_key_size); if (first_key_) { - output->WriteBytes(first_key_); + output.WriteBytes(first_key_); } // Write last_key_len and last_key - output->WriteValue(last_key_size); + output.WriteValue(last_key_size); if (last_key_) { - output->WriteBytes(last_key_); + output.WriteBytes(last_key_); } // Write has_nulls - output->WriteValue(static_cast(has_nulls_ ? 1 : 0)); + output.WriteValue(static_cast(has_nulls_ ? 1 : 0)); - auto slice = output->ToSlice(); - return slice->CopyBytes(pool); + auto slice = output.ToSlice(); + return slice.CopyBytes(pool); } } // namespace paimon diff --git a/src/paimon/common/io/cache/cache_key.h b/src/paimon/common/io/cache/cache_key.h index e56b64a28..7891597d2 100644 --- a/src/paimon/common/io/cache/cache_key.h +++ b/src/paimon/common/io/cache/cache_key.h @@ -20,6 +20,8 @@ #include #include +#include "paimon/visibility.h" + namespace paimon { class CacheValue; diff --git a/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp b/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp index b7c6b3993..e1668eef5 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp +++ b/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp @@ -15,6 +15,10 @@ */ #include "paimon/common/lookup/sort/sort_lookup_store_factory.h" + +#include "paimon/common/lookup/sort/sort_lookup_store_footer.h" +#include "paimon/common/memory/memory_slice.h" +#include "paimon/memory/bytes.h" namespace paimon { Result> SortLookupStoreFactory::CreateWriter( const std::shared_ptr& fs, const std::string& file_path, @@ -23,16 +27,31 @@ Result> SortLookupStoreFactory::CreateWriter( PAIMON_ASSIGN_OR_RAISE(std::shared_ptr out, fs->Create(file_path, /*overwrite=*/false)); return std::make_unique( - out, std::make_shared(out, pool, bloom_filter, block_size_, - compression_factory_)); + out, + std::make_shared(out, pool, bloom_filter, block_size_, compression_factory_), + pool); } Result> SortLookupStoreFactory::CreateReader( const std::shared_ptr& fs, const std::string& file_path, const std::shared_ptr& pool) const { PAIMON_ASSIGN_OR_RAISE(std::shared_ptr in, fs->Open(file_path)); + PAIMON_ASSIGN_OR_RAISE(uint64_t file_len, in->Length()); + + // read footer + PAIMON_RETURN_NOT_OK( + in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET)); + auto footer_bytes = Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool.get()); + PAIMON_RETURN_NOT_OK(in->Read(footer_bytes->data(), footer_bytes->size())); + auto segment = MemorySegment::Wrap(std::move(footer_bytes)); + auto slice = MemorySlice::Wrap(segment); + auto input = slice.ToInput(); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr footer, + SortLookupStoreFooter::ReadSortLookupStoreFooter(&input)); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr reader, - SstFileReader::Create(pool, in, comparator_)); + SstFileReader::Create(pool, in, footer->GetIndexBlockHandle(), + footer->GetBloomFilterHandle(), comparator_)); return std::make_unique(in, reader); } @@ -46,7 +65,9 @@ Status SortLookupStoreWriter::Close() { PAIMON_ASSIGN_OR_RAISE(std::shared_ptr bloom_filter_handle, writer_->WriteBloomFilter()); PAIMON_ASSIGN_OR_RAISE(BlockHandle index_block_handle, writer_->WriteIndexBlock()); - PAIMON_RETURN_NOT_OK(writer_->WriteFooter(index_block_handle, bloom_filter_handle)); + SortLookupStoreFooter footer(index_block_handle, bloom_filter_handle); + auto slice = footer.WriteSortLookupStoreFooter(pool_.get()); + PAIMON_RETURN_NOT_OK(writer_->WriteSlice(slice)); PAIMON_RETURN_NOT_OK(out_->Close()); return Status::OK(); } diff --git a/src/paimon/common/lookup/sort/sort_lookup_store_factory.h b/src/paimon/common/lookup/sort/sort_lookup_store_factory.h index 3917a4966..2bbdfa36c 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_factory.h +++ b/src/paimon/common/lookup/sort/sort_lookup_store_factory.h @@ -48,8 +48,9 @@ class SortLookupStoreReader : public LookupStoreReader { class SortLookupStoreWriter : public LookupStoreWriter { public: SortLookupStoreWriter(const std::shared_ptr& out, - const std::shared_ptr& writer) - : out_(out), writer_(writer) {} + const std::shared_ptr& writer, + const std::shared_ptr& pool) + : out_(out), writer_(writer), pool_(pool) {} Status Put(std::shared_ptr&& key, std::shared_ptr&& value) override { return writer_->Write(std::move(key), std::move(value)); @@ -60,6 +61,7 @@ class SortLookupStoreWriter : public LookupStoreWriter { private: std::shared_ptr out_; std::shared_ptr writer_; + std::shared_ptr pool_; }; /// A `LookupStoreFactory` which uses hash to lookup records on disk. diff --git a/src/paimon/common/sst/block_footer.cpp b/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp similarity index 83% rename from src/paimon/common/sst/block_footer.cpp rename to src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp index 1f74ed5bc..c45896ae8 100644 --- a/src/paimon/common/sst/block_footer.cpp +++ b/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp @@ -14,13 +14,14 @@ * limitations under the License. */ -#include "paimon/common/sst/block_footer.h" +#include "paimon/common/lookup/sort/sort_lookup_store_footer.h" #include "paimon/common/memory/memory_slice_output.h" namespace paimon { -Result> BlockFooter::ReadBlockFooter(MemorySliceInput* input) { +Result> SortLookupStoreFooter::ReadSortLookupStoreFooter( + MemorySliceInput* input) { auto offset = input->ReadLong(); auto size = input->ReadInt(); auto expected_entries = input->ReadLong(); @@ -37,10 +38,10 @@ Result> BlockFooter::ReadBlockFooter(MemorySliceInp return Status::IOError( fmt::format("Expected magic number {}, but got {}", MAGIC_NUMBER, magic)); } - return std::make_unique(index_block_handle, bloom_filter_handle); + return std::make_unique(index_block_handle, bloom_filter_handle); } -MemorySlice BlockFooter::WriteBlockFooter(MemoryPool* pool) { +MemorySlice SortLookupStoreFooter::WriteSortLookupStoreFooter(MemoryPool* pool) { MemorySliceOutput output(ENCODED_LENGTH, pool); // 20 bytes if (!bloom_filter_handle_.get()) { @@ -59,4 +60,4 @@ MemorySlice BlockFooter::WriteBlockFooter(MemoryPool* pool) { output.WriteValue(MAGIC_NUMBER); return output.ToSlice(); } -} // namespace paimon +} // namespace paimon \ No newline at end of file diff --git a/src/paimon/common/sst/block_footer.h b/src/paimon/common/lookup/sort/sort_lookup_store_footer.h similarity index 76% rename from src/paimon/common/sst/block_footer.h rename to src/paimon/common/lookup/sort/sort_lookup_store_footer.h index 244159c41..c8ccfd1d2 100644 --- a/src/paimon/common/sst/block_footer.h +++ b/src/paimon/common/lookup/sort/sort_lookup_store_footer.h @@ -27,17 +27,17 @@ namespace paimon { -/// Footer of a block. -class PAIMON_EXPORT BlockFooter { +/// Footer of a sort lookup store. +class PAIMON_EXPORT SortLookupStoreFooter { public: - static Result> ReadBlockFooter(MemorySliceInput* input); + static Result> ReadSortLookupStoreFooter(MemorySliceInput* input); public: - BlockFooter(const BlockHandle& index_block_handle, - const std::shared_ptr& bloom_filter_handle) + SortLookupStoreFooter(const BlockHandle& index_block_handle, + const std::shared_ptr& bloom_filter_handle) : index_block_handle_(index_block_handle), bloom_filter_handle_(bloom_filter_handle) {} - ~BlockFooter() = default; + ~SortLookupStoreFooter() = default; const BlockHandle& GetIndexBlockHandle() const { return index_block_handle_; @@ -46,7 +46,7 @@ class PAIMON_EXPORT BlockFooter { return bloom_filter_handle_; } - MemorySlice WriteBlockFooter(MemoryPool* pool); + MemorySlice WriteSortLookupStoreFooter(MemoryPool* pool); public: // 20 bytes for bloom filter handle, 12 bytes for index block handle, 4 bytes for magic number @@ -57,4 +57,4 @@ class PAIMON_EXPORT BlockFooter { BlockHandle index_block_handle_; std::shared_ptr bloom_filter_handle_; }; -} // namespace paimon +} // namespace paimon \ No newline at end of file diff --git a/src/paimon/common/sst/block_iterator.cpp b/src/paimon/common/sst/block_iterator.cpp index 221185b02..43650c4dc 100644 --- a/src/paimon/common/sst/block_iterator.cpp +++ b/src/paimon/common/sst/block_iterator.cpp @@ -64,6 +64,7 @@ Result BlockIterator::SeekTo(const MemorySlice& target_key) { int32_t entry_position = reader_->SeekTo(mid); PAIMON_RETURN_NOT_OK(input_.SetPosition(entry_position)); + PAIMON_ASSIGN_OR_RAISE(MemorySlice mid_key, ReadKeyAndSkipValue()); PAIMON_ASSIGN_OR_RAISE(int32_t compare, reader_->Comparator()(mid_key, target_key)); @@ -71,14 +72,18 @@ Result BlockIterator::SeekTo(const MemorySlice& target_key) { polled_position_ = entry_position; return true; } else if (compare > 0) { + // mid_key > target_key, this could be the first key >= target_key polled_position_ = entry_position; right = mid - 1; } else { - polled_position_ = -1; + // mid_key < target_key, need to look at larger keys + // Don't reset polled_position_ here - keep the last position where key > target left = mid + 1; } } + // If we exit the loop without finding exact match, polled_position_ points to + // the first entry with key > target_key (if any), or -1 if all keys < target_key return false; } diff --git a/src/paimon/common/sst/sst_file_io_test.cpp b/src/paimon/common/sst/sst_file_io_test.cpp index 7bed07409..fd5d6b4ed 100644 --- a/src/paimon/common/sst/sst_file_io_test.cpp +++ b/src/paimon/common/sst/sst_file_io_test.cpp @@ -27,6 +27,7 @@ #include "arrow/ipc/json_simple.h" #include "gtest/gtest.h" #include "paimon/common/factories/io_hook.h" +#include "paimon/common/lookup/sort/sort_lookup_store_footer.h" #include "paimon/common/sst/sst_file_reader.h" #include "paimon/common/sst/sst_file_writer.h" #include "paimon/common/utils/scope_guard.h" @@ -118,7 +119,9 @@ TEST_P(SstFileIOTest, TestSimple) { ASSERT_OK_AND_ASSIGN(auto bloom_filter_handle, writer->WriteBloomFilter()); ASSERT_OK_AND_ASSIGN(auto index_block_handle, writer->WriteIndexBlock()); - ASSERT_OK(writer->WriteFooter(index_block_handle, bloom_filter_handle)); + SortLookupStoreFooter footer(index_block_handle, bloom_filter_handle); + auto slice = footer.WriteSortLookupStoreFooter(pool_.get()); + ASSERT_OK(writer->WriteSlice(slice)); ASSERT_OK(out->Flush()); ASSERT_OK(out->Close()); @@ -140,7 +143,18 @@ TEST_P(SstFileIOTest, TestSimple) { // test read ASSERT_OK_AND_ASSIGN(in, fs_->Open(index_path)); - ASSERT_OK_AND_ASSIGN(auto reader, SstFileReader::Create(pool_, in, comparator_)); + ASSERT_OK_AND_ASSIGN(uint64_t file_len, in->Length()); + ASSERT_OK(in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET)); + auto footer_bytes = Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool_.get()); + ASSERT_OK(in->Read(footer_bytes->data(), footer_bytes->size())); + auto footer_segment = MemorySegment::Wrap(std::move(footer_bytes)); + auto footer_slice = MemorySlice::Wrap(footer_segment); + auto footer_input = footer_slice.ToInput(); + ASSERT_OK_AND_ASSIGN(std::unique_ptr read_footer, + SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input)); + ASSERT_OK_AND_ASSIGN(auto reader, + SstFileReader::Create(pool_, in, read_footer->GetIndexBlockHandle(), + read_footer->GetBloomFilterHandle(), comparator_)); // not exist key std::string k0 = "k0"; @@ -174,8 +188,21 @@ TEST_P(SstFileIOTest, TestJavaCompatibility) { auto block_cache = std::make_shared(file, in, pool_, std::make_unique()); + // read footer + ASSERT_OK_AND_ASSIGN(uint64_t file_len, in->Length()); + ASSERT_OK(in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET)); + auto footer_bytes = Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool_.get()); + ASSERT_OK(in->Read(footer_bytes->data(), footer_bytes->size())); + auto footer_segment = MemorySegment::Wrap(std::move(footer_bytes)); + auto footer_slice = MemorySlice::Wrap(footer_segment); + auto footer_input = footer_slice.ToInput(); + ASSERT_OK_AND_ASSIGN(std::unique_ptr read_footer, + SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input)); + // test read - ASSERT_OK_AND_ASSIGN(auto reader, SstFileReader::Create(pool_, in, comparator_)); + ASSERT_OK_AND_ASSIGN(auto reader, + SstFileReader::Create(pool_, in, read_footer->GetIndexBlockHandle(), + read_footer->GetBloomFilterHandle(), comparator_)); // not exist key std::string k0 = "10000"; ASSERT_FALSE(reader->Lookup(std::make_shared(k0, pool_.get())).value()); @@ -253,9 +280,10 @@ TEST_F(SstFileIOTest, TestIOException) { CHECK_HOOK_STATUS(bloom_filter_handle_result.status(), i); auto index_block_handle_result = writer->WriteIndexBlock(); CHECK_HOOK_STATUS(index_block_handle_result.status(), i); - CHECK_HOOK_STATUS(writer->WriteFooter(index_block_handle_result.value(), - bloom_filter_handle_result.value()), - i); + SortLookupStoreFooter test_footer(index_block_handle_result.value(), + bloom_filter_handle_result.value()); + auto test_slice = test_footer.WriteSortLookupStoreFooter(pool_.get()); + CHECK_HOOK_STATUS(writer->WriteSlice(test_slice), i); CHECK_HOOK_STATUS(out->Flush(), i); CHECK_HOOK_STATUS(out->Close(), i); @@ -265,7 +293,25 @@ TEST_F(SstFileIOTest, TestIOException) { CHECK_HOOK_STATUS(in_result.status(), i); std::shared_ptr in = std::move(in_result).value(); - auto reader_result = SstFileReader::Create(pool_, in, comparator_); + auto file_len_result = in->Length(); + CHECK_HOOK_STATUS(file_len_result.status(), i); + uint64_t file_len = file_len_result.value(); + + CHECK_HOOK_STATUS( + in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET), i); + auto footer_bytes = + Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool_.get()); + auto read_result = in->Read(footer_bytes->data(), footer_bytes->size()); + CHECK_HOOK_STATUS(read_result.status(), i); + auto footer_segment = MemorySegment::Wrap(std::move(footer_bytes)); + auto footer_slice = MemorySlice::Wrap(footer_segment); + auto footer_input = footer_slice.ToInput(); + auto read_footer_result = SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input); + CHECK_HOOK_STATUS(read_footer_result.status(), i); + + auto reader_result = + SstFileReader::Create(pool_, in, read_footer_result.value()->GetIndexBlockHandle(), + read_footer_result.value()->GetBloomFilterHandle(), comparator_); CHECK_HOOK_STATUS(reader_result.status(), i); std::shared_ptr reader = std::move(reader_result).value(); diff --git a/src/paimon/common/sst/sst_file_reader.cpp b/src/paimon/common/sst/sst_file_reader.cpp index 5b5238b38..79764512e 100644 --- a/src/paimon/common/sst/sst_file_reader.cpp +++ b/src/paimon/common/sst/sst_file_reader.cpp @@ -23,24 +23,14 @@ namespace paimon { Result> SstFileReader::Create( const std::shared_ptr& pool, const std::shared_ptr& in, + const BlockHandle& index_block_handle, + const std::shared_ptr& bloom_filter_handle, MemorySlice::SliceComparator comparator) { - PAIMON_ASSIGN_OR_RAISE(uint64_t file_len, in->Length()); PAIMON_ASSIGN_OR_RAISE(std::string file_path, in->GetUri()); auto block_cache = std::make_shared(file_path, in, pool, std::make_unique()); - // read footer - PAIMON_ASSIGN_OR_RAISE( - MemorySegment segment, - block_cache->GetBlock(file_len - BlockFooter::ENCODED_LENGTH, BlockFooter::ENCODED_LENGTH, - /*is_index=*/true, /*decompress_func=*/nullptr)); - auto slice = MemorySlice::Wrap(segment); - auto input = slice.ToInput(); - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr footer, - BlockFooter::ReadBlockFooter(&input)); - // read bloom filter directly now - auto bloom_filter_handle = footer->GetBloomFilterHandle(); std::shared_ptr bloom_filter = nullptr; if (bloom_filter_handle && (bloom_filter_handle->ExpectedEntries() || bloom_filter_handle->Size() || bloom_filter_handle->Offset())) { @@ -54,7 +44,6 @@ Result> SstFileReader::Create( } // create index block reader - auto index_block_handle = footer->GetIndexBlockHandle(); PAIMON_ASSIGN_OR_RAISE( MemorySegment trailer_data, block_cache->GetBlock(index_block_handle.Offset() + index_block_handle.Size(), @@ -90,6 +79,10 @@ std::unique_ptr SstFileReader::CreateIterator() { return std::make_unique(this, index_block_reader_->Iterator()); } +std::unique_ptr SstFileReader::CreateIndexIterator() { + return index_block_reader_->Iterator(); +} + Result> SstFileReader::Lookup(const std::shared_ptr& key) { if (bloom_filter_.get() && !bloom_filter_->TestHash(MurmurHashUtils::HashBytes(key))) { return std::shared_ptr(); diff --git a/src/paimon/common/sst/sst_file_reader.h b/src/paimon/common/sst/sst_file_reader.h index 1c5192224..f27508a25 100644 --- a/src/paimon/common/sst/sst_file_reader.h +++ b/src/paimon/common/sst/sst_file_reader.h @@ -20,7 +20,6 @@ #include "paimon/common/compression/block_compression_factory.h" #include "paimon/common/sst/block_cache.h" -#include "paimon/common/sst/block_footer.h" #include "paimon/common/sst/block_handle.h" #include "paimon/common/sst/block_iterator.h" #include "paimon/common/sst/block_reader.h" @@ -40,12 +39,17 @@ class SstFileIterator; /// queries. Note that this class is NOT thread-safe. class PAIMON_EXPORT SstFileReader { public: - static Result> Create(const std::shared_ptr& pool, - const std::shared_ptr& input, - MemorySlice::SliceComparator comparator); + static Result> Create( + const std::shared_ptr& pool, const std::shared_ptr& input, + const BlockHandle& index_block_handle, + const std::shared_ptr& bloom_filter_handle, + MemorySlice::SliceComparator comparator); std::unique_ptr CreateIterator(); + /// Create an iterator for the index block. + std::unique_ptr CreateIndexIterator(); + /// Lookup the specified key in the file. /// /// @param key serialized key diff --git a/src/paimon/common/sst/sst_file_writer.cpp b/src/paimon/common/sst/sst_file_writer.cpp index 676cb866a..d2b5105d5 100644 --- a/src/paimon/common/sst/sst_file_writer.cpp +++ b/src/paimon/common/sst/sst_file_writer.cpp @@ -40,7 +40,11 @@ Status SstFileWriter::Write(std::shared_ptr&& key, std::shared_ptr if (data_block_writer_->Memory() > block_size_) { PAIMON_RETURN_NOT_OK(Flush()); } - if (bloom_filter_.get()) { + if (bloom_filter_) { + // Double-check that bloom_filter_ is valid + if (!bloom_filter_->GetBitSet()) { + return Status::Invalid("Bloom filter bit set is null"); + } PAIMON_RETURN_NOT_OK(bloom_filter_->AddHash(MurmurHashUtils::HashBytes(key))); } return Status::OK(); @@ -83,10 +87,7 @@ Result> SstFileWriter::WriteBloomFilter() { return handle; } -Status SstFileWriter::WriteFooter(const BlockHandle& index_block_handle, - const std::shared_ptr& bloom_filter_handle) { - BlockFooter footer(index_block_handle, bloom_filter_handle); - auto slice = footer.WriteBlockFooter(pool_.get()); +Status SstFileWriter::WriteSlice(const MemorySlice& slice) { auto data = slice.ReadStringView(); PAIMON_RETURN_NOT_OK(WriteBytes(data.data(), data.size())); return Status::OK(); diff --git a/src/paimon/common/sst/sst_file_writer.h b/src/paimon/common/sst/sst_file_writer.h index bc2c679fc..8d1d035b0 100644 --- a/src/paimon/common/sst/sst_file_writer.h +++ b/src/paimon/common/sst/sst_file_writer.h @@ -19,7 +19,6 @@ #include #include "paimon/common/compression/block_compression_factory.h" -#include "paimon/common/sst/block_footer.h" #include "paimon/common/sst/block_handle.h" #include "paimon/common/sst/block_trailer.h" #include "paimon/common/sst/block_writer.h" @@ -58,8 +57,7 @@ class PAIMON_EXPORT SstFileWriter { // When bloom-filter is disabled, return nullptr. Result> WriteBloomFilter(); - Status WriteFooter(const BlockHandle& index_block_handle, - const std::shared_ptr& bloom_filter_handle); + Status WriteSlice(const MemorySlice& slice); private: Result FlushBlockWriter(BlockWriter* writer); diff --git a/src/paimon/common/utils/roaring_bitmap64.cpp b/src/paimon/common/utils/roaring_bitmap64.cpp index ae078e761..e22672c75 100644 --- a/src/paimon/common/utils/roaring_bitmap64.cpp +++ b/src/paimon/common/utils/roaring_bitmap64.cpp @@ -22,6 +22,7 @@ #include "paimon/fs/file_system.h" #include "paimon/io/byte_array_input_stream.h" +#include "paimon/memory/memory_pool.h" #include "paimon/result.h" #include "roaring.hh" // NOLINT(build/include_subdir) @@ -227,6 +228,10 @@ bool RoaringBitmap64::operator==(const RoaringBitmap64& other) const noexcept { PAIMON_UNIQUE_PTR RoaringBitmap64::Serialize(MemoryPool* pool) const { GetRoaringBitmap(roaring_bitmap_).runOptimize(); auto& bitmap = GetRoaringBitmap(roaring_bitmap_); + // Use default pool if no pool is provided + if (pool == nullptr) { + pool = GetDefaultPool().get(); + } auto bytes = Bytes::AllocateBytes(bitmap.getSizeInBytes(), pool); bitmap.write(bytes->data()); return bytes; diff --git a/src/paimon/common/utils/roaring_navigable_map64.cpp b/src/paimon/common/utils/roaring_navigable_map64.cpp index 89971cb97..8aa9bf76e 100644 --- a/src/paimon/common/utils/roaring_navigable_map64.cpp +++ b/src/paimon/common/utils/roaring_navigable_map64.cpp @@ -20,6 +20,7 @@ #include #include +#include "paimon/memory/memory_pool.h" #include "paimon/utils/range.h" #include "paimon/utils/roaring_bitmap64.h" @@ -107,7 +108,8 @@ void RoaringNavigableMap64::Clear() { std::vector RoaringNavigableMap64::Serialize() const { // This is a simplified serialization - in practice, you might want to use // a more sophisticated approach - auto bytes = impl_->bitmap.Serialize(nullptr); // nullptr for default pool + // Use default pool when no pool is provided + auto bytes = impl_->bitmap.Serialize(GetDefaultPool().get()); if (!bytes) { return {}; } diff --git a/test/test_data/global_index/btree/btree_compatibility_data/README.md b/test/test_data/global_index/btree/btree_compatibility_data/README.md new file mode 100644 index 000000000..8da35acfa --- /dev/null +++ b/test/test_data/global_index/btree/btree_compatibility_data/README.md @@ -0,0 +1,27 @@ +# BTree 兼容性测试数据 + +## 文件说明 + +### 数据文件 +- `btree_test_int_.csv` - 整数类型测试数据(CSV格式) +- `btree_test_int_.bin` - 整数类型测试数据(二进制格式) +- `btree_test_varchar_.csv` - 字符串类型测试数据(CSV格式) +- `btree_test_varchar_.bin` - 字符串类型测试数据(二进制格式) + +### 数据格式 +CSV文件格式: +``` +row_id,key,is_null +0,123,false +1,NULL,true +2,456,false +``` + +### 测试场景 +1. **小规模数据**:50、100条记录 +2. **中等规模数据**:500、1000条记录 +3. **大规模数据**:5000条记录 +4. **边界条件**:空值、重复键、边界值 + +### 使用说明 +这些数据可用于验证 C++ 版本的 BTree 索引实现与 Java 版本的兼容性。 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_100.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..c525023fe1f2af74135cfe8bef5ba6eed6fcdaf1 GIT binary patch literal 1177 zcmZA1YiP}J9LMqh`Tsv=XNMWPJ7%*p+w5XvGj?{dJ9B1cS93PZvd|MUO;{~b7%W!WwZZfV0K z9fV6DLQX;qb?lJIh}T9a5~Lf6G7LWHL7JR`OgR-oTGI4u|AjLgG1nU9Nd5r$;}F3ZKZBA4L0 zEW}Oe$8A}J`?44#vILK0DIUtD7?ovsBFphqR^XYe#B*7Nm$DjfWDUk;E#Ao$crOF^ zDC_WvI`+zXe9^`#e3cFOCL8fnHsP0S2A?N>prsXVE!x(=!-ef?M}%C9Xc_DvS zgjcRZqFj$;xdCai3#oD=vSl~VXN5BJkjAkjmd+NiLRQYq^Rt?@vo6-lb}(~y_pyWQ zI5T(Y61&Q7u?OriGgExU-ZC>8GYvC<@623i7>i`)r~6nY%VR~Xk_A{33$kvumG!Yb zY=9kRC)imw#ICVBY=n)nG4`5`vk&Yun_xefV_TMoMKUk*u`HI)idhw_W6i9S^|0-1 z7wcyS*im+hooB=B2D``FczrIbruXC^PxSBb=bBw}glOt6YlfA}c`vh;bIM~w`~A8% kj&&i_t+(~veKDcMe=m*rvtj;cQvR+pxHey2+g#D`8)h$Vd;kCd literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_100.bin.meta b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_100.bin.meta new file mode 100644 index 0000000000000000000000000000000000000000..5d7e9d8c235ac0a7c4afeb1c076e676c177b5a19 GIT binary patch literal 17 VcmZQ!U|?VdViq9QWn^Gr1ONes06PEx literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_100.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_100.csv new file mode 100644 index 000000000..321d10a45 --- /dev/null +++ b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_100.csv @@ -0,0 +1,101 @@ +row_id,key,is_null +0,3,false +1,NULL,true +2,9,false +3,11,false +4,14,false +5,16,false +6,22,false +7,22,false +8,25,false +9,29,false +10,31,false +11,35,false +12,39,false +13,43,false +14,44,false +15,47,false +16,51,false +17,54,false +18,NULL,true +19,61,false +20,62,false +21,64,false +22,69,false +23,73,false +24,75,false +25,78,false +26,78,false +27,85,false +28,86,false +29,91,false +30,92,false +31,93,false +32,100,false +33,102,false +34,104,false +35,106,false +36,108,false +37,115,false +38,115,false +39,117,false +40,121,false +41,126,false +42,128,false +43,131,false +44,135,false +45,138,false +46,140,false +47,143,false +48,NULL,true +49,NULL,true +50,152,false +51,153,false +52,157,false +53,160,false +54,166,false +55,165,false +56,168,false +57,172,false +58,175,false +59,178,false +60,184,false +61,187,false +62,190,false +63,189,false +64,195,false +65,198,false +66,202,false +67,205,false +68,207,false +69,209,false +70,214,false +71,NULL,true +72,NULL,true +73,219,false +74,225,false +75,226,false +76,231,false +77,234,false +78,234,false +79,239,false +80,242,false +81,NULL,true +82,247,false +83,252,false +84,253,false +85,258,false +86,258,false +87,265,false +88,265,false +89,267,false +90,272,false +91,277,false +92,276,false +93,279,false +94,283,false +95,287,false +96,290,false +97,295,false +98,294,false +99,301,false diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_1000.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_1000.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4727238b2a153f21e6eac410b66e2623a36baca GIT binary patch literal 11618 zcmYk?2XIx@`u_2(wVt)NC{{r5B1lnDnhIDj;N>EyTm(@OK`$bxh?J{{6#?m;gklIa zbVBHaPUwVA=!8ya(gi{%2KYVuxikOyGxPEN&N@0#=jEKe_W>>@CdQf=@QUW6sH&L{ zZ)oO6HO&I3srfi+X%<9n%|fWl>Fg8#z(+bLj0T!d;#18c_>X2$G}e3yUuqUZbIsyt zp;-bgHA|wk=F@1$>FhJl;zu2n!mpaop_AtG=&V^9-89Rfmu6Y?)hvhpn&lC%`2vP% zR={x0iWsd~31c;1#$?S`FiG=OOwoJ|(={t&mgeh7)r`d=%_>OKtcvBDZ(y0`Ke0x$ z8rEt43!5~nBSZ5|Y}2fP9hx7|9ovl+BmvrzRu4vZ7 zP0bIGtN9`BXx0b!iT}Vy4Uk9iaYH=7u%;0n(fkAjG(Sat&HtdFW@8lA{0vWNHbDu^ z&rwRVDW27AhH{!;;04XEP)V~nUeWv-v6?MVRr4EE*KCQJnypYnvo+qkRxC4!RV+8C zrdVN6U9r-jrec-BzZI(uYAept zVloU`>ujsRcZzKWZ57)Mep2i(_*Jpf;5WrCgAR(_2Avgq47w{axvq-62K^NK3BT;_uSId4^|N?J(chx9Vt_?iMVv)BMZCpJih&lD z6oV`(D+XIsVR$>l{$Wu~X9*THbvD!{TGZ0nFpD~h;T9k0#SwO-#YZ|DW$}q3$)c%Z zjKvpzBE_PsVyZ zW+|3f%u%FSEL1GDSfp5Hk)~K~u}rbTV!dLO#X7}mi!F*Z7F!kR7TXkSEp{u`S?pD; zw>Y5KU~y2f(c(D6+f8<}#YvrQu{f`@47=6hvd*?yT+`WhyTjs^&URYdR_wCKRqVE~ zh&FSNg;!*9AsOE8wfig{*4cjUrs9CbV~T?ok1GywR~3gX3M-CS6j5YZlvEtGcv^9c zo2oc&QATmXqP*gy#S4l*Eh;EZS-hq=ZSlI|j76;CEEiUB&f-nQd5ao~3l{Gv{^H&; zyuD~IS-h{a%N8FivMm}Zu2}p>an<5;#WilS;<`mM#SM#Z6gMqeDsEY{R@}B|tH|LV zD{?J2VZ|7Sql&Q(rxoKIPASGaoK;M4xTKiqaD(CP zWS8uatFtK%_Y^4(&g zP(o)59G+DybSSM@(tw;&4!L)geo9&EbUNy2DAu4eqGoro(x~ZHLQ> z9Ea}V~QR?0Yy)s zu%b6mM9~K*uILLqt>_0lqv#Jjrx*Z~SHuD36!AbM#X#T{#US8S#bBTcgP-{L1E{96 z1faTNDDaLV5vZjY2Gmsy2i{YR0Nz)O1R5wt0Sy(SfyRm?;4{S-;B&=Tpt)ik@U>z* z&`L1@_?F@AL`(wO>1;Caiy|3lub2XKQltRCE2aWH6w`qIikU#1Viu67m;($`qyi%q zbAcqqJYa%iJ}^nK07zCW1g0q#1G5!NfK){qut2dCSfW@4ELAKARw`Bi>57%Wdc`VW zonkexh2iZQqyxKkwhq{%SP$%1Yyb`_HUdW#n}8FF&A=(e7T~lZ1Gu2r3S3ld11>AJ z12+{rfE$XPz#YXdASR3!bvIxYdjO}%`3yN-B{WtT+w4t~dk4D$a6W73YAOit|7X z#RYD#;xC|%;vzR#aS8Z9ahZ#&$OalIu5fV`SAixBZ?EAxH&$mifEJ3IKx@S@~tMBF!ue0tRgB3kI5){2WMksoF zj8gRRNK*9mn55|EFZLvct6l%kHW3?i|W36JS z$3{h>#|FhPkIjnV9y=8yJa#KadSog_dF)k;_BgCa@;IUx<8f3m*5ia?oW~i(c#jJV z-W~Xf9#?fX$>X|Wvd1k&vd3-36pvg*3a?i0i?(>Ghfz%P2o%#j9#G8ixL+~T<59&d zkNk?+9{Ch=_)Juh>QO{7*Q2Ono<}joe2-@p3p`3G7J8Inc)Q3i_NbtbH5c)7f^9pA|biIx2Q~bW-g0_+7Ebqo*R%qqkx&S5&dj zBTlj3W02y2#}LIqkD-b~+*HM3kCBQa9%B?)9^(~9J(3m2xT%Wc9y1gtJZ33QdZaSE z{nMZFn6IWcIfO9cU5tjyQ;|c*r&MSaX@j^ zBTI3OE2_BeaY}K+ z0D+=wfCm)a0z9PX9^es0j{x};Jp&X_^a@Z&(K|qKMc)7=6#W7`t>_=1v|>Pj@`|_s zFDT*zyrdWy;AO?20F@Pk1H>wZ1o)>SAwUhq&;Yd*i2-UWh6Q+6F+4y$#fShODMkip zpcoaPu_7rz6UCSSUns@~XsMVG;9JGS0N*pbofJ$C@T<;}19VVK3D8lI5}>nUYJe_^ zX#sjErU&S)m=U0_VrGDWidg{$DP{*4sz?nmN-;OU7zS?wg82a^>TE%PWW~Y&DT+k_ zW+)a1n59?}U>?KUv|wp~#X4ISV5wqxfR&0B0ahzk21r+|3a~-3IzWbEO@Qr+^ZjLaktPgNNu_3?_#l`?xicJBID>euCQ?Vt$X+=hWzZ6>oTvTiea80p2z)gm?JA$17 z?&xe+fLz7y0QVGo0=WC54b2RYN3l0RUd6ru4=VO^RTT%gn2Lj3OvRx9g%w8v6jfw# zF%?Gxlu{fE@Vw%9fU=4cTvf%%0Iw)caYGfSxuJ?PTujB;0M!)d0#sL=5AcrSLV!Ao zivj8?E^$K@mjisP$PUm*aV5Z~imL%WS6mP96~o&b!A6}JO?r^pG=R`GX$ zpA@+P+AHpGRTXywbW+?4&|T3vL=Q!m5PcZ@pbxu+7@)K6AqFaXgcz#m86rW^E5s;< zx4pwYA;#*gZ-{YbRmQi!7reg_ba4RKOu z<3gO%+4yimh|4;g7~+y*QiyAc$sukll0)P&yqyxJgz)@ElTZCaL|<(>Gd)Be#f%X5 zD`tjxP%$gSql(!f9%pzvCrl0Tq|WAsD5{tjqNHMeh-VcGLX=W03{hIKC`1{>;t($= zmV~INNDJ|rVrht170W_YQ7jMfFU5)wZz@)Xcw4b5L@mYY5dT)J3GuEXJ;eKpwIM!K ztPAmxVtt54iVY!}DmH~^rq~>!xnfI*uN4^~S}V4OXrtH`qOD?kh@TjIM+kR@=%}+@ zAv!B|hv=pk_k@`tdg*L$h~A2QA>tMLL&PZ#gczha$jwz8;^r!ja9tHyAto}sJsKY4 z!YYr4Cqm5B%O|jj=saDezsd1?gXr6===-bai>i`T zmMT#dszL8keQHe2>Aw_xyVHq!QXCDX=v$b{G?Nz4a#}~xcO#i}gig^#x5mwuv7)RW?9D2<^h6g@OwLTe~`0=|ci&}oXESx1ki zqleKCQXwi%W$9&lgWjfk6g|0(p3JtQAE+bsq<9)Z6KFa`4?maFdWxQ6Mvp6x(|Ni^ zcPM(O7(G3Vo);FU=s{pbszNoXE=3RLKBsT!dy1Z|MGw&i&`=sfQ)muFkFwU#7TQBc zD0;kfh5jb+7Cd?cREVOdJEf^2MGt0bQuM^-V~QS~d_&*UZ`7Rz&`=sfQ)muF4QiHCPHm|@b)){2KuHwc8$>q& zX_QVGlu6NrdUPQkU3%w`%M%lmm!ga1qEw1rr05d2Iz?Bn^(neAZBEg}=PwjpOh#9Y zLnyj3oJ2EeAw}168)+vUq>~g~VcjCbPowD4D7pqJM$uJG1&XB_6kUrnq^1;IO|+v9 z6kQ0!Q6fd3=+Q@aDn%d0(Wh?o@tR3lbcV7ihaBGv^HKpSM$c0PilrJu4M8qhoZAuG3u#ALh@aC#VFKp-NPh-lF%YAvL9z)Q&n(4~nBi8cQjZN@osW%Ox5j26O(|lS^>uEdfr{i>)dyTv|@+X*=zw<8+>`(H#o%^XE|^DoJJO zWqO0&rh3$fno%qIfjUx8il<>Tj;7LFT1x9_JME|Abe^u!9SREY=TRXlPNk_LRiTey2rwSBHHK-0XpeEFU z+ERP!M*S&)k|>#G(_&gpn<0{hpd?DB z*_1}zZw|DRNVic%@6K(SPV>QDn}LM^Esb)X&;M~O6+QYe+uD4jAW zld|XxWm68hfAHr~0V+n%Qw55p8dQfGP!noFZK*wVqyCgYNt8^pX)&#)&9s{i(K%hr5f}e zHKeB0lG;%R>OpaoNaJY+Eua;&fm-pp-NfYF`|dH`xct9gufG_bM!zcSF3!X}7V{$S z-`F2+VS1TKW~!NEHkqwvr^zz6OlZs6 za`r|0nyqO+v7KyBJIJoKJ1zfkmdoSvyCSZ*E8$*pHC)k6}s;kw+HOl*prmg-FOovak_Zh^Ay|VPQvB z%F5Ex(ncv8OG`^D-}y*x{rjDJ>ORgr7p^hJ@fZ|H2R`YdSOTRo2Lb9jxp@d_A|G|K z0AcAxRQeE;g^0@{bjV_K%Mv7IDf(m?2BjZEvI4`h65}#}w5&o#1~EZhCsd7DP1Imc z)?z`{VNup&Nrtg1BUqCSSeK31lu>NSChSnxX>P%;CSur=t=N}sIFRi)l5rf%4xGwP zWMvo5q+PGcXZ8yQS%kH+9+qMw%r0(<&9h~;!M2%w^dUQ8=j?)Au^V>B9@ry$V$bZA zy|WMY$-WqO)p(ho)vzdwvm_f}V=Tj_ncXapxeQ!a^5pO6PuN*|xLSqBl$!+KzrHv3 mMXPj8RzHq0Z!5R=f7ksRq@14<`#Y8Wj+;2Pw@uHCPJRP~AS~Db literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_50.bin.meta b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_50.bin.meta new file mode 100644 index 0000000000000000000000000000000000000000..ab8d26b1881d8e3dcf1760255723106e46e0d2df GIT binary patch literal 17 TcmZQ!U|?VdViq9o2VzD518e}C literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_50.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_50.csv new file mode 100644 index 000000000..85772fd37 --- /dev/null +++ b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_50.csv @@ -0,0 +1,51 @@ +row_id,key,is_null +0,3,false +1,NULL,true +2,9,false +3,11,false +4,14,false +5,16,false +6,22,false +7,22,false +8,25,false +9,29,false +10,31,false +11,35,false +12,39,false +13,43,false +14,44,false +15,47,false +16,51,false +17,54,false +18,NULL,true +19,61,false +20,62,false +21,64,false +22,69,false +23,73,false +24,75,false +25,78,false +26,78,false +27,85,false +28,86,false +29,91,false +30,92,false +31,93,false +32,100,false +33,102,false +34,104,false +35,106,false +36,108,false +37,115,false +38,115,false +39,117,false +40,121,false +41,126,false +42,128,false +43,131,false +44,135,false +45,138,false +46,140,false +47,143,false +48,NULL,true +49,NULL,true diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_500.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_500.bin new file mode 100644 index 0000000000000000000000000000000000000000..252c0e31fc0e58dccbcd0ae03d35103139590b44 GIT binary patch literal 5789 zcmZA5d6duf9>?+D`~7+SNK`6{RJKxCQnr#M$&y6Ml9Yt*WNVSMiEH0?+4tQbvXo^E zF}CdMU@)>|-?xNxU*Fd~=l*f$oJX(c`Hapu_4#ds5R1j!V#uU*WYz}Us|B)Z<9LW0 zJVQq0)WKbNOq&V0wRa=GHZz{mWIt4B1u~g%eCc^tbG|N+6q{!eFYn|6|q_SDz<7X zVY{|6QngjEUt0}_wXfrdwmME|-@s{Z@ESF7K?iT*vbGlf)V__|+S>SATL-*P{1@uh zLng&L^^uw3_Aat&8z8&(J!I3qkB7AlkxTmla%&qQpY}sMrEQD?+9oKXZHl7Wf1!jn z0j0Dbql~s0%4wUUqP7JpYg?j{wiT*sKS2%cr>Lp@47IhNqpr3!>TAD118p08p#2g} zv~7`~{R$szzeY3dH~2*REm~{8LmO>7e5w5&Uu)Z=y|x27X#b6l++ZCI-4vY+Jr$h| zy%b#x{S{peLloT&!xcRYqZB<2;}pFN;}yLP6BT_7(-nOUGZ}I3XZ;O7DF@gk5qg@)XUpAAncelZkKEHXT;SZpYw zSYmiyvD8pZkz{yLvCL3fk!&cVSZ=7GSYfEB_|;HZvC{CmVwIt~BE?WcvD#2evBpqa zvDQ$Z5%+br-q2WQ8w^bp8x0@po11L2p_R_I7(Q2QHMCZ2Gkm4kZunNQ!_ZE#)9}4w zm!YF#x1p;dmDg3V$IwTy*Dz3#W*Dm2&wHylXc(hN=e<=N;&oO0ZkVPx%QW_tKv_?8O065ImKUwONyJkwTfGY>x$ckn~J}A zYZZ44F}^|v`^Vslj*g6qPL529&W@~%xOeq#j+{E{;doTh(~(=>?B%^3d3Dyu@uZ@! zqoAUnqp+gCqljXF<9WqEM+wCsM`^`iM_ESPhxiYU3OXC=sI0SLKHO1FXCoXn6eAsP z>zku|w4<)h#yA=%#yJ`*#ygrSCO8rl6CIy0;y&4@I6l|eRL2*JX^ysv>5gv|GaT&| ziH^>SnT}41S&klx*^ZuyA02%aa~uN{a~(qzKRJdf<~c?v<~v3z7C6Q#7CI&>es)Y! z{NhMdEON|LEOyLNEOE?LEOpFNBsmr-mN^zHk{wBk<&I><3dcIdO2=BoD#s>8iermn zwPUMdjboQ$tz(a3og+=L-mzcto8t&0?i+lgVe0%j@t2mGiQ5U@ZoFyLp!pnxTexDO6P0+#9Qhk#Xzp#f_Y z!vfYSh6ik7#C=2<8L&-fqXKp+MhEOwj0xDM7#onT7#DC*F+SjT#e{$piirWo6_Wx^ zDJBP8P)rH9&WQW8Fg@V5&SnJsqeu)0alPCJWKhfw$fWo&;2y=Cfcq441F|W83do_D zACOD2ARsp*?hC`u0r_9y_Y~^`8YzAY_(-uaAVIMy zprvASKx@U;fX@}%0=`yk4``>@8PHy_E1iarTWq(quWOKBBFpVW3x8XckN6Ib*h z>n6FyVt3Pn6n#9(Plc#BMIUosrRr3h-lL|}lG;!^>Oy^J2#u!6G>aBc5~a{a+DZHA zD4n4zbc+Jth(vFXqc_CSJKy|Nm`YGtszh&49coAk^eIJe7dudQ>QBQcdRrI0k()!m z&~jQ!(VMMQN~hy=p03j!%E12v_tHc3I2E90sT7r`DpZr|(Fc@3pVC*yVQ&p-((Mv(}RNs=mr1sRE2GA&) zOtUF^HeF8ZD0*6qp8t-}8M;b;Q}iqqJsm|)KzS*8W+_f(sWR21=+XWmHK#8qday>1 z%6>GA#?uU%N6}*=g*MS{N~aTanQjp>@aItu%1Z^QIF+T!RFmGJ52-nQLG7q3^`l`l zo@UTIT1qLjiFQ*uoubQhi;$5&k8)66DoDktJXNJy^e#1_mh>gHr|vX>M$kl>NegHh zt)XqSkB-nex=#PlU3}2qPmfT3DoiD*0#&0r)Q}RWHGM~2s4oqrag<2&DT!9oW=f?) zbdoO7O$wP}u`HCG9;c`1IVw#RsXEo6hLk{`(KpnIdeaaZLsMxEEuvrPH`+n_=@^}( z8)SF$=TUabL(fn#Dnpg12Gys=)Pma3_tcI0({P$Vi8P;*XfKuxI?wWSW!g9g$_nnbf`Atlp#+D>V7l+MyMxDkfLT^%i zYD_Jt4Si4Ds6P#-2{elqQZlWjt+ba8(`mX!cPQh1{CSj<7VVx%Lslp>ulL^<&)A=Zcvty#tJc{@ivRmZz37+9{Nmp*c00Q=7X3Ci`ioSnQ?b^6 E062~b8vpRR}9orH8DqHB{f*J-)jWHorG zr|abkT_0EL`ng^=z|FcrZqW^KyKa~}btBxP8|7i$IFINicuY6RQ@Sah(@pcdZibi4 zbz0dRuW88hx^98Db?fnt?&Z9zdj)^j{T-j^Udd;=SMd+stNBv*8vbpr)4KLLzSnR) zKk44Uuevui(0Lbid|R-EVkJ_gmi3{TFZP{+su7zvCm_ z@A*Xc2R_yPk_)hm{e$@SiUvz)vSKZ$zGS}%Sa?n_)@1R6z z;Gn6{&_OGqk%RU^69=7yVh3G>5(hnmrVe@v%^Z{q%^g$-EgTFou&tJkRt`pptsQL~ zjMu|$9Zb~L&e7gMwYClpW(cJYrVAY%%n>>{m@jm8us|qtuvF;cV5QL2L5)^1^&%tq_zk}04g@Za_fP)LdKnD)% z(*LM~9e9Kx4t&B;2Yz9ggOD)XL0lN&AR&x&kP${X$P1$#TrP}paD_0|!L`CT2iFPX z9o!&PI=EGs;NTC!L__aG_(7gD z%N+b9EO+ofVTFUAg_RD9gjEh22{k5HVYP$i!Wsu{gmn%|h4rReVWWd?!Y0$Lu-W7) zY;n+E*lKbWwmBFmY&T9mD-wHTr0G&xJhVfaf{H(;#Q%x#UF$=7Iz74E$$cESv+K5TkWk57LREw zwRlQfN2`;?bJ{vvyeO1e{6!CTvASBkrmdUBTS5GF>3>PL^j1(qWj2EgbDurr`Ny21{ zYGI1S9ATQpY+<^^LScr*VqvDm5@D9bN@2FeYGIDWI$^HGdSRZ$HUrz5Z!NIcp>3hX zK5dJv#TJLPEwMPRZK<`);Is`It`@dfTqA5Xr3%|DZWgv%+$QX>xLw$3afeWAai6fu z;sIf|#e>2g6Rfb;;t64&#goE*i)V!crdtEsI%plTctzV`i~kdjSiC75wRlH3X7Pb= z+>|Vwu=q$gY4N#m%HkiwX^Ss~GZxICQiBvB~H2vO`Vh**j6)Vb0-6|wQw>}Xz66I(8|eBp|z9ILK`Pzg|<#6 z7}!=jXL~1A+B!IyE|fZ%C3JK$Tj=Cup@D66c9uC=s;!HY6+%}hYlLo2)(YL7Y!Z4n z*(mgMvPJ0SWS3Cxq*mzdWRK9t$swVylamIv)!$j+q)yuaCl`f*PMo&>xOd_bhB)yG zL!AVLVNSxra3=|2gp-Ug%1K@r?c@pr+Zy8>>*OkJ z7A85lSEzFGpiu4P0b#O}hlMFl9v7xMd0Lp};PF@yfI{Ax%ZOwAdcJhDP z<~Vsc{3tAQ@;_mPlV60DP8tZSOo2j; zlcvIIC(VR4PFf3VO_RbpCvAoGPD+IhPC5x2o%9ekIq4;Aane`V>ZC&0W*QZ?I~i_Z zTRWUPO`O_lom2|DoJ+4E`*1fmejC@Vd|q z-VnOOyFw3mPv{9B2)*DFp&ULHdczk&ANbn9w))Z!{;jP){70yO{|W=(S79LhCJchc z!eD4241-p}aOfnAgwDb!=qikc9>N&tBaDT9!Z@f9#=~G?0*nwQ!YE-9j1#J0qEHQ0 z!ep2#Oo5rgRG1@7gW1A#SZH8dGnff0watcA!W>vD%!T#BJlG=4hwZ`ws1+8%E@2Vu z7Z$@oVF?@-mcl7v8JrZB!+Bu^T;{$M>PoPLRp1nAOuE8qldiA^0>WBI2JeG5b8|9!g=^bxCr%yBAbRneVe8Rw$;FHXwzC-Bb#4^K8{~sp3I44q>p3OBiC~7lzvSgkd&OVYp2~7-16^ zMw-_|g;6&3gwZya3uA14CyceZP8er%y)fS9W&_)*v?tiyp>3kgUBV=rd-Y(IU2XG_ zw#hb+2vcmH(1TO$X*SPmn_=^^Fw^E0VV2EbgxNN48koQH+jDK+);7=PLv8cz1vVdR zTWIsSu*l{sJ-FCjV)JipOKpA>mf8Fwtg!i2SZVW{u*#;1P-9aftTu@XYiwExYi-&J z>ulN!>uov;8%(LfMw_m}CYzqZW}Du^7MluTt0`63W;0aSZZlliVKd6WwszXJHe$E>ZRc$+XuDuvv~inPkrawuWP}nIR|?HsTqQJjagETz#Z5v>7q7q-Wuc3U*MzPvUKhH#cvtA*;(ei~ zi@yoIT>L}mf*j7JRe-}S#t8nqNFu=tx!ax^A!XOv*g~2Y0g&{6VgrP2) z3Bz2p5r(^HD~xc_Q5fZ-n=sl%PXqIp0M}R-eYK5qQ6Y?XF;J*bmC7t@8QE@leTT+9`wyI3U5aIsXF>0*^I+r=7Tj*E4|To;>! zc`h~!^IdEc7P#0cEOfC;Smfe>u-L^xVTp_5!crHf3~XzeYq^W_+E%!z6IQyoD6Def z^j#{n#)Vf{?IIwoaS;^OnpA~#CQMJIc zqb}YSj=A_iIN{=B1KT?3I%OKwcG@&5oN@7uaMs1Q!Z{Z|2z4(0E1Wl}3Kv}bDqM8Y zNGNjCSg7x&se$=}-rdkmOKpwZv=JJ+=_oXDQz{g@>1JSCCGMtfdTDFsrd(+5rk~Kl z%|M~0n;}9gHzS1BZbk}i+>904x|txfb2CY3@1{!V;AW~&>Sm_U(amh3lbg9hXE%$4 zGB=BbE^bx|UEQn}y17{+ba%5}=;3CIfq4(W-OJ4mZRKwEYU}OpE>o(f}7igiEi!`Cb_v!sB&|!Q0?X+VX~V) z3RB!XAxw4ij4;j3bHa2te-dW6c|n-z<`rR*hmYzMGGP1#UhO z7P|RVSmfqQVX>R9ge7ji6_&dB!NB~E;9l-prZq^A$-K-Ukx!EWjce7PEVe%AC zx~UaTx!G@ETc_P;+#J+)*3C(w&dn*|yqgQc1vggUQjuKlVer@eNJf^LKr_{sa+B$l8TIl5A1);NtmxMA8uLxZ{{6*;M;Vq%Nhj)Y?9^Mmr zdiYT2<>3>d+{33rZx3GxeLVbA=vS_;EFv=N4T=pc;nP%4b{&{-Jep^GruLpNcJhjL-8hkn914+Dkq9)<{& z9)=1NJPZ>idKfEA@=z&Md6*S2a3&BGjFx`#!=3=d0%nI4u1 zvplR2W_ws8%<-^UnD1e$u)xDkVWEe8!XgiQg~c8Y3QIg37nXWBAuRK7Mp*8lPFUgL zqOj6~GkB@nRUQy(Ou)iw4*>()TH{&kA*^kkhnTS5LsHn_A*Wxp(X+{XkVM;NQ?9VZ z!!^QI57!IZOt8Xs54Q+AJltVmp6q*SJ>0Ermxl+0-5wqh_Lypgy&nE3?DO!faKOW# zgo7Sl77m$eg~J|R7mj#%Q#fk!6^@yFh2th);e>~eg_9mW7fyNjQaEj@HL$HSp0gf) z)K=%=Kf-wr{}V2lYW1rwdWyUhi}k$?ytL564ZXAz8hhy=H1X0|DE3k&lz8bbH1$$0 zH1kp+wD2-eXz68;(8|j&p|zKhLK`pRgtlJB3+=p2654yI7CLyDA(VQVC3N&MTj=Cv zzR=mrLZQsda-pl28ljt))k1eK>xCX(HVZwyY&Wp2UfyyqyR`N8vQOya<&e|FQQWLty1L%etlosOYiLc(w_31NhnxG>U7P8j7S zD~vWDYZAtIxl$PGPKFYjoZ=jAtHTjiyZP~)XUSZ%5l)_7?tto71X zSm&j^u-;3lu)#|gVUx*I*zBdRu*KvlY&BI1+q?`GwtE>W?C>&5*lF?OAM`++9 zC^YmD5gPf335|Uug(f~S2DVk~EAerKwx&LQCp0sk#}k_SxK3!{<9eZ`kK2S+K5i9S z`?yPJ14bA4ge5+X2uppO5SIBk zDJ=JKR#@TVqOj73Wj-Nlo(lMI8Q4~hZ?%t*wzVc+VV#eJu-->X*x(~4Z1hnOHkpQn z%_dy~^W@*R)yK`+wwq>!9i~uWr;j^?S|9fbyL>z(?Dp|T1KZl`+h@|$w%^Ay!T}$D z5)PV1g+o4G5f1x!T{z<7|AeDH-Vu)ZcuzR);{)M@kB^0uK0X&t`S_=B#>6U|_37}!=De_KD}w6*h7CA9ZbEp+fR zL%*uj-_g%pZJqoq5IXx=ER^|KCUo($Lg?yerO?gKdIQ_)?(gAeleV6IwrlI%6gg$=u3w`|@6Z-i%X<$A8;IHs=PTK%K=Y@fOE{k3&bC4gWFxZbv7~;n(4D}Nf zhWQB#!~H~r5q=WFNIxlIl%KpX+D|=UjGrrov3{--#`(EX81LsMq0-MEgo%Fc5GMJ# zOQ`a5uTbsh0b#PAKMGU)JSI%_^Mo+X&vOR0HQhhM&!4r;^z*7P%g^h=Y(K9FbNsv` z%=PoOFwf6>!hAm;3k&>wA}sXt4`Gp?FNMW^z7m%B`9@gk=R0AUpZ^NW{roJf@bjCn z(ocP1m7m5!jY-(Rd?d)f#^Ef#2mCA& z4*FRl9P+bNIP7P&aKulIaMYA79P_hTIPPbgaKcZmfqCK3f68>N?X;hR!WlnDg|mK+ z2g$t&0;iBJsBH+@0Wl^Ah0GFZD(I7xbXcQnRGzpLviUVYYk^uFD zrU9-JngzI0Xdd7Op+$h3gq8vRAhZtfd!bE$yM?v^?ibnxctB_$;9;RdfX5ANt2EFt zz*E{f1$aT|9Nx(0Y%=oa8@p?iS$g`NTaCiDvMu}~i1Q=xZ&e+Yd7 z{8Q)~;A^2@fPV@71AH%31o&AP5TJoDC_sH-aDWnFNPuEtXn+>NumEj^;Q`tUBLZ|X zFs~E_Mg=I-Hab9eVN8H>VQhfD!ngnx!uSA#gvtQJ3~XycU}AvL+9m}UBUA;Lqz9`5 zlLJiFHYLDJVQPRmdT?4`dVqP_W&~I)%nYzZm=$1^FgrkvFektUVQzp;2DUXXFh9T! zZ3_a_3JU}55f%m5CoB$d*ub`y1eOLkB`ymr4^XFvR|c@+dL|ISHZacw0;>ZAw5cMq^^#KyvHkc%ZjRA7PrT~`%m;u2y8}F=ZBKwlguNz7VPAk}h5Z4Z6AqX*g@dL|;ZT5=g~O&z;Yfftg`)x9 z5sn3TS2!Nv1L1^;R5%&nGXvW?6*z6`)ON;1DV#M?>cMk?x&S|FJ0IXz;X;7lgo^dt8K7dkO|t_2bm$12AM8&3^GsX6l9^$Imi;BEXY!! zOOO>p*B~`Qw;=0L3*LcbuVgo+?_ z!hj$bgn>aWOX!(G5Qi{0h%F2W;uD4j2?)c2goNQiqQZzE31MWAj4&!lUKkyuAdCre zwJL+*9zl<+#pm2xmlPHsxUjq8^W9*ZwhmRyd%sD@}V$4$lrwpK|T`}2KiK2 z6yz&mageWtB|*LumInE^uq?<=!tx-$2rGgV2`hut7ghynB-8|HBCHP5LRe!O7uK4z zg>^wX2VOGz<|I8ij}m zjYGtQCLz*7afpIY65X^1O@W+AQ-nuoYfXc6KDp=F5MgjOML6*);+{q zLXQyd3q3=8AoL3HsZbu`A42aCUkQCed}Cl+eM9|1e5b8{i2n!`A^s-}2=S{hC`4mn zaEM}INQkDw&=9SJVIkTY*w*mSh!CB$jSSI67!{&S7#*UAFeXGF1M})aXk3T^+A2c~ z6()ojAxsQ0N|+R4yigTll29F@N|+pCnt^Ri2~7<#Q`@u` zv0RuPqDGh#Vy!SY#3o^0h|R+M5IckgA$A+s*22)D5c{<)4sk?S65_D1G{gyES%{Ou z@(>q=l_4DF2TaY&2q7$?CWKd59l|552@w+3nr?-4A(F!S5E)^E30T-@0v0xfs3&X= zaka1|#I?d!)2*;A#7)BX5Wg38m~Mrgrdy#l#67~U5RVv`S06)rO{d!Shj?B%5aLBW zcrbJ*#H-p4n?Qvlrb^*xh)!P~k#|--L@HiiDytjfMJQng|WTv^21-hT%qG+G}eZri0KVOsP;Drc5Xa z(_Ls9rkBtxOu5iJOkbfzm_Y{S2MNQi!i> zI)<4ibPBUT=p1IDP!?vH&?U@rp=+2`Lbot$gzjNB8rW8kaL+K?wDk(JQz#F!OXwYD zkI*N~L7{J$!$Q9>CxrfCP6-uZ&Itp;)CmL4&$XoWWFd?#3=ZQKhJ^76L&NxmVPPV| z@GwzfM3}UJ`TIh6RG5Oc(P1tZ#)P?27#rqVVO*H&h4Eo-5-P*oB1{N#yD%}#-3I2( zhHzDw`?Xbvc|e#P=8t-CN_c9RC$voq^RzHM%!|T|Fn<=dNVL_O`3k$=1AuI~>wXh`2cf!&z-wVsa{A6HT%fl91{Tmn8m_D)2VRCbSfMUvqCr$W}R>} z%zEKin9ag*ldEtd%nspXnBBsuFnfj5VfG1UOt-?>Fvo;*CRd@(MQFiX&u&k_cBA*jCd>vk2E~YZ2iF zp=E?ygjNx57g|TSOK20}PN8jt2Mo*)6-C-dctl%=2#*P+5uOk_MtDl-6ybTHbA%U! zvIs8=T_U_HbdB(a&@IB-LiY%72|XgbC-jW)fzT_$--PlAp9sApd@1ya@K2#{gntYD zB786OkMNUF5#fKrfC#?`10xg(gCaB*21h6nhD2yC42{rM7#^X$Fd{-HVPu5P!l(#c zh0zgu3S%Pl7RE;CD~yZKPZ%FzkWd+6s4yYISYc9xN}(#kc%eE%l`uKNR0H$8ATl+= z9BtDg%oCXuZdOI7vV`^e}w0R114YLV1yTiL#A5caD>-{BPLeisL5A27U5mtc!Upy z6A?ZVPDc1#IAy98PDl7kIAih^&PMo`a4y1kLS2Oa2~_N|{g`rJEisi8hVWLtC>beT3#wDufnM1_&*q3>8{M87Z`m zGFoUGrBY}YrCMkorAp`!WvWmbWtPw}${e9nlm$ZPC<}$MC`*MdQC0|DqpTLXMOiC! zkFrka5oNQ`Gs;$>SCm~sd6d0E?JPIC~;v#l(aB1N=6tJrJjLpjgF3qa-BFXIzGycdbl#m zt-^#Tw+j=a+$~Isa-V^1RYj|#Jfv-Mlt+XqQ63YfMtNG87UfUE^e8V1Got)im>K0| zVOEq^h1pSF6XryDOPCwwZDC%N_k{UTJ`xs0`B+#OzTR*b!xvurtaSp*G4`VONw%!fq3=uqVni1M?i3c3df>s5ROOLBAkd)E1WdR3a6s%6HZ4t zB%CqH3TLC766#Ez!ucq5!i6Y~{H0DWM!8HVis2UO$M6UZVgwD$-xXqwVnnnxju97{ z#7GLoF|tBQjLU_lF@7gBi*dEkJjS&`ix@WwEo0m)w2JY2p>>Qqgf=nm7TU&mKxh}^ zA)$SYhlLI?9urDqJSTLD@q*Af#-D|<7%vH3V!SGJjq$qBEykNd_ZV*rJz~5o^o;R$ zp;wHLh4L7m8`xIwSf3c*XzLr}YoT9^AB6rfeiABT{45NJ@ry7p#&5!)7!8EMF-n9X zF`5ZOV>B0r#b_^#h*2txjL}&b6{CwVIz~@nOpJ12Y>fWG_!t9(${0h12{A?p6Jv}P zCdH@}s$xtOs$*0MlVeO3ro@;bOpP&1m=1HzIRhlQmvjtR?RoDr7CI4`U)-zreJ^nc6B*s2&V zu_m@UhS$*PSQ8^GtTpKh>rA@B`WR_pLyWwEZEcKgigBg3&8Asli)mKaYMK?cnP!FU zF>V!h#JFA9Y2p=XW85d~it(VZ+r%sEiSekgH^$?_z8KF4`%T5dffz3fhfK4=;TV4v zj>LFFI2z+E;aH6Kh2t^)CY&(M3MWmo!l@Wv3a4ZIOE?$fd!a7Ie}wZURN+F5BH?0; zhC)%C#zOr#O%2Rj4)KO@+GuMOr@hcPPDi0hoK8YwnNbc(Z3=p1K}P!?yo zfo*k(ca2k{ty`S6Liaf9g&uJ>3q9lP5PHSgC6ve6E%c6az`*?YVZ3jg6WaR6IVn`c zIVTK=b3wmqV0=&St=Y<(@UJz!+c}18N=dZ%-IBy7Z;=Ciwjq{#?dDk&MKhEE^Er|1}urSV7 zdT?=kNt|!BEsgV|uq@6`!tyx(6;{OgMOYc9p|C1WBcUcvGht1f*220tZH4u5Itv@( zbQ3nk=^<=3RSH|;^bxk2Dur#PN@07Pp~8+hBZZwNPoXx>cwtwZ3Bv9;Rl**VtFSlD zOat567vCRefwlv2<_iboEEW#MS*c%jIDRC~8f{18Y!r^g*&-Y_oeC%7Y!gn#*(01X zc?xIZ92L&SIWC-wbIQQB>f-0)oYi(Aj^lDY5r~6<`8YwMegcoS1_``E!vqnbQG&S8 zI6*>ak{~SKiiEWuy2bxH7=&^5sudazrfdxCeg z^+@op&@;g&LV1GEgx(3h68a?gr_eXSzYWY!A}0DL_(5Amf}ex|34Rs^Ciqntl%T#a zI6<+2Z4F5bP0(E1umr7y;R)IbBNDV1MkXi|MkVMjj84!?7?Yr{FfKubFh0Qmp)$d6 zVM2lt!o&n4g-Hp<2vrFt3DpUzg~SOv1ueld!POlxtvH+Y>ty+^KD+X;-LCaIdf{!F|H+1dj@POu53| z1dj{*5X8- zzl5_1z7x(R_+F?p@e1b?6bTm-G&C^36(s8?X`-z`l4e4~B&~!-N!kmIlaw0RR+D6L zl5W~cl5`cCCg~+KOHwX0Ptsp#kz|m8dE+42D#>tdt&@xv+9Vk-v`tbev`bQLV7`ka z*&)dkZKX+O3mucp)q|aqos%rmR+eO$&?U)Ap=*-W`c>VM-IHw4)+5Pgp=XjULa!vd zgz_Y{LhmGdg+56R3VoBD6#6GQEmR~qBMeA#Q5cxyvfo{*a!?YdFgOXqkR$o8%f{T#{>r@kwqJDwEtIOh|IOFfqv;!lWek2~|n% z6{?dwAWTm3M+5WfU~+1bC$vpV@{BM&$#cStBrggxle{dqD#Ws(NM zD$}Wf`Rq}0wJBBG8dIvawaIl!+G<;$q?52ANtv+GBr9x6(o@)+q_2Vb9CC7Nk^$Pb zB^fMiPclTBx-;^sHNU~5km}G@; zILRvENRk@iXp)V>F%zqBJjpf#+d7dvX}Z;RD#30z#t{5utI4q|hWqS}0DD6-rWEE;LPXmC!83 zHA3?g*9$FD+-zWeGf1^cahtZ*Dee&3q_|UPo8ms9U5fjK_9-3}I;40^C{6LC&@sg` zLZ=je5;~`NN$8T|HKA*YHwug+VDA2!m5J5{9H`E(}Z2QW&11oiHLr2VrE2PQs`Z zWy0tb-GnhIdJ1Dx^cKdY7$j7t7;a!bXqcLqVzjnNDaHv^DJBZlDXN6YDW)2j-wIMw zQ_RpdEyZkMdWuEDj1)_RnJHEnm@g$u%}%jK+nf~Zg}Es<3-eNJ5$31ZE-Xl~OIVm< zpRg#!0R!7woLZ9Nn6{-UP6*3VoDr6%I47(~aY0y_!g-aR1*C8ZHKto(wW-v={7_`L*V zusg*=!X8tpus6ly!oCzw3;Ru_!hsYo8klb%OdU$`inhZkUKNg*PKBc>-Vu(acuzQ< z;%~x<6dwsEQ+#e!Cp1q}A+$_0OlXy6sL(pi zD4|W7F+$rk)k6C;RYHd}Q-#tr(}j*{<_VqB%oRGPSt69BSt@i%vs~z!W{uD-&1#{0 znoUBFG+Tw9X|@Ty((Dk*)9f`cFA1jmq&cjuZ<<3wzceR>{%KAK6=}{11Jcw91JgLJ z)>DHt9$`otpMiOsAU!NiSljS45n)7{xG*wJP8gLYD~wKawJb4 zxkZ?e=5}FXn!AKaY3>oK(%dgpr+HMElI96vYMLj7X=$DjrkgKi7iOe+QJ9(L6=7DI z{}bk2~6_%&@ zMOcyMXJKWUB4JgU20~4mVqtZf5@C%AS6G{-m9Q>NJ7K*ESJ;rIOxT#Fi?At8xv)7+ zFJX%bSJ;}SzpyRMXkllXaYAjH3Bs;4mBQ{clZ8E|S7C3OX~MoVvxNPoVBtWT1;W8J z%Y;K|RtSgF)EL;-k@V3tYqcFqvqd;=(iKjaVuh1wYK2p2_6Vob95gULrJO#S=D4$`BSBXNU_;GGv9~3^}1B zLp`BshAV|;8Lk&vWVk_Snc+4A^IZj*)*0^5)+WQ9LfZ`Y3+*yIB(%@)h|nR!V?t?$ zCxng}o;9$oPMOXbp4V2E;m<;s46g`XGrT5r%kZ|)J;Mh=&kP?5y)yhm=$+vUp-+Zy zguWTR7W!rQPUxTEN1-Ca&%%HV{}Tpg_)Qp;p^-2+Ly0gXL$NS4LrY;;hBm_R3>}0K z89E6gGn5IVGjtcmWauM|&Cp*MpJ9MdnPI3fA;U0XVumroqzsh?=5@eKb%sgWCTEx? zOvx})n3`dhFfGGeVS0uI!i)?Hg_#+a3$rq;5N2nn5$0rAC(O;TQJ9xui!eXKHeo@A zy~3gl2ZY5L4hl;$92b^mI3_I1a8g*Fp-xzl;exO-!)4d%DM1FOP?NzUtj-V+)|gO* zwHac-83xh$Z)%`Gs9g% zZHBvrT^Sw}cAJ!iJsBPs_GWm>z`S6X*`MJVZ3i;^NjPZ!eHg-_3@;0ZGrTGs$?&Fu z`J(d7u?+8PJ8l{lPGtC4IGN!y;Z%mth0~^L;Y@~a4Q%Ud=A6k{TV00#3g=DE!UYqr zP?V+Ez%uO|p~<#aYUPk}N%irdi4j%=Z;%n`arItwolB zLdz^egjQKb2(7b>7TRPPE40m0DYVN{ZD3v{$acswRa%+K;SVL_I^3k$P+DlE$Kg|Ilw zSHhAk-v~>yd?zf+@`JEE%m0KGS&D>}S?UX`vNRBCOtr%5EG5F4EUgS|Yi)L2mJZt1 zn~sGICSPG=mM+33)3LBQOD|zdmcGK)EEU4GEJKCuS%wKavJ4k?nw*8&EaQb;CTC%H zmPx`M6SJ^4%QRu1san{dWxjC0R4p9LvPd|TWvOsDOO0?O%R1qxiCH+7WrJ`$%QoSJ ziP^yX{ULig%OP!NOuWL`EXRd&SxyUeSxyP(v(yO}vRn`@nwYP@RCWCvKA}Mluh1|@ zL}-*FEHuuM5}M@53dK1xLP?G*gr+&J5t`??PH2(idZA^Gn}k+5elN7nafi?*$2|t- zZNyx=91m-2pW{)XG{=)d#~jZJopQWjVBSE;mF0LzTbCTK2wii$E_BQBrqDgdJ3@~f z?+HC~d?@tF@pqv-$3KMLIleM5?;hs*=J-ZizZ^dZ{d0UTROI+c7?9&2DUXQ zH#kSJwjnv12}5%<7l!3%B@EBeMi`N!R2Z3~Oc<4;t1vo84`ED>-on@%{e^Kk1_GSL3>7Bi7$!{2F=G8`*e@*3aX?s-O9A|_T zIqHO!IUG0Wc|s0LsLA0mFh94PTVn#%wk}6fSZ@LqHsr_&8*|hXHs!cn*qq}kVT%b= z*qY-;VVen5*q-ASVTTD+*l7Y4YIEEz?8Wt8BkVW93I}q$C>+Z1 zvT)e^Te5^BIo=YE<#<;(p5uMtgo#r)nd1}TRE{r&GbT;pY>w}QbEZ+D&ZH@v&+&_J zA;)jR#T<=0L&?HZpP@Jc$f%ztbeA7HVv^C4q zS7@H6LTHg^fY36}V4+o>VM6OXBZW43#tCinj2GJFsWvdr5Aq%IOxISLXSUEW&m5ss zo&`eZJWGVKJd1@cd6o-Z^Q;!SIq}=TrG^vbB!=A&-KFiJhupyd2TZ>FDK?F=D9=Lq&)WsReA0g zs`ES`OwRMLFeT3)g{gU-5vJw&lQ2EcbHa=~F9|dA{Kde$s+gah=QV9}^1La`&GVix zFVBa<{5&5U*w%vl!aSd8Ta@QZVR4>s^x%^G(mX$CTbAd)!ty*n3oG*cDy+;?B&^EQ zK&Z*nSXgap7S`lxA*?kK3+wW95H^^Ig^ea+VN;%R1M?#Z`7L?+Y1^7-ps+2^AYr>{ zSJ;tfxUe(NSfMsgm9QtzGz0U|!u&pythW6oS>b?5RydevnQ$o2N(1xJ!u%0arnaMb zHVVh`Y!Z%}R)rI$RN-Wvox-U+dxg^`S>cQcRyb>d70%^3CDi3PC!9B(3KvYd!o@s} zn=S=gRKOPM7jO#=3iySF1tLPD0x_X+frQYcKuRbskQGV_)DxN(xKe0V;3}bcfop{p z1#S^q75IbDy1*Smn*w(UZ3{dgv@3AG(7wPULWcs638e*|Hn6RZg-!)t5IYyj3cRd` zyA*g;=vv?np<97Bh3*C36M7VQU+7ujZ$hsEp9{SUd?oZL@GqfXf$xO=1^#1T{+3V} zP~cZ>0}C_|1{G*53@*?}7*e1_7+RpEFswiuVR(VI!iWN;!pH)hg;52%3!@A462=rL z7seLoBaACBP#9leuuxfGh%lkRC}CoOvBIPR^}<1x^Xe3Y-*{7dS7hC~(=$dQM182gw+Ln!kPkMVXdiFSZ87t)|*&`4F%G| z#sWEElZn;9{62k_6u48^X*v~Z3*0O0D)5l7+jJ`IDe$PU zx4=`vz5-7Q`wKj8V7}nIaM0wc?NEWg2!~C#!V#0JaJ0Z%!m$D$2*(S2B%Cbpv2d!u z7sBZRUkhhUx5C*1{}#@ffQ7mO{}s*`_(ix-;5Xr-=~yVLM-!obJxYWI^=KwEtVe61 zQ9arj*jD3uP3lpqt+*atg_3%76Pnih|EBH(sH$^&*f?cTY%#HGJawOQ>|LX=#l#l7 zu^YQF_O3DZE_MOC*u{!i5V4D0tcYFgBC%mb#e&NB>~-fq^WB->+~>aBnLXeF4zTz8 z%36(?7TKUN%OVjPvn{ekV~$0(Y6M$khen7+Vl+Z6vRh-WMGk7rv&dnM`4%~*vA`n7 zH5OXrgvKI^oYPorkqa72EOL?HK10h=i`>w)Wfn=)SZ?vx5#UawHA4!vCblC8tX0cUSoqr(ls_(hRQ5Lc2TC_#18aplG)QGW&OJkQs3Tf=NNKuVF7Ac{z*CJ&# z_F1Hy#(s-b&^Ta`N*V_(QcWY)A~iG)S>#uZ!xpKjal|5ZG>%%Np~f+bG}1V3k!Bhv zEYecrq(xe3#95@B#wkvN)i`aDjv8ky(pBTEMS2oEvurtUk^Z_CZ;^f)7c4SJBf%p7 zYFxC)D2>Y&8BcJ-vE`~oCh6KWrdH!RQ>$^qBC|CTnOefn=ceVBMds_;ZHp|{xWm+H z+_lImjeCr*#(j%~Ydo;XdW|H;SL2~YA~ljN5>2oRs^t-Lt80%fvPa_y1FZ4XB1bfy zF~Axr7Kzh%ZjrMZFBo8rmljDN{Cr+nUR&h4uD!9yHH}o}R^zQj?rFTU$ODb{7D>`b zv&dtO4;Fc*@zEkLH9lD+mEiY@mUN4})3q-a`Ka;LBHuK=F}WHU7V#~iFBh!huQ9-%1C7O2X{52lDt~B% zS)~QR&IQ(GR%xqi%dOH*V}(^Z>l;^ES6QXIuC2C84~;ce>7x;DmHrxQtujbsomGZv zthdT=g4Ye!jaC_>Yn!YxQ6s`Cll6_8t&vuls%u-UGFxM-RYEkjS!KTdzqVVWtg=wo zc35SZMzmE{YV5Si8jTpMglp`w$|jB7R*BTuW0lPsd#$ovW1m%aYV5blE{y|L*{^ZX zDhD)Tt#U-;kX4Qo{35}6#42&RcGN28HI7*&LF2epE@_;w$~A)53f4HQ+|adCR=KBZ zr>$qK@=({#TII3Ed8<6vh_}j1jSE(Jqmf{hG=hf{t(UCgThbiJYs{g>b*p66xM7v- z8i@>~#!cps@bkH4z0CmX+8qW^*X~;HS;eVq_pRd5wFlNDMo!lrTBW!~vQ^6I8~?LD zvPwl=du)|T8c(cJP2;Ims%t#6N_~wN44uYHW=-RjRhnwNwn|HlH&$t-k;?38ytPUv zjdxb*s_~x5BiOFY`oSu_bnT;6`f7Y)W@=2b$!v|uHVM&~Vw1TVK{i=R@T&mZbepWuwHY>9qcPJa>osQCWP`?R zn?!2NvB?&VV4Fl~gxF-KMyO48Ys|IDevNrHiPe~IlOq}nY;sg%p-tj67TM&C#$ubC z(^z7Yc#SZdT+~=Y@)>vhehZ?JG@>FAuO;R+%ZSqoM ztxZxj*4ZRYW4%q@YizK|Cyk9Z$r(b#X3 ziW&!O^0UT4n^e|_waG6Uhip<)Hp>8ES)HtDZ%!6t(>5^OS5#eaCJ`D*47WzIO`dX^j^Qx5i7GBxt;1rZryM(X`!*yF0C|{*`>9{ za=WzCSYelr8Y}J6O=Fc^x)XjrtL{8EvCl3$HTK(Om&O6R>?izu4%%bwa#+_6+2x4F zVY{5vIAWJLjiYusr*Yga2^uHtaz*2$U9M`x+2xkTDZAX$IBl0CjWc$6sBzXVk2KEN zB}L=BU7l&g+vT;!1-rZ@*f`yO(Jr5L?UG%-YFxI<7mX`+@hM}@>Q%e=X1$1Vjm?lQ6(_v}(g;{kK3kz|*m8V~JKLL-?u)%edYBdie2hzJhw}If?pEYUoxq>_KE@3cx{)K8gJ~7r}z?b1`@gI#)Se6&kHjZX}y#%H?>)<|bAHNMzogvM99jMezYfNEse zWs=5thfF2-tanUw$Shr(WNEMBP z4ympY>yT<1haB>o#$kul(m3Lfx*A6vQcvTULmFxvcSsYB6Ao#iand1eG~yi6j_~t2 z-qCFLndonV?;HsJ7k8&4TsFsNOVZB#!UuPsIpjZ$4-R>v@zEhKG(I`x4Z+X* zj&z57(EQ@~>X2{x=5G%1Dre4ShC{qH#ydI3)m)k2l*}3vosvakl2fv4Om@om8dIE- zQzOVJ`81|FbV^};<1FWFr8Q<`XmIik1Wwypvr_9mV=9KvazYcUpIVDWjb~t6JMzm9wYwUDN zxJHaq)@$r?$_9^Zz(@wdkamFbRHO@NaKaF!v zd8TpRDK9kQo$^K_!71-FE;{8i!RBSo%TD>KYge4&UEZA9t4{IPxaO2h8rK3HJ&-;H;ojh{I2nw@zr?2_-edld^KJ<Xq{bXWqBMdH*{%^{$WDz=Lw0G*HDtfWJVOp?%s1qS#sWi5 zX)H42w8mmX&TA|&Btaw0kV_g%4Y{td%#cKl<%ZnWSYgN=jg^KxB>a3<8LJI>q-$#o zd8!d^$a9UghNNn&GvuAddP6>HY%t`L#zsRjG&ULHQ$b%j7~-w5*^oejCzp*ahGf>Y zt%m%dYuk+NhUC$;C_@Tp>@eg#e4NaMI6O*Kv!(vskJ z21cAA?RD*xAssYM8`4SRj3Iw%oHeAU#yLZJXq-2sk4C&912ireGFT(QkbgBU8ZuJj zk|85BE*mmlIx%+^RWWRAv7CRyW_AqzEb8?sd6jv*^F z?i#XE;%L%wNb7~=JlIl|*z5}+}`C7CrQx+IgvB$s@z zG1(Emqw6F@@Y(U$&VV-Tw>Lj?h?Dk441eyX1c_sG0P<-HRiaaghsGS$`fp4 z;0kp~6l3z6DyQHSZLYLIhSmct11a}>}mbj#at}S&*D~)9?X|1u`CG9j;xTJ%| zN|*elvC1XgG*-K$r^XtW^wkJ=Nk5IXE*YS)&Lu-N*1O~%jSVgtp|Q~=qct|UWP(P7 zOD1V-cFAOoNSDmi*y57e1m`Kawz*`!u5EY8B8@1QglX(>$r6odm#os*>5{b?yIitP zW4B8-Y3y;yW{tfriPG5Tl4yH4eGtu*PAR9Md@Bl9L)o zU2(Xk2rNpT>2jR^x_CvT7tU zwHh~Fl3U}JOY&&kW@r8ZTYaRO6LPnrXadiZ$N2q>Vk6x@CsOB)81cnCzCh8dKadPb0`Ji#4XYB}`+QTUKk# zaLZbanQqxg`1#Co&vr|MuFY}FR*hh{?9>Qx%WjQOx9roH>z0EW^W1VoV}V?mmb&GJ#xl1gYAkolO^p?9d7!bFwi=W13xA<#Bx+Oqk zi(4{lY<0^I8r$5GOJloRa%x1mr2ygQv%?+jmV&yr(=B$57`HezcDco)vD+<$HTJlr zn8seWRM6P(mY*~ZxaDV!gKnv=5$l#(8i(CdQ{#wR>JjXA;y&hk zmJu44+%i(*vRg)LTye`djjL{%sBw*9*0|2-YTR(kOpQcFSK}6=t8v>cOEvDeWx2*( zW?18%Tf#N&yJfw`1Gj9@NODW0MzULCG##oej}+6G?U521b39U-@bd}w zgm|Qau7!G}qQ+d0RMj`m^UU|iZ@RX?BQ-S^dZez#B9GM9SnQDo8cRIVR3pqItu>Z; zq>aXMkNl~z!Xq6tR(hnf#ww5W&{*w}eguy$c-DGku;x0?dXEg#H*fIBXkFXr+2oN4 zx)$M)i5iyXBBO^4vdSsNwH;;_f$neN`jq!zK zipGROGF4+@A(^f*sgQ(dOfDqB8dD0%e2t(&vPi>xILFH?5Z@yo?BLdYFMc)e#lM}` zuc(JV&=y_L3xhBM6EGd2Sc28qgedI8QJldg+(Z(dAr+tDG1XO^2&mo_z?z*qdcmj78;@j zI-ooHV;IIF2y?IyE3h70u?w*{iFjPYT|B}|q~ROZpUpXoZgG zj{X>iu?WH(EW}D|L=^Vn7|!7;?&2|C;}d+u%ZoFmymRHQ@vxgPStfubmfs;G^|XpPS3g~1qw$(W6WSc#2@!af|sIb6kE zJVh$fApu+u`QSiNltXpYMN_mzSMrkpaZ&N5JqAWW?=zVU=wy=KaS%(uHhb@;0->*H#yfbq>aUADy4fpT_Z}1tuS-2i@!;T^-iz=vvMregj=!roXiAk7+1z3em*n#~x zj`O&Ndw7Po_yWJIe9j{;>?nrvsD?Udfz;ET$p^ORxr;u@i@I3Kww`5AhuD@eKjraXpyl z)H_iO!^BC?aFi+Sv57J(RO)$^IHqX31j`O&Ndw2r#$Z0)Wns1g_ zksDSNf_cz$71Tl_v_dEJ#2}2sB$$UJFTe_HfO($rUL41HT*Ez>2MfQ!XP8F=XGLyU zQ3$2+Gk!w@v_J=R#{dk6dC=_)%!7GU?K*77UL3_)T)`bY!Yh1)cMiTMBPR;NjgqK{ z8mNb6FwZ^hhJF}^aWIc1HP0UnLpUN4gIJhHfhHgkNifgse21^_&&k(M-AOM-h}o71Tl_v_dEJgn3-o zNKC>kEWiqEfO$xkc^cMHnCD$x!#zB~8+?XuUcR0oFYG9SvZ#VuXoObigq|3Lk(h!x zSb{a!jGZ`$lemBzcmVT&A@d}mFYx1q>US^?{jsA6%AyKtp%Gf46MAA0Mq(0XVF6ZP z1GZy7j^jM8;U1pAJo@D`eDm}1L~dA72xU@=D1m>X~dPqkBu7}*P!#q!;EUKUu%rh`rp%Z#y5Jtj0dBQwmVgXiQ1I%L~ z_TnhagB{Eh9PZ!|%rh81!aQCfGjgIJ%mWZgq9T4n1GIp7>_B&z=LrnQ1ej+6%)>Hl zfVqMHUL3_)T*Ez>`{%#GXZW()cvj?w6@^e5Kf~N^z5&dg9n4dk6Nz+#mZquEE^j`U&1Zx2k5#->k?DD+-}3%zdHF4WOIA+}XJcdc)kL+1!hH z3d~KG7hw&yU>6SI6wIxRZ{i`I<2}A1fIVElhq*Jb6UAWe?OP3X&;)-Vt9^yIP!^`gFl?l0EL1C0ZWzNW{X-7?2p3Ag+&A$T{0?)&!}jr7-diyHPH~}?qVI$1LmG#BQO!Ou@Ea^ z?$i~9eK-blQ?2VTcgISBxdYZ$_`CVOhkS6LD9XXyK+4=Q%G?>MHM+vw(#hPyX$*p3 zZp*Y7s}X@{#3Bv}F!wx4LJHo&+@2`V!`Cqsgd64-LFT4FHBb-D&<^HCJ?4%)=GHso zFb$zFcgYDyBw}FhWOEvqa0|(Jfiz?wun<3op#TgNM+HltQ z5s1bC#32ERNJ0wC4MM)czc3$9!s=WrEw@fhZ259Y28QiOSdxgmoMg;54oQ5)ta2(8f>y)YP~Fd4J45G%0}QP_uL zIESk+pX`4O^I82*@F~jY1I*{?%?IZ_D1}P+74^{)=Hu!;Fc2ePK5IS`^RW^e5e4(X z?qfKItGJJ+NJToN81Em?{cu)$J@GBakB|4%924VyzVm21Systb0<_+Tq5C`+7??faa1@G_`fn}K&6o3Kq zp6QDC4Gm!4#cbZJY~G$c2%|9tbFc`jun9Y0-s^iD=Wz}9@CU?(7cWCPyB_xFz@*ri>U~~60E^yn0L}0#7SJh4Lrazyu}yzRp9+YUf59tWl;sS zVBYT43g&%WJuwL8y;$Z=SF^AH=KWIUjZxiHBb-D&<^J9H|Bjd!!Qoh5DN1qm~ccQ2Ih?_aY#TSl8}OT_zM4u ze7>Uq3>1fXONMz9hIzMzd0T~f+k|AM(PEA}EV0sD(yog-+;+ zK^ToGn1e-Fg-zIj{Wy*bxPb?Fj`#S6fa-jGL4G(<4CPS`bVxh?BU08+eH4c#m%g_=W2sKb$CmpYRKQ#~=6;f1xk_!B|X12$o3ss{DD957y9BKjK_4$#Zs)r7VN?yoWe!i#6vvCdwfH{ zuUrrL;Y2Z%M>W(z6SP4W^u`d3#uUuKBCNtD?7)5;$9Y`CJv_l1e1YF@%nS0ujv^?F zYN&%IXoD{3jUgC~DG0$*ti=}W!Xcc(Mcl+gJjZ){LqJX5KjeoK#ZVr<;CK9iKk*m( z;vbB~RD@s&)?hPs;vi1q0&d^|p5ZONz^@kXAM(PEVknPl_#J=XPyB_x_y=P#6(LxH zHQ0=uIEa(DfE###XLyS*@T<-BkQa6oL0MEm9W+53bU|=aUemGGKKj9brjz91x z{z6~;gRz*35G=tOY{pI;#7SJh4Lrazyu}yz)n{Ii7j_guSyVwSG(szMLQf3BNKC>k zEWiqEz;^7#QJlvOJis%&#TWQB;Qd2>I8hAcQ4Mv_1Z~g-y)gu%F$Ht52&=FOJFp+e zaUR!j4^Qw0pW)k(?|;Y*D+-}Bs-PAcp%przCkA08CSev9UqV zmSR1&VGoYr3@+m~9^*AWLH^)%C-T6C!YG5vsEx*Gjn3$W!5D)e1YKcWjRQD=cwEPQJVh$fAsPp&h!RABJHZ zrXdtz2uCDh5Q{h@AQ4GO!8?3~e=Ar( zaY(=|B;yr6!n-x^A9A7~3ZXQ9#&2kV7U+QP7=YoJfEk#FWmt!;*p0(DjVri=M|g#g z@NUEPkP`*rMoCme4b($3v_m)a!!V4)G=yRq)?q7l<1kL+5^fy*P@qxPm))gg5vM-*$W-LT*@52&M5eenSJaKnHZk01U?j%)mUX zzy@r`UL3_)T)`bY!5e&rZ+otX+_0h$O5)^;fq7Vlb=ZpCIE>S{ zgj-0)3#1_ffgSjGq5upOM+HwHL`U?%K#agd%*1>w$9inT9vs0rT*X~H#%p|nPiL-&Jg}iK%Ahi8qbb^=EBat4 z#vlm6Sd7((L=0jPhXf=d2`NZJ1_HbA@k9X_D2@uKj=E@yw&;pJ7>Y3nLNFF%H6jp= z12}501ZQv=xA7RS@d-ZNxE^xB zg2E_+%BYEkXpPS3gP|CMAOs@};fO>GViAV~BqA9vkcJEd{>{H13cx^dR6upqMN_mz zSMzBciYm$8Zi; zaTkyA8lT|PlYc+tf(3<929;404bc)E(E|f90uwP4^RXQ3u?>501ZQv=xA7ld;uCy& zaXsXM1%*)tl~EH7(Gnfe0|PMvlQA0$u@V~*g#$Q&cwEPQJVh$fA-(x{A`fgRj54T< znrMiY=!jkzj4=p8Fcu>mk%&Po;*fwuBq0Uw@D=`jm>1-Of#Rru>ZpsRXp64sgP|CM zAOvGERwDw@IDiv~$93GtQ=}pt(wFxSd0<0fltE?GL_@ShNA$ozjKD<9#C$BrMnqvB zj^P}x;w~QJH9kT5aXsXL4TVt#l~EH7(Gnfe3xhEVK?ufTtVRT)aR4U}kL$RPr$|LQ zq(9d~9@tPAWl$Nl(HO1K8NDzVqc9n>u@EZ}foL4S3B)52Nk~B&G7va`uP-P71I1AR z)lnBs(H33N2g5K9(-4XS{gj-0)D}03aKwf_$Ckn!ilBkFpsE1}~hi>SH zVHk&L2t^pe5s4VYA`S^iL=sZ)4qxFvi1|c5I8YQ7Py_YQ4DHYj{V)vUFb$yyLpZi# zHxA=8F5woE@d9bcK;U3Lo+tnV#Zdv(Q5Q|o7G2Q?Loo(H2*zTpMg(FIi#S}uEhOUw z(vX3`AzTjyV4yfEpgQWJDcYhd`d}!=VFu=58P;JdcH=Nk;}UKm8L#jW-b4BKLrxTg z8zoT@HBb-D&<@?u55q7H(-4XZpsRXp64sgP|CMAOvGE zRwDw@IDiv~$93GtQ=}pt{{M14lYiZFyD5;2IyX!#ZrmZXCvGT*56R;|0=?fxr=5 z4+UVLI4Ynz>Y^#yqAU7fD8^tKLJ@{_*oxgajMKP;TX=+5_z2&T{CtGmu%Zacq6%uE z5n7=WdSVbpViIOy0ajoGc3?k_<2#wbk2Y%Ih|Y(z8; z-~{4v9ry7RsYr*6;d;me8w#TgDx)SEq9r<_2L@sUCSoS$V>#Ai8}{G`&fqd`<3GH_ z2Y8L;_kzfQAK^j?{DfcdJO03*_zQjU55{6DLa+pDuo*jX5GQdFH}Me9@g82|xE^xg zN4QV|Kj9brjz91x{z6~;gRz*35G=tOY{pI;#7SJh4Lrazyu}yzjpz5s$O}7)pe(AO z78;=yI-w^9VI(GD78YOyHefsU<2cUa8t&l<-rzI*CU8CEg&jpu7FAFSjnE2R&>KTA z8dER_i?9ltumcBi5*Kg-5AY0c@eKhJ`FP?-xKIj}@GI)0IojiI^vAy#kLj3;0D(>PjUgHyF3ZH+- z0~-pX3@W208lojSq6Y?I6eeRf7GfnfA`1I(4Dq;*`*?~}q{Bam??Wg61I1AR)lnBs z(H33N2SYIiK?ufTtVRT)aR4U}kLyT63f>_Dfm8XufP!$NBr2i?>Y*9hp&R;P7{*~5 zLJ@{=Y{hOI#aUdz9X!G-e1z{bzAqp*tSE%i_!+;U0a~B~dSVDhV+!VA5msRnc3?k_ z<2-KQ0iNMKz9C>b-xrV{P836VR6`v!K^t^IZw$d`Ou-y1!YXXS4(!KqoX0iX!xOy0 zXZX(G{X=e8Q3PdC1+~x!trrPvnIiMNk%1Pz#OF3Z2jsgD?`4FbfN?0voU$dvO$JaRqnq2(R!F zz9GyDa>I&3D2<=-8ycVmI-olSU^pgV2IgTI)?q7l<0#JJ3hv+$Ug0CWL;3eZP85V2 zB~cMIP!G+}4&5*S!!ZFfFb^xR0o$<`M{yQca0id@3LoJ;m+K)X3c`(&sE8VC#%ds9&*oR{{hpV`c zr$|LQWGVlC$O9V+qYNseCK{q8I-?f`V-zN1HWp$fHX;i9a17^g6?gF%uaOQ}#`TZ~ zHWWq~R7OoSL`!r;4-CWzOvFsg$8xO4HtfL>oWW(>#$&w3C-^L9UXTkGcu)$J@GI)0 zIjaA_*FUd+Jujwt^CJ2G{$t+7|G)p8raPi|d*$#d&0Z*Vx&7)7uWnvbydu0#dnJ0^ z_VV@i_qKRDy=!=n@}A^9#e1svI`0VYDDN}g@!prb?|Fap4)7`NQ^KdTPbHt)K7aa* z^$GF`^*QQu$>*t$mv1KDT)q}xhp*GOjBjn<4!(c;_VXR;8{vD}_m1xq-&elrz8Ss+ z<#(wkePxtPmYEVJTji+4%N=ElGJVWs&s-t%;LQJI9+7!$ z=5?93WRA}KD)Yz8`LlSkl*m#sOS>$cvy9F%E6dz0o3rf95}V~@mWNrAvt-F?&FaWn zHtSDW8)hAsb!pa^tjDsZW%bIIIa{u5RkBsj);3%3Z2hwh&Ne*Tt!$68J;|2sJI8mf z?`nP5;=6yooAKT1?{<85>ARfS^JZ_Dy?OQ)*;{7slf7^DQQ4$W{i1<$SB>gBfT9Gt^W*p!ij&oEDG<344Y(1pok-Y#{pp literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_duplicates.bin.meta b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_duplicates.bin.meta new file mode 100644 index 0000000000000000000000000000000000000000..41bfc20fab35f4b940ce894bccdecc731536017e GIT binary patch literal 17 QcmZQ!fB+UC#R#kZ!#o&+8Dvs4Sqv$UBxa#d#Kxlge1B3ve literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_no_nulls.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_no_nulls.csv new file mode 100644 index 000000000..78443decd --- /dev/null +++ b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_no_nulls.csv @@ -0,0 +1,51 @@ +row_id,key,is_null +0,3,false +1,3,false +2,10,false +3,11,false +4,13,false +5,15,false +6,18,false +7,24,false +8,24,false +9,28,false +10,30,false +11,35,false +12,39,false +13,39,false +14,42,false +15,45,false +16,51,false +17,52,false +18,57,false +19,60,false +20,62,false +21,67,false +22,66,false +23,70,false +24,75,false +25,75,false +26,78,false +27,83,false +28,88,false +29,91,false +30,93,false +31,96,false +32,97,false +33,99,false +34,105,false +35,106,false +36,109,false +37,112,false +38,115,false +39,121,false +40,123,false +41,125,false +42,129,false +43,131,false +44,134,false +45,137,false +46,141,false +47,144,false +48,147,false +49,148,false diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..10b53386f7508aefdef0e6722728b039d2f014fa GIT binary patch literal 1471 zcmZY5SyB^G07l_^Z)7kiAfh;diU`hT9uNg_1Qk$mL`9|CfU*RaW0eoS`{o+ljwi&r zhbsCZ>8~r@|LGLBzkJ)?EOm|)LoutIVpNqBld7gzP_-2Isd|b%s*z%^YNptyS}FFc zc8dG6%Cr;}Edz>{1B#Y|ik3k|%aEdFSkZDw(K4cFIh<9YWmM5}MA33o(K4o3Kdx9m zp;$kuSU;s$AJ3|?KA~7Ytyn*!SU;=yCngmw=M*hdik9<=mT5)HOjeDS3yPMDik3@? zmdlEkD~gt@ik54Nmg|a^lA@)YRi~w*XsIe%YKoS+qNSl|X)0P;ik7ybWmeI0BdbBn zoTBBXqUDyNWnR&;plG?RXjxRW+)=dLRkYm8YSMCFvA3kyTUP8nQtUle>^)KJttj?Z zvs&z}Dc=35;@zJqTAnLf))g%;6fG|mEw2&=4 a`Q4u5YftymC|}F}&!;%=JbY<=aqS;`H%!q0 literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.bin.meta b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.bin.meta new file mode 100644 index 0000000000000000000000000000000000000000..0c386fea4cd0efa6adcafe4d82c15ba9bd434e69 GIT binary patch literal 29 Ycmd;LU|=XoEiQ>S00Ax}o~0!t09v^QMgRZ+ literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.csv new file mode 100644 index 000000000..95afb33b7 --- /dev/null +++ b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_100.csv @@ -0,0 +1,101 @@ +row_id,key,is_null +0,test_00000,false +1,test_00001,false +2,test_00002,false +3,test_00003,false +4,test_00004,false +5,test_00005,false +6,test_00006,false +7,test_00007,false +8,test_00008,false +9,test_00009,false +10,test_00010,false +11,test_00011,false +12,test_00012,false +13,test_00013,false +14,test_00014,false +15,test_00015,false +16,test_00016,false +17,test_00017,false +18,test_00018,false +19,test_00019,false +20,test_00020,false +21,test_00021,false +22,test_00022,false +23,test_00023,false +24,NULL,true +25,test_00025,false +26,test_00026,false +27,test_00027,false +28,test_00028,false +29,test_00029,false +30,test_00030,false +31,test_00031,false +32,test_00032,false +33,test_00033,false +34,test_00034,false +35,test_00035,false +36,test_00036,false +37,test_00037,false +38,test_00038,false +39,test_00039,false +40,test_00040,false +41,test_00041,false +42,test_00042,false +43,test_00043,false +44,test_00044,false +45,test_00045,false +46,test_00046,false +47,test_00047,false +48,test_00048,false +49,test_00049,false +50,test_00050,false +51,test_00051,false +52,test_00052,false +53,test_00053,false +54,test_00054,false +55,test_00055,false +56,test_00056,false +57,test_00057,false +58,test_00058,false +59,test_00059,false +60,test_00060,false +61,test_00061,false +62,test_00062,false +63,test_00063,false +64,test_00064,false +65,test_00065,false +66,test_00066,false +67,test_00067,false +68,test_00068,false +69,test_00069,false +70,test_00070,false +71,NULL,true +72,NULL,true +73,test_00073,false +74,test_00074,false +75,test_00075,false +76,test_00076,false +77,test_00077,false +78,test_00078,false +79,test_00079,false +80,test_00080,false +81,test_00081,false +82,test_00082,false +83,test_00083,false +84,test_00084,false +85,test_00085,false +86,test_00086,false +87,test_00087,false +88,test_00088,false +89,test_00089,false +90,test_00090,false +91,test_00091,false +92,test_00092,false +93,test_00093,false +94,test_00094,false +95,test_00095,false +96,test_00096,false +97,test_00097,false +98,NULL,true +99,test_00099,false diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.bin new file mode 100644 index 0000000000000000000000000000000000000000..8289c43298b61250467d3629147247218c0f3774 GIT binary patch literal 18317 zcmZA9XOz~}`FHUt_Lkmb?_D~mvBwr$Y%#``M2)e<*kWu!M5IfXCZf_s1f`3ps33@d z6saO0&0auksQ=IXWM$5GJ+szk*u1&+@4&g{o`GSzo3FU(=1UK&ywy}!ZC7XYYO1S? z)vu|pDb}E-y0%!un(8fzHL9uJvRLDq>aB`3sj1$&Sks#7ZHhIksou8E>S@`on3nB} zY1yHemK}>}*{PV8?-kSX{bE{vP)y4Yi)r~$oz>U!<6>HVQcTNFi)r~;G1Grh%=BLt zGyPY^O#gK;(|4}32Bz;)%=BH0nZ8>w(|0fCB=#t#WzS+-_9~`j?_yf^DW+xLI%}w9 zzhYYUFQ(;yVpJ9Vp@(arsbGoT8=HI<+x&6jxVO=gkoBL zTW3wQoLJ1fQ;L~)YBBSESIoTAikWwMG4swSX5N`~*3`VSih1_m7xV0A7t?Z1F)im7 z({f%hE$0{0@`qwtE-0quk9F2e%Z0_X{Hd6hi;8KvxR{npifOsDn3l_mY58+8)BpNE zt5?~dmlreriejc;SpT7SnQ9F)eo&)AFBU zO>3*0*L|?6SHD?p^*u!!99CQ1BD7v@^}V6>YpYv^HmI$>FSKE8b*s=uwbl2BHmX@heW)Zq*WRW&eF)u2#S zgF{sf2~{<;XcJY#LRCE;s%m(sswYBKjR;lsWT>i0s)?bho(WYoDOA<7MVqOb9IEQMP*u-|s(K+*)s#?GFNUgmDOA1>sH!=ks$MTzPu1K|Rr5ks z%@0+zAXLV}P#KFtWh@Sru_RQ+(xUZcEDM$KW~hwip)%eIbvP?RRjmwFwJKEA+o7sf zhpJjrw6eX|hN@Z@s%m|xs&_(Fy&J0Py--!}hpPG@RMm%}sy-@O+1?+As`?~U)u*AV zHiW9$7^>>CP*s~kRec_+>WffSUly%w@2^5teI2T5bEv9sLREbms_MH?Rn6-@0j%Fh z)jgrAT7;^)w`gUVw+vNvU#O~9p{ni=RnKv-7OQ@=@p{lxts_Gu9sz<1*2a8sgdCyQ)y+T#>4pr4BR8`+lRS$)#>KCf& z;ZRlmLsdOew3&>@LS+mHl`$|>#-LCcgF|Hu36(K4)ZEAGv_WNE4-b{`M5v4rp)#Hf zl`%3@#;8zppAPl-qeEqkDO%ajV?$+(3zacGRK|o*852WgJQM0PCWWecHdNK*P*u+r ztuNz+P#IG~WxN<_?#rR(P7O78TBy0Ngql0OXk{VK2$k__sEnDRGF}UH8nZ%G%??#H zCsftzp{nMFs+w1{vXJM8s#*}L>WxrU3qw^c3RSf@RMnDDRZBxvEelokX3@$*ULLCI ztx#1fLRGB{RkbQq)!U(}R)?xu6RK)$sH$~ED+_sjsH%5DWxN+EAB4*IFjU4z zp)x)WmGMc@$_D;4RK|u-hqEzM)n}oqHifGCJXF;ep{l+NRrOV1g9`iIJRBvi(up)wvTT3NXRL(Lr&>NEz2$`}$VV`!+1VWH*@4>k9RqLr09 zB2>ncp)y8>$`}0s)?bho(WYoDOA<7 zp{gc_s(LO|)$^gMUMN~wxl=+_y%?(MrBGEbhpL(ys%l!Os#ii)O%GKyBUII^MJp?J zW~i#yLRHNQRW&(tvsH*v)sumQjY~VLSRV@rvwJ228;!ssfLRBpd zRkbWs)tjNJmWQf(t7v5duLxDOGE~*7P*rb-%2*RBV{NF6b)hoWhst=TXk|aY8!F?y zP#N!s%J?AE;d~gX>Z4FqABU>?BvjR>p{h32X`{-1-WaOtvrtu=LREbps_KhSRbPgx z`YKe_*P*I5hpPIfXk|Zt8>;HNP*u(A{)VtoJyrLFs%jCc>fTUQEkjk^7pkgNsH*#m zR=%LoI#gAgP*rV1RkaIM)jm{Jhfq}=LsdNxs;X0{s?J3l$mkj>qg$wq?x8Yzgvxj@ zR7THG8NEVf^bVELr)Xt+_YGC`P^hYYp{gDZRn zp{fRlsu~iiYG|mcVWFxX4^=fhRMiuqsz!vWda`I`dyfoNH7ZoqQ=zJ!4plWeRMnVJ zRbxX{jSE#ZK2+6&qLuAEF;vE+P#MpL%9tD~0YVPzJ5I*qxZs^*2NnjflaL8z)X zLRBrS)5ewkyeL%F;!ssfLRBpdRkbWs)tjNJmWQf(D^%5rP*p37R`&C%P*rb-s#+bY zYE7uBwV|rkg{oQ~s_LCkRquwXdar0@KffQU>Vr^KABL*>C{)$Qp{hO!RrP78stuv4 zHioMDtZ3!M_NGu(pNFdYB2?9vp{l+LRrPhKs?DLQz6n+JZK$g6idNQj^SXam)3~9k zdqP#U2vv1&sH&Ess_qL_)hblg{h_K_hpK8*w6d<-hN@~8s;Ygcst%#5I)ncp)y8> z${1C&vb~=QRrPeJs?njU#)PUG8>(ttsH*XyswRZ0ni#6;nL2G!+1`^vWlRp0@m#2k z=R?h%5^C;?q2|66YVONLEBkqBsMDAhD&v(<8Ph{$%m{TFuZF6c8LH~FP*t-+Rn0D1 z+0S!AWy}qgF)vic{7@MSLS?)WDq~@&j76a`78k87QGf{LRGB|Rkbcu)%s9X?}Vy)H&oSop{m{wRrNv9%0m7y zRMkhJsy+@?^+~9zPeWC02vxN)RMlspsy2nH`n+gmKYtOb>dR17UxljrI#k9tp)$S= zmGNDujOKO!-n2<$8TS;eEaVoU=C%wq_r6f4(JEBN{h=~ihstOZDx+KiJfU#N_S>$GWQ1NRS=@kpqQM?+;i7Aj*vsEmQ3G6sb@oWY^0hJ>mbTC}pChlQ$o zJXF>2P*qQasu~fh>d8=5BSTe<3RU$~sH&%nR`&DgP*r0>RgDc*H7-=u_)t|7LRC!+ zRrO4$s!5@$o-JBg*ONn4Jr}C#`A}6agsPenD&wV4883&*m>Mc$TG7gWekD}K^iUZy zLS?)fDr07-!+9-K)vQoevqM$Q303ua(aL_F8>(tvsH*v)suqOGSQsi}QK*c?p)!_) z%2-;ovXGaB%6KzW#_~`ZZ-qLX6``tDhN@Z>s_N}fRjWf)ttndB&uc?ftqWDPK2+5^ zp{m{uRrOw|s`o=xeGsbZ!%$Tp6|Jo6k3&^`5~}LcP*od3Rc#EFu_;u>=bDAs%mhksv)7OhK8yd7OLv;qLp<$JXF;ap{hoNs(Lb1)yPm)qe4|Z6{_m#P*tNt zRgEdyNXEEO8RJ7`ObC@RF;vDgp)w|g%6K+Z#^j=v{rr5Wj2A*>ObM0oVyL+FJ@53Ql?^;2)ZAA?&7B!)?rWhQe^#iB*`XePPN=HaLsiWURW+|@WdqL-Rka{g z)f=Iz7KW-?6sl@*sH!EQs+NYTS{AD6&7zfsygXFZTcN5}gsNH@s%llJs<%T`tqxVS zCREkhP*v-;uJX1MevJdr6er?b`~^4QZnQ^l48d5ugt=IO53w2BRaaI09Q&Xlj>B2F z1lQtrv_=mMz*Bf0v#<>B;tOn5Q&sh2?195@6i&m1xC*zTCAy$LMqm#zx1`V;VD?1}m~7H8p7T#viZ9(^zj6YvTaVl6(y7TZ)+ z{Rn%Y9*)78xCGbXPP9XB48?d%!y8zGji}w$^I&%zhNE!?F2=RE18va@Log0gu>h;F z0X5rIRs8_F;ZPif({T~5!R=^+o*0a=cp3BYHa)ecov-@`6A7)RoFxDbEGZMYvjFc72hBIaTxKE`+0X~(LnopBJ3z^V8n zuEM|23f(aPPh$#R#|nIeZ?U7x{c9YE!*L2Oz~68y?n5^`hNtiX=HM-Sh;OjN_o}LX zg#*wGC*u#e61Siwy5doc!tBilcBkF2Xgq9c|DPgE1B_V?N%- zr>Op=`;T352!4ap@F!f2f1@=X#2}2pOPGgM_ykqIa{sXl4#tuA9WKP*aU1SO4-CX; zyokA2iI4FecJe`aXB>nha4Pv3ATkN>A`;P;0I8MO@_#1A; zedvbA@DyIa9K3}O@eOv^#UtSWG{edG1Fpm^Xo;?P6r=DwW@9-%z-DZ}tNV}r(G(}) zd|ZK>aWA^y5sbuhn1wg-KEB3wySe|^4^40)&co%n2`$hW{qZCw<25Y9d-w|5?(Y6$ zUo^&VaW4Lf8*vXh;bDxxvzUpcco$z{n?2lr?1M%)0q5W^_$Qj<0rbNYn1olc1n=Mr zY`v%ZkG;_l$K!1L88_fR=!l0f9M50|7Gpg=$5wl}|JVx+a2$S*%kU4}jSlFG$1xGp zu?Xw130v;%{$o$n$FVpIm*RTdh4$!!VVHndun=qU8MfHR{l^}thhuOiF2QxU6YbC& zLopuH@CMdkBWm|`|FJs`!_hbc7voyofwt&{AsC0LSb){ofSUc>f9!@saTHF+MYsmH zqYZjuFvj9#%*WgK6xI8?|JW6W;5Rr8f5O%HH(KLC48j<^gn3wnPf&Rw{XOi0gK;E& zhYRs{+=lzn0|PM{FJdlM;$wV=oep&WaS)EcsrVzV!oSc8-7x@9V+vl!3VeibvExDR zKMusrk+<)wkrZ@@b;|ko2d(j1tU?iTyEWC;L@in$P)cwbPXo3@Q9xlgCXo1e?k0&u1 zuVES9!&lh$F!vw(qA`ApbMaT)hWb$$0Ad!r$a$JzKZZoq%g5f5QFp1}+(#(I2?ts1!h*b5DC9Da|> z@DJRL4(N-=F%i?T28O+Q#lbcE@2j8fV~QT#Gx<7QHY8<1iHquo@ds)5QJ9 zZa5T2;dEStYj8W-peF`nES|4*=c~@S@s_dGF7E%{4)?qKU*EUt`+o7~RVP)Q?DOHP zd=C6Z)gILcRM)RQruu~HtE=y-zT5v1@Q`mDf86(?{;=kVnqz8CuIXAcq~_V07iwOw z*<4dwyL0XFwddE~Qrn?+Z0&QkFV#-1Z8UFu+yDJ11kIW?+oEblhw924_vy}2=l#E* f{C__jebRq_;V0hBKJmXlzt+1-&_O;y}Y literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.bin.meta b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.bin.meta new file mode 100644 index 0000000000000000000000000000000000000000..d3997ac017c99f60347177bf1c4ef359a18e2d70 GIT binary patch literal 29 Zcmd;LU|=XoEiQ>S00Axp&(hM85dd1<22TJ0 literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.csv new file mode 100644 index 000000000..e530987b2 --- /dev/null +++ b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_1000.csv @@ -0,0 +1,1001 @@ +row_id,key,is_null +0,test_00000,false +1,test_00001,false +2,test_00002,false +3,test_00003,false +4,test_00004,false +5,test_00005,false +6,test_00006,false +7,test_00007,false +8,test_00008,false +9,test_00009,false +10,test_00010,false +11,test_00011,false +12,test_00012,false +13,test_00013,false +14,test_00014,false +15,test_00015,false +16,test_00016,false +17,test_00017,false +18,test_00018,false +19,test_00019,false +20,test_00020,false +21,test_00021,false +22,test_00022,false +23,test_00023,false +24,NULL,true +25,test_00025,false +26,test_00026,false +27,test_00027,false +28,test_00028,false +29,test_00029,false +30,test_00030,false +31,test_00031,false +32,test_00032,false +33,test_00033,false +34,test_00034,false +35,test_00035,false +36,test_00036,false +37,test_00037,false +38,test_00038,false +39,test_00039,false +40,test_00040,false +41,test_00041,false +42,test_00042,false +43,test_00043,false +44,test_00044,false +45,test_00045,false +46,test_00046,false +47,test_00047,false +48,test_00048,false +49,test_00049,false +50,test_00050,false +51,test_00051,false +52,test_00052,false +53,test_00053,false +54,test_00054,false +55,test_00055,false +56,test_00056,false +57,test_00057,false +58,test_00058,false +59,test_00059,false +60,test_00060,false +61,test_00061,false +62,test_00062,false +63,test_00063,false +64,test_00064,false +65,test_00065,false +66,test_00066,false +67,test_00067,false +68,test_00068,false +69,test_00069,false +70,test_00070,false +71,NULL,true +72,NULL,true +73,test_00073,false +74,test_00074,false +75,test_00075,false +76,test_00076,false +77,test_00077,false +78,test_00078,false +79,test_00079,false +80,test_00080,false +81,test_00081,false +82,test_00082,false +83,test_00083,false +84,test_00084,false +85,test_00085,false +86,test_00086,false +87,test_00087,false +88,test_00088,false +89,test_00089,false +90,test_00090,false +91,test_00091,false +92,test_00092,false +93,test_00093,false +94,test_00094,false +95,test_00095,false +96,test_00096,false +97,test_00097,false +98,NULL,true +99,test_00099,false +100,test_00100,false +101,test_00101,false +102,test_00102,false +103,test_00103,false +104,NULL,true +105,test_00105,false +106,test_00106,false +107,test_00107,false +108,test_00108,false +109,test_00109,false +110,test_00110,false +111,test_00111,false +112,test_00112,false +113,test_00113,false +114,test_00114,false +115,test_00115,false +116,test_00116,false +117,test_00117,false +118,test_00118,false +119,test_00119,false +120,test_00120,false +121,test_00121,false +122,test_00122,false +123,test_00123,false +124,test_00124,false +125,test_00125,false +126,test_00126,false +127,test_00127,false +128,test_00128,false +129,test_00129,false +130,test_00130,false +131,test_00131,false +132,test_00132,false +133,test_00133,false +134,test_00134,false +135,test_00135,false +136,test_00136,false +137,test_00137,false +138,test_00138,false +139,test_00139,false +140,test_00140,false +141,test_00141,false +142,test_00142,false +143,test_00143,false +144,test_00144,false +145,test_00145,false +146,test_00146,false +147,test_00147,false +148,test_00148,false +149,test_00149,false +150,test_00150,false +151,test_00151,false +152,test_00152,false +153,test_00153,false +154,NULL,true +155,test_00155,false +156,test_00156,false +157,test_00157,false +158,test_00158,false +159,test_00159,false +160,test_00160,false +161,test_00161,false +162,test_00162,false +163,test_00163,false +164,test_00164,false +165,test_00165,false +166,test_00166,false +167,test_00167,false +168,test_00168,false +169,test_00169,false +170,test_00170,false +171,test_00171,false +172,test_00172,false +173,test_00173,false +174,test_00174,false +175,test_00175,false +176,test_00176,false +177,test_00177,false +178,test_00178,false +179,test_00179,false +180,test_00180,false +181,test_00181,false +182,test_00182,false +183,test_00183,false +184,test_00184,false +185,test_00185,false +186,test_00186,false +187,test_00187,false +188,test_00188,false +189,test_00189,false +190,test_00190,false +191,test_00191,false +192,test_00192,false +193,test_00193,false +194,test_00194,false +195,test_00195,false +196,test_00196,false +197,test_00197,false +198,test_00198,false +199,test_00199,false +200,test_00200,false +201,test_00201,false +202,test_00202,false +203,test_00203,false +204,test_00204,false +205,test_00205,false +206,test_00206,false +207,test_00207,false +208,test_00208,false +209,test_00209,false +210,test_00210,false +211,test_00211,false +212,test_00212,false +213,test_00213,false +214,NULL,true +215,test_00215,false +216,test_00216,false +217,test_00217,false +218,test_00218,false +219,test_00219,false +220,test_00220,false +221,test_00221,false +222,test_00222,false +223,test_00223,false +224,test_00224,false +225,test_00225,false +226,test_00226,false +227,test_00227,false +228,test_00228,false +229,test_00229,false +230,test_00230,false +231,test_00231,false +232,test_00232,false +233,test_00233,false +234,test_00234,false +235,test_00235,false +236,test_00236,false +237,test_00237,false +238,test_00238,false +239,test_00239,false +240,test_00240,false +241,test_00241,false +242,test_00242,false +243,test_00243,false +244,test_00244,false +245,test_00245,false +246,test_00246,false +247,test_00247,false +248,test_00248,false +249,test_00249,false +250,test_00250,false +251,test_00251,false +252,test_00252,false +253,test_00253,false +254,test_00254,false +255,test_00255,false +256,test_00256,false +257,test_00257,false +258,test_00258,false +259,test_00259,false +260,test_00260,false +261,test_00261,false +262,test_00262,false +263,test_00263,false +264,test_00264,false +265,test_00265,false +266,test_00266,false +267,test_00267,false +268,test_00268,false +269,test_00269,false +270,test_00270,false +271,test_00271,false +272,test_00272,false +273,test_00273,false +274,test_00274,false +275,test_00275,false +276,test_00276,false +277,test_00277,false +278,test_00278,false +279,test_00279,false +280,test_00280,false +281,test_00281,false +282,test_00282,false +283,test_00283,false +284,test_00284,false +285,test_00285,false +286,test_00286,false +287,test_00287,false +288,test_00288,false +289,test_00289,false +290,NULL,true +291,test_00291,false +292,test_00292,false +293,test_00293,false +294,test_00294,false +295,test_00295,false +296,test_00296,false +297,test_00297,false +298,NULL,true +299,test_00299,false +300,test_00300,false +301,test_00301,false +302,test_00302,false +303,test_00303,false +304,test_00304,false +305,test_00305,false +306,NULL,true +307,test_00307,false +308,test_00308,false +309,test_00309,false +310,test_00310,false +311,test_00311,false +312,test_00312,false +313,test_00313,false +314,test_00314,false +315,test_00315,false +316,test_00316,false +317,test_00317,false +318,test_00318,false +319,test_00319,false +320,NULL,true +321,test_00321,false +322,test_00322,false +323,test_00323,false +324,NULL,true +325,test_00325,false +326,test_00326,false +327,test_00327,false +328,test_00328,false +329,test_00329,false +330,test_00330,false +331,test_00331,false +332,test_00332,false +333,test_00333,false +334,test_00334,false +335,test_00335,false +336,test_00336,false +337,test_00337,false +338,test_00338,false +339,test_00339,false +340,test_00340,false +341,test_00341,false +342,test_00342,false +343,test_00343,false +344,test_00344,false +345,test_00345,false +346,test_00346,false +347,test_00347,false +348,test_00348,false +349,test_00349,false +350,test_00350,false +351,test_00351,false +352,test_00352,false +353,test_00353,false +354,test_00354,false +355,test_00355,false +356,test_00356,false +357,test_00357,false +358,test_00358,false +359,test_00359,false +360,test_00360,false +361,test_00361,false +362,NULL,true +363,test_00363,false +364,test_00364,false +365,test_00365,false +366,test_00366,false +367,test_00367,false +368,test_00368,false +369,test_00369,false +370,test_00370,false +371,test_00371,false +372,test_00372,false +373,test_00373,false +374,test_00374,false +375,test_00375,false +376,test_00376,false +377,test_00377,false +378,test_00378,false +379,test_00379,false +380,test_00380,false +381,NULL,true +382,NULL,true +383,test_00383,false +384,test_00384,false +385,test_00385,false +386,NULL,true +387,test_00387,false +388,test_00388,false +389,test_00389,false +390,test_00390,false +391,test_00391,false +392,test_00392,false +393,test_00393,false +394,test_00394,false +395,test_00395,false +396,test_00396,false +397,test_00397,false +398,test_00398,false +399,test_00399,false +400,test_00400,false +401,test_00401,false +402,test_00402,false +403,test_00403,false +404,test_00404,false +405,test_00405,false +406,test_00406,false +407,test_00407,false +408,test_00408,false +409,test_00409,false +410,test_00410,false +411,test_00411,false +412,test_00412,false +413,NULL,true +414,test_00414,false +415,test_00415,false +416,test_00416,false +417,test_00417,false +418,test_00418,false +419,test_00419,false +420,NULL,true +421,test_00421,false +422,test_00422,false +423,test_00423,false +424,test_00424,false +425,test_00425,false +426,test_00426,false +427,NULL,true +428,test_00428,false +429,test_00429,false +430,test_00430,false +431,test_00431,false +432,test_00432,false +433,test_00433,false +434,test_00434,false +435,test_00435,false +436,test_00436,false +437,test_00437,false +438,test_00438,false +439,test_00439,false +440,test_00440,false +441,test_00441,false +442,test_00442,false +443,test_00443,false +444,test_00444,false +445,test_00445,false +446,test_00446,false +447,test_00447,false +448,test_00448,false +449,test_00449,false +450,test_00450,false +451,test_00451,false +452,test_00452,false +453,test_00453,false +454,test_00454,false +455,test_00455,false +456,test_00456,false +457,test_00457,false +458,test_00458,false +459,test_00459,false +460,test_00460,false +461,test_00461,false +462,test_00462,false +463,test_00463,false +464,test_00464,false +465,test_00465,false +466,test_00466,false +467,test_00467,false +468,test_00468,false +469,test_00469,false +470,test_00470,false +471,test_00471,false +472,test_00472,false +473,test_00473,false +474,test_00474,false +475,test_00475,false +476,test_00476,false +477,test_00477,false +478,test_00478,false +479,test_00479,false +480,test_00480,false +481,test_00481,false +482,test_00482,false +483,test_00483,false +484,NULL,true +485,test_00485,false +486,test_00486,false +487,test_00487,false +488,test_00488,false +489,test_00489,false +490,test_00490,false +491,test_00491,false +492,test_00492,false +493,test_00493,false +494,test_00494,false +495,test_00495,false +496,test_00496,false +497,test_00497,false +498,test_00498,false +499,test_00499,false +500,test_00500,false +501,test_00501,false +502,test_00502,false +503,test_00503,false +504,test_00504,false +505,test_00505,false +506,test_00506,false +507,test_00507,false +508,test_00508,false +509,test_00509,false +510,test_00510,false +511,test_00511,false +512,test_00512,false +513,test_00513,false +514,test_00514,false +515,test_00515,false +516,test_00516,false +517,test_00517,false +518,test_00518,false +519,test_00519,false +520,test_00520,false +521,test_00521,false +522,test_00522,false +523,test_00523,false +524,test_00524,false +525,test_00525,false +526,test_00526,false +527,test_00527,false +528,test_00528,false +529,test_00529,false +530,NULL,true +531,test_00531,false +532,test_00532,false +533,test_00533,false +534,test_00534,false +535,test_00535,false +536,test_00536,false +537,test_00537,false +538,test_00538,false +539,test_00539,false +540,test_00540,false +541,test_00541,false +542,test_00542,false +543,test_00543,false +544,test_00544,false +545,test_00545,false +546,test_00546,false +547,test_00547,false +548,test_00548,false +549,test_00549,false +550,test_00550,false +551,test_00551,false +552,test_00552,false +553,test_00553,false +554,test_00554,false +555,test_00555,false +556,test_00556,false +557,test_00557,false +558,test_00558,false +559,test_00559,false +560,test_00560,false +561,test_00561,false +562,test_00562,false +563,test_00563,false +564,test_00564,false +565,test_00565,false +566,test_00566,false +567,test_00567,false +568,test_00568,false +569,test_00569,false +570,test_00570,false +571,NULL,true +572,test_00572,false +573,test_00573,false +574,test_00574,false +575,test_00575,false +576,NULL,true +577,test_00577,false +578,test_00578,false +579,test_00579,false +580,test_00580,false +581,test_00581,false +582,test_00582,false +583,test_00583,false +584,NULL,true +585,test_00585,false +586,test_00586,false +587,test_00587,false +588,test_00588,false +589,test_00589,false +590,test_00590,false +591,test_00591,false +592,test_00592,false +593,test_00593,false +594,test_00594,false +595,test_00595,false +596,test_00596,false +597,test_00597,false +598,test_00598,false +599,test_00599,false +600,test_00600,false +601,test_00601,false +602,test_00602,false +603,test_00603,false +604,test_00604,false +605,test_00605,false +606,test_00606,false +607,test_00607,false +608,test_00608,false +609,test_00609,false +610,test_00610,false +611,test_00611,false +612,test_00612,false +613,test_00613,false +614,test_00614,false +615,test_00615,false +616,test_00616,false +617,test_00617,false +618,test_00618,false +619,test_00619,false +620,test_00620,false +621,test_00621,false +622,test_00622,false +623,test_00623,false +624,test_00624,false +625,test_00625,false +626,test_00626,false +627,test_00627,false +628,test_00628,false +629,test_00629,false +630,test_00630,false +631,test_00631,false +632,test_00632,false +633,test_00633,false +634,test_00634,false +635,test_00635,false +636,test_00636,false +637,test_00637,false +638,test_00638,false +639,test_00639,false +640,test_00640,false +641,test_00641,false +642,test_00642,false +643,test_00643,false +644,test_00644,false +645,test_00645,false +646,test_00646,false +647,test_00647,false +648,test_00648,false +649,test_00649,false +650,test_00650,false +651,test_00651,false +652,test_00652,false +653,test_00653,false +654,test_00654,false +655,test_00655,false +656,test_00656,false +657,test_00657,false +658,test_00658,false +659,NULL,true +660,test_00660,false +661,test_00661,false +662,test_00662,false +663,test_00663,false +664,test_00664,false +665,test_00665,false +666,test_00666,false +667,test_00667,false +668,test_00668,false +669,test_00669,false +670,test_00670,false +671,test_00671,false +672,test_00672,false +673,test_00673,false +674,test_00674,false +675,test_00675,false +676,test_00676,false +677,test_00677,false +678,test_00678,false +679,test_00679,false +680,NULL,true +681,test_00681,false +682,test_00682,false +683,test_00683,false +684,test_00684,false +685,test_00685,false +686,test_00686,false +687,test_00687,false +688,test_00688,false +689,test_00689,false +690,test_00690,false +691,test_00691,false +692,test_00692,false +693,test_00693,false +694,test_00694,false +695,test_00695,false +696,test_00696,false +697,test_00697,false +698,test_00698,false +699,test_00699,false +700,test_00700,false +701,NULL,true +702,test_00702,false +703,test_00703,false +704,test_00704,false +705,NULL,true +706,test_00706,false +707,test_00707,false +708,test_00708,false +709,test_00709,false +710,test_00710,false +711,test_00711,false +712,test_00712,false +713,test_00713,false +714,test_00714,false +715,test_00715,false +716,test_00716,false +717,test_00717,false +718,test_00718,false +719,test_00719,false +720,test_00720,false +721,NULL,true +722,test_00722,false +723,test_00723,false +724,test_00724,false +725,test_00725,false +726,test_00726,false +727,test_00727,false +728,test_00728,false +729,test_00729,false +730,test_00730,false +731,test_00731,false +732,test_00732,false +733,test_00733,false +734,test_00734,false +735,test_00735,false +736,test_00736,false +737,test_00737,false +738,test_00738,false +739,test_00739,false +740,test_00740,false +741,test_00741,false +742,test_00742,false +743,test_00743,false +744,test_00744,false +745,test_00745,false +746,test_00746,false +747,test_00747,false +748,test_00748,false +749,test_00749,false +750,test_00750,false +751,test_00751,false +752,test_00752,false +753,test_00753,false +754,test_00754,false +755,test_00755,false +756,test_00756,false +757,test_00757,false +758,test_00758,false +759,test_00759,false +760,test_00760,false +761,test_00761,false +762,test_00762,false +763,test_00763,false +764,NULL,true +765,test_00765,false +766,test_00766,false +767,test_00767,false +768,test_00768,false +769,test_00769,false +770,test_00770,false +771,NULL,true +772,test_00772,false +773,test_00773,false +774,test_00774,false +775,test_00775,false +776,test_00776,false +777,test_00777,false +778,test_00778,false +779,test_00779,false +780,test_00780,false +781,test_00781,false +782,test_00782,false +783,test_00783,false +784,test_00784,false +785,test_00785,false +786,test_00786,false +787,test_00787,false +788,test_00788,false +789,test_00789,false +790,test_00790,false +791,test_00791,false +792,test_00792,false +793,test_00793,false +794,test_00794,false +795,test_00795,false +796,test_00796,false +797,NULL,true +798,test_00798,false +799,test_00799,false +800,test_00800,false +801,test_00801,false +802,test_00802,false +803,test_00803,false +804,test_00804,false +805,test_00805,false +806,test_00806,false +807,test_00807,false +808,test_00808,false +809,test_00809,false +810,test_00810,false +811,test_00811,false +812,test_00812,false +813,test_00813,false +814,test_00814,false +815,test_00815,false +816,test_00816,false +817,test_00817,false +818,test_00818,false +819,test_00819,false +820,test_00820,false +821,test_00821,false +822,test_00822,false +823,test_00823,false +824,test_00824,false +825,test_00825,false +826,test_00826,false +827,test_00827,false +828,test_00828,false +829,test_00829,false +830,test_00830,false +831,test_00831,false +832,test_00832,false +833,test_00833,false +834,test_00834,false +835,NULL,true +836,test_00836,false +837,test_00837,false +838,test_00838,false +839,test_00839,false +840,test_00840,false +841,test_00841,false +842,test_00842,false +843,test_00843,false +844,test_00844,false +845,test_00845,false +846,test_00846,false +847,test_00847,false +848,test_00848,false +849,test_00849,false +850,test_00850,false +851,test_00851,false +852,test_00852,false +853,test_00853,false +854,NULL,true +855,test_00855,false +856,test_00856,false +857,test_00857,false +858,test_00858,false +859,test_00859,false +860,test_00860,false +861,test_00861,false +862,test_00862,false +863,test_00863,false +864,test_00864,false +865,test_00865,false +866,test_00866,false +867,test_00867,false +868,test_00868,false +869,test_00869,false +870,test_00870,false +871,test_00871,false +872,test_00872,false +873,test_00873,false +874,test_00874,false +875,test_00875,false +876,test_00876,false +877,test_00877,false +878,test_00878,false +879,test_00879,false +880,test_00880,false +881,test_00881,false +882,test_00882,false +883,test_00883,false +884,test_00884,false +885,NULL,true +886,test_00886,false +887,test_00887,false +888,test_00888,false +889,test_00889,false +890,test_00890,false +891,test_00891,false +892,test_00892,false +893,test_00893,false +894,test_00894,false +895,test_00895,false +896,test_00896,false +897,test_00897,false +898,test_00898,false +899,test_00899,false +900,test_00900,false +901,test_00901,false +902,test_00902,false +903,test_00903,false +904,test_00904,false +905,test_00905,false +906,test_00906,false +907,test_00907,false +908,test_00908,false +909,NULL,true +910,test_00910,false +911,test_00911,false +912,test_00912,false +913,test_00913,false +914,test_00914,false +915,test_00915,false +916,test_00916,false +917,test_00917,false +918,test_00918,false +919,test_00919,false +920,test_00920,false +921,test_00921,false +922,test_00922,false +923,test_00923,false +924,test_00924,false +925,test_00925,false +926,test_00926,false +927,test_00927,false +928,test_00928,false +929,test_00929,false +930,test_00930,false +931,test_00931,false +932,test_00932,false +933,test_00933,false +934,test_00934,false +935,test_00935,false +936,test_00936,false +937,test_00937,false +938,test_00938,false +939,test_00939,false +940,test_00940,false +941,test_00941,false +942,test_00942,false +943,test_00943,false +944,test_00944,false +945,test_00945,false +946,test_00946,false +947,test_00947,false +948,test_00948,false +949,test_00949,false +950,NULL,true +951,test_00951,false +952,test_00952,false +953,test_00953,false +954,test_00954,false +955,test_00955,false +956,test_00956,false +957,test_00957,false +958,test_00958,false +959,NULL,true +960,test_00960,false +961,test_00961,false +962,test_00962,false +963,test_00963,false +964,NULL,true +965,test_00965,false +966,NULL,true +967,test_00967,false +968,test_00968,false +969,test_00969,false +970,test_00970,false +971,test_00971,false +972,test_00972,false +973,test_00973,false +974,test_00974,false +975,test_00975,false +976,test_00976,false +977,test_00977,false +978,test_00978,false +979,test_00979,false +980,test_00980,false +981,test_00981,false +982,test_00982,false +983,test_00983,false +984,test_00984,false +985,test_00985,false +986,test_00986,false +987,test_00987,false +988,test_00988,false +989,test_00989,false +990,test_00990,false +991,test_00991,false +992,test_00992,false +993,test_00993,false +994,test_00994,false +995,test_00995,false +996,test_00996,false +997,test_00997,false +998,test_00998,false +999,test_00999,false diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..0d19f981ec2337162e9c6b95bf4fb451aabf625b GIT binary patch literal 807 zcmYk&Iab186o%pdUc@0PAmXez4=5@LaV+iZEbOgZffSzOB3z1vjccK_av6RB{a;cf z_sL0e-;<>G{L*{s6o*?#qEa_QnVX}oTM6ZEW9Y)Ij4s{A(Usc-x^|mHH*Ql=sag`G zmT9Dx8Kjn3q?QU&OBJc5hSV~L)KW)knUBiU(m-liKx$b;YFR?szJj!U6>0k#()M+v z?Hf^DZEqrN-$dHJg|vMe=}+t+wd^9b>>;)6BefhLwH!v}YB@q`X(6>7Bek3$wX~61 zPLW#9kXp`>S~^HAX(bfL`^V3l4W{AgwiCj?Yr3@@TJ8LGn+a literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.bin.meta b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.bin.meta new file mode 100644 index 0000000000000000000000000000000000000000..9098c6cb0fd3786eaed196e7e0df79e05a352db0 GIT binary patch literal 29 Ycmd;LU|=XoEiQ>S00Ax}o{1$R09vXBK>z>% literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.csv new file mode 100644 index 000000000..c1fa0f48c --- /dev/null +++ b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_50.csv @@ -0,0 +1,51 @@ +row_id,key,is_null +0,test_00000,false +1,test_00001,false +2,test_00002,false +3,test_00003,false +4,test_00004,false +5,test_00005,false +6,test_00006,false +7,test_00007,false +8,test_00008,false +9,test_00009,false +10,test_00010,false +11,test_00011,false +12,test_00012,false +13,test_00013,false +14,test_00014,false +15,test_00015,false +16,test_00016,false +17,test_00017,false +18,test_00018,false +19,test_00019,false +20,test_00020,false +21,test_00021,false +22,test_00022,false +23,test_00023,false +24,NULL,true +25,test_00025,false +26,test_00026,false +27,test_00027,false +28,test_00028,false +29,test_00029,false +30,test_00030,false +31,test_00031,false +32,test_00032,false +33,test_00033,false +34,test_00034,false +35,test_00035,false +36,test_00036,false +37,test_00037,false +38,test_00038,false +39,test_00039,false +40,test_00040,false +41,test_00041,false +42,test_00042,false +43,test_00043,false +44,test_00044,false +45,test_00045,false +46,test_00046,false +47,test_00047,false +48,test_00048,false +49,test_00049,false diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.bin new file mode 100644 index 0000000000000000000000000000000000000000..5392aa595c467e8fe9ea4ec2f943374c76025574 GIT binary patch literal 9156 zcmZ9Rb(B=~8;9?mp+g!$kQNaIc4vcK1O!B+K}1yA0BH*;0qO2sx|gMU>4v4dd+F{J zP=C+;#k`;MyXQQddG??8`Q9`49d;*G+a|xZZ4}~NKG#VVv{0Yxngx8W&n(R6`pv?9 zZW6NypPSSy(&r{KEADfXn??ED6lT#rH)YU5X-Q>9OKLM((wNbb){K@n%xFnxMoW4# zS~8f?lF^KoOhF6KlG%)wEM~N1HKXNCGp1)ZWBOZWOwVD)^qgi)e>-SlOwVP;^xS4l z&tt~)yk^{qd}g%dH>0J187=Ra(NfTimUn{|PD>#(THZ6GrLY+-@0-!`ff+4D%xEcU zMoTd>T0+cd2@P5VEdeuH!pvw1H=`xOjFw07cW%xL*AXpyv(G^3@I z87-yFX!*#DmNI6vlr^KJoEa@2o6%CSoNV5ws}g)imSmpP6y?T4uC-ZbnOOGg`hdqos}+Enk|^@|77aUk5FkmbzxN)H9={ zz8NhI%xGz7MoS|zS{j?t@{JkOzx$tsdi(QxGp08&V|r6FrvG5Zyyj-iYhlK`mS&v2 zl^N4p2hH1$j>WC;+M@6YRCQ8N$Q8G@7l5t9ujMJiIoG}fMaaNR!bE0IN7bW9@ zC^sijl&XuOR9zCK>ar+RS4640YU*w8Yob(L7p3ZkC{;H_sk$Xf)ooF#?ub%#SCp!I zqEy{C^|to|QK}w_QuRobs>h;KJrSkqsVG&?M5%f%O4SQds{Ss$Pjw^;(pw zzeK6}Ta>DQM5*c)`~@%&K~;BAs(Of0)zj2l=DkFz>McrDA5p6Mic-~2l&b!sR1FZN zYM>}pgG8wsZ0arZA)-_b6{TvJC{@EnsTv_l)ksmQMu}22T9m3WqEwAF^_F>zC{^P` zsTwaz)dW$hCW=xuNtCL|qEt;0rE01uRnttP$(SKZ#!OK%W{Hv!D@w*}Q8MC0$(SR` z+<8F_^VaozQ8E^YlCe;fj76ekEEXkWi70cIiE{kqqGYTv_4aeTC>bk7$yg;y#%fVA z)`*g^R+PK3PL!(kqEu}VrD~&TfQ-$eWNZ;7W2-20w~I12L6o^WM47u&l)1Z1y@kA6 zl#D&1Wb73sW1lE@W4|a>2SlklC`#2KQK}A$Qgy`CTgXR6sX8V~)p1d(PKZ)gGVY3!aZi+t`=Vq#5GCWGskeb2iIVYHl$-NJl&YtqR6P@=>bWRY zFGQ*OQz*~RpU&(4Ln|ystKZ0O%x?#vM3o-M9G*cO2#x%GNy}?F~ihbxwAx>8!O7) zm@P_1oG2M{M9G*d%G~*)%w1sWt=xs8WGoUTW3eb1OGL?7DoVyOQ8Jc`ayM3pQWY;s z)k;%u<*pK?YPBd;YecD9D@xTmQL5I9Qnf*ps*R#lZ4#wwv#Gaow}?`;Rg|i2qEu}c zr7A&`svV+K?G&YImnc=cMXB0j>aEE|_{7I8l_Ui=tFr5~b?0 zC>d8p$+#v;#&uCLZitd`)70C~w?xUfElS26Q8Mm|a&zv9QgvUHst2M}Jrt$tktkJ< z{SMa~kP`|+6jXrP@GblVU11Q6hd79bZEzS8;U2t(RIcM>gMtta<)9`sgjUcA`oS2O z2}@xU?1!^(3tm7npW|ePd=LVqp(@mcX3!3L!3dZN3t=7XhLdm&o`8?jrH5Sb0hEA> zPzSz;U!WTdfr&5|RzU(Bg-h@N{)RM39Oo@41d&i4YC&UY1D#<2#6T=8hb?dr&chve z2`Q2~PF5%Y0Vo62p+2;L4$uci!E{&*8(=S-h8yqM zOosWe26n+oxDL-CX>#s0EoCoqk2z&(9p#ijl-(diZgE&|T32+Rqz+>>Ga-0m1 z2Z}*ys0Q_+C3J%R5CgLz9=5|#xD1cLP0f1-azjxl1y!LQw1AG#55__)tblEB1TMit zaMCzVI>-e@pd?g*y3iatKwlUGvtT)Fg~M72FMq0;t8$N&!p)!08&7eK>fzdD% zmcbS{1c`7T{)V(X+&STWC;^q=EBFE0L2noZGhiufhJ$bc?!jM>CY|HtfWiKJ0PjFJd&hrm>pcs^fYEU0qLMP}CF)$nAVLKd!%kT)? zw|M>`Hxz|ZP!;My3+M>_U@XMK3fKlm;1WCpCkM|zdsm1>u(`7dFz BE(ZVr literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.bin.meta b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.bin.meta new file mode 100644 index 0000000000000000000000000000000000000000..c3bfab36818696cab83ac934dfa3ee3d287ccc78 GIT binary patch literal 29 Zcmd;LU|=XoEiQ>S00Axp&&1M_5dd1r21)<` literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.csv new file mode 100644 index 000000000..33e7438bd --- /dev/null +++ b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_500.csv @@ -0,0 +1,501 @@ +row_id,key,is_null +0,test_00000,false +1,test_00001,false +2,test_00002,false +3,test_00003,false +4,test_00004,false +5,test_00005,false +6,test_00006,false +7,test_00007,false +8,test_00008,false +9,test_00009,false +10,test_00010,false +11,test_00011,false +12,test_00012,false +13,test_00013,false +14,test_00014,false +15,test_00015,false +16,test_00016,false +17,test_00017,false +18,test_00018,false +19,test_00019,false +20,test_00020,false +21,test_00021,false +22,test_00022,false +23,test_00023,false +24,NULL,true +25,test_00025,false +26,test_00026,false +27,test_00027,false +28,test_00028,false +29,test_00029,false +30,test_00030,false +31,test_00031,false +32,test_00032,false +33,test_00033,false +34,test_00034,false +35,test_00035,false +36,test_00036,false +37,test_00037,false +38,test_00038,false +39,test_00039,false +40,test_00040,false +41,test_00041,false +42,test_00042,false +43,test_00043,false +44,test_00044,false +45,test_00045,false +46,test_00046,false +47,test_00047,false +48,test_00048,false +49,test_00049,false +50,test_00050,false +51,test_00051,false +52,test_00052,false +53,test_00053,false +54,test_00054,false +55,test_00055,false +56,test_00056,false +57,test_00057,false +58,test_00058,false +59,test_00059,false +60,test_00060,false +61,test_00061,false +62,test_00062,false +63,test_00063,false +64,test_00064,false +65,test_00065,false +66,test_00066,false +67,test_00067,false +68,test_00068,false +69,test_00069,false +70,test_00070,false +71,NULL,true +72,NULL,true +73,test_00073,false +74,test_00074,false +75,test_00075,false +76,test_00076,false +77,test_00077,false +78,test_00078,false +79,test_00079,false +80,test_00080,false +81,test_00081,false +82,test_00082,false +83,test_00083,false +84,test_00084,false +85,test_00085,false +86,test_00086,false +87,test_00087,false +88,test_00088,false +89,test_00089,false +90,test_00090,false +91,test_00091,false +92,test_00092,false +93,test_00093,false +94,test_00094,false +95,test_00095,false +96,test_00096,false +97,test_00097,false +98,NULL,true +99,test_00099,false +100,test_00100,false +101,test_00101,false +102,test_00102,false +103,test_00103,false +104,NULL,true +105,test_00105,false +106,test_00106,false +107,test_00107,false +108,test_00108,false +109,test_00109,false +110,test_00110,false +111,test_00111,false +112,test_00112,false +113,test_00113,false +114,test_00114,false +115,test_00115,false +116,test_00116,false +117,test_00117,false +118,test_00118,false +119,test_00119,false +120,test_00120,false +121,test_00121,false +122,test_00122,false +123,test_00123,false +124,test_00124,false +125,test_00125,false +126,test_00126,false +127,test_00127,false +128,test_00128,false +129,test_00129,false +130,test_00130,false +131,test_00131,false +132,test_00132,false +133,test_00133,false +134,test_00134,false +135,test_00135,false +136,test_00136,false +137,test_00137,false +138,test_00138,false +139,test_00139,false +140,test_00140,false +141,test_00141,false +142,test_00142,false +143,test_00143,false +144,test_00144,false +145,test_00145,false +146,test_00146,false +147,test_00147,false +148,test_00148,false +149,test_00149,false +150,test_00150,false +151,test_00151,false +152,test_00152,false +153,test_00153,false +154,NULL,true +155,test_00155,false +156,test_00156,false +157,test_00157,false +158,test_00158,false +159,test_00159,false +160,test_00160,false +161,test_00161,false +162,test_00162,false +163,test_00163,false +164,test_00164,false +165,test_00165,false +166,test_00166,false +167,test_00167,false +168,test_00168,false +169,test_00169,false +170,test_00170,false +171,test_00171,false +172,test_00172,false +173,test_00173,false +174,test_00174,false +175,test_00175,false +176,test_00176,false +177,test_00177,false +178,test_00178,false +179,test_00179,false +180,test_00180,false +181,test_00181,false +182,test_00182,false +183,test_00183,false +184,test_00184,false +185,test_00185,false +186,test_00186,false +187,test_00187,false +188,test_00188,false +189,test_00189,false +190,test_00190,false +191,test_00191,false +192,test_00192,false +193,test_00193,false +194,test_00194,false +195,test_00195,false +196,test_00196,false +197,test_00197,false +198,test_00198,false +199,test_00199,false +200,test_00200,false +201,test_00201,false +202,test_00202,false +203,test_00203,false +204,test_00204,false +205,test_00205,false +206,test_00206,false +207,test_00207,false +208,test_00208,false +209,test_00209,false +210,test_00210,false +211,test_00211,false +212,test_00212,false +213,test_00213,false +214,NULL,true +215,test_00215,false +216,test_00216,false +217,test_00217,false +218,test_00218,false +219,test_00219,false +220,test_00220,false +221,test_00221,false +222,test_00222,false +223,test_00223,false +224,test_00224,false +225,test_00225,false +226,test_00226,false +227,test_00227,false +228,test_00228,false +229,test_00229,false +230,test_00230,false +231,test_00231,false +232,test_00232,false +233,test_00233,false +234,test_00234,false +235,test_00235,false +236,test_00236,false +237,test_00237,false +238,test_00238,false +239,test_00239,false +240,test_00240,false +241,test_00241,false +242,test_00242,false +243,test_00243,false +244,test_00244,false +245,test_00245,false +246,test_00246,false +247,test_00247,false +248,test_00248,false +249,test_00249,false +250,test_00250,false +251,test_00251,false +252,test_00252,false +253,test_00253,false +254,test_00254,false +255,test_00255,false +256,test_00256,false +257,test_00257,false +258,test_00258,false +259,test_00259,false +260,test_00260,false +261,test_00261,false +262,test_00262,false +263,test_00263,false +264,test_00264,false +265,test_00265,false +266,test_00266,false +267,test_00267,false +268,test_00268,false +269,test_00269,false +270,test_00270,false +271,test_00271,false +272,test_00272,false +273,test_00273,false +274,test_00274,false +275,test_00275,false +276,test_00276,false +277,test_00277,false +278,test_00278,false +279,test_00279,false +280,test_00280,false +281,test_00281,false +282,test_00282,false +283,test_00283,false +284,test_00284,false +285,test_00285,false +286,test_00286,false +287,test_00287,false +288,test_00288,false +289,test_00289,false +290,NULL,true +291,test_00291,false +292,test_00292,false +293,test_00293,false +294,test_00294,false +295,test_00295,false +296,test_00296,false +297,test_00297,false +298,NULL,true +299,test_00299,false +300,test_00300,false +301,test_00301,false +302,test_00302,false +303,test_00303,false +304,test_00304,false +305,test_00305,false +306,NULL,true +307,test_00307,false +308,test_00308,false +309,test_00309,false +310,test_00310,false +311,test_00311,false +312,test_00312,false +313,test_00313,false +314,test_00314,false +315,test_00315,false +316,test_00316,false +317,test_00317,false +318,test_00318,false +319,test_00319,false +320,NULL,true +321,test_00321,false +322,test_00322,false +323,test_00323,false +324,NULL,true +325,test_00325,false +326,test_00326,false +327,test_00327,false +328,test_00328,false +329,test_00329,false +330,test_00330,false +331,test_00331,false +332,test_00332,false +333,test_00333,false +334,test_00334,false +335,test_00335,false +336,test_00336,false +337,test_00337,false +338,test_00338,false +339,test_00339,false +340,test_00340,false +341,test_00341,false +342,test_00342,false +343,test_00343,false +344,test_00344,false +345,test_00345,false +346,test_00346,false +347,test_00347,false +348,test_00348,false +349,test_00349,false +350,test_00350,false +351,test_00351,false +352,test_00352,false +353,test_00353,false +354,test_00354,false +355,test_00355,false +356,test_00356,false +357,test_00357,false +358,test_00358,false +359,test_00359,false +360,test_00360,false +361,test_00361,false +362,NULL,true +363,test_00363,false +364,test_00364,false +365,test_00365,false +366,test_00366,false +367,test_00367,false +368,test_00368,false +369,test_00369,false +370,test_00370,false +371,test_00371,false +372,test_00372,false +373,test_00373,false +374,test_00374,false +375,test_00375,false +376,test_00376,false +377,test_00377,false +378,test_00378,false +379,test_00379,false +380,test_00380,false +381,NULL,true +382,NULL,true +383,test_00383,false +384,test_00384,false +385,test_00385,false +386,NULL,true +387,test_00387,false +388,test_00388,false +389,test_00389,false +390,test_00390,false +391,test_00391,false +392,test_00392,false +393,test_00393,false +394,test_00394,false +395,test_00395,false +396,test_00396,false +397,test_00397,false +398,test_00398,false +399,test_00399,false +400,test_00400,false +401,test_00401,false +402,test_00402,false +403,test_00403,false +404,test_00404,false +405,test_00405,false +406,test_00406,false +407,test_00407,false +408,test_00408,false +409,test_00409,false +410,test_00410,false +411,test_00411,false +412,test_00412,false +413,NULL,true +414,test_00414,false +415,test_00415,false +416,test_00416,false +417,test_00417,false +418,test_00418,false +419,test_00419,false +420,NULL,true +421,test_00421,false +422,test_00422,false +423,test_00423,false +424,test_00424,false +425,test_00425,false +426,test_00426,false +427,NULL,true +428,test_00428,false +429,test_00429,false +430,test_00430,false +431,test_00431,false +432,test_00432,false +433,test_00433,false +434,test_00434,false +435,test_00435,false +436,test_00436,false +437,test_00437,false +438,test_00438,false +439,test_00439,false +440,test_00440,false +441,test_00441,false +442,test_00442,false +443,test_00443,false +444,test_00444,false +445,test_00445,false +446,test_00446,false +447,test_00447,false +448,test_00448,false +449,test_00449,false +450,test_00450,false +451,test_00451,false +452,test_00452,false +453,test_00453,false +454,test_00454,false +455,test_00455,false +456,test_00456,false +457,test_00457,false +458,test_00458,false +459,test_00459,false +460,test_00460,false +461,test_00461,false +462,test_00462,false +463,test_00463,false +464,test_00464,false +465,test_00465,false +466,test_00466,false +467,test_00467,false +468,test_00468,false +469,test_00469,false +470,test_00470,false +471,test_00471,false +472,test_00472,false +473,test_00473,false +474,test_00474,false +475,test_00475,false +476,test_00476,false +477,test_00477,false +478,test_00478,false +479,test_00479,false +480,test_00480,false +481,test_00481,false +482,test_00482,false +483,test_00483,false +484,NULL,true +485,test_00485,false +486,test_00486,false +487,test_00487,false +488,test_00488,false +489,test_00489,false +490,test_00490,false +491,test_00491,false +492,test_00492,false +493,test_00493,false +494,test_00494,false +495,test_00495,false +496,test_00496,false +497,test_00497,false +498,test_00498,false +499,test_00499,false diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.bin new file mode 100644 index 0000000000000000000000000000000000000000..cf1c3ca7b31efcc57887417f262ccef436419592 GIT binary patch literal 85789 zcmYh^b=0PH*|+@}Kt)9F=ehH_3lSAj5t*S80TodZ5fu>;!2nc5R6s;To$l`L?(XjH z?(XiM_j9mb=J>kSwwBvp`*!|t->=JIu9@rJeCdrhU%0*a&s7yGTzADPT~)DSX_c?4 zcvWe4TvhSv((b&f;x(n+bydX=ly>)36+c+oJy%uyP-*vGRq@)=?z^huhp$*AE!UN% zuBzw0yZVEf1Ea<)PBFJY1TV zuau_ctEFlAT4`DyDNW1Quh>0W9xcthZDox9uuUK(0-!4td zUrN*RPH9^HTAG%3OVjeV(zN`&G%f!q?cS>^R=M)Us#4|quC7?M)H`m!x?;7^N>^8` z9$NY8iZw#-xVmD^&^xcLSS$3dt1H$Hz5D8lbwcmCx?RrNwu)i3oR7R&z z8J$bLTSk{q8C^qVbPJWyJ=Ej$2vyZHR8_A~RlP%1^$Atgx72%7^$S(iKUCF#P*np% zRSgPNH8@n&kWf`aLsbn6RW-cSdsU4HRW&kH)u>QaqeE4V2~{;VRMogpRpUcdO$b#r zvDEuiO$t>tIaJk@P*qbyRZR<3H9b_-j8IiGLsiWRRWP*poiEw1;jP*uA_RqY8?wKr7NzED;B zLscCJRdp~_)uB*Thf6K4_mNOlM?+N|3srSIRMm-4RVPDLoeEWTI#kt}P*rD3Ew1;u zP*vwcRb2>Gbum=crBGFuLseC|@_zs;-=(T*sH$qAs;ZY-9P=8Xs%nO+suilLcBra4 zp{nYJs;U>Ns(z@d2BE4NmRcP1Mxm-2hpK84s;X(Is%D|8nun@t5vrs#=#? z9P>7zs@jICY8R@ieWd!Vp{llos@fW=YFntP?V+l6gsR#Zs%lrLs@hpckRn;g|RpTpqXK`|ygsN&9s;XJ2s^+1pT7;@<8LFyP zsH)bXs@jCAYFlb?f!l?uY9FeqL#T{Sp)xv$%IFd*qid*)ZlN-|ms*_Mo}uRU3iTSj zLuK>{mC-j;M!!&V2ZWkCu+-w@4hoeqI8?@vP#Hr*Wef|IF+5bph)}OFGE~*5P*tN# zEl%#3P*r0?RgDW(H9l0;giuuzLsd-*RW&(O)s#?GQ%fyQ?zB)<(?eCw2vs#RRMo6d zRkK4?%?VXCH&oTUP*w9wEl%!&P*n>+oR8_T5 zRnCs;Wt-s-~sh zA)|Syj259XT87GK6)K~3sEjtDGTMg9XcsD@eW}Iu?hvY~W2mZ5p{hEEs_GJ|s%xmK zZlS8WhpOrks;XzH#r5tLs;YOWsy?Br`i83N7pkg%sHy>>ss@It8WgH(aH+-h9ulf* zXsD`Tp{j<5su~fhYGkOYQK70vhpHMAs%mVh#q}N+Dq}*ZjESK#CWXqF9BS^=P;;k+ znmfJJ;y%v^^%^rnWy}hdF+0@UxuHJ(yighQLuD)om9enY;yy15m9aQf#*$DOOGCZJ zvQSmaLshK^Rkboy)v8cctFP$Y#eH5As%mYhs&%2N)`zOv5UOfpsH#n&sy2tJ+7ha2 zYpKP3-WIBAd#I`%p{jO_THNO)p)!_*%2*yMV@0Tpm7y|Lh00hRDq~HkjJ2f}2YFqn$5|h$YD1{1 zjiIVGg{s;ds%lH9s;!}_wuP$NUTSfWcZ90i8LDbmsH)wes`iAc+8e5BU#P16p{fpq zsybL|agYy%syZC1>PV=nqoJyfg{nFps_I0js*|CrPKByEU21Wk&xEQv8>;GDsH*d! zGA@S7xD+bma;S_dSAM_gJ$K8fT555StA(0dBh=iQp+4P*tr%RkaCK)izXB zyHHi_LsfMMmC-3wM&~PfZ*hUUgv#g|Dx+JdjP9W_dW6d887iY!sK@CYs;W<@s=lQb z_qkuFs{Wy>285~_7^-SesH(xCs)mHB8XBr0s)?l*=Xz48s>z|Mri7}R8mekqsEiq*GG>O#m=!8xcB#dEo)ao# zZm5iTp)%%&%2*KUaTbQES`?~kaj2>#p{kaaTHNPlp{kaLs#+1MYGtU5)uA%hgvwYO zDq~%!jP<1!2YExNjE$i(HigRA9O`klgsR#as%l%Ps_mhwc7&?hS!!{gcZI6j9jaR70%JX}`W2mZ5p{hEEs_GJ|s%xpmeeM>js(Yxa9-*pw zhN|ins;YOWsy?Br`i83N7pkg%sl~Y-5UOfmsH#Dsss@Lu8WO5%XsD`Tp{j<5su~fh zYGkQ*$rv3fV@#-wv7s`?g~}KoDq}*ZjESK#CY4&;=P98wriRLx7Aj+UsJSyk4VoQl z(40`8cW$Z01)djb?)*@57lfL-Fx1Ci6e?qJsE@xSRMpZ@Rm(zEEibjWz$-#ktqfJQ zDpb|#P*rO}RjmzGwJucE`cPFHLRD=nwK&L|LRD=JRkbBl)z(l|+d@@s4^_1zRMpN< zRl7n}?S5bV42r5fp{n+Vs@fN-YJaGz1EH!8hN?Ogs_Jm4sw1JQj+R>7=VPI&j)$r` z5vuBBsH#(;s!oThIuokuY^bVpp{mZ8THNOgp{g#1s=5@a>T;;6D)0aFVx`+FtEw8R zs#>V3>Y=J?gsQ4pYH^=yg{rC@s;W+?s=A@7>V>MRAF8TBsH%pcsv4DA9ONdU<~9p8 zw|S_!Ekez08ES5;P;*;{n%gGSYqTx3_=dS%sMly8s;Wb%s*a(mI)$q09IC2IsH(1^ zs=9@$>RxJbpL>L=>KUr4SE#Dqp{n|Xs_Gl6s$Zz8{-LS{gsK`?YH_^>g{m4Hs%l86 zs-dB(hJ~sc9;#|YsH%~nsz!yX8eM8}y~l*A8XKx=T&Sw?p{gc?s+t(8YEr1G$)T#I zgsPfaYH_`%g{qn!s%l24s+pmxW`(Mn9jaV?Fd!1GgQ^CP*uA_RqY8?wYSvbT<;52wLetVflyTkLscCL zRdqO2)sawDM?+N|3srTz)Z%)d2vv15RMn|aRi{H$oe5QSHdNKQP*vwcRb2>Gb+Odq zm|qH2bvaa3mG}P$fJ&9_QdKonRkcu6)k9U)2vt=xR8_4|Rkcekj(MF>RdqvE)eBWs zKU7tNP*n{>RW%A#)i_jDlTcMnOD&FhvrtvdLshj1Rn;<7RjW`{twU9{302j$)Z$!s z2sNlvs6m}W4eAnVP}fj{x`i6lJ=EvzS!!{RdxiSEy+dX636;?|R7Ss08T~_L3<#Am zFw{SVL09y>pAZ}pYVNR5bBBkTJ0jHFk)h^}3N?3hsJUZ8y~fy5iwit1)N70nRW%`0 z)x=O$lR{NZ4plWJRMpf_RntOMO)s^$z%xQs%?wpFD^%6&P*rn6Rm}}mH7``v{7_X3 zLRBp+wYb2GLRBpeRkb8k)zVNI%R^P*poaRqYB@wL4VRo={bLLsjhyRkgp= z;^ZC(Rdp~_)uB*TheK5z2{re4s6i)7Ew1RPP=ii~8gwSqptGR{oeTBx&xiW>7eZxR z4E6CZg{rz-YH?$$y#L4BRH|J3mW-;Qs;Y&msvfGUMyRTqp{ibqiJ1JycbXP*pucRrLy0)w|T<`)nVLuJeh zl`%h5#)4283qxfr3YD?A)Z)f236-%l)Z;7*Rkb`+)rwG6D??SS3RSf_RMnbLRclKv z4)VHCRqI1lZ3tDhF;vy2P*s~lRc#4XwKY`Lwop~uODzuaj!;!QLsjhxRkb@*)t*pQ zdqY+23stp0RMmk{RR>Eg?(?BgRfj`W9SK!+G*s2GP*ul6Rh%4iWPqh+bZL2eZ) zqjji^HlZ@whRSFc>T%kKs_GD`s$;0CPNAwgms%X;E}^QrhN|ios;Yaasve=LdWNd% z6{@OtsH#4ps`{2%+~jA@}VriaRy5h`P5sEk>m zGG>=r9OOBnGUkQKm>()*L8y#{p)wYQ%2*sKV@artrKJ{s@Wt{_87o3%tPGX0D%9LH zq2{g)HFsU8x$Cd!dq3s3Aymf3P#K#-Wo!=h@wbM`*cK{dd#H>Zp)z)sTHM%Op)z)d z%GeVsV{fR}*cYm5f2gVhp{fpssyY;^>Ts#W$vqOP>S(B{W1*^!hpIXes_JB@s#Bq= zPKT;G6RPTLsl^387pm%fsHzL0sxF4Ax)iGFa;T~*@Be|Pepysi)lgN{LRD2SwYb1F zLRHlaRaGlgRqaq!bwX9u4OLYyR7QhP84XJ)Z*k02vs#ORMntRRf9uS4GC2>G*s2FP*uZ2RgDN$ zHL}#=d*D%_sz!&Z8WXB&Y^bVnp{mA*s+th0YGSCWNujDHU(vg7moYU|#GRhksycKv-7OQ@=@p{lxts_K44-}@=Q9-*pwhN|ins;YOWsy?Br`i83N7pkg%sEmQ77T0@F zsEom(GKPf87#b>LSg4HQp)y8<$`~2yaYluz8eM8}y~l*A8XKx=T&Sw?p{gc?s+t(8 zYEr1G$)T#IgsPfaYH_`%g{qn!s%l24s+pmxW`(Mn9jaV~SS7pkg$sHz5`sv3r>Y80xfaj2>$r55M9S*VQWp)y*8%4iuXqgAMk)}b=mgvw|e zDx+Pf$7x?`Wpg`)n%gs?r z7#}KQLa2<1p&n;asH(}Ks-}dhni{HVTBxe&r4}c5MyRTpp{izus+t|DYEG!CxuL4& zg{qn#s%k-~s)eN%2YFGbs>PwImV~NW8mekpsH)|ms#b)mS{bTpRj8`fr4|QyO{l81 zp{mw}s#+haYD1{1jiIVGg{s;ds%lH9s;#9K2YFkls_mhwc7&?h8LDbmsEj?KGWLeb z*cU2e{}p}jr~D3t$~YJ*<4~x%M?$^E(NG!3Ld`u9YVOHWi<5gQ)ZEjd=AH@l@y~{u zdp^|M3!y&##ZVcSLSK`g&V5p2ip)v-C$`}$VV`!=Om^(bw+!3MXjtuo0qe5kj4mEdd zsJY`p%^hEAaa1RS%9t1`V^XM$$)Pf)gvyv2>NTc?s+u0EYDTE4nWYv-byldV*`ccD zgsPewDr0`Aj0K@G7KX}L6e?r!`|39u%UBXBV`-?2WuY>bhkBe9p{iDfs#+DQYIUfp zHKD53mRg+Lb)l-(hpO5Ts%m4Xs!gG)HixR(5~^xzsH$zDsR7TTK8O=gvG!K>0BGhZNEcG69 z+k~3iHq_j9q2{&^HMc{kL7hSk>Rf7ZW4na<_+3L~bPJWyJyb@IP#HZ#W%LU5@q34= z>JzG}Z>hzN?H8)5f2gVfp{fRksu~okYH+BkA)%^O*9V%l_sEoa#GWLafoc*CP4u;A&6e{Czsl^385-Q_psElKwGLDDJI1wu2WT?kE z6{_lVsH!ues?L^L{5r$AP*vwcRb2>Gbum=crBGFuLseCI|L+d2Tnr-+58t&3Rn<0BRl87C?L$>{2vyZFR8^-?Rh>gsbqQ6~wbbHV zcMDb3JycbXP*pucRrLy0)jL#GpHNkOLsj()Rn@=L;#?01RW&eF)u2#SgF{sf2~{;T zRMoIhRl`G7jR;jWvee=aw;vrUV@#-wv7s`?g~}KoDq}*ZjESK#CWXqFTxxNjr-Z7S z8mekqsH*9qs%C_$ni;BUR;a4kp{nMDs+wDBajxfus+u3FYC))~g`uhzg{oQ{s%lB7 zs->Z-mW8TXUTSf@SA?os8LDbksH)YWs@8<6S{tfrU8t({p{h2NTAb@mp)xjy%GeSr zV{534ZJ{!@hsxLyDr0A;j9sA~XLqT^x!x11YHz5F{h=}rgvvM=D&tV7jKiTaj)clM zT555wkA=!O9_n#UgsM6js_Imzs?(vW&V;Hu8>;Hu6@Bjq_b!CWxELzqQmBl}p)#uc z^Z)->Hdnsmb{SPeWmF55Q9V>fjZlwMv((}~*9uitJ5*JjP*rt9Rn-esRX2{&2+J~y@ z5UQ$UsH#q(syc_N>JqA|YpAMjp{lxQaqf0IB^O#UoV?$Mq3sp5f zRMmt~RTD#1O$t>tIaJk@P*qb)Eq+OPdZ>&Up)zKM%9s@@V|J*FIiWJ1Dr0`B z#eH57s%l}Vszsry7Kh4M8Y*L1sEp;IGFF7jSXpXupI3#-SRE>3O{k2up&n;lsH*j$ zsy2kG+8C;8Q>d!VSMUgQeztlS!D&thBjMJeq&Vm zsEqTWGA@M5xL9g&pD%@~x*V#i%D=q-HT2FqR8Q^os;V2Rs$Qt7`k|^CgsN&7s;W__s>Y$JnuMxqT555wn}w=s9;&KEsH&Ess#=Ar zY8|SoO{l82p{m-2s%l?qajrXrs_Gc3s#B<{&Y`NhgsSQqs;XP4s_vnxdW5R#S!(g! zb+1rWy+c*?302iMR8_xFRsBO%4G2{=FjUo`P*sCVEw1;FP#MEQWeg9MF(OpP$WR%h zLS>8&l`$q%#@JGe>pd>i2cpOD(SVqEJPV=nqoJyfg{nFp zs_I0js*|M_*ZWkcj5DD!&W6f37b@d?sEiAtGA@S7xD+bma;e3+uJW(%e@VUTZjVzn zR8_T5RnuSQP#GOUWppgHIM`+y6LRHNTl`%im+=Zd$E($ewajC_x zek=_&Xj!O1%R_zq6`|&?4E6C>h00hRDq~HkjJ2f}2YFqns`a6&HiW9$7^-ShsH)AO zs7kGj)bZ@8mj78sJSOY%{^6WadOXu8gw?)pmU)HoewqWLa2{_G1SMu6e{C#sEjKA z_WqZ}yYKb!tCm{)u^81uWz-0jQ8QFVtxy@YLuJ$nl~FfTM!irO^{?oAKgZV~R7Rsv zbDM;k+cebNW})Ub4>h+%sJSgmEskodP_NNCR7RUnuhBMCRl87C?L$>{2vyZFR8^-? zRh>&Mj%t@sRb4|>bqiJ1JycbXP*pucRrLy0)jL#GpHNkOOD&FSzfe{ELsbn3RW&eF z)u2#SgF{sf2~{;TRMoIhRl`dyz9k+Js%m7Ys!^e;Mu(~z6RK)#sH$X+^nh>gL zVyVU7HZnO>#*|PQQ$uA;3zacFRK|=@88bs=%nFq;yVT<3&IwgDH&oTUP#FtCWh@Mp zu_#o=;!qh&LS-y1wYag%Ld{(f>NQq|%2*XDV|A#EHK8)rhRRqMDr0@A#Rc9F>NPfo zs@fE)YICTnEupHmhN{{Ys%m?vsvV)Kc9vRv54)$UMLdqP$14VAG!)ZBxi1|7bl z@BQ%Ikx+ws%mwpsx_gi)`qHD z7piJ~sHzR2sy3Edoa;@Ysy2tJ+7ha2YpAMip{llrs@f5%YGJzG}Z>Xw%p{n|asv1ygagYav$`~9fV@Rlsp`kK{g~}KnDq}>bjFF)-MumEu z(eJC@F{f%wsH(A{s>X$?8Xu}^La3^Vp{gc@s+t_CYD%c8sihY8d0MEd>7lA-gsPeu zs%loKs@b8c=7g%68>(tvsH*v;7Wa8UsH%mbsuqQ+S{$lsNvNu&p{kaJs#+eZYDK83 zm8BNnPOl18wK`PQnow11LshK{m9ZgI#>P+?n?hx5F17e}dP}H`t)Viuh053-Dq}~e z$JrUGYFDVL-Jz=XgsR$GYH_{yg{s;gs_HK>}9N2scvr55M9SE#Dqp{n|Xs_Gl6s$Zy#0iiMmhRPTeDr0b|#eE(U zDr0DDBs%mtosxhIe#+F*#=W(H`#)qn!5UOfosH#b!swRi3 zni8sNYN(9qr55*jMyQOLp)zKL%9tH0V@{}yxuG)Vh02&8>Twp7S{&p>p)wYS%2*OA zV`-?2WuY>bhsszHDr04+j8&l?XLYH?L0%K8YHg^hb)l-(hpO5Ts%m4Xs!gG)HiydC zT555Sw}r~s9x7u;sEnPVGIoW^*c~ckPpFK&p&n;nsH**?76O`ojlcg5-`BbQ?)1j))gsM6ls_I;*s`H_$E`+MO7^>=0sH)3X zv~uwWFIWA~_rGXXzFkJOP#M)jWz-0jQ8QFVtxy@YLuJ$nl~K3U;vm-xRaHM!RfAAf z4MSBm3RTrOR8^BuRZT-xH49bMywu`8w+L0$GE`NoP#JAPWwZ^I(JoX*`%oDjLS=L; zwK&L~LS=Lg^%`A6WpoRb(LGc~k5CysLuK>|mC?J@;vn}4mC-lUYxE0M)jw3#fKXKf zLsbn5RW&$N)sRqCLrX30^RQ4=!$Vb#2vs#QRMn_ZRii^yjR{pXHdNKPP*vkgE$;J# zP*oE{RZR+2H91t(lu%VuLsd-+RW&_S)r?S8GfORg#$Qtzz)1?*%`An#)v!SZag{nFq zs_H_hs*9njE`_SP9IC3yf4%=z@{Y=?s+L;Z=W3yFs;Wb%s*a(mI+a=+ zP#JSWWy}keF+Wtsf>5uqFjUo|P*saVRV@iswY1dYJ}(PZwLDbSicnQ6LshK`Rkb=) z)tXRMYeQA73stqg)Z#vG2vxN)RMnwX@XXUkL6F zm9ZyO#@x50!BsRK~$j8HYk;94@su$VWm|9SxOnJXFSsP#GsfWt<9?aXM7S znNS&LODzuaxlpfhAymf2P#Kp(Wn2!GQRTni|FU@JUFKE`HMe@H#RaYr>f_f8l~F5H zM(t1;bwXv-4V6(ZR7QhP84XJr#sw z+a^>-+fW(pLS?iMmC+$oM#oSYokIO@qjRXLE}^QrmRfvE+$~g9_fS8%l`$?<#`sXLF(Fjd#86d}LRC#JwYafULRC!-RW&VC)$~wRGeT9(3{^EN zRMqTIRdYgB%`LU~9(Z1;s`;U+7KEx=7^-SfsH(-Gs+NSRS{kZqS*WVzr4}c5MX0Kk zp{iDes#+bYYE7uBwV|rkg{oQ~s%k^1s*R-<7kE>ss?DLQwuGwM8mekrsH*Lus&<5` z+8L^9SE#Dpr55*jPpGQBp{n+Us@fl_>OiQfgQ2Ppg{nFns_ICns-vYA=lWQvs^g)m zPK2sD8LH}3sH)SUs?LO}Ivc9$T&Sw^r54xwLa3^Xp{g#0s=6Gis>=Vo|3&kzdsS5p zRaGriRrOF+H9}R@EVa1awL(?Z4wX?iR7Sl}8TCVDGzgW^FjPjPP#KM{=zBl7*CbR% z(@>AoEL2tVP*p8LRkaLN)hbj~>rho~LRGabwK(SOLRGa7Rn;L>RmV_OokCT04pr49 zR8`miso*~!L-AiehkNlTp29Ei2mBok&<=et3Nx?_TW|;$a9za;75*P?#+~>op2YKb z74M)9TA>GqVG0&u1NPwzK6q7y3jd0ma62Bv<9HUo##^X?X6S-Jn1Fd$gIzd*tFEq4 z;s4+U+=~10O+1Yk@j9xY5jvnB#$Xm!U>lC$60X0dLWTdrEw~$x;QRO)UccuG@dTd3D|j2V&;s2s1e34;>#zr>aLorRRQPAyh}-Z0 z9>X(u8E>K*nxGQ~U>xRR4R+%cKJX!LjZdO7zJe$50$#)4&;aex4`VS0tFa3wam}?A zD*Ov>LM1$m@8Ws<4)3Bq+MzGTU^Z4^Cr;q%5Bog$1a8MecmhAgtN1JGp)LAgG-hEX zcHlU!x~@Wn|BD;(MLdY_;5qyj@1QQ)pf^TgCRSiOj-ld5{BH&y$8GpB9>-7c3jTsR zXpLSNi5XaqZ8(Yw*H@_UPq+bJzytU;p2ctQHfp04dSV2oV;Q#M2rlDeAFWW~e{n0m zgvan>{2G5oEwn@r497Gq#TFdKC4AI!{|`Qo`|&M2gJ0n-)Iv#&k#OtVv zCg_Yon1}^fk9|0cYyYJ}h5x|KxC@Wq2lxg4h$?7|P8f&@n2&YXi!=Dpzq$kXH15RL z@O}Iof51P`2pus1<1r6wu?MH|!GEhz;otBn+<~v+d-xfCkH4cKI-ozsVJ_BSH%{RL z|8D>BNmRyH@FZTqYxo-)pgsCwEaqS}cHtzh`49V#n@|Z4jO>9Da*;P#0~` z8>282E3h5MQ1QR)KR%Az@MS!XpWqez1$EFGy)Y6pupHZP6czs4{^JIG0T1BYcox6G z+o+9J=!p@Sj%C=2Be;x@`J(*4xD{W*WB4(CjX$FnTA~MrV;YuX3l8HFKKj4*AD_ql z_!geQukaRXq6NBR7^Y$gHscU3;`$qWBzz9{;hXpoUdEqL1I^J5Loo%5u?Yup0U!Ce z{l{ld)48de9!bTjxd0cm+{l_i12an>1coA=)8k(XD24fNyVgvT$ z96tOB`;X7yZhRe2;g@(FRnY{UF$fc}0PC?2XL0RK_8&LnEY*+AU^Hf7C3fIAuDZqk<3@ZD z58^v`4!^}asEan}jZv7171)krsQ6j?kB{Rvd>N19CwK*aK^?S4FO0+tEXOt+Me&XF zKj8*^0T1BYcox6G+o+9J=!p@Sj%C=2Be;x@ect}#R(uJM;m7zj{)}2^i5?h^X;_La zIE+j9=&kl2pU3_97M{Vc@D^&K1-fGxreX;;;}9<5`Y+gjd=B^FoA?o4#-C6F&Cv}* zF$IgU2?ucjAGyu`$1S)AkK%`T5pSRx znxYE^V-gl(1NP$_K770V$7gUizK*BxOT3P%XoAidgo#*y_1K5AxVDo0$IZA4kKhOR z1^$RCXpBx6hzXdFb=ZqD_)um0k5A)Hd=1~n&+!NR1C7uT127)*uoio88XvsF{^L`) z17F4W@H6}#e@8=fK!1$GT&%%voWck0wEy@dD&s475-;F2{0$Az9{n&DbFdn_a1z(t zW&d##D&b*#7tiB&co+524t+5Ov#|<0aROK0ZU6BJ+>VFv1b&KF@mJJCTlB$b%)(0S zz;Rr4kNwAu_#z&}ckmp3i+4~LZO|K|FcT}V9mi1dUi*)a<2HO5kK-qJ1%E*uv_>zC z#0)IQHXKET`|Lk%z!&fUzKv(`8@!F$Xoa2_f$3O=tvG_q_}KmSAGhL5cnm+rukmNp zLQC|(a7@EeY{6k%!biVk|M7X;k8j}_{0eWOCR(67hG8m}U^5QkBCdbH{^N7F58uR( z@G|~{8fcDg7>X%aj7>O*3;4*F?LR(?d+`lCjhFBys-qdYVhAQ<5jNrg&f~fV?LTh8 zJ$Mv9#EW_0w(yYY2Agd z+kf1QyYL8pfM4K`sDj4mgn^iV`B;a&ID-#;#s1^dxD#K)_wjT50slZFbi@FR$2_dX z9-PJpziR*SDcpgt;(PcReviMSAv&Nx#$hhjU^hr8=IDl@n1aRFgoC(%k34Sw@mbu9Z{TUXgf~$g&CnG?Fd2)m z5eIM{*L}zS;}+b5NAW|vh&NCTP0D-7kDGB99>EXr3;YpP&={RC5EC#T>#!GR@S!K|KR%5+@ilxOKgS>N z4>UqY48VBI!&>aYX?*Z|_8*_Z9r!A~ho9m1_&XY+1Nvhe=3)(Y;}kydefy73qB6dM zC-DMa!{5*V?a>coF$b%$3ny{S59~i~LM1$m@8Ws<4)3Bq+MzGTU^Z4^Cr;q%r|dsI zf!pyAp1@D>D*lRkXp24=jagWU9XO7werW%3Bff|S@f|#e-{KwAMH}?SD9pqPY{xNF zeA@owDYoD+F5#m;w*UA%?#H+A41R^TP!lcC z9m6mcORyP-a1qx(Yya^%+=p-CM|c^3LJc%WHw?uTEXF1r#07lhC-xto#l83jp2kaf z6V=fST`>fcu?QP+0OxVtbM_y%;2u1RAL2#4fof=qE*OkSScnbSk8}9&PwhWGgS+u{ zJcVE4byP(YbjBb|!~(3xKAgq1&)a|8jJxm%et=)#kEnvi=!AiofcaR5y*PspyK^gFmzX_!REISMfdk48O^@fAFY7w{VXh6ZSlei(~6SdCpciEDmg|8Wy4;bD9i&*OJ^7xmE&eK7{J zu?jnJ0$2ai{^Jw49S`9N{1mU^uc(K%=!4Oig_YQW=u8_(i5 zcpJ6R3Oz9b)3FR&aRis~v0vGL+=?&ZG5i?6#-C9OEztwRF%3(x1&477AN{rc$LDcB zzJ+J-E4+o8Xo2n+hN)PB%{YXMxc)cxAD_d0_$GdYm+>dmKy!4%P)xyMY{EfYz(-!O z|M)EK#W(OYUc#HGj%MhJA()Is*oXr-kL!MG|8WcM!K3&gUc?)yhNkF(!I*@F*ns^w zhY!DM|M3~zjj!V={1UIDDw?1(24Nx=U_JKXEUx{X{m0F?3yEg& zk9F9KGx*SJ_8*_do%kBQkDucY_y-!HBL-kR=3y=N;50t?d;5=1;SPKi-^0)Fd;A>@ z(EvyKxF1_=Ek&Cs7$+!IO9aui-bt)In?X!br@(a%{sBNpEPjKxQ5&t$6C*Gk%dizka2X$a%l_k5d2XO%(`HTI>XK^pSfv52j-b8gYLstyJWGuo)9Kd;8_m2I?Ew~4d;)i$*IF*;!&CSX3+VK2_$Lw~dX_%!as*YJJ(9Dl$+&_0w<%J>SN#0z*0e?tSbM?Z|k9IVDJoWwO% z>_2WoB|MDp;(7cI@1j21p)bZ@HdbLLPT=aQ_8*_X?RW@J;HP*Me?>jCMIVgDEUd&1 z9LH7F>_2YA7x5szgXi#Dyo0)EgWed0nOK4CIEISV?LR(_+wf&Pj-TKa`~`K;8oe+Q zGq4=na1<44*nixBFW>=u8_(i5cpJ6R3Oz9b)3FR&aRis~v6}WDx8h593_r%N@n_UR zOZ32SOv6%a!C_p&M{C)Cd>;4XTX+V)!ds|`7U+&)n2IIXj6=AH>ucM8d=B^FoA?o4 z#-C6F&Cv}*F$IgU2?ucjAE{&i@mbu9Z{TUXgf~$g&CnG?Fd2)m5eIM{*VVQExCQs% zQTz}u;tfv#&k#OtVvCg_Yon1}^fk9|0cYwO#8 z+>E>M2!4QH;E$++#^{8Bn1K0MhrKw14>hp=_%!as*YJJ(9Dl$+&sk|M3ajj)(9Beu`J|SJXpW z^ucJ%!b!#5JAlKW;)LJdE$+dHfFVqCVQ8FUDXtR$(Vj;OfryAD_VOcnDA6 zr+5{AMLo1dAB@H2XO%(>1F@%S=@_n;Ay;sH&GqU&=o^4 z8H=zH2XG$O^|t@G1^3`l{17kV4OBx@birUu!a{7oew@RH``CYc26yA@cnZJ7>!^w* z=!`*_hy_@WeK?D2``UlpjJxm%et=)#kEnvi=!AiofcaR5y*Psp^|Sx@|H``W=&a9z z590(95&{7N`s(q#9d>H#XlbRbv$kqmZME*LZCzEX)wWu-wT3|0VQ&aC%q$WhKtc#3 ztb{$n-aBka2%Go1sc>KCyglbz`5gbaujePvot)Z-+{+$iPqXi^m)T#~e^?)u!}3`< zt7co-5!T3>=kfk$_pnFUQ|#O9CH6Xdk7ct_ERU736>JMT%+9l+UF>1@B>NWo1^W|wm-S*J*i<%$En~IpAggE1 z3VHvtkFtl@m)JMi&)IA29oCZ#XH(d0wv=sR2iQ5*w21dVyOVvIeUZJue#ZXD{>6H* zVQexhWlPvbwx6A43DbH1vpd+Q*caII?5FGx?4PVV8_FiJS*(g}VEfn^)})yCKf9fM zl6{_igZ+g4p8bP$V?)?PHj^!8>)Bp*nq6nDOL+gYPq5Fiud^SsSJ~UFD;vym*$lRb ztz&!GDRzye&EWmd9%P?o&#@n|-?6{5EH;QuU?prJTg!H{lk6&MHIw&0dw@N`o@GB| zzh!@8UD!Z2o)xnNYz^DRPOvMi|^XP_BHl>_G|Vg%Vho8SXRX5vsG*dJH{@t7PEQ(v-?>`_Eq*h_AB-V>%{u8 zF|3ePu$62(t78{g${gPR>^}Ax_7(PB_6qwe%V2%kXjZ`Hu^P6G9c33-@?75k>|XXL zdzyWRz0Cf?{=@pP9G1_@SvA|rj<817yo~ohyN5l(o?_o-FR|Czdn}ucVtK5LtzcW& zVRoJ+mGk~*ce4)c%j`w=OZI2>Z`PZQWYgGOww!HdhgbtkoX7j0-NhbePqJ^ZU$8&1 zcUdnsf=y*}*fLhj4zhaItb+GH`zU*eeTjXO{hYnV-eEo2a5ja_W=q*7c7UB@P3QCe zXLqtsvoEq2*w5G>*}qs1HjGVXrECe?$o8|dETNM3Kf8l{ihY4S&wk4O!2Zd)v!QGf zo5iZw2DXo#VNDkB{%5zdPqNRmZ?K=R-?M+PZfppf$Y!#|Y(3k{PP6N*^+MkN>=W#B z?Cb2u>{a$Q>&gbRTsDI(V(Zu*c8XnNX^VLOvj^E{*>mhi?04+%EQ<|d6Icmb$kwvm z>?FI&S}o@N&mLe;uxHs1*>Bn3SQj>sjc3Jd0b9d%u@meHYgxtnpM9J?&YodEV83B+ zvCeD&8^@-zO17HqWXIWMmb!%ZKl>PajD3xLpZ%J>$ue1gHkK8!`D_*2!H%&@ti@8^ z|LlI&k$shYkNt|h!8);iYz!-86>KHj&g$4jma>fZKf8~8hJA&7m%YON$}(7AHkuW% zd8~$QV@KHqmb{$zKf9Mb%ARK5VK1}4u>Y_=EQjT@a#qc@vLmdKHDAH|pWVYAVNbDd zvzOTG>^+vvMzK6r##XQ`>@YjeGSc`x-z4M1&u+T;NxzN{^ZB&#`kWY_Pn!@KKGvHn z^7*uhBA-v2B=Y&R$s(Umnid5|qsoEVGKB>D$q-w87)jpA` z{UTKdM5+#oR2>qjIxJFkM5O9yWcZ|Rok-O&k*eb&RVPHMPKs2W5~(^ZQgue8>a0lB zxm&#PS05~&(2QZ+=RYN$xnFp;X^B2^R80}7nkrH?O{6MMq$*#esz9WwP^79TGF;cwMXHKL zGG>Tm%oNF(C6ZApk}+E(V~$A1+{o}}L6(a=jd>y&6(Sk)MKUTyG8TwrEELIDB$BZ> zGThHqB2`O7s+NjWEfc9)E|O6#l2Id)u~H;sRb;r3SBqq<5y@C9lCe%CW4*}3*&tH2 zQKV{Z zB2@=Ost$@&9TLenB9d`bB%@9w<5*<4kdKRGoDj)4DUxwYB;&Nm!#N{TbylS6oJdu@ zNL52*xS!99R5glJT@b0dC{lGvr0TLr)fJJdt0Gm`M5?YwhWk0=_M2ZN9}nL@cM_?} z6shVgQq@JIDodoQt4LKhk*e+@RXrlZ{oGTes+UMrZ;`5Ok*Yo-ReeRO`iWHa7pWQ` zQZ+C#+|PqVss@Wx4H2mtDpEB}q-wZG)d-QQks?*2M5=Nk!*|!CMKZ>UWQ-HZ7%!4B zK_nwrBx9mT#w3x9$&ul@o+9#arixTe6RFA*smd3rDiEnE6sal_shTcQRU8>^?-G%! z86s6PMXF|rRF#TU%@(PeBT_Y2q^eA$s{9sj{3N+~B2^V4Rr5uvDn+Umh*T{Usahmb zwOFL8N~CH@WVpSTic~EVsah^lwL+w-TBNE*q-v!|)hdyy)go1EBEx0AR-|g3NY#3g zstqDl8%3%%iB#2!RBaZi+9Fc5H8Nc0+eE6ii&X6psoE)0wM(RGw@B3O^F?%uk9`of4@!Es}9oB;%Y& zM!iTzgGk1Ck&MR3j+}c@9L)kCDJr$|+=$Z&i27OBb>sp=zA)mNmdpGZ}Ik*WbA zRRcw;28mP+jtsZ=5Rs~(B2~jgs)mbHjS#6CDN;2`q$)?GYP3kzn49EJ{iSNGNYyxz zs_`OK6GWmj2V&P zex50kF-s()R3u}zNX8tIjJYBiWg<^wUSzm|=Zl~fKFSBRWjE%JJ6L^4*2WULa&SS^yVM&#wM6{%Vm8SdEiB2^nisy2#LZ4#-f z6{*@RQnf{-YO6@qHj%3Bk>MNW9U@gbMXGj*RP7e2+9Oi6SEOp6NY#Fksskcb2P4A` zd`P6~ut?Prk*cF2Rdpg&$3&`*i&UKusX8fAbt*F4z^6s3&WKc<6{$KWQdKWf)gV%J zUZko~r0Rl5)y2qg178xUx-3$4MWpJgNYyows_PQ$?z#iB#o9hRZx(q^dxqs!*h=NTh1INL8^&Rf$N|43Vmt zB2}{@!)0D7QZ-woYK}3Wk*ac$s(B(+6(UvhMXD+z!)3leq-vo^)gqCMDv^vO zBIhm>Id{3pxho>WbzLoTZjH#fD@C5hDv^xUA{lE$p2k{{s&yh&>qV+IM272nqe#^z zk*Zpes?8!*TSTh1id1bAsoE}5wL_$8XJojpcZpQ(7OC1JQngnkW4}no0g;S@A{mE7 zG7jHjhwvTp5s{3eA{lie8OKB(&T)~d6Czb7MXFATRGk*7IujYL>$4(N=R~UNMXDM^ zs?Lj4HHuVS5UIK-QgunB>T+ZUGOmhbTocK-E|QV)(VJg1J9HqUlSoFUNJeLoj4mP> zS&`v>?kZB%O{A*3NJdYQj9wxcy+tyzMb7OT8E)WyBIouOdHDlH&K)Fj?qHE~hlrdz zROICk6Ui7J8Lr$BA{iq^Uj8VNsvME3(IQo2M5@M$RE-m<8ZS~cAu?RKxgu2)MXDx= zR81DCnj%s)RitW~NL8LlRlZ17L1eg)3q`7mM5?BXR27RP`A`hopq^jl?Z~P>=l_FKEM5cs>32x zM?|WQMurQyPNeFXNY!zXsuLnrCq*(&i)5S;$v7*LaV|34&-Ef14I&xmMKT&iGA@Wb zoQon=mqe;Ai&R|^sk#~&uIpFE;uXlsUxf?~!-6V2ut;oyY92suWEg~6PMPB|kk&GQ889PNX zc8O%{7RlHnlCd{3T$}quGWLsP91wXp2Sut5iBugHsX8K3byTFPPNeEsWVlq1i&UKu zsX8fAbxNe_v`Ez%k*c#IRp&&i>P4y=BEubfUZko~r0Rl5)kTr2OClLpL^7_5WLy); zxE>kq*o?bxel2_SaWXoIWMqnDbQa0zBJyyuM9%Fla&C{v@O^Agk#l>AoZDOE+-#9^ z`-q&|SLA8*6L}i_MXCmfR1Lhv8$bSSkVw^Fk*XmgRYOIphKW=S7pWQ{QZ-VfYLrM- zPGq=aM~hUA5vdw0QZ-JbYP?9*1d*y-k*bLzRg*-jCP#)VcZx{WRFSG_B2{@JRrw-S z1tL|2B2`5qRntYPiX+2?Tq066L!@e^NYyNns#1}v*&A{pyNGB${0Y!u1ZB$81plCfFj+^vz}%H1ZCv0Wr%hse3RM9$qUa_%0H z*Sk;T-2IW^jy)i9?m>}r4~e||!y*|+L^6(wWYmeg{9__j$3?16M1~vqq)62%k&H7U z8D~W@&WU8yi)1v2WSketXuQQ6KeO$ENXA8xjLRY!S41+diey|9$+#|(k#Wz>uXG(B zBBN7exMMR#GCGU=Z`4JkDodoQt4LKhk*e+@RXs$idWux_iVSybZ;`5Ok*Yo-ReeRO z`iWHa7s(hXk})VU+@eE7&K)Xp?l6&ahl`v$Lgd_$B2QzKNJfrG#%PhJF(xwn%fsVD zGRBK!Oc2S)6*+g3$hnh6&YdE1?$pR|i%t{C$P>xP7s)6P$tVZnLnok-O&k&F`}=bnlTSMF(%bI*vJdsgJ!b0X)~ zi=5ja@-)tiWHgFAjSC`G7bC-kd`YD0vPjhxk*cdARo6tSu8U-Jy7%Un#m62YBU2=! zb7c5Fwu?wcmPkfdk&JF48Qn!PdWbxno+4GfM5=m=RAono`?-%uRbP>+ej-)Tm%oNF( z6&Y^eQjv_=A{lc;GUke$TP|{Lg~++{Mb53f#T%ceTOe}oLXmS9iJZGwQT57OAQcsah#gwMwLFwMf+(k*c*KRqI5m){9hahzvLIMvqjIvg1;O7R2>(oIw4YZQl#pXNY!bPsxy({LOv@}bxx$JUZko) zr0Tp#RijAN1(B+YB2||}sxC){3;Bvj)m4$IYa&(GMXECHyZKe}@s3n=5~<1*sp>3J z)kUN#D>7WjT}7(8iBxqLsp=t8)l(#+w@5~|NJbx#jJ}cKe(ood(O)EEfJnwbk*6_O zBx8t3#!!)rVImpBBg6l>Hd5r=Q6lH&h`jvKA{k>uGRBHzj1$QiFOo6g7H@ojE?1;# zqDaPMk&G!K8B;|vriq-JFLG`{WVnzEMb0e}c^cD2GKxhqN<=bdh-AzZc^b1ss!Bzw zW=Dn#d5%ccT#>3Wk*ac$s(B(+6(UvhMXD-AsuqY;EsP8o@*GZs3(78LLDxR*Ph;5y@C9lCe(Y;j9;_+8|Q3QKV{9WVnHAMXEN7 zRBaKd+A31DO{8kONYxIJs+}TLyF{vXM}~j!yH_M*pGd}jk&FW(83#o&4vAzO7CHB5 zWVmDNL^6(vWE>aCI3bd8QY7P)NXBWAj58unZTb6$n>@ph;CrRXt4&5Uscmvs!UG8%5}r)>O2Qu! z{+;k%LS{lvLT*A{!hwW#O+VN4$)?XX9oTeA(~71Go33kmrD?Nfw>5jZ*$n^>RNBB@PM`=l==K0O{Hs9AgJ^6o=Z%e*E`O)OxCBL5hX7az1 zyCnBc&QC5$o|9aid@Q*kxp_*bTjP|9s79a3ISc`c=DN@>c@l)9A5DfhH^s>O>f z-fB_YVpogC79ULgdg}A3FQ&ek`c~@OshO$SseM!Dr`DuyNUcjfo_aF%Tn@?y(2tsZRkRI3+Sz0@kB)zDUBTP+iG{KwrO{yeKGCnwBM$^ zk@iknr?g&agVXZTO43qVKi>M8)-SZqY~8bUN$axKD_S3JeMg(Ww)sb!iET>Sl((s9 zv#`z5HYshNZu_;iue5!)ZI8Bn+74*DpzWHrr`qyD)7zwfApO4dN7KKY{#^PC=`W`L zH2t0QuIb&FwHmu-%>Q?rryCyVu(Nxm`xPN$sYzD`{8O zZeF{V?LW}|Bkk{N|6u!P+rQNQ)%Ka~yS9Je^LO|C?{B6@IzIeZVw0X3iSakr6V3Z( zBu*WE<2OS1UC@*zGS00AyI&&1Nwk`Vw}@di@> literal 0 HcmV?d00001 diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.csv new file mode 100644 index 000000000..f18cdec52 --- /dev/null +++ b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_varchar_5000.csv @@ -0,0 +1,5001 @@ +row_id,key,is_null +0,test_00000,false +1,test_00001,false +2,test_00002,false +3,test_00003,false +4,test_00004,false +5,test_00005,false +6,test_00006,false +7,test_00007,false +8,test_00008,false +9,test_00009,false +10,test_00010,false +11,test_00011,false +12,test_00012,false +13,test_00013,false +14,test_00014,false +15,test_00015,false +16,test_00016,false +17,test_00017,false +18,test_00018,false +19,test_00019,false +20,test_00020,false +21,test_00021,false +22,test_00022,false +23,test_00023,false +24,NULL,true +25,test_00025,false +26,test_00026,false +27,test_00027,false +28,test_00028,false +29,test_00029,false +30,test_00030,false +31,test_00031,false +32,test_00032,false +33,test_00033,false +34,test_00034,false +35,test_00035,false +36,test_00036,false +37,test_00037,false +38,test_00038,false +39,test_00039,false +40,test_00040,false +41,test_00041,false +42,test_00042,false +43,test_00043,false +44,test_00044,false +45,test_00045,false +46,test_00046,false +47,test_00047,false +48,test_00048,false +49,test_00049,false +50,test_00050,false +51,test_00051,false +52,test_00052,false +53,test_00053,false +54,test_00054,false +55,test_00055,false +56,test_00056,false +57,test_00057,false +58,test_00058,false +59,test_00059,false +60,test_00060,false +61,test_00061,false +62,test_00062,false +63,test_00063,false +64,test_00064,false +65,test_00065,false +66,test_00066,false +67,test_00067,false +68,test_00068,false +69,test_00069,false +70,test_00070,false +71,NULL,true +72,NULL,true +73,test_00073,false +74,test_00074,false +75,test_00075,false +76,test_00076,false +77,test_00077,false +78,test_00078,false +79,test_00079,false +80,test_00080,false +81,test_00081,false +82,test_00082,false +83,test_00083,false +84,test_00084,false +85,test_00085,false +86,test_00086,false +87,test_00087,false +88,test_00088,false +89,test_00089,false +90,test_00090,false +91,test_00091,false +92,test_00092,false +93,test_00093,false +94,test_00094,false +95,test_00095,false +96,test_00096,false +97,test_00097,false +98,NULL,true +99,test_00099,false +100,test_00100,false +101,test_00101,false +102,test_00102,false +103,test_00103,false +104,NULL,true +105,test_00105,false +106,test_00106,false +107,test_00107,false +108,test_00108,false +109,test_00109,false +110,test_00110,false +111,test_00111,false +112,test_00112,false +113,test_00113,false +114,test_00114,false +115,test_00115,false +116,test_00116,false +117,test_00117,false +118,test_00118,false +119,test_00119,false +120,test_00120,false +121,test_00121,false +122,test_00122,false +123,test_00123,false +124,test_00124,false +125,test_00125,false +126,test_00126,false +127,test_00127,false +128,test_00128,false +129,test_00129,false +130,test_00130,false +131,test_00131,false +132,test_00132,false +133,test_00133,false +134,test_00134,false +135,test_00135,false +136,test_00136,false +137,test_00137,false +138,test_00138,false +139,test_00139,false +140,test_00140,false +141,test_00141,false +142,test_00142,false +143,test_00143,false +144,test_00144,false +145,test_00145,false +146,test_00146,false +147,test_00147,false +148,test_00148,false +149,test_00149,false +150,test_00150,false +151,test_00151,false +152,test_00152,false +153,test_00153,false +154,NULL,true +155,test_00155,false +156,test_00156,false +157,test_00157,false +158,test_00158,false +159,test_00159,false +160,test_00160,false +161,test_00161,false +162,test_00162,false +163,test_00163,false +164,test_00164,false +165,test_00165,false +166,test_00166,false +167,test_00167,false +168,test_00168,false +169,test_00169,false +170,test_00170,false +171,test_00171,false +172,test_00172,false +173,test_00173,false +174,test_00174,false +175,test_00175,false +176,test_00176,false +177,test_00177,false +178,test_00178,false +179,test_00179,false +180,test_00180,false +181,test_00181,false +182,test_00182,false +183,test_00183,false +184,test_00184,false +185,test_00185,false +186,test_00186,false +187,test_00187,false +188,test_00188,false +189,test_00189,false +190,test_00190,false +191,test_00191,false +192,test_00192,false +193,test_00193,false +194,test_00194,false +195,test_00195,false +196,test_00196,false +197,test_00197,false +198,test_00198,false +199,test_00199,false +200,test_00200,false +201,test_00201,false +202,test_00202,false +203,test_00203,false +204,test_00204,false +205,test_00205,false +206,test_00206,false +207,test_00207,false +208,test_00208,false +209,test_00209,false +210,test_00210,false +211,test_00211,false +212,test_00212,false +213,test_00213,false +214,NULL,true +215,test_00215,false +216,test_00216,false +217,test_00217,false +218,test_00218,false +219,test_00219,false +220,test_00220,false +221,test_00221,false +222,test_00222,false +223,test_00223,false +224,test_00224,false +225,test_00225,false +226,test_00226,false +227,test_00227,false +228,test_00228,false +229,test_00229,false +230,test_00230,false +231,test_00231,false +232,test_00232,false +233,test_00233,false +234,test_00234,false +235,test_00235,false +236,test_00236,false +237,test_00237,false +238,test_00238,false +239,test_00239,false +240,test_00240,false +241,test_00241,false +242,test_00242,false +243,test_00243,false +244,test_00244,false +245,test_00245,false +246,test_00246,false +247,test_00247,false +248,test_00248,false +249,test_00249,false +250,test_00250,false +251,test_00251,false +252,test_00252,false +253,test_00253,false +254,test_00254,false +255,test_00255,false +256,test_00256,false +257,test_00257,false +258,test_00258,false +259,test_00259,false +260,test_00260,false +261,test_00261,false +262,test_00262,false +263,test_00263,false +264,test_00264,false +265,test_00265,false +266,test_00266,false +267,test_00267,false +268,test_00268,false +269,test_00269,false +270,test_00270,false +271,test_00271,false +272,test_00272,false +273,test_00273,false +274,test_00274,false +275,test_00275,false +276,test_00276,false +277,test_00277,false +278,test_00278,false +279,test_00279,false +280,test_00280,false +281,test_00281,false +282,test_00282,false +283,test_00283,false +284,test_00284,false +285,test_00285,false +286,test_00286,false +287,test_00287,false +288,test_00288,false +289,test_00289,false +290,NULL,true +291,test_00291,false +292,test_00292,false +293,test_00293,false +294,test_00294,false +295,test_00295,false +296,test_00296,false +297,test_00297,false +298,NULL,true +299,test_00299,false +300,test_00300,false +301,test_00301,false +302,test_00302,false +303,test_00303,false +304,test_00304,false +305,test_00305,false +306,NULL,true +307,test_00307,false +308,test_00308,false +309,test_00309,false +310,test_00310,false +311,test_00311,false +312,test_00312,false +313,test_00313,false +314,test_00314,false +315,test_00315,false +316,test_00316,false +317,test_00317,false +318,test_00318,false +319,test_00319,false +320,NULL,true +321,test_00321,false +322,test_00322,false +323,test_00323,false +324,NULL,true +325,test_00325,false +326,test_00326,false +327,test_00327,false +328,test_00328,false +329,test_00329,false +330,test_00330,false +331,test_00331,false +332,test_00332,false +333,test_00333,false +334,test_00334,false +335,test_00335,false +336,test_00336,false +337,test_00337,false +338,test_00338,false +339,test_00339,false +340,test_00340,false +341,test_00341,false +342,test_00342,false +343,test_00343,false +344,test_00344,false +345,test_00345,false +346,test_00346,false +347,test_00347,false +348,test_00348,false +349,test_00349,false +350,test_00350,false +351,test_00351,false +352,test_00352,false +353,test_00353,false +354,test_00354,false +355,test_00355,false +356,test_00356,false +357,test_00357,false +358,test_00358,false +359,test_00359,false +360,test_00360,false +361,test_00361,false +362,NULL,true +363,test_00363,false +364,test_00364,false +365,test_00365,false +366,test_00366,false +367,test_00367,false +368,test_00368,false +369,test_00369,false +370,test_00370,false +371,test_00371,false +372,test_00372,false +373,test_00373,false +374,test_00374,false +375,test_00375,false +376,test_00376,false +377,test_00377,false +378,test_00378,false +379,test_00379,false +380,test_00380,false +381,NULL,true +382,NULL,true +383,test_00383,false +384,test_00384,false +385,test_00385,false +386,NULL,true +387,test_00387,false +388,test_00388,false +389,test_00389,false +390,test_00390,false +391,test_00391,false +392,test_00392,false +393,test_00393,false +394,test_00394,false +395,test_00395,false +396,test_00396,false +397,test_00397,false +398,test_00398,false +399,test_00399,false +400,test_00400,false +401,test_00401,false +402,test_00402,false +403,test_00403,false +404,test_00404,false +405,test_00405,false +406,test_00406,false +407,test_00407,false +408,test_00408,false +409,test_00409,false +410,test_00410,false +411,test_00411,false +412,test_00412,false +413,NULL,true +414,test_00414,false +415,test_00415,false +416,test_00416,false +417,test_00417,false +418,test_00418,false +419,test_00419,false +420,NULL,true +421,test_00421,false +422,test_00422,false +423,test_00423,false +424,test_00424,false +425,test_00425,false +426,test_00426,false +427,NULL,true +428,test_00428,false +429,test_00429,false +430,test_00430,false +431,test_00431,false +432,test_00432,false +433,test_00433,false +434,test_00434,false +435,test_00435,false +436,test_00436,false +437,test_00437,false +438,test_00438,false +439,test_00439,false +440,test_00440,false +441,test_00441,false +442,test_00442,false +443,test_00443,false +444,test_00444,false +445,test_00445,false +446,test_00446,false +447,test_00447,false +448,test_00448,false +449,test_00449,false +450,test_00450,false +451,test_00451,false +452,test_00452,false +453,test_00453,false +454,test_00454,false +455,test_00455,false +456,test_00456,false +457,test_00457,false +458,test_00458,false +459,test_00459,false +460,test_00460,false +461,test_00461,false +462,test_00462,false +463,test_00463,false +464,test_00464,false +465,test_00465,false +466,test_00466,false +467,test_00467,false +468,test_00468,false +469,test_00469,false +470,test_00470,false +471,test_00471,false +472,test_00472,false +473,test_00473,false +474,test_00474,false +475,test_00475,false +476,test_00476,false +477,test_00477,false +478,test_00478,false +479,test_00479,false +480,test_00480,false +481,test_00481,false +482,test_00482,false +483,test_00483,false +484,NULL,true +485,test_00485,false +486,test_00486,false +487,test_00487,false +488,test_00488,false +489,test_00489,false +490,test_00490,false +491,test_00491,false +492,test_00492,false +493,test_00493,false +494,test_00494,false +495,test_00495,false +496,test_00496,false +497,test_00497,false +498,test_00498,false +499,test_00499,false +500,test_00500,false +501,test_00501,false +502,test_00502,false +503,test_00503,false +504,test_00504,false +505,test_00505,false +506,test_00506,false +507,test_00507,false +508,test_00508,false +509,test_00509,false +510,test_00510,false +511,test_00511,false +512,test_00512,false +513,test_00513,false +514,test_00514,false +515,test_00515,false +516,test_00516,false +517,test_00517,false +518,test_00518,false +519,test_00519,false +520,test_00520,false +521,test_00521,false +522,test_00522,false +523,test_00523,false +524,test_00524,false +525,test_00525,false +526,test_00526,false +527,test_00527,false +528,test_00528,false +529,test_00529,false +530,NULL,true +531,test_00531,false +532,test_00532,false +533,test_00533,false +534,test_00534,false +535,test_00535,false +536,test_00536,false +537,test_00537,false +538,test_00538,false +539,test_00539,false +540,test_00540,false +541,test_00541,false +542,test_00542,false +543,test_00543,false +544,test_00544,false +545,test_00545,false +546,test_00546,false +547,test_00547,false +548,test_00548,false +549,test_00549,false +550,test_00550,false +551,test_00551,false +552,test_00552,false +553,test_00553,false +554,test_00554,false +555,test_00555,false +556,test_00556,false +557,test_00557,false +558,test_00558,false +559,test_00559,false +560,test_00560,false +561,test_00561,false +562,test_00562,false +563,test_00563,false +564,test_00564,false +565,test_00565,false +566,test_00566,false +567,test_00567,false +568,test_00568,false +569,test_00569,false +570,test_00570,false +571,NULL,true +572,test_00572,false +573,test_00573,false +574,test_00574,false +575,test_00575,false +576,NULL,true +577,test_00577,false +578,test_00578,false +579,test_00579,false +580,test_00580,false +581,test_00581,false +582,test_00582,false +583,test_00583,false +584,NULL,true +585,test_00585,false +586,test_00586,false +587,test_00587,false +588,test_00588,false +589,test_00589,false +590,test_00590,false +591,test_00591,false +592,test_00592,false +593,test_00593,false +594,test_00594,false +595,test_00595,false +596,test_00596,false +597,test_00597,false +598,test_00598,false +599,test_00599,false +600,test_00600,false +601,test_00601,false +602,test_00602,false +603,test_00603,false +604,test_00604,false +605,test_00605,false +606,test_00606,false +607,test_00607,false +608,test_00608,false +609,test_00609,false +610,test_00610,false +611,test_00611,false +612,test_00612,false +613,test_00613,false +614,test_00614,false +615,test_00615,false +616,test_00616,false +617,test_00617,false +618,test_00618,false +619,test_00619,false +620,test_00620,false +621,test_00621,false +622,test_00622,false +623,test_00623,false +624,test_00624,false +625,test_00625,false +626,test_00626,false +627,test_00627,false +628,test_00628,false +629,test_00629,false +630,test_00630,false +631,test_00631,false +632,test_00632,false +633,test_00633,false +634,test_00634,false +635,test_00635,false +636,test_00636,false +637,test_00637,false +638,test_00638,false +639,test_00639,false +640,test_00640,false +641,test_00641,false +642,test_00642,false +643,test_00643,false +644,test_00644,false +645,test_00645,false +646,test_00646,false +647,test_00647,false +648,test_00648,false +649,test_00649,false +650,test_00650,false +651,test_00651,false +652,test_00652,false +653,test_00653,false +654,test_00654,false +655,test_00655,false +656,test_00656,false +657,test_00657,false +658,test_00658,false +659,NULL,true +660,test_00660,false +661,test_00661,false +662,test_00662,false +663,test_00663,false +664,test_00664,false +665,test_00665,false +666,test_00666,false +667,test_00667,false +668,test_00668,false +669,test_00669,false +670,test_00670,false +671,test_00671,false +672,test_00672,false +673,test_00673,false +674,test_00674,false +675,test_00675,false +676,test_00676,false +677,test_00677,false +678,test_00678,false +679,test_00679,false +680,NULL,true +681,test_00681,false +682,test_00682,false +683,test_00683,false +684,test_00684,false +685,test_00685,false +686,test_00686,false +687,test_00687,false +688,test_00688,false +689,test_00689,false +690,test_00690,false +691,test_00691,false +692,test_00692,false +693,test_00693,false +694,test_00694,false +695,test_00695,false +696,test_00696,false +697,test_00697,false +698,test_00698,false +699,test_00699,false +700,test_00700,false +701,NULL,true +702,test_00702,false +703,test_00703,false +704,test_00704,false +705,NULL,true +706,test_00706,false +707,test_00707,false +708,test_00708,false +709,test_00709,false +710,test_00710,false +711,test_00711,false +712,test_00712,false +713,test_00713,false +714,test_00714,false +715,test_00715,false +716,test_00716,false +717,test_00717,false +718,test_00718,false +719,test_00719,false +720,test_00720,false +721,NULL,true +722,test_00722,false +723,test_00723,false +724,test_00724,false +725,test_00725,false +726,test_00726,false +727,test_00727,false +728,test_00728,false +729,test_00729,false +730,test_00730,false +731,test_00731,false +732,test_00732,false +733,test_00733,false +734,test_00734,false +735,test_00735,false +736,test_00736,false +737,test_00737,false +738,test_00738,false +739,test_00739,false +740,test_00740,false +741,test_00741,false +742,test_00742,false +743,test_00743,false +744,test_00744,false +745,test_00745,false +746,test_00746,false +747,test_00747,false +748,test_00748,false +749,test_00749,false +750,test_00750,false +751,test_00751,false +752,test_00752,false +753,test_00753,false +754,test_00754,false +755,test_00755,false +756,test_00756,false +757,test_00757,false +758,test_00758,false +759,test_00759,false +760,test_00760,false +761,test_00761,false +762,test_00762,false +763,test_00763,false +764,NULL,true +765,test_00765,false +766,test_00766,false +767,test_00767,false +768,test_00768,false +769,test_00769,false +770,test_00770,false +771,NULL,true +772,test_00772,false +773,test_00773,false +774,test_00774,false +775,test_00775,false +776,test_00776,false +777,test_00777,false +778,test_00778,false +779,test_00779,false +780,test_00780,false +781,test_00781,false +782,test_00782,false +783,test_00783,false +784,test_00784,false +785,test_00785,false +786,test_00786,false +787,test_00787,false +788,test_00788,false +789,test_00789,false +790,test_00790,false +791,test_00791,false +792,test_00792,false +793,test_00793,false +794,test_00794,false +795,test_00795,false +796,test_00796,false +797,NULL,true +798,test_00798,false +799,test_00799,false +800,test_00800,false +801,test_00801,false +802,test_00802,false +803,test_00803,false +804,test_00804,false +805,test_00805,false +806,test_00806,false +807,test_00807,false +808,test_00808,false +809,test_00809,false +810,test_00810,false +811,test_00811,false +812,test_00812,false +813,test_00813,false +814,test_00814,false +815,test_00815,false +816,test_00816,false +817,test_00817,false +818,test_00818,false +819,test_00819,false +820,test_00820,false +821,test_00821,false +822,test_00822,false +823,test_00823,false +824,test_00824,false +825,test_00825,false +826,test_00826,false +827,test_00827,false +828,test_00828,false +829,test_00829,false +830,test_00830,false +831,test_00831,false +832,test_00832,false +833,test_00833,false +834,test_00834,false +835,NULL,true +836,test_00836,false +837,test_00837,false +838,test_00838,false +839,test_00839,false +840,test_00840,false +841,test_00841,false +842,test_00842,false +843,test_00843,false +844,test_00844,false +845,test_00845,false +846,test_00846,false +847,test_00847,false +848,test_00848,false +849,test_00849,false +850,test_00850,false +851,test_00851,false +852,test_00852,false +853,test_00853,false +854,NULL,true +855,test_00855,false +856,test_00856,false +857,test_00857,false +858,test_00858,false +859,test_00859,false +860,test_00860,false +861,test_00861,false +862,test_00862,false +863,test_00863,false +864,test_00864,false +865,test_00865,false +866,test_00866,false +867,test_00867,false +868,test_00868,false +869,test_00869,false +870,test_00870,false +871,test_00871,false +872,test_00872,false +873,test_00873,false +874,test_00874,false +875,test_00875,false +876,test_00876,false +877,test_00877,false +878,test_00878,false +879,test_00879,false +880,test_00880,false +881,test_00881,false +882,test_00882,false +883,test_00883,false +884,test_00884,false +885,NULL,true +886,test_00886,false +887,test_00887,false +888,test_00888,false +889,test_00889,false +890,test_00890,false +891,test_00891,false +892,test_00892,false +893,test_00893,false +894,test_00894,false +895,test_00895,false +896,test_00896,false +897,test_00897,false +898,test_00898,false +899,test_00899,false +900,test_00900,false +901,test_00901,false +902,test_00902,false +903,test_00903,false +904,test_00904,false +905,test_00905,false +906,test_00906,false +907,test_00907,false +908,test_00908,false +909,NULL,true +910,test_00910,false +911,test_00911,false +912,test_00912,false +913,test_00913,false +914,test_00914,false +915,test_00915,false +916,test_00916,false +917,test_00917,false +918,test_00918,false +919,test_00919,false +920,test_00920,false +921,test_00921,false +922,test_00922,false +923,test_00923,false +924,test_00924,false +925,test_00925,false +926,test_00926,false +927,test_00927,false +928,test_00928,false +929,test_00929,false +930,test_00930,false +931,test_00931,false +932,test_00932,false +933,test_00933,false +934,test_00934,false +935,test_00935,false +936,test_00936,false +937,test_00937,false +938,test_00938,false +939,test_00939,false +940,test_00940,false +941,test_00941,false +942,test_00942,false +943,test_00943,false +944,test_00944,false +945,test_00945,false +946,test_00946,false +947,test_00947,false +948,test_00948,false +949,test_00949,false +950,NULL,true +951,test_00951,false +952,test_00952,false +953,test_00953,false +954,test_00954,false +955,test_00955,false +956,test_00956,false +957,test_00957,false +958,test_00958,false +959,NULL,true +960,test_00960,false +961,test_00961,false +962,test_00962,false +963,test_00963,false +964,NULL,true +965,test_00965,false +966,NULL,true +967,test_00967,false +968,test_00968,false +969,test_00969,false +970,test_00970,false +971,test_00971,false +972,test_00972,false +973,test_00973,false +974,test_00974,false +975,test_00975,false +976,test_00976,false +977,test_00977,false +978,test_00978,false +979,test_00979,false +980,test_00980,false +981,test_00981,false +982,test_00982,false +983,test_00983,false +984,test_00984,false +985,test_00985,false +986,test_00986,false +987,test_00987,false +988,test_00988,false +989,test_00989,false +990,test_00990,false +991,test_00991,false +992,test_00992,false +993,test_00993,false +994,test_00994,false +995,test_00995,false +996,test_00996,false +997,test_00997,false +998,test_00998,false +999,test_00999,false +1000,test_01000,false +1001,test_01001,false +1002,test_01002,false +1003,test_01003,false +1004,test_01004,false +1005,test_01005,false +1006,test_01006,false +1007,test_01007,false +1008,test_01008,false +1009,test_01009,false +1010,test_01010,false +1011,test_01011,false +1012,test_01012,false +1013,test_01013,false +1014,test_01014,false +1015,test_01015,false +1016,test_01016,false +1017,test_01017,false +1018,test_01018,false +1019,test_01019,false +1020,test_01020,false +1021,test_01021,false +1022,test_01022,false +1023,test_01023,false +1024,test_01024,false +1025,test_01025,false +1026,test_01026,false +1027,test_01027,false +1028,test_01028,false +1029,test_01029,false +1030,test_01030,false +1031,test_01031,false +1032,test_01032,false +1033,test_01033,false +1034,test_01034,false +1035,test_01035,false +1036,test_01036,false +1037,test_01037,false +1038,test_01038,false +1039,NULL,true +1040,test_01040,false +1041,NULL,true +1042,test_01042,false +1043,test_01043,false +1044,test_01044,false +1045,test_01045,false +1046,test_01046,false +1047,test_01047,false +1048,test_01048,false +1049,test_01049,false +1050,test_01050,false +1051,test_01051,false +1052,test_01052,false +1053,test_01053,false +1054,test_01054,false +1055,test_01055,false +1056,test_01056,false +1057,test_01057,false +1058,test_01058,false +1059,test_01059,false +1060,test_01060,false +1061,test_01061,false +1062,test_01062,false +1063,test_01063,false +1064,test_01064,false +1065,test_01065,false +1066,test_01066,false +1067,test_01067,false +1068,test_01068,false +1069,test_01069,false +1070,test_01070,false +1071,test_01071,false +1072,test_01072,false +1073,test_01073,false +1074,test_01074,false +1075,test_01075,false +1076,test_01076,false +1077,test_01077,false +1078,test_01078,false +1079,test_01079,false +1080,test_01080,false +1081,test_01081,false +1082,test_01082,false +1083,test_01083,false +1084,test_01084,false +1085,test_01085,false +1086,test_01086,false +1087,test_01087,false +1088,test_01088,false +1089,test_01089,false +1090,test_01090,false +1091,test_01091,false +1092,test_01092,false +1093,test_01093,false +1094,test_01094,false +1095,test_01095,false +1096,test_01096,false +1097,NULL,true +1098,test_01098,false +1099,test_01099,false +1100,test_01100,false +1101,test_01101,false +1102,test_01102,false +1103,test_01103,false +1104,test_01104,false +1105,test_01105,false +1106,test_01106,false +1107,test_01107,false +1108,test_01108,false +1109,test_01109,false +1110,test_01110,false +1111,test_01111,false +1112,test_01112,false +1113,test_01113,false +1114,test_01114,false +1115,test_01115,false +1116,test_01116,false +1117,test_01117,false +1118,test_01118,false +1119,test_01119,false +1120,test_01120,false +1121,test_01121,false +1122,test_01122,false +1123,test_01123,false +1124,test_01124,false +1125,test_01125,false +1126,test_01126,false +1127,test_01127,false +1128,test_01128,false +1129,test_01129,false +1130,test_01130,false +1131,test_01131,false +1132,test_01132,false +1133,test_01133,false +1134,test_01134,false +1135,test_01135,false +1136,test_01136,false +1137,test_01137,false +1138,test_01138,false +1139,test_01139,false +1140,test_01140,false +1141,test_01141,false +1142,test_01142,false +1143,test_01143,false +1144,test_01144,false +1145,test_01145,false +1146,test_01146,false +1147,test_01147,false +1148,test_01148,false +1149,test_01149,false +1150,test_01150,false +1151,test_01151,false +1152,test_01152,false +1153,test_01153,false +1154,test_01154,false +1155,test_01155,false +1156,test_01156,false +1157,test_01157,false +1158,test_01158,false +1159,test_01159,false +1160,test_01160,false +1161,test_01161,false +1162,test_01162,false +1163,test_01163,false +1164,test_01164,false +1165,test_01165,false +1166,test_01166,false +1167,test_01167,false +1168,test_01168,false +1169,test_01169,false +1170,test_01170,false +1171,test_01171,false +1172,test_01172,false +1173,test_01173,false +1174,test_01174,false +1175,test_01175,false +1176,test_01176,false +1177,test_01177,false +1178,NULL,true +1179,NULL,true +1180,test_01180,false +1181,NULL,true +1182,test_01182,false +1183,test_01183,false +1184,test_01184,false +1185,test_01185,false +1186,test_01186,false +1187,test_01187,false +1188,NULL,true +1189,test_01189,false +1190,test_01190,false +1191,test_01191,false +1192,test_01192,false +1193,test_01193,false +1194,test_01194,false +1195,test_01195,false +1196,test_01196,false +1197,test_01197,false +1198,test_01198,false +1199,NULL,true +1200,test_01200,false +1201,NULL,true +1202,test_01202,false +1203,test_01203,false +1204,test_01204,false +1205,test_01205,false +1206,test_01206,false +1207,test_01207,false +1208,test_01208,false +1209,test_01209,false +1210,test_01210,false +1211,test_01211,false +1212,test_01212,false +1213,test_01213,false +1214,test_01214,false +1215,test_01215,false +1216,test_01216,false +1217,test_01217,false +1218,test_01218,false +1219,test_01219,false +1220,test_01220,false +1221,test_01221,false +1222,test_01222,false +1223,test_01223,false +1224,test_01224,false +1225,test_01225,false +1226,test_01226,false +1227,test_01227,false +1228,test_01228,false +1229,test_01229,false +1230,test_01230,false +1231,test_01231,false +1232,test_01232,false +1233,test_01233,false +1234,NULL,true +1235,test_01235,false +1236,test_01236,false +1237,test_01237,false +1238,test_01238,false +1239,test_01239,false +1240,test_01240,false +1241,test_01241,false +1242,test_01242,false +1243,test_01243,false +1244,test_01244,false +1245,test_01245,false +1246,test_01246,false +1247,test_01247,false +1248,test_01248,false +1249,test_01249,false +1250,test_01250,false +1251,test_01251,false +1252,test_01252,false +1253,test_01253,false +1254,test_01254,false +1255,test_01255,false +1256,test_01256,false +1257,test_01257,false +1258,test_01258,false +1259,test_01259,false +1260,test_01260,false +1261,test_01261,false +1262,test_01262,false +1263,test_01263,false +1264,test_01264,false +1265,NULL,true +1266,NULL,true +1267,test_01267,false +1268,NULL,true +1269,test_01269,false +1270,test_01270,false +1271,test_01271,false +1272,test_01272,false +1273,test_01273,false +1274,test_01274,false +1275,test_01275,false +1276,test_01276,false +1277,test_01277,false +1278,test_01278,false +1279,test_01279,false +1280,test_01280,false +1281,test_01281,false +1282,test_01282,false +1283,test_01283,false +1284,test_01284,false +1285,test_01285,false +1286,test_01286,false +1287,test_01287,false +1288,test_01288,false +1289,NULL,true +1290,test_01290,false +1291,test_01291,false +1292,test_01292,false +1293,test_01293,false +1294,test_01294,false +1295,NULL,true +1296,test_01296,false +1297,test_01297,false +1298,test_01298,false +1299,NULL,true +1300,test_01300,false +1301,test_01301,false +1302,test_01302,false +1303,test_01303,false +1304,test_01304,false +1305,test_01305,false +1306,test_01306,false +1307,test_01307,false +1308,test_01308,false +1309,test_01309,false +1310,test_01310,false +1311,test_01311,false +1312,test_01312,false +1313,test_01313,false +1314,test_01314,false +1315,test_01315,false +1316,test_01316,false +1317,test_01317,false +1318,test_01318,false +1319,test_01319,false +1320,test_01320,false +1321,test_01321,false +1322,test_01322,false +1323,test_01323,false +1324,test_01324,false +1325,test_01325,false +1326,test_01326,false +1327,test_01327,false +1328,test_01328,false +1329,test_01329,false +1330,test_01330,false +1331,test_01331,false +1332,test_01332,false +1333,test_01333,false +1334,test_01334,false +1335,test_01335,false +1336,test_01336,false +1337,test_01337,false +1338,test_01338,false +1339,test_01339,false +1340,test_01340,false +1341,NULL,true +1342,test_01342,false +1343,test_01343,false +1344,test_01344,false +1345,test_01345,false +1346,test_01346,false +1347,test_01347,false +1348,test_01348,false +1349,test_01349,false +1350,test_01350,false +1351,test_01351,false +1352,NULL,true +1353,test_01353,false +1354,test_01354,false +1355,test_01355,false +1356,test_01356,false +1357,test_01357,false +1358,test_01358,false +1359,test_01359,false +1360,test_01360,false +1361,test_01361,false +1362,test_01362,false +1363,test_01363,false +1364,test_01364,false +1365,test_01365,false +1366,test_01366,false +1367,test_01367,false +1368,test_01368,false +1369,test_01369,false +1370,test_01370,false +1371,test_01371,false +1372,test_01372,false +1373,test_01373,false +1374,test_01374,false +1375,test_01375,false +1376,test_01376,false +1377,test_01377,false +1378,test_01378,false +1379,test_01379,false +1380,test_01380,false +1381,test_01381,false +1382,test_01382,false +1383,test_01383,false +1384,test_01384,false +1385,test_01385,false +1386,test_01386,false +1387,test_01387,false +1388,test_01388,false +1389,test_01389,false +1390,test_01390,false +1391,test_01391,false +1392,test_01392,false +1393,test_01393,false +1394,test_01394,false +1395,test_01395,false +1396,test_01396,false +1397,test_01397,false +1398,test_01398,false +1399,NULL,true +1400,test_01400,false +1401,test_01401,false +1402,test_01402,false +1403,test_01403,false +1404,test_01404,false +1405,test_01405,false +1406,test_01406,false +1407,test_01407,false +1408,test_01408,false +1409,test_01409,false +1410,test_01410,false +1411,test_01411,false +1412,test_01412,false +1413,NULL,true +1414,test_01414,false +1415,test_01415,false +1416,test_01416,false +1417,test_01417,false +1418,test_01418,false +1419,test_01419,false +1420,test_01420,false +1421,test_01421,false +1422,test_01422,false +1423,test_01423,false +1424,test_01424,false +1425,NULL,true +1426,test_01426,false +1427,test_01427,false +1428,test_01428,false +1429,test_01429,false +1430,test_01430,false +1431,test_01431,false +1432,test_01432,false +1433,test_01433,false +1434,test_01434,false +1435,test_01435,false +1436,test_01436,false +1437,test_01437,false +1438,test_01438,false +1439,test_01439,false +1440,test_01440,false +1441,test_01441,false +1442,test_01442,false +1443,test_01443,false +1444,test_01444,false +1445,test_01445,false +1446,test_01446,false +1447,test_01447,false +1448,test_01448,false +1449,test_01449,false +1450,test_01450,false +1451,test_01451,false +1452,test_01452,false +1453,test_01453,false +1454,test_01454,false +1455,NULL,true +1456,test_01456,false +1457,test_01457,false +1458,test_01458,false +1459,test_01459,false +1460,test_01460,false +1461,test_01461,false +1462,test_01462,false +1463,test_01463,false +1464,test_01464,false +1465,test_01465,false +1466,test_01466,false +1467,test_01467,false +1468,test_01468,false +1469,test_01469,false +1470,test_01470,false +1471,NULL,true +1472,test_01472,false +1473,test_01473,false +1474,test_01474,false +1475,test_01475,false +1476,test_01476,false +1477,test_01477,false +1478,test_01478,false +1479,test_01479,false +1480,test_01480,false +1481,NULL,true +1482,test_01482,false +1483,test_01483,false +1484,test_01484,false +1485,test_01485,false +1486,test_01486,false +1487,test_01487,false +1488,test_01488,false +1489,test_01489,false +1490,NULL,true +1491,test_01491,false +1492,test_01492,false +1493,test_01493,false +1494,test_01494,false +1495,NULL,true +1496,test_01496,false +1497,test_01497,false +1498,test_01498,false +1499,test_01499,false +1500,test_01500,false +1501,test_01501,false +1502,test_01502,false +1503,test_01503,false +1504,NULL,true +1505,test_01505,false +1506,test_01506,false +1507,test_01507,false +1508,test_01508,false +1509,test_01509,false +1510,test_01510,false +1511,test_01511,false +1512,test_01512,false +1513,test_01513,false +1514,test_01514,false +1515,test_01515,false +1516,test_01516,false +1517,test_01517,false +1518,test_01518,false +1519,test_01519,false +1520,test_01520,false +1521,test_01521,false +1522,test_01522,false +1523,test_01523,false +1524,test_01524,false +1525,test_01525,false +1526,test_01526,false +1527,test_01527,false +1528,test_01528,false +1529,test_01529,false +1530,test_01530,false +1531,test_01531,false +1532,test_01532,false +1533,test_01533,false +1534,test_01534,false +1535,test_01535,false +1536,test_01536,false +1537,test_01537,false +1538,test_01538,false +1539,test_01539,false +1540,test_01540,false +1541,test_01541,false +1542,test_01542,false +1543,test_01543,false +1544,test_01544,false +1545,test_01545,false +1546,test_01546,false +1547,NULL,true +1548,test_01548,false +1549,test_01549,false +1550,test_01550,false +1551,test_01551,false +1552,test_01552,false +1553,test_01553,false +1554,test_01554,false +1555,NULL,true +1556,test_01556,false +1557,test_01557,false +1558,test_01558,false +1559,test_01559,false +1560,test_01560,false +1561,test_01561,false +1562,NULL,true +1563,test_01563,false +1564,test_01564,false +1565,test_01565,false +1566,test_01566,false +1567,test_01567,false +1568,test_01568,false +1569,test_01569,false +1570,test_01570,false +1571,test_01571,false +1572,test_01572,false +1573,test_01573,false +1574,test_01574,false +1575,test_01575,false +1576,test_01576,false +1577,test_01577,false +1578,test_01578,false +1579,test_01579,false +1580,test_01580,false +1581,test_01581,false +1582,test_01582,false +1583,test_01583,false +1584,test_01584,false +1585,test_01585,false +1586,test_01586,false +1587,test_01587,false +1588,test_01588,false +1589,test_01589,false +1590,test_01590,false +1591,test_01591,false +1592,test_01592,false +1593,test_01593,false +1594,test_01594,false +1595,test_01595,false +1596,test_01596,false +1597,test_01597,false +1598,test_01598,false +1599,test_01599,false +1600,NULL,true +1601,test_01601,false +1602,test_01602,false +1603,test_01603,false +1604,test_01604,false +1605,test_01605,false +1606,test_01606,false +1607,test_01607,false +1608,test_01608,false +1609,test_01609,false +1610,test_01610,false +1611,test_01611,false +1612,test_01612,false +1613,test_01613,false +1614,test_01614,false +1615,test_01615,false +1616,test_01616,false +1617,test_01617,false +1618,test_01618,false +1619,test_01619,false +1620,test_01620,false +1621,test_01621,false +1622,test_01622,false +1623,test_01623,false +1624,test_01624,false +1625,test_01625,false +1626,test_01626,false +1627,test_01627,false +1628,test_01628,false +1629,test_01629,false +1630,test_01630,false +1631,test_01631,false +1632,test_01632,false +1633,test_01633,false +1634,test_01634,false +1635,test_01635,false +1636,test_01636,false +1637,test_01637,false +1638,test_01638,false +1639,test_01639,false +1640,test_01640,false +1641,test_01641,false +1642,test_01642,false +1643,test_01643,false +1644,test_01644,false +1645,test_01645,false +1646,test_01646,false +1647,test_01647,false +1648,test_01648,false +1649,test_01649,false +1650,test_01650,false +1651,test_01651,false +1652,test_01652,false +1653,test_01653,false +1654,test_01654,false +1655,test_01655,false +1656,test_01656,false +1657,test_01657,false +1658,test_01658,false +1659,test_01659,false +1660,test_01660,false +1661,test_01661,false +1662,test_01662,false +1663,test_01663,false +1664,test_01664,false +1665,test_01665,false +1666,test_01666,false +1667,test_01667,false +1668,test_01668,false +1669,test_01669,false +1670,test_01670,false +1671,test_01671,false +1672,test_01672,false +1673,test_01673,false +1674,test_01674,false +1675,test_01675,false +1676,test_01676,false +1677,test_01677,false +1678,NULL,true +1679,test_01679,false +1680,test_01680,false +1681,test_01681,false +1682,test_01682,false +1683,test_01683,false +1684,test_01684,false +1685,test_01685,false +1686,test_01686,false +1687,test_01687,false +1688,test_01688,false +1689,test_01689,false +1690,test_01690,false +1691,test_01691,false +1692,test_01692,false +1693,test_01693,false +1694,test_01694,false +1695,test_01695,false +1696,test_01696,false +1697,test_01697,false +1698,test_01698,false +1699,test_01699,false +1700,test_01700,false +1701,test_01701,false +1702,test_01702,false +1703,test_01703,false +1704,test_01704,false +1705,test_01705,false +1706,test_01706,false +1707,test_01707,false +1708,NULL,true +1709,test_01709,false +1710,test_01710,false +1711,test_01711,false +1712,test_01712,false +1713,test_01713,false +1714,test_01714,false +1715,test_01715,false +1716,test_01716,false +1717,test_01717,false +1718,test_01718,false +1719,test_01719,false +1720,test_01720,false +1721,test_01721,false +1722,test_01722,false +1723,test_01723,false +1724,test_01724,false +1725,test_01725,false +1726,test_01726,false +1727,test_01727,false +1728,test_01728,false +1729,test_01729,false +1730,test_01730,false +1731,test_01731,false +1732,test_01732,false +1733,test_01733,false +1734,test_01734,false +1735,test_01735,false +1736,test_01736,false +1737,test_01737,false +1738,test_01738,false +1739,test_01739,false +1740,test_01740,false +1741,test_01741,false +1742,test_01742,false +1743,test_01743,false +1744,test_01744,false +1745,test_01745,false +1746,test_01746,false +1747,test_01747,false +1748,test_01748,false +1749,test_01749,false +1750,test_01750,false +1751,test_01751,false +1752,test_01752,false +1753,test_01753,false +1754,test_01754,false +1755,test_01755,false +1756,test_01756,false +1757,test_01757,false +1758,test_01758,false +1759,test_01759,false +1760,test_01760,false +1761,NULL,true +1762,NULL,true +1763,test_01763,false +1764,test_01764,false +1765,test_01765,false +1766,test_01766,false +1767,test_01767,false +1768,test_01768,false +1769,test_01769,false +1770,test_01770,false +1771,test_01771,false +1772,test_01772,false +1773,test_01773,false +1774,test_01774,false +1775,test_01775,false +1776,test_01776,false +1777,test_01777,false +1778,test_01778,false +1779,test_01779,false +1780,test_01780,false +1781,test_01781,false +1782,test_01782,false +1783,test_01783,false +1784,test_01784,false +1785,test_01785,false +1786,test_01786,false +1787,test_01787,false +1788,test_01788,false +1789,test_01789,false +1790,test_01790,false +1791,test_01791,false +1792,test_01792,false +1793,test_01793,false +1794,test_01794,false +1795,test_01795,false +1796,test_01796,false +1797,test_01797,false +1798,test_01798,false +1799,test_01799,false +1800,test_01800,false +1801,test_01801,false +1802,test_01802,false +1803,test_01803,false +1804,test_01804,false +1805,test_01805,false +1806,test_01806,false +1807,test_01807,false +1808,test_01808,false +1809,NULL,true +1810,test_01810,false +1811,test_01811,false +1812,test_01812,false +1813,test_01813,false +1814,test_01814,false +1815,test_01815,false +1816,test_01816,false +1817,test_01817,false +1818,test_01818,false +1819,test_01819,false +1820,NULL,true +1821,NULL,true +1822,test_01822,false +1823,test_01823,false +1824,test_01824,false +1825,test_01825,false +1826,test_01826,false +1827,test_01827,false +1828,test_01828,false +1829,test_01829,false +1830,test_01830,false +1831,NULL,true +1832,test_01832,false +1833,test_01833,false +1834,test_01834,false +1835,test_01835,false +1836,test_01836,false +1837,test_01837,false +1838,test_01838,false +1839,test_01839,false +1840,test_01840,false +1841,test_01841,false +1842,test_01842,false +1843,test_01843,false +1844,NULL,true +1845,test_01845,false +1846,test_01846,false +1847,test_01847,false +1848,test_01848,false +1849,test_01849,false +1850,test_01850,false +1851,test_01851,false +1852,test_01852,false +1853,test_01853,false +1854,test_01854,false +1855,test_01855,false +1856,test_01856,false +1857,test_01857,false +1858,test_01858,false +1859,test_01859,false +1860,test_01860,false +1861,test_01861,false +1862,test_01862,false +1863,test_01863,false +1864,test_01864,false +1865,test_01865,false +1866,test_01866,false +1867,test_01867,false +1868,test_01868,false +1869,test_01869,false +1870,test_01870,false +1871,test_01871,false +1872,test_01872,false +1873,test_01873,false +1874,test_01874,false +1875,test_01875,false +1876,test_01876,false +1877,test_01877,false +1878,test_01878,false +1879,test_01879,false +1880,test_01880,false +1881,test_01881,false +1882,test_01882,false +1883,test_01883,false +1884,test_01884,false +1885,test_01885,false +1886,test_01886,false +1887,test_01887,false +1888,test_01888,false +1889,test_01889,false +1890,test_01890,false +1891,test_01891,false +1892,test_01892,false +1893,test_01893,false +1894,test_01894,false +1895,NULL,true +1896,test_01896,false +1897,test_01897,false +1898,test_01898,false +1899,test_01899,false +1900,test_01900,false +1901,test_01901,false +1902,test_01902,false +1903,NULL,true +1904,test_01904,false +1905,test_01905,false +1906,test_01906,false +1907,NULL,true +1908,test_01908,false +1909,test_01909,false +1910,test_01910,false +1911,test_01911,false +1912,test_01912,false +1913,test_01913,false +1914,NULL,true +1915,test_01915,false +1916,test_01916,false +1917,test_01917,false +1918,test_01918,false +1919,test_01919,false +1920,test_01920,false +1921,test_01921,false +1922,test_01922,false +1923,test_01923,false +1924,NULL,true +1925,test_01925,false +1926,test_01926,false +1927,test_01927,false +1928,test_01928,false +1929,test_01929,false +1930,test_01930,false +1931,test_01931,false +1932,NULL,true +1933,test_01933,false +1934,test_01934,false +1935,test_01935,false +1936,test_01936,false +1937,test_01937,false +1938,test_01938,false +1939,test_01939,false +1940,test_01940,false +1941,test_01941,false +1942,test_01942,false +1943,test_01943,false +1944,test_01944,false +1945,test_01945,false +1946,test_01946,false +1947,test_01947,false +1948,test_01948,false +1949,test_01949,false +1950,test_01950,false +1951,test_01951,false +1952,test_01952,false +1953,test_01953,false +1954,test_01954,false +1955,test_01955,false +1956,test_01956,false +1957,test_01957,false +1958,test_01958,false +1959,test_01959,false +1960,test_01960,false +1961,test_01961,false +1962,test_01962,false +1963,test_01963,false +1964,NULL,true +1965,test_01965,false +1966,test_01966,false +1967,test_01967,false +1968,test_01968,false +1969,test_01969,false +1970,NULL,true +1971,test_01971,false +1972,test_01972,false +1973,test_01973,false +1974,test_01974,false +1975,test_01975,false +1976,NULL,true +1977,test_01977,false +1978,test_01978,false +1979,test_01979,false +1980,test_01980,false +1981,test_01981,false +1982,test_01982,false +1983,test_01983,false +1984,test_01984,false +1985,test_01985,false +1986,test_01986,false +1987,test_01987,false +1988,test_01988,false +1989,test_01989,false +1990,test_01990,false +1991,test_01991,false +1992,test_01992,false +1993,test_01993,false +1994,NULL,true +1995,test_01995,false +1996,test_01996,false +1997,test_01997,false +1998,test_01998,false +1999,test_01999,false +2000,test_02000,false +2001,test_02001,false +2002,test_02002,false +2003,test_02003,false +2004,test_02004,false +2005,test_02005,false +2006,test_02006,false +2007,test_02007,false +2008,test_02008,false +2009,test_02009,false +2010,test_02010,false +2011,test_02011,false +2012,test_02012,false +2013,test_02013,false +2014,test_02014,false +2015,test_02015,false +2016,test_02016,false +2017,test_02017,false +2018,test_02018,false +2019,test_02019,false +2020,test_02020,false +2021,test_02021,false +2022,test_02022,false +2023,test_02023,false +2024,test_02024,false +2025,test_02025,false +2026,test_02026,false +2027,NULL,true +2028,test_02028,false +2029,test_02029,false +2030,test_02030,false +2031,test_02031,false +2032,test_02032,false +2033,test_02033,false +2034,test_02034,false +2035,test_02035,false +2036,test_02036,false +2037,NULL,true +2038,test_02038,false +2039,test_02039,false +2040,test_02040,false +2041,test_02041,false +2042,test_02042,false +2043,test_02043,false +2044,test_02044,false +2045,test_02045,false +2046,test_02046,false +2047,test_02047,false +2048,test_02048,false +2049,test_02049,false +2050,test_02050,false +2051,test_02051,false +2052,test_02052,false +2053,test_02053,false +2054,NULL,true +2055,test_02055,false +2056,test_02056,false +2057,test_02057,false +2058,test_02058,false +2059,NULL,true +2060,test_02060,false +2061,test_02061,false +2062,test_02062,false +2063,test_02063,false +2064,test_02064,false +2065,test_02065,false +2066,test_02066,false +2067,test_02067,false +2068,test_02068,false +2069,test_02069,false +2070,NULL,true +2071,NULL,true +2072,test_02072,false +2073,test_02073,false +2074,test_02074,false +2075,test_02075,false +2076,test_02076,false +2077,NULL,true +2078,test_02078,false +2079,test_02079,false +2080,test_02080,false +2081,test_02081,false +2082,test_02082,false +2083,test_02083,false +2084,test_02084,false +2085,test_02085,false +2086,test_02086,false +2087,test_02087,false +2088,test_02088,false +2089,test_02089,false +2090,test_02090,false +2091,test_02091,false +2092,test_02092,false +2093,test_02093,false +2094,test_02094,false +2095,test_02095,false +2096,test_02096,false +2097,test_02097,false +2098,NULL,true +2099,test_02099,false +2100,test_02100,false +2101,test_02101,false +2102,test_02102,false +2103,test_02103,false +2104,test_02104,false +2105,test_02105,false +2106,test_02106,false +2107,test_02107,false +2108,test_02108,false +2109,test_02109,false +2110,test_02110,false +2111,test_02111,false +2112,test_02112,false +2113,test_02113,false +2114,test_02114,false +2115,test_02115,false +2116,test_02116,false +2117,test_02117,false +2118,test_02118,false +2119,test_02119,false +2120,test_02120,false +2121,test_02121,false +2122,test_02122,false +2123,test_02123,false +2124,test_02124,false +2125,test_02125,false +2126,test_02126,false +2127,test_02127,false +2128,test_02128,false +2129,test_02129,false +2130,test_02130,false +2131,test_02131,false +2132,test_02132,false +2133,test_02133,false +2134,test_02134,false +2135,test_02135,false +2136,test_02136,false +2137,test_02137,false +2138,test_02138,false +2139,test_02139,false +2140,test_02140,false +2141,test_02141,false +2142,test_02142,false +2143,test_02143,false +2144,test_02144,false +2145,NULL,true +2146,test_02146,false +2147,test_02147,false +2148,test_02148,false +2149,test_02149,false +2150,test_02150,false +2151,test_02151,false +2152,test_02152,false +2153,test_02153,false +2154,test_02154,false +2155,test_02155,false +2156,NULL,true +2157,test_02157,false +2158,test_02158,false +2159,test_02159,false +2160,test_02160,false +2161,test_02161,false +2162,test_02162,false +2163,test_02163,false +2164,test_02164,false +2165,test_02165,false +2166,test_02166,false +2167,test_02167,false +2168,test_02168,false +2169,test_02169,false +2170,test_02170,false +2171,test_02171,false +2172,test_02172,false +2173,test_02173,false +2174,test_02174,false +2175,test_02175,false +2176,test_02176,false +2177,test_02177,false +2178,test_02178,false +2179,test_02179,false +2180,test_02180,false +2181,test_02181,false +2182,NULL,true +2183,test_02183,false +2184,test_02184,false +2185,test_02185,false +2186,test_02186,false +2187,test_02187,false +2188,test_02188,false +2189,test_02189,false +2190,test_02190,false +2191,test_02191,false +2192,test_02192,false +2193,test_02193,false +2194,test_02194,false +2195,test_02195,false +2196,test_02196,false +2197,test_02197,false +2198,test_02198,false +2199,test_02199,false +2200,test_02200,false +2201,test_02201,false +2202,test_02202,false +2203,test_02203,false +2204,test_02204,false +2205,test_02205,false +2206,test_02206,false +2207,test_02207,false +2208,test_02208,false +2209,test_02209,false +2210,test_02210,false +2211,test_02211,false +2212,test_02212,false +2213,test_02213,false +2214,test_02214,false +2215,test_02215,false +2216,test_02216,false +2217,test_02217,false +2218,test_02218,false +2219,test_02219,false +2220,test_02220,false +2221,test_02221,false +2222,test_02222,false +2223,test_02223,false +2224,test_02224,false +2225,test_02225,false +2226,test_02226,false +2227,test_02227,false +2228,test_02228,false +2229,test_02229,false +2230,NULL,true +2231,test_02231,false +2232,test_02232,false +2233,test_02233,false +2234,test_02234,false +2235,test_02235,false +2236,test_02236,false +2237,test_02237,false +2238,test_02238,false +2239,test_02239,false +2240,test_02240,false +2241,test_02241,false +2242,test_02242,false +2243,test_02243,false +2244,test_02244,false +2245,test_02245,false +2246,test_02246,false +2247,test_02247,false +2248,test_02248,false +2249,test_02249,false +2250,test_02250,false +2251,test_02251,false +2252,test_02252,false +2253,test_02253,false +2254,test_02254,false +2255,test_02255,false +2256,test_02256,false +2257,test_02257,false +2258,test_02258,false +2259,test_02259,false +2260,test_02260,false +2261,test_02261,false +2262,test_02262,false +2263,test_02263,false +2264,test_02264,false +2265,test_02265,false +2266,test_02266,false +2267,test_02267,false +2268,test_02268,false +2269,NULL,true +2270,test_02270,false +2271,test_02271,false +2272,test_02272,false +2273,test_02273,false +2274,test_02274,false +2275,test_02275,false +2276,test_02276,false +2277,test_02277,false +2278,test_02278,false +2279,test_02279,false +2280,test_02280,false +2281,test_02281,false +2282,NULL,true +2283,test_02283,false +2284,test_02284,false +2285,test_02285,false +2286,test_02286,false +2287,test_02287,false +2288,test_02288,false +2289,test_02289,false +2290,test_02290,false +2291,test_02291,false +2292,test_02292,false +2293,test_02293,false +2294,test_02294,false +2295,test_02295,false +2296,test_02296,false +2297,test_02297,false +2298,test_02298,false +2299,NULL,true +2300,test_02300,false +2301,test_02301,false +2302,test_02302,false +2303,test_02303,false +2304,test_02304,false +2305,test_02305,false +2306,test_02306,false +2307,test_02307,false +2308,test_02308,false +2309,test_02309,false +2310,test_02310,false +2311,test_02311,false +2312,test_02312,false +2313,test_02313,false +2314,test_02314,false +2315,test_02315,false +2316,test_02316,false +2317,test_02317,false +2318,test_02318,false +2319,test_02319,false +2320,test_02320,false +2321,test_02321,false +2322,test_02322,false +2323,test_02323,false +2324,test_02324,false +2325,test_02325,false +2326,test_02326,false +2327,test_02327,false +2328,test_02328,false +2329,test_02329,false +2330,test_02330,false +2331,test_02331,false +2332,test_02332,false +2333,test_02333,false +2334,test_02334,false +2335,test_02335,false +2336,test_02336,false +2337,test_02337,false +2338,test_02338,false +2339,test_02339,false +2340,NULL,true +2341,test_02341,false +2342,test_02342,false +2343,test_02343,false +2344,test_02344,false +2345,test_02345,false +2346,test_02346,false +2347,test_02347,false +2348,test_02348,false +2349,test_02349,false +2350,test_02350,false +2351,test_02351,false +2352,test_02352,false +2353,test_02353,false +2354,test_02354,false +2355,test_02355,false +2356,test_02356,false +2357,test_02357,false +2358,test_02358,false +2359,test_02359,false +2360,test_02360,false +2361,test_02361,false +2362,test_02362,false +2363,test_02363,false +2364,test_02364,false +2365,test_02365,false +2366,test_02366,false +2367,test_02367,false +2368,test_02368,false +2369,test_02369,false +2370,NULL,true +2371,test_02371,false +2372,test_02372,false +2373,test_02373,false +2374,test_02374,false +2375,test_02375,false +2376,test_02376,false +2377,test_02377,false +2378,test_02378,false +2379,test_02379,false +2380,test_02380,false +2381,test_02381,false +2382,test_02382,false +2383,test_02383,false +2384,NULL,true +2385,test_02385,false +2386,test_02386,false +2387,test_02387,false +2388,test_02388,false +2389,test_02389,false +2390,test_02390,false +2391,test_02391,false +2392,test_02392,false +2393,test_02393,false +2394,test_02394,false +2395,test_02395,false +2396,test_02396,false +2397,test_02397,false +2398,test_02398,false +2399,test_02399,false +2400,test_02400,false +2401,test_02401,false +2402,test_02402,false +2403,test_02403,false +2404,test_02404,false +2405,test_02405,false +2406,test_02406,false +2407,test_02407,false +2408,test_02408,false +2409,test_02409,false +2410,test_02410,false +2411,test_02411,false +2412,test_02412,false +2413,test_02413,false +2414,test_02414,false +2415,test_02415,false +2416,test_02416,false +2417,test_02417,false +2418,test_02418,false +2419,test_02419,false +2420,NULL,true +2421,test_02421,false +2422,test_02422,false +2423,test_02423,false +2424,test_02424,false +2425,test_02425,false +2426,test_02426,false +2427,test_02427,false +2428,test_02428,false +2429,test_02429,false +2430,test_02430,false +2431,test_02431,false +2432,test_02432,false +2433,test_02433,false +2434,test_02434,false +2435,test_02435,false +2436,test_02436,false +2437,test_02437,false +2438,test_02438,false +2439,test_02439,false +2440,test_02440,false +2441,test_02441,false +2442,test_02442,false +2443,test_02443,false +2444,test_02444,false +2445,test_02445,false +2446,test_02446,false +2447,test_02447,false +2448,test_02448,false +2449,test_02449,false +2450,test_02450,false +2451,test_02451,false +2452,test_02452,false +2453,test_02453,false +2454,test_02454,false +2455,test_02455,false +2456,test_02456,false +2457,test_02457,false +2458,test_02458,false +2459,test_02459,false +2460,test_02460,false +2461,test_02461,false +2462,test_02462,false +2463,test_02463,false +2464,test_02464,false +2465,test_02465,false +2466,test_02466,false +2467,test_02467,false +2468,test_02468,false +2469,test_02469,false +2470,test_02470,false +2471,test_02471,false +2472,test_02472,false +2473,test_02473,false +2474,test_02474,false +2475,test_02475,false +2476,test_02476,false +2477,test_02477,false +2478,test_02478,false +2479,test_02479,false +2480,test_02480,false +2481,NULL,true +2482,test_02482,false +2483,test_02483,false +2484,test_02484,false +2485,test_02485,false +2486,test_02486,false +2487,test_02487,false +2488,test_02488,false +2489,test_02489,false +2490,test_02490,false +2491,test_02491,false +2492,test_02492,false +2493,test_02493,false +2494,test_02494,false +2495,test_02495,false +2496,test_02496,false +2497,test_02497,false +2498,test_02498,false +2499,test_02499,false +2500,test_02500,false +2501,test_02501,false +2502,test_02502,false +2503,test_02503,false +2504,test_02504,false +2505,test_02505,false +2506,test_02506,false +2507,test_02507,false +2508,test_02508,false +2509,test_02509,false +2510,test_02510,false +2511,test_02511,false +2512,test_02512,false +2513,test_02513,false +2514,test_02514,false +2515,test_02515,false +2516,test_02516,false +2517,test_02517,false +2518,test_02518,false +2519,test_02519,false +2520,test_02520,false +2521,test_02521,false +2522,test_02522,false +2523,test_02523,false +2524,test_02524,false +2525,test_02525,false +2526,NULL,true +2527,test_02527,false +2528,test_02528,false +2529,test_02529,false +2530,test_02530,false +2531,test_02531,false +2532,test_02532,false +2533,test_02533,false +2534,test_02534,false +2535,test_02535,false +2536,test_02536,false +2537,test_02537,false +2538,test_02538,false +2539,test_02539,false +2540,test_02540,false +2541,test_02541,false +2542,test_02542,false +2543,test_02543,false +2544,test_02544,false +2545,test_02545,false +2546,test_02546,false +2547,test_02547,false +2548,test_02548,false +2549,test_02549,false +2550,test_02550,false +2551,NULL,true +2552,test_02552,false +2553,test_02553,false +2554,test_02554,false +2555,test_02555,false +2556,test_02556,false +2557,test_02557,false +2558,test_02558,false +2559,test_02559,false +2560,test_02560,false +2561,test_02561,false +2562,test_02562,false +2563,test_02563,false +2564,test_02564,false +2565,test_02565,false +2566,test_02566,false +2567,test_02567,false +2568,test_02568,false +2569,test_02569,false +2570,test_02570,false +2571,test_02571,false +2572,test_02572,false +2573,test_02573,false +2574,test_02574,false +2575,test_02575,false +2576,test_02576,false +2577,test_02577,false +2578,test_02578,false +2579,test_02579,false +2580,test_02580,false +2581,NULL,true +2582,test_02582,false +2583,test_02583,false +2584,test_02584,false +2585,test_02585,false +2586,test_02586,false +2587,test_02587,false +2588,test_02588,false +2589,test_02589,false +2590,test_02590,false +2591,test_02591,false +2592,test_02592,false +2593,test_02593,false +2594,test_02594,false +2595,test_02595,false +2596,test_02596,false +2597,test_02597,false +2598,test_02598,false +2599,test_02599,false +2600,test_02600,false +2601,test_02601,false +2602,test_02602,false +2603,test_02603,false +2604,test_02604,false +2605,test_02605,false +2606,test_02606,false +2607,test_02607,false +2608,test_02608,false +2609,test_02609,false +2610,test_02610,false +2611,test_02611,false +2612,test_02612,false +2613,test_02613,false +2614,test_02614,false +2615,test_02615,false +2616,test_02616,false +2617,test_02617,false +2618,test_02618,false +2619,test_02619,false +2620,test_02620,false +2621,test_02621,false +2622,test_02622,false +2623,test_02623,false +2624,test_02624,false +2625,test_02625,false +2626,test_02626,false +2627,test_02627,false +2628,test_02628,false +2629,test_02629,false +2630,test_02630,false +2631,test_02631,false +2632,test_02632,false +2633,test_02633,false +2634,NULL,true +2635,test_02635,false +2636,NULL,true +2637,test_02637,false +2638,test_02638,false +2639,test_02639,false +2640,NULL,true +2641,test_02641,false +2642,test_02642,false +2643,test_02643,false +2644,test_02644,false +2645,test_02645,false +2646,test_02646,false +2647,test_02647,false +2648,test_02648,false +2649,test_02649,false +2650,test_02650,false +2651,test_02651,false +2652,test_02652,false +2653,test_02653,false +2654,test_02654,false +2655,test_02655,false +2656,test_02656,false +2657,test_02657,false +2658,test_02658,false +2659,test_02659,false +2660,test_02660,false +2661,test_02661,false +2662,test_02662,false +2663,test_02663,false +2664,test_02664,false +2665,test_02665,false +2666,test_02666,false +2667,test_02667,false +2668,test_02668,false +2669,test_02669,false +2670,test_02670,false +2671,test_02671,false +2672,test_02672,false +2673,test_02673,false +2674,test_02674,false +2675,NULL,true +2676,NULL,true +2677,test_02677,false +2678,test_02678,false +2679,NULL,true +2680,test_02680,false +2681,test_02681,false +2682,test_02682,false +2683,test_02683,false +2684,test_02684,false +2685,test_02685,false +2686,test_02686,false +2687,test_02687,false +2688,test_02688,false +2689,test_02689,false +2690,NULL,true +2691,test_02691,false +2692,test_02692,false +2693,test_02693,false +2694,test_02694,false +2695,test_02695,false +2696,test_02696,false +2697,test_02697,false +2698,test_02698,false +2699,test_02699,false +2700,test_02700,false +2701,NULL,true +2702,test_02702,false +2703,NULL,true +2704,test_02704,false +2705,test_02705,false +2706,test_02706,false +2707,test_02707,false +2708,test_02708,false +2709,test_02709,false +2710,test_02710,false +2711,test_02711,false +2712,test_02712,false +2713,test_02713,false +2714,test_02714,false +2715,test_02715,false +2716,test_02716,false +2717,test_02717,false +2718,test_02718,false +2719,test_02719,false +2720,test_02720,false +2721,test_02721,false +2722,test_02722,false +2723,test_02723,false +2724,test_02724,false +2725,test_02725,false +2726,test_02726,false +2727,test_02727,false +2728,test_02728,false +2729,test_02729,false +2730,test_02730,false +2731,test_02731,false +2732,test_02732,false +2733,test_02733,false +2734,test_02734,false +2735,test_02735,false +2736,test_02736,false +2737,test_02737,false +2738,test_02738,false +2739,test_02739,false +2740,test_02740,false +2741,test_02741,false +2742,test_02742,false +2743,test_02743,false +2744,test_02744,false +2745,test_02745,false +2746,test_02746,false +2747,test_02747,false +2748,test_02748,false +2749,test_02749,false +2750,NULL,true +2751,test_02751,false +2752,test_02752,false +2753,test_02753,false +2754,test_02754,false +2755,test_02755,false +2756,test_02756,false +2757,test_02757,false +2758,test_02758,false +2759,test_02759,false +2760,test_02760,false +2761,test_02761,false +2762,test_02762,false +2763,NULL,true +2764,test_02764,false +2765,test_02765,false +2766,test_02766,false +2767,test_02767,false +2768,test_02768,false +2769,test_02769,false +2770,test_02770,false +2771,NULL,true +2772,test_02772,false +2773,test_02773,false +2774,test_02774,false +2775,test_02775,false +2776,test_02776,false +2777,test_02777,false +2778,test_02778,false +2779,test_02779,false +2780,test_02780,false +2781,test_02781,false +2782,test_02782,false +2783,test_02783,false +2784,test_02784,false +2785,test_02785,false +2786,test_02786,false +2787,test_02787,false +2788,test_02788,false +2789,test_02789,false +2790,test_02790,false +2791,test_02791,false +2792,test_02792,false +2793,test_02793,false +2794,NULL,true +2795,test_02795,false +2796,NULL,true +2797,test_02797,false +2798,NULL,true +2799,test_02799,false +2800,test_02800,false +2801,test_02801,false +2802,test_02802,false +2803,NULL,true +2804,test_02804,false +2805,test_02805,false +2806,test_02806,false +2807,test_02807,false +2808,test_02808,false +2809,test_02809,false +2810,test_02810,false +2811,test_02811,false +2812,test_02812,false +2813,test_02813,false +2814,test_02814,false +2815,test_02815,false +2816,test_02816,false +2817,test_02817,false +2818,test_02818,false +2819,test_02819,false +2820,test_02820,false +2821,test_02821,false +2822,test_02822,false +2823,test_02823,false +2824,test_02824,false +2825,test_02825,false +2826,test_02826,false +2827,test_02827,false +2828,test_02828,false +2829,test_02829,false +2830,test_02830,false +2831,test_02831,false +2832,test_02832,false +2833,test_02833,false +2834,test_02834,false +2835,test_02835,false +2836,test_02836,false +2837,test_02837,false +2838,test_02838,false +2839,NULL,true +2840,test_02840,false +2841,test_02841,false +2842,test_02842,false +2843,test_02843,false +2844,test_02844,false +2845,test_02845,false +2846,test_02846,false +2847,test_02847,false +2848,test_02848,false +2849,test_02849,false +2850,test_02850,false +2851,test_02851,false +2852,test_02852,false +2853,test_02853,false +2854,test_02854,false +2855,test_02855,false +2856,NULL,true +2857,test_02857,false +2858,test_02858,false +2859,test_02859,false +2860,test_02860,false +2861,test_02861,false +2862,test_02862,false +2863,test_02863,false +2864,test_02864,false +2865,test_02865,false +2866,test_02866,false +2867,test_02867,false +2868,test_02868,false +2869,test_02869,false +2870,test_02870,false +2871,test_02871,false +2872,test_02872,false +2873,test_02873,false +2874,test_02874,false +2875,test_02875,false +2876,test_02876,false +2877,test_02877,false +2878,test_02878,false +2879,test_02879,false +2880,test_02880,false +2881,test_02881,false +2882,test_02882,false +2883,test_02883,false +2884,test_02884,false +2885,test_02885,false +2886,test_02886,false +2887,test_02887,false +2888,test_02888,false +2889,test_02889,false +2890,test_02890,false +2891,NULL,true +2892,test_02892,false +2893,test_02893,false +2894,test_02894,false +2895,test_02895,false +2896,test_02896,false +2897,test_02897,false +2898,test_02898,false +2899,test_02899,false +2900,test_02900,false +2901,test_02901,false +2902,test_02902,false +2903,test_02903,false +2904,test_02904,false +2905,test_02905,false +2906,test_02906,false +2907,test_02907,false +2908,test_02908,false +2909,test_02909,false +2910,test_02910,false +2911,test_02911,false +2912,test_02912,false +2913,test_02913,false +2914,test_02914,false +2915,test_02915,false +2916,test_02916,false +2917,test_02917,false +2918,test_02918,false +2919,test_02919,false +2920,test_02920,false +2921,test_02921,false +2922,test_02922,false +2923,test_02923,false +2924,test_02924,false +2925,test_02925,false +2926,test_02926,false +2927,test_02927,false +2928,test_02928,false +2929,test_02929,false +2930,NULL,true +2931,test_02931,false +2932,test_02932,false +2933,test_02933,false +2934,test_02934,false +2935,test_02935,false +2936,test_02936,false +2937,test_02937,false +2938,test_02938,false +2939,test_02939,false +2940,NULL,true +2941,test_02941,false +2942,test_02942,false +2943,test_02943,false +2944,test_02944,false +2945,NULL,true +2946,test_02946,false +2947,test_02947,false +2948,test_02948,false +2949,test_02949,false +2950,test_02950,false +2951,test_02951,false +2952,test_02952,false +2953,test_02953,false +2954,test_02954,false +2955,test_02955,false +2956,test_02956,false +2957,test_02957,false +2958,test_02958,false +2959,test_02959,false +2960,test_02960,false +2961,test_02961,false +2962,test_02962,false +2963,test_02963,false +2964,test_02964,false +2965,test_02965,false +2966,test_02966,false +2967,test_02967,false +2968,test_02968,false +2969,test_02969,false +2970,test_02970,false +2971,test_02971,false +2972,test_02972,false +2973,test_02973,false +2974,test_02974,false +2975,test_02975,false +2976,test_02976,false +2977,test_02977,false +2978,test_02978,false +2979,test_02979,false +2980,test_02980,false +2981,test_02981,false +2982,test_02982,false +2983,test_02983,false +2984,test_02984,false +2985,test_02985,false +2986,test_02986,false +2987,test_02987,false +2988,test_02988,false +2989,NULL,true +2990,test_02990,false +2991,test_02991,false +2992,test_02992,false +2993,test_02993,false +2994,test_02994,false +2995,test_02995,false +2996,test_02996,false +2997,test_02997,false +2998,test_02998,false +2999,test_02999,false +3000,test_03000,false +3001,test_03001,false +3002,test_03002,false +3003,test_03003,false +3004,test_03004,false +3005,test_03005,false +3006,test_03006,false +3007,test_03007,false +3008,test_03008,false +3009,test_03009,false +3010,test_03010,false +3011,test_03011,false +3012,test_03012,false +3013,test_03013,false +3014,test_03014,false +3015,test_03015,false +3016,test_03016,false +3017,test_03017,false +3018,test_03018,false +3019,test_03019,false +3020,test_03020,false +3021,test_03021,false +3022,test_03022,false +3023,test_03023,false +3024,test_03024,false +3025,test_03025,false +3026,test_03026,false +3027,test_03027,false +3028,test_03028,false +3029,test_03029,false +3030,test_03030,false +3031,test_03031,false +3032,test_03032,false +3033,test_03033,false +3034,test_03034,false +3035,NULL,true +3036,test_03036,false +3037,test_03037,false +3038,test_03038,false +3039,test_03039,false +3040,test_03040,false +3041,test_03041,false +3042,test_03042,false +3043,test_03043,false +3044,test_03044,false +3045,test_03045,false +3046,test_03046,false +3047,test_03047,false +3048,test_03048,false +3049,test_03049,false +3050,test_03050,false +3051,test_03051,false +3052,test_03052,false +3053,test_03053,false +3054,test_03054,false +3055,test_03055,false +3056,test_03056,false +3057,test_03057,false +3058,test_03058,false +3059,test_03059,false +3060,test_03060,false +3061,test_03061,false +3062,test_03062,false +3063,test_03063,false +3064,test_03064,false +3065,NULL,true +3066,test_03066,false +3067,test_03067,false +3068,test_03068,false +3069,test_03069,false +3070,test_03070,false +3071,test_03071,false +3072,test_03072,false +3073,test_03073,false +3074,test_03074,false +3075,test_03075,false +3076,test_03076,false +3077,test_03077,false +3078,test_03078,false +3079,test_03079,false +3080,test_03080,false +3081,test_03081,false +3082,test_03082,false +3083,test_03083,false +3084,test_03084,false +3085,test_03085,false +3086,test_03086,false +3087,NULL,true +3088,test_03088,false +3089,test_03089,false +3090,test_03090,false +3091,test_03091,false +3092,test_03092,false +3093,test_03093,false +3094,test_03094,false +3095,test_03095,false +3096,test_03096,false +3097,test_03097,false +3098,test_03098,false +3099,test_03099,false +3100,test_03100,false +3101,test_03101,false +3102,test_03102,false +3103,test_03103,false +3104,test_03104,false +3105,test_03105,false +3106,test_03106,false +3107,test_03107,false +3108,test_03108,false +3109,test_03109,false +3110,test_03110,false +3111,test_03111,false +3112,test_03112,false +3113,test_03113,false +3114,test_03114,false +3115,NULL,true +3116,test_03116,false +3117,test_03117,false +3118,test_03118,false +3119,test_03119,false +3120,test_03120,false +3121,test_03121,false +3122,test_03122,false +3123,test_03123,false +3124,test_03124,false +3125,test_03125,false +3126,test_03126,false +3127,test_03127,false +3128,test_03128,false +3129,test_03129,false +3130,test_03130,false +3131,test_03131,false +3132,test_03132,false +3133,test_03133,false +3134,test_03134,false +3135,test_03135,false +3136,test_03136,false +3137,test_03137,false +3138,NULL,true +3139,test_03139,false +3140,test_03140,false +3141,test_03141,false +3142,test_03142,false +3143,test_03143,false +3144,test_03144,false +3145,test_03145,false +3146,test_03146,false +3147,test_03147,false +3148,test_03148,false +3149,NULL,true +3150,test_03150,false +3151,test_03151,false +3152,test_03152,false +3153,test_03153,false +3154,test_03154,false +3155,test_03155,false +3156,test_03156,false +3157,test_03157,false +3158,test_03158,false +3159,test_03159,false +3160,test_03160,false +3161,test_03161,false +3162,test_03162,false +3163,test_03163,false +3164,test_03164,false +3165,test_03165,false +3166,test_03166,false +3167,test_03167,false +3168,NULL,true +3169,test_03169,false +3170,test_03170,false +3171,test_03171,false +3172,test_03172,false +3173,test_03173,false +3174,test_03174,false +3175,test_03175,false +3176,test_03176,false +3177,test_03177,false +3178,test_03178,false +3179,test_03179,false +3180,test_03180,false +3181,test_03181,false +3182,test_03182,false +3183,test_03183,false +3184,test_03184,false +3185,test_03185,false +3186,test_03186,false +3187,test_03187,false +3188,test_03188,false +3189,test_03189,false +3190,test_03190,false +3191,test_03191,false +3192,test_03192,false +3193,test_03193,false +3194,test_03194,false +3195,test_03195,false +3196,test_03196,false +3197,test_03197,false +3198,test_03198,false +3199,test_03199,false +3200,NULL,true +3201,test_03201,false +3202,test_03202,false +3203,test_03203,false +3204,test_03204,false +3205,test_03205,false +3206,test_03206,false +3207,test_03207,false +3208,test_03208,false +3209,test_03209,false +3210,test_03210,false +3211,test_03211,false +3212,test_03212,false +3213,test_03213,false +3214,test_03214,false +3215,test_03215,false +3216,test_03216,false +3217,test_03217,false +3218,test_03218,false +3219,test_03219,false +3220,test_03220,false +3221,test_03221,false +3222,test_03222,false +3223,NULL,true +3224,test_03224,false +3225,test_03225,false +3226,test_03226,false +3227,test_03227,false +3228,test_03228,false +3229,test_03229,false +3230,test_03230,false +3231,test_03231,false +3232,NULL,true +3233,test_03233,false +3234,test_03234,false +3235,test_03235,false +3236,test_03236,false +3237,test_03237,false +3238,test_03238,false +3239,test_03239,false +3240,test_03240,false +3241,test_03241,false +3242,test_03242,false +3243,test_03243,false +3244,test_03244,false +3245,test_03245,false +3246,test_03246,false +3247,test_03247,false +3248,test_03248,false +3249,test_03249,false +3250,test_03250,false +3251,test_03251,false +3252,test_03252,false +3253,test_03253,false +3254,test_03254,false +3255,test_03255,false +3256,test_03256,false +3257,test_03257,false +3258,test_03258,false +3259,test_03259,false +3260,test_03260,false +3261,test_03261,false +3262,test_03262,false +3263,test_03263,false +3264,test_03264,false +3265,test_03265,false +3266,test_03266,false +3267,test_03267,false +3268,test_03268,false +3269,test_03269,false +3270,NULL,true +3271,test_03271,false +3272,test_03272,false +3273,test_03273,false +3274,test_03274,false +3275,test_03275,false +3276,test_03276,false +3277,test_03277,false +3278,test_03278,false +3279,test_03279,false +3280,test_03280,false +3281,test_03281,false +3282,test_03282,false +3283,test_03283,false +3284,test_03284,false +3285,test_03285,false +3286,NULL,true +3287,test_03287,false +3288,test_03288,false +3289,test_03289,false +3290,test_03290,false +3291,test_03291,false +3292,test_03292,false +3293,test_03293,false +3294,test_03294,false +3295,NULL,true +3296,test_03296,false +3297,test_03297,false +3298,test_03298,false +3299,test_03299,false +3300,test_03300,false +3301,test_03301,false +3302,test_03302,false +3303,test_03303,false +3304,test_03304,false +3305,test_03305,false +3306,test_03306,false +3307,test_03307,false +3308,test_03308,false +3309,test_03309,false +3310,test_03310,false +3311,test_03311,false +3312,test_03312,false +3313,test_03313,false +3314,test_03314,false +3315,test_03315,false +3316,test_03316,false +3317,test_03317,false +3318,test_03318,false +3319,test_03319,false +3320,test_03320,false +3321,test_03321,false +3322,test_03322,false +3323,test_03323,false +3324,test_03324,false +3325,test_03325,false +3326,test_03326,false +3327,test_03327,false +3328,test_03328,false +3329,test_03329,false +3330,test_03330,false +3331,test_03331,false +3332,test_03332,false +3333,test_03333,false +3334,test_03334,false +3335,test_03335,false +3336,test_03336,false +3337,test_03337,false +3338,test_03338,false +3339,test_03339,false +3340,test_03340,false +3341,test_03341,false +3342,test_03342,false +3343,test_03343,false +3344,NULL,true +3345,test_03345,false +3346,test_03346,false +3347,test_03347,false +3348,test_03348,false +3349,test_03349,false +3350,test_03350,false +3351,test_03351,false +3352,test_03352,false +3353,test_03353,false +3354,test_03354,false +3355,test_03355,false +3356,test_03356,false +3357,test_03357,false +3358,test_03358,false +3359,NULL,true +3360,test_03360,false +3361,test_03361,false +3362,test_03362,false +3363,test_03363,false +3364,test_03364,false +3365,test_03365,false +3366,test_03366,false +3367,test_03367,false +3368,test_03368,false +3369,test_03369,false +3370,test_03370,false +3371,test_03371,false +3372,test_03372,false +3373,test_03373,false +3374,test_03374,false +3375,test_03375,false +3376,test_03376,false +3377,test_03377,false +3378,test_03378,false +3379,test_03379,false +3380,test_03380,false +3381,test_03381,false +3382,test_03382,false +3383,test_03383,false +3384,test_03384,false +3385,test_03385,false +3386,test_03386,false +3387,test_03387,false +3388,test_03388,false +3389,test_03389,false +3390,NULL,true +3391,test_03391,false +3392,test_03392,false +3393,test_03393,false +3394,test_03394,false +3395,NULL,true +3396,test_03396,false +3397,test_03397,false +3398,test_03398,false +3399,test_03399,false +3400,test_03400,false +3401,test_03401,false +3402,test_03402,false +3403,test_03403,false +3404,test_03404,false +3405,test_03405,false +3406,test_03406,false +3407,test_03407,false +3408,test_03408,false +3409,test_03409,false +3410,test_03410,false +3411,test_03411,false +3412,test_03412,false +3413,test_03413,false +3414,test_03414,false +3415,test_03415,false +3416,test_03416,false +3417,test_03417,false +3418,test_03418,false +3419,test_03419,false +3420,test_03420,false +3421,test_03421,false +3422,test_03422,false +3423,test_03423,false +3424,test_03424,false +3425,test_03425,false +3426,test_03426,false +3427,test_03427,false +3428,test_03428,false +3429,test_03429,false +3430,NULL,true +3431,test_03431,false +3432,test_03432,false +3433,test_03433,false +3434,test_03434,false +3435,test_03435,false +3436,test_03436,false +3437,test_03437,false +3438,test_03438,false +3439,test_03439,false +3440,test_03440,false +3441,test_03441,false +3442,NULL,true +3443,test_03443,false +3444,test_03444,false +3445,test_03445,false +3446,test_03446,false +3447,test_03447,false +3448,test_03448,false +3449,test_03449,false +3450,test_03450,false +3451,NULL,true +3452,test_03452,false +3453,test_03453,false +3454,test_03454,false +3455,test_03455,false +3456,test_03456,false +3457,NULL,true +3458,test_03458,false +3459,test_03459,false +3460,test_03460,false +3461,test_03461,false +3462,test_03462,false +3463,test_03463,false +3464,test_03464,false +3465,test_03465,false +3466,test_03466,false +3467,NULL,true +3468,test_03468,false +3469,test_03469,false +3470,test_03470,false +3471,test_03471,false +3472,test_03472,false +3473,test_03473,false +3474,test_03474,false +3475,test_03475,false +3476,test_03476,false +3477,NULL,true +3478,test_03478,false +3479,test_03479,false +3480,test_03480,false +3481,test_03481,false +3482,test_03482,false +3483,test_03483,false +3484,test_03484,false +3485,test_03485,false +3486,test_03486,false +3487,test_03487,false +3488,test_03488,false +3489,test_03489,false +3490,test_03490,false +3491,test_03491,false +3492,test_03492,false +3493,test_03493,false +3494,test_03494,false +3495,test_03495,false +3496,test_03496,false +3497,test_03497,false +3498,test_03498,false +3499,NULL,true +3500,test_03500,false +3501,test_03501,false +3502,test_03502,false +3503,test_03503,false +3504,test_03504,false +3505,test_03505,false +3506,NULL,true +3507,test_03507,false +3508,test_03508,false +3509,test_03509,false +3510,test_03510,false +3511,test_03511,false +3512,test_03512,false +3513,test_03513,false +3514,test_03514,false +3515,test_03515,false +3516,test_03516,false +3517,test_03517,false +3518,test_03518,false +3519,test_03519,false +3520,test_03520,false +3521,test_03521,false +3522,test_03522,false +3523,test_03523,false +3524,test_03524,false +3525,test_03525,false +3526,test_03526,false +3527,test_03527,false +3528,test_03528,false +3529,test_03529,false +3530,test_03530,false +3531,test_03531,false +3532,test_03532,false +3533,test_03533,false +3534,test_03534,false +3535,test_03535,false +3536,test_03536,false +3537,test_03537,false +3538,test_03538,false +3539,test_03539,false +3540,test_03540,false +3541,test_03541,false +3542,test_03542,false +3543,test_03543,false +3544,test_03544,false +3545,test_03545,false +3546,test_03546,false +3547,test_03547,false +3548,test_03548,false +3549,test_03549,false +3550,test_03550,false +3551,test_03551,false +3552,test_03552,false +3553,test_03553,false +3554,test_03554,false +3555,test_03555,false +3556,test_03556,false +3557,test_03557,false +3558,test_03558,false +3559,test_03559,false +3560,test_03560,false +3561,test_03561,false +3562,test_03562,false +3563,test_03563,false +3564,test_03564,false +3565,test_03565,false +3566,test_03566,false +3567,test_03567,false +3568,test_03568,false +3569,test_03569,false +3570,test_03570,false +3571,test_03571,false +3572,test_03572,false +3573,test_03573,false +3574,test_03574,false +3575,test_03575,false +3576,test_03576,false +3577,test_03577,false +3578,test_03578,false +3579,test_03579,false +3580,test_03580,false +3581,test_03581,false +3582,test_03582,false +3583,test_03583,false +3584,test_03584,false +3585,test_03585,false +3586,test_03586,false +3587,test_03587,false +3588,test_03588,false +3589,test_03589,false +3590,test_03590,false +3591,test_03591,false +3592,NULL,true +3593,test_03593,false +3594,test_03594,false +3595,test_03595,false +3596,test_03596,false +3597,test_03597,false +3598,test_03598,false +3599,test_03599,false +3600,test_03600,false +3601,test_03601,false +3602,test_03602,false +3603,test_03603,false +3604,test_03604,false +3605,test_03605,false +3606,test_03606,false +3607,test_03607,false +3608,test_03608,false +3609,test_03609,false +3610,test_03610,false +3611,test_03611,false +3612,test_03612,false +3613,test_03613,false +3614,test_03614,false +3615,test_03615,false +3616,test_03616,false +3617,test_03617,false +3618,test_03618,false +3619,test_03619,false +3620,test_03620,false +3621,test_03621,false +3622,test_03622,false +3623,test_03623,false +3624,test_03624,false +3625,test_03625,false +3626,test_03626,false +3627,test_03627,false +3628,test_03628,false +3629,test_03629,false +3630,test_03630,false +3631,test_03631,false +3632,test_03632,false +3633,test_03633,false +3634,test_03634,false +3635,test_03635,false +3636,test_03636,false +3637,test_03637,false +3638,test_03638,false +3639,test_03639,false +3640,NULL,true +3641,test_03641,false +3642,test_03642,false +3643,test_03643,false +3644,test_03644,false +3645,test_03645,false +3646,test_03646,false +3647,test_03647,false +3648,test_03648,false +3649,test_03649,false +3650,test_03650,false +3651,test_03651,false +3652,test_03652,false +3653,test_03653,false +3654,test_03654,false +3655,NULL,true +3656,test_03656,false +3657,test_03657,false +3658,test_03658,false +3659,test_03659,false +3660,test_03660,false +3661,NULL,true +3662,test_03662,false +3663,test_03663,false +3664,test_03664,false +3665,test_03665,false +3666,test_03666,false +3667,test_03667,false +3668,test_03668,false +3669,test_03669,false +3670,test_03670,false +3671,test_03671,false +3672,test_03672,false +3673,test_03673,false +3674,test_03674,false +3675,test_03675,false +3676,test_03676,false +3677,test_03677,false +3678,test_03678,false +3679,test_03679,false +3680,test_03680,false +3681,test_03681,false +3682,test_03682,false +3683,test_03683,false +3684,test_03684,false +3685,test_03685,false +3686,test_03686,false +3687,test_03687,false +3688,test_03688,false +3689,test_03689,false +3690,test_03690,false +3691,test_03691,false +3692,test_03692,false +3693,test_03693,false +3694,test_03694,false +3695,test_03695,false +3696,test_03696,false +3697,test_03697,false +3698,test_03698,false +3699,test_03699,false +3700,test_03700,false +3701,test_03701,false +3702,test_03702,false +3703,test_03703,false +3704,test_03704,false +3705,test_03705,false +3706,test_03706,false +3707,test_03707,false +3708,test_03708,false +3709,test_03709,false +3710,test_03710,false +3711,test_03711,false +3712,test_03712,false +3713,test_03713,false +3714,NULL,true +3715,test_03715,false +3716,test_03716,false +3717,test_03717,false +3718,test_03718,false +3719,test_03719,false +3720,test_03720,false +3721,test_03721,false +3722,NULL,true +3723,test_03723,false +3724,test_03724,false +3725,test_03725,false +3726,test_03726,false +3727,test_03727,false +3728,test_03728,false +3729,test_03729,false +3730,test_03730,false +3731,test_03731,false +3732,test_03732,false +3733,test_03733,false +3734,test_03734,false +3735,test_03735,false +3736,test_03736,false +3737,test_03737,false +3738,test_03738,false +3739,test_03739,false +3740,test_03740,false +3741,test_03741,false +3742,test_03742,false +3743,test_03743,false +3744,test_03744,false +3745,test_03745,false +3746,test_03746,false +3747,test_03747,false +3748,test_03748,false +3749,test_03749,false +3750,test_03750,false +3751,test_03751,false +3752,test_03752,false +3753,test_03753,false +3754,test_03754,false +3755,test_03755,false +3756,test_03756,false +3757,test_03757,false +3758,test_03758,false +3759,test_03759,false +3760,test_03760,false +3761,test_03761,false +3762,NULL,true +3763,test_03763,false +3764,test_03764,false +3765,test_03765,false +3766,test_03766,false +3767,test_03767,false +3768,test_03768,false +3769,test_03769,false +3770,NULL,true +3771,test_03771,false +3772,test_03772,false +3773,test_03773,false +3774,test_03774,false +3775,test_03775,false +3776,test_03776,false +3777,test_03777,false +3778,test_03778,false +3779,test_03779,false +3780,test_03780,false +3781,test_03781,false +3782,test_03782,false +3783,test_03783,false +3784,test_03784,false +3785,NULL,true +3786,test_03786,false +3787,test_03787,false +3788,test_03788,false +3789,test_03789,false +3790,test_03790,false +3791,test_03791,false +3792,test_03792,false +3793,test_03793,false +3794,test_03794,false +3795,test_03795,false +3796,test_03796,false +3797,test_03797,false +3798,test_03798,false +3799,test_03799,false +3800,test_03800,false +3801,test_03801,false +3802,test_03802,false +3803,test_03803,false +3804,test_03804,false +3805,test_03805,false +3806,test_03806,false +3807,test_03807,false +3808,test_03808,false +3809,test_03809,false +3810,test_03810,false +3811,test_03811,false +3812,test_03812,false +3813,test_03813,false +3814,test_03814,false +3815,NULL,true +3816,test_03816,false +3817,test_03817,false +3818,test_03818,false +3819,test_03819,false +3820,test_03820,false +3821,test_03821,false +3822,test_03822,false +3823,test_03823,false +3824,test_03824,false +3825,test_03825,false +3826,test_03826,false +3827,test_03827,false +3828,test_03828,false +3829,test_03829,false +3830,test_03830,false +3831,test_03831,false +3832,test_03832,false +3833,test_03833,false +3834,test_03834,false +3835,test_03835,false +3836,test_03836,false +3837,test_03837,false +3838,test_03838,false +3839,test_03839,false +3840,test_03840,false +3841,test_03841,false +3842,test_03842,false +3843,test_03843,false +3844,test_03844,false +3845,test_03845,false +3846,test_03846,false +3847,test_03847,false +3848,test_03848,false +3849,test_03849,false +3850,test_03850,false +3851,test_03851,false +3852,test_03852,false +3853,test_03853,false +3854,test_03854,false +3855,test_03855,false +3856,test_03856,false +3857,test_03857,false +3858,test_03858,false +3859,test_03859,false +3860,test_03860,false +3861,test_03861,false +3862,test_03862,false +3863,test_03863,false +3864,test_03864,false +3865,test_03865,false +3866,test_03866,false +3867,test_03867,false +3868,test_03868,false +3869,test_03869,false +3870,test_03870,false +3871,NULL,true +3872,test_03872,false +3873,test_03873,false +3874,test_03874,false +3875,test_03875,false +3876,test_03876,false +3877,test_03877,false +3878,test_03878,false +3879,test_03879,false +3880,test_03880,false +3881,test_03881,false +3882,test_03882,false +3883,test_03883,false +3884,test_03884,false +3885,test_03885,false +3886,test_03886,false +3887,test_03887,false +3888,test_03888,false +3889,test_03889,false +3890,test_03890,false +3891,test_03891,false +3892,test_03892,false +3893,test_03893,false +3894,test_03894,false +3895,test_03895,false +3896,test_03896,false +3897,test_03897,false +3898,test_03898,false +3899,test_03899,false +3900,test_03900,false +3901,test_03901,false +3902,test_03902,false +3903,test_03903,false +3904,test_03904,false +3905,test_03905,false +3906,test_03906,false +3907,test_03907,false +3908,test_03908,false +3909,test_03909,false +3910,test_03910,false +3911,test_03911,false +3912,test_03912,false +3913,test_03913,false +3914,test_03914,false +3915,test_03915,false +3916,test_03916,false +3917,test_03917,false +3918,test_03918,false +3919,test_03919,false +3920,test_03920,false +3921,test_03921,false +3922,test_03922,false +3923,test_03923,false +3924,test_03924,false +3925,test_03925,false +3926,test_03926,false +3927,test_03927,false +3928,test_03928,false +3929,test_03929,false +3930,test_03930,false +3931,test_03931,false +3932,test_03932,false +3933,test_03933,false +3934,test_03934,false +3935,test_03935,false +3936,test_03936,false +3937,test_03937,false +3938,test_03938,false +3939,test_03939,false +3940,test_03940,false +3941,test_03941,false +3942,test_03942,false +3943,test_03943,false +3944,test_03944,false +3945,test_03945,false +3946,test_03946,false +3947,test_03947,false +3948,test_03948,false +3949,test_03949,false +3950,test_03950,false +3951,test_03951,false +3952,test_03952,false +3953,NULL,true +3954,test_03954,false +3955,test_03955,false +3956,test_03956,false +3957,test_03957,false +3958,test_03958,false +3959,test_03959,false +3960,NULL,true +3961,test_03961,false +3962,test_03962,false +3963,test_03963,false +3964,test_03964,false +3965,test_03965,false +3966,test_03966,false +3967,test_03967,false +3968,test_03968,false +3969,test_03969,false +3970,test_03970,false +3971,test_03971,false +3972,test_03972,false +3973,test_03973,false +3974,test_03974,false +3975,test_03975,false +3976,test_03976,false +3977,test_03977,false +3978,test_03978,false +3979,test_03979,false +3980,test_03980,false +3981,test_03981,false +3982,test_03982,false +3983,test_03983,false +3984,test_03984,false +3985,test_03985,false +3986,test_03986,false +3987,test_03987,false +3988,test_03988,false +3989,test_03989,false +3990,test_03990,false +3991,test_03991,false +3992,test_03992,false +3993,test_03993,false +3994,test_03994,false +3995,test_03995,false +3996,test_03996,false +3997,test_03997,false +3998,test_03998,false +3999,test_03999,false +4000,test_04000,false +4001,test_04001,false +4002,test_04002,false +4003,test_04003,false +4004,test_04004,false +4005,NULL,true +4006,test_04006,false +4007,test_04007,false +4008,test_04008,false +4009,test_04009,false +4010,test_04010,false +4011,test_04011,false +4012,test_04012,false +4013,test_04013,false +4014,test_04014,false +4015,test_04015,false +4016,test_04016,false +4017,test_04017,false +4018,NULL,true +4019,test_04019,false +4020,test_04020,false +4021,test_04021,false +4022,test_04022,false +4023,test_04023,false +4024,test_04024,false +4025,test_04025,false +4026,test_04026,false +4027,NULL,true +4028,test_04028,false +4029,NULL,true +4030,test_04030,false +4031,test_04031,false +4032,test_04032,false +4033,NULL,true +4034,test_04034,false +4035,test_04035,false +4036,test_04036,false +4037,test_04037,false +4038,NULL,true +4039,test_04039,false +4040,test_04040,false +4041,test_04041,false +4042,test_04042,false +4043,test_04043,false +4044,test_04044,false +4045,test_04045,false +4046,test_04046,false +4047,test_04047,false +4048,test_04048,false +4049,test_04049,false +4050,test_04050,false +4051,test_04051,false +4052,test_04052,false +4053,test_04053,false +4054,test_04054,false +4055,test_04055,false +4056,test_04056,false +4057,test_04057,false +4058,test_04058,false +4059,test_04059,false +4060,test_04060,false +4061,test_04061,false +4062,test_04062,false +4063,test_04063,false +4064,test_04064,false +4065,test_04065,false +4066,test_04066,false +4067,test_04067,false +4068,test_04068,false +4069,test_04069,false +4070,test_04070,false +4071,test_04071,false +4072,test_04072,false +4073,test_04073,false +4074,test_04074,false +4075,test_04075,false +4076,test_04076,false +4077,test_04077,false +4078,test_04078,false +4079,test_04079,false +4080,test_04080,false +4081,test_04081,false +4082,test_04082,false +4083,test_04083,false +4084,test_04084,false +4085,test_04085,false +4086,test_04086,false +4087,test_04087,false +4088,test_04088,false +4089,test_04089,false +4090,test_04090,false +4091,test_04091,false +4092,test_04092,false +4093,test_04093,false +4094,test_04094,false +4095,test_04095,false +4096,test_04096,false +4097,test_04097,false +4098,test_04098,false +4099,test_04099,false +4100,test_04100,false +4101,test_04101,false +4102,test_04102,false +4103,test_04103,false +4104,NULL,true +4105,test_04105,false +4106,test_04106,false +4107,test_04107,false +4108,test_04108,false +4109,test_04109,false +4110,test_04110,false +4111,test_04111,false +4112,test_04112,false +4113,test_04113,false +4114,test_04114,false +4115,test_04115,false +4116,test_04116,false +4117,test_04117,false +4118,test_04118,false +4119,test_04119,false +4120,test_04120,false +4121,test_04121,false +4122,test_04122,false +4123,test_04123,false +4124,test_04124,false +4125,test_04125,false +4126,test_04126,false +4127,test_04127,false +4128,test_04128,false +4129,test_04129,false +4130,test_04130,false +4131,test_04131,false +4132,test_04132,false +4133,test_04133,false +4134,test_04134,false +4135,test_04135,false +4136,test_04136,false +4137,test_04137,false +4138,test_04138,false +4139,test_04139,false +4140,test_04140,false +4141,test_04141,false +4142,test_04142,false +4143,test_04143,false +4144,test_04144,false +4145,test_04145,false +4146,test_04146,false +4147,test_04147,false +4148,test_04148,false +4149,test_04149,false +4150,test_04150,false +4151,test_04151,false +4152,test_04152,false +4153,test_04153,false +4154,test_04154,false +4155,test_04155,false +4156,test_04156,false +4157,test_04157,false +4158,test_04158,false +4159,test_04159,false +4160,test_04160,false +4161,test_04161,false +4162,test_04162,false +4163,NULL,true +4164,test_04164,false +4165,test_04165,false +4166,NULL,true +4167,test_04167,false +4168,test_04168,false +4169,test_04169,false +4170,test_04170,false +4171,test_04171,false +4172,test_04172,false +4173,test_04173,false +4174,test_04174,false +4175,test_04175,false +4176,test_04176,false +4177,test_04177,false +4178,test_04178,false +4179,test_04179,false +4180,test_04180,false +4181,test_04181,false +4182,test_04182,false +4183,test_04183,false +4184,test_04184,false +4185,test_04185,false +4186,test_04186,false +4187,test_04187,false +4188,test_04188,false +4189,test_04189,false +4190,test_04190,false +4191,test_04191,false +4192,test_04192,false +4193,test_04193,false +4194,NULL,true +4195,test_04195,false +4196,test_04196,false +4197,test_04197,false +4198,test_04198,false +4199,test_04199,false +4200,test_04200,false +4201,test_04201,false +4202,test_04202,false +4203,test_04203,false +4204,test_04204,false +4205,test_04205,false +4206,test_04206,false +4207,test_04207,false +4208,test_04208,false +4209,test_04209,false +4210,test_04210,false +4211,test_04211,false +4212,test_04212,false +4213,test_04213,false +4214,test_04214,false +4215,test_04215,false +4216,test_04216,false +4217,test_04217,false +4218,test_04218,false +4219,test_04219,false +4220,NULL,true +4221,test_04221,false +4222,test_04222,false +4223,test_04223,false +4224,test_04224,false +4225,test_04225,false +4226,test_04226,false +4227,test_04227,false +4228,test_04228,false +4229,test_04229,false +4230,test_04230,false +4231,test_04231,false +4232,test_04232,false +4233,NULL,true +4234,test_04234,false +4235,test_04235,false +4236,test_04236,false +4237,test_04237,false +4238,NULL,true +4239,test_04239,false +4240,test_04240,false +4241,test_04241,false +4242,NULL,true +4243,test_04243,false +4244,test_04244,false +4245,test_04245,false +4246,test_04246,false +4247,test_04247,false +4248,test_04248,false +4249,test_04249,false +4250,test_04250,false +4251,test_04251,false +4252,test_04252,false +4253,test_04253,false +4254,test_04254,false +4255,test_04255,false +4256,test_04256,false +4257,test_04257,false +4258,test_04258,false +4259,test_04259,false +4260,test_04260,false +4261,test_04261,false +4262,test_04262,false +4263,test_04263,false +4264,test_04264,false +4265,test_04265,false +4266,test_04266,false +4267,test_04267,false +4268,test_04268,false +4269,test_04269,false +4270,test_04270,false +4271,test_04271,false +4272,test_04272,false +4273,test_04273,false +4274,test_04274,false +4275,test_04275,false +4276,test_04276,false +4277,test_04277,false +4278,test_04278,false +4279,test_04279,false +4280,test_04280,false +4281,test_04281,false +4282,test_04282,false +4283,test_04283,false +4284,test_04284,false +4285,test_04285,false +4286,test_04286,false +4287,test_04287,false +4288,NULL,true +4289,test_04289,false +4290,test_04290,false +4291,test_04291,false +4292,test_04292,false +4293,test_04293,false +4294,test_04294,false +4295,test_04295,false +4296,test_04296,false +4297,test_04297,false +4298,test_04298,false +4299,test_04299,false +4300,test_04300,false +4301,test_04301,false +4302,test_04302,false +4303,NULL,true +4304,test_04304,false +4305,test_04305,false +4306,test_04306,false +4307,test_04307,false +4308,test_04308,false +4309,test_04309,false +4310,test_04310,false +4311,test_04311,false +4312,test_04312,false +4313,test_04313,false +4314,test_04314,false +4315,test_04315,false +4316,test_04316,false +4317,test_04317,false +4318,test_04318,false +4319,test_04319,false +4320,test_04320,false +4321,test_04321,false +4322,test_04322,false +4323,test_04323,false +4324,test_04324,false +4325,test_04325,false +4326,test_04326,false +4327,test_04327,false +4328,test_04328,false +4329,test_04329,false +4330,test_04330,false +4331,test_04331,false +4332,test_04332,false +4333,test_04333,false +4334,test_04334,false +4335,NULL,true +4336,test_04336,false +4337,test_04337,false +4338,test_04338,false +4339,test_04339,false +4340,test_04340,false +4341,test_04341,false +4342,test_04342,false +4343,test_04343,false +4344,test_04344,false +4345,test_04345,false +4346,test_04346,false +4347,test_04347,false +4348,test_04348,false +4349,test_04349,false +4350,test_04350,false +4351,test_04351,false +4352,test_04352,false +4353,test_04353,false +4354,test_04354,false +4355,test_04355,false +4356,test_04356,false +4357,test_04357,false +4358,NULL,true +4359,NULL,true +4360,test_04360,false +4361,test_04361,false +4362,test_04362,false +4363,test_04363,false +4364,test_04364,false +4365,test_04365,false +4366,test_04366,false +4367,NULL,true +4368,test_04368,false +4369,test_04369,false +4370,test_04370,false +4371,test_04371,false +4372,NULL,true +4373,test_04373,false +4374,test_04374,false +4375,test_04375,false +4376,test_04376,false +4377,test_04377,false +4378,test_04378,false +4379,test_04379,false +4380,test_04380,false +4381,test_04381,false +4382,test_04382,false +4383,test_04383,false +4384,test_04384,false +4385,test_04385,false +4386,test_04386,false +4387,test_04387,false +4388,test_04388,false +4389,test_04389,false +4390,NULL,true +4391,test_04391,false +4392,test_04392,false +4393,test_04393,false +4394,test_04394,false +4395,test_04395,false +4396,test_04396,false +4397,test_04397,false +4398,test_04398,false +4399,test_04399,false +4400,test_04400,false +4401,test_04401,false +4402,test_04402,false +4403,test_04403,false +4404,NULL,true +4405,test_04405,false +4406,test_04406,false +4407,test_04407,false +4408,test_04408,false +4409,test_04409,false +4410,test_04410,false +4411,test_04411,false +4412,test_04412,false +4413,test_04413,false +4414,test_04414,false +4415,test_04415,false +4416,test_04416,false +4417,NULL,true +4418,test_04418,false +4419,test_04419,false +4420,test_04420,false +4421,test_04421,false +4422,test_04422,false +4423,test_04423,false +4424,test_04424,false +4425,NULL,true +4426,test_04426,false +4427,test_04427,false +4428,test_04428,false +4429,NULL,true +4430,test_04430,false +4431,test_04431,false +4432,NULL,true +4433,test_04433,false +4434,test_04434,false +4435,test_04435,false +4436,test_04436,false +4437,test_04437,false +4438,test_04438,false +4439,test_04439,false +4440,test_04440,false +4441,test_04441,false +4442,test_04442,false +4443,NULL,true +4444,test_04444,false +4445,test_04445,false +4446,test_04446,false +4447,test_04447,false +4448,test_04448,false +4449,test_04449,false +4450,test_04450,false +4451,test_04451,false +4452,test_04452,false +4453,test_04453,false +4454,test_04454,false +4455,test_04455,false +4456,test_04456,false +4457,test_04457,false +4458,test_04458,false +4459,test_04459,false +4460,test_04460,false +4461,test_04461,false +4462,test_04462,false +4463,test_04463,false +4464,test_04464,false +4465,test_04465,false +4466,test_04466,false +4467,test_04467,false +4468,test_04468,false +4469,test_04469,false +4470,test_04470,false +4471,test_04471,false +4472,test_04472,false +4473,test_04473,false +4474,test_04474,false +4475,NULL,true +4476,test_04476,false +4477,test_04477,false +4478,test_04478,false +4479,test_04479,false +4480,test_04480,false +4481,test_04481,false +4482,test_04482,false +4483,test_04483,false +4484,test_04484,false +4485,test_04485,false +4486,NULL,true +4487,NULL,true +4488,test_04488,false +4489,test_04489,false +4490,test_04490,false +4491,test_04491,false +4492,test_04492,false +4493,test_04493,false +4494,test_04494,false +4495,test_04495,false +4496,test_04496,false +4497,test_04497,false +4498,test_04498,false +4499,test_04499,false +4500,test_04500,false +4501,test_04501,false +4502,test_04502,false +4503,test_04503,false +4504,test_04504,false +4505,test_04505,false +4506,test_04506,false +4507,test_04507,false +4508,test_04508,false +4509,test_04509,false +4510,test_04510,false +4511,test_04511,false +4512,test_04512,false +4513,test_04513,false +4514,test_04514,false +4515,test_04515,false +4516,test_04516,false +4517,test_04517,false +4518,test_04518,false +4519,test_04519,false +4520,test_04520,false +4521,test_04521,false +4522,test_04522,false +4523,test_04523,false +4524,test_04524,false +4525,test_04525,false +4526,test_04526,false +4527,test_04527,false +4528,test_04528,false +4529,test_04529,false +4530,test_04530,false +4531,test_04531,false +4532,test_04532,false +4533,test_04533,false +4534,test_04534,false +4535,test_04535,false +4536,test_04536,false +4537,test_04537,false +4538,test_04538,false +4539,test_04539,false +4540,test_04540,false +4541,test_04541,false +4542,test_04542,false +4543,NULL,true +4544,test_04544,false +4545,NULL,true +4546,test_04546,false +4547,test_04547,false +4548,test_04548,false +4549,test_04549,false +4550,test_04550,false +4551,test_04551,false +4552,test_04552,false +4553,test_04553,false +4554,test_04554,false +4555,test_04555,false +4556,test_04556,false +4557,test_04557,false +4558,test_04558,false +4559,test_04559,false +4560,NULL,true +4561,test_04561,false +4562,test_04562,false +4563,test_04563,false +4564,test_04564,false +4565,test_04565,false +4566,test_04566,false +4567,test_04567,false +4568,NULL,true +4569,test_04569,false +4570,test_04570,false +4571,test_04571,false +4572,test_04572,false +4573,NULL,true +4574,test_04574,false +4575,test_04575,false +4576,test_04576,false +4577,NULL,true +4578,test_04578,false +4579,test_04579,false +4580,test_04580,false +4581,test_04581,false +4582,test_04582,false +4583,test_04583,false +4584,test_04584,false +4585,test_04585,false +4586,test_04586,false +4587,test_04587,false +4588,test_04588,false +4589,test_04589,false +4590,test_04590,false +4591,test_04591,false +4592,NULL,true +4593,test_04593,false +4594,test_04594,false +4595,test_04595,false +4596,test_04596,false +4597,test_04597,false +4598,test_04598,false +4599,test_04599,false +4600,test_04600,false +4601,test_04601,false +4602,NULL,true +4603,test_04603,false +4604,test_04604,false +4605,test_04605,false +4606,test_04606,false +4607,test_04607,false +4608,test_04608,false +4609,test_04609,false +4610,test_04610,false +4611,test_04611,false +4612,test_04612,false +4613,test_04613,false +4614,test_04614,false +4615,test_04615,false +4616,test_04616,false +4617,test_04617,false +4618,test_04618,false +4619,test_04619,false +4620,test_04620,false +4621,test_04621,false +4622,test_04622,false +4623,test_04623,false +4624,test_04624,false +4625,test_04625,false +4626,NULL,true +4627,test_04627,false +4628,test_04628,false +4629,NULL,true +4630,test_04630,false +4631,test_04631,false +4632,test_04632,false +4633,test_04633,false +4634,test_04634,false +4635,test_04635,false +4636,test_04636,false +4637,test_04637,false +4638,test_04638,false +4639,test_04639,false +4640,NULL,true +4641,test_04641,false +4642,test_04642,false +4643,test_04643,false +4644,test_04644,false +4645,NULL,true +4646,test_04646,false +4647,test_04647,false +4648,test_04648,false +4649,test_04649,false +4650,test_04650,false +4651,test_04651,false +4652,test_04652,false +4653,test_04653,false +4654,test_04654,false +4655,test_04655,false +4656,test_04656,false +4657,test_04657,false +4658,test_04658,false +4659,test_04659,false +4660,test_04660,false +4661,test_04661,false +4662,test_04662,false +4663,test_04663,false +4664,test_04664,false +4665,test_04665,false +4666,test_04666,false +4667,test_04667,false +4668,test_04668,false +4669,test_04669,false +4670,test_04670,false +4671,test_04671,false +4672,test_04672,false +4673,test_04673,false +4674,test_04674,false +4675,test_04675,false +4676,test_04676,false +4677,test_04677,false +4678,test_04678,false +4679,test_04679,false +4680,test_04680,false +4681,test_04681,false +4682,test_04682,false +4683,test_04683,false +4684,test_04684,false +4685,test_04685,false +4686,test_04686,false +4687,test_04687,false +4688,NULL,true +4689,test_04689,false +4690,test_04690,false +4691,test_04691,false +4692,test_04692,false +4693,test_04693,false +4694,test_04694,false +4695,test_04695,false +4696,test_04696,false +4697,test_04697,false +4698,test_04698,false +4699,test_04699,false +4700,test_04700,false +4701,test_04701,false +4702,test_04702,false +4703,test_04703,false +4704,test_04704,false +4705,test_04705,false +4706,test_04706,false +4707,test_04707,false +4708,test_04708,false +4709,test_04709,false +4710,test_04710,false +4711,test_04711,false +4712,test_04712,false +4713,test_04713,false +4714,test_04714,false +4715,test_04715,false +4716,NULL,true +4717,test_04717,false +4718,NULL,true +4719,test_04719,false +4720,test_04720,false +4721,test_04721,false +4722,test_04722,false +4723,test_04723,false +4724,test_04724,false +4725,test_04725,false +4726,test_04726,false +4727,test_04727,false +4728,test_04728,false +4729,test_04729,false +4730,test_04730,false +4731,test_04731,false +4732,test_04732,false +4733,test_04733,false +4734,test_04734,false +4735,test_04735,false +4736,NULL,true +4737,test_04737,false +4738,test_04738,false +4739,test_04739,false +4740,test_04740,false +4741,test_04741,false +4742,test_04742,false +4743,test_04743,false +4744,test_04744,false +4745,test_04745,false +4746,test_04746,false +4747,test_04747,false +4748,test_04748,false +4749,test_04749,false +4750,test_04750,false +4751,test_04751,false +4752,test_04752,false +4753,test_04753,false +4754,test_04754,false +4755,test_04755,false +4756,test_04756,false +4757,test_04757,false +4758,test_04758,false +4759,test_04759,false +4760,test_04760,false +4761,test_04761,false +4762,test_04762,false +4763,test_04763,false +4764,test_04764,false +4765,test_04765,false +4766,test_04766,false +4767,test_04767,false +4768,test_04768,false +4769,test_04769,false +4770,test_04770,false +4771,test_04771,false +4772,test_04772,false +4773,test_04773,false +4774,NULL,true +4775,test_04775,false +4776,NULL,true +4777,test_04777,false +4778,test_04778,false +4779,test_04779,false +4780,test_04780,false +4781,test_04781,false +4782,test_04782,false +4783,test_04783,false +4784,test_04784,false +4785,test_04785,false +4786,NULL,true +4787,test_04787,false +4788,test_04788,false +4789,test_04789,false +4790,test_04790,false +4791,test_04791,false +4792,test_04792,false +4793,test_04793,false +4794,NULL,true +4795,test_04795,false +4796,NULL,true +4797,test_04797,false +4798,test_04798,false +4799,test_04799,false +4800,test_04800,false +4801,test_04801,false +4802,test_04802,false +4803,test_04803,false +4804,test_04804,false +4805,test_04805,false +4806,test_04806,false +4807,test_04807,false +4808,test_04808,false +4809,test_04809,false +4810,test_04810,false +4811,test_04811,false +4812,test_04812,false +4813,test_04813,false +4814,test_04814,false +4815,test_04815,false +4816,test_04816,false +4817,test_04817,false +4818,test_04818,false +4819,test_04819,false +4820,test_04820,false +4821,test_04821,false +4822,test_04822,false +4823,test_04823,false +4824,test_04824,false +4825,test_04825,false +4826,test_04826,false +4827,test_04827,false +4828,test_04828,false +4829,test_04829,false +4830,test_04830,false +4831,test_04831,false +4832,test_04832,false +4833,test_04833,false +4834,test_04834,false +4835,test_04835,false +4836,test_04836,false +4837,test_04837,false +4838,test_04838,false +4839,test_04839,false +4840,test_04840,false +4841,test_04841,false +4842,test_04842,false +4843,test_04843,false +4844,test_04844,false +4845,test_04845,false +4846,test_04846,false +4847,test_04847,false +4848,test_04848,false +4849,test_04849,false +4850,test_04850,false +4851,test_04851,false +4852,test_04852,false +4853,test_04853,false +4854,test_04854,false +4855,test_04855,false +4856,test_04856,false +4857,test_04857,false +4858,test_04858,false +4859,test_04859,false +4860,test_04860,false +4861,test_04861,false +4862,test_04862,false +4863,test_04863,false +4864,test_04864,false +4865,test_04865,false +4866,test_04866,false +4867,test_04867,false +4868,test_04868,false +4869,test_04869,false +4870,test_04870,false +4871,test_04871,false +4872,test_04872,false +4873,test_04873,false +4874,test_04874,false +4875,NULL,true +4876,test_04876,false +4877,test_04877,false +4878,test_04878,false +4879,test_04879,false +4880,test_04880,false +4881,test_04881,false +4882,test_04882,false +4883,test_04883,false +4884,NULL,true +4885,test_04885,false +4886,test_04886,false +4887,test_04887,false +4888,test_04888,false +4889,test_04889,false +4890,NULL,true +4891,test_04891,false +4892,test_04892,false +4893,test_04893,false +4894,test_04894,false +4895,test_04895,false +4896,test_04896,false +4897,test_04897,false +4898,test_04898,false +4899,test_04899,false +4900,test_04900,false +4901,test_04901,false +4902,NULL,true +4903,test_04903,false +4904,test_04904,false +4905,test_04905,false +4906,test_04906,false +4907,NULL,true +4908,test_04908,false +4909,test_04909,false +4910,test_04910,false +4911,test_04911,false +4912,test_04912,false +4913,test_04913,false +4914,test_04914,false +4915,test_04915,false +4916,test_04916,false +4917,test_04917,false +4918,test_04918,false +4919,test_04919,false +4920,test_04920,false +4921,test_04921,false +4922,test_04922,false +4923,test_04923,false +4924,test_04924,false +4925,test_04925,false +4926,test_04926,false +4927,test_04927,false +4928,test_04928,false +4929,test_04929,false +4930,test_04930,false +4931,test_04931,false +4932,test_04932,false +4933,test_04933,false +4934,test_04934,false +4935,test_04935,false +4936,NULL,true +4937,test_04937,false +4938,test_04938,false +4939,test_04939,false +4940,test_04940,false +4941,test_04941,false +4942,test_04942,false +4943,test_04943,false +4944,test_04944,false +4945,test_04945,false +4946,test_04946,false +4947,test_04947,false +4948,test_04948,false +4949,test_04949,false +4950,test_04950,false +4951,test_04951,false +4952,test_04952,false +4953,test_04953,false +4954,test_04954,false +4955,test_04955,false +4956,test_04956,false +4957,test_04957,false +4958,test_04958,false +4959,test_04959,false +4960,NULL,true +4961,test_04961,false +4962,test_04962,false +4963,test_04963,false +4964,test_04964,false +4965,test_04965,false +4966,test_04966,false +4967,test_04967,false +4968,NULL,true +4969,test_04969,false +4970,test_04970,false +4971,test_04971,false +4972,test_04972,false +4973,test_04973,false +4974,test_04974,false +4975,test_04975,false +4976,test_04976,false +4977,test_04977,false +4978,test_04978,false +4979,test_04979,false +4980,test_04980,false +4981,test_04981,false +4982,test_04982,false +4983,test_04983,false +4984,test_04984,false +4985,test_04985,false +4986,test_04986,false +4987,test_04987,false +4988,test_04988,false +4989,test_04989,false +4990,test_04990,false +4991,test_04991,false +4992,test_04992,false +4993,test_04993,false +4994,NULL,true +4995,test_04995,false +4996,test_04996,false +4997,test_04997,false +4998,NULL,true +4999,test_04999,false From e371a1a5afe39db7c4525401ac93beee885c6bd2 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 8 Apr 2026 15:41:01 +0800 Subject: [PATCH 06/28] fix: correct binary format for BIGINT/TINYINT/SMALLINT to little-endian - Change BIGINT from string format to 8-byte little-endian binary format - Change TINYINT from string format to 1-byte binary format - Change SMALLINT from string format to 2-byte little-endian binary format - Update compatibility test data to match new binary format --- .../btree/btree_global_indexer.cpp | 24 +++-- .../btree_test_int_no_nulls.bin | Bin 637 -> 481 bytes .../btree_test_int_no_nulls.bin.meta | Bin 17 -> 17 bytes .../btree_test_int_no_nulls.csv | 96 +++++++++--------- 4 files changed, 62 insertions(+), 58 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index f08541bad..0b8a319d2 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -794,9 +794,15 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo if (type == FieldType::BIGINT) { try { int64_t value = literal.GetValue(); - // Convert to string to match the format used in BTreeGlobalIndexWriter - std::string str_value = std::to_string(value); - auto bytes = Bytes::AllocateBytes(str_value, pool); + auto bytes = Bytes::AllocateBytes(8, pool); + bytes->data()[0] = static_cast(value & 0xFF); + bytes->data()[1] = static_cast((value >> 8) & 0xFF); + bytes->data()[2] = static_cast((value >> 16) & 0xFF); + bytes->data()[3] = static_cast((value >> 24) & 0xFF); + bytes->data()[4] = static_cast((value >> 32) & 0xFF); + bytes->data()[5] = static_cast((value >> 40) & 0xFF); + bytes->data()[6] = static_cast((value >> 48) & 0xFF); + bytes->data()[7] = static_cast((value >> 56) & 0xFF); return MemorySlice::Wrap(std::shared_ptr(bytes.release())); } catch (const std::exception& e) { return Status::Invalid("Failed to convert bigint literal to MemorySlice: " + @@ -807,7 +813,6 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo if (type == FieldType::INT) { try { int32_t value = literal.GetValue(); - // Store as 4-byte little-endian binary to match Java format auto bytes = Bytes::AllocateBytes(4, pool); bytes->data()[0] = static_cast(value & 0xFF); bytes->data()[1] = static_cast((value >> 8) & 0xFF); @@ -823,9 +828,8 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo if (type == FieldType::TINYINT) { try { int8_t value = literal.GetValue(); - // Convert to string to match the format used in BTreeGlobalIndexWriter - std::string str_value = std::to_string(value); - auto bytes = Bytes::AllocateBytes(str_value, pool); + auto bytes = Bytes::AllocateBytes(1, pool); + bytes->data()[0] = static_cast(value); return MemorySlice::Wrap(std::shared_ptr(bytes.release())); } catch (const std::exception& e) { return Status::Invalid("Failed to convert tinyint literal to MemorySlice: " + @@ -836,9 +840,9 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo if (type == FieldType::SMALLINT) { try { int16_t value = literal.GetValue(); - // Convert to string to match the format used in BTreeGlobalIndexWriter - std::string str_value = std::to_string(value); - auto bytes = Bytes::AllocateBytes(str_value, pool); + auto bytes = Bytes::AllocateBytes(2, pool); + bytes->data()[0] = static_cast(value & 0xFF); + bytes->data()[1] = static_cast((value >> 8) & 0xFF); return MemorySlice::Wrap(std::shared_ptr(bytes.release())); } catch (const std::exception& e) { return Status::Invalid("Failed to convert smallint literal to MemorySlice: " + diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_no_nulls.bin b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_no_nulls.bin index fe6f5bad817e84e3c040422a6753d4801c76f641..51b69c389cec3f656365ef4354446d324301df5a 100644 GIT binary patch literal 481 zcmZ9}Pb&j(9LMqBw)fbKVHk#O%%5TYtZ-3C9ClDhDHp}XL5Yh(S=0(($cYYe zszaP4_d3agR(RAYo^+ZQo#9PqdDm*`j_F~dyV`S^_~7+y{_%7ZhiwhuN>6? literal 637 zcmZY4ODIH90LJk%_kLbu9>#kZ!#o&+8Dvs4Sqv$UBxa#d#Kxlge1B3ve diff --git a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_no_nulls.csv b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_no_nulls.csv index 78443decd..395aba4b8 100644 --- a/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_no_nulls.csv +++ b/test/test_data/global_index/btree/btree_compatibility_data/btree_test_int_no_nulls.csv @@ -1,51 +1,51 @@ row_id,key,is_null -0,3,false -1,3,false -2,10,false +0,4,false +1,5,false +2,9,false 3,11,false -4,13,false -5,15,false -6,18,false -7,24,false -8,24,false -9,28,false -10,30,false +4,14,false +5,18,false +6,22,false +7,26,false +8,30,false +9,31,false +10,32,false 11,35,false -12,39,false -13,39,false -14,42,false -15,45,false -16,51,false -17,52,false -18,57,false -19,60,false -20,62,false -21,67,false -22,66,false -23,70,false -24,75,false -25,75,false -26,78,false -27,83,false -28,88,false -29,91,false -30,93,false -31,96,false -32,97,false -33,99,false -34,105,false -35,106,false -36,109,false -37,112,false -38,115,false -39,121,false -40,123,false -41,125,false -42,129,false -43,131,false -44,134,false -45,137,false -46,141,false -47,144,false -48,147,false -49,148,false +12,40,false +13,44,false +14,49,false +15,52,false +16,53,false +17,54,false +18,59,false +19,62,false +20,64,false +21,68,false +22,73,false +23,77,false +24,81,false +25,82,false +26,87,false +27,90,false +28,95,false +29,98,false +30,99,false +31,104,false +32,108,false +33,111,false +34,113,false +35,114,false +36,119,false +37,121,false +38,122,false +39,124,false +40,128,false +41,131,false +42,134,false +43,138,false +44,142,false +45,145,false +46,148,false +47,153,false +48,157,false +49,158,false From b7e1305ee8a0a7bb056c151731ecdab784aec129 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 8 Apr 2026 15:45:48 +0800 Subject: [PATCH 07/28] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=20cpplint=20?= =?UTF-8?q?=E9=94=99=E8=AF=AF=20-=20=E4=BD=BF=E7=94=A8=20C=20=E9=A3=8E?= =?UTF-8?q?=E6=A0=BC=E5=AD=97=E7=AC=A6=E4=B8=B2=E5=B8=B8=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../btree/btree_compatibility_test.cpp | 67 +++++++------------ 1 file changed, 26 insertions(+), 41 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_compatibility_test.cpp b/src/paimon/common/global_index/btree/btree_compatibility_test.cpp index 3508777a4..0b772932e 100644 --- a/src/paimon/common/global_index/btree/btree_compatibility_test.cpp +++ b/src/paimon/common/global_index/btree/btree_compatibility_test.cpp @@ -53,8 +53,7 @@ namespace paimon::test { // --------------------------------------------------------------------------- // Test data directory (relative to project root) // --------------------------------------------------------------------------- -static const std::string kTestDataDir = - "test/test_data/global_index/btree/btree_compatibility_data"; +static const char kTestDataDir[] = "test/test_data/global_index/btree/btree_compatibility_data"; // --------------------------------------------------------------------------- // CSV record parsed from the Java-generated CSV files @@ -263,9 +262,8 @@ class BTreeCompatibilityTest : public ::testing::Test { // Helper: build expected row IDs for string keys in range std::set GetRowIdsForStringRange(const std::vector& records, - const std::string& lower, - const std::string& upper, bool lower_inclusive, - bool upper_inclusive) { + const std::string& lower, const std::string& upper, + bool lower_inclusive, bool upper_inclusive) { std::set ids; for (const auto& rec : records) { if (rec.is_null) continue; @@ -378,8 +376,8 @@ TEST_P(BTreeCompatibilityIntTest, ReadAndQueryIntData) { auto result = reader->VisitLessThan(literal); ASSERT_OK(result.status()) << prefix << ": VisitLessThan(" << mid_key << ") failed"; auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetRowIdsForIntRange(records, non_null_keys.front(), mid_key, - true, false); + auto expected_ids = + GetRowIdsForIntRange(records, non_null_keys.front(), mid_key, true, false); EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitLessThan(" << mid_key << ") mismatch"; } @@ -399,8 +397,8 @@ TEST_P(BTreeCompatibilityIntTest, ReadAndQueryIntData) { ASSERT_OK(result.status()) << prefix << ": VisitGreaterOrEqual(" << mid_key << ") failed"; auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetRowIdsForIntRange(records, mid_key, non_null_keys.back(), - true, true); + auto expected_ids = + GetRowIdsForIntRange(records, mid_key, non_null_keys.back(), true, true); EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitGreaterOrEqual(" << mid_key << ") mismatch"; } @@ -439,8 +437,7 @@ TEST_P(BTreeCompatibilityIntTest, ReadAndQueryIntData) { int32_t k1 = *it++; int32_t k2 = *it++; int32_t k3 = *it++; - std::vector in_literals = { - Literal(k1), Literal(k2), Literal(k3)}; + std::vector in_literals = {Literal(k1), Literal(k2), Literal(k3)}; auto result = reader->VisitIn(in_literals); ASSERT_OK(result.status()) << prefix << ": VisitIn({" << k1 << "," << k2 << "," << k3 << "}) failed"; @@ -450,8 +447,7 @@ TEST_P(BTreeCompatibilityIntTest, ReadAndQueryIntData) { for (auto id : GetRowIdsForIntKey(records, k1)) expected_ids.insert(id); for (auto id : GetRowIdsForIntKey(records, k2)) expected_ids.insert(id); for (auto id : GetRowIdsForIntKey(records, k3)) expected_ids.insert(id); - EXPECT_EQ(actual_ids, expected_ids) - << prefix << ": VisitIn mismatch"; + EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitIn mismatch"; } } @@ -462,8 +458,7 @@ TEST_P(BTreeCompatibilityIntTest, ReadAndQueryIntData) { int32_t key_val = std::stoi(rec.key); Literal literal(key_val); auto result = reader->VisitNotEqual(literal); - ASSERT_OK(result.status()) - << prefix << ": VisitNotEqual(" << key_val << ") failed"; + ASSERT_OK(result.status()) << prefix << ": VisitNotEqual(" << key_val << ") failed"; auto actual_ids = CollectRowIds(result.value()); // Expected: all non-null rows except those with this key @@ -580,8 +575,8 @@ TEST_P(BTreeCompatibilityVarcharTest, ReadAndQueryVarcharData) { auto result = reader->VisitLessThan(literal); ASSERT_OK(result.status()) << prefix << ": VisitLessThan(" << mid_key << ") failed"; auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetRowIdsForStringRange(records, non_null_keys.front(), mid_key, - true, false); + auto expected_ids = + GetRowIdsForStringRange(records, non_null_keys.front(), mid_key, true, false); EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitLessThan(" << mid_key << ") mismatch"; } @@ -602,8 +597,8 @@ TEST_P(BTreeCompatibilityVarcharTest, ReadAndQueryVarcharData) { ASSERT_OK(result.status()) << prefix << ": VisitGreaterOrEqual(" << mid_key << ") failed"; auto actual_ids = CollectRowIds(result.value()); - auto expected_ids = GetRowIdsForStringRange(records, mid_key, non_null_keys.back(), - true, true); + auto expected_ids = + GetRowIdsForStringRange(records, mid_key, non_null_keys.back(), true, true); EXPECT_EQ(actual_ids, expected_ids) << prefix << ": VisitGreaterOrEqual(" << mid_key << ") mismatch"; } @@ -639,10 +634,8 @@ TEST_P(BTreeCompatibilityVarcharTest, ReadAndQueryVarcharData) { std::sort(non_null_keys.begin(), non_null_keys.end()); std::string lower = non_null_keys[non_null_keys.size() / 4]; std::string upper = non_null_keys[non_null_keys.size() * 3 / 4]; - Literal lit_lower(FieldType::STRING, lower.c_str(), - static_cast(lower.size())); - Literal lit_upper(FieldType::STRING, upper.c_str(), - static_cast(upper.size())); + Literal lit_lower(FieldType::STRING, lower.c_str(), static_cast(lower.size())); + Literal lit_upper(FieldType::STRING, upper.c_str(), static_cast(upper.size())); auto result = reader->VisitBetween(lit_lower, lit_upper); ASSERT_OK(result.status()) << prefix << ": VisitBetween(" << lower << ", " << upper << ") failed"; @@ -748,8 +741,7 @@ TEST_F(BTreeCompatibilityTest, NoNulls) { int32_t key_val = std::stoi(rec.key); Literal literal(key_val); auto result = reader->VisitEqual(literal); - ASSERT_OK(result.status()) - << prefix << ": VisitEqual(" << key_val << ") failed"; + ASSERT_OK(result.status()) << prefix << ": VisitEqual(" << key_val << ") failed"; auto actual_ids = CollectRowIds(result.value()); auto expected_ids = GetRowIdsForIntKey(records, key_val); EXPECT_EQ(actual_ids, expected_ids) @@ -782,8 +774,7 @@ TEST_F(BTreeCompatibilityTest, NoNulls) { auto result = reader->VisitGreaterThan(literal); ASSERT_OK(result.status()) << prefix << ": VisitGreaterThan(" << max_key << ") failed"; auto actual_ids = CollectRowIds(result.value()); - EXPECT_TRUE(actual_ids.empty()) - << prefix << ": VisitGreaterThan(max) should be empty"; + EXPECT_TRUE(actual_ids.empty()) << prefix << ": VisitGreaterThan(max) should be empty"; } } @@ -832,8 +823,7 @@ TEST_F(BTreeCompatibilityTest, DuplicateKeys) { int32_t key_val = std::stoi(rec.key); Literal literal(key_val); auto result = reader->VisitEqual(literal); - ASSERT_OK(result.status()) - << prefix << ": VisitEqual(" << key_val << ") failed"; + ASSERT_OK(result.status()) << prefix << ": VisitEqual(" << key_val << ") failed"; auto actual_ids = CollectRowIds(result.value()); auto expected_ids = GetRowIdsForIntKey(records, key_val); EXPECT_EQ(actual_ids, expected_ids) @@ -844,10 +834,9 @@ TEST_F(BTreeCompatibilityTest, DuplicateKeys) { // ---- Test: VisitIn for keys 0, 5, 9 ---- { - std::vector in_literals = { - Literal(static_cast(0)), - Literal(static_cast(5)), - Literal(static_cast(9))}; + std::vector in_literals = {Literal(static_cast(0)), + Literal(static_cast(5)), + Literal(static_cast(9))}; auto result = reader->VisitIn(in_literals); ASSERT_OK(result.status()) << prefix << ": VisitIn({0,5,9}) failed"; auto actual_ids = CollectRowIds(result.value()); @@ -975,14 +964,10 @@ TEST_F(BTreeCompatibilityTest, MetaDeserialization) { TEST_F(BTreeCompatibilityTest, RowCountConsistency) { // For each test data set, verify that null_count + non_null_count == total_count std::vector>> test_cases = { - {"btree_test_int_50", arrow::int32()}, - {"btree_test_int_100", arrow::int32()}, - {"btree_test_int_500", arrow::int32()}, - {"btree_test_varchar_50", arrow::utf8()}, - {"btree_test_varchar_100", arrow::utf8()}, - {"btree_test_int_all_nulls", arrow::int32()}, - {"btree_test_int_no_nulls", arrow::int32()}, - {"btree_test_int_duplicates", arrow::int32()}, + {"btree_test_int_50", arrow::int32()}, {"btree_test_int_100", arrow::int32()}, + {"btree_test_int_500", arrow::int32()}, {"btree_test_varchar_50", arrow::utf8()}, + {"btree_test_varchar_100", arrow::utf8()}, {"btree_test_int_all_nulls", arrow::int32()}, + {"btree_test_int_no_nulls", arrow::int32()}, {"btree_test_int_duplicates", arrow::int32()}, }; for (const auto& [prefix, arrow_type] : test_cases) { From f70bcdf82fad0c726944a5c54eee248d9682ccc1 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 8 Apr 2026 15:51:44 +0800 Subject: [PATCH 08/28] =?UTF-8?q?style:=20=E4=BD=BF=E7=94=A8=20clang-forma?= =?UTF-8?q?t=20=E4=BF=AE=E5=A4=8D=E4=BB=A3=E7=A0=81=E6=A0=BC=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../btree/btree_global_index_writer_test.cpp | 40 +++++++++++-------- .../global_index/btree/btree_global_indexer.h | 18 ++++----- .../global_index/btree/btree_index_meta.cpp | 3 +- .../lookup/sort/sort_lookup_store_footer.h | 3 +- 4 files changed, 36 insertions(+), 28 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp index 3a4e0d2df..006ed5dea 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp @@ -36,7 +36,8 @@ class FakeGlobalIndexFileWriter : public GlobalIndexFileWriter { return prefix + "_" + std::to_string(file_counter_++); } - Result> NewOutputStream(const std::string& file_name) const override { + Result> NewOutputStream( + const std::string& file_name) const override { return fs_->Create(base_path_ + "/" + file_name, true); } @@ -68,7 +69,7 @@ class BTreeGlobalIndexWriterTest : public ::testing::Test { // Helper to create ArrowSchema from arrow type std::unique_ptr CreateArrowSchema(const std::shared_ptr& type, - const std::string& field_name) { + const std::string& field_name) { auto schema = arrow::schema({arrow::field(field_name, type)}); auto c_schema = std::make_unique(); EXPECT_TRUE(arrow::ExportSchema(*schema, c_schema.get()).ok()); @@ -89,7 +90,8 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteIntData) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = + std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Create an Arrow array with int values auto array = @@ -118,7 +120,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteIntData) { // Release the ArrowArray ArrowArrayRelease(&c_array); - + // Release the ArrowSchema ArrowSchemaRelease(c_schema.get()); } @@ -131,7 +133,8 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteStringData) { auto c_schema = CreateArrowSchema(arrow::utf8(), "string_field"); // Create the BTree global index writer - auto writer = std::make_shared("string_field", c_schema.get(), file_writer, pool_); + auto writer = std::make_shared("string_field", c_schema.get(), + file_writer, pool_); // Create an Arrow array with string values auto array = arrow::ipc::internal::json::ArrayFromJSON( @@ -159,7 +162,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteStringData) { // Release the ArrowArray ArrowArrayRelease(&c_array); - + // Release the ArrowSchema ArrowSchemaRelease(c_schema.get()); } @@ -172,7 +175,8 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteWithNulls) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = + std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Create an Arrow array with null values auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, null, 3, null, 5]") @@ -202,7 +206,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteWithNulls) { // Release the ArrowArray ArrowArrayRelease(&c_array); - + // Release the ArrowSchema ArrowSchemaRelease(c_schema.get()); } @@ -215,7 +219,8 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteMultipleBatches) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = + std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Create first batch auto array1 = @@ -250,7 +255,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteMultipleBatches) { // Verify metadata const auto& meta = metas[0]; EXPECT_EQ(meta.range_end, 5); // 6 elements, 0-indexed - + // Release the ArrowSchema ArrowSchemaRelease(c_schema.get()); } @@ -263,14 +268,15 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteEmptyData) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = + std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Finish without adding any data auto result = writer->Finish(); ASSERT_OK(result.status()); auto metas = result.value(); ASSERT_EQ(metas.size(), 0); // No data, no metadata - + // Release the ArrowSchema ArrowSchemaRelease(c_schema.get()); } @@ -283,7 +289,8 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteAllNulls) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = + std::make_shared("int_field", c_schema.get(), file_writer, pool_); // Create an Arrow array with all null values auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[null, null, null]") @@ -309,7 +316,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteAllNulls) { // Release the ArrowArray ArrowArrayRelease(&c_array); - + // Release the ArrowSchema ArrowSchemaRelease(c_schema.get()); } @@ -322,7 +329,8 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteDoubleData) { auto c_schema = CreateArrowSchema(arrow::float64(), "double_field"); // Create the BTree global index writer - auto writer = std::make_shared("double_field", c_schema.get(), file_writer, pool_); + auto writer = std::make_shared("double_field", c_schema.get(), + file_writer, pool_); // Create an Arrow array with double values auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::float64(), "[1.5, 2.5, 3.5, 1.5]") @@ -344,7 +352,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteDoubleData) { // Release the ArrowArray ArrowArrayRelease(&c_array); - + // Release the ArrowSchema ArrowSchemaRelease(c_schema.get()); } diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.h b/src/paimon/common/global_index/btree/btree_global_indexer.h index 80b1b4cbe..b47343282 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.h +++ b/src/paimon/common/global_index/btree/btree_global_indexer.h @@ -57,14 +57,12 @@ class BTreeGlobalIndexer : public GlobalIndexer { class BTreeGlobalIndexReader : public GlobalIndexReader { public: - BTreeGlobalIndexReader(const std::shared_ptr& sst_file_reader, - const std::shared_ptr& null_bitmap, - const MemorySlice& min_key, const MemorySlice& max_key, - bool has_min_key, bool has_max_key, - const std::vector& files, - const std::shared_ptr& pool, - std::function - comparator); + BTreeGlobalIndexReader( + const std::shared_ptr& sst_file_reader, + const std::shared_ptr& null_bitmap, const MemorySlice& min_key, + const MemorySlice& max_key, bool has_min_key, bool has_max_key, + const std::vector& files, const std::shared_ptr& pool, + std::function comparator); Result> VisitIsNotNull() override; @@ -124,8 +122,8 @@ class BTreeGlobalIndexReader : public GlobalIndexReader { private: Result RangeQuery(const MemorySlice& lower_bound, - const MemorySlice& upper_bound, - bool lower_inclusive, bool upper_inclusive); + const MemorySlice& upper_bound, bool lower_inclusive, + bool upper_inclusive); Result AllNonNullRows(); diff --git a/src/paimon/common/global_index/btree/btree_index_meta.cpp b/src/paimon/common/global_index/btree/btree_index_meta.cpp index 6d3e6135e..d4b3fbca9 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta.cpp +++ b/src/paimon/common/global_index/btree/btree_index_meta.cpp @@ -39,7 +39,8 @@ std::shared_ptr BTreeIndexMeta::Deserialize(const std::shared_pt } std::shared_ptr BTreeIndexMeta::Serialize(paimon::MemoryPool* pool) const { - // Calculate total size: first_key_len(4) + first_key + last_key_len(4) + last_key + has_nulls(1) + // Calculate total size: first_key_len(4) + first_key + last_key_len(4) + last_key + + // has_nulls(1) int32_t first_key_size = first_key_ ? first_key_->size() : 0; int32_t last_key_size = last_key_ ? last_key_->size() : 0; int32_t total_size = 4 + first_key_size + 4 + last_key_size + 1; diff --git a/src/paimon/common/lookup/sort/sort_lookup_store_footer.h b/src/paimon/common/lookup/sort/sort_lookup_store_footer.h index c8ccfd1d2..977632910 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_footer.h +++ b/src/paimon/common/lookup/sort/sort_lookup_store_footer.h @@ -30,7 +30,8 @@ namespace paimon { /// Footer of a sort lookup store. class PAIMON_EXPORT SortLookupStoreFooter { public: - static Result> ReadSortLookupStoreFooter(MemorySliceInput* input); + static Result> ReadSortLookupStoreFooter( + MemorySliceInput* input); public: SortLookupStoreFooter(const BlockHandle& index_block_handle, From 5f1981f146a910242605922cf54230e2c3ac989c Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 8 Apr 2026 15:59:18 +0800 Subject: [PATCH 09/28] cpplint --- .../common/global_index/btree/btree_compatibility_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/common/global_index/btree/btree_compatibility_test.cpp b/src/paimon/common/global_index/btree/btree_compatibility_test.cpp index 0b772932e..86326a229 100644 --- a/src/paimon/common/global_index/btree/btree_compatibility_test.cpp +++ b/src/paimon/common/global_index/btree/btree_compatibility_test.cpp @@ -53,7 +53,7 @@ namespace paimon::test { // --------------------------------------------------------------------------- // Test data directory (relative to project root) // --------------------------------------------------------------------------- -static const char kTestDataDir[] = "test/test_data/global_index/btree/btree_compatibility_data"; +static constexpr char kTestDataDir[] = "test/test_data/global_index/btree/btree_compatibility_data"; // --------------------------------------------------------------------------- // CSV record parsed from the Java-generated CSV files From caafccab21ad361c92fb352fe090b90dfc213566 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 8 Apr 2026 18:13:03 +0800 Subject: [PATCH 10/28] fix: resolve compiler warnings and errors in btree global index --- .../global_index/btree/btree_file_footer.cpp | 38 +++---- .../global_index/btree/btree_file_footer.h | 4 +- .../btree/btree_global_index_writer.cpp | 3 +- .../btree/btree_global_index_writer.h | 3 +- .../btree/btree_global_indexer.cpp | 101 +++++++++--------- .../global_index/btree/btree_global_indexer.h | 5 +- .../btree/btree_global_indexer_test.cpp | 2 +- .../global_index/btree/btree_index_meta.cpp | 4 +- .../global_index/btree/btree_index_meta.h | 15 +-- .../btree/btree_index_meta_test.cpp | 50 ++++----- .../lookup/sort/sort_lookup_store_footer.cpp | 2 +- .../lookup/sort/sort_lookup_store_footer.h | 2 +- .../common/utils/roaring_navigable_map64.cpp | 10 +- .../common/utils/roaring_navigable_map64.h | 2 +- .../utils/roaring_navigable_map64_test.cpp | 2 +- 15 files changed, 117 insertions(+), 126 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_file_footer.cpp b/src/paimon/common/global_index/btree/btree_file_footer.cpp index 78ab4a6b7..605209a82 100644 --- a/src/paimon/common/global_index/btree/btree_file_footer.cpp +++ b/src/paimon/common/global_index/btree/btree_file_footer.cpp @@ -20,7 +20,7 @@ namespace paimon { Result> BTreeFileFooter::Read(MemorySliceInput& input) { // read version and verify magic number - input.SetPosition(ENCODED_LENGTH - 8); + PAIMON_RETURN_NOT_OK(input.SetPosition(ENCODED_LENGTH - 8)); int32_t version = input.ReadInt(); int32_t magic_number = input.ReadInt(); @@ -28,7 +28,7 @@ Result> BTreeFileFooter::Read(MemorySliceInput& return Status::IOError("File is not a btree index file (bad magic number)"); } - input.SetPosition(0); + PAIMON_RETURN_NOT_OK(input.SetPosition(0)); // read bloom filter and index handles auto offset = input.ReadLong(); @@ -63,37 +63,37 @@ MemorySlice BTreeFileFooter::Write(const std::shared_ptr& foote } MemorySlice BTreeFileFooter::Write(const std::shared_ptr& footer, - MemorySliceOutput& ouput) { + MemorySliceOutput& output) { // write bloom filter and index handles auto bloom_filter_handle = footer->GetBloomFilterHandle(); if (!bloom_filter_handle) { - ouput.WriteValue(static_cast(0)); - ouput.WriteValue(static_cast(0)); - ouput.WriteValue(static_cast(0)); + output.WriteValue(static_cast(0)); + output.WriteValue(static_cast(0)); + output.WriteValue(static_cast(0)); } else { - ouput.WriteValue(bloom_filter_handle->Offset()); - ouput.WriteValue(bloom_filter_handle->Size()); - ouput.WriteValue(bloom_filter_handle->ExpectedEntries()); + output.WriteValue(bloom_filter_handle->Offset()); + output.WriteValue(bloom_filter_handle->Size()); + output.WriteValue(bloom_filter_handle->ExpectedEntries()); } auto index_block_handle = footer->GetIndexBlockHandle(); - ouput.WriteValue(index_block_handle->Offset()); - ouput.WriteValue(index_block_handle->Size()); + output.WriteValue(index_block_handle->Offset()); + output.WriteValue(index_block_handle->Size()); auto null_bitmap_handle = footer->GetNullBitmapHandle(); if (!null_bitmap_handle) { - ouput.WriteValue(static_cast(0)); - ouput.WriteValue(static_cast(0)); + output.WriteValue(static_cast(0)); + output.WriteValue(static_cast(0)); } else { - ouput.WriteValue(null_bitmap_handle->Offset()); - ouput.WriteValue(null_bitmap_handle->Size()); + output.WriteValue(null_bitmap_handle->Offset()); + output.WriteValue(null_bitmap_handle->Size()); } // write version and magic number - ouput.WriteValue(footer->GetVersion()); - ouput.WriteValue(MAGIC_NUMBER); + output.WriteValue(footer->GetVersion()); + output.WriteValue(MAGIC_NUMBER); - return ouput.ToSlice(); + return output.ToSlice(); } -} // namespace paimon \ No newline at end of file +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_file_footer.h b/src/paimon/common/global_index/btree/btree_file_footer.h index 0f650d15c..ec32f882d 100644 --- a/src/paimon/common/global_index/btree/btree_file_footer.h +++ b/src/paimon/common/global_index/btree/btree_file_footer.h @@ -30,7 +30,7 @@ class BTreeFileFooter { static Result> Read(MemorySliceInput& input); static MemorySlice Write(const std::shared_ptr& footer, MemoryPool* pool); static MemorySlice Write(const std::shared_ptr& footer, - MemorySliceOutput& ouput); + MemorySliceOutput& output); public: BTreeFileFooter(const std::shared_ptr& bloom_filter_handle, @@ -77,4 +77,4 @@ class BTreeFileFooter { std::shared_ptr null_bitmap_handle_; }; -} // namespace paimon \ No newline at end of file +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp index 77b82fc25..38881eb46 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp @@ -38,7 +38,6 @@ BTreeGlobalIndexWriter::BTreeGlobalIndexWriter( file_writer_(file_writer), pool_(pool), block_size_(block_size), - expected_entries_(expected_entries), null_bitmap_(std::make_shared()), has_nulls_(false), current_row_id_(0), @@ -319,4 +318,4 @@ Result> BTreeGlobalIndexWriter::Finish() { return std::vector{io_meta}; } -} // namespace paimon \ No newline at end of file +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.h b/src/paimon/common/global_index/btree/btree_global_index_writer.h index 78b4ce457..1d627bfae 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.h +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.h @@ -66,7 +66,6 @@ class BTreeGlobalIndexWriter : public GlobalIndexWriter { std::shared_ptr file_writer_; std::shared_ptr pool_; int32_t block_size_; - int64_t expected_entries_; // SST file writer std::unique_ptr sst_writer_; @@ -88,4 +87,4 @@ class BTreeGlobalIndexWriter : public GlobalIndexWriter { std::shared_ptr bloom_filter_; }; -} // namespace paimon \ No newline at end of file +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index 0b8a319d2..c5d25ad27 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -248,19 +248,17 @@ Result> BTreeGlobalIndexer::CreateReader( MemorySlice min_key_slice(MemorySegment(), 0, 0); MemorySlice max_key_slice(MemorySegment(), 0, 0); bool has_min_key = false; - bool has_max_key = false; if (index_meta->FirstKey()) { min_key_slice = MemorySlice::Wrap(index_meta->FirstKey()); has_min_key = true; } if (index_meta->LastKey()) { max_key_slice = MemorySlice::Wrap(index_meta->LastKey()); - has_max_key = true; } return std::make_shared(sst_file_reader, null_bitmap, min_key_slice, - max_key_slice, has_min_key, has_max_key, files, - pool, comparator); + max_key_slice, has_min_key, files, pool, + comparator); } Result> BTreeGlobalIndexer::ToGlobalIndexResult( @@ -330,7 +328,7 @@ Result> BTreeGlobalIndexer::ReadNullBitma } catch (const std::exception& e) { return Status::Invalid( "Fail to deserialize null bitmap but crc check passed, " - "this means the ser/de algorithms not match: " + + "this means the serialization/deserialization algorithms not match: " + std::string(e.what())); } @@ -340,15 +338,14 @@ Result> BTreeGlobalIndexer::ReadNullBitma BTreeGlobalIndexReader::BTreeGlobalIndexReader( const std::shared_ptr& sst_file_reader, const std::shared_ptr& null_bitmap, const MemorySlice& min_key, - const MemorySlice& max_key, bool has_min_key, bool has_max_key, - const std::vector& files, const std::shared_ptr& pool, + const MemorySlice& max_key, bool has_min_key, const std::vector& files, + const std::shared_ptr& pool, std::function comparator) : sst_file_reader_(sst_file_reader), null_bitmap_(null_bitmap), min_key_(min_key), max_key_(max_key), has_min_key_(has_min_key), - has_max_key_(has_max_key), files_(files), pool_(pool), comparator_(std::move(comparator)) {} @@ -591,64 +588,62 @@ Result> BTreeGlobalIndexReader::VisitNotBetwe Result> BTreeGlobalIndexReader::VisitAnd( const std::vector>>& children) { - return std::make_shared( - [this, &children]() -> Result { - if (children.empty()) { - return Status::Invalid("VisitAnd called with no children"); - } - - auto first_result_status = children[0]; - if (!first_result_status.ok()) { - return first_result_status.status(); - } - auto first_result = std::move(first_result_status).value(); - PAIMON_ASSIGN_OR_RAISE(auto first_iterator, first_result->CreateIterator()); + return std::make_shared([&children]() -> Result { + if (children.empty()) { + return Status::Invalid("VisitAnd called with no children"); + } - RoaringNavigableMap64 result_bitmap; - while (first_iterator->HasNext()) { - result_bitmap.Add(first_iterator->Next()); - } + auto first_result_status = children[0]; + if (!first_result_status.ok()) { + return first_result_status.status(); + } + auto first_result = std::move(first_result_status).value(); + PAIMON_ASSIGN_OR_RAISE(auto first_iterator, first_result->CreateIterator()); - for (size_t i = 1; i < children.size(); ++i) { - auto child_status = children[i]; - if (!child_status.ok()) { - return child_status.status(); - } - auto child = std::move(child_status).value(); - PAIMON_ASSIGN_OR_RAISE(auto child_iterator, child->CreateIterator()); + RoaringNavigableMap64 result_bitmap; + while (first_iterator->HasNext()) { + result_bitmap.Add(first_iterator->Next()); + } - RoaringNavigableMap64 child_bitmap; - while (child_iterator->HasNext()) { - child_bitmap.Add(child_iterator->Next()); - } + for (size_t i = 1; i < children.size(); ++i) { + auto child_status = children[i]; + if (!child_status.ok()) { + return child_status.status(); + } + auto child = std::move(child_status).value(); + PAIMON_ASSIGN_OR_RAISE(auto child_iterator, child->CreateIterator()); - result_bitmap.And(child_bitmap); + RoaringNavigableMap64 child_bitmap; + while (child_iterator->HasNext()) { + child_bitmap.Add(child_iterator->Next()); } - return result_bitmap.GetBitmap(); - }); + result_bitmap.And(child_bitmap); + } + + return result_bitmap.GetBitmap(); + }); } Result> BTreeGlobalIndexReader::VisitOr( const std::vector>>& children) { - return std::make_shared( - [this, &children]() -> Result { - RoaringNavigableMap64 result_bitmap; + return std::make_shared([&children]() -> Result { + RoaringNavigableMap64 result_bitmap; - for (const auto& child_status : children) { - if (!child_status.ok()) { - return child_status.status(); - } - auto child = std::move(child_status).value(); - PAIMON_ASSIGN_OR_RAISE(auto child_iterator, child->CreateIterator()); + for (const auto& child_status : children) { + if (!child_status.ok()) { + return child_status.status(); + } + auto child = std::move(child_status).value(); + PAIMON_ASSIGN_OR_RAISE(auto child_iterator, child->CreateIterator()); - while (child_iterator->HasNext()) { - result_bitmap.Add(child_iterator->Next()); - } + while (child_iterator->HasNext()) { + result_bitmap.Add(child_iterator->Next()); } + } - return result_bitmap.GetBitmap(); - }); + return result_bitmap.GetBitmap(); + }); } Result> BTreeGlobalIndexReader::VisitVectorSearch( @@ -696,7 +691,7 @@ Result BTreeGlobalIndexReader::RangeQuery(const MemorySli // For the first block, we need to seek within the block to the exact position if (first_block && lower_bytes) { - PAIMON_ASSIGN_OR_RAISE(bool found, data_iterator->SeekTo(lower_bound)); + PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] bool found, data_iterator->SeekTo(lower_bound)); first_block = false; if (!data_iterator->HasNext()) { diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.h b/src/paimon/common/global_index/btree/btree_global_indexer.h index b47343282..7ef23eb85 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.h +++ b/src/paimon/common/global_index/btree/btree_global_indexer.h @@ -60,8 +60,8 @@ class BTreeGlobalIndexReader : public GlobalIndexReader { BTreeGlobalIndexReader( const std::shared_ptr& sst_file_reader, const std::shared_ptr& null_bitmap, const MemorySlice& min_key, - const MemorySlice& max_key, bool has_min_key, bool has_max_key, - const std::vector& files, const std::shared_ptr& pool, + const MemorySlice& max_key, bool has_min_key, const std::vector& files, + const std::shared_ptr& pool, std::function comparator); Result> VisitIsNotNull() override; @@ -132,7 +132,6 @@ class BTreeGlobalIndexReader : public GlobalIndexReader { MemorySlice min_key_; MemorySlice max_key_; bool has_min_key_; - bool has_max_key_; std::vector files_; std::shared_ptr pool_; std::function comparator_; diff --git a/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp b/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp index cf46d69f0..b31c046d0 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp @@ -238,4 +238,4 @@ TEST_F(BTreeGlobalIndexerTest, VisitMethodsConcept) { SUCCEED(); } -} // namespace paimon::test \ No newline at end of file +} // namespace paimon::test diff --git a/src/paimon/common/global_index/btree/btree_index_meta.cpp b/src/paimon/common/global_index/btree/btree_index_meta.cpp index d4b3fbca9..cf795a7e4 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta.cpp +++ b/src/paimon/common/global_index/btree/btree_index_meta.cpp @@ -27,12 +27,12 @@ std::shared_ptr BTreeIndexMeta::Deserialize(const std::shared_pt auto first_key_len = input.ReadInt(); std::shared_ptr first_key; if (first_key_len) { - first_key = std::move(input.ReadSlice(first_key_len).CopyBytes(pool)); + first_key = input.ReadSlice(first_key_len).CopyBytes(pool); } auto last_key_len = input.ReadInt(); std::shared_ptr last_key; if (last_key_len) { - last_key = std::move(input.ReadSlice(last_key_len).CopyBytes(pool)); + last_key = input.ReadSlice(last_key_len).CopyBytes(pool); } auto has_nulls = input.ReadByte() == 1; return std::make_shared(first_key, last_key, has_nulls); diff --git a/src/paimon/common/global_index/btree/btree_index_meta.h b/src/paimon/common/global_index/btree/btree_index_meta.h index fa1735056..7059f8dff 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta.h +++ b/src/paimon/common/global_index/btree/btree_index_meta.h @@ -16,18 +16,20 @@ #pragma once -#include "paimon/memory/bytes.h" -#include "paimon/common/memory/memory_slice_input.h" #include +#include "paimon/common/memory/memory_slice_input.h" +#include "paimon/memory/bytes.h" + namespace paimon { /// Index Meta of each BTree index file. The first key and last key of this meta could be null if /// the /// entire btree index file only contains nulls. class BTreeIndexMeta { -public: - static std::shared_ptr Deserialize(const std::shared_ptr& meta, paimon::MemoryPool *pool); - std::shared_ptr Serialize(paimon::MemoryPool *pool) const; + public: + static std::shared_ptr Deserialize(const std::shared_ptr& meta, + paimon::MemoryPool* pool); + std::shared_ptr Serialize(paimon::MemoryPool* pool) const; public: BTreeIndexMeta(const std::shared_ptr& first_key, const std::shared_ptr& last_key, @@ -53,8 +55,7 @@ class BTreeIndexMeta { private: int32_t Size() const { // 9 bytes => first_key_len(4 byte) + last_key_len(4 byte) + has_null(1 byte) - return (first_key_ ? first_key_->size() : 0) + - (last_key_ ? last_key_->size() : 0) + 9; + return (first_key_ ? first_key_->size() : 0) + (last_key_ ? last_key_->size() : 0) + 9; } private: diff --git a/src/paimon/common/global_index/btree/btree_index_meta_test.cpp b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp index 7e03c7f5e..94b5e4118 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta_test.cpp +++ b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp @@ -14,16 +14,19 @@ * limitations under the License. */ +#include "paimon/common/global_index/btree/btree_index_meta.h" + #include -#include "paimon/common/global_index/btree/btree_index_meta.h" #include "paimon/memory/memory_pool.h" namespace paimon::test { class BTreeIndexMetaTest : public ::testing::Test { -protected: - void SetUp() override { pool_ = GetDefaultPool(); } + protected: + void SetUp() override { + pool_ = GetDefaultPool(); + } std::shared_ptr pool_; }; @@ -32,10 +35,8 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeNormalKeys) { // Create a BTreeIndexMeta with normal keys auto first_key = Bytes::AllocateBytes("first_key_data", pool_.get()); auto last_key = Bytes::AllocateBytes("last_key_data", pool_.get()); - auto meta = std::make_shared( - std::shared_ptr(first_key.release()), - std::shared_ptr(last_key.release()), - true); + auto meta = std::make_shared(std::shared_ptr(first_key.release()), + std::shared_ptr(last_key.release()), true); // Serialize auto serialized = meta->Serialize(pool_.get()); @@ -49,7 +50,8 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeNormalKeys) { // Verify first_key auto deserialized_first = deserialized->FirstKey(); ASSERT_NE(deserialized_first, nullptr); - EXPECT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), "first_key_data"); + EXPECT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), + "first_key_data"); // Verify last_key auto deserialized_last = deserialized->LastKey(); @@ -87,16 +89,14 @@ TEST_F(BTreeIndexMetaTest, HasNullsAndOnlyNulls) { // Case 1: Has nulls with keys auto meta1 = std::make_shared( std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), - std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), - true); + std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), true); EXPECT_TRUE(meta1->HasNulls()); EXPECT_FALSE(meta1->OnlyNulls()); // Case 2: No nulls with keys auto meta2 = std::make_shared( std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), - std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), - false); + std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), false); EXPECT_FALSE(meta2->HasNulls()); EXPECT_FALSE(meta2->OnlyNulls()); @@ -115,10 +115,8 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeNoNulls) { // Create a BTreeIndexMeta without nulls auto first_key = Bytes::AllocateBytes("abc", pool_.get()); auto last_key = Bytes::AllocateBytes("xyz", pool_.get()); - auto meta = std::make_shared( - std::shared_ptr(first_key.release()), - std::shared_ptr(last_key.release()), - false); + auto meta = std::make_shared(std::shared_ptr(first_key.release()), + std::shared_ptr(last_key.release()), false); // Serialize auto serialized = meta->Serialize(pool_.get()); @@ -135,10 +133,8 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeNoNulls) { TEST_F(BTreeIndexMetaTest, SerializeDeserializeWithOnlyFirstKey) { // Create a BTreeIndexMeta with only first_key (edge case) auto first_key = Bytes::AllocateBytes("first", pool_.get()); - auto meta = std::make_shared( - std::shared_ptr(first_key.release()), - nullptr, - false); + auto meta = std::make_shared(std::shared_ptr(first_key.release()), + nullptr, false); // Serialize auto serialized = meta->Serialize(pool_.get()); @@ -160,10 +156,8 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeWithOnlyFirstKey) { TEST_F(BTreeIndexMetaTest, SerializeDeserializeWithOnlyLastKey) { // Create a BTreeIndexMeta with only last_key (edge case) auto last_key = Bytes::AllocateBytes("last", pool_.get()); - auto meta = std::make_shared( - nullptr, - std::shared_ptr(last_key.release()), - false); + auto meta = std::make_shared(nullptr, + std::shared_ptr(last_key.release()), false); // Serialize auto serialized = meta->Serialize(pool_.get()); @@ -188,10 +182,8 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeBinaryKeys) { std::string binary_last = std::string("last\0key", 8); auto first_key = Bytes::AllocateBytes(binary_first, pool_.get()); auto last_key = Bytes::AllocateBytes(binary_last, pool_.get()); - auto meta = std::make_shared( - std::shared_ptr(first_key.release()), - std::shared_ptr(last_key.release()), - true); + auto meta = std::make_shared(std::shared_ptr(first_key.release()), + std::shared_ptr(last_key.release()), true); // Serialize auto serialized = meta->Serialize(pool_.get()); @@ -212,4 +204,4 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeBinaryKeys) { EXPECT_EQ(std::string(deserialized_last->data(), deserialized_last->size()), binary_last); } -} // namespace paimon::test \ No newline at end of file +} // namespace paimon::test diff --git a/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp b/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp index c45896ae8..7ed35a09b 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp +++ b/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp @@ -60,4 +60,4 @@ MemorySlice SortLookupStoreFooter::WriteSortLookupStoreFooter(MemoryPool* pool) output.WriteValue(MAGIC_NUMBER); return output.ToSlice(); } -} // namespace paimon \ No newline at end of file +} // namespace paimon diff --git a/src/paimon/common/lookup/sort/sort_lookup_store_footer.h b/src/paimon/common/lookup/sort/sort_lookup_store_footer.h index 977632910..c4aa1ab80 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_footer.h +++ b/src/paimon/common/lookup/sort/sort_lookup_store_footer.h @@ -58,4 +58,4 @@ class PAIMON_EXPORT SortLookupStoreFooter { BlockHandle index_block_handle_; std::shared_ptr bloom_filter_handle_; }; -} // namespace paimon \ No newline at end of file +} // namespace paimon diff --git a/src/paimon/common/utils/roaring_navigable_map64.cpp b/src/paimon/common/utils/roaring_navigable_map64.cpp index 8aa9bf76e..b0e9cc164 100644 --- a/src/paimon/common/utils/roaring_navigable_map64.cpp +++ b/src/paimon/common/utils/roaring_navigable_map64.cpp @@ -122,7 +122,13 @@ std::vector RoaringNavigableMap64::Serialize() const { void RoaringNavigableMap64::Deserialize(const std::vector& data) { // This is a simplified deserialization - in practice, you might want to use // a more sophisticated approach - impl_->bitmap.Deserialize(reinterpret_cast(data.data()), data.size()); + auto status = + impl_->bitmap.Deserialize(reinterpret_cast(data.data()), data.size()); + if (!status.ok()) { + // Log error or handle deserialization failure + // For now, we'll just clear the bitmap on error + impl_->bitmap = RoaringBitmap64(); + } } std::vector RoaringNavigableMap64::ToRangeList() const { @@ -253,4 +259,4 @@ RoaringNavigableMap64::Iterator RoaringNavigableMap64::end() const { return it; } -} // namespace paimon \ No newline at end of file +} // namespace paimon diff --git a/src/paimon/common/utils/roaring_navigable_map64.h b/src/paimon/common/utils/roaring_navigable_map64.h index 178a7a976..5602e3374 100644 --- a/src/paimon/common/utils/roaring_navigable_map64.h +++ b/src/paimon/common/utils/roaring_navigable_map64.h @@ -235,4 +235,4 @@ class PAIMON_EXPORT RoaringNavigableMap64 { std::unique_ptr impl_; }; -} // namespace paimon \ No newline at end of file +} // namespace paimon diff --git a/src/paimon/common/utils/roaring_navigable_map64_test.cpp b/src/paimon/common/utils/roaring_navigable_map64_test.cpp index b294edc0c..5667fbea4 100644 --- a/src/paimon/common/utils/roaring_navigable_map64_test.cpp +++ b/src/paimon/common/utils/roaring_navigable_map64_test.cpp @@ -110,4 +110,4 @@ TEST_F(RoaringNavigableMap64Test, testAddRangeLargeValues) { EXPECT_EQ(values[100], end); } -} // namespace paimon \ No newline at end of file +} // namespace paimon From e790dad3630068c6aaff173af6fef0613711020e Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 8 Apr 2026 20:33:08 +0800 Subject: [PATCH 11/28] fix --- .../paimon/global_index/global_index_reader.h | 8 +-- .../btree_global_index_integration_test.cpp | 4 +- .../btree/btree_global_indexer_test.cpp | 21 ++++---- .../btree/btree_index_meta_test.cpp | 50 +++++++++---------- 4 files changed, 39 insertions(+), 44 deletions(-) diff --git a/include/paimon/global_index/global_index_reader.h b/include/paimon/global_index/global_index_reader.h index 03b1f0adb..b0da2e725 100644 --- a/include/paimon/global_index/global_index_reader.h +++ b/include/paimon/global_index/global_index_reader.h @@ -50,15 +50,15 @@ class PAIMON_EXPORT GlobalIndexReader : public FunctionVisitor> VisitAnd( - const std::vector>>& children) { + Result> VisitAnd( + const std::vector>>& children) override { return Status::NotImplemented("AND operations not supported by this index type"); } /// VisitOr performs logical OR across multiple child results. /// Default implementation returns "not supported" error. - virtual Result> VisitOr( - const std::vector>>& children) { + Result> VisitOr( + const std::vector>>& children) override { return Status::NotImplemented("OR operations not supported by this index type"); } diff --git a/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp index 182ab4d25..f2ff5d94f 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp @@ -33,7 +33,7 @@ namespace paimon::test { class FakeGlobalIndexFileWriter : public GlobalIndexFileWriter { public: FakeGlobalIndexFileWriter(const std::shared_ptr& fs, const std::string& base_path) - : fs_(fs), base_path_(base_path), file_counter_(0) {} + : fs_(fs), base_path_(base_path) {} Result NewFileName(const std::string& prefix) const override { return prefix + "_" + std::to_string(file_counter_++); @@ -56,7 +56,7 @@ class FakeGlobalIndexFileWriter : public GlobalIndexFileWriter { private: std::shared_ptr fs_; std::string base_path_; - mutable int64_t file_counter_; + mutable int64_t file_counter_ = 0; }; class FakeGlobalIndexFileReader : public GlobalIndexFileReader { diff --git a/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp b/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp index b31c046d0..63172498f 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp @@ -37,12 +37,9 @@ class BTreeGlobalIndexerTest : public ::testing::Test { // Test CreateComparator for STRING type TEST_F(BTreeGlobalIndexerTest, CreateComparatorString) { // Create two MemorySlices for comparison - auto slice_a = MemorySlice::Wrap( - std::shared_ptr(Bytes::AllocateBytes("apple", pool_.get()).release())); - auto slice_b = MemorySlice::Wrap( - std::shared_ptr(Bytes::AllocateBytes("banana", pool_.get()).release())); - auto slice_same = MemorySlice::Wrap( - std::shared_ptr(Bytes::AllocateBytes("apple", pool_.get()).release())); + auto slice_a = MemorySlice::Wrap(std::make_shared("apple", pool_.get())); + auto slice_b = MemorySlice::Wrap(std::make_shared("banana", pool_.get())); + auto slice_same = MemorySlice::Wrap(std::make_shared("apple", pool_.get())); // Lexicographic comparison: "apple" < "banana" auto bytes_a = slice_a.GetHeapMemory(); @@ -66,17 +63,17 @@ TEST_F(BTreeGlobalIndexerTest, CreateComparatorInt) { int32_t val2 = 200; int32_t val3 = 100; - auto bytes1 = Bytes::AllocateBytes(sizeof(int32_t), pool_.get()); + auto bytes1 = std::make_shared(sizeof(int32_t), pool_.get()); memcpy(bytes1->data(), &val1, sizeof(int32_t)); - auto slice1 = MemorySlice::Wrap(std::shared_ptr(bytes1.release())); + auto slice1 = MemorySlice::Wrap(bytes1); - auto bytes2 = Bytes::AllocateBytes(sizeof(int32_t), pool_.get()); + auto bytes2 = std::make_shared(sizeof(int32_t), pool_.get()); memcpy(bytes2->data(), &val2, sizeof(int32_t)); - auto slice2 = MemorySlice::Wrap(std::shared_ptr(bytes2.release())); + auto slice2 = MemorySlice::Wrap(bytes2); - auto bytes3 = Bytes::AllocateBytes(sizeof(int32_t), pool_.get()); + auto bytes3 = std::make_shared(sizeof(int32_t), pool_.get()); memcpy(bytes3->data(), &val3, sizeof(int32_t)); - auto slice3 = MemorySlice::Wrap(std::shared_ptr(bytes3.release())); + auto slice3 = MemorySlice::Wrap(bytes3); // Compare values EXPECT_LT(val1, val2); diff --git a/src/paimon/common/global_index/btree/btree_index_meta_test.cpp b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp index 94b5e4118..5b2404651 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta_test.cpp +++ b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp @@ -33,10 +33,11 @@ class BTreeIndexMetaTest : public ::testing::Test { TEST_F(BTreeIndexMetaTest, SerializeDeserializeNormalKeys) { // Create a BTreeIndexMeta with normal keys - auto first_key = Bytes::AllocateBytes("first_key_data", pool_.get()); - auto last_key = Bytes::AllocateBytes("last_key_data", pool_.get()); - auto meta = std::make_shared(std::shared_ptr(first_key.release()), - std::shared_ptr(last_key.release()), true); + // Use std::make_shared to create shared_ptr with proper memory management + // Bytes constructor uses pool->Malloc() for internal data, and destructor uses pool->Free() + auto first_key = std::make_shared("first_key_data", pool_.get()); + auto last_key = std::make_shared("last_key_data", pool_.get()); + auto meta = std::make_shared(first_key, last_key, true); // Serialize auto serialized = meta->Serialize(pool_.get()); @@ -87,16 +88,16 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeEmptyKeys) { TEST_F(BTreeIndexMetaTest, HasNullsAndOnlyNulls) { // Case 1: Has nulls with keys - auto meta1 = std::make_shared( - std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), - std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), true); + auto meta1 = + std::make_shared(std::make_shared("key", pool_.get()), + std::make_shared("key", pool_.get()), true); EXPECT_TRUE(meta1->HasNulls()); EXPECT_FALSE(meta1->OnlyNulls()); // Case 2: No nulls with keys - auto meta2 = std::make_shared( - std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), - std::shared_ptr(Bytes::AllocateBytes("key", pool_.get()).release()), false); + auto meta2 = + std::make_shared(std::make_shared("key", pool_.get()), + std::make_shared("key", pool_.get()), false); EXPECT_FALSE(meta2->HasNulls()); EXPECT_FALSE(meta2->OnlyNulls()); @@ -113,10 +114,9 @@ TEST_F(BTreeIndexMetaTest, HasNullsAndOnlyNulls) { TEST_F(BTreeIndexMetaTest, SerializeDeserializeNoNulls) { // Create a BTreeIndexMeta without nulls - auto first_key = Bytes::AllocateBytes("abc", pool_.get()); - auto last_key = Bytes::AllocateBytes("xyz", pool_.get()); - auto meta = std::make_shared(std::shared_ptr(first_key.release()), - std::shared_ptr(last_key.release()), false); + auto first_key = std::make_shared("abc", pool_.get()); + auto last_key = std::make_shared("xyz", pool_.get()); + auto meta = std::make_shared(first_key, last_key, false); // Serialize auto serialized = meta->Serialize(pool_.get()); @@ -132,9 +132,8 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeNoNulls) { TEST_F(BTreeIndexMetaTest, SerializeDeserializeWithOnlyFirstKey) { // Create a BTreeIndexMeta with only first_key (edge case) - auto first_key = Bytes::AllocateBytes("first", pool_.get()); - auto meta = std::make_shared(std::shared_ptr(first_key.release()), - nullptr, false); + auto first_key = std::make_shared("first", pool_.get()); + auto meta = std::make_shared(first_key, nullptr, false); // Serialize auto serialized = meta->Serialize(pool_.get()); @@ -155,9 +154,8 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeWithOnlyFirstKey) { TEST_F(BTreeIndexMetaTest, SerializeDeserializeWithOnlyLastKey) { // Create a BTreeIndexMeta with only last_key (edge case) - auto last_key = Bytes::AllocateBytes("last", pool_.get()); - auto meta = std::make_shared(nullptr, - std::shared_ptr(last_key.release()), false); + auto last_key = std::make_shared("last", pool_.get()); + auto meta = std::make_shared(nullptr, last_key, false); // Serialize auto serialized = meta->Serialize(pool_.get()); @@ -180,10 +178,9 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeBinaryKeys) { // Create a BTreeIndexMeta with binary keys containing null bytes std::string binary_first = std::string("key\0with\0nulls", 14); std::string binary_last = std::string("last\0key", 8); - auto first_key = Bytes::AllocateBytes(binary_first, pool_.get()); - auto last_key = Bytes::AllocateBytes(binary_last, pool_.get()); - auto meta = std::make_shared(std::shared_ptr(first_key.release()), - std::shared_ptr(last_key.release()), true); + auto first_key = std::make_shared(binary_first, pool_.get()); + auto last_key = std::make_shared(binary_last, pool_.get()); + auto meta = std::make_shared(first_key, last_key, true); // Serialize auto serialized = meta->Serialize(pool_.get()); @@ -196,7 +193,8 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeBinaryKeys) { // Verify first_key auto deserialized_first = deserialized->FirstKey(); ASSERT_NE(deserialized_first, nullptr); - EXPECT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), binary_first); + EXPECT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), + binary_first); // Verify last_key auto deserialized_last = deserialized->LastKey(); @@ -204,4 +202,4 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeBinaryKeys) { EXPECT_EQ(std::string(deserialized_last->data(), deserialized_last->size()), binary_last); } -} // namespace paimon::test +} // namespace paimon::test \ No newline at end of file From 8cdbc7fde66f337344bbc96bcc2d05972bf81208 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 8 Apr 2026 21:39:05 +0800 Subject: [PATCH 12/28] Fix merge: keep SortLookupStoreFooter API, add CacheManager parameter --- .../lookup/sort/sort_lookup_store_factory.cpp | 13 ++++- src/paimon/common/sst/sst_file_io_test.cpp | 49 +++++++++++++++++-- src/paimon/common/sst/sst_file_reader.cpp | 32 +++++------- src/paimon/common/sst/sst_file_reader.h | 18 ++++--- 4 files changed, 83 insertions(+), 29 deletions(-) diff --git a/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp b/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp index 2b138b6a8..da17d0d33 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp +++ b/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp @@ -36,8 +36,19 @@ Result> SortLookupStoreFactory::CreateReader( const std::shared_ptr& fs, const std::string& file_path, const std::shared_ptr& pool) const { PAIMON_ASSIGN_OR_RAISE(std::shared_ptr in, fs->Open(file_path)); + PAIMON_ASSIGN_OR_RAISE(uint64_t file_len, in->Length()); + PAIMON_RETURN_NOT_OK(in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET)); + auto footer_bytes = Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool.get()); + PAIMON_RETURN_NOT_OK(in->Read(footer_bytes->data(), footer_bytes->size())); + auto footer_segment = MemorySegment::Wrap(std::move(footer_bytes)); + auto footer_slice = MemorySlice::Wrap(footer_segment); + auto footer_input = footer_slice.ToInput(); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr read_footer, + SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input)); PAIMON_ASSIGN_OR_RAISE(std::shared_ptr reader, - SstFileReader::Create(in, comparator_, cache_manager_, pool)); + SstFileReader::Create(pool, in, read_footer->GetIndexBlockHandle(), + read_footer->GetBloomFilterHandle(), comparator_, + cache_manager_)); return std::make_unique(in, reader); } diff --git a/src/paimon/common/sst/sst_file_io_test.cpp b/src/paimon/common/sst/sst_file_io_test.cpp index 5edf5d140..fd4848dd2 100644 --- a/src/paimon/common/sst/sst_file_io_test.cpp +++ b/src/paimon/common/sst/sst_file_io_test.cpp @@ -144,8 +144,19 @@ TEST_P(SstFileIOTest, TestSimple) { // test read ASSERT_OK_AND_ASSIGN(in, fs_->Open(index_path)); + ASSERT_OK_AND_ASSIGN(uint64_t file_len, in->Length()); + ASSERT_OK(in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET)); + auto footer_bytes = Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool_.get()); + ASSERT_OK(in->Read(footer_bytes->data(), footer_bytes->size())); + auto footer_segment = MemorySegment::Wrap(std::move(footer_bytes)); + auto footer_slice = MemorySlice::Wrap(footer_segment); + auto footer_input = footer_slice.ToInput(); + ASSERT_OK_AND_ASSIGN(std::unique_ptr read_footer, + SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input)); ASSERT_OK_AND_ASSIGN(auto reader, - SstFileReader::Create(in, comparator_, cache_manager_, pool_)); + SstFileReader::Create(pool_, in, read_footer->GetIndexBlockHandle(), + read_footer->GetBloomFilterHandle(), comparator_, + cache_manager_)); // not exist key std::string k0 = "k0"; @@ -177,9 +188,22 @@ TEST_P(SstFileIOTest, TestJavaCompatibility) { std::string file = GetDataDir() + "/sst/" + param.file_path; ASSERT_OK_AND_ASSIGN(std::shared_ptr in, fs_->Open(file)); + // read footer + ASSERT_OK_AND_ASSIGN(uint64_t file_len, in->Length()); + ASSERT_OK(in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET)); + auto footer_bytes = Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool_.get()); + ASSERT_OK(in->Read(footer_bytes->data(), footer_bytes->size())); + auto footer_segment = MemorySegment::Wrap(std::move(footer_bytes)); + auto footer_slice = MemorySlice::Wrap(footer_segment); + auto footer_input = footer_slice.ToInput(); + ASSERT_OK_AND_ASSIGN(std::unique_ptr read_footer, + SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input)); + // test read ASSERT_OK_AND_ASSIGN(auto reader, - SstFileReader::Create(in, comparator_, cache_manager_, pool_)); + SstFileReader::Create(pool_, in, read_footer->GetIndexBlockHandle(), + read_footer->GetBloomFilterHandle(), comparator_, + cache_manager_)); // not exist key std::string k0 = "10000"; ASSERT_FALSE(reader->Lookup(std::make_shared(k0, pool_.get())).value()); @@ -270,7 +294,26 @@ TEST_F(SstFileIOTest, TestIOException) { CHECK_HOOK_STATUS(in_result.status(), i); std::shared_ptr in = std::move(in_result).value(); - auto reader_result = SstFileReader::Create(in, comparator_, cache_manager_, pool_); + auto file_len_result = in->Length(); + CHECK_HOOK_STATUS(file_len_result.status(), i); + uint64_t file_len = file_len_result.value(); + + CHECK_HOOK_STATUS( + in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET), i); + auto footer_bytes = + Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool_.get()); + auto read_result = in->Read(footer_bytes->data(), footer_bytes->size()); + CHECK_HOOK_STATUS(read_result.status(), i); + auto footer_segment = MemorySegment::Wrap(std::move(footer_bytes)); + auto footer_slice = MemorySlice::Wrap(footer_segment); + auto footer_input = footer_slice.ToInput(); + auto read_footer_result = SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input); + CHECK_HOOK_STATUS(read_footer_result.status(), i); + + auto reader_result = + SstFileReader::Create(pool_, in, read_footer_result.value()->GetIndexBlockHandle(), + read_footer_result.value()->GetBloomFilterHandle(), comparator_, + cache_manager_); CHECK_HOOK_STATUS(reader_result.status(), i); std::shared_ptr reader = std::move(reader_result).value(); diff --git a/src/paimon/common/sst/sst_file_reader.cpp b/src/paimon/common/sst/sst_file_reader.cpp index 8a1f6605b..afa592ac9 100644 --- a/src/paimon/common/sst/sst_file_reader.cpp +++ b/src/paimon/common/sst/sst_file_reader.cpp @@ -22,24 +22,15 @@ namespace paimon { Result> SstFileReader::Create( - const std::shared_ptr& in, MemorySlice::SliceComparator comparator, - const std::shared_ptr& cache_manager, const std::shared_ptr& pool) { - PAIMON_ASSIGN_OR_RAISE(uint64_t file_len, in->Length()); + const std::shared_ptr& pool, const std::shared_ptr& in, + const BlockHandle& index_block_handle, + const std::shared_ptr& bloom_filter_handle, + MemorySlice::SliceComparator comparator, + const std::shared_ptr& cache_manager) { PAIMON_ASSIGN_OR_RAISE(std::string file_path, in->GetUri()); auto block_cache = std::make_shared(file_path, in, cache_manager, pool); - // read footer - PAIMON_ASSIGN_OR_RAISE( - MemorySegment segment, - block_cache->GetBlock(file_len - BlockFooter::ENCODED_LENGTH, BlockFooter::ENCODED_LENGTH, - /*is_index=*/true, /*decompress_func=*/nullptr)); - auto slice = MemorySlice::Wrap(segment); - auto input = slice.ToInput(); - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr footer, - BlockFooter::ReadBlockFooter(&input)); - // read bloom filter directly now - auto bloom_filter_handle = footer->GetBloomFilterHandle(); std::shared_ptr bloom_filter = nullptr; if (bloom_filter_handle && (bloom_filter_handle->ExpectedEntries() || bloom_filter_handle->Size() || bloom_filter_handle->Offset())) { @@ -53,7 +44,6 @@ Result> SstFileReader::Create( } // create index block reader - auto index_block_handle = footer->GetIndexBlockHandle(); PAIMON_ASSIGN_OR_RAISE( MemorySegment trailer_data, block_cache->GetBlock(index_block_handle.Offset() + index_block_handle.Size(), @@ -71,14 +61,14 @@ Result> SstFileReader::Create( PAIMON_ASSIGN_OR_RAISE(std::shared_ptr reader, BlockReader::Create(MemorySlice::Wrap(block_data), comparator)); return std::shared_ptr( - new SstFileReader(block_cache, bloom_filter, reader, comparator, pool)); + new SstFileReader(pool, block_cache, bloom_filter, reader, comparator)); } -SstFileReader::SstFileReader(const std::shared_ptr& block_cache, +SstFileReader::SstFileReader(const std::shared_ptr& pool, + const std::shared_ptr& block_cache, const std::shared_ptr& bloom_filter, const std::shared_ptr& index_block_reader, - MemorySlice::SliceComparator comparator, - const std::shared_ptr& pool) + MemorySlice::SliceComparator comparator) : pool_(pool), block_cache_(block_cache), bloom_filter_(bloom_filter), @@ -89,6 +79,10 @@ std::unique_ptr SstFileReader::CreateIterator() { return std::make_unique(this, index_block_reader_->Iterator()); } +std::unique_ptr SstFileReader::CreateIndexIterator() { + return index_block_reader_->Iterator(); +} + Result> SstFileReader::Lookup(const std::shared_ptr& key) { if (bloom_filter_.get() && !bloom_filter_->TestHash(MurmurHashUtils::HashBytes(key))) { return std::shared_ptr(); diff --git a/src/paimon/common/sst/sst_file_reader.h b/src/paimon/common/sst/sst_file_reader.h index acef1bbe0..734dedfd4 100644 --- a/src/paimon/common/sst/sst_file_reader.h +++ b/src/paimon/common/sst/sst_file_reader.h @@ -20,7 +20,6 @@ #include "paimon/common/compression/block_compression_factory.h" #include "paimon/common/sst/block_cache.h" -#include "paimon/common/sst/block_footer.h" #include "paimon/common/sst/block_handle.h" #include "paimon/common/sst/block_iterator.h" #include "paimon/common/sst/block_reader.h" @@ -41,12 +40,17 @@ class SstFileIterator; class PAIMON_EXPORT SstFileReader { public: static Result> Create( - const std::shared_ptr& input, MemorySlice::SliceComparator comparator, - const std::shared_ptr& cache_manager, - const std::shared_ptr& pool); + const std::shared_ptr& pool, const std::shared_ptr& input, + const BlockHandle& index_block_handle, + const std::shared_ptr& bloom_filter_handle, + MemorySlice::SliceComparator comparator, + const std::shared_ptr& cache_manager); std::unique_ptr CreateIterator(); + /// Create an iterator for the index block. + std::unique_ptr CreateIndexIterator(); + /// Lookup the specified key in the file. /// /// @param key serialized key @@ -68,10 +72,11 @@ class PAIMON_EXPORT SstFileReader { const std::shared_ptr& trailer, const std::shared_ptr& pool); - SstFileReader(const std::shared_ptr& block_cache, + SstFileReader(const std::shared_ptr& pool, + const std::shared_ptr& block_cache, const std::shared_ptr& bloom_filter, const std::shared_ptr& index_block_reader, - MemorySlice::SliceComparator comparator, const std::shared_ptr& pool); + MemorySlice::SliceComparator comparator); private: std::shared_ptr pool_; @@ -94,4 +99,5 @@ class PAIMON_EXPORT SstFileIterator { std::unique_ptr index_iterator_; std::unique_ptr data_iterator_; }; + } // namespace paimon \ No newline at end of file From b42cb1929151dcb22ed6b4e14c6bd367381ee80a Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 8 Apr 2026 21:47:36 +0800 Subject: [PATCH 13/28] Fix CacheManager constructor call in btree_global_indexer.cpp --- .../common/global_index/btree/btree_global_indexer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index c5d25ad27..ce0cfe420 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -221,8 +221,8 @@ Result> BTreeGlobalIndexer::CreateReader( }; // Read BTree file footer first - auto block_cache = - std::make_shared(meta.file_path, in, pool, std::make_unique()); + auto cache_manager = std::make_shared(1024 * 1024, 0.0); + auto block_cache = std::make_shared(meta.file_path, in, cache_manager, pool); PAIMON_ASSIGN_OR_RAISE(MemorySegment segment, block_cache->GetBlock(meta.file_size - BTreeFileFooter::ENCODED_LENGTH, BTreeFileFooter::ENCODED_LENGTH, true, @@ -236,7 +236,7 @@ Result> BTreeGlobalIndexer::CreateReader( PAIMON_ASSIGN_OR_RAISE( std::shared_ptr sst_file_reader, SstFileReader::Create(pool, in, *footer->GetIndexBlockHandle(), - footer->GetBloomFilterHandle(), result_comparator)); + footer->GetBloomFilterHandle(), result_comparator, cache_manager)); // prepare null_bitmap PAIMON_ASSIGN_OR_RAISE(std::shared_ptr null_bitmap, From dd3d379f04e7ca2360348f5f52296e5d63791070 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 8 Apr 2026 22:16:44 +0800 Subject: [PATCH 14/28] Merge main branch and add BTree Index configuration options - Add BTree Index configuration options (BTREE_INDEX_CACHE_SIZE, BTREE_INDEX_HIGH_PRIORITY_POOL_RATIO, etc.) - Fix SstFileWriter constructor parameter order - Update CacheManager to use options from configuration - Merge main branch changes for CacheManager integration --- include/paimon/defs.h | 21 ++++++++++++ src/paimon/common/defs.cpp | 7 ++++ .../btree/btree_global_index_writer.cpp | 4 +-- .../btree/btree_global_indexer.cpp | 34 ++++++++++++++++++- .../lookup/sort/sort_lookup_store_factory.cpp | 15 ++++---- src/paimon/common/sst/sst_file_io_test.cpp | 23 ++++++------- src/paimon/common/sst/sst_file_reader.cpp | 3 +- 7 files changed, 83 insertions(+), 24 deletions(-) diff --git a/include/paimon/defs.h b/include/paimon/defs.h index 20798951e..f21337d93 100644 --- a/include/paimon/defs.h +++ b/include/paimon/defs.h @@ -407,6 +407,27 @@ struct PAIMON_EXPORT Options { /// "lookup.cache.high-priority-pool-ratio" - The fraction of cache memory that is reserved for /// high-priority data like index, filter. Default value is 0.25. static const char LOOKUP_CACHE_HIGH_PRIO_POOL_RATIO[]; + /// "btree-index.compression" - The compression algorithm to use for BTreeIndex. + /// Default value is "none". + static const char BTREE_INDEX_COMPRESSION[]; + /// "btree-index.compression-level" - The compression level of the compression algorithm. + /// Default value is 1. + static const char BTREE_INDEX_COMPRESSION_LEVEL[]; + /// "btree-index.block-size" - The block size to use for BTreeIndex. + /// Default value is 64 KB. + static const char BTREE_INDEX_BLOCK_SIZE[]; + /// "btree-index.cache-size" - The cache size to use for BTreeIndex. + /// Default value is 128 MB. + static const char BTREE_INDEX_CACHE_SIZE[]; + /// "btree-index.high-priority-pool-ratio" - The high priority pool ratio to use for BTreeIndex. + /// Default value is 0.1. + static const char BTREE_INDEX_HIGH_PRIORITY_POOL_RATIO[]; + /// "btree-index.records-per-range" - The expected number of records per BTree Index File. + /// Default value is 1000000. + static const char BTREE_INDEX_RECORDS_PER_RANGE[]; + /// "btree-index.build.max-parallelism" - The max parallelism of Flink/Spark for building + /// BTreeIndex. Default value is 4096. + static const char BTREE_INDEX_BUILD_MAX_PARALLELISM[]; }; static constexpr int64_t BATCH_WRITE_COMMIT_IDENTIFIER = std::numeric_limits::max(); diff --git a/src/paimon/common/defs.cpp b/src/paimon/common/defs.cpp index 44d4c952d..57e0e1bc1 100644 --- a/src/paimon/common/defs.cpp +++ b/src/paimon/common/defs.cpp @@ -115,5 +115,12 @@ const char Options::LOOKUP_COMPACT[] = "lookup-compact"; const char Options::LOOKUP_COMPACT_MAX_INTERVAL[] = "lookup-compact.max-interval"; const char Options::LOOKUP_CACHE_MAX_MEMORY_SIZE[] = "lookup.cache-max-memory-size"; const char Options::LOOKUP_CACHE_HIGH_PRIO_POOL_RATIO[] = "lookup.cache.high-priority-pool-ratio"; +const char Options::BTREE_INDEX_COMPRESSION[] = "btree-index.compression"; +const char Options::BTREE_INDEX_COMPRESSION_LEVEL[] = "btree-index.compression-level"; +const char Options::BTREE_INDEX_BLOCK_SIZE[] = "btree-index.block-size"; +const char Options::BTREE_INDEX_CACHE_SIZE[] = "btree-index.cache-size"; +const char Options::BTREE_INDEX_HIGH_PRIORITY_POOL_RATIO[] = "btree-index.high-priority-pool-ratio"; +const char Options::BTREE_INDEX_RECORDS_PER_RANGE[] = "btree-index.records-per-range"; +const char Options::BTREE_INDEX_BUILD_MAX_PARALLELISM[] = "btree-index.build.max-parallelism"; } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp index 38881eb46..4342137cf 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp @@ -95,8 +95,8 @@ Status BTreeGlobalIndexWriter::AddBatch(::ArrowArray* arrow_array) { PAIMON_ASSIGN_OR_RAISE(auto compression_factory, BlockCompressionFactory::Create(BlockCompressionType::NONE)); - sst_writer_ = std::make_unique(output_stream_, pool_, bloom_filter_, - block_size_, compression_factory); + sst_writer_ = std::make_unique(output_stream_, bloom_filter_, block_size_, + compression_factory, pool_); } // Group row IDs by key value diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index ce0cfe420..a3f7f56d3 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -24,10 +24,12 @@ #include "paimon/common/global_index/btree/btree_index_meta.h" #include "paimon/common/memory/memory_slice.h" #include "paimon/common/memory/memory_slice_input.h" +#include "paimon/common/options/memory_size.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/crc32c.h" #include "paimon/common/utils/field_type_utils.h" #include "paimon/common/utils/roaring_navigable_map64.h" +#include "paimon/defs.h" #include "paimon/file_index/bitmap_index_result.h" #include "paimon/global_index/bitmap_global_index_result.h" #include "paimon/memory/bytes.h" @@ -35,6 +37,34 @@ namespace paimon { +// Helper function to get cache size from options with default value +static int64_t GetBTreeIndexCacheSize(const std::map& options) { + auto it = options.find(Options::BTREE_INDEX_CACHE_SIZE); + if (it != options.end()) { + auto result = MemorySize::ParseBytes(it->second); + if (result.ok()) { + return result.value(); + } + } + // Default: 128 MB + return 128 * 1024 * 1024; +} + +// Helper function to get high priority pool ratio from options with default value +static double GetBTreeIndexHighPriorityPoolRatio( + const std::map& options) { + auto it = options.find(Options::BTREE_INDEX_HIGH_PRIORITY_POOL_RATIO); + if (it != options.end()) { + try { + return std::stod(it->second); + } catch (...) { + // Ignore parsing errors, use default + } + } + // Default: 0.1 + return 0.1; +} + Result> BTreeGlobalIndexer::CreateWriter( const std::string& field_name, ::ArrowSchema* arrow_schema, const std::shared_ptr& file_writer, @@ -221,7 +251,9 @@ Result> BTreeGlobalIndexer::CreateReader( }; // Read BTree file footer first - auto cache_manager = std::make_shared(1024 * 1024, 0.0); + int64_t cache_size = GetBTreeIndexCacheSize(options_); + double high_priority_pool_ratio = GetBTreeIndexHighPriorityPoolRatio(options_); + auto cache_manager = std::make_shared(cache_size, high_priority_pool_ratio); auto block_cache = std::make_shared(meta.file_path, in, cache_manager, pool); PAIMON_ASSIGN_OR_RAISE(MemorySegment segment, block_cache->GetBlock(meta.file_size - BTreeFileFooter::ENCODED_LENGTH, diff --git a/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp b/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp index da17d0d33..3909a2d18 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp +++ b/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp @@ -27,8 +27,8 @@ Result> SortLookupStoreFactory::CreateWriter( PAIMON_ASSIGN_OR_RAISE(std::shared_ptr out, fs->Create(file_path, /*overwrite=*/false)); return std::make_unique( - out, std::make_shared(out, bloom_filter, block_size_, compression_factory_, - pool), + out, + std::make_shared(out, bloom_filter, block_size_, compression_factory_, pool), pool); } @@ -37,7 +37,8 @@ Result> SortLookupStoreFactory::CreateReader( const std::shared_ptr& pool) const { PAIMON_ASSIGN_OR_RAISE(std::shared_ptr in, fs->Open(file_path)); PAIMON_ASSIGN_OR_RAISE(uint64_t file_len, in->Length()); - PAIMON_RETURN_NOT_OK(in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET)); + PAIMON_RETURN_NOT_OK( + in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET)); auto footer_bytes = Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool.get()); PAIMON_RETURN_NOT_OK(in->Read(footer_bytes->data(), footer_bytes->size())); auto footer_segment = MemorySegment::Wrap(std::move(footer_bytes)); @@ -45,10 +46,10 @@ Result> SortLookupStoreFactory::CreateReader( auto footer_input = footer_slice.ToInput(); PAIMON_ASSIGN_OR_RAISE(std::unique_ptr read_footer, SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input)); - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr reader, - SstFileReader::Create(pool, in, read_footer->GetIndexBlockHandle(), - read_footer->GetBloomFilterHandle(), comparator_, - cache_manager_)); + PAIMON_ASSIGN_OR_RAISE( + std::shared_ptr reader, + SstFileReader::Create(pool, in, read_footer->GetIndexBlockHandle(), + read_footer->GetBloomFilterHandle(), comparator_, cache_manager_)); return std::make_unique(in, reader); } diff --git a/src/paimon/common/sst/sst_file_io_test.cpp b/src/paimon/common/sst/sst_file_io_test.cpp index fd4848dd2..341b09a61 100644 --- a/src/paimon/common/sst/sst_file_io_test.cpp +++ b/src/paimon/common/sst/sst_file_io_test.cpp @@ -153,10 +153,10 @@ TEST_P(SstFileIOTest, TestSimple) { auto footer_input = footer_slice.ToInput(); ASSERT_OK_AND_ASSIGN(std::unique_ptr read_footer, SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input)); - ASSERT_OK_AND_ASSIGN(auto reader, - SstFileReader::Create(pool_, in, read_footer->GetIndexBlockHandle(), - read_footer->GetBloomFilterHandle(), comparator_, - cache_manager_)); + ASSERT_OK_AND_ASSIGN( + auto reader, + SstFileReader::Create(pool_, in, read_footer->GetIndexBlockHandle(), + read_footer->GetBloomFilterHandle(), comparator_, cache_manager_)); // not exist key std::string k0 = "k0"; @@ -200,10 +200,10 @@ TEST_P(SstFileIOTest, TestJavaCompatibility) { SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input)); // test read - ASSERT_OK_AND_ASSIGN(auto reader, - SstFileReader::Create(pool_, in, read_footer->GetIndexBlockHandle(), - read_footer->GetBloomFilterHandle(), comparator_, - cache_manager_)); + ASSERT_OK_AND_ASSIGN( + auto reader, + SstFileReader::Create(pool_, in, read_footer->GetIndexBlockHandle(), + read_footer->GetBloomFilterHandle(), comparator_, cache_manager_)); // not exist key std::string k0 = "10000"; ASSERT_FALSE(reader->Lookup(std::make_shared(k0, pool_.get())).value()); @@ -310,10 +310,9 @@ TEST_F(SstFileIOTest, TestIOException) { auto read_footer_result = SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input); CHECK_HOOK_STATUS(read_footer_result.status(), i); - auto reader_result = - SstFileReader::Create(pool_, in, read_footer_result.value()->GetIndexBlockHandle(), - read_footer_result.value()->GetBloomFilterHandle(), comparator_, - cache_manager_); + auto reader_result = SstFileReader::Create( + pool_, in, read_footer_result.value()->GetIndexBlockHandle(), + read_footer_result.value()->GetBloomFilterHandle(), comparator_, cache_manager_); CHECK_HOOK_STATUS(reader_result.status(), i); std::shared_ptr reader = std::move(reader_result).value(); diff --git a/src/paimon/common/sst/sst_file_reader.cpp b/src/paimon/common/sst/sst_file_reader.cpp index afa592ac9..484a40613 100644 --- a/src/paimon/common/sst/sst_file_reader.cpp +++ b/src/paimon/common/sst/sst_file_reader.cpp @@ -25,8 +25,7 @@ Result> SstFileReader::Create( const std::shared_ptr& pool, const std::shared_ptr& in, const BlockHandle& index_block_handle, const std::shared_ptr& bloom_filter_handle, - MemorySlice::SliceComparator comparator, - const std::shared_ptr& cache_manager) { + MemorySlice::SliceComparator comparator, const std::shared_ptr& cache_manager) { PAIMON_ASSIGN_OR_RAISE(std::string file_path, in->GetUri()); auto block_cache = std::make_shared(file_path, in, cache_manager, pool); From 38a2b8b63bf032347aac661e7b7361bd5e186bb4 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 8 Apr 2026 22:22:03 +0800 Subject: [PATCH 15/28] Add trailing newlines to source files --- src/paimon/common/global_index/btree/btree_index_meta_test.cpp | 2 +- src/paimon/common/io/cache/cache.h | 2 +- src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp | 2 +- src/paimon/common/sst/sst_file_io_test.cpp | 2 +- src/paimon/common/sst/sst_file_reader.cpp | 2 +- src/paimon/common/sst/sst_file_reader.h | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_index_meta_test.cpp b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp index 5b2404651..1e3dde590 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta_test.cpp +++ b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp @@ -202,4 +202,4 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeBinaryKeys) { EXPECT_EQ(std::string(deserialized_last->data(), deserialized_last->size()), binary_last); } -} // namespace paimon::test \ No newline at end of file +} // namespace paimon::test diff --git a/src/paimon/common/io/cache/cache.h b/src/paimon/common/io/cache/cache.h index 0bdb3b842..dc06353cf 100644 --- a/src/paimon/common/io/cache/cache.h +++ b/src/paimon/common/io/cache/cache.h @@ -82,4 +82,4 @@ class CacheValue { MemorySegment segment_; CacheCallback callback_; }; -} // namespace paimon \ No newline at end of file +} // namespace paimon diff --git a/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp b/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp index 3909a2d18..2bb365ca6 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp +++ b/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp @@ -70,4 +70,4 @@ Status SortLookupStoreWriter::Close() { return Status::OK(); } -} // namespace paimon \ No newline at end of file +} // namespace paimon diff --git a/src/paimon/common/sst/sst_file_io_test.cpp b/src/paimon/common/sst/sst_file_io_test.cpp index 341b09a61..5a08dc45d 100644 --- a/src/paimon/common/sst/sst_file_io_test.cpp +++ b/src/paimon/common/sst/sst_file_io_test.cpp @@ -338,4 +338,4 @@ INSTANTIATE_TEST_SUITE_P(Group, SstFileIOTest, SstFileParam{"lz4/10540951-41d3-4216-aa2c-b15dfd25eb75", BlockCompressionType::LZ4})); -} // namespace paimon::test \ No newline at end of file +} // namespace paimon::test diff --git a/src/paimon/common/sst/sst_file_reader.cpp b/src/paimon/common/sst/sst_file_reader.cpp index 484a40613..1379509a5 100644 --- a/src/paimon/common/sst/sst_file_reader.cpp +++ b/src/paimon/common/sst/sst_file_reader.cpp @@ -200,4 +200,4 @@ Status SstFileIterator::SeekTo(const std::shared_ptr& key) { } return Status::OK(); } -} // namespace paimon \ No newline at end of file +} // namespace paimon diff --git a/src/paimon/common/sst/sst_file_reader.h b/src/paimon/common/sst/sst_file_reader.h index 734dedfd4..6d513f9b2 100644 --- a/src/paimon/common/sst/sst_file_reader.h +++ b/src/paimon/common/sst/sst_file_reader.h @@ -100,4 +100,4 @@ class PAIMON_EXPORT SstFileIterator { std::unique_ptr data_iterator_; }; -} // namespace paimon \ No newline at end of file +} // namespace paimon From 858b06853257d0a710f5e5db17f564d6e08b2ddf Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 8 Apr 2026 22:23:12 +0800 Subject: [PATCH 16/28] Fix formatting in btree_index_meta_test.cpp --- src/paimon/common/global_index/btree/btree_index_meta_test.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_index_meta_test.cpp b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp index 1e3dde590..1017e958e 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta_test.cpp +++ b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp @@ -193,8 +193,7 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeBinaryKeys) { // Verify first_key auto deserialized_first = deserialized->FirstKey(); ASSERT_NE(deserialized_first, nullptr); - EXPECT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), - binary_first); + EXPECT_EQ(std::string(deserialized_first->data(), deserialized_first->size()), binary_first); // Verify last_key auto deserialized_last = deserialized->LastKey(); From 78f9bccf11fa66a75fcca36b0f9ce4321c0382a8 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 8 Apr 2026 22:27:38 +0800 Subject: [PATCH 17/28] fix: mark CacheValue constructor as explicit to prevent implicit conversions --- src/paimon/common/io/cache/cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/common/io/cache/cache.h b/src/paimon/common/io/cache/cache.h index dc06353cf..ef893539f 100644 --- a/src/paimon/common/io/cache/cache.h +++ b/src/paimon/common/io/cache/cache.h @@ -64,7 +64,7 @@ class PAIMON_EXPORT NoCache : public Cache { class CacheValue { public: - CacheValue(const MemorySegment& segment, CacheCallback callback = nullptr) + explicit CacheValue(const MemorySegment& segment, CacheCallback callback = nullptr) : segment_(segment), callback_(std::move(callback)) {} const MemorySegment& GetSegment() const { From e45da4c955d6aa143f19a4263f5c6946cdbd8ed6 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Thu, 9 Apr 2026 13:17:22 +0800 Subject: [PATCH 18/28] fix: fix clang-tidy warnings and memory safety issues 1. Fix modernize-use-auto warnings - Replace explicit type declarations with auto when initializing with template casts - Fixed 13 warnings in btree_global_indexer.cpp - Fixed 5 warnings in btree_global_indexer_test.cpp 2. Fix AddressSanitizer alloc-dealloc-mismatch errors - Replace Bytes::AllocateBytes with std::make_shared - Avoid memory pool allocated objects being freed by operator delete - Fixed 13 memory allocation/deallocation mismatches 3. Fix UndefinedBehaviorSanitizer null pointer error - Add num_bytes > 0 check in MemorySegmentUtils::CopyToBytes - Avoid passing null pointer to memcpy when num_bytes is 0 4. Fix modernize-use-default-member-init warning - Use default member initializer for file_counter_ in btree_global_index_writer_test.cpp --- .../btree/btree_compatibility_test.cpp | 4 +- .../btree/btree_global_index_writer.cpp | 2 +- .../btree/btree_global_index_writer_test.cpp | 4 +- .../btree/btree_global_indexer.cpp | 73 +++++++++---------- .../btree/btree_global_indexer_test.cpp | 10 +-- .../common/memory/memory_segment_utils.cpp | 4 +- 6 files changed, 49 insertions(+), 48 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_compatibility_test.cpp b/src/paimon/common/global_index/btree/btree_compatibility_test.cpp index 86326a229..3c9208c0c 100644 --- a/src/paimon/common/global_index/btree/btree_compatibility_test.cpp +++ b/src/paimon/common/global_index/btree/btree_compatibility_test.cpp @@ -103,9 +103,9 @@ static std::shared_ptr ReadBinaryFile(const std::string& path, MemoryPool if (!ifs.is_open()) return nullptr; auto size = ifs.tellg(); ifs.seekg(0, std::ios::beg); - auto bytes = Bytes::AllocateBytes(static_cast(size), pool); + auto bytes = std::make_shared(static_cast(size), pool); ifs.read(bytes->data(), size); - return std::shared_ptr(bytes.release()); + return bytes; } // --------------------------------------------------------------------------- diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp index 4342137cf..965543ef4 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp @@ -131,7 +131,7 @@ Status BTreeGlobalIndexWriter::AddBatch(::ArrowArray* arrow_array) { case arrow::Type::BINARY: { auto str_array = std::static_pointer_cast(array); auto view = str_array->GetView(i); - key_bytes = Bytes::AllocateBytes(view.size(), pool_.get()); + key_bytes = std::make_shared(view.size(), pool_.get()); memcpy(key_bytes->data(), view.data(), view.size()); break; } diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp index 006ed5dea..157ee440e 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp @@ -30,7 +30,7 @@ namespace paimon::test { class FakeGlobalIndexFileWriter : public GlobalIndexFileWriter { public: FakeGlobalIndexFileWriter(const std::shared_ptr& fs, const std::string& base_path) - : fs_(fs), base_path_(base_path), file_counter_(0) {} + : fs_(fs), base_path_(base_path) {} Result NewFileName(const std::string& prefix) const override { return prefix + "_" + std::to_string(file_counter_++); @@ -53,7 +53,7 @@ class FakeGlobalIndexFileWriter : public GlobalIndexFileWriter { private: std::shared_ptr fs_; std::string base_path_; - mutable int64_t file_counter_; + mutable int64_t file_counter_{0}; }; class BTreeGlobalIndexWriterTest : public ::testing::Test { diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index a3f7f56d3..69bb6acf8 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -411,7 +411,7 @@ Result> BTreeGlobalIndexReader::VisitStartsWi std::string upper_bound_str(prefix_bytes->data(), prefix_bytes->size()); bool overflow = true; for (int i = static_cast(upper_bound_str.size()) - 1; i >= 0 && overflow; --i) { - unsigned char c = static_cast(upper_bound_str[i]); + auto c = static_cast(upper_bound_str[i]); if (c < 0xFF) { upper_bound_str[i] = c + 1; overflow = false; @@ -421,9 +421,8 @@ Result> BTreeGlobalIndexReader::VisitStartsWi } if (!overflow) { - auto upper_bytes = Bytes::AllocateBytes(upper_bound_str, pool_.get()); - auto upper_bound_slice = - MemorySlice::Wrap(std::shared_ptr(upper_bytes.release())); + auto upper_bytes = std::make_shared(upper_bound_str, pool_.get()); + auto upper_bound_slice = MemorySlice::Wrap(upper_bytes); PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, RangeQuery(prefix_slice, upper_bound_slice, true, false)); return result.GetBitmap(); @@ -461,7 +460,7 @@ Result> BTreeGlobalIndexReader::VisitLike( return Status::Invalid("LIKE pattern cannot be null"); } - std::string pattern = literal.GetValue(); + auto pattern = literal.GetValue(); bool is_prefix_pattern = false; std::string prefix; @@ -808,9 +807,9 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo // Handle string/binary types if (type == FieldType::STRING || type == FieldType::BINARY) { try { - std::string str_value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(str_value, pool); - return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + auto str_value = literal.GetValue(); + auto bytes = std::make_shared(str_value, pool); + return MemorySlice::Wrap(bytes); } catch (const std::exception& e) { return Status::Invalid("Failed to convert string/binary literal to MemorySlice: " + std::string(e.what())); @@ -820,8 +819,8 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo // Handle integer types if (type == FieldType::BIGINT) { try { - int64_t value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(8, pool); + auto value = literal.GetValue(); + auto bytes = std::make_shared(8, pool); bytes->data()[0] = static_cast(value & 0xFF); bytes->data()[1] = static_cast((value >> 8) & 0xFF); bytes->data()[2] = static_cast((value >> 16) & 0xFF); @@ -830,7 +829,7 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo bytes->data()[5] = static_cast((value >> 40) & 0xFF); bytes->data()[6] = static_cast((value >> 48) & 0xFF); bytes->data()[7] = static_cast((value >> 56) & 0xFF); - return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + return MemorySlice::Wrap(bytes); } catch (const std::exception& e) { return Status::Invalid("Failed to convert bigint literal to MemorySlice: " + std::string(e.what())); @@ -839,13 +838,13 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo if (type == FieldType::INT) { try { - int32_t value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(4, pool); + auto value = literal.GetValue(); + auto bytes = std::make_shared(4, pool); bytes->data()[0] = static_cast(value & 0xFF); bytes->data()[1] = static_cast((value >> 8) & 0xFF); bytes->data()[2] = static_cast((value >> 16) & 0xFF); bytes->data()[3] = static_cast((value >> 24) & 0xFF); - return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + return MemorySlice::Wrap(bytes); } catch (const std::exception& e) { return Status::Invalid("Failed to convert int literal to MemorySlice: " + std::string(e.what())); @@ -854,10 +853,10 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo if (type == FieldType::TINYINT) { try { - int8_t value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(1, pool); + auto value = literal.GetValue(); + auto bytes = std::make_shared(1, pool); bytes->data()[0] = static_cast(value); - return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + return MemorySlice::Wrap(bytes); } catch (const std::exception& e) { return Status::Invalid("Failed to convert tinyint literal to MemorySlice: " + std::string(e.what())); @@ -866,11 +865,11 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo if (type == FieldType::SMALLINT) { try { - int16_t value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(2, pool); + auto value = literal.GetValue(); + auto bytes = std::make_shared(2, pool); bytes->data()[0] = static_cast(value & 0xFF); bytes->data()[1] = static_cast((value >> 8) & 0xFF); - return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + return MemorySlice::Wrap(bytes); } catch (const std::exception& e) { return Status::Invalid("Failed to convert smallint literal to MemorySlice: " + std::string(e.what())); @@ -882,8 +881,8 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo bool value = literal.GetValue(); // Convert to string "1" or "0" to match the format used in BTreeGlobalIndexWriter std::string str_value = value ? "1" : "0"; - auto bytes = Bytes::AllocateBytes(str_value, pool); - return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + auto bytes = std::make_shared(str_value, pool); + return MemorySlice::Wrap(bytes); } catch (const std::exception& e) { return Status::Invalid("Failed to convert boolean literal to MemorySlice: " + std::string(e.what())); @@ -892,11 +891,11 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo if (type == FieldType::FLOAT) { try { - float value = literal.GetValue(); + auto value = literal.GetValue(); // Convert to string to match the format used in BTreeGlobalIndexWriter std::string str_value = std::to_string(value); - auto bytes = Bytes::AllocateBytes(str_value, pool); - return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + auto bytes = std::make_shared(str_value, pool); + return MemorySlice::Wrap(bytes); } catch (const std::exception& e) { return Status::Invalid("Failed to convert float literal to MemorySlice: " + std::string(e.what())); @@ -905,11 +904,11 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo if (type == FieldType::DOUBLE) { try { - double value = literal.GetValue(); + auto value = literal.GetValue(); // Convert to string to match the format used in BTreeGlobalIndexWriter std::string str_value = std::to_string(value); - auto bytes = Bytes::AllocateBytes(str_value, pool); - return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + auto bytes = std::make_shared(str_value, pool); + return MemorySlice::Wrap(bytes); } catch (const std::exception& e) { return Status::Invalid("Failed to convert double literal to MemorySlice: " + std::string(e.what())); @@ -918,11 +917,11 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo if (type == FieldType::DATE) { try { - int32_t value = literal.GetValue(); + auto value = literal.GetValue(); // Convert to string to match the format used in BTreeGlobalIndexWriter std::string str_value = std::to_string(value); - auto bytes = Bytes::AllocateBytes(str_value, pool); - return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + auto bytes = std::make_shared(str_value, pool); + return MemorySlice::Wrap(bytes); } catch (const std::exception& e) { return Status::Invalid("Failed to convert date literal to MemorySlice: " + std::string(e.what())); @@ -931,11 +930,11 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo if (type == FieldType::TIMESTAMP) { try { - int64_t value = literal.GetValue(); + auto value = literal.GetValue(); // Convert to string to match the format used in BTreeGlobalIndexWriter std::string str_value = std::to_string(value); - auto bytes = Bytes::AllocateBytes(str_value, pool); - return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + auto bytes = std::make_shared(str_value, pool); + return MemorySlice::Wrap(bytes); } catch (const std::exception& e) { return Status::Invalid("Failed to convert timestamp literal to MemorySlice: " + std::string(e.what())); @@ -944,8 +943,8 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo if (type == FieldType::DECIMAL) { try { - Decimal decimal_value = literal.GetValue(); - auto bytes = Bytes::AllocateBytes(16, pool); + auto decimal_value = literal.GetValue(); + auto bytes = std::make_shared(16, pool); uint64_t high_bits = decimal_value.HighBits(); uint64_t low_bits = decimal_value.LowBits(); for (int i = 0; i < 8; ++i) { @@ -954,7 +953,7 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo for (int i = 0; i < 8; ++i) { bytes->data()[8 + i] = static_cast((low_bits >> (56 - i * 8)) & 0xFF); } - return MemorySlice::Wrap(std::shared_ptr(bytes.release())); + return MemorySlice::Wrap(bytes); } catch (const std::exception& e) { return Status::Invalid("Failed to convert decimal literal to MemorySlice: " + std::string(e.what())); diff --git a/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp b/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp index 63172498f..f69b9fc71 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp @@ -110,7 +110,7 @@ TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceString) { EXPECT_FALSE(literal.IsNull()); EXPECT_EQ(literal.GetType(), FieldType::STRING); - std::string value = literal.GetValue(); + auto value = literal.GetValue(); EXPECT_EQ(value, "test_value"); } @@ -120,7 +120,7 @@ TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceInt) { EXPECT_FALSE(literal.IsNull()); EXPECT_EQ(literal.GetType(), FieldType::INT); - int32_t value = literal.GetValue(); + auto value = literal.GetValue(); EXPECT_EQ(value, 42); } @@ -130,7 +130,7 @@ TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceBigInt) { EXPECT_FALSE(literal.IsNull()); EXPECT_EQ(literal.GetType(), FieldType::BIGINT); - int64_t value = literal.GetValue(); + auto value = literal.GetValue(); EXPECT_EQ(value, 12345678901234LL); } @@ -140,7 +140,7 @@ TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceFloat) { EXPECT_FALSE(literal.IsNull()); EXPECT_EQ(literal.GetType(), FieldType::FLOAT); - float value = literal.GetValue(); + auto value = literal.GetValue(); EXPECT_FLOAT_EQ(value, 3.14f); } @@ -150,7 +150,7 @@ TEST_F(BTreeGlobalIndexerTest, LiteralToMemorySliceDouble) { EXPECT_FALSE(literal.IsNull()); EXPECT_EQ(literal.GetType(), FieldType::DOUBLE); - double value = literal.GetValue(); + auto value = literal.GetValue(); EXPECT_DOUBLE_EQ(value, 3.14159265358979); } diff --git a/src/paimon/common/memory/memory_segment_utils.cpp b/src/paimon/common/memory/memory_segment_utils.cpp index c2f844e73..d2fc569b1 100644 --- a/src/paimon/common/memory/memory_segment_utils.cpp +++ b/src/paimon/common/memory/memory_segment_utils.cpp @@ -63,7 +63,9 @@ PAIMON_UNIQUE_PTR MemorySegmentUtils::CopyToBytes(const std::vector 0) { + CopyToBytes(segments, offset, bytes.get(), 0, num_bytes); + } return bytes; } From 7c87891efbea4710b86c7f678be16757ae998d7f Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Thu, 9 Apr 2026 15:45:51 +0800 Subject: [PATCH 19/28] chore: add btree compatibility test data to Git LFS - Add *.bin and *.bin.meta files under btree_compatibility_data to LFS - Translate README.md to English - Keep repo size manageable and avoid noisy diffs --- .gitattributes | 2 ++ .../btree/btree_compatibility_data/README.md | 32 +++++++++---------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/.gitattributes b/.gitattributes index d905e63fd..f69291175 100644 --- a/.gitattributes +++ b/.gitattributes @@ -6,3 +6,5 @@ test/test_data/sst/none/79d01717-8380-4504-86e1-387e6c058d0a filter=lfs diff=lfs merge=lfs -text test/test_data/sst/lz4/10540951-41d3-4216-aa2c-b15dfd25eb75 filter=lfs diff=lfs merge=lfs -text test/test_data/sst/zstd/83d05c53-2353-4160-b756-d50dd851b474 filter=lfs diff=lfs merge=lfs -text +test/test_data/global_index/btree/btree_compatibility_data/*.bin filter=lfs diff=lfs merge=lfs -text +test/test_data/global_index/btree/btree_compatibility_data/*.bin.meta filter=lfs diff=lfs merge=lfs -text diff --git a/test/test_data/global_index/btree/btree_compatibility_data/README.md b/test/test_data/global_index/btree/btree_compatibility_data/README.md index 8da35acfa..66940fb75 100644 --- a/test/test_data/global_index/btree/btree_compatibility_data/README.md +++ b/test/test_data/global_index/btree/btree_compatibility_data/README.md @@ -1,15 +1,15 @@ -# BTree 兼容性测试数据 +# BTree Compatibility Test Data -## 文件说明 +## File Description -### 数据文件 -- `btree_test_int_.csv` - 整数类型测试数据(CSV格式) -- `btree_test_int_.bin` - 整数类型测试数据(二进制格式) -- `btree_test_varchar_.csv` - 字符串类型测试数据(CSV格式) -- `btree_test_varchar_.bin` - 字符串类型测试数据(二进制格式) +### Data Files +- `btree_test_int_.csv` - Integer type test data (CSV format) +- `btree_test_int_.bin` - Integer type test data (binary format) +- `btree_test_varchar_.csv` - String type test data (CSV format) +- `btree_test_varchar_.bin` - String type test data (binary format) -### 数据格式 -CSV文件格式: +### Data Format +CSV file format: ``` row_id,key,is_null 0,123,false @@ -17,11 +17,11 @@ row_id,key,is_null 2,456,false ``` -### 测试场景 -1. **小规模数据**:50、100条记录 -2. **中等规模数据**:500、1000条记录 -3. **大规模数据**:5000条记录 -4. **边界条件**:空值、重复键、边界值 +### Test Scenarios +1. **Small-scale data**: 50, 100 records +2. **Medium-scale data**: 500, 1000 records +3. **Large-scale data**: 5000 records +4. **Edge cases**: null values, duplicate keys, boundary values -### 使用说明 -这些数据可用于验证 C++ 版本的 BTree 索引实现与 Java 版本的兼容性。 +### Usage +This data can be used to verify the compatibility between the C++ BTree index implementation and the Java version. \ No newline at end of file From b89591009efbeb7507ceccf6ae1025a484928147 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Tue, 14 Apr 2026 18:36:46 +0800 Subject: [PATCH 20/28] address --- .../compression/block_compression_factory.h | 3 +- .../empty/empty_file_index_reader.h | 8 +- src/paimon/common/global_index/CMakeLists.txt | 2 - .../global_index/btree/btree_file_footer.cpp | 36 +- .../global_index/btree/btree_file_footer.h | 4 +- .../btree/btree_file_footer_test.cpp | 46 +- .../btree/btree_global_index_factory.cpp | 2 +- .../btree_global_index_integration_test.cpp | 18 +- .../btree/btree_global_index_writer.cpp | 55 ++- .../btree/btree_global_index_writer.h | 8 +- .../btree/btree_global_index_writer_test.cpp | 52 +-- .../btree/btree_global_indexer.cpp | 402 +++++++++--------- .../global_index/btree/btree_global_indexer.h | 4 +- .../global_index/btree/btree_index_meta.cpp | 7 +- .../global_index/btree/btree_index_meta.h | 4 +- .../btree/btree_index_meta_test.cpp | 10 +- src/paimon/common/io/cache/cache.h | 15 +- src/paimon/common/io/cache/lru_cache.h | 2 +- src/paimon/common/sst/sst_file_writer.cpp | 5 - src/paimon/common/sst/sst_file_writer.h | 2 - 20 files changed, 311 insertions(+), 374 deletions(-) diff --git a/src/paimon/common/compression/block_compression_factory.h b/src/paimon/common/compression/block_compression_factory.h index 7154b67d3..728fc2081 100644 --- a/src/paimon/common/compression/block_compression_factory.h +++ b/src/paimon/common/compression/block_compression_factory.h @@ -23,11 +23,12 @@ #include "paimon/common/compression/block_decompressor.h" #include "paimon/core/options/compress_options.h" #include "paimon/result.h" +#include "paimon/visibility.h" namespace paimon { /// Each compression codec has an implementation of {@link BlockCompressionFactory} to create /// compressors and decompressors. -class BlockCompressionFactory { +class PAIMON_EXPORT BlockCompressionFactory { public: static Result> Create( const CompressOptions& compression); diff --git a/src/paimon/common/file_index/empty/empty_file_index_reader.h b/src/paimon/common/file_index/empty/empty_file_index_reader.h index 42dba7540..bed4e3332 100644 --- a/src/paimon/common/file_index/empty/empty_file_index_reader.h +++ b/src/paimon/common/file_index/empty/empty_file_index_reader.h @@ -67,14 +67,14 @@ class EmptyFileIndexReader : public FileIndexReader { } Result> VisitNotEqual(const Literal& literal) override { - // Empty file has no data, so all records are not equal to any value - return FileIndexResult::Remain(); + // Empty file has no data, so nothing to return + return FileIndexResult::Skip(); } Result> VisitNotIn( const std::vector& literals) override { - // Empty file has no data, so all records are not in any set - return FileIndexResult::Remain(); + // Empty file has no data, so nothing to return + return FileIndexResult::Skip(); } Result> VisitBetween(const Literal& from, diff --git a/src/paimon/common/global_index/CMakeLists.txt b/src/paimon/common/global_index/CMakeLists.txt index 3b61ea9ee..88e2fb556 100644 --- a/src/paimon/common/global_index/CMakeLists.txt +++ b/src/paimon/common/global_index/CMakeLists.txt @@ -31,13 +31,11 @@ add_paimon_lib(paimon_global_index paimon_file_index_shared STATIC_LINK_LIBS arrow - glog fmt dl Threads::Threads SHARED_LINK_LIBS paimon_shared paimon_file_index_shared - paimon_static SHARED_LINK_FLAGS ${PAIMON_VERSION_SCRIPT_FLAGS}) diff --git a/src/paimon/common/global_index/btree/btree_file_footer.cpp b/src/paimon/common/global_index/btree/btree_file_footer.cpp index 605209a82..fd8ca485a 100644 --- a/src/paimon/common/global_index/btree/btree_file_footer.cpp +++ b/src/paimon/common/global_index/btree/btree_file_footer.cpp @@ -16,24 +16,28 @@ #include "paimon/common/global_index/btree/btree_file_footer.h" +#include + namespace paimon { -Result> BTreeFileFooter::Read(MemorySliceInput& input) { +Result> BTreeFileFooter::Read(MemorySliceInput* input) { // read version and verify magic number - PAIMON_RETURN_NOT_OK(input.SetPosition(ENCODED_LENGTH - 8)); - - int32_t version = input.ReadInt(); - int32_t magic_number = input.ReadInt(); - if (magic_number != MAGIC_NUMBER) { - return Status::IOError("File is not a btree index file (bad magic number)"); + PAIMON_RETURN_NOT_OK(input->SetPosition(ENCODED_LENGTH - 8)); + + int32_t version = input->ReadInt(); + int32_t magic_number = input->ReadInt(); + if (magic_number != kMagicNumber) { + return Status::Invalid( + fmt::format("File is not a btree index file (expected magic number {:#x}, got {:#x})", + kMagicNumber, magic_number)); } - PAIMON_RETURN_NOT_OK(input.SetPosition(0)); + PAIMON_RETURN_NOT_OK(input->SetPosition(0)); // read bloom filter and index handles - auto offset = input.ReadLong(); - auto size = input.ReadInt(); - auto expected_entries = input.ReadLong(); + auto offset = input->ReadLong(); + auto size = input->ReadInt(); + auto expected_entries = input->ReadLong(); std::shared_ptr bloom_filter_handle = std::make_shared(offset, size, expected_entries); if (bloom_filter_handle->Offset() == 0 && bloom_filter_handle->Size() == 0 && @@ -41,12 +45,12 @@ Result> BTreeFileFooter::Read(MemorySliceInput& bloom_filter_handle = nullptr; } - offset = input.ReadLong(); - size = input.ReadInt(); + offset = input->ReadLong(); + size = input->ReadInt(); std::shared_ptr index_block_handle = std::make_shared(offset, size); - offset = input.ReadLong(); - size = input.ReadInt(); + offset = input->ReadLong(); + size = input->ReadInt(); std::shared_ptr null_bitmap_handle = std::make_shared(offset, size); if (null_bitmap_handle->Offset() == 0 && null_bitmap_handle->Size() == 0) { null_bitmap_handle = nullptr; @@ -91,7 +95,7 @@ MemorySlice BTreeFileFooter::Write(const std::shared_ptr& foote // write version and magic number output.WriteValue(footer->GetVersion()); - output.WriteValue(MAGIC_NUMBER); + output.WriteValue(kMagicNumber); return output.ToSlice(); } diff --git a/src/paimon/common/global_index/btree/btree_file_footer.h b/src/paimon/common/global_index/btree/btree_file_footer.h index ec32f882d..c963521d2 100644 --- a/src/paimon/common/global_index/btree/btree_file_footer.h +++ b/src/paimon/common/global_index/btree/btree_file_footer.h @@ -27,7 +27,7 @@ namespace paimon { /// The Footer for BTree file. class BTreeFileFooter { public: - static Result> Read(MemorySliceInput& input); + static Result> Read(MemorySliceInput* input); static MemorySlice Write(const std::shared_ptr& footer, MemoryPool* pool); static MemorySlice Write(const std::shared_ptr& footer, MemorySliceOutput& output); @@ -66,7 +66,7 @@ class BTreeFileFooter { } public: - static constexpr int32_t MAGIC_NUMBER = 0x50425449; + static constexpr int32_t kMagicNumber = 0x50425449; static constexpr int32_t CURRENT_VERSION = 1; static constexpr int32_t ENCODED_LENGTH = 52; diff --git a/src/paimon/common/global_index/btree/btree_file_footer_test.cpp b/src/paimon/common/global_index/btree/btree_file_footer_test.cpp index e5df2c504..91172253d 100644 --- a/src/paimon/common/global_index/btree/btree_file_footer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_file_footer_test.cpp @@ -46,9 +46,7 @@ TEST_F(BTreeFileFooterTest, ReadWriteRoundTrip) { EXPECT_EQ(serialized.Length(), BTreeFileFooter::ENCODED_LENGTH); auto input = serialized.ToInput(); - auto deserialized = BTreeFileFooter::Read(input); - ASSERT_OK(deserialized.status()); - auto deserialized_footer = deserialized.value(); + ASSERT_OK_AND_ASSIGN(auto deserialized_footer, BTreeFileFooter::Read(&input)); auto bf_handle = deserialized_footer->GetBloomFilterHandle(); ASSERT_NE(bf_handle, nullptr); @@ -78,9 +76,7 @@ TEST_F(BTreeFileFooterTest, ReadWriteWithNullBloomFilter) { EXPECT_EQ(serialized.Length(), BTreeFileFooter::ENCODED_LENGTH); auto input = serialized.ToInput(); - auto deserialized = BTreeFileFooter::Read(input); - ASSERT_OK(deserialized.status()); - auto deserialized_footer = deserialized.value(); + ASSERT_OK_AND_ASSIGN(auto deserialized_footer, BTreeFileFooter::Read(&input)); EXPECT_EQ(deserialized_footer->GetBloomFilterHandle(), nullptr); @@ -106,9 +102,7 @@ TEST_F(BTreeFileFooterTest, ReadWriteWithNullNullBitmap) { EXPECT_EQ(serialized.Length(), BTreeFileFooter::ENCODED_LENGTH); auto input = serialized.ToInput(); - auto deserialized = BTreeFileFooter::Read(input); - ASSERT_OK(deserialized.status()); - auto deserialized_footer = deserialized.value(); + ASSERT_OK_AND_ASSIGN(auto deserialized_footer, BTreeFileFooter::Read(&input)); auto bf_handle = deserialized_footer->GetBloomFilterHandle(); ASSERT_NE(bf_handle, nullptr); @@ -133,9 +127,7 @@ TEST_F(BTreeFileFooterTest, ReadWriteWithAllNullHandles) { EXPECT_EQ(serialized.Length(), BTreeFileFooter::ENCODED_LENGTH); auto input = serialized.ToInput(); - auto deserialized = BTreeFileFooter::Read(input); - ASSERT_OK(deserialized.status()); - auto deserialized_footer = deserialized.value(); + ASSERT_OK_AND_ASSIGN(auto deserialized_footer, BTreeFileFooter::Read(&input)); EXPECT_EQ(deserialized_footer->GetBloomFilterHandle(), nullptr); @@ -147,17 +139,6 @@ TEST_F(BTreeFileFooterTest, ReadWriteWithAllNullHandles) { EXPECT_EQ(deserialized_footer->GetNullBitmapHandle(), nullptr); } -TEST_F(BTreeFileFooterTest, MagicNumberVerification) { - auto index_block_handle = std::make_shared(200, 80); - auto footer = std::make_shared(nullptr, index_block_handle, nullptr); - - auto serialized = BTreeFileFooter::Write(footer, pool_.get()); - - auto input = serialized.ToInput(); - auto deserialized = BTreeFileFooter::Read(input); - ASSERT_OK(deserialized.status()); -} - TEST_F(BTreeFileFooterTest, InvalidMagicNumber) { MemorySliceOutput output(BTreeFileFooter::ENCODED_LENGTH, pool_.get()); @@ -171,27 +152,14 @@ TEST_F(BTreeFileFooterTest, InvalidMagicNumber) { output.WriteValue(static_cast(0)); output.WriteValue(static_cast(0)); + output.WriteValue(static_cast(1)); // version output.WriteValue(static_cast(12345)); // Invalid magic number auto serialized = output.ToSlice(); auto input = serialized.ToInput(); - auto deserialized = BTreeFileFooter::Read(input); - EXPECT_FALSE(deserialized.ok()); - EXPECT_TRUE(deserialized.status().IsIOError()); -} - -TEST_F(BTreeFileFooterTest, EncodedLength) { - EXPECT_EQ(BTreeFileFooter::ENCODED_LENGTH, 52); - - auto bloom_filter_handle = std::make_shared(100, 50, 1000); - auto index_block_handle = std::make_shared(200, 80); - auto null_bitmap_handle = std::make_shared(300, 40); - auto footer = std::make_shared(bloom_filter_handle, index_block_handle, - null_bitmap_handle); - - auto serialized = BTreeFileFooter::Write(footer, pool_.get()); - EXPECT_EQ(serialized.Length(), 52); + auto deserialized = BTreeFileFooter::Read(&input); + ASSERT_NOK_WITH_MSG(deserialized, "not a btree index file"); } } // namespace paimon::test diff --git a/src/paimon/common/global_index/btree/btree_global_index_factory.cpp b/src/paimon/common/global_index/btree/btree_global_index_factory.cpp index 71b8fc5e4..f07eeb85d 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_factory.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_factory.cpp @@ -21,7 +21,7 @@ #include "paimon/common/global_index/btree/btree_global_indexer.h" namespace paimon { -const char BTreeGlobalIndexerFactory::IDENTIFIER[] = "btree"; +const char BTreeGlobalIndexerFactory::IDENTIFIER[] = "btree-global"; Result> BTreeGlobalIndexerFactory::Create( const std::map& options) const { diff --git a/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp index f2ff5d94f..267bdc2ed 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp @@ -123,8 +123,8 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadIntData) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = - std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, + pool_, 4096, 100000); // Create an Arrow array with int values // Row IDs: 0->1, 1->2, 2->3, 3->2, 4->1, 5->4, 6->5, 7->5, 8->5 @@ -190,7 +190,7 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadStringData) { // Create the BTree global index writer auto writer = std::make_shared("string_field", c_schema.get(), - file_writer, pool_); + file_writer, pool_, 4096, 100000); // Create an Arrow array with string values auto array = arrow::ipc::internal::json::ArrayFromJSON( @@ -245,8 +245,8 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadWithNulls) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = - std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, + pool_, 4096, 100000); // Create an Arrow array with null values // Row IDs: 0->1, 1->null, 2->3, 3->null, 4->5 @@ -309,8 +309,8 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadRangeQuery) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = - std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, + pool_, 4096, 100000); // Create an Arrow array with int values auto array = @@ -372,8 +372,8 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadInQuery) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = - std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, + pool_, 4096, 100000); // Create an Arrow array with int values auto array = diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp index 965543ef4..6e636ef26 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp @@ -25,6 +25,7 @@ #include "paimon/common/memory/memory_slice_output.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/crc32c.h" +#include "paimon/common/utils/date_time_utils.h" #include "paimon/common/utils/field_type_utils.h" #include "paimon/memory/bytes.h" @@ -57,7 +58,7 @@ BTreeGlobalIndexWriter::BTreeGlobalIndexWriter( if (arrow_schema) { auto schema_result = arrow::ImportSchema(arrow_schema); if (schema_result.ok()) { - auto schema = schema_result.ValueOrDie(); + auto schema = *schema_result; if (schema->num_fields() > 0) { arrow_type_ = schema->field(0)->type(); } @@ -76,11 +77,8 @@ Status BTreeGlobalIndexWriter::AddBatch(::ArrowArray* arrow_array) { } // Import Arrow array with the correct type - auto import_result = arrow::ImportArray(arrow_array, arrow_type_); - if (!import_result.ok()) { - return Status::Invalid("Failed to import array: " + import_result.status().ToString()); - } - auto array = import_result.ValueOrDie(); + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr array, + arrow::ImportArray(arrow_array, arrow_type_)); // Initialize SST writer on first batch if (!sst_writer_) { @@ -178,17 +176,32 @@ Status BTreeGlobalIndexWriter::AddBatch(::ArrowArray* arrow_array) { case arrow::Type::DATE32: { auto date_array = std::static_pointer_cast(array); int32_t value = date_array->Value(i); - // Store as 4-byte little-endian + // Store as 4-byte int32 to match Java's writeInt for DATE type key_bytes = std::make_shared(sizeof(int32_t), pool_.get()); memcpy(key_bytes->data(), &value, sizeof(int32_t)); break; } case arrow::Type::TIMESTAMP: { auto ts_array = std::static_pointer_cast(array); - int64_t value = ts_array->Value(i); - // Store as 8-byte little-endian - key_bytes = std::make_shared(sizeof(int64_t), pool_.get()); - memcpy(key_bytes->data(), &value, sizeof(int64_t)); + auto ts_type = std::static_pointer_cast(array->type()); + int32_t precision = DateTimeUtils::GetPrecisionFromType(ts_type); + auto time_type = DateTimeUtils::GetTimeTypeFromArrowType(ts_type); + int64_t raw_value = ts_array->Value(i); + auto [milli, nano] = DateTimeUtils::TimestampConverter( + raw_value, time_type, DateTimeUtils::TimeType::MILLISECOND, + DateTimeUtils::TimeType::NANOSECOND); + if (Timestamp::IsCompact(precision)) { + // compact: writeLong(millisecond) — 8 bytes + key_bytes = std::make_shared(sizeof(int64_t), pool_.get()); + memcpy(key_bytes->data(), &milli, sizeof(int64_t)); + } else { + // non-compact: writeLong(millisecond) + writeVarLenInt(nanoOfMillisecond) + MemorySliceOutput ts_out(13, pool_.get()); + ts_out.WriteValue(milli); + ts_out.WriteVarLenInt(static_cast(nano)); + auto slice = ts_out.ToSlice(); + key_bytes = slice.GetHeapMemory(); + } break; } default: @@ -225,7 +238,7 @@ Status BTreeGlobalIndexWriter::WriteKeyValue(std::shared_ptr key, std::shared_ptr BTreeGlobalIndexWriter::SerializeRowIds( const std::vector& row_ids) { // Format: [num_row_ids (VarLenLong)][row_id1 (VarLenLong)][row_id2]... - // Use VarLenLong for row IDs to match Java's DataOutputStream.writeVarLong + // Use VarLenLong for row IDs to match Java's MemorySliceOutput.writeVarLenLong int32_t estimated_size = 10 + row_ids.size() * 10; // Conservative estimate auto output = std::make_shared(estimated_size, pool_.get()); @@ -285,18 +298,22 @@ Result> BTreeGlobalIndexWriter::Finish() { // Flush any remaining data in the data block writer PAIMON_RETURN_NOT_OK(sst_writer_->Flush()); - // Write index block - PAIMON_ASSIGN_OR_RAISE(auto index_block_handle, sst_writer_->WriteIndexBlock()); + // Write null bitmap first (matches Java write order: null bitmap → bloom filter → index block) + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr null_bitmap_handle, + WriteNullBitmap(output_stream_)); // Write bloom filter - PAIMON_ASSIGN_OR_RAISE(auto bloom_filter_handle, sst_writer_->WriteBloomFilter()); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr bloom_filter_handle, + sst_writer_->WriteBloomFilter()); - // Write null bitmap - PAIMON_ASSIGN_OR_RAISE(auto null_bitmap_handle, WriteNullBitmap(output_stream_)); + // Write index block + PAIMON_ASSIGN_OR_RAISE(BlockHandle index_block_handle, sst_writer_->WriteIndexBlock()); // Write BTree file footer - auto footer = std::make_shared( - bloom_filter_handle, std::make_shared(index_block_handle), null_bitmap_handle); + auto index_block_handle_ptr = + std::make_shared(index_block_handle.Offset(), index_block_handle.Size()); + auto footer = std::make_shared(bloom_filter_handle, index_block_handle_ptr, + null_bitmap_handle); auto footer_slice = BTreeFileFooter::Write(footer, pool_.get()); auto footer_bytes = footer_slice.CopyBytes(pool_.get()); PAIMON_RETURN_NOT_OK(output_stream_->Write(footer_bytes->data(), footer_bytes->size())); diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.h b/src/paimon/common/global_index/btree/btree_global_index_writer.h index 1d627bfae..28dba2d27 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.h +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.h @@ -34,8 +34,8 @@ class BTreeGlobalIndexWriter : public GlobalIndexWriter { public: BTreeGlobalIndexWriter(const std::string& field_name, ::ArrowSchema* arrow_schema, const std::shared_ptr& file_writer, - const std::shared_ptr& pool, int32_t block_size = 4096, - int64_t expected_entries = 100000); + const std::shared_ptr& pool, int32_t block_size, + int64_t expected_entries); ~BTreeGlobalIndexWriter() override = default; @@ -67,9 +67,9 @@ class BTreeGlobalIndexWriter : public GlobalIndexWriter { std::shared_ptr pool_; int32_t block_size_; - // SST file writer - std::unique_ptr sst_writer_; + // SST file writer (declared after pool_ to ensure correct destruction order) std::shared_ptr output_stream_; + std::unique_ptr sst_writer_; std::string file_name_; // Track first and last keys for index meta diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp index 157ee440e..4f5cb38c2 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp @@ -90,8 +90,8 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteIntData) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = - std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, + pool_, 4096, 100000); // Create an Arrow array with int values auto array = @@ -107,9 +107,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteIntData) { ASSERT_OK(status); // Finish writing - auto result = writer->Finish(); - ASSERT_OK(result.status()); - auto metas = result.value(); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); ASSERT_EQ(metas.size(), 1); // Verify metadata @@ -134,7 +132,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteStringData) { // Create the BTree global index writer auto writer = std::make_shared("string_field", c_schema.get(), - file_writer, pool_); + file_writer, pool_, 4096, 100000); // Create an Arrow array with string values auto array = arrow::ipc::internal::json::ArrayFromJSON( @@ -150,9 +148,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteStringData) { ASSERT_OK(status); // Finish writing - auto result = writer->Finish(); - ASSERT_OK(result.status()); - auto metas = result.value(); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); ASSERT_EQ(metas.size(), 1); // Verify metadata @@ -175,8 +171,8 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteWithNulls) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = - std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, + pool_, 4096, 100000); // Create an Arrow array with null values auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, null, 3, null, 5]") @@ -191,9 +187,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteWithNulls) { ASSERT_OK(status); // Finish writing - auto result = writer->Finish(); - ASSERT_OK(result.status()); - auto metas = result.value(); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); ASSERT_EQ(metas.size(), 1); // Verify metadata @@ -219,8 +213,8 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteMultipleBatches) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = - std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, + pool_, 4096, 100000); // Create first batch auto array1 = @@ -247,9 +241,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteMultipleBatches) { ArrowArrayRelease(&c_array2); // Finish writing - auto result = writer->Finish(); - ASSERT_OK(result.status()); - auto metas = result.value(); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); ASSERT_EQ(metas.size(), 1); // Verify metadata @@ -268,13 +260,11 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteEmptyData) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = - std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, + pool_, 4096, 100000); // Finish without adding any data - auto result = writer->Finish(); - ASSERT_OK(result.status()); - auto metas = result.value(); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); ASSERT_EQ(metas.size(), 0); // No data, no metadata // Release the ArrowSchema @@ -289,8 +279,8 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteAllNulls) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = - std::make_shared("int_field", c_schema.get(), file_writer, pool_); + auto writer = std::make_shared("int_field", c_schema.get(), file_writer, + pool_, 4096, 100000); // Create an Arrow array with all null values auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[null, null, null]") @@ -305,9 +295,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteAllNulls) { ASSERT_OK(status); // Finish writing - auto result = writer->Finish(); - ASSERT_OK(result.status()); - auto metas = result.value(); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); ASSERT_EQ(metas.size(), 1); // Verify metadata - should have null bitmap but no keys @@ -330,7 +318,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteDoubleData) { // Create the BTree global index writer auto writer = std::make_shared("double_field", c_schema.get(), - file_writer, pool_); + file_writer, pool_, 4096, 100000); // Create an Arrow array with double values auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::float64(), "[1.5, 2.5, 3.5, 1.5]") @@ -345,9 +333,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteDoubleData) { ASSERT_OK(status); // Finish writing - auto result = writer->Finish(); - ASSERT_OK(result.status()); - auto metas = result.value(); + ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); ASSERT_EQ(metas.size(), 1); // Release the ArrowArray diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index 69bb6acf8..4f01e5ddc 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -15,6 +15,7 @@ */ #include "paimon/common/global_index/btree/btree_global_indexer.h" +#include #include #include @@ -24,11 +25,15 @@ #include "paimon/common/global_index/btree/btree_index_meta.h" #include "paimon/common/memory/memory_slice.h" #include "paimon/common/memory/memory_slice_input.h" +#include "paimon/common/memory/memory_slice_output.h" #include "paimon/common/options/memory_size.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/crc32c.h" +#include "paimon/common/utils/date_time_utils.h" #include "paimon/common/utils/field_type_utils.h" +#include "paimon/common/utils/options_utils.h" #include "paimon/common/utils/roaring_navigable_map64.h" +#include "paimon/data/timestamp.h" #include "paimon/defs.h" #include "paimon/file_index/bitmap_index_result.h" #include "paimon/global_index/bitmap_global_index_result.h" @@ -38,51 +43,43 @@ namespace paimon { // Helper function to get cache size from options with default value -static int64_t GetBTreeIndexCacheSize(const std::map& options) { - auto it = options.find(Options::BTREE_INDEX_CACHE_SIZE); - if (it != options.end()) { - auto result = MemorySize::ParseBytes(it->second); - if (result.ok()) { - return result.value(); - } +static Result GetBTreeIndexCacheSize(const std::map& options) { + auto str_result = + OptionsUtils::GetValueFromMap(options, Options::BTREE_INDEX_CACHE_SIZE); + if (!str_result.ok()) { + return 128 * 1024 * 1024; } - // Default: 128 MB - return 128 * 1024 * 1024; + return MemorySize::ParseBytes(str_result.value()); } // Helper function to get high priority pool ratio from options with default value -static double GetBTreeIndexHighPriorityPoolRatio( +static Result GetBTreeIndexHighPriorityPoolRatio( const std::map& options) { - auto it = options.find(Options::BTREE_INDEX_HIGH_PRIORITY_POOL_RATIO); - if (it != options.end()) { - try { - return std::stod(it->second); - } catch (...) { - // Ignore parsing errors, use default - } - } - // Default: 0.1 - return 0.1; + return OptionsUtils::GetValueFromMap( + options, Options::BTREE_INDEX_HIGH_PRIORITY_POOL_RATIO, 0.1); } Result> BTreeGlobalIndexer::CreateWriter( const std::string& field_name, ::ArrowSchema* arrow_schema, const std::shared_ptr& file_writer, const std::shared_ptr& pool) const { - return std::make_shared(field_name, arrow_schema, file_writer, pool); + return std::make_shared(field_name, arrow_schema, file_writer, pool, + 4096, 100000); } // Forward declarations for helper functions -static Result LiteralToMemorySlice(const Literal& literal, MemoryPool* pool); +static Result LiteralToMemorySlice(const Literal& literal, MemoryPool* pool, + int32_t ts_precision); // Create a comparator function based on field type // Keys are stored in binary format to match Java's DataOutputStream format static std::function CreateComparator( - FieldType field_type) { + FieldType field_type, const std::shared_ptr& arrow_type) { // For numeric types, compare as binary values in little-endian format // to match Java's DataOutputStream.writeInt/writeLong format switch (field_type) { case FieldType::INT: + case FieldType::DATE: return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { if (a.Length() < static_cast(sizeof(int32_t)) || b.Length() < static_cast(sizeof(int32_t))) { @@ -101,8 +98,6 @@ static std::function CreateComp return 0; }; case FieldType::BIGINT: - case FieldType::DATE: - case FieldType::TIMESTAMP: return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { if (a.Length() < static_cast(sizeof(int64_t)) || b.Length() < static_cast(sizeof(int64_t))) { @@ -120,6 +115,40 @@ static std::function CreateComp if (a_val > b_val) return 1; return 0; }; + case FieldType::TIMESTAMP: { + int32_t precision = Timestamp::MILLIS_PRECISION; + if (arrow_type->id() == arrow::Type::TIMESTAMP) { + auto ts_type = std::static_pointer_cast(arrow_type); + precision = DateTimeUtils::GetPrecisionFromType(ts_type); + } + if (Timestamp::IsCompact(precision)) { + // compact: compare as int64 (millisecond only) + return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { + int64_t a_val = a.ReadLong(0); + int64_t b_val = b.ReadLong(0); + if (a_val < b_val) return -1; + if (a_val > b_val) return 1; + return 0; + }; + } else { + // non-compact: compare millisecond first, then nanoOfMillisecond + return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { + auto a_input = a.ToInput(); + auto b_input = b.ToInput(); + int64_t a_milli = a_input.ReadLong(); + int64_t b_milli = b_input.ReadLong(); + if (a_milli < b_milli) return -1; + if (a_milli > b_milli) return 1; + auto a_nano = a_input.ReadVarLenInt(); + auto b_nano = b_input.ReadVarLenInt(); + if (a_nano.ok() && b_nano.ok()) { + if (a_nano.value() < b_nano.value()) return -1; + if (a_nano.value() > b_nano.value()) return 1; + } + return 0; + }; + } + } case FieldType::SMALLINT: return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { if (a.Length() < static_cast(sizeof(int16_t)) || @@ -242,7 +271,7 @@ Result> BTreeGlobalIndexer::CreateReader( FieldTypeUtils::ConvertToFieldType(arrow_type->id())); // Create comparator based on field type - auto comparator = CreateComparator(field_type); + auto comparator = CreateComparator(field_type, arrow_type); // Wrap the comparator to return Result MemorySlice::SliceComparator result_comparator = @@ -251,8 +280,9 @@ Result> BTreeGlobalIndexer::CreateReader( }; // Read BTree file footer first - int64_t cache_size = GetBTreeIndexCacheSize(options_); - double high_priority_pool_ratio = GetBTreeIndexHighPriorityPoolRatio(options_); + PAIMON_ASSIGN_OR_RAISE(int64_t cache_size, GetBTreeIndexCacheSize(options_)); + PAIMON_ASSIGN_OR_RAISE(double high_priority_pool_ratio, + GetBTreeIndexHighPriorityPoolRatio(options_)); auto cache_manager = std::make_shared(cache_size, high_priority_pool_ratio); auto block_cache = std::make_shared(meta.file_path, in, cache_manager, pool); PAIMON_ASSIGN_OR_RAISE(MemorySegment segment, @@ -262,7 +292,7 @@ Result> BTreeGlobalIndexer::CreateReader( auto footer_slice = MemorySlice::Wrap(segment); auto footer_input = footer_slice.ToInput(); PAIMON_ASSIGN_OR_RAISE(std::shared_ptr footer, - BTreeFileFooter::Read(footer_input)); + BTreeFileFooter::Read(&footer_input)); // Create SST file reader with footer information PAIMON_ASSIGN_OR_RAISE( @@ -288,9 +318,16 @@ Result> BTreeGlobalIndexer::CreateReader( max_key_slice = MemorySlice::Wrap(index_meta->LastKey()); } + // Get timestamp precision if applicable + int32_t ts_precision = Timestamp::MILLIS_PRECISION; + if (arrow_type->id() == arrow::Type::TIMESTAMP) { + auto ts_type = std::static_pointer_cast(arrow_type); + ts_precision = DateTimeUtils::GetPrecisionFromType(ts_type); + } + return std::make_shared(sst_file_reader, null_bitmap, min_key_slice, max_key_slice, has_min_key, files, pool, - comparator); + comparator, ts_precision); } Result> BTreeGlobalIndexer::ToGlobalIndexResult( @@ -337,12 +374,10 @@ Result> BTreeGlobalIndexer::ReadNullBitma // Calculate CRC32C checksum uint32_t crc_value = CRC32C::calculate(null_bitmap_view.data(), null_bitmap_view.size()); - // Read expected CRC value (stored as uint32_t in little-endian) - uint32_t expected_crc_value = 0; - for (int i = 0; i < 4; ++i) { - expected_crc_value |= static_cast(static_cast(slice_input.ReadByte())) - << (i * 8); - } + // Read expected CRC value (stored as native uint32_t) + auto crc_slice = slice_input.ReadSlice(sizeof(uint32_t)); + uint32_t expected_crc_value; + std::memcpy(&expected_crc_value, crc_slice.ReadStringView().data(), sizeof(expected_crc_value)); // Verify CRC checksum if (crc_value != expected_crc_value) { @@ -352,17 +387,10 @@ Result> BTreeGlobalIndexer::ReadNullBitma } // Deserialize null bitmap - try { - std::vector data( - reinterpret_cast(null_bitmap_view.data()), - reinterpret_cast(null_bitmap_view.data()) + null_bitmap_view.size()); - null_bitmap->Deserialize(data); - } catch (const std::exception& e) { - return Status::Invalid( - "Fail to deserialize null bitmap but crc check passed, " - "this means the serialization/deserialization algorithms not match: " + - std::string(e.what())); - } + std::vector data( + reinterpret_cast(null_bitmap_view.data()), + reinterpret_cast(null_bitmap_view.data()) + null_bitmap_view.size()); + null_bitmap->Deserialize(data); return null_bitmap; } @@ -372,7 +400,7 @@ BTreeGlobalIndexReader::BTreeGlobalIndexReader( const std::shared_ptr& null_bitmap, const MemorySlice& min_key, const MemorySlice& max_key, bool has_min_key, const std::vector& files, const std::shared_ptr& pool, - std::function comparator) + std::function comparator, int32_t ts_precision) : sst_file_reader_(sst_file_reader), null_bitmap_(null_bitmap), min_key_(min_key), @@ -380,7 +408,8 @@ BTreeGlobalIndexReader::BTreeGlobalIndexReader( has_min_key_(has_min_key), files_(files), pool_(pool), - comparator_(std::move(comparator)) {} + comparator_(std::move(comparator)), + ts_precision_(ts_precision) {} Result> BTreeGlobalIndexReader::VisitIsNotNull() { return std::make_shared([this]() -> Result { @@ -397,7 +426,8 @@ Result> BTreeGlobalIndexReader::VisitIsNull() Result> BTreeGlobalIndexReader::VisitStartsWith( const Literal& prefix) { return std::make_shared([this, &prefix]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto prefix_slice, LiteralToMemorySlice(prefix, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(auto prefix_slice, + LiteralToMemorySlice(prefix, pool_.get(), ts_precision_)); auto prefix_type = prefix.GetType(); @@ -497,7 +527,8 @@ Result> BTreeGlobalIndexReader::VisitLike( Result> BTreeGlobalIndexReader::VisitLessThan( const Literal& literal) { return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, RangeQuery(min_key_, literal_slice, true, false)); return result.GetBitmap(); @@ -507,7 +538,8 @@ Result> BTreeGlobalIndexReader::VisitLessThan Result> BTreeGlobalIndexReader::VisitGreaterOrEqual( const Literal& literal) { return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, RangeQuery(literal_slice, max_key_, true, true)); return result.GetBitmap(); @@ -518,7 +550,8 @@ Result> BTreeGlobalIndexReader::VisitNotEqual const Literal& literal) { return std::make_shared([this, &literal]() -> Result { PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 equal_result, RangeQuery(literal_slice, literal_slice, true, true)); result.AndNot(equal_result); @@ -529,7 +562,8 @@ Result> BTreeGlobalIndexReader::VisitNotEqual Result> BTreeGlobalIndexReader::VisitLessOrEqual( const Literal& literal) { return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, RangeQuery(min_key_, literal_slice, true, true)); return result.GetBitmap(); @@ -539,7 +573,8 @@ Result> BTreeGlobalIndexReader::VisitLessOrEq Result> BTreeGlobalIndexReader::VisitEqual( const Literal& literal) { return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, RangeQuery(literal_slice, literal_slice, true, true)); return result.GetBitmap(); @@ -549,7 +584,8 @@ Result> BTreeGlobalIndexReader::VisitEqual( Result> BTreeGlobalIndexReader::VisitGreaterThan( const Literal& literal) { return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, RangeQuery(literal_slice, max_key_, false, true)); return result.GetBitmap(); @@ -558,17 +594,18 @@ Result> BTreeGlobalIndexReader::VisitGreaterT Result> BTreeGlobalIndexReader::VisitIn( const std::vector& literals) { - return std::make_shared([this, - &literals]() -> Result { - RoaringNavigableMap64 result; - for (const auto& literal : literals) { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, LiteralToMemorySlice(literal, pool_.get())); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 literal_result, - RangeQuery(literal_slice, literal_slice, true, true)); - result.Or(literal_result); - } - return result.GetBitmap(); - }); + return std::make_shared( + [this, &literals]() -> Result { + RoaringNavigableMap64 result; + for (const auto& literal : literals) { + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 literal_result, + RangeQuery(literal_slice, literal_slice, true, true)); + result.Or(literal_result); + } + return result.GetBitmap(); + }); } Result> BTreeGlobalIndexReader::VisitNotIn( @@ -592,29 +629,31 @@ Result> BTreeGlobalIndexReader::VisitNotIn( Result> BTreeGlobalIndexReader::VisitBetween(const Literal& from, const Literal& to) { - return std::make_shared( - [this, &from, &to]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto from_slice, LiteralToMemorySlice(from, pool_.get())); - PAIMON_ASSIGN_OR_RAISE(auto to_slice, LiteralToMemorySlice(to, pool_.get())); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, - RangeQuery(from_slice, to_slice, true, true)); - return result.GetBitmap(); - }); + return std::make_shared([this, &from, + &to]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto from_slice, + LiteralToMemorySlice(from, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(auto to_slice, LiteralToMemorySlice(to, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, + RangeQuery(from_slice, to_slice, true, true)); + return result.GetBitmap(); + }); } Result> BTreeGlobalIndexReader::VisitNotBetween( const Literal& from, const Literal& to) { - return std::make_shared( - [this, &from, &to]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto from_slice, LiteralToMemorySlice(from, pool_.get())); - PAIMON_ASSIGN_OR_RAISE(auto to_slice, LiteralToMemorySlice(to, pool_.get())); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 lower_result, - RangeQuery(min_key_, from_slice, true, false)); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 upper_result, - RangeQuery(to_slice, max_key_, false, true)); - lower_result.Or(upper_result); - return lower_result.GetBitmap(); - }); + return std::make_shared([this, &from, + &to]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto from_slice, + LiteralToMemorySlice(from, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(auto to_slice, LiteralToMemorySlice(to, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 lower_result, + RangeQuery(min_key_, from_slice, true, false)); + PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 upper_result, + RangeQuery(to_slice, max_key_, false, true)); + lower_result.Or(upper_result); + return lower_result.GetBitmap(); + }); } Result> BTreeGlobalIndexReader::VisitAnd( @@ -730,13 +769,15 @@ Result BTreeGlobalIndexReader::RangeQuery(const MemorySli } } + // Compare key with bounds using the comparator + if (!comparator_) { + return Status::Invalid("Comparator is not set for BTreeGlobalIndexReader"); + } + // Iterate through entries in the data block while (data_iterator->HasNext()) { PAIMON_ASSIGN_OR_RAISE(std::unique_ptr entry, data_iterator->Next()); - - // Compare key with bounds using the comparator - const auto& comparator = comparator_; - int cmp_lower = comparator ? comparator(entry->key, lower_bound) : 0; + int cmp_lower = comparator_(entry->key, lower_bound); // Check lower bound if (!lower_inclusive && cmp_lower == 0) { @@ -744,7 +785,7 @@ Result BTreeGlobalIndexReader::RangeQuery(const MemorySli } // Check upper bound - int cmp_upper = comparator ? comparator(entry->key, upper_bound) : 0; + int cmp_upper = comparator_(entry->key, upper_bound); if (cmp_upper > 0 || (!upper_inclusive && cmp_upper == 0)) { return result; @@ -797,7 +838,8 @@ Result BTreeGlobalIndexReader::AllNonNullRows() { } // Helper function to convert Literal to MemorySlice -static Result LiteralToMemorySlice(const Literal& literal, MemoryPool* pool) { +static Result LiteralToMemorySlice(const Literal& literal, MemoryPool* pool, + int32_t ts_precision) { if (literal.IsNull()) { return Status::Invalid("Cannot convert null literal to MemorySlice for btree index query"); } @@ -806,158 +848,98 @@ static Result LiteralToMemorySlice(const Literal& literal, MemoryPo // Handle string/binary types if (type == FieldType::STRING || type == FieldType::BINARY) { - try { - auto str_value = literal.GetValue(); - auto bytes = std::make_shared(str_value, pool); - return MemorySlice::Wrap(bytes); - } catch (const std::exception& e) { - return Status::Invalid("Failed to convert string/binary literal to MemorySlice: " + - std::string(e.what())); - } + auto str_value = literal.GetValue(); + auto bytes = std::make_shared(str_value, pool); + return MemorySlice::Wrap(bytes); } // Handle integer types if (type == FieldType::BIGINT) { - try { - auto value = literal.GetValue(); - auto bytes = std::make_shared(8, pool); - bytes->data()[0] = static_cast(value & 0xFF); - bytes->data()[1] = static_cast((value >> 8) & 0xFF); - bytes->data()[2] = static_cast((value >> 16) & 0xFF); - bytes->data()[3] = static_cast((value >> 24) & 0xFF); - bytes->data()[4] = static_cast((value >> 32) & 0xFF); - bytes->data()[5] = static_cast((value >> 40) & 0xFF); - bytes->data()[6] = static_cast((value >> 48) & 0xFF); - bytes->data()[7] = static_cast((value >> 56) & 0xFF); - return MemorySlice::Wrap(bytes); - } catch (const std::exception& e) { - return Status::Invalid("Failed to convert bigint literal to MemorySlice: " + - std::string(e.what())); - } + auto value = literal.GetValue(); + auto bytes = std::make_shared(8, pool); + memcpy(bytes->data(), &value, sizeof(int64_t)); + return MemorySlice::Wrap(bytes); } if (type == FieldType::INT) { - try { - auto value = literal.GetValue(); - auto bytes = std::make_shared(4, pool); - bytes->data()[0] = static_cast(value & 0xFF); - bytes->data()[1] = static_cast((value >> 8) & 0xFF); - bytes->data()[2] = static_cast((value >> 16) & 0xFF); - bytes->data()[3] = static_cast((value >> 24) & 0xFF); - return MemorySlice::Wrap(bytes); - } catch (const std::exception& e) { - return Status::Invalid("Failed to convert int literal to MemorySlice: " + - std::string(e.what())); - } + auto value = literal.GetValue(); + auto bytes = std::make_shared(4, pool); + memcpy(bytes->data(), &value, sizeof(int32_t)); + return MemorySlice::Wrap(bytes); } if (type == FieldType::TINYINT) { - try { - auto value = literal.GetValue(); - auto bytes = std::make_shared(1, pool); - bytes->data()[0] = static_cast(value); - return MemorySlice::Wrap(bytes); - } catch (const std::exception& e) { - return Status::Invalid("Failed to convert tinyint literal to MemorySlice: " + - std::string(e.what())); - } + auto value = literal.GetValue(); + auto bytes = std::make_shared(1, pool); + bytes->data()[0] = static_cast(value); + return MemorySlice::Wrap(bytes); } if (type == FieldType::SMALLINT) { - try { - auto value = literal.GetValue(); - auto bytes = std::make_shared(2, pool); - bytes->data()[0] = static_cast(value & 0xFF); - bytes->data()[1] = static_cast((value >> 8) & 0xFF); - return MemorySlice::Wrap(bytes); - } catch (const std::exception& e) { - return Status::Invalid("Failed to convert smallint literal to MemorySlice: " + - std::string(e.what())); - } + auto value = literal.GetValue(); + auto bytes = std::make_shared(2, pool); + memcpy(bytes->data(), &value, sizeof(int16_t)); + return MemorySlice::Wrap(bytes); } if (type == FieldType::BOOLEAN) { - try { - bool value = literal.GetValue(); - // Convert to string "1" or "0" to match the format used in BTreeGlobalIndexWriter - std::string str_value = value ? "1" : "0"; - auto bytes = std::make_shared(str_value, pool); - return MemorySlice::Wrap(bytes); - } catch (const std::exception& e) { - return Status::Invalid("Failed to convert boolean literal to MemorySlice: " + - std::string(e.what())); - } + bool value = literal.GetValue(); + auto bytes = std::make_shared(1, pool); + bytes->data()[0] = value ? 1 : 0; + return MemorySlice::Wrap(bytes); } if (type == FieldType::FLOAT) { - try { - auto value = literal.GetValue(); - // Convert to string to match the format used in BTreeGlobalIndexWriter - std::string str_value = std::to_string(value); - auto bytes = std::make_shared(str_value, pool); - return MemorySlice::Wrap(bytes); - } catch (const std::exception& e) { - return Status::Invalid("Failed to convert float literal to MemorySlice: " + - std::string(e.what())); - } + auto value = literal.GetValue(); + auto bytes = std::make_shared(sizeof(float), pool); + memcpy(bytes->data(), &value, sizeof(float)); + return MemorySlice::Wrap(bytes); } if (type == FieldType::DOUBLE) { - try { - auto value = literal.GetValue(); - // Convert to string to match the format used in BTreeGlobalIndexWriter - std::string str_value = std::to_string(value); - auto bytes = std::make_shared(str_value, pool); - return MemorySlice::Wrap(bytes); - } catch (const std::exception& e) { - return Status::Invalid("Failed to convert double literal to MemorySlice: " + - std::string(e.what())); - } + auto value = literal.GetValue(); + auto bytes = std::make_shared(sizeof(double), pool); + memcpy(bytes->data(), &value, sizeof(double)); + return MemorySlice::Wrap(bytes); } if (type == FieldType::DATE) { - try { - auto value = literal.GetValue(); - // Convert to string to match the format used in BTreeGlobalIndexWriter - std::string str_value = std::to_string(value); - auto bytes = std::make_shared(str_value, pool); - return MemorySlice::Wrap(bytes); - } catch (const std::exception& e) { - return Status::Invalid("Failed to convert date literal to MemorySlice: " + - std::string(e.what())); - } + // DATE is stored as int32_t to match Java's writeInt + auto value = literal.GetValue(); + auto bytes = std::make_shared(sizeof(int32_t), pool); + memcpy(bytes->data(), &value, sizeof(int32_t)); + return MemorySlice::Wrap(bytes); } if (type == FieldType::TIMESTAMP) { - try { - auto value = literal.GetValue(); - // Convert to string to match the format used in BTreeGlobalIndexWriter - std::string str_value = std::to_string(value); - auto bytes = std::make_shared(str_value, pool); + auto ts = literal.GetValue(); + if (Timestamp::IsCompact(ts_precision)) { + // compact: writeLong(millisecond) + int64_t value = ts.GetMillisecond(); + auto bytes = std::make_shared(sizeof(int64_t), pool); + memcpy(bytes->data(), &value, sizeof(int64_t)); return MemorySlice::Wrap(bytes); - } catch (const std::exception& e) { - return Status::Invalid("Failed to convert timestamp literal to MemorySlice: " + - std::string(e.what())); + } else { + // non-compact: writeLong(millisecond) + writeVarLenInt(nanoOfMillisecond) + MemorySliceOutput ts_out(13, pool); + ts_out.WriteValue(ts.GetMillisecond()); + ts_out.WriteVarLenInt(ts.GetNanoOfMillisecond()); + return ts_out.ToSlice(); } } if (type == FieldType::DECIMAL) { - try { - auto decimal_value = literal.GetValue(); - auto bytes = std::make_shared(16, pool); - uint64_t high_bits = decimal_value.HighBits(); - uint64_t low_bits = decimal_value.LowBits(); - for (int i = 0; i < 8; ++i) { - bytes->data()[i] = static_cast((high_bits >> (56 - i * 8)) & 0xFF); - } - for (int i = 0; i < 8; ++i) { - bytes->data()[8 + i] = static_cast((low_bits >> (56 - i * 8)) & 0xFF); - } - return MemorySlice::Wrap(bytes); - } catch (const std::exception& e) { - return Status::Invalid("Failed to convert decimal literal to MemorySlice: " + - std::string(e.what())); + auto decimal_value = literal.GetValue(); + auto bytes = std::make_shared(16, pool); + uint64_t high_bits = decimal_value.HighBits(); + uint64_t low_bits = decimal_value.LowBits(); + for (int i = 0; i < 8; ++i) { + bytes->data()[i] = static_cast((high_bits >> (56 - i * 8)) & 0xFF); + } + for (int i = 0; i < 8; ++i) { + bytes->data()[8 + i] = static_cast((low_bits >> (56 - i * 8)) & 0xFF); } + return MemorySlice::Wrap(bytes); } return Status::NotImplemented("Literal type " + FieldTypeUtils::FieldTypeToString(type) + diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.h b/src/paimon/common/global_index/btree/btree_global_indexer.h index 7ef23eb85..90d1688c0 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.h +++ b/src/paimon/common/global_index/btree/btree_global_indexer.h @@ -62,7 +62,8 @@ class BTreeGlobalIndexReader : public GlobalIndexReader { const std::shared_ptr& null_bitmap, const MemorySlice& min_key, const MemorySlice& max_key, bool has_min_key, const std::vector& files, const std::shared_ptr& pool, - std::function comparator); + std::function comparator, + int32_t ts_precision); Result> VisitIsNotNull() override; @@ -135,6 +136,7 @@ class BTreeGlobalIndexReader : public GlobalIndexReader { std::vector files_; std::shared_ptr pool_; std::function comparator_; + int32_t ts_precision_; }; } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_index_meta.cpp b/src/paimon/common/global_index/btree/btree_index_meta.cpp index cf795a7e4..78c6897de 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta.cpp +++ b/src/paimon/common/global_index/btree/btree_index_meta.cpp @@ -34,7 +34,7 @@ std::shared_ptr BTreeIndexMeta::Deserialize(const std::shared_pt if (last_key_len) { last_key = input.ReadSlice(last_key_len).CopyBytes(pool); } - auto has_nulls = input.ReadByte() == 1; + auto has_nulls = static_cast(input.ReadByte()) == 1; return std::make_shared(first_key, last_key, has_nulls); } @@ -43,7 +43,7 @@ std::shared_ptr BTreeIndexMeta::Serialize(paimon::MemoryPool* pool) const // has_nulls(1) int32_t first_key_size = first_key_ ? first_key_->size() : 0; int32_t last_key_size = last_key_ ? last_key_->size() : 0; - int32_t total_size = 4 + first_key_size + 4 + last_key_size + 1; + int32_t total_size = Size(); MemorySliceOutput output(total_size, pool); @@ -62,8 +62,7 @@ std::shared_ptr BTreeIndexMeta::Serialize(paimon::MemoryPool* pool) const // Write has_nulls output.WriteValue(static_cast(has_nulls_ ? 1 : 0)); - auto slice = output.ToSlice(); - return slice.CopyBytes(pool); + return output.ToSlice().GetHeapMemory(); } } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_index_meta.h b/src/paimon/common/global_index/btree/btree_index_meta.h index 7059f8dff..f6507ad2c 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta.h +++ b/src/paimon/common/global_index/btree/btree_index_meta.h @@ -36,11 +36,11 @@ class BTreeIndexMeta { bool has_nulls) : first_key_(first_key), last_key_(last_key), has_nulls_(has_nulls) {} - std::shared_ptr FirstKey() const { + const std::shared_ptr& FirstKey() const { return first_key_; } - std::shared_ptr LastKey() const { + const std::shared_ptr& LastKey() const { return last_key_; } diff --git a/src/paimon/common/global_index/btree/btree_index_meta_test.cpp b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp index 1017e958e..d4ffde8f5 100644 --- a/src/paimon/common/global_index/btree/btree_index_meta_test.cpp +++ b/src/paimon/common/global_index/btree/btree_index_meta_test.cpp @@ -60,7 +60,7 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeNormalKeys) { EXPECT_EQ(std::string(deserialized_last->data(), deserialized_last->size()), "last_key_data"); // Verify has_nulls - EXPECT_TRUE(deserialized->HasNulls()); + ASSERT_TRUE(deserialized->HasNulls()); } TEST_F(BTreeIndexMetaTest, SerializeDeserializeEmptyKeys) { @@ -76,14 +76,14 @@ TEST_F(BTreeIndexMetaTest, SerializeDeserializeEmptyKeys) { ASSERT_NE(deserialized, nullptr); // Verify keys are null - EXPECT_EQ(deserialized->FirstKey(), nullptr); - EXPECT_EQ(deserialized->LastKey(), nullptr); + ASSERT_EQ(deserialized->FirstKey(), nullptr); + ASSERT_EQ(deserialized->LastKey(), nullptr); // Verify has_nulls - EXPECT_TRUE(deserialized->HasNulls()); + ASSERT_TRUE(deserialized->HasNulls()); // Verify OnlyNulls - EXPECT_TRUE(deserialized->OnlyNulls()); + ASSERT_TRUE(deserialized->OnlyNulls()); } TEST_F(BTreeIndexMetaTest, HasNullsAndOnlyNulls) { diff --git a/src/paimon/common/io/cache/cache.h b/src/paimon/common/io/cache/cache.h index ef893539f..b4dbcd794 100644 --- a/src/paimon/common/io/cache/cache.h +++ b/src/paimon/common/io/cache/cache.h @@ -49,22 +49,9 @@ class PAIMON_EXPORT Cache { virtual size_t Size() const = 0; }; -class PAIMON_EXPORT NoCache : public Cache { - public: - Result> Get( - const std::shared_ptr& key, - std::function>(const std::shared_ptr&)> - supplier) override; - void Put(const std::shared_ptr& key, - const std::shared_ptr& value) override; - void Invalidate(const std::shared_ptr& key) override; - void InvalidateAll() override; - size_t Size() const override; -}; - class CacheValue { public: - explicit CacheValue(const MemorySegment& segment, CacheCallback callback = nullptr) + explicit CacheValue(const MemorySegment& segment, CacheCallback callback) : segment_(segment), callback_(std::move(callback)) {} const MemorySegment& GetSegment() const { diff --git a/src/paimon/common/io/cache/lru_cache.h b/src/paimon/common/io/cache/lru_cache.h index 2a4e85afc..a958cfe20 100644 --- a/src/paimon/common/io/cache/lru_cache.h +++ b/src/paimon/common/io/cache/lru_cache.h @@ -39,7 +39,7 @@ namespace paimon { /// capacity is measured in bytes (sum of MemorySegment sizes) /// when an entry is evicted, its CacheCallback is invoked to notify the upper layer /// @note Thread-safe: all public methods are protected by mutex (read-write lock). -class LruCache : public Cache { +class PAIMON_EXPORT LruCache : public Cache { public: explicit LruCache(int64_t max_weight); diff --git a/src/paimon/common/sst/sst_file_writer.cpp b/src/paimon/common/sst/sst_file_writer.cpp index add91c3c3..cb099e640 100644 --- a/src/paimon/common/sst/sst_file_writer.cpp +++ b/src/paimon/common/sst/sst_file_writer.cpp @@ -50,11 +50,6 @@ Status SstFileWriter::Write(std::shared_ptr&& key, std::shared_ptr return Status::OK(); } -Status SstFileWriter::Write(const MemorySlice& slice) { - auto data = slice.ReadStringView(); - return WriteBytes(data.data(), data.size()); -} - Status SstFileWriter::Flush() { if (data_block_writer_->Size() == 0) { return Status::OK(); diff --git a/src/paimon/common/sst/sst_file_writer.h b/src/paimon/common/sst/sst_file_writer.h index fb544d69d..a04226b3e 100644 --- a/src/paimon/common/sst/sst_file_writer.h +++ b/src/paimon/common/sst/sst_file_writer.h @@ -49,8 +49,6 @@ class PAIMON_EXPORT SstFileWriter { Status Write(std::shared_ptr&& key, std::shared_ptr&& value); - Status Write(const MemorySlice& slice); - Status Flush(); Result WriteIndexBlock(); From ff169c99efac54a4492cfe6d4ce6027ecaf2f5d9 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 15 Apr 2026 11:36:12 +0800 Subject: [PATCH 21/28] address --- src/paimon/common/io/cache/lru_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/common/io/cache/lru_cache.h b/src/paimon/common/io/cache/lru_cache.h index e58175687..96ce38c61 100644 --- a/src/paimon/common/io/cache/lru_cache.h +++ b/src/paimon/common/io/cache/lru_cache.h @@ -34,7 +34,7 @@ namespace paimon { /// CacheCallback is invoked to notify the upper layer. /// /// @note Thread-safe: all public methods are protected by the underlying GenericLruCache lock. -class LruCache : public Cache { +class PAIMON_EXPORT LruCache : public Cache { public: explicit LruCache(int64_t max_weight); From 1f72308a4cbefedd26078e065fe0f5d89079c889 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 15 Apr 2026 15:39:30 +0800 Subject: [PATCH 22/28] fix --- include/paimon/defs.h | 3 -- src/paimon/common/defs.cpp | 1 - .../empty/empty_file_index_reader.h | 23 +-------------- .../common/file_index/file_index_reader.cpp | 28 ++++++------------- 4 files changed, 9 insertions(+), 46 deletions(-) diff --git a/include/paimon/defs.h b/include/paimon/defs.h index fa360ea0b..3d4b8afcd 100644 --- a/include/paimon/defs.h +++ b/include/paimon/defs.h @@ -439,9 +439,6 @@ struct PAIMON_EXPORT Options { /// "btree-index.records-per-range" - The expected number of records per BTree Index File. /// Default value is 1000000. static const char BTREE_INDEX_RECORDS_PER_RANGE[]; - /// "btree-index.build.max-parallelism" - The max parallelism of Flink/Spark for building - /// BTreeIndex. Default value is 4096. - static const char BTREE_INDEX_BUILD_MAX_PARALLELISM[]; }; static constexpr int64_t BATCH_WRITE_COMMIT_IDENTIFIER = std::numeric_limits::max(); diff --git a/src/paimon/common/defs.cpp b/src/paimon/common/defs.cpp index 6b6406d17..7cab45ad8 100644 --- a/src/paimon/common/defs.cpp +++ b/src/paimon/common/defs.cpp @@ -125,6 +125,5 @@ const char Options::BTREE_INDEX_BLOCK_SIZE[] = "btree-index.block-size"; const char Options::BTREE_INDEX_CACHE_SIZE[] = "btree-index.cache-size"; const char Options::BTREE_INDEX_HIGH_PRIORITY_POOL_RATIO[] = "btree-index.high-priority-pool-ratio"; const char Options::BTREE_INDEX_RECORDS_PER_RANGE[] = "btree-index.records-per-range"; -const char Options::BTREE_INDEX_BUILD_MAX_PARALLELISM[] = "btree-index.build.max-parallelism"; } // namespace paimon diff --git a/src/paimon/common/file_index/empty/empty_file_index_reader.h b/src/paimon/common/file_index/empty/empty_file_index_reader.h index bed4e3332..5c07461a1 100644 --- a/src/paimon/common/file_index/empty/empty_file_index_reader.h +++ b/src/paimon/common/file_index/empty/empty_file_index_reader.h @@ -66,17 +66,6 @@ class EmptyFileIndexReader : public FileIndexReader { return FileIndexResult::Skip(); } - Result> VisitNotEqual(const Literal& literal) override { - // Empty file has no data, so nothing to return - return FileIndexResult::Skip(); - } - - Result> VisitNotIn( - const std::vector& literals) override { - // Empty file has no data, so nothing to return - return FileIndexResult::Skip(); - } - Result> VisitBetween(const Literal& from, const Literal& to) override { return FileIndexResult::Skip(); @@ -84,17 +73,7 @@ class EmptyFileIndexReader : public FileIndexReader { Result> VisitNotBetween(const Literal& from, const Literal& to) override { - return FileIndexResult::Skip(); - } - - Result> VisitAnd( - const std::vector>>& children) override { - return FileIndexResult::Skip(); - } - - Result> VisitOr( - const std::vector>>& children) override { - return FileIndexResult::Skip(); + return FileIndexResult::Remain(); } }; diff --git a/src/paimon/common/file_index/file_index_reader.cpp b/src/paimon/common/file_index/file_index_reader.cpp index ba5996448..fa99a479d 100644 --- a/src/paimon/common/file_index/file_index_reader.cpp +++ b/src/paimon/common/file_index/file_index_reader.cpp @@ -106,19 +106,13 @@ Result> FileIndexReader::VisitAnd( } // Start with the first child - auto result = children[0]; - if (!result.ok()) { - return result.status(); - } - auto current = std::move(result).value(); + PAIMON_RETURN_NOT_OK(children[0]); + auto current = children[0].value(); // AND with remaining children for (size_t i = 1; i < children.size(); ++i) { - auto child_status = children[i]; - if (!child_status.ok()) { - return child_status.status(); - } - auto child = std::move(child_status).value(); + PAIMON_RETURN_NOT_OK(children[i]); + auto child = children[i].value(); PAIMON_ASSIGN_OR_RAISE(current, current->And(child)); } @@ -132,19 +126,13 @@ Result> FileIndexReader::VisitOr( } // Start with the first child - auto result = children[0]; - if (!result.ok()) { - return result.status(); - } - auto current = std::move(result).value(); + PAIMON_RETURN_NOT_OK(children[0]); + auto current = children[0].value(); // OR with remaining children for (size_t i = 1; i < children.size(); ++i) { - auto child_status = children[i]; - if (!child_status.ok()) { - return child_status.status(); - } - auto child = std::move(child_status).value(); + PAIMON_RETURN_NOT_OK(children[i]); + auto child = children[i].value(); PAIMON_ASSIGN_OR_RAISE(current, current->Or(child)); } From 5709dc5b60c4eb22dbd6d36a3bd1b4befcc5231a Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 15 Apr 2026 17:52:47 +0800 Subject: [PATCH 23/28] address --- src/paimon/CMakeLists.txt | 2 - src/paimon/common/global_index/CMakeLists.txt | 1 + .../btree_global_index_integration_test.cpp | 25 +- .../btree/btree_global_index_reader.cpp | 584 ++++++++++++++++++ .../btree/btree_global_index_reader.h | 118 ++++ .../btree/btree_global_index_writer.cpp | 70 ++- .../btree/btree_global_index_writer.h | 20 +- .../btree/btree_global_index_writer_test.cpp | 35 +- .../btree/btree_global_indexer.cpp | 578 +---------------- .../global_index/btree/btree_global_indexer.h | 92 +-- src/paimon/common/utils/roaring_bitmap32.cpp | 4 + .../common/utils/roaring_bitmap64_test.cpp | 104 ++++ .../common/utils/roaring_navigable_map64.cpp | 262 -------- .../common/utils/roaring_navigable_map64.h | 238 ------- .../utils/roaring_navigable_map64_test.cpp | 113 ---- 15 files changed, 914 insertions(+), 1332 deletions(-) create mode 100644 src/paimon/common/global_index/btree/btree_global_index_reader.cpp create mode 100644 src/paimon/common/global_index/btree/btree_global_index_reader.h delete mode 100644 src/paimon/common/utils/roaring_navigable_map64.cpp delete mode 100644 src/paimon/common/utils/roaring_navigable_map64.h delete mode 100644 src/paimon/common/utils/roaring_navigable_map64_test.cpp diff --git a/src/paimon/CMakeLists.txt b/src/paimon/CMakeLists.txt index 79150fa7e..6960bdf43 100644 --- a/src/paimon/CMakeLists.txt +++ b/src/paimon/CMakeLists.txt @@ -142,7 +142,6 @@ set(PAIMON_COMMON_SRCS common/utils/byte_range_combiner.cpp common/utils/roaring_bitmap32.cpp common/utils/roaring_bitmap64.cpp - common/utils/roaring_navigable_map64.cpp common/utils/status.cpp common/utils/string_utils.cpp) @@ -474,7 +473,6 @@ if(PAIMON_BUILD_TESTS) common/utils/rapidjson_util_test.cpp common/utils/roaring_bitmap32_test.cpp common/utils/roaring_bitmap64_test.cpp - common/utils/roaring_navigable_map64_test.cpp common/utils/range_helper_test.cpp common/utils/read_ahead_cache_test.cpp common/io/cache/lru_cache_test.cpp diff --git a/src/paimon/common/global_index/CMakeLists.txt b/src/paimon/common/global_index/CMakeLists.txt index 88e2fb556..1778b5686 100644 --- a/src/paimon/common/global_index/CMakeLists.txt +++ b/src/paimon/common/global_index/CMakeLists.txt @@ -18,6 +18,7 @@ set(PAIMON_GLOBAL_INDEX_SRC btree/btree_file_footer.cpp btree/btree_global_index_factory.cpp btree/btree_global_indexer.cpp + btree/btree_global_index_reader.cpp btree/btree_global_index_writer.cpp btree/btree_index_meta.cpp rangebitmap/range_bitmap_global_index.cpp diff --git a/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp index 267bdc2ed..7be8a7214 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp @@ -123,8 +123,9 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadIntData) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, - pool_, 4096, 100000); + ASSERT_OK_AND_ASSIGN( + auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, + 4096, 100000)); // Create an Arrow array with int values // Row IDs: 0->1, 1->2, 2->3, 3->2, 4->1, 5->4, 6->5, 7->5, 8->5 @@ -189,8 +190,9 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadStringData) { auto c_schema = CreateArrowSchema(arrow::utf8(), "string_field"); // Create the BTree global index writer - auto writer = std::make_shared("string_field", c_schema.get(), - file_writer, pool_, 4096, 100000); + ASSERT_OK_AND_ASSIGN(auto writer, + BTreeGlobalIndexWriter::Create("string_field", c_schema.get(), file_writer, + pool_, 4096, 100000)); // Create an Arrow array with string values auto array = arrow::ipc::internal::json::ArrayFromJSON( @@ -245,8 +247,9 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadWithNulls) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, - pool_, 4096, 100000); + ASSERT_OK_AND_ASSIGN( + auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, + 4096, 100000)); // Create an Arrow array with null values // Row IDs: 0->1, 1->null, 2->3, 3->null, 4->5 @@ -309,8 +312,9 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadRangeQuery) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, - pool_, 4096, 100000); + ASSERT_OK_AND_ASSIGN( + auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, + 4096, 100000)); // Create an Arrow array with int values auto array = @@ -372,8 +376,9 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadInQuery) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, - pool_, 4096, 100000); + ASSERT_OK_AND_ASSIGN( + auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, + 4096, 100000)); // Create an Arrow array with int values auto array = diff --git a/src/paimon/common/global_index/btree/btree_global_index_reader.cpp b/src/paimon/common/global_index/btree/btree_global_index_reader.cpp new file mode 100644 index 000000000..9a3c88aa6 --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_global_index_reader.cpp @@ -0,0 +1,584 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "paimon/common/global_index/btree/btree_global_index_reader.h" + +#include + +#include "paimon/common/memory/memory_slice.h" +#include "paimon/common/memory/memory_slice_input.h" +#include "paimon/common/memory/memory_slice_output.h" +#include "paimon/common/utils/date_time_utils.h" +#include "paimon/common/utils/field_type_utils.h" +#include "paimon/data/timestamp.h" +#include "paimon/global_index/bitmap_global_index_result.h" +#include "paimon/memory/bytes.h" +#include "paimon/predicate/literal.h" + +namespace paimon { + +// Helper function to convert Literal to MemorySlice +static Result LiteralToMemorySlice(const Literal& literal, MemoryPool* pool, + int32_t ts_precision) { + if (literal.IsNull()) { + return Status::Invalid("Cannot convert null literal to MemorySlice for btree index query"); + } + + auto type = literal.GetType(); + + // Handle string/binary types + if (type == FieldType::STRING || type == FieldType::BINARY) { + auto str_value = literal.GetValue(); + auto bytes = std::make_shared(str_value, pool); + return MemorySlice::Wrap(bytes); + } + + // Handle integer types + if (type == FieldType::BIGINT) { + auto value = literal.GetValue(); + auto bytes = std::make_shared(8, pool); + memcpy(bytes->data(), &value, sizeof(int64_t)); + return MemorySlice::Wrap(bytes); + } + + if (type == FieldType::INT) { + auto value = literal.GetValue(); + auto bytes = std::make_shared(4, pool); + memcpy(bytes->data(), &value, sizeof(int32_t)); + return MemorySlice::Wrap(bytes); + } + + if (type == FieldType::TINYINT) { + auto value = literal.GetValue(); + auto bytes = std::make_shared(1, pool); + bytes->data()[0] = static_cast(value); + return MemorySlice::Wrap(bytes); + } + + if (type == FieldType::SMALLINT) { + auto value = literal.GetValue(); + auto bytes = std::make_shared(2, pool); + memcpy(bytes->data(), &value, sizeof(int16_t)); + return MemorySlice::Wrap(bytes); + } + + if (type == FieldType::BOOLEAN) { + bool value = literal.GetValue(); + auto bytes = std::make_shared(1, pool); + bytes->data()[0] = value ? 1 : 0; + return MemorySlice::Wrap(bytes); + } + + if (type == FieldType::FLOAT) { + auto value = literal.GetValue(); + auto bytes = std::make_shared(sizeof(float), pool); + memcpy(bytes->data(), &value, sizeof(float)); + return MemorySlice::Wrap(bytes); + } + + if (type == FieldType::DOUBLE) { + auto value = literal.GetValue(); + auto bytes = std::make_shared(sizeof(double), pool); + memcpy(bytes->data(), &value, sizeof(double)); + return MemorySlice::Wrap(bytes); + } + + if (type == FieldType::DATE) { + // DATE is stored as int32_t to match Java's writeInt + auto value = literal.GetValue(); + auto bytes = std::make_shared(sizeof(int32_t), pool); + memcpy(bytes->data(), &value, sizeof(int32_t)); + return MemorySlice::Wrap(bytes); + } + + if (type == FieldType::TIMESTAMP) { + auto ts = literal.GetValue(); + if (Timestamp::IsCompact(ts_precision)) { + // compact: writeLong(millisecond) + int64_t value = ts.GetMillisecond(); + auto bytes = std::make_shared(sizeof(int64_t), pool); + memcpy(bytes->data(), &value, sizeof(int64_t)); + return MemorySlice::Wrap(bytes); + } else { + // non-compact: writeLong(millisecond) + writeVarLenInt(nanoOfMillisecond) + MemorySliceOutput ts_out(13, pool); + ts_out.WriteValue(ts.GetMillisecond()); + ts_out.WriteVarLenInt(ts.GetNanoOfMillisecond()); + return ts_out.ToSlice(); + } + } + + if (type == FieldType::DECIMAL) { + auto decimal_value = literal.GetValue(); + auto bytes = std::make_shared(16, pool); + uint64_t high_bits = decimal_value.HighBits(); + uint64_t low_bits = decimal_value.LowBits(); + for (int i = 0; i < 8; ++i) { + bytes->data()[i] = static_cast((high_bits >> (56 - i * 8)) & 0xFF); + } + for (int i = 0; i < 8; ++i) { + bytes->data()[8 + i] = static_cast((low_bits >> (56 - i * 8)) & 0xFF); + } + return MemorySlice::Wrap(bytes); + } + + return Status::NotImplemented("Literal type " + FieldTypeUtils::FieldTypeToString(type) + + " not yet supported in btree index"); +} + +BTreeGlobalIndexReader::BTreeGlobalIndexReader( + const std::shared_ptr& sst_file_reader, + const std::shared_ptr& null_bitmap, const MemorySlice& min_key, + const MemorySlice& max_key, bool has_min_key, const std::vector& files, + const std::shared_ptr& pool, + std::function comparator, int32_t ts_precision) + : sst_file_reader_(sst_file_reader), + null_bitmap_(null_bitmap), + min_key_(min_key), + max_key_(max_key), + has_min_key_(has_min_key), + files_(files), + pool_(pool), + comparator_(std::move(comparator)), + ts_precision_(ts_precision) {} + +Result> BTreeGlobalIndexReader::VisitIsNotNull() { + return std::make_shared([this]() -> Result { + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); + return result; + }); +} + +Result> BTreeGlobalIndexReader::VisitIsNull() { + return std::make_shared( + [this]() -> Result { return *null_bitmap_; }); +} + +Result> BTreeGlobalIndexReader::VisitStartsWith( + const Literal& prefix) { + return std::make_shared([this, &prefix]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto prefix_slice, + LiteralToMemorySlice(prefix, pool_.get(), ts_precision_)); + + auto prefix_type = prefix.GetType(); + + if (prefix_type == FieldType::STRING || prefix_type == FieldType::BINARY) { + auto prefix_bytes = prefix_slice.GetHeapMemory(); + if (!prefix_bytes || prefix_bytes->size() == 0) { + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); + return result; + } + + std::string upper_bound_str(prefix_bytes->data(), prefix_bytes->size()); + bool overflow = true; + for (int i = static_cast(upper_bound_str.size()) - 1; i >= 0 && overflow; --i) { + auto c = static_cast(upper_bound_str[i]); + if (c < 0xFF) { + upper_bound_str[i] = c + 1; + overflow = false; + } else { + upper_bound_str[i] = 0x00; + } + } + + if (!overflow) { + auto upper_bytes = std::make_shared(upper_bound_str, pool_.get()); + auto upper_bound_slice = MemorySlice::Wrap(upper_bytes); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, + RangeQuery(prefix_slice, upper_bound_slice, true, false)); + return result; + } else { + // If overflow (all bytes were 0xFF), use max_key_ as upper bound + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, + RangeQuery(prefix_slice, max_key_, true, false)); + return result; + } + } + + return RoaringBitmap64(); + }); +} + +Result> BTreeGlobalIndexReader::VisitEndsWith( + const Literal& suffix) { + return std::make_shared([this]() -> Result { + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); + return result; + }); +} + +Result> BTreeGlobalIndexReader::VisitContains( + const Literal& literal) { + return std::make_shared([this]() -> Result { + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); + return result; + }); +} + +Result> BTreeGlobalIndexReader::VisitLike( + const Literal& literal) { + if (literal.IsNull()) { + return Status::Invalid("LIKE pattern cannot be null"); + } + + auto pattern = literal.GetValue(); + + bool is_prefix_pattern = false; + std::string prefix; + + size_t first_wildcard = pattern.find_first_of("_%"); + + if (first_wildcard != std::string::npos) { + if (pattern[first_wildcard] == '%' && first_wildcard == pattern.length() - 1) { + bool has_wildcard_in_prefix = false; + for (size_t i = 0; i < first_wildcard; ++i) { + if (pattern[i] == '_' || pattern[i] == '%') { + has_wildcard_in_prefix = true; + break; + } + } + if (!has_wildcard_in_prefix) { + is_prefix_pattern = true; + prefix = pattern.substr(0, first_wildcard); + } + } + } + + if (is_prefix_pattern) { + Literal prefix_literal(FieldType::STRING, prefix.c_str(), prefix.length()); + return VisitStartsWith(prefix_literal); + } + + return std::make_shared([this]() -> Result { + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); + return result; + }); +} + +Result> BTreeGlobalIndexReader::VisitLessThan( + const Literal& literal) { + return std::make_shared([this, &literal]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, + RangeQuery(min_key_, literal_slice, true, false)); + return result; + }); +} + +Result> BTreeGlobalIndexReader::VisitGreaterOrEqual( + const Literal& literal) { + return std::make_shared([this, &literal]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, + RangeQuery(literal_slice, max_key_, true, true)); + return result; + }); +} + +Result> BTreeGlobalIndexReader::VisitNotEqual( + const Literal& literal) { + return std::make_shared([this, &literal]() -> Result { + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 equal_result, + RangeQuery(literal_slice, literal_slice, true, true)); + result -= equal_result; + return result; + }); +} + +Result> BTreeGlobalIndexReader::VisitLessOrEqual( + const Literal& literal) { + return std::make_shared([this, &literal]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, + RangeQuery(min_key_, literal_slice, true, true)); + return result; + }); +} + +Result> BTreeGlobalIndexReader::VisitEqual( + const Literal& literal) { + return std::make_shared([this, &literal]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, + RangeQuery(literal_slice, literal_slice, true, true)); + return result; + }); +} + +Result> BTreeGlobalIndexReader::VisitGreaterThan( + const Literal& literal) { + return std::make_shared([this, &literal]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, + RangeQuery(literal_slice, max_key_, false, true)); + return result; + }); +} + +Result> BTreeGlobalIndexReader::VisitIn( + const std::vector& literals) { + return std::make_shared( + [this, &literals]() -> Result { + RoaringBitmap64 result; + for (const auto& literal : literals) { + PAIMON_ASSIGN_OR_RAISE(auto literal_slice, + LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 literal_result, + RangeQuery(literal_slice, literal_slice, true, true)); + result |= literal_result; + } + return result; + }); +} + +Result> BTreeGlobalIndexReader::VisitNotIn( + const std::vector& literals) { + return std::make_shared( + [this, &literals]() -> Result { + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, AllNonNullRows()); + + PAIMON_ASSIGN_OR_RAISE(auto in_result_ptr, VisitIn(literals)); + PAIMON_ASSIGN_OR_RAISE(auto in_iterator, in_result_ptr->CreateIterator()); + + RoaringBitmap64 in_bitmap; + while (in_iterator->HasNext()) { + in_bitmap.Add(in_iterator->Next()); + } + + result -= in_bitmap; + return result; + }); +} + +Result> BTreeGlobalIndexReader::VisitBetween(const Literal& from, + const Literal& to) { + return std::make_shared([this, &from, + &to]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto from_slice, + LiteralToMemorySlice(from, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(auto to_slice, LiteralToMemorySlice(to, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 result, + RangeQuery(from_slice, to_slice, true, true)); + return result; + }); +} + +Result> BTreeGlobalIndexReader::VisitNotBetween( + const Literal& from, const Literal& to) { + return std::make_shared([this, &from, + &to]() -> Result { + PAIMON_ASSIGN_OR_RAISE(auto from_slice, + LiteralToMemorySlice(from, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(auto to_slice, LiteralToMemorySlice(to, pool_.get(), ts_precision_)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 lower_result, + RangeQuery(min_key_, from_slice, true, false)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap64 upper_result, + RangeQuery(to_slice, max_key_, false, true)); + lower_result |= upper_result; + return lower_result; + }); +} + +Result> BTreeGlobalIndexReader::VisitAnd( + const std::vector>>& children) { + return std::make_shared([&children]() -> Result { + if (children.empty()) { + return Status::Invalid("VisitAnd called with no children"); + } + + auto first_result_status = children[0]; + if (!first_result_status.ok()) { + return first_result_status.status(); + } + auto first_result = std::move(first_result_status).value(); + PAIMON_ASSIGN_OR_RAISE(auto first_iterator, first_result->CreateIterator()); + + RoaringBitmap64 result_bitmap; + while (first_iterator->HasNext()) { + result_bitmap.Add(first_iterator->Next()); + } + + for (size_t i = 1; i < children.size(); ++i) { + auto child_status = children[i]; + if (!child_status.ok()) { + return child_status.status(); + } + auto child = std::move(child_status).value(); + PAIMON_ASSIGN_OR_RAISE(auto child_iterator, child->CreateIterator()); + + RoaringBitmap64 child_bitmap; + while (child_iterator->HasNext()) { + child_bitmap.Add(child_iterator->Next()); + } + + result_bitmap &= child_bitmap; + } + + return result_bitmap; + }); +} + +Result> BTreeGlobalIndexReader::VisitOr( + const std::vector>>& children) { + return std::make_shared([&children]() -> Result { + RoaringBitmap64 result_bitmap; + + for (const auto& child_status : children) { + if (!child_status.ok()) { + return child_status.status(); + } + auto child = std::move(child_status).value(); + PAIMON_ASSIGN_OR_RAISE(auto child_iterator, child->CreateIterator()); + + while (child_iterator->HasNext()) { + result_bitmap.Add(child_iterator->Next()); + } + } + + return result_bitmap; + }); +} + +Result> BTreeGlobalIndexReader::VisitVectorSearch( + const std::shared_ptr& vector_search) { + return Status::NotImplemented("Vector search not supported in BTree index"); +} + +Result> BTreeGlobalIndexReader::VisitFullTextSearch( + const std::shared_ptr& full_text_search) { + return Status::NotImplemented("Full text search not supported in BTree index"); +} + +Result BTreeGlobalIndexReader::RangeQuery(const MemorySlice& lower_bound, + const MemorySlice& upper_bound, + bool lower_inclusive, + bool upper_inclusive) { + RoaringBitmap64 result; + + // Create an index block iterator to iterate through data blocks + auto index_iterator = sst_file_reader_->CreateIndexIterator(); + + // Seek iterator to the lower bound + auto lower_bytes = lower_bound.GetHeapMemory(); + + if (lower_bytes) { + PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] bool seek_result, + index_iterator->SeekTo(lower_bound)); + } + + // Check if there are any blocks to read + if (!index_iterator->HasNext()) { + return result; + } + + bool first_block = true; + + while (index_iterator->HasNext()) { + // Get the next data block + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr data_iterator, + sst_file_reader_->GetNextBlock(index_iterator)); + + if (!data_iterator || !data_iterator->HasNext()) { + break; + } + + // For the first block, we need to seek within the block to the exact position + if (first_block && lower_bytes) { + PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] bool found, data_iterator->SeekTo(lower_bound)); + first_block = false; + + if (!data_iterator->HasNext()) { + continue; + } + } + + // Compare key with bounds using the comparator + if (!comparator_) { + return Status::Invalid("Comparator is not set for BTreeGlobalIndexReader"); + } + + // Iterate through entries in the data block + while (data_iterator->HasNext()) { + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr entry, data_iterator->Next()); + int cmp_lower = comparator_(entry->key, lower_bound); + + // Check lower bound + if (!lower_inclusive && cmp_lower == 0) { + continue; + } + + // Check upper bound + int cmp_upper = comparator_(entry->key, upper_bound); + + if (cmp_upper > 0 || (!upper_inclusive && cmp_upper == 0)) { + return result; + } + + // Deserialize row IDs from the value + auto value_bytes = entry->value.CopyBytes(pool_.get()); + auto value_slice = MemorySlice::Wrap(value_bytes); + auto value_input = value_slice.ToInput(); + + // Read row IDs. The format is: [num_row_ids (VarLenLong)][row_id1 (VarLenLong)]... + // Use VarLenLong to match Java's DataOutputStream.writeVarLong format + PAIMON_ASSIGN_OR_RAISE(int64_t num_row_ids, value_input.ReadVarLenLong()); + + for (int64_t i = 0; i < num_row_ids; i++) { + PAIMON_ASSIGN_OR_RAISE(int64_t row_id, value_input.ReadVarLenLong()); + result.Add(row_id); + } + } + } + + return result; +} + +Result BTreeGlobalIndexReader::AllNonNullRows() { + if (files_.empty()) { + return RoaringBitmap64(); + } + + int64_t total_rows = files_[0].range_end + 1; + uint64_t null_count = null_bitmap_->Cardinality(); + + const double NULL_RATIO_THRESHOLD = 0.1; + const int64_t MAX_ROWS_FOR_SUBTRACTION = 10000000; + + bool use_subtraction = (total_rows <= MAX_ROWS_FOR_SUBTRACTION) && + (null_count < static_cast(total_rows * NULL_RATIO_THRESHOLD)); + + if (use_subtraction) { + RoaringBitmap64 result; + result.AddRange(0, total_rows); + result -= *null_bitmap_; + return result; + } + + if (!has_min_key_) { + return RoaringBitmap64(); + } + return RangeQuery(min_key_, max_key_, true, true); +} + +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_index_reader.h b/src/paimon/common/global_index/btree/btree_global_index_reader.h new file mode 100644 index 000000000..1a62ac731 --- /dev/null +++ b/src/paimon/common/global_index/btree/btree_global_index_reader.h @@ -0,0 +1,118 @@ +/* + * Copyright 2026-present Alibaba Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include + +#include "paimon/common/sst/sst_file_reader.h" +#include "paimon/global_index/global_index_io_meta.h" +#include "paimon/global_index/global_index_reader.h" +#include "paimon/utils/roaring_bitmap64.h" + +namespace paimon { + +/// Reader for BTree Global Index files. +/// This reader evaluates filter predicates against a BTree-based SST file +/// where each key maps to a list of row IDs. +class BTreeGlobalIndexReader : public GlobalIndexReader { + public: + BTreeGlobalIndexReader( + const std::shared_ptr& sst_file_reader, + const std::shared_ptr& null_bitmap, const MemorySlice& min_key, + const MemorySlice& max_key, bool has_min_key, const std::vector& files, + const std::shared_ptr& pool, + std::function comparator, + int32_t ts_precision); + + Result> VisitIsNotNull() override; + + Result> VisitIsNull() override; + + Result> VisitEqual(const Literal& literal) override; + + Result> VisitNotEqual(const Literal& literal) override; + + Result> VisitLessThan(const Literal& literal) override; + + Result> VisitLessOrEqual(const Literal& literal) override; + + Result> VisitGreaterThan(const Literal& literal) override; + + Result> VisitGreaterOrEqual(const Literal& literal) override; + + Result> VisitIn( + const std::vector& literals) override; + + Result> VisitNotIn( + const std::vector& literals) override; + + Result> VisitBetween(const Literal& from, + const Literal& to) override; + + Result> VisitNotBetween(const Literal& from, + const Literal& to) override; + + Result> VisitStartsWith(const Literal& prefix) override; + + Result> VisitEndsWith(const Literal& suffix) override; + + Result> VisitContains(const Literal& literal) override; + + Result> VisitLike(const Literal& literal) override; + + Result> VisitAnd( + const std::vector>>& children) override; + + Result> VisitOr( + const std::vector>>& children) override; + + Result> VisitVectorSearch( + const std::shared_ptr& vector_search) override; + + Result> VisitFullTextSearch( + const std::shared_ptr& full_text_search) override; + + bool IsThreadSafe() const override { + return false; + } + + std::string GetIndexType() const override { + return "btree"; + } + + private: + Result RangeQuery(const MemorySlice& lower_bound, + const MemorySlice& upper_bound, bool lower_inclusive, + bool upper_inclusive); + + Result AllNonNullRows(); + + std::shared_ptr sst_file_reader_; + std::shared_ptr null_bitmap_; + MemorySlice min_key_; + MemorySlice max_key_; + bool has_min_key_; + std::vector files_; + std::shared_ptr pool_; + std::function comparator_; + int32_t ts_precision_; +}; + +} // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp index 6e636ef26..b0d5362db 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp @@ -31,41 +31,49 @@ namespace paimon { -BTreeGlobalIndexWriter::BTreeGlobalIndexWriter( +Result> BTreeGlobalIndexWriter::Create( const std::string& field_name, ::ArrowSchema* arrow_schema, const std::shared_ptr& file_writer, - const std::shared_ptr& pool, int32_t block_size, int64_t expected_entries) - : field_name_(field_name), - file_writer_(file_writer), - pool_(pool), - block_size_(block_size), - null_bitmap_(std::make_shared()), - has_nulls_(false), - current_row_id_(0), - bloom_filter_(BloomFilter::Create(expected_entries, 0.01)) { - // Allocate memory for bloom filter and set memory segment - if (bloom_filter_) { - int64_t bloom_filter_size = bloom_filter_->ByteLength(); - auto bloom_filter_segment = - MemorySegment::AllocateHeapMemory(bloom_filter_size, pool.get()); - auto status = bloom_filter_->SetMemorySegment(bloom_filter_segment); - if (!status.ok()) { - // Failed to set memory segment for bloom filter - } + const std::shared_ptr& pool, int32_t block_size, int64_t expected_entries) { + // Create and initialize bloom filter + auto bloom_filter = BloomFilter::Create(expected_entries, 0.01); + if (!bloom_filter) { + return Status::Invalid("Failed to create bloom filter"); } + int64_t bloom_filter_size = bloom_filter->ByteLength(); + auto bloom_filter_segment = MemorySegment::AllocateHeapMemory(bloom_filter_size, pool.get()); + PAIMON_RETURN_NOT_OK(bloom_filter->SetMemorySegment(bloom_filter_segment)); + // Import schema to get the field type + std::shared_ptr arrow_type; if (arrow_schema) { - auto schema_result = arrow::ImportSchema(arrow_schema); - if (schema_result.ok()) { - auto schema = *schema_result; - if (schema->num_fields() > 0) { - arrow_type_ = schema->field(0)->type(); - } + PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr schema, + arrow::ImportSchema(arrow_schema)); + if (schema->num_fields() > 0) { + arrow_type = schema->field(0)->type(); } } + + return std::shared_ptr(new BTreeGlobalIndexWriter( + field_name, std::move(arrow_type), file_writer, pool, block_size, std::move(bloom_filter))); } +BTreeGlobalIndexWriter::BTreeGlobalIndexWriter( + const std::string& field_name, std::shared_ptr arrow_type, + const std::shared_ptr& file_writer, + const std::shared_ptr& pool, int32_t block_size, + std::shared_ptr bloom_filter) + : field_name_(field_name), + arrow_type_(std::move(arrow_type)), + file_writer_(file_writer), + pool_(pool), + block_size_(block_size), + null_bitmap_(std::make_shared()), + has_nulls_(false), + current_row_id_(0), + bloom_filter_(std::move(bloom_filter)) {} + Status BTreeGlobalIndexWriter::AddBatch(::ArrowArray* arrow_array) { if (!arrow_array) { return Status::Invalid("ArrowArray is null"); @@ -269,8 +277,8 @@ Result> BTreeGlobalIndexWriter::WriteNullBitmap( } // Serialize null bitmap - std::vector bitmap_data = null_bitmap_->Serialize(); - if (bitmap_data.empty()) { + auto bitmap_bytes = null_bitmap_->Serialize(pool_.get()); + if (!bitmap_bytes || bitmap_bytes->size() == 0) { return std::shared_ptr(nullptr); } @@ -278,15 +286,13 @@ Result> BTreeGlobalIndexWriter::WriteNullBitmap( PAIMON_ASSIGN_OR_RAISE(int64_t offset, out->GetPos()); // Write bitmap data - PAIMON_RETURN_NOT_OK( - out->Write(reinterpret_cast(bitmap_data.data()), bitmap_data.size())); + PAIMON_RETURN_NOT_OK(out->Write(bitmap_bytes->data(), bitmap_bytes->size())); // Calculate and write CRC32C - uint32_t crc = - CRC32C::calculate(reinterpret_cast(bitmap_data.data()), bitmap_data.size()); + uint32_t crc = CRC32C::calculate(bitmap_bytes->data(), bitmap_bytes->size()); PAIMON_RETURN_NOT_OK(out->Write(reinterpret_cast(&crc), sizeof(crc))); - return std::make_shared(offset, bitmap_data.size()); + return std::make_shared(offset, bitmap_bytes->size()); } Result> BTreeGlobalIndexWriter::Finish() { diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.h b/src/paimon/common/global_index/btree/btree_global_index_writer.h index 28dba2d27..921e6c2a8 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.h +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.h @@ -22,9 +22,9 @@ #include "paimon/common/global_index/btree/btree_file_footer.h" #include "paimon/common/global_index/btree/btree_index_meta.h" #include "paimon/common/sst/sst_file_writer.h" -#include "paimon/common/utils/roaring_navigable_map64.h" #include "paimon/global_index/global_index_writer.h" #include "paimon/global_index/io/global_index_file_writer.h" +#include "paimon/utils/roaring_bitmap64.h" namespace paimon { @@ -32,10 +32,12 @@ namespace paimon { /// This writer builds an SST file where each key maps to a list of row IDs. class BTreeGlobalIndexWriter : public GlobalIndexWriter { public: - BTreeGlobalIndexWriter(const std::string& field_name, ::ArrowSchema* arrow_schema, - const std::shared_ptr& file_writer, - const std::shared_ptr& pool, int32_t block_size, - int64_t expected_entries); + /// Factory method that may fail during initialization (e.g., bloom filter setup, + /// Arrow schema import). Use this instead of the constructor. + static Result> Create( + const std::string& field_name, ::ArrowSchema* arrow_schema, + const std::shared_ptr& file_writer, + const std::shared_ptr& pool, int32_t block_size, int64_t expected_entries); ~BTreeGlobalIndexWriter() override = default; @@ -47,6 +49,12 @@ class BTreeGlobalIndexWriter : public GlobalIndexWriter { Result> Finish() override; private: + BTreeGlobalIndexWriter(const std::string& field_name, + std::shared_ptr arrow_type, + const std::shared_ptr& file_writer, + const std::shared_ptr& pool, int32_t block_size, + std::shared_ptr bloom_filter); + // Helper method to write a key-value pair to the SST file Status WriteKeyValue(std::shared_ptr key, const std::vector& row_ids); @@ -77,7 +85,7 @@ class BTreeGlobalIndexWriter : public GlobalIndexWriter { std::shared_ptr last_key_; // Null bitmap tracking - std::shared_ptr null_bitmap_; + std::shared_ptr null_bitmap_; bool has_nulls_; // Current row ID counter diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp index 4f5cb38c2..e3512a6e7 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp @@ -90,8 +90,9 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteIntData) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, - pool_, 4096, 100000); + ASSERT_OK_AND_ASSIGN( + auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, + 4096, 100000)); // Create an Arrow array with int values auto array = @@ -131,8 +132,9 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteStringData) { auto c_schema = CreateArrowSchema(arrow::utf8(), "string_field"); // Create the BTree global index writer - auto writer = std::make_shared("string_field", c_schema.get(), - file_writer, pool_, 4096, 100000); + ASSERT_OK_AND_ASSIGN(auto writer, + BTreeGlobalIndexWriter::Create("string_field", c_schema.get(), file_writer, + pool_, 4096, 100000)); // Create an Arrow array with string values auto array = arrow::ipc::internal::json::ArrayFromJSON( @@ -171,8 +173,9 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteWithNulls) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, - pool_, 4096, 100000); + ASSERT_OK_AND_ASSIGN( + auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, + 4096, 100000)); // Create an Arrow array with null values auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, null, 3, null, 5]") @@ -213,8 +216,9 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteMultipleBatches) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, - pool_, 4096, 100000); + ASSERT_OK_AND_ASSIGN( + auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, + 4096, 100000)); // Create first batch auto array1 = @@ -260,8 +264,9 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteEmptyData) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, - pool_, 4096, 100000); + ASSERT_OK_AND_ASSIGN( + auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, + 4096, 100000)); // Finish without adding any data ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); @@ -279,8 +284,9 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteAllNulls) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - auto writer = std::make_shared("int_field", c_schema.get(), file_writer, - pool_, 4096, 100000); + ASSERT_OK_AND_ASSIGN( + auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, + 4096, 100000)); // Create an Arrow array with all null values auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[null, null, null]") @@ -317,8 +323,9 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteDoubleData) { auto c_schema = CreateArrowSchema(arrow::float64(), "double_field"); // Create the BTree global index writer - auto writer = std::make_shared("double_field", c_schema.get(), - file_writer, pool_, 4096, 100000); + ASSERT_OK_AND_ASSIGN(auto writer, + BTreeGlobalIndexWriter::Create("double_field", c_schema.get(), file_writer, + pool_, 4096, 100000)); // Create an Arrow array with double values auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::float64(), "[1.5, 2.5, 3.5, 1.5]") diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index 4f01e5ddc..9b8182b0b 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -21,24 +21,23 @@ #include "arrow/c/bridge.h" #include "paimon/common/global_index/btree/btree_file_footer.h" +#include "paimon/common/global_index/btree/btree_global_index_reader.h" #include "paimon/common/global_index/btree/btree_global_index_writer.h" #include "paimon/common/global_index/btree/btree_index_meta.h" #include "paimon/common/memory/memory_slice.h" #include "paimon/common/memory/memory_slice_input.h" -#include "paimon/common/memory/memory_slice_output.h" #include "paimon/common/options/memory_size.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/crc32c.h" #include "paimon/common/utils/date_time_utils.h" #include "paimon/common/utils/field_type_utils.h" #include "paimon/common/utils/options_utils.h" -#include "paimon/common/utils/roaring_navigable_map64.h" #include "paimon/data/timestamp.h" #include "paimon/defs.h" #include "paimon/file_index/bitmap_index_result.h" #include "paimon/global_index/bitmap_global_index_result.h" #include "paimon/memory/bytes.h" -#include "paimon/predicate/literal.h" +#include "paimon/utils/roaring_bitmap64.h" namespace paimon { @@ -63,14 +62,12 @@ Result> BTreeGlobalIndexer::CreateWriter( const std::string& field_name, ::ArrowSchema* arrow_schema, const std::shared_ptr& file_writer, const std::shared_ptr& pool) const { - return std::make_shared(field_name, arrow_schema, file_writer, pool, - 4096, 100000); + PAIMON_ASSIGN_OR_RAISE( + auto writer, + BTreeGlobalIndexWriter::Create(field_name, arrow_schema, file_writer, pool, 4096, 100000)); + return writer; } -// Forward declarations for helper functions -static Result LiteralToMemorySlice(const Literal& literal, MemoryPool* pool, - int32_t ts_precision); - // Create a comparator function based on field type // Keys are stored in binary format to match Java's DataOutputStream format static std::function CreateComparator( @@ -301,7 +298,7 @@ Result> BTreeGlobalIndexer::CreateReader( footer->GetBloomFilterHandle(), result_comparator, cache_manager)); // prepare null_bitmap - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr null_bitmap, + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr null_bitmap, ReadNullBitmap(block_cache, footer->GetNullBitmapHandle())); auto index_meta = BTreeIndexMeta::Deserialize(meta.metadata, pool.get()); @@ -352,9 +349,9 @@ Result> BTreeGlobalIndexer::ToGlobalIndexResu "invalid FileIndexResult, supposed to be Remain or Skip or BitmapIndexResult"); } -Result> BTreeGlobalIndexer::ReadNullBitmap( +Result> BTreeGlobalIndexer::ReadNullBitmap( const std::shared_ptr& cache, const std::shared_ptr& block_handle) { - auto null_bitmap = std::make_shared(); + auto null_bitmap = std::make_shared(); if (block_handle == nullptr) { return null_bitmap; } @@ -387,563 +384,10 @@ Result> BTreeGlobalIndexer::ReadNullBitma } // Deserialize null bitmap - std::vector data( - reinterpret_cast(null_bitmap_view.data()), - reinterpret_cast(null_bitmap_view.data()) + null_bitmap_view.size()); - null_bitmap->Deserialize(data); + PAIMON_RETURN_NOT_OK( + null_bitmap->Deserialize(null_bitmap_view.data(), null_bitmap_view.size())); return null_bitmap; } -BTreeGlobalIndexReader::BTreeGlobalIndexReader( - const std::shared_ptr& sst_file_reader, - const std::shared_ptr& null_bitmap, const MemorySlice& min_key, - const MemorySlice& max_key, bool has_min_key, const std::vector& files, - const std::shared_ptr& pool, - std::function comparator, int32_t ts_precision) - : sst_file_reader_(sst_file_reader), - null_bitmap_(null_bitmap), - min_key_(min_key), - max_key_(max_key), - has_min_key_(has_min_key), - files_(files), - pool_(pool), - comparator_(std::move(comparator)), - ts_precision_(ts_precision) {} - -Result> BTreeGlobalIndexReader::VisitIsNotNull() { - return std::make_shared([this]() -> Result { - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); - return result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitIsNull() { - return std::make_shared( - [this]() -> Result { return null_bitmap_->GetBitmap(); }); -} - -Result> BTreeGlobalIndexReader::VisitStartsWith( - const Literal& prefix) { - return std::make_shared([this, &prefix]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto prefix_slice, - LiteralToMemorySlice(prefix, pool_.get(), ts_precision_)); - - auto prefix_type = prefix.GetType(); - - if (prefix_type == FieldType::STRING || prefix_type == FieldType::BINARY) { - auto prefix_bytes = prefix_slice.GetHeapMemory(); - if (!prefix_bytes || prefix_bytes->size() == 0) { - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); - return result.GetBitmap(); - } - - std::string upper_bound_str(prefix_bytes->data(), prefix_bytes->size()); - bool overflow = true; - for (int i = static_cast(upper_bound_str.size()) - 1; i >= 0 && overflow; --i) { - auto c = static_cast(upper_bound_str[i]); - if (c < 0xFF) { - upper_bound_str[i] = c + 1; - overflow = false; - } else { - upper_bound_str[i] = 0x00; - } - } - - if (!overflow) { - auto upper_bytes = std::make_shared(upper_bound_str, pool_.get()); - auto upper_bound_slice = MemorySlice::Wrap(upper_bytes); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, - RangeQuery(prefix_slice, upper_bound_slice, true, false)); - return result.GetBitmap(); - } else { - // If overflow (all bytes were 0xFF), use max_key_ as upper bound - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, - RangeQuery(prefix_slice, max_key_, true, false)); - return result.GetBitmap(); - } - } - - return RoaringBitmap64(); - }); -} - -Result> BTreeGlobalIndexReader::VisitEndsWith( - const Literal& suffix) { - return std::make_shared([this]() -> Result { - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); - return result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitContains( - const Literal& literal) { - return std::make_shared([this]() -> Result { - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); - return result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitLike( - const Literal& literal) { - if (literal.IsNull()) { - return Status::Invalid("LIKE pattern cannot be null"); - } - - auto pattern = literal.GetValue(); - - bool is_prefix_pattern = false; - std::string prefix; - - size_t first_wildcard = pattern.find_first_of("_%"); - - if (first_wildcard != std::string::npos) { - if (pattern[first_wildcard] == '%' && first_wildcard == pattern.length() - 1) { - bool has_wildcard_in_prefix = false; - for (size_t i = 0; i < first_wildcard; ++i) { - if (pattern[i] == '_' || pattern[i] == '%') { - has_wildcard_in_prefix = true; - break; - } - } - if (!has_wildcard_in_prefix) { - is_prefix_pattern = true; - prefix = pattern.substr(0, first_wildcard); - } - } - } - - if (is_prefix_pattern) { - Literal prefix_literal(FieldType::STRING, prefix.c_str(), prefix.length()); - return VisitStartsWith(prefix_literal); - } - - return std::make_shared([this]() -> Result { - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); - return result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitLessThan( - const Literal& literal) { - return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, - RangeQuery(min_key_, literal_slice, true, false)); - return result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitGreaterOrEqual( - const Literal& literal) { - return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, - RangeQuery(literal_slice, max_key_, true, true)); - return result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitNotEqual( - const Literal& literal) { - return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 equal_result, - RangeQuery(literal_slice, literal_slice, true, true)); - result.AndNot(equal_result); - return result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitLessOrEqual( - const Literal& literal) { - return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, - RangeQuery(min_key_, literal_slice, true, true)); - return result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitEqual( - const Literal& literal) { - return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, - RangeQuery(literal_slice, literal_slice, true, true)); - return result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitGreaterThan( - const Literal& literal) { - return std::make_shared([this, &literal]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, - RangeQuery(literal_slice, max_key_, false, true)); - return result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitIn( - const std::vector& literals) { - return std::make_shared( - [this, &literals]() -> Result { - RoaringNavigableMap64 result; - for (const auto& literal : literals) { - PAIMON_ASSIGN_OR_RAISE(auto literal_slice, - LiteralToMemorySlice(literal, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 literal_result, - RangeQuery(literal_slice, literal_slice, true, true)); - result.Or(literal_result); - } - return result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitNotIn( - const std::vector& literals) { - return std::make_shared( - [this, &literals]() -> Result { - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, AllNonNullRows()); - - PAIMON_ASSIGN_OR_RAISE(auto in_result_ptr, VisitIn(literals)); - PAIMON_ASSIGN_OR_RAISE(auto in_iterator, in_result_ptr->CreateIterator()); - - RoaringNavigableMap64 in_navigable; - while (in_iterator->HasNext()) { - in_navigable.Add(in_iterator->Next()); - } - - result.AndNot(in_navigable); - return result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitBetween(const Literal& from, - const Literal& to) { - return std::make_shared([this, &from, - &to]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto from_slice, - LiteralToMemorySlice(from, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(auto to_slice, LiteralToMemorySlice(to, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 result, - RangeQuery(from_slice, to_slice, true, true)); - return result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitNotBetween( - const Literal& from, const Literal& to) { - return std::make_shared([this, &from, - &to]() -> Result { - PAIMON_ASSIGN_OR_RAISE(auto from_slice, - LiteralToMemorySlice(from, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(auto to_slice, LiteralToMemorySlice(to, pool_.get(), ts_precision_)); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 lower_result, - RangeQuery(min_key_, from_slice, true, false)); - PAIMON_ASSIGN_OR_RAISE(RoaringNavigableMap64 upper_result, - RangeQuery(to_slice, max_key_, false, true)); - lower_result.Or(upper_result); - return lower_result.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitAnd( - const std::vector>>& children) { - return std::make_shared([&children]() -> Result { - if (children.empty()) { - return Status::Invalid("VisitAnd called with no children"); - } - - auto first_result_status = children[0]; - if (!first_result_status.ok()) { - return first_result_status.status(); - } - auto first_result = std::move(first_result_status).value(); - PAIMON_ASSIGN_OR_RAISE(auto first_iterator, first_result->CreateIterator()); - - RoaringNavigableMap64 result_bitmap; - while (first_iterator->HasNext()) { - result_bitmap.Add(first_iterator->Next()); - } - - for (size_t i = 1; i < children.size(); ++i) { - auto child_status = children[i]; - if (!child_status.ok()) { - return child_status.status(); - } - auto child = std::move(child_status).value(); - PAIMON_ASSIGN_OR_RAISE(auto child_iterator, child->CreateIterator()); - - RoaringNavigableMap64 child_bitmap; - while (child_iterator->HasNext()) { - child_bitmap.Add(child_iterator->Next()); - } - - result_bitmap.And(child_bitmap); - } - - return result_bitmap.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitOr( - const std::vector>>& children) { - return std::make_shared([&children]() -> Result { - RoaringNavigableMap64 result_bitmap; - - for (const auto& child_status : children) { - if (!child_status.ok()) { - return child_status.status(); - } - auto child = std::move(child_status).value(); - PAIMON_ASSIGN_OR_RAISE(auto child_iterator, child->CreateIterator()); - - while (child_iterator->HasNext()) { - result_bitmap.Add(child_iterator->Next()); - } - } - - return result_bitmap.GetBitmap(); - }); -} - -Result> BTreeGlobalIndexReader::VisitVectorSearch( - const std::shared_ptr& vector_search) { - return Status::NotImplemented("Vector search not supported in BTree index"); -} - -Result> BTreeGlobalIndexReader::VisitFullTextSearch( - const std::shared_ptr& full_text_search) { - return Status::NotImplemented("Full text search not supported in BTree index"); -} - -Result BTreeGlobalIndexReader::RangeQuery(const MemorySlice& lower_bound, - const MemorySlice& upper_bound, - bool lower_inclusive, - bool upper_inclusive) { - RoaringNavigableMap64 result; - - // Create an index block iterator to iterate through data blocks - auto index_iterator = sst_file_reader_->CreateIndexIterator(); - - // Seek iterator to the lower bound - auto lower_bytes = lower_bound.GetHeapMemory(); - - if (lower_bytes) { - PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] bool seek_result, - index_iterator->SeekTo(lower_bound)); - } - - // Check if there are any blocks to read - if (!index_iterator->HasNext()) { - return result; - } - - bool first_block = true; - - while (index_iterator->HasNext()) { - // Get the next data block - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr data_iterator, - sst_file_reader_->GetNextBlock(index_iterator)); - - if (!data_iterator || !data_iterator->HasNext()) { - break; - } - - // For the first block, we need to seek within the block to the exact position - if (first_block && lower_bytes) { - PAIMON_ASSIGN_OR_RAISE([[maybe_unused]] bool found, data_iterator->SeekTo(lower_bound)); - first_block = false; - - if (!data_iterator->HasNext()) { - continue; - } - } - - // Compare key with bounds using the comparator - if (!comparator_) { - return Status::Invalid("Comparator is not set for BTreeGlobalIndexReader"); - } - - // Iterate through entries in the data block - while (data_iterator->HasNext()) { - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr entry, data_iterator->Next()); - int cmp_lower = comparator_(entry->key, lower_bound); - - // Check lower bound - if (!lower_inclusive && cmp_lower == 0) { - continue; - } - - // Check upper bound - int cmp_upper = comparator_(entry->key, upper_bound); - - if (cmp_upper > 0 || (!upper_inclusive && cmp_upper == 0)) { - return result; - } - - // Deserialize row IDs from the value - auto value_bytes = entry->value.CopyBytes(pool_.get()); - auto value_slice = MemorySlice::Wrap(value_bytes); - auto value_input = value_slice.ToInput(); - - // Read row IDs. The format is: [num_row_ids (VarLenLong)][row_id1 (VarLenLong)]... - // Use VarLenLong to match Java's DataOutputStream.writeVarLong format - PAIMON_ASSIGN_OR_RAISE(int64_t num_row_ids, value_input.ReadVarLenLong()); - - for (int64_t i = 0; i < num_row_ids; i++) { - PAIMON_ASSIGN_OR_RAISE(int64_t row_id, value_input.ReadVarLenLong()); - result.Add(row_id); - } - } - } - - return result; -} - -Result BTreeGlobalIndexReader::AllNonNullRows() { - if (files_.empty()) { - return RoaringNavigableMap64(); - } - - int64_t total_rows = files_[0].range_end + 1; - uint64_t null_count = null_bitmap_->GetLongCardinality(); - - const double NULL_RATIO_THRESHOLD = 0.1; - const int64_t MAX_ROWS_FOR_SUBTRACTION = 10000000; - - bool use_subtraction = (total_rows <= MAX_ROWS_FOR_SUBTRACTION) && - (null_count < static_cast(total_rows * NULL_RATIO_THRESHOLD)); - - if (use_subtraction) { - RoaringNavigableMap64 result; - result.AddRange(Range(0, total_rows - 1)); - result.AndNot(*null_bitmap_); - return result; - } - - if (!has_min_key_) { - return RoaringNavigableMap64(); - } - return RangeQuery(min_key_, max_key_, true, true); -} - -// Helper function to convert Literal to MemorySlice -static Result LiteralToMemorySlice(const Literal& literal, MemoryPool* pool, - int32_t ts_precision) { - if (literal.IsNull()) { - return Status::Invalid("Cannot convert null literal to MemorySlice for btree index query"); - } - - auto type = literal.GetType(); - - // Handle string/binary types - if (type == FieldType::STRING || type == FieldType::BINARY) { - auto str_value = literal.GetValue(); - auto bytes = std::make_shared(str_value, pool); - return MemorySlice::Wrap(bytes); - } - - // Handle integer types - if (type == FieldType::BIGINT) { - auto value = literal.GetValue(); - auto bytes = std::make_shared(8, pool); - memcpy(bytes->data(), &value, sizeof(int64_t)); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::INT) { - auto value = literal.GetValue(); - auto bytes = std::make_shared(4, pool); - memcpy(bytes->data(), &value, sizeof(int32_t)); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::TINYINT) { - auto value = literal.GetValue(); - auto bytes = std::make_shared(1, pool); - bytes->data()[0] = static_cast(value); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::SMALLINT) { - auto value = literal.GetValue(); - auto bytes = std::make_shared(2, pool); - memcpy(bytes->data(), &value, sizeof(int16_t)); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::BOOLEAN) { - bool value = literal.GetValue(); - auto bytes = std::make_shared(1, pool); - bytes->data()[0] = value ? 1 : 0; - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::FLOAT) { - auto value = literal.GetValue(); - auto bytes = std::make_shared(sizeof(float), pool); - memcpy(bytes->data(), &value, sizeof(float)); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::DOUBLE) { - auto value = literal.GetValue(); - auto bytes = std::make_shared(sizeof(double), pool); - memcpy(bytes->data(), &value, sizeof(double)); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::DATE) { - // DATE is stored as int32_t to match Java's writeInt - auto value = literal.GetValue(); - auto bytes = std::make_shared(sizeof(int32_t), pool); - memcpy(bytes->data(), &value, sizeof(int32_t)); - return MemorySlice::Wrap(bytes); - } - - if (type == FieldType::TIMESTAMP) { - auto ts = literal.GetValue(); - if (Timestamp::IsCompact(ts_precision)) { - // compact: writeLong(millisecond) - int64_t value = ts.GetMillisecond(); - auto bytes = std::make_shared(sizeof(int64_t), pool); - memcpy(bytes->data(), &value, sizeof(int64_t)); - return MemorySlice::Wrap(bytes); - } else { - // non-compact: writeLong(millisecond) + writeVarLenInt(nanoOfMillisecond) - MemorySliceOutput ts_out(13, pool); - ts_out.WriteValue(ts.GetMillisecond()); - ts_out.WriteVarLenInt(ts.GetNanoOfMillisecond()); - return ts_out.ToSlice(); - } - } - - if (type == FieldType::DECIMAL) { - auto decimal_value = literal.GetValue(); - auto bytes = std::make_shared(16, pool); - uint64_t high_bits = decimal_value.HighBits(); - uint64_t low_bits = decimal_value.LowBits(); - for (int i = 0; i < 8; ++i) { - bytes->data()[i] = static_cast((high_bits >> (56 - i * 8)) & 0xFF); - } - for (int i = 0; i < 8; ++i) { - bytes->data()[8 + i] = static_cast((low_bits >> (56 - i * 8)) & 0xFF); - } - return MemorySlice::Wrap(bytes); - } - - return Status::NotImplemented("Literal type " + FieldTypeUtils::FieldTypeToString(type) + - " not yet supported in btree index"); -} - } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.h b/src/paimon/common/global_index/btree/btree_global_indexer.h index 90d1688c0..835ae9613 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.h +++ b/src/paimon/common/global_index/btree/btree_global_indexer.h @@ -20,13 +20,13 @@ #include #include -#include "paimon/common/file_index/bitmap/bitmap_file_index.h" +#include "paimon/common/global_index/btree/btree_global_index_reader.h" #include "paimon/common/sst/block_cache.h" #include "paimon/common/sst/block_handle.h" -#include "paimon/common/sst/sst_file_reader.h" -#include "paimon/common/utils/roaring_navigable_map64.h" +#include "paimon/file_index/file_index_result.h" #include "paimon/global_index/global_indexer.h" #include "paimon/global_index/io/global_index_file_reader.h" +#include "paimon/utils/roaring_bitmap64.h" namespace paimon { class BTreeGlobalIndexer : public GlobalIndexer { @@ -48,95 +48,11 @@ class BTreeGlobalIndexer : public GlobalIndexer { static Result> ToGlobalIndexResult( int64_t range_end, const std::shared_ptr& result); - static Result> ReadNullBitmap( + static Result> ReadNullBitmap( const std::shared_ptr& cache, const std::shared_ptr& block_handle); private: std::map options_; }; -class BTreeGlobalIndexReader : public GlobalIndexReader { - public: - BTreeGlobalIndexReader( - const std::shared_ptr& sst_file_reader, - const std::shared_ptr& null_bitmap, const MemorySlice& min_key, - const MemorySlice& max_key, bool has_min_key, const std::vector& files, - const std::shared_ptr& pool, - std::function comparator, - int32_t ts_precision); - - Result> VisitIsNotNull() override; - - Result> VisitIsNull() override; - - Result> VisitEqual(const Literal& literal) override; - - Result> VisitNotEqual(const Literal& literal) override; - - Result> VisitLessThan(const Literal& literal) override; - - Result> VisitLessOrEqual(const Literal& literal) override; - - Result> VisitGreaterThan(const Literal& literal) override; - - Result> VisitGreaterOrEqual(const Literal& literal) override; - - Result> VisitIn( - const std::vector& literals) override; - - Result> VisitNotIn( - const std::vector& literals) override; - - Result> VisitBetween(const Literal& from, - const Literal& to) override; - - Result> VisitNotBetween(const Literal& from, - const Literal& to) override; - - Result> VisitStartsWith(const Literal& prefix) override; - - Result> VisitEndsWith(const Literal& suffix) override; - - Result> VisitContains(const Literal& literal) override; - - Result> VisitLike(const Literal& literal) override; - - Result> VisitAnd( - const std::vector>>& children) override; - - Result> VisitOr( - const std::vector>>& children) override; - - Result> VisitVectorSearch( - const std::shared_ptr& vector_search) override; - - Result> VisitFullTextSearch( - const std::shared_ptr& full_text_search) override; - - bool IsThreadSafe() const override { - return false; - } - - std::string GetIndexType() const override { - return "btree"; - } - - private: - Result RangeQuery(const MemorySlice& lower_bound, - const MemorySlice& upper_bound, bool lower_inclusive, - bool upper_inclusive); - - Result AllNonNullRows(); - - std::shared_ptr sst_file_reader_; - std::shared_ptr null_bitmap_; - MemorySlice min_key_; - MemorySlice max_key_; - bool has_min_key_; - std::vector files_; - std::shared_ptr pool_; - std::function comparator_; - int32_t ts_precision_; -}; - } // namespace paimon diff --git a/src/paimon/common/utils/roaring_bitmap32.cpp b/src/paimon/common/utils/roaring_bitmap32.cpp index 4f5b2ada7..de8e90107 100644 --- a/src/paimon/common/utils/roaring_bitmap32.cpp +++ b/src/paimon/common/utils/roaring_bitmap32.cpp @@ -209,6 +209,10 @@ bool RoaringBitmap32::operator==(const RoaringBitmap32& other) const noexcept { PAIMON_UNIQUE_PTR RoaringBitmap32::Serialize(MemoryPool* pool) const { GetRoaringBitmap(roaring_bitmap_).runOptimize(); auto& bitmap = GetRoaringBitmap(roaring_bitmap_); + // Use default pool if no pool is provided + if (pool == nullptr) { + pool = GetDefaultPool().get(); + } auto bytes = Bytes::AllocateBytes(bitmap.getSizeInBytes(), pool); bitmap.write(bytes->data()); return bytes; diff --git a/src/paimon/common/utils/roaring_bitmap64_test.cpp b/src/paimon/common/utils/roaring_bitmap64_test.cpp index 71e15c8a6..bc70cee89 100644 --- a/src/paimon/common/utils/roaring_bitmap64_test.cpp +++ b/src/paimon/common/utils/roaring_bitmap64_test.cpp @@ -16,6 +16,7 @@ #include "paimon/utils/roaring_bitmap64.h" +#include #include #include #include @@ -26,6 +27,7 @@ #include "paimon/io/byte_array_input_stream.h" #include "paimon/result.h" #include "paimon/testing/utils/testharness.h" +#include "paimon/utils/range.h" namespace paimon::test { TEST(RoaringBitmap64Test, TestSimple) { @@ -414,4 +416,106 @@ TEST(RoaringBitmap64Test, TestIteratorEqualOrLarger) { iter.EqualOrLarger(200l); ASSERT_EQ(iter, roaring.End()); } + +// Helper function to convert a RoaringBitmap64 to a list of contiguous ranges. +static std::vector ToRangeList(const RoaringBitmap64& bitmap) { + std::vector ranges; + if (bitmap.IsEmpty()) { + return ranges; + } + + int64_t current_start = -1; + int64_t current_end = -1; + + for (auto it = bitmap.Begin(); it != bitmap.End(); ++it) { + int64_t value = *it; + if (current_start == -1) { + current_start = value; + current_end = value; + } else if (value == current_end + 1) { + current_end = value; + } else { + ranges.emplace_back(current_start, current_end); + current_start = value; + current_end = value; + } + } + + if (current_start != -1) { + ranges.emplace_back(current_start, current_end); + } + + return ranges; +} + +TEST(RoaringBitmap64Test, TestAddRangeBasic) { + RoaringBitmap64 bitmap; + bitmap.AddRange(5, 11); // half-open interval [5, 11) == closed [5, 10] + + ASSERT_EQ(bitmap.Cardinality(), 6); + ASSERT_FALSE(bitmap.Contains(4)); + ASSERT_TRUE(bitmap.Contains(5)); + ASSERT_TRUE(bitmap.Contains(7)); + ASSERT_TRUE(bitmap.Contains(10)); + ASSERT_FALSE(bitmap.Contains(11)); +} + +TEST(RoaringBitmap64Test, TestAddRangeSingleElement) { + RoaringBitmap64 bitmap; + bitmap.AddRange(100, 101); // half-open interval [100, 101) == single element 100 + + ASSERT_EQ(bitmap.Cardinality(), 1); + ASSERT_FALSE(bitmap.Contains(99)); + ASSERT_TRUE(bitmap.Contains(100)); + ASSERT_FALSE(bitmap.Contains(101)); +} + +TEST(RoaringBitmap64Test, TestAddRangeMultipleNonOverlapping) { + RoaringBitmap64 bitmap; + bitmap.AddRange(0, 6); // [0, 5] + bitmap.AddRange(10, 16); // [10, 15] + bitmap.AddRange(20, 26); // [20, 25] + + ASSERT_EQ(bitmap.Cardinality(), 18); + + ASSERT_FALSE(bitmap.Contains(6)); + ASSERT_FALSE(bitmap.Contains(9)); + ASSERT_FALSE(bitmap.Contains(16)); + ASSERT_FALSE(bitmap.Contains(19)); + + ASSERT_TRUE(bitmap.Contains(0)); + ASSERT_TRUE(bitmap.Contains(5)); + ASSERT_TRUE(bitmap.Contains(10)); + ASSERT_TRUE(bitmap.Contains(15)); + ASSERT_TRUE(bitmap.Contains(20)); + ASSERT_TRUE(bitmap.Contains(25)); + + std::vector ranges = ToRangeList(bitmap); + ASSERT_EQ(ranges.size(), 3); + ASSERT_EQ(ranges[0], Range(0, 5)); + ASSERT_EQ(ranges[1], Range(10, 15)); + ASSERT_EQ(ranges[2], Range(20, 25)); +} + +TEST(RoaringBitmap64Test, TestAddRangeLargeValues) { + RoaringBitmap64 bitmap; + int64_t start = static_cast(INT_MAX) + 100L; + int64_t end = static_cast(INT_MAX) + 200L; + bitmap.AddRange(start, end + 1); // half-open interval [start, end+1) == closed [start, end] + + ASSERT_EQ(bitmap.Cardinality(), 101); + ASSERT_FALSE(bitmap.Contains(start - 1)); + ASSERT_TRUE(bitmap.Contains(start)); + ASSERT_TRUE(bitmap.Contains(start + 50)); + ASSERT_TRUE(bitmap.Contains(end)); + ASSERT_FALSE(bitmap.Contains(end + 1)); + + std::vector values; + for (auto it = bitmap.Begin(); it != bitmap.End(); ++it) { + values.push_back(*it); + } + ASSERT_EQ(values.size(), 101); + ASSERT_EQ(values[0], start); + ASSERT_EQ(values[100], end); +} } // namespace paimon::test diff --git a/src/paimon/common/utils/roaring_navigable_map64.cpp b/src/paimon/common/utils/roaring_navigable_map64.cpp deleted file mode 100644 index b0e9cc164..000000000 --- a/src/paimon/common/utils/roaring_navigable_map64.cpp +++ /dev/null @@ -1,262 +0,0 @@ -/* - * Copyright 2026-present Alibaba Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "paimon/common/utils/roaring_navigable_map64.h" - -#include -#include -#include - -#include "paimon/memory/memory_pool.h" -#include "paimon/utils/range.h" -#include "paimon/utils/roaring_bitmap64.h" - -namespace paimon { - -class RoaringNavigableMap64::Impl { - public: - RoaringBitmap64 bitmap; -}; - -class RoaringNavigableMap64::Iterator::Impl { - public: - explicit Impl(const RoaringBitmap64& bitmap) : iterator(bitmap.Begin()) {} - explicit Impl(const RoaringBitmap64::Iterator& iter) : iterator(iter) {} - RoaringBitmap64::Iterator iterator; -}; - -RoaringNavigableMap64::RoaringNavigableMap64() : impl_(std::make_unique()) {} - -RoaringNavigableMap64::RoaringNavigableMap64(const RoaringNavigableMap64& other) - : impl_(std::make_unique()) { - impl_->bitmap = other.impl_->bitmap; -} - -RoaringNavigableMap64::RoaringNavigableMap64(RoaringNavigableMap64&& other) noexcept = default; - -RoaringNavigableMap64& RoaringNavigableMap64::operator=(const RoaringNavigableMap64& other) { - if (this != &other) { - impl_->bitmap = other.impl_->bitmap; - } - return *this; -} - -RoaringNavigableMap64& RoaringNavigableMap64::operator=(RoaringNavigableMap64&& other) noexcept = - default; - -RoaringNavigableMap64::~RoaringNavigableMap64() = default; - -void RoaringNavigableMap64::AddRange(const Range& range) { - impl_->bitmap.AddRange(range.from, range.to + 1); -} - -bool RoaringNavigableMap64::Contains(int64_t x) const { - return impl_->bitmap.Contains(x); -} - -void RoaringNavigableMap64::Add(int64_t x) { - impl_->bitmap.Add(x); -} - -void RoaringNavigableMap64::Or(const RoaringNavigableMap64& other) { - impl_->bitmap |= other.impl_->bitmap; -} - -void RoaringNavigableMap64::And(const RoaringNavigableMap64& other) { - impl_->bitmap &= other.impl_->bitmap; -} - -void RoaringNavigableMap64::AndNot(const RoaringNavigableMap64& other) { - impl_->bitmap -= other.impl_->bitmap; -} - -bool RoaringNavigableMap64::IsEmpty() const { - return impl_->bitmap.IsEmpty(); -} - -bool RoaringNavigableMap64::RunOptimize() { - // Note: RoaringBitmap64 doesn't have a direct RunOptimize method - // This is a placeholder - in practice, optimization happens automatically - return false; -} - -int64_t RoaringNavigableMap64::GetLongCardinality() const { - return impl_->bitmap.Cardinality(); -} - -int32_t RoaringNavigableMap64::GetIntCardinality() const { - return static_cast(impl_->bitmap.Cardinality()); -} - -void RoaringNavigableMap64::Clear() { - impl_->bitmap = RoaringBitmap64(); -} - -std::vector RoaringNavigableMap64::Serialize() const { - // This is a simplified serialization - in practice, you might want to use - // a more sophisticated approach - // Use default pool when no pool is provided - auto bytes = impl_->bitmap.Serialize(GetDefaultPool().get()); - if (!bytes) { - return {}; - } - - std::vector result(bytes->size()); - std::memcpy(result.data(), bytes->data(), bytes->size()); - return result; -} - -void RoaringNavigableMap64::Deserialize(const std::vector& data) { - // This is a simplified deserialization - in practice, you might want to use - // a more sophisticated approach - auto status = - impl_->bitmap.Deserialize(reinterpret_cast(data.data()), data.size()); - if (!status.ok()) { - // Log error or handle deserialization failure - // For now, we'll just clear the bitmap on error - impl_->bitmap = RoaringBitmap64(); - } -} - -std::vector RoaringNavigableMap64::ToRangeList() const { - std::vector ranges; - if (IsEmpty()) { - return ranges; - } - - int64_t current_start = -1; - int64_t current_end = -1; - - for (auto it = begin(); it != end(); ++it) { - int64_t value = *it; - if (current_start == -1) { - current_start = value; - current_end = value; - } else if (value == current_end + 1) { - // Continue the current range - current_end = value; - } else { - // End the current range and start a new one - ranges.emplace_back(current_start, current_end); - current_start = value; - current_end = value; - } - } - - if (current_start != -1) { - ranges.emplace_back(current_start, current_end); - } - - return ranges; -} - -const RoaringBitmap64& RoaringNavigableMap64::GetBitmap() const { - return impl_->bitmap; -} - -RoaringNavigableMap64 RoaringNavigableMap64::BitmapOf(const std::vector& values) { - RoaringNavigableMap64 result; - for (int64_t value : values) { - result.Add(value); - } - return result; -} - -RoaringNavigableMap64 RoaringNavigableMap64::And(const RoaringNavigableMap64& x1, - const RoaringNavigableMap64& x2) { - RoaringNavigableMap64 result; - result.impl_->bitmap = RoaringBitmap64::And(x1.impl_->bitmap, x2.impl_->bitmap); - return result; -} - -RoaringNavigableMap64 RoaringNavigableMap64::Or(const RoaringNavigableMap64& x1, - const RoaringNavigableMap64& x2) { - RoaringNavigableMap64 result; - result.impl_->bitmap = RoaringBitmap64::Or(x1.impl_->bitmap, x2.impl_->bitmap); - return result; -} - -bool RoaringNavigableMap64::operator==(const RoaringNavigableMap64& other) const { - return impl_->bitmap == other.impl_->bitmap; -} - -bool RoaringNavigableMap64::operator!=(const RoaringNavigableMap64& other) const { - return !(*this == other); -} - -// Iterator implementation -RoaringNavigableMap64::Iterator::Iterator(const RoaringNavigableMap64& bitmap) - : impl_(std::make_unique(bitmap.impl_->bitmap.Begin())) {} - -RoaringNavigableMap64::Iterator::Iterator(const RoaringNavigableMap64& bitmap, bool is_end) - : impl_(std::make_unique(bitmap.impl_->bitmap.End())) {} - -RoaringNavigableMap64::Iterator::Iterator(const Iterator& other) - : impl_(std::make_unique(other.impl_->iterator)) {} - -RoaringNavigableMap64::Iterator::Iterator(Iterator&& other) noexcept = default; - -RoaringNavigableMap64::Iterator& RoaringNavigableMap64::Iterator::operator=(const Iterator& other) { - if (this != &other) { - impl_ = std::make_unique(other.impl_->iterator); - } - return *this; -} - -RoaringNavigableMap64::Iterator& RoaringNavigableMap64::Iterator::operator=( - Iterator&& other) noexcept = default; - -RoaringNavigableMap64::Iterator::~Iterator() = default; - -int64_t RoaringNavigableMap64::Iterator::operator*() const { - return *impl_->iterator; -} - -RoaringNavigableMap64::Iterator& RoaringNavigableMap64::Iterator::operator++() { - ++impl_->iterator; - return *this; -} - -RoaringNavigableMap64::Iterator RoaringNavigableMap64::Iterator::operator++(int) { - Iterator temp(*this); - ++(*this); - return temp; -} - -bool RoaringNavigableMap64::Iterator::operator==(const Iterator& other) const { - return impl_->iterator == other.impl_->iterator; -} - -bool RoaringNavigableMap64::Iterator::operator!=(const Iterator& other) const { - return !(*this == other); -} - -RoaringNavigableMap64::Iterator RoaringNavigableMap64::begin() const { - return Iterator(*this); -} - -RoaringNavigableMap64::Iterator RoaringNavigableMap64::end() const { - // Create an iterator that represents the end - // For now, we'll create an iterator and set it to a special state - // In practice, this might need a more sophisticated approach - Iterator it(*this); - // Move to the end by advancing past the last element - auto underlying_end = impl_->bitmap.End(); - it.impl_->iterator = underlying_end; - return it; -} - -} // namespace paimon diff --git a/src/paimon/common/utils/roaring_navigable_map64.h b/src/paimon/common/utils/roaring_navigable_map64.h deleted file mode 100644 index 5602e3374..000000000 --- a/src/paimon/common/utils/roaring_navigable_map64.h +++ /dev/null @@ -1,238 +0,0 @@ -/* - * Copyright 2026-present Alibaba Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -#include "paimon/utils/range.h" -#include "paimon/utils/roaring_bitmap64.h" -#include "paimon/visibility.h" - -namespace paimon { - -/** - * A compressed bitmap for 64-bit integer aggregated by tree. - * This is a wrapper around RoaringBitmap64 that provides additional functionality - * and a more convenient interface. - */ -class PAIMON_EXPORT RoaringNavigableMap64 { - public: - /// Default constructor creates an empty bitmap - RoaringNavigableMap64(); - - /// Copy constructor - RoaringNavigableMap64(const RoaringNavigableMap64& other); - - /// Move constructor - RoaringNavigableMap64(RoaringNavigableMap64&& other) noexcept; - - /// Copy assignment operator - RoaringNavigableMap64& operator=(const RoaringNavigableMap64& other); - - /// Move assignment operator - RoaringNavigableMap64& operator=(RoaringNavigableMap64&& other) noexcept; - - /// Destructor - ~RoaringNavigableMap64(); - - /** - * Adds a range of values to the bitmap. - * @param range The range to add (inclusive of both endpoints) - */ - void AddRange(const Range& range); - - /** - * Checks if the bitmap contains the given value. - * @param x The value to check - * @return true if the value is in the bitmap, false otherwise - */ - bool Contains(int64_t x) const; - - /** - * Adds a single value to the bitmap. - * @param x The value to add - */ - void Add(int64_t x); - - /** - * Performs a bitwise OR operation with another bitmap. - * @param other The other bitmap to OR with - */ - void Or(const RoaringNavigableMap64& other); - - /** - * Performs a bitwise AND operation with another bitmap. - * @param other The other bitmap to AND with - */ - void And(const RoaringNavigableMap64& other); - - /** - * Performs a bitwise AND NOT operation with another bitmap. - * This removes all elements from this bitmap that are present in the other bitmap. - * @param other The other bitmap to AND NOT with - */ - void AndNot(const RoaringNavigableMap64& other); - - /** - * Checks if the bitmap is empty. - * @return true if the bitmap contains no elements, false otherwise - */ - bool IsEmpty() const; - - /** - * Optimizes the bitmap by applying run-length encoding. - * @return true if the bitmap was modified, false otherwise - */ - bool RunOptimize(); - - /** - * Gets the cardinality of the bitmap as a 64-bit integer. - * @return The number of elements in the bitmap - */ - int64_t GetLongCardinality() const; - - /** - * Gets the cardinality of the bitmap as a 32-bit integer. - * @return The number of elements in the bitmap (truncated to 32 bits) - */ - int32_t GetIntCardinality() const; - - /** - * Clears all elements from the bitmap. - */ - void Clear(); - - /** - * Serializes the bitmap to a byte array. - * @return A vector containing the serialized bitmap - */ - std::vector Serialize() const; - - /** - * Deserializes the bitmap from a byte array. - * @param data The byte array containing the serialized bitmap - */ - void Deserialize(const std::vector& data); - - /** - * Converts this bitmap to a list of contiguous ranges. - * This is useful for interoperability with APIs that expect std::vector. - * @return A vector of ranges representing the bitmap - */ - std::vector ToRangeList() const; - - /** - * Gets the internal RoaringBitmap64 without copying. - * This is an optimization to avoid O(n) conversion when the navigable map - * is no longer needed for modifications. - * @return A const reference to the internal RoaringBitmap64 - */ - const RoaringBitmap64& GetBitmap() const; - - /** - * Creates a new bitmap from a list of values. - * @param values The values to include in the bitmap - * @return A new RoaringNavigableMap64 containing the specified values - */ - static RoaringNavigableMap64 BitmapOf(const std::vector& values); - - /** - * Computes the intersection of two bitmaps. - * @param x1 The first bitmap - * @param x2 The second bitmap - * @return A new bitmap containing the intersection of the two input bitmaps - */ - static RoaringNavigableMap64 And(const RoaringNavigableMap64& x1, - const RoaringNavigableMap64& x2); - - /** - * Computes the union of two bitmaps. - * @param x1 The first bitmap - * @param x2 The second bitmap - * @return A new bitmap containing the union of the two input bitmaps - */ - static RoaringNavigableMap64 Or(const RoaringNavigableMap64& x1, - const RoaringNavigableMap64& x2); - - /** - * Equality operator. - * @param other The other bitmap to compare with - * @return true if the bitmaps are equal, false otherwise - */ - bool operator==(const RoaringNavigableMap64& other) const; - - /** - * Inequality operator. - * @param other The other bitmap to compare with - * @return true if the bitmaps are not equal, false otherwise - */ - bool operator!=(const RoaringNavigableMap64& other) const; - - /** - * Iterator class for iterating over the values in the bitmap. - */ - class Iterator { - public: - using iterator_category = std::forward_iterator_tag; - using value_type = int64_t; - using difference_type = std::ptrdiff_t; - using pointer = const int64_t*; - using reference = const int64_t&; - - explicit Iterator(const RoaringNavigableMap64& bitmap); - Iterator(const Iterator& other); - Iterator(Iterator&& other) noexcept; - Iterator& operator=(const Iterator& other); - Iterator& operator=(Iterator&& other) noexcept; - ~Iterator(); - - int64_t operator*() const; - Iterator& operator++(); - Iterator operator++(int); - bool operator==(const Iterator& other) const; - bool operator!=(const Iterator& other) const; - - private: - friend class RoaringNavigableMap64; - - class Impl; - std::unique_ptr impl_; - - // Private constructor for creating end iterator - Iterator(const RoaringNavigableMap64& bitmap, bool is_end); - }; - - /** - * Returns an iterator to the beginning of the bitmap. - * @return Iterator pointing to the first element - */ - Iterator begin() const; - - /** - * Returns an iterator to the end of the bitmap. - * @return Iterator pointing to the end - */ - Iterator end() const; - - private: - class Impl; - std::unique_ptr impl_; -}; - -} // namespace paimon diff --git a/src/paimon/common/utils/roaring_navigable_map64_test.cpp b/src/paimon/common/utils/roaring_navigable_map64_test.cpp deleted file mode 100644 index 5667fbea4..000000000 --- a/src/paimon/common/utils/roaring_navigable_map64_test.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright 2026-present Alibaba Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "paimon/common/utils/roaring_navigable_map64.h" - -#include - -#include -#include - -#include "paimon/utils/range.h" - -namespace paimon { - -class RoaringNavigableMap64Test : public ::testing::Test { - protected: - void SetUp() override {} - void TearDown() override {} -}; - -TEST_F(RoaringNavigableMap64Test, testAddRangeBasic) { - RoaringNavigableMap64 bitmap; - bitmap.AddRange(Range(5, 10)); - - // Verify the range [5, 10] is added (inclusive on both ends) - EXPECT_EQ(bitmap.GetLongCardinality(), 6); - EXPECT_FALSE(bitmap.Contains(4)); - EXPECT_TRUE(bitmap.Contains(5)); - EXPECT_TRUE(bitmap.Contains(7)); - EXPECT_TRUE(bitmap.Contains(10)); - EXPECT_FALSE(bitmap.Contains(11)); -} - -TEST_F(RoaringNavigableMap64Test, testAddRangeSingleElement) { - RoaringNavigableMap64 bitmap; - bitmap.AddRange(Range(100, 100)); - - // A range where from == to should add exactly one element - EXPECT_EQ(bitmap.GetLongCardinality(), 1); - EXPECT_FALSE(bitmap.Contains(99)); - EXPECT_TRUE(bitmap.Contains(100)); - EXPECT_FALSE(bitmap.Contains(101)); -} - -TEST_F(RoaringNavigableMap64Test, testAddRangeMultipleNonOverlapping) { - RoaringNavigableMap64 bitmap; - bitmap.AddRange(Range(0, 5)); - bitmap.AddRange(Range(10, 15)); - bitmap.AddRange(Range(20, 25)); - - // Verify cardinality: 6 + 6 + 6 = 18 - EXPECT_EQ(bitmap.GetLongCardinality(), 18); - - // Verify gaps are not filled - EXPECT_FALSE(bitmap.Contains(6)); - EXPECT_FALSE(bitmap.Contains(9)); - EXPECT_FALSE(bitmap.Contains(16)); - EXPECT_FALSE(bitmap.Contains(19)); - - // Verify ranges contain expected values - EXPECT_TRUE(bitmap.Contains(0)); - EXPECT_TRUE(bitmap.Contains(5)); - EXPECT_TRUE(bitmap.Contains(10)); - EXPECT_TRUE(bitmap.Contains(15)); - EXPECT_TRUE(bitmap.Contains(20)); - EXPECT_TRUE(bitmap.Contains(25)); - - // Verify ToRangeList reconstructs the ranges correctly - std::vector ranges = bitmap.ToRangeList(); - EXPECT_EQ(ranges.size(), 3); - EXPECT_EQ(ranges[0], Range(0, 5)); - EXPECT_EQ(ranges[1], Range(10, 15)); - EXPECT_EQ(ranges[2], Range(20, 25)); -} - -TEST_F(RoaringNavigableMap64Test, testAddRangeLargeValues) { - RoaringNavigableMap64 bitmap; - // Test with values beyond Integer.MAX_VALUE - int64_t start = static_cast(INT_MAX) + 100L; - int64_t end = static_cast(INT_MAX) + 200L; - bitmap.AddRange(Range(start, end)); - - EXPECT_EQ(bitmap.GetLongCardinality(), 101); - EXPECT_FALSE(bitmap.Contains(start - 1)); - EXPECT_TRUE(bitmap.Contains(start)); - EXPECT_TRUE(bitmap.Contains(start + 50)); - EXPECT_TRUE(bitmap.Contains(end)); - EXPECT_FALSE(bitmap.Contains(end + 1)); - - // Verify iteration order - std::vector values; - for (auto it = bitmap.begin(); it != bitmap.end(); ++it) { - values.push_back(*it); - } - EXPECT_EQ(values.size(), 101); - EXPECT_EQ(values[0], start); - EXPECT_EQ(values[100], end); -} - -} // namespace paimon From 3b79a01c105e135050582a73f69db8b699a9a16c Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 15 Apr 2026 18:19:50 +0800 Subject: [PATCH 24/28] address --- .../btree/btree_global_index_writer.cpp | 2 +- .../btree/btree_global_index_writer.h | 4 +- .../btree/btree_global_index_writer_test.cpp | 2 - .../btree/btree_global_indexer.cpp | 5 +- .../lookup/sort/sort_lookup_store_factory.cpp | 16 +----- .../common/memory/memory_segment_utils.cpp | 4 +- src/paimon/common/sst/sst_file_io_test.cpp | 53 +++---------------- src/paimon/common/sst/sst_file_reader.cpp | 25 +++++++-- src/paimon/common/sst/sst_file_reader.h | 15 ++++-- 9 files changed, 48 insertions(+), 78 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp index b0d5362db..ff58607c9 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp @@ -66,8 +66,8 @@ BTreeGlobalIndexWriter::BTreeGlobalIndexWriter( std::shared_ptr bloom_filter) : field_name_(field_name), arrow_type_(std::move(arrow_type)), - file_writer_(file_writer), pool_(pool), + file_writer_(file_writer), block_size_(block_size), null_bitmap_(std::make_shared()), has_nulls_(false), diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.h b/src/paimon/common/global_index/btree/btree_global_index_writer.h index 921e6c2a8..bbe215695 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.h +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.h @@ -71,13 +71,13 @@ class BTreeGlobalIndexWriter : public GlobalIndexWriter { private: std::string field_name_; std::shared_ptr arrow_type_; - std::shared_ptr file_writer_; std::shared_ptr pool_; + std::shared_ptr file_writer_; int32_t block_size_; // SST file writer (declared after pool_ to ensure correct destruction order) - std::shared_ptr output_stream_; std::unique_ptr sst_writer_; + std::shared_ptr output_stream_; std::string file_name_; // Track first and last keys for index meta diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp index e3512a6e7..27bb7fd05 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp @@ -197,8 +197,6 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteWithNulls) { const auto& meta = metas[0]; EXPECT_FALSE(meta.file_path.empty()); EXPECT_GT(meta.file_size, 0); - - // Verify that metadata contains null bitmap info (has_nulls should be true) EXPECT_NE(meta.metadata, nullptr); // Release the ArrowArray diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index 9b8182b0b..b80d37534 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -294,8 +294,9 @@ Result> BTreeGlobalIndexer::CreateReader( // Create SST file reader with footer information PAIMON_ASSIGN_OR_RAISE( std::shared_ptr sst_file_reader, - SstFileReader::Create(pool, in, *footer->GetIndexBlockHandle(), - footer->GetBloomFilterHandle(), result_comparator, cache_manager)); + SstFileReader::Create(in, *footer->GetIndexBlockHandle(), + footer->GetBloomFilterHandle(), result_comparator, cache_manager, + pool)); // prepare null_bitmap PAIMON_ASSIGN_OR_RAISE(std::shared_ptr null_bitmap, diff --git a/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp b/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp index 2bb365ca6..2d6bcf918 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp +++ b/src/paimon/common/lookup/sort/sort_lookup_store_factory.cpp @@ -36,20 +36,8 @@ Result> SortLookupStoreFactory::CreateReader( const std::shared_ptr& fs, const std::string& file_path, const std::shared_ptr& pool) const { PAIMON_ASSIGN_OR_RAISE(std::shared_ptr in, fs->Open(file_path)); - PAIMON_ASSIGN_OR_RAISE(uint64_t file_len, in->Length()); - PAIMON_RETURN_NOT_OK( - in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET)); - auto footer_bytes = Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool.get()); - PAIMON_RETURN_NOT_OK(in->Read(footer_bytes->data(), footer_bytes->size())); - auto footer_segment = MemorySegment::Wrap(std::move(footer_bytes)); - auto footer_slice = MemorySlice::Wrap(footer_segment); - auto footer_input = footer_slice.ToInput(); - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr read_footer, - SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input)); - PAIMON_ASSIGN_OR_RAISE( - std::shared_ptr reader, - SstFileReader::Create(pool, in, read_footer->GetIndexBlockHandle(), - read_footer->GetBloomFilterHandle(), comparator_, cache_manager_)); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr reader, + SstFileReader::CreateFromStream(in, comparator_, cache_manager_, pool)); return std::make_unique(in, reader); } diff --git a/src/paimon/common/memory/memory_segment_utils.cpp b/src/paimon/common/memory/memory_segment_utils.cpp index d2fc569b1..c2f844e73 100644 --- a/src/paimon/common/memory/memory_segment_utils.cpp +++ b/src/paimon/common/memory/memory_segment_utils.cpp @@ -63,9 +63,7 @@ PAIMON_UNIQUE_PTR MemorySegmentUtils::CopyToBytes(const std::vector 0) { - CopyToBytes(segments, offset, bytes.get(), 0, num_bytes); - } + CopyToBytes(segments, offset, bytes.get(), 0, num_bytes); return bytes; } diff --git a/src/paimon/common/sst/sst_file_io_test.cpp b/src/paimon/common/sst/sst_file_io_test.cpp index 5a08dc45d..a1ef71b7e 100644 --- a/src/paimon/common/sst/sst_file_io_test.cpp +++ b/src/paimon/common/sst/sst_file_io_test.cpp @@ -144,19 +144,8 @@ TEST_P(SstFileIOTest, TestSimple) { // test read ASSERT_OK_AND_ASSIGN(in, fs_->Open(index_path)); - ASSERT_OK_AND_ASSIGN(uint64_t file_len, in->Length()); - ASSERT_OK(in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET)); - auto footer_bytes = Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool_.get()); - ASSERT_OK(in->Read(footer_bytes->data(), footer_bytes->size())); - auto footer_segment = MemorySegment::Wrap(std::move(footer_bytes)); - auto footer_slice = MemorySlice::Wrap(footer_segment); - auto footer_input = footer_slice.ToInput(); - ASSERT_OK_AND_ASSIGN(std::unique_ptr read_footer, - SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input)); - ASSERT_OK_AND_ASSIGN( - auto reader, - SstFileReader::Create(pool_, in, read_footer->GetIndexBlockHandle(), - read_footer->GetBloomFilterHandle(), comparator_, cache_manager_)); + ASSERT_OK_AND_ASSIGN(auto reader, + SstFileReader::CreateFromStream(in, comparator_, cache_manager_, pool_)); // not exist key std::string k0 = "k0"; @@ -188,22 +177,9 @@ TEST_P(SstFileIOTest, TestJavaCompatibility) { std::string file = GetDataDir() + "/sst/" + param.file_path; ASSERT_OK_AND_ASSIGN(std::shared_ptr in, fs_->Open(file)); - // read footer - ASSERT_OK_AND_ASSIGN(uint64_t file_len, in->Length()); - ASSERT_OK(in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET)); - auto footer_bytes = Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool_.get()); - ASSERT_OK(in->Read(footer_bytes->data(), footer_bytes->size())); - auto footer_segment = MemorySegment::Wrap(std::move(footer_bytes)); - auto footer_slice = MemorySlice::Wrap(footer_segment); - auto footer_input = footer_slice.ToInput(); - ASSERT_OK_AND_ASSIGN(std::unique_ptr read_footer, - SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input)); - // test read - ASSERT_OK_AND_ASSIGN( - auto reader, - SstFileReader::Create(pool_, in, read_footer->GetIndexBlockHandle(), - read_footer->GetBloomFilterHandle(), comparator_, cache_manager_)); + ASSERT_OK_AND_ASSIGN(auto reader, + SstFileReader::CreateFromStream(in, comparator_, cache_manager_, pool_)); // not exist key std::string k0 = "10000"; ASSERT_FALSE(reader->Lookup(std::make_shared(k0, pool_.get())).value()); @@ -294,25 +270,8 @@ TEST_F(SstFileIOTest, TestIOException) { CHECK_HOOK_STATUS(in_result.status(), i); std::shared_ptr in = std::move(in_result).value(); - auto file_len_result = in->Length(); - CHECK_HOOK_STATUS(file_len_result.status(), i); - uint64_t file_len = file_len_result.value(); - - CHECK_HOOK_STATUS( - in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET), i); - auto footer_bytes = - Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool_.get()); - auto read_result = in->Read(footer_bytes->data(), footer_bytes->size()); - CHECK_HOOK_STATUS(read_result.status(), i); - auto footer_segment = MemorySegment::Wrap(std::move(footer_bytes)); - auto footer_slice = MemorySlice::Wrap(footer_segment); - auto footer_input = footer_slice.ToInput(); - auto read_footer_result = SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input); - CHECK_HOOK_STATUS(read_footer_result.status(), i); - - auto reader_result = SstFileReader::Create( - pool_, in, read_footer_result.value()->GetIndexBlockHandle(), - read_footer_result.value()->GetBloomFilterHandle(), comparator_, cache_manager_); + auto reader_result = + SstFileReader::CreateFromStream(in, comparator_, cache_manager_, pool_); CHECK_HOOK_STATUS(reader_result.status(), i); std::shared_ptr reader = std::move(reader_result).value(); diff --git a/src/paimon/common/sst/sst_file_reader.cpp b/src/paimon/common/sst/sst_file_reader.cpp index 1379509a5..80f691872 100644 --- a/src/paimon/common/sst/sst_file_reader.cpp +++ b/src/paimon/common/sst/sst_file_reader.cpp @@ -16,16 +16,17 @@ #include "paimon/common/sst/sst_file_reader.h" #include "fmt/format.h" +#include "paimon/common/lookup/sort/sort_lookup_store_footer.h" #include "paimon/common/sst/sst_file_utils.h" #include "paimon/common/utils/crc32c.h" #include "paimon/common/utils/murmurhash_utils.h" namespace paimon { Result> SstFileReader::Create( - const std::shared_ptr& pool, const std::shared_ptr& in, - const BlockHandle& index_block_handle, + const std::shared_ptr& in, const BlockHandle& index_block_handle, const std::shared_ptr& bloom_filter_handle, - MemorySlice::SliceComparator comparator, const std::shared_ptr& cache_manager) { + MemorySlice::SliceComparator comparator, const std::shared_ptr& cache_manager, + const std::shared_ptr& pool) { PAIMON_ASSIGN_OR_RAISE(std::string file_path, in->GetUri()); auto block_cache = std::make_shared(file_path, in, cache_manager, pool); @@ -63,6 +64,24 @@ Result> SstFileReader::Create( new SstFileReader(pool, block_cache, bloom_filter, reader, comparator)); } +Result> SstFileReader::CreateFromStream( + const std::shared_ptr& in, MemorySlice::SliceComparator comparator, + const std::shared_ptr& cache_manager, const std::shared_ptr& pool) { + PAIMON_ASSIGN_OR_RAISE(uint64_t file_len, in->Length()); + PAIMON_RETURN_NOT_OK( + in->Seek(file_len - SortLookupStoreFooter::ENCODED_LENGTH, SeekOrigin::FS_SEEK_SET)); + auto footer_bytes = Bytes::AllocateBytes(SortLookupStoreFooter::ENCODED_LENGTH, pool.get()); + PAIMON_RETURN_NOT_OK(in->Read(footer_bytes->data(), footer_bytes->size())); + auto footer_segment = MemorySegment::Wrap(std::move(footer_bytes)); + auto footer_slice = MemorySlice::Wrap(footer_segment); + auto footer_input = footer_slice.ToInput(); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr read_footer, + SortLookupStoreFooter::ReadSortLookupStoreFooter(&footer_input)); + return SstFileReader::Create(in, read_footer->GetIndexBlockHandle(), + read_footer->GetBloomFilterHandle(), std::move(comparator), + cache_manager, pool); +} + SstFileReader::SstFileReader(const std::shared_ptr& pool, const std::shared_ptr& block_cache, const std::shared_ptr& bloom_filter, diff --git a/src/paimon/common/sst/sst_file_reader.h b/src/paimon/common/sst/sst_file_reader.h index 6d513f9b2..607f26f08 100644 --- a/src/paimon/common/sst/sst_file_reader.h +++ b/src/paimon/common/sst/sst_file_reader.h @@ -40,11 +40,18 @@ class SstFileIterator; class PAIMON_EXPORT SstFileReader { public: static Result> Create( - const std::shared_ptr& pool, const std::shared_ptr& input, - const BlockHandle& index_block_handle, + const std::shared_ptr& input, const BlockHandle& index_block_handle, const std::shared_ptr& bloom_filter_handle, - MemorySlice::SliceComparator comparator, - const std::shared_ptr& cache_manager); + MemorySlice::SliceComparator comparator, const std::shared_ptr& cache_manager, + const std::shared_ptr& pool); + + /// Create an SstFileReader by reading the SortLookupStoreFooter from the given InputStream. + /// This method encapsulates the common pattern of reading the footer, parsing it, and + /// creating the reader, which avoids code duplication across callers. + static Result> CreateFromStream( + const std::shared_ptr& input, MemorySlice::SliceComparator comparator, + const std::shared_ptr& cache_manager, + const std::shared_ptr& pool); std::unique_ptr CreateIterator(); From b83bfc6a543da04a14ce53e908484e360b184aa0 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 15 Apr 2026 18:30:11 +0800 Subject: [PATCH 25/28] minor fix --- .../global_index/btree/btree_global_index_reader.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_global_index_reader.cpp b/src/paimon/common/global_index/btree/btree_global_index_reader.cpp index 9a3c88aa6..acf060236 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_reader.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_reader.cpp @@ -494,6 +494,11 @@ Result BTreeGlobalIndexReader::RangeQuery(const MemorySlice& lo bool first_block = true; + // Compare key with bounds using the comparator + if (!comparator_) { + return Status::Invalid("Comparator is not set for BTreeGlobalIndexReader"); + } + while (index_iterator->HasNext()) { // Get the next data block PAIMON_ASSIGN_OR_RAISE(std::unique_ptr data_iterator, @@ -513,11 +518,6 @@ Result BTreeGlobalIndexReader::RangeQuery(const MemorySlice& lo } } - // Compare key with bounds using the comparator - if (!comparator_) { - return Status::Invalid("Comparator is not set for BTreeGlobalIndexReader"); - } - // Iterate through entries in the data block while (data_iterator->HasNext()) { PAIMON_ASSIGN_OR_RAISE(std::unique_ptr entry, data_iterator->Next()); From 9462bf5173439e93b9b260707d91d285b835b7fa Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 15 Apr 2026 19:21:03 +0800 Subject: [PATCH 26/28] minor fix --- .../common/global_index/btree/btree_global_indexer.cpp | 5 ++--- src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp | 3 +++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index b80d37534..fd31171b9 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -294,9 +294,8 @@ Result> BTreeGlobalIndexer::CreateReader( // Create SST file reader with footer information PAIMON_ASSIGN_OR_RAISE( std::shared_ptr sst_file_reader, - SstFileReader::Create(in, *footer->GetIndexBlockHandle(), - footer->GetBloomFilterHandle(), result_comparator, cache_manager, - pool)); + SstFileReader::Create(in, *footer->GetIndexBlockHandle(), footer->GetBloomFilterHandle(), + result_comparator, cache_manager, pool)); // prepare null_bitmap PAIMON_ASSIGN_OR_RAISE(std::shared_ptr null_bitmap, diff --git a/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp b/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp index 7ed35a09b..c1a2f506f 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp +++ b/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp @@ -33,6 +33,9 @@ Result> SortLookupStoreFooter::ReadSortLo auto index_size = input->ReadInt(); BlockHandle index_block_handle(index_offset, index_size); + // skip padding + input->SetPosition(ENCODED_LENGTH - 4); + auto magic = input->ReadInt(); if (magic != MAGIC_NUMBER) { return Status::IOError( From ca9f594ed0b0959053f956cbe344709dc4f14e25 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 15 Apr 2026 20:14:46 +0800 Subject: [PATCH 27/28] address --- .../btree/btree_global_indexer.cpp | 59 ------------------- .../lookup/sort/sort_lookup_store_footer.cpp | 2 +- 2 files changed, 1 insertion(+), 60 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index fd31171b9..1983ee41f 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -78,16 +78,6 @@ static std::function CreateComp case FieldType::INT: case FieldType::DATE: return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - if (a.Length() < static_cast(sizeof(int32_t)) || - b.Length() < static_cast(sizeof(int32_t))) { - size_t min_len = - std::min(static_cast(a.Length()), static_cast(b.Length())); - int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); - if (cmp != 0) return cmp < 0 ? -1 : 1; - if (a.Length() < b.Length()) return -1; - if (a.Length() > b.Length()) return 1; - return 0; - } int32_t a_val = a.ReadInt(0); int32_t b_val = b.ReadInt(0); if (a_val < b_val) return -1; @@ -96,16 +86,6 @@ static std::function CreateComp }; case FieldType::BIGINT: return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - if (a.Length() < static_cast(sizeof(int64_t)) || - b.Length() < static_cast(sizeof(int64_t))) { - size_t min_len = - std::min(static_cast(a.Length()), static_cast(b.Length())); - int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); - if (cmp != 0) return cmp < 0 ? -1 : 1; - if (a.Length() < b.Length()) return -1; - if (a.Length() > b.Length()) return 1; - return 0; - } int64_t a_val = a.ReadLong(0); int64_t b_val = b.ReadLong(0); if (a_val < b_val) return -1; @@ -148,16 +128,6 @@ static std::function CreateComp } case FieldType::SMALLINT: return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - if (a.Length() < static_cast(sizeof(int16_t)) || - b.Length() < static_cast(sizeof(int16_t))) { - size_t min_len = - std::min(static_cast(a.Length()), static_cast(b.Length())); - int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); - if (cmp != 0) return cmp < 0 ? -1 : 1; - if (a.Length() < b.Length()) return -1; - if (a.Length() > b.Length()) return 1; - return 0; - } int16_t a_val = a.ReadShort(0); int16_t b_val = b.ReadShort(0); if (a_val < b_val) return -1; @@ -166,15 +136,6 @@ static std::function CreateComp }; case FieldType::TINYINT: return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - if (a.Length() < 1 || b.Length() < 1) { - size_t min_len = - std::min(static_cast(a.Length()), static_cast(b.Length())); - int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); - if (cmp != 0) return cmp < 0 ? -1 : 1; - if (a.Length() < b.Length()) return -1; - if (a.Length() > b.Length()) return 1; - return 0; - } int8_t a_val = a.ReadByte(0); int8_t b_val = b.ReadByte(0); if (a_val < b_val) return -1; @@ -183,16 +144,6 @@ static std::function CreateComp }; case FieldType::FLOAT: return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - if (a.Length() < static_cast(sizeof(float)) || - b.Length() < static_cast(sizeof(float))) { - size_t min_len = - std::min(static_cast(a.Length()), static_cast(b.Length())); - int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); - if (cmp != 0) return cmp < 0 ? -1 : 1; - if (a.Length() < b.Length()) return -1; - if (a.Length() > b.Length()) return 1; - return 0; - } // Read float from bytes (little-endian) float a_val, b_val; std::memcpy(&a_val, a.ReadStringView().data(), sizeof(float)); @@ -203,16 +154,6 @@ static std::function CreateComp }; case FieldType::DOUBLE: return [](const MemorySlice& a, const MemorySlice& b) -> int32_t { - if (a.Length() < static_cast(sizeof(double)) || - b.Length() < static_cast(sizeof(double))) { - size_t min_len = - std::min(static_cast(a.Length()), static_cast(b.Length())); - int cmp = memcmp(a.ReadStringView().data(), b.ReadStringView().data(), min_len); - if (cmp != 0) return cmp < 0 ? -1 : 1; - if (a.Length() < b.Length()) return -1; - if (a.Length() > b.Length()) return 1; - return 0; - } // Read double from bytes (little-endian) double a_val, b_val; std::memcpy(&a_val, a.ReadStringView().data(), sizeof(double)); diff --git a/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp b/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp index c1a2f506f..56b4fa6d9 100644 --- a/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp +++ b/src/paimon/common/lookup/sort/sort_lookup_store_footer.cpp @@ -34,7 +34,7 @@ Result> SortLookupStoreFooter::ReadSortLo BlockHandle index_block_handle(index_offset, index_size); // skip padding - input->SetPosition(ENCODED_LENGTH - 4); + PAIMON_RETURN_NOT_OK(input->SetPosition(ENCODED_LENGTH - 4)); auto magic = input->ReadInt(); if (magic != MAGIC_NUMBER) { From 365d865e2e2cdaa7aaef695867b30bbd9ab9b4d9 Mon Sep 17 00:00:00 2001 From: "zhangchaoming.zcm" Date: Wed, 15 Apr 2026 21:02:06 +0800 Subject: [PATCH 28/28] improve create BTreeGlobalIndexWriter --- .../btree_global_index_integration_test.cpp | 29 ++++---- .../btree/btree_global_index_writer.cpp | 66 +++++++------------ .../btree/btree_global_index_writer.h | 18 +++-- .../btree/btree_global_index_writer_test.cpp | 37 ++++++----- .../btree/btree_global_indexer.cpp | 26 +++++++- .../btree/btree_global_indexer_test.cpp | 4 +- 6 files changed, 92 insertions(+), 88 deletions(-) diff --git a/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp index 7be8a7214..7e5846393 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_integration_test.cpp @@ -18,6 +18,7 @@ #include #include +#include "paimon/common/compression/block_compression_factory.h" #include "paimon/common/global_index/btree/btree_global_index_writer.h" #include "paimon/common/global_index/btree/btree_global_indexer.h" #include "paimon/common/utils/arrow/status_utils.h" @@ -81,6 +82,7 @@ class BTreeGlobalIndexIntegrationTest : public ::testing::Test { test_dir_ = UniqueTestDirectory::Create("local"); fs_ = test_dir_->GetFileSystem(); base_path_ = test_dir_->Str(); + compression_factory_ = BlockCompressionFactory::Create(BlockCompressionType::NONE).value(); } void TearDown() override {} @@ -110,6 +112,7 @@ class BTreeGlobalIndexIntegrationTest : public ::testing::Test { } std::shared_ptr pool_; + std::shared_ptr compression_factory_; std::unique_ptr test_dir_; std::shared_ptr fs_; std::string base_path_; @@ -123,9 +126,9 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadIntData) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN( - auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, - 4096, 100000)); + ASSERT_OK_AND_ASSIGN(auto writer, + BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, + compression_factory_, pool_, 4096)); // Create an Arrow array with int values // Row IDs: 0->1, 1->2, 2->3, 3->2, 4->1, 5->4, 6->5, 7->5, 8->5 @@ -192,7 +195,7 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadStringData) { // Create the BTree global index writer ASSERT_OK_AND_ASSIGN(auto writer, BTreeGlobalIndexWriter::Create("string_field", c_schema.get(), file_writer, - pool_, 4096, 100000)); + compression_factory_, pool_, 4096)); // Create an Arrow array with string values auto array = arrow::ipc::internal::json::ArrayFromJSON( @@ -247,9 +250,9 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadWithNulls) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN( - auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, - 4096, 100000)); + ASSERT_OK_AND_ASSIGN(auto writer, + BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, + compression_factory_, pool_, 4096)); // Create an Arrow array with null values // Row IDs: 0->1, 1->null, 2->3, 3->null, 4->5 @@ -312,9 +315,9 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadRangeQuery) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN( - auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, - 4096, 100000)); + ASSERT_OK_AND_ASSIGN(auto writer, + BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, + compression_factory_, pool_, 4096)); // Create an Arrow array with int values auto array = @@ -376,9 +379,9 @@ TEST_F(BTreeGlobalIndexIntegrationTest, WriteAndReadInQuery) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN( - auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, - 4096, 100000)); + ASSERT_OK_AND_ASSIGN(auto writer, + BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, + compression_factory_, pool_, 4096)); // Create an Arrow array with int values auto array = diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp index ff58607c9..a3e3c6813 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.cpp @@ -21,7 +21,6 @@ #include #include "paimon/common/compression/block_compression_factory.h" -#include "paimon/common/memory/memory_segment.h" #include "paimon/common/memory/memory_slice_output.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/crc32c.h" @@ -34,17 +33,8 @@ namespace paimon { Result> BTreeGlobalIndexWriter::Create( const std::string& field_name, ::ArrowSchema* arrow_schema, const std::shared_ptr& file_writer, - const std::shared_ptr& pool, int32_t block_size, int64_t expected_entries) { - // Create and initialize bloom filter - auto bloom_filter = BloomFilter::Create(expected_entries, 0.01); - if (!bloom_filter) { - return Status::Invalid("Failed to create bloom filter"); - } - - int64_t bloom_filter_size = bloom_filter->ByteLength(); - auto bloom_filter_segment = MemorySegment::AllocateHeapMemory(bloom_filter_size, pool.get()); - PAIMON_RETURN_NOT_OK(bloom_filter->SetMemorySegment(bloom_filter_segment)); - + const std::shared_ptr& compression_factory, + const std::shared_ptr& pool, int32_t block_size) { // Import schema to get the field type std::shared_ptr arrow_type; if (arrow_schema) { @@ -55,15 +45,27 @@ Result> BTreeGlobalIndexWriter::Create( } } - return std::shared_ptr(new BTreeGlobalIndexWriter( - field_name, std::move(arrow_type), file_writer, pool, block_size, std::move(bloom_filter))); + auto writer = std::shared_ptr(new BTreeGlobalIndexWriter( + field_name, std::move(arrow_type), file_writer, compression_factory, pool, block_size)); + + // Initialize SST writer + if (!file_writer) { + return Status::Invalid("file_writer is null"); + } + PAIMON_ASSIGN_OR_RAISE(writer->file_name_, file_writer->NewFileName("btree")); + PAIMON_ASSIGN_OR_RAISE(writer->output_stream_, + file_writer->NewOutputStream(writer->file_name_)); + writer->sst_writer_ = std::make_unique(writer->output_stream_, nullptr, + block_size, compression_factory, pool); + + return writer; } BTreeGlobalIndexWriter::BTreeGlobalIndexWriter( const std::string& field_name, std::shared_ptr arrow_type, const std::shared_ptr& file_writer, - const std::shared_ptr& pool, int32_t block_size, - std::shared_ptr bloom_filter) + const std::shared_ptr& compression_factory, + const std::shared_ptr& pool, int32_t block_size) : field_name_(field_name), arrow_type_(std::move(arrow_type)), pool_(pool), @@ -71,8 +73,7 @@ BTreeGlobalIndexWriter::BTreeGlobalIndexWriter( block_size_(block_size), null_bitmap_(std::make_shared()), has_nulls_(false), - current_row_id_(0), - bloom_filter_(std::move(bloom_filter)) {} + current_row_id_(0) {} Status BTreeGlobalIndexWriter::AddBatch(::ArrowArray* arrow_array) { if (!arrow_array) { @@ -88,23 +89,6 @@ Status BTreeGlobalIndexWriter::AddBatch(::ArrowArray* arrow_array) { PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr array, arrow::ImportArray(arrow_array, arrow_type_)); - // Initialize SST writer on first batch - if (!sst_writer_) { - auto file_name_result = file_writer_->NewFileName(field_name_); - if (!file_name_result.ok()) { - return file_name_result.status(); - } - file_name_ = file_name_result.value(); - - PAIMON_ASSIGN_OR_RAISE(output_stream_, file_writer_->NewOutputStream(file_name_)); - - PAIMON_ASSIGN_OR_RAISE(auto compression_factory, - BlockCompressionFactory::Create(BlockCompressionType::NONE)); - - sst_writer_ = std::make_unique(output_stream_, bloom_filter_, block_size_, - compression_factory, pool_); - } - // Group row IDs by key value // Use std::map with custom comparator for binary keys // Keys are stored in binary format to match Java's serialization @@ -296,7 +280,7 @@ Result> BTreeGlobalIndexWriter::WriteNullBitmap( } Result> BTreeGlobalIndexWriter::Finish() { - if (!sst_writer_) { + if (current_row_id_ == 0) { // No data was written, return empty metadata return std::vector(); } @@ -308,18 +292,14 @@ Result> BTreeGlobalIndexWriter::Finish() { PAIMON_ASSIGN_OR_RAISE(std::shared_ptr null_bitmap_handle, WriteNullBitmap(output_stream_)); - // Write bloom filter - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr bloom_filter_handle, - sst_writer_->WriteBloomFilter()); - // Write index block PAIMON_ASSIGN_OR_RAISE(BlockHandle index_block_handle, sst_writer_->WriteIndexBlock()); - // Write BTree file footer + // Write BTree file footer (no bloom filter) auto index_block_handle_ptr = std::make_shared(index_block_handle.Offset(), index_block_handle.Size()); - auto footer = std::make_shared(bloom_filter_handle, index_block_handle_ptr, - null_bitmap_handle); + auto footer = + std::make_shared(nullptr, index_block_handle_ptr, null_bitmap_handle); auto footer_slice = BTreeFileFooter::Write(footer, pool_.get()); auto footer_bytes = footer_slice.CopyBytes(pool_.get()); PAIMON_RETURN_NOT_OK(output_stream_->Write(footer_bytes->data(), footer_bytes->size())); diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer.h b/src/paimon/common/global_index/btree/btree_global_index_writer.h index bbe215695..fb4bf0fc3 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer.h +++ b/src/paimon/common/global_index/btree/btree_global_index_writer.h @@ -32,12 +32,13 @@ namespace paimon { /// This writer builds an SST file where each key maps to a list of row IDs. class BTreeGlobalIndexWriter : public GlobalIndexWriter { public: - /// Factory method that may fail during initialization (e.g., bloom filter setup, + /// Factory method that may fail during initialization (e.g., /// Arrow schema import). Use this instead of the constructor. static Result> Create( const std::string& field_name, ::ArrowSchema* arrow_schema, const std::shared_ptr& file_writer, - const std::shared_ptr& pool, int32_t block_size, int64_t expected_entries); + const std::shared_ptr& compression_factory, + const std::shared_ptr& pool, int32_t block_size); ~BTreeGlobalIndexWriter() override = default; @@ -49,11 +50,11 @@ class BTreeGlobalIndexWriter : public GlobalIndexWriter { Result> Finish() override; private: - BTreeGlobalIndexWriter(const std::string& field_name, - std::shared_ptr arrow_type, - const std::shared_ptr& file_writer, - const std::shared_ptr& pool, int32_t block_size, - std::shared_ptr bloom_filter); + BTreeGlobalIndexWriter( + const std::string& field_name, std::shared_ptr arrow_type, + const std::shared_ptr& file_writer, + const std::shared_ptr& compression_factory, + const std::shared_ptr& pool, int32_t block_size); // Helper method to write a key-value pair to the SST file Status WriteKeyValue(std::shared_ptr key, const std::vector& row_ids); @@ -90,9 +91,6 @@ class BTreeGlobalIndexWriter : public GlobalIndexWriter { // Current row ID counter int64_t current_row_id_; - - // Bloom filter for the SST file - std::shared_ptr bloom_filter_; }; } // namespace paimon diff --git a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp index 27bb7fd05..933a5193d 100644 --- a/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_index_writer_test.cpp @@ -20,6 +20,7 @@ #include #include +#include "paimon/common/compression/block_compression_factory.h" #include "paimon/fs/file_system.h" #include "paimon/global_index/io/global_index_file_writer.h" #include "paimon/memory/memory_pool.h" @@ -63,6 +64,7 @@ class BTreeGlobalIndexWriterTest : public ::testing::Test { test_dir_ = UniqueTestDirectory::Create("local"); fs_ = test_dir_->GetFileSystem(); base_path_ = test_dir_->Str(); + compression_factory_ = BlockCompressionFactory::Create(BlockCompressionType::NONE).value(); } void TearDown() override {} @@ -77,6 +79,7 @@ class BTreeGlobalIndexWriterTest : public ::testing::Test { } std::shared_ptr pool_; + std::shared_ptr compression_factory_; std::unique_ptr test_dir_; std::shared_ptr fs_; std::string base_path_; @@ -90,9 +93,9 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteIntData) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN( - auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, - 4096, 100000)); + ASSERT_OK_AND_ASSIGN(auto writer, + BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, + compression_factory_, pool_, 4096)); // Create an Arrow array with int values auto array = @@ -134,7 +137,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteStringData) { // Create the BTree global index writer ASSERT_OK_AND_ASSIGN(auto writer, BTreeGlobalIndexWriter::Create("string_field", c_schema.get(), file_writer, - pool_, 4096, 100000)); + compression_factory_, pool_, 4096)); // Create an Arrow array with string values auto array = arrow::ipc::internal::json::ArrayFromJSON( @@ -173,9 +176,9 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteWithNulls) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN( - auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, - 4096, 100000)); + ASSERT_OK_AND_ASSIGN(auto writer, + BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, + compression_factory_, pool_, 4096)); // Create an Arrow array with null values auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[1, null, 3, null, 5]") @@ -214,9 +217,9 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteMultipleBatches) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN( - auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, - 4096, 100000)); + ASSERT_OK_AND_ASSIGN(auto writer, + BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, + compression_factory_, pool_, 4096)); // Create first batch auto array1 = @@ -262,9 +265,9 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteEmptyData) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN( - auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, - 4096, 100000)); + ASSERT_OK_AND_ASSIGN(auto writer, + BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, + compression_factory_, pool_, 4096)); // Finish without adding any data ASSERT_OK_AND_ASSIGN(auto metas, writer->Finish()); @@ -282,9 +285,9 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteAllNulls) { auto c_schema = CreateArrowSchema(arrow::int32(), "int_field"); // Create the BTree global index writer - ASSERT_OK_AND_ASSIGN( - auto writer, BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, pool_, - 4096, 100000)); + ASSERT_OK_AND_ASSIGN(auto writer, + BTreeGlobalIndexWriter::Create("int_field", c_schema.get(), file_writer, + compression_factory_, pool_, 4096)); // Create an Arrow array with all null values auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), "[null, null, null]") @@ -323,7 +326,7 @@ TEST_F(BTreeGlobalIndexWriterTest, WriteDoubleData) { // Create the BTree global index writer ASSERT_OK_AND_ASSIGN(auto writer, BTreeGlobalIndexWriter::Create("double_field", c_schema.get(), file_writer, - pool_, 4096, 100000)); + compression_factory_, pool_, 4096)); // Create an Arrow array with double values auto array = arrow::ipc::internal::json::ArrayFromJSON(arrow::float64(), "[1.5, 2.5, 3.5, 1.5]") diff --git a/src/paimon/common/global_index/btree/btree_global_indexer.cpp b/src/paimon/common/global_index/btree/btree_global_indexer.cpp index 1983ee41f..e72f4392a 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer.cpp @@ -20,6 +20,7 @@ #include #include "arrow/c/bridge.h" +#include "paimon/common/compression/block_compression_factory.h" #include "paimon/common/global_index/btree/btree_file_footer.h" #include "paimon/common/global_index/btree/btree_global_index_reader.h" #include "paimon/common/global_index/btree/btree_global_index_writer.h" @@ -32,6 +33,7 @@ #include "paimon/common/utils/date_time_utils.h" #include "paimon/common/utils/field_type_utils.h" #include "paimon/common/utils/options_utils.h" +#include "paimon/core/options/compress_options.h" #include "paimon/data/timestamp.h" #include "paimon/defs.h" #include "paimon/file_index/bitmap_index_result.h" @@ -62,9 +64,27 @@ Result> BTreeGlobalIndexer::CreateWriter( const std::string& field_name, ::ArrowSchema* arrow_schema, const std::shared_ptr& file_writer, const std::shared_ptr& pool) const { - PAIMON_ASSIGN_OR_RAISE( - auto writer, - BTreeGlobalIndexWriter::Create(field_name, arrow_schema, file_writer, pool, 4096, 100000)); + // Read block size from options (default: 64 KB) + auto block_size_str_result = + OptionsUtils::GetValueFromMap(options_, Options::BTREE_INDEX_BLOCK_SIZE); + int32_t block_size = 64 * 1024; // default 64 KB + if (block_size_str_result.ok()) { + PAIMON_ASSIGN_OR_RAISE(int64_t parsed_size, + MemorySize::ParseBytes(block_size_str_result.value())); + block_size = static_cast(parsed_size); + } + // Read compression options + auto compress_str = OptionsUtils::GetValueFromMap( + options_, Options::BTREE_INDEX_COMPRESSION, "none"); + auto compress_level = + OptionsUtils::GetValueFromMap(options_, Options::BTREE_INDEX_COMPRESSION_LEVEL, 1); + CompressOptions compress_options{compress_str.value(), compress_level.value()}; + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr compression_factory, + BlockCompressionFactory::Create(compress_options)); + + PAIMON_ASSIGN_OR_RAISE(auto writer, + BTreeGlobalIndexWriter::Create(field_name, arrow_schema, file_writer, + compression_factory, pool, block_size)); return writer; } diff --git a/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp b/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp index f69b9fc71..f01ab798c 100644 --- a/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp +++ b/src/paimon/common/global_index/btree/btree_global_indexer_test.cpp @@ -177,9 +177,9 @@ TEST_F(BTreeGlobalIndexerTest, CreateIndexer) { std::map options; BTreeGlobalIndexer indexer(options); - // CreateWriter should return NotImplemented + // CreateWriter with nullptr file_writer should fail auto writer_result = indexer.CreateWriter("test_field", nullptr, nullptr, pool_); - EXPECT_TRUE(writer_result.ok()); + EXPECT_FALSE(writer_result.ok()); } // Test RangeQuery boundary conditions conceptually