Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
3b9e47d
feat: btree global index
ChaomingZhangCN Mar 2, 2026
ba22e7a
feat: btree global index
ChaomingZhangCN Mar 4, 2026
8602449
types
ChaomingZhangCN Mar 11, 2026
8ff03de
Merge remote-tracking branch 'chaoming/main' into btree-global-index
ChaomingZhangCN Mar 13, 2026
f70a4ce
Merge remote-tracking branch 'github/main' into btree-global-index
ChaomingZhangCN Mar 23, 2026
fb4015f
feat: implement btree global index writer and fix compilation errors
ChaomingZhangCN Mar 26, 2026
93b0485
Merge remote-tracking branch 'github/main' into btree-global-index
ChaomingZhangCN Mar 26, 2026
5f4bbf8
Merge remote-tracking branch 'github/main' into btree-global-index
ChaomingZhangCN Apr 7, 2026
fd24bef
feat: implement B-tree global index compatibility
ChaomingZhangCN Apr 8, 2026
e371a1a
fix: correct binary format for BIGINT/TINYINT/SMALLINT to little-endian
ChaomingZhangCN Apr 8, 2026
b7e1305
fix: 修复 cpplint 错误 - 使用 C 风格字符串常量
ChaomingZhangCN Apr 8, 2026
f70bcdf
style: 使用 clang-format 修复代码格式
ChaomingZhangCN Apr 8, 2026
5f1981f
cpplint
ChaomingZhangCN Apr 8, 2026
caafcca
fix: resolve compiler warnings and errors in btree global index
ChaomingZhangCN Apr 8, 2026
e790dad
fix
ChaomingZhangCN Apr 8, 2026
e0cbd1d
Merge github/main into btree-global-index
ChaomingZhangCN Apr 8, 2026
8cdbc7f
Fix merge: keep SortLookupStoreFooter API, add CacheManager parameter
ChaomingZhangCN Apr 8, 2026
b42cb19
Fix CacheManager constructor call in btree_global_indexer.cpp
ChaomingZhangCN Apr 8, 2026
dd3d379
Merge main branch and add BTree Index configuration options
ChaomingZhangCN Apr 8, 2026
38a2b8b
Add trailing newlines to source files
ChaomingZhangCN Apr 8, 2026
858b068
Fix formatting in btree_index_meta_test.cpp
ChaomingZhangCN Apr 8, 2026
78f9bcc
fix: mark CacheValue constructor as explicit to prevent implicit conv…
ChaomingZhangCN Apr 8, 2026
e45da4c
fix: fix clang-tidy warnings and memory safety issues
ChaomingZhangCN Apr 9, 2026
7c87891
chore: add btree compatibility test data to Git LFS
ChaomingZhangCN Apr 9, 2026
96e6669
Merge branch 'main' into btree-global-index
ChaomingZhangCN Apr 10, 2026
b895910
address
ChaomingZhangCN Apr 14, 2026
cf5b585
Merge remote-tracking branch 'github/main' into btree-global-index
ChaomingZhangCN Apr 15, 2026
ff169c9
address
ChaomingZhangCN Apr 15, 2026
1f72308
fix
ChaomingZhangCN Apr 15, 2026
5709dc5
address
ChaomingZhangCN Apr 15, 2026
3b79a01
address
ChaomingZhangCN Apr 15, 2026
b83bfc6
minor fix
ChaomingZhangCN Apr 15, 2026
9462bf5
minor fix
ChaomingZhangCN Apr 15, 2026
ca9f594
address
ChaomingZhangCN Apr 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@
test/test_data/sst/none/79d01717-8380-4504-86e1-387e6c058d0a filter=lfs diff=lfs merge=lfs -text
test/test_data/sst/lz4/10540951-41d3-4216-aa2c-b15dfd25eb75 filter=lfs diff=lfs merge=lfs -text
test/test_data/sst/zstd/83d05c53-2353-4160-b756-d50dd851b474 filter=lfs diff=lfs merge=lfs -text
test/test_data/global_index/btree/btree_compatibility_data/*.bin filter=lfs diff=lfs merge=lfs -text
test/test_data/global_index/btree/btree_compatibility_data/*.bin.meta filter=lfs diff=lfs merge=lfs -text
18 changes: 18 additions & 0 deletions include/paimon/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,24 @@ struct PAIMON_EXPORT Options {
/// "lookup.cache-max-disk-size" - Max disk size for lookup cache, you can use this option
/// to limit the use of local disks. Default value is unlimited (INT64_MAX).
static const char LOOKUP_CACHE_MAX_DISK_SIZE[];
/// "btree-index.compression" - The compression algorithm to use for BTreeIndex.
/// Default value is "none".
static const char BTREE_INDEX_COMPRESSION[];
/// "btree-index.compression-level" - The compression level of the compression algorithm.
/// Default value is 1.
static const char BTREE_INDEX_COMPRESSION_LEVEL[];
/// "btree-index.block-size" - The block size to use for BTreeIndex.
/// Default value is 64 KB.
static const char BTREE_INDEX_BLOCK_SIZE[];
/// "btree-index.cache-size" - The cache size to use for BTreeIndex.
/// Default value is 128 MB.
static const char BTREE_INDEX_CACHE_SIZE[];
/// "btree-index.high-priority-pool-ratio" - The high priority pool ratio to use for BTreeIndex.
/// Default value is 0.1.
static const char BTREE_INDEX_HIGH_PRIORITY_POOL_RATIO[];
/// "btree-index.records-per-range" - The expected number of records per BTree Index File.
/// Default value is 1000000.
static const char BTREE_INDEX_RECORDS_PER_RANGE[];
};

static constexpr int64_t BATCH_WRITE_COMMIT_IDENTIFIER = std::numeric_limits<int64_t>::max();
Expand Down
6 changes: 6 additions & 0 deletions include/paimon/file_index/file_index_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ class PAIMON_EXPORT FileIndexReader : public FunctionVisitor<std::shared_ptr<Fil
Result<std::shared_ptr<FileIndexResult>> VisitContains(const Literal& literal) override;

Result<std::shared_ptr<FileIndexResult>> VisitLike(const Literal& literal) override;

/// Combines already-evaluated child results with logical AND.
/// The implementation rejects an empty `children` list with Status::Invalid, checks each
/// child right before it is consumed, and folds the values left to right through
/// FileIndexResult::And, returning early on the first child or And call that is not ok.
Result<std::shared_ptr<FileIndexResult>> VisitAnd(
const std::vector<Result<std::shared_ptr<FileIndexResult>>>& children) override;

/// Combines already-evaluated child results with logical OR.
/// The implementation rejects an empty `children` list with Status::Invalid, checks each
/// child right before it is consumed, and folds the values left to right through
/// FileIndexResult::Or, returning early on the first child or Or call that is not ok.
Result<std::shared_ptr<FileIndexResult>> VisitOr(
const std::vector<Result<std::shared_ptr<FileIndexResult>>>& children) override;
};

} // namespace paimon
14 changes: 14 additions & 0 deletions include/paimon/global_index/global_index_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,20 @@ class PAIMON_EXPORT GlobalIndexReader : public FunctionVisitor<std::shared_ptr<G
virtual Result<std::shared_ptr<GlobalIndexResult>> VisitFullTextSearch(
const std::shared_ptr<FullTextSearch>& full_text_search) = 0;

/// Logical AND over the results of multiple child predicates.
/// This base implementation combines nothing: it ignores `children` and always fails with
/// Status::NotImplemented. Readers that can evaluate AND are expected to override it.
Result<std::shared_ptr<GlobalIndexResult>> VisitAnd(
    const std::vector<Result<std::shared_ptr<GlobalIndexResult>>>& children) override {
    static constexpr char kUnsupported[] = "AND operations not supported by this index type";
    return Status::NotImplemented(kUnsupported);
}

/// Logical OR over the results of multiple child predicates.
/// This base implementation combines nothing: it ignores `children` and always fails with
/// Status::NotImplemented. Readers that can evaluate OR are expected to override it.
Result<std::shared_ptr<GlobalIndexResult>> VisitOr(
    const std::vector<Result<std::shared_ptr<GlobalIndexResult>>>& children) override {
    static constexpr char kUnsupported[] = "OR operations not supported by this index type";
    return Status::NotImplemented(kUnsupported);
}

/// @return true if the reader is thread-safe; false otherwise.
virtual bool IsThreadSafe() const = 0;

Expand Down
36 changes: 36 additions & 0 deletions include/paimon/predicate/function_visitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,5 +74,41 @@ class PAIMON_EXPORT FunctionVisitor {

/// Evaluates whether string values like the given string.
virtual Result<T> VisitLike(const Literal& literal) = 0;

/// Evaluates the BETWEEN predicate with the given lower and upper bounds.
///
/// The default implementation is composed from other visitor calls:
/// VisitGreaterOrEqual(from) and VisitLessOrEqual(to) are evaluated in that order and their
/// values are handed to VisitAnd. Being virtual, it can be overridden by subclasses.
///
/// @param from lower bound, forwarded to VisitGreaterOrEqual.
/// @param to upper bound, forwarded to VisitLessOrEqual.
/// @return the VisitAnd result, or the status of the first bound evaluation that failed.
virtual Result<T> VisitBetween(const Literal& from, const Literal& to) {
// Default implementation: BETWEEN is equivalent to >= AND <=
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if VisitBetween is strictly necessary, since PredicateBuilder::Between is achieved by AND of GreaterOrEqual and LessOrEqual. And VisitAnd and VisitOr for FileIndexReader is only used for Between?

// Lower-bound side; on failure the upper bound is never evaluated.
auto lower_result = VisitGreaterOrEqual(from);
if (!lower_result.ok()) {
return lower_result.status();
}
// Upper-bound side.
auto upper_result = VisitLessOrEqual(to);
if (!upper_result.ok()) {
return upper_result.status();
}
// Both sides succeeded: move their values out and let VisitAnd intersect them.
return VisitAnd({std::move(lower_result).value(), std::move(upper_result).value()});
}

/// Evaluates NOT BETWEEN for the bounds `from` and `to`.
///
/// Default behaviour: the predicate is decomposed into VisitLessThan(from) OR
/// VisitGreaterThan(to). The two sides are evaluated in that order; if either one fails, its
/// status is returned immediately and VisitOr is never reached.
virtual Result<T> VisitNotBetween(const Literal& from, const Literal& to) {
    auto below_range = VisitLessThan(from);
    if (below_range.ok()) {
        auto above_range = VisitGreaterThan(to);
        if (above_range.ok()) {
            // Both sides are available: union them.
            return VisitOr({std::move(below_range).value(), std::move(above_range).value()});
        }
        return above_range.status();
    }
    return below_range.status();
}

// ----------------- Compound functions ------------------------

/// Evaluates the AND predicate across multiple child results.
virtual Result<T> VisitAnd(const std::vector<Result<T>>& children) = 0;

/// Evaluates the OR predicate across multiple child results.
virtual Result<T> VisitOr(const std::vector<Result<T>>& children) = 0;
};
} // namespace paimon
8 changes: 7 additions & 1 deletion src/paimon/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ set(PAIMON_COMMON_SRCS
common/io/cache/lru_cache.cpp
common/logging/logging.cpp
common/lookup/sort/sort_lookup_store_factory.cpp
common/lookup/sort/sort_lookup_store_footer.cpp
common/lookup/lookup_store_factory.cpp
common/memory/bytes.cpp
common/memory/memory_pool.cpp
Expand Down Expand Up @@ -112,7 +113,6 @@ set(PAIMON_COMMON_SRCS
common/reader/complete_row_kind_batch_reader.cpp
common/reader/data_evolution_file_reader.cpp
common/sst/block_handle.cpp
common/sst/block_footer.cpp
common/sst/block_iterator.cpp
common/sst/block_trailer.cpp
common/sst/block_reader.cpp
Expand Down Expand Up @@ -403,6 +403,12 @@ if(PAIMON_BUILD_TESTS)
common/global_index/bitmap_global_index_result_test.cpp
common/global_index/bitmap_scored_global_index_result_test.cpp
common/global_index/bitmap/bitmap_global_index_test.cpp
common/global_index/btree/btree_index_meta_test.cpp
common/global_index/btree/btree_file_footer_test.cpp
common/global_index/btree/btree_global_indexer_test.cpp
common/global_index/btree/btree_global_index_writer_test.cpp
common/global_index/btree/btree_global_index_integration_test.cpp
common/global_index/btree/btree_compatibility_test.cpp
common/global_index/rangebitmap/range_bitmap_global_index_test.cpp
common/global_index/wrap/file_index_reader_wrapper_test.cpp
common/io/byte_array_input_stream_test.cpp
Expand Down
3 changes: 2 additions & 1 deletion src/paimon/common/compression/block_compression_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,12 @@
#include "paimon/common/compression/block_decompressor.h"
#include "paimon/core/options/compress_options.h"
#include "paimon/result.h"
#include "paimon/visibility.h"
namespace paimon {

/// Each compression codec has an implementation of {@link BlockCompressionFactory} to create
/// compressors and decompressors.
class BlockCompressionFactory {
class PAIMON_EXPORT BlockCompressionFactory {
public:
static Result<std::shared_ptr<BlockCompressionFactory>> Create(
const CompressOptions& compression);
Expand Down
6 changes: 6 additions & 0 deletions src/paimon/common/defs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,5 +119,11 @@ const char Options::LOOKUP_CACHE_MAX_MEMORY_SIZE[] = "lookup.cache-max-memory-si
const char Options::LOOKUP_CACHE_HIGH_PRIO_POOL_RATIO[] = "lookup.cache.high-priority-pool-ratio";
const char Options::LOOKUP_CACHE_FILE_RETENTION[] = "lookup.cache-file-retention";
const char Options::LOOKUP_CACHE_MAX_DISK_SIZE[] = "lookup.cache-max-disk-size";
const char Options::BTREE_INDEX_COMPRESSION[] = "btree-index.compression";
const char Options::BTREE_INDEX_COMPRESSION_LEVEL[] = "btree-index.compression-level";
const char Options::BTREE_INDEX_BLOCK_SIZE[] = "btree-index.block-size";
const char Options::BTREE_INDEX_CACHE_SIZE[] = "btree-index.cache-size";
const char Options::BTREE_INDEX_HIGH_PRIORITY_POOL_RATIO[] = "btree-index.high-priority-pool-ratio";
const char Options::BTREE_INDEX_RECORDS_PER_RANGE[] = "btree-index.records-per-range";

} // namespace paimon
10 changes: 10 additions & 0 deletions src/paimon/common/file_index/empty/empty_file_index_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,16 @@ class EmptyFileIndexReader : public FileIndexReader {
const std::vector<Literal>& literals) override {
return FileIndexResult::Skip();
}

/// BETWEEN on this reader always yields FileIndexResult::Skip(), whatever the bounds are.
Result<std::shared_ptr<FileIndexResult>> VisitBetween(const Literal& from,
                                                      const Literal& to) override {
    auto verdict = FileIndexResult::Skip();
    return verdict;
}

/// NOT BETWEEN on this reader always yields FileIndexResult::Remain(), whatever the bounds
/// are.
Result<std::shared_ptr<FileIndexResult>> VisitNotBetween(const Literal& from,
                                                         const Literal& to) override {
    auto verdict = FileIndexResult::Remain();
    return verdict;
}
};

} // namespace paimon
40 changes: 40 additions & 0 deletions src/paimon/common/file_index/file_index_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,44 @@ Result<std::shared_ptr<FileIndexResult>> FileIndexReader::VisitNotIn(
}
return file_index_result;
}

/// Folds the child results together with FileIndexResult::And, left to right.
///
/// An empty `children` list is rejected with Status::Invalid. Each child is checked
/// immediately before it is consumed, so evaluation stops at the first child that is not ok
/// or at the first And call that fails.
Result<std::shared_ptr<FileIndexResult>> FileIndexReader::VisitAnd(
    const std::vector<Result<std::shared_ptr<FileIndexResult>>>& children) {
    const size_t child_count = children.size();
    if (child_count == 0) {
        return Status::Invalid("VisitAnd called with no children");
    }

    // Seed the accumulator from the leading child.
    const auto& head = children.front();
    PAIMON_RETURN_NOT_OK(head);
    auto accumulated = head.value();

    // Intersect the accumulator with every following child in order.
    size_t index = 1;
    while (index < child_count) {
        const auto& next = children[index];
        PAIMON_RETURN_NOT_OK(next);
        auto operand = next.value();
        PAIMON_ASSIGN_OR_RAISE(accumulated, accumulated->And(operand));
        ++index;
    }

    return accumulated;
}

// Folds the child results together with FileIndexResult::Or, left to right.
// An empty `children` list is rejected with Status::Invalid; each child is checked right
// before it is consumed, so evaluation stops at the first child that is not ok or at the
// first Or call that fails.
Result<std::shared_ptr<FileIndexResult>> FileIndexReader::VisitOr(
const std::vector<Result<std::shared_ptr<FileIndexResult>>>& children) {
if (children.empty()) {
return Status::Invalid("VisitOr called with no children");
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using Result<T> as a function parameter is generally not recommended — Result should be used for return types, not inputs. Also, before adding more visitor methods like VisitOr, could we first confirm whether FileIndexReader truly needs this interface?

}

// Start with the first child
PAIMON_RETURN_NOT_OK(children[0]);
auto current = children[0].value();

// OR with remaining children
for (size_t i = 1; i < children.size(); ++i) {
// Check the child before reading its value.
PAIMON_RETURN_NOT_OK(children[i]);
auto child = children[i].value();
// Replace the accumulator with the union; a failing Or ends the fold.
PAIMON_ASSIGN_OR_RAISE(current, current->Or(child));
}

return current;
}
} // namespace paimon
9 changes: 8 additions & 1 deletion src/paimon/common/global_index/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@
# limitations under the License.

set(PAIMON_GLOBAL_INDEX_SRC
bitmap/bitmap_global_index.cpp bitmap/bitmap_global_index_factory.cpp
bitmap/bitmap_global_index.cpp
bitmap/bitmap_global_index_factory.cpp
btree/btree_file_footer.cpp
btree/btree_global_index_factory.cpp
btree/btree_global_indexer.cpp
btree/btree_global_index_reader.cpp
btree/btree_global_index_writer.cpp
btree/btree_index_meta.cpp
rangebitmap/range_bitmap_global_index.cpp
rangebitmap/range_bitmap_global_index_factory.cpp)

Expand Down
Loading
Loading