From 8f2bb321fe9ba3e28d59da4fddb2ed970c508c94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A0=D0=BE=D0=B1=D0=B5=D1=80=D1=82=20=D0=A1=D0=BC=D0=B0?= =?UTF-8?q?=D0=B9=D1=82?= <111169073+RSMT98@users.noreply.github.com> Date: Sun, 1 Mar 2026 18:23:50 +0300 Subject: [PATCH 1/5] empty commit to trigger CI From 6578c9d13af836b91b41e26d55102688e09dfa20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A0=D0=BE=D0=B1=D0=B5=D1=80=D1=82=20=D0=A1=D0=BC=D0=B0?= =?UTF-8?q?=D0=B9=D1=82?= <111169073+RSMT98@users.noreply.github.com> Date: Sun, 1 Mar 2026 21:36:51 +0300 Subject: [PATCH 2/5] added continuity ledger; fixed deduplication in coverage.txt; increased coverage of rmm-tree to 90+% --- AGENTS.md | 30 +++++++ scripts/coverage_report.sh | 2 +- src/tests/test_rmm.cpp | 171 +++++++++++++++++++++++++++++++++++++ 3 files changed, 202 insertions(+), 1 deletion(-) diff --git a/AGENTS.md b/AGENTS.md index 5422c3e..1407d03 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,5 +1,35 @@ # AGENTS.md - AI Coding Assistant Guidelines for Pixie +## Continuity Ledger (compaction-safe) +Maintain a single Continuity Ledger for this workspace in `./CONTINUITY.md`. The ledger is the canonical session briefing designed to survive context compaction; do not rely on earlier chat text unless it’s reflected in the ledger. + +### How it works +- At the start of every assistant turn: read `./CONTINUITY.md`, update it to reflect the latest goal/constraints/decisions/state, then proceed with the work. +- Update `./CONTINUITY.md` again whenever any of these change: goal, constraints/assumptions, key decisions, progress state (Done/Now/Next), or important tool outcomes. +- Keep it short and stable: facts only, no transcripts. Prefer bullets. Mark uncertainty as `UNCONFIRMED` (never guess). +- If you notice missing recall or a compaction/summary event: refresh/rebuild the ledger from visible context, mark gaps `UNCONFIRMED`, ask up to 1–3 targeted questions, then continue. + +### `functions.update_plan` vs the Ledger +- `functions.update_plan` is for short-term execution scaffolding while you work (a small 3–7 step plan with pending/in_progress/completed). +- `./CONTINUITY.md` is for long-running continuity across compaction (the “what/why/current state”), not a step-by-step task list. +- Keep them consistent: when the plan or state changes, update the ledger at the intent/progress level (not every micro-step). + +### In replies +- Begin with a brief “Ledger Snapshot” (Goal + Now/Next + Open Questions). Print the full ledger only when it materially changes or when the user asks. + +### `./CONTINUITY.md` format (keep headings) +- Goal (incl. success criteria): +- Constraints/Assumptions: +- Key decisions: +- State: +- Done: +- Now: +- Next: +- Open questions (UNCONFIRMED if needed): +- Working set (files/ids/commands): + +--- + ## Project Overview Pixie is a **succinct data structures library** written in C++20. It provides space-efficient data structures that use close to the theoretical minimum space while supporting efficient queries. The library targets practical performance for data sizes up to 2^64 bits. diff --git a/scripts/coverage_report.sh b/scripts/coverage_report.sh index af51064..0820856 100755 --- a/scripts/coverage_report.sh +++ b/scripts/coverage_report.sh @@ -12,7 +12,7 @@ cmake --build --preset coverage "${BUILD_DIR}/test_rmm" cd "${BUILD_DIR}" -find . -name "*.gcno" -o -name "*.gcda" > gcov_files.txt +find . -name "*.gcda" > gcov_files.txt while read -r f; do case "${f}" in *"/third_party/"*|*"/src/benchmarks/"*) diff --git a/src/tests/test_rmm.cpp b/src/tests/test_rmm.cpp index 59cc667..e6da921 100644 --- a/src/tests/test_rmm.cpp +++ b/src/tests/test_rmm.cpp @@ -511,6 +511,177 @@ TEST(RmMEdgeCases, EmptyInput) { EXPECT_EQ(rm.range_max_query_pos(0, 0), nv.range_max_query_pos(0, 0)); } +static void expect_rank_select_equal(const pixie::RmMTree& rm, + const NaiveRmM& nv, + const size_t& n) { + for (size_t x = 0; x <= n; ++x) { + EXPECT_EQ(rm.rank1(x), nv.rank1(x)) << "rank1 x=" << x; + EXPECT_EQ(rm.rank0(x), nv.rank0(x)) << "rank0 x=" << x; + EXPECT_EQ(rm.rank10(x), nv.rank10(x)) << "rank10 x=" << x; + } + + const size_t ones = nv.rank1(n); + const size_t zeros = n - ones; + const size_t pairs10 = (n >= 2 ? nv.rank10(n) : 0); + + for (size_t k = 1; k <= ones + 1; ++k) { + EXPECT_EQ(rm.select1(k), nv.select1(k)) << "select1 k=" << k; + } + for (size_t k = 1; k <= zeros + 1; ++k) { + EXPECT_EQ(rm.select0(k), nv.select0(k)) << "select0 k=" << k; + } + for (size_t k = 1; k <= pairs10 + 1; ++k) { + EXPECT_EQ(rm.select10(k), nv.select10(k)) << "select10 k=" << k; + } +} + +static void expect_range_ops_equal(const pixie::RmMTree& rm, + const NaiveRmM& nv, + const size_t& n) { + if (n == 0) { + return; + } + std::mt19937_64 rng(42); + std::uniform_int_distribution pos(0, n - 1); + for (int t = 0; t < 512; ++t) { + size_t i = pos(rng); + size_t j = pos(rng); + if (i > j) { + std::swap(i, j); + } + + EXPECT_EQ(rm.range_min_query_pos(i, j), nv.range_min_query_pos(i, j)); + EXPECT_EQ(rm.range_min_query_val(i, j), nv.range_min_query_val(i, j)); + EXPECT_EQ(rm.range_max_query_pos(i, j), nv.range_max_query_pos(i, j)); + EXPECT_EQ(rm.range_max_query_val(i, j), nv.range_max_query_val(i, j)); + + size_t cnt = nv.mincount(i, j); + EXPECT_EQ(rm.mincount(i, j), cnt); + size_t k = std::uniform_int_distribution(1, cnt + 1)(rng); + EXPECT_EQ(rm.minselect(i, j, k), nv.minselect(i, j, k)); + } +} + +TEST(RmMEdgeCases, MultiwordPattern10AcrossWordBoundaries) { + const size_t n = 640; + std::string bits(n, '1'); + + for (size_t i = 0; i + 1 < n; i += 3) { + bits[i] = '1'; + bits[i + 1] = '0'; + } + for (size_t boundary = 63; boundary + 1 < n; boundary += 64) { + bits[boundary] = '1'; + bits[boundary + 1] = '0'; + } + + pixie::RmMTree rm(bits, /*leaf_block_bits=*/256); + NaiveRmM nv(bits); + + expect_rank_select_equal(rm, nv, n); + expect_range_ops_equal(rm, nv, n); +} + +TEST(RmMEdgeCases, PartialLastLeafSelects) { + const size_t n = 600; + + std::string mostly_zero(n, '0'); + for (size_t i = 576; i < n; ++i) { + mostly_zero[i] = '1'; + } + pixie::RmMTree rm_select1(mostly_zero, /*leaf_block_bits=*/256); + NaiveRmM nv_select1(mostly_zero); + expect_rank_select_equal(rm_select1, nv_select1, n); + + std::string mostly_one(n, '1'); + for (size_t i = 576; i < n; ++i) { + mostly_one[i] = '0'; + } + pixie::RmMTree rm_select0(mostly_one, /*leaf_block_bits=*/256); + NaiveRmM nv_select0(mostly_one); + expect_rank_select_equal(rm_select0, nv_select0, n); +} + +/** + * Invalid arguments should fail fast and return npos/0 as specified. + * Covers bad ranks, bad ranges and out-of-bounds BP navigation calls. + */ +TEST(RmMEdgeCases, InvalidArgumentsGuards) { + const size_t n = 600; + std::string bits(n, '1'); + for (size_t i = 0; i < n; i += 5) { + bits[i] = '0'; + } + + pixie::RmMTree rm(bits, /*leaf_block_bits=*/256); + + EXPECT_EQ(rm.select1(0), pixie::RmMTree::npos); + EXPECT_EQ(rm.select0(0), pixie::RmMTree::npos); + EXPECT_EQ(rm.select10(0), pixie::RmMTree::npos); + + EXPECT_EQ(rm.fwdsearch(n, 0), pixie::RmMTree::npos); + EXPECT_EQ(rm.bwdsearch(0, 0), pixie::RmMTree::npos); + EXPECT_EQ(rm.bwdsearch(n + 1, 0), pixie::RmMTree::npos); + + EXPECT_EQ(rm.range_min_query_pos(10, 9), pixie::RmMTree::npos); + EXPECT_EQ(rm.range_min_query_pos(0, n), pixie::RmMTree::npos); + EXPECT_EQ(rm.range_max_query_pos(10, 9), pixie::RmMTree::npos); + EXPECT_EQ(rm.range_max_query_pos(0, n), pixie::RmMTree::npos); + EXPECT_EQ(rm.range_min_query_val(10, 9), 0); + EXPECT_EQ(rm.range_max_query_val(10, 9), 0); + EXPECT_EQ(rm.mincount(10, 9), 0); + EXPECT_EQ(rm.minselect(10, 9, 1), pixie::RmMTree::npos); + EXPECT_EQ(rm.minselect(0, n - 1, 0), pixie::RmMTree::npos); + + EXPECT_EQ(rm.close(n), pixie::RmMTree::npos); + EXPECT_EQ(rm.open(0), pixie::RmMTree::npos); + EXPECT_EQ(rm.open(n + 1), pixie::RmMTree::npos); + EXPECT_EQ(rm.enclose(0), pixie::RmMTree::npos); + EXPECT_EQ(rm.enclose(n + 1), pixie::RmMTree::npos); +} + +/** + * bit_count is larger than the provided words buffer. + * Verifies that words beyond the provided buffer are treated as zeros after + * resize. + */ +TEST(RmMEdgeCases, WordsConstructorResizesInputStorage) { + std::vector words = {0xAAAAAAAAAAAAAAAAull}; + const size_t bit_count = 300; + + pixie::RmMTree rm(words, bit_count, /*leaf_block_bits=*/64); + NaiveRmM nv(words, bit_count); + + expect_rank_select_equal(rm, nv, bit_count); + expect_range_ops_equal(rm, nv, bit_count); +} + +/** + * Same bitvector built through different configuration paths (auto vs explicit + * leaf size, different overhead caps, and words-based constructor). Query + * results must be identical. + */ +TEST(RmMEdgeCases, ExplicitBuildParametersAndOverheadCap) { + std::mt19937_64 rng(42); + const size_t n = 128; + const std::string bits = random_bits(rng, n); + NaiveRmM nv(bits); + + pixie::RmMTree rm_auto(bits, /*leaf_block_bits=*/0, /*max_overhead=*/1.f); + pixie::RmMTree rm_explicit(bits, /*leaf_block_bits=*/512, + /*max_overhead=*/2.f); + auto words = pack_words_lsb_first(bits); + pixie::RmMTree rm_words(words, n, /*leaf_block_bits=*/256, + /*max_overhead=*/1.f); + + expect_rank_select_equal(rm_auto, nv, n); + expect_range_ops_equal(rm_auto, nv, n); + expect_rank_select_equal(rm_explicit, nv, n); + expect_range_ops_equal(rm_explicit, nv, n); + expect_rank_select_equal(rm_words, nv, n); + expect_range_ops_equal(rm_words, nv, n); +} + TEST(RmMTreeStress, LongRandom) { Limits L; L.OPS_PER_CASE = 2000; From 20a42e2cd9a3f97beb81d4db7737997e9b5a2462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A0=D0=BE=D0=B1=D0=B5=D1=80=D1=82=20=D0=A1=D0=BC=D0=B0?= =?UTF-8?q?=D0=B9=D1=82?= <111169073+RSMT98@users.noreply.github.com> Date: Sun, 1 Mar 2026 22:58:58 +0300 Subject: [PATCH 3/5] fixed ASAN check on rmm-tree --- .github/workflows/build-test.yml | 87 ++++++++++++++++---------------- include/pixie/rmm_tree.h | 33 ++++++++---- src/tests/test_rmm.cpp | 21 ++++++++ 3 files changed, 88 insertions(+), 53 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index 09f3113..b383db9 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -1,46 +1,45 @@ -name: Tests (Asan) - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -jobs: - build-and-test: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - name: Create Build Directory - run: mkdir build - - - name: Configure CMake - working-directory: ./build - run: cmake -DDISABLE_AVX512=ON -DENABLE_ADDRESS_SANITIZER=ON -DPIXIE_BENCHMARKS=OFF .. - - - name: Build Project - working-directory: ./build - run: make -j - - - name: Run Unittests - working-directory: ./build - run: ./unittests - - - name: Run LOUDS Tree Tests - working-directory: ./build - run: ./louds_tree_tests - - - name: Run Benchmark Tests - working-directory: ./build - run: ./benchmark_tests - - # TODO: fix RmM tests under Asan - # - name: Run RmM Tree Tests - # working-directory: ./build - # run: ./test_rmm - +name: Tests (Asan) + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build-and-test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Create Build Directory + run: mkdir build + + - name: Configure CMake + working-directory: ./build + run: cmake -DDISABLE_AVX512=ON -DENABLE_ADDRESS_SANITIZER=ON -DPIXIE_BENCHMARKS=OFF .. + + - name: Build Project + working-directory: ./build + run: make -j + + - name: Run Unittests + working-directory: ./build + run: ./unittests + + - name: Run LOUDS Tree Tests + working-directory: ./build + run: ./louds_tree_tests + + - name: Run Benchmark Tests + working-directory: ./build + run: ./benchmark_tests + + - name: Run RmM Tree Tests + working-directory: ./build + run: ./test_rmm + build-and-test-with-SDE: runs-on: ubuntu-latest timeout-minutes: 60 @@ -92,4 +91,4 @@ jobs: exit 0 fi exit $rc - + diff --git a/include/pixie/rmm_tree.h b/include/pixie/rmm_tree.h index 45caee0..fe8bbe7 100644 --- a/include/pixie/rmm_tree.h +++ b/include/pixie/rmm_tree.h @@ -270,26 +270,41 @@ class RmMTree { if (node_pattern10_count[node_index] < target_pattern_rank) { return npos; } + const size_t tree_size = segment_size_bits.size() - 1; size_t segment_base = 0; while (node_index < first_leaf_index) { - const size_t left_child = node_index << 1, right_child = left_child | 1; + const size_t left_child = node_index << 1; + const size_t left_segment_size = + (left_child <= tree_size) ? segment_size_bits[left_child] : 0; + if (left_segment_size == 0) { + return npos; + } + + const size_t left_count = node_pattern10_count[left_child]; + if (left_count >= target_pattern_rank) { + node_index = left_child; + continue; + } + + size_t remaining_rank = target_pattern_rank - left_count; + const size_t right_child = left_child | 1; + const bool has_right = + (right_child <= tree_size) && (segment_size_bits[right_child] != 0); + if (!has_right) { + return npos; + } + const size_t crossing_pattern = (node_last_bit[left_child] == 1 && node_first_bit[right_child] == 0) ? 1u : 0u; - if (node_pattern10_count[left_child] >= target_pattern_rank) { - node_index = left_child; - continue; - } - size_t remaining_rank = - target_pattern_rank - node_pattern10_count[left_child]; if (crossing_pattern) { if (remaining_rank == 1) { - return segment_base + segment_size_bits[left_child] - 1; + return segment_base + left_segment_size - 1; } --remaining_rank; } - segment_base += segment_size_bits[left_child]; + segment_base += left_segment_size; node_index = right_child; target_pattern_rank = remaining_rank; } diff --git a/src/tests/test_rmm.cpp b/src/tests/test_rmm.cpp index e6da921..194ef98 100644 --- a/src/tests/test_rmm.cpp +++ b/src/tests/test_rmm.cpp @@ -602,6 +602,27 @@ TEST(RmMEdgeCases, PartialLastLeafSelects) { expect_rank_select_equal(rm_select0, nv_select0, n); } +TEST(RmMEdgeCases, Select10OnIncompleteInternalNode) { + constexpr size_t leaf_block_bits = 256; + const size_t n = (leaf_block_bits * 2) + 32; // exactly 3 leaves + std::string bits(n, '1'); + + // Put all "10" patterns into the last (partial) leaf. + for (size_t i = leaf_block_bits * 2; i + 1 < n; i += 4) { + bits[i] = '1'; + bits[i + 1] = '0'; + } + + pixie::RmMTree rm(bits, leaf_block_bits); + NaiveRmM nv(bits); + + const size_t pairs10 = nv.rank10(n); + ASSERT_GT(pairs10, 0u); + for (size_t k = 1; k <= pairs10 + 1; ++k) { + EXPECT_EQ(rm.select10(k), nv.select10(k)) << "select10 k=" << k; + } +} + /** * Invalid arguments should fail fast and return npos/0 as specified. * Covers bad ranks, bad ranges and out-of-bounds BP navigation calls. From ef71dcebd980d5a74b9635b64b4b08c80ad07260 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A0=D0=BE=D0=B1=D0=B5=D1=80=D1=82=20=D0=A1=D0=BC=D0=B0?= =?UTF-8?q?=D0=B9=D1=82?= <111169073+RSMT98@users.noreply.github.com> Date: Sun, 1 Mar 2026 23:46:48 +0300 Subject: [PATCH 4/5] removed continuity ledger --- AGENTS.md | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 1407d03..5422c3e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,35 +1,5 @@ # AGENTS.md - AI Coding Assistant Guidelines for Pixie -## Continuity Ledger (compaction-safe) -Maintain a single Continuity Ledger for this workspace in `./CONTINUITY.md`. The ledger is the canonical session briefing designed to survive context compaction; do not rely on earlier chat text unless it’s reflected in the ledger. - -### How it works -- At the start of every assistant turn: read `./CONTINUITY.md`, update it to reflect the latest goal/constraints/decisions/state, then proceed with the work. -- Update `./CONTINUITY.md` again whenever any of these change: goal, constraints/assumptions, key decisions, progress state (Done/Now/Next), or important tool outcomes. -- Keep it short and stable: facts only, no transcripts. Prefer bullets. Mark uncertainty as `UNCONFIRMED` (never guess). -- If you notice missing recall or a compaction/summary event: refresh/rebuild the ledger from visible context, mark gaps `UNCONFIRMED`, ask up to 1–3 targeted questions, then continue. - -### `functions.update_plan` vs the Ledger -- `functions.update_plan` is for short-term execution scaffolding while you work (a small 3–7 step plan with pending/in_progress/completed). -- `./CONTINUITY.md` is for long-running continuity across compaction (the “what/why/current state”), not a step-by-step task list. -- Keep them consistent: when the plan or state changes, update the ledger at the intent/progress level (not every micro-step). - -### In replies -- Begin with a brief “Ledger Snapshot” (Goal + Now/Next + Open Questions). Print the full ledger only when it materially changes or when the user asks. - -### `./CONTINUITY.md` format (keep headings) -- Goal (incl. success criteria): -- Constraints/Assumptions: -- Key decisions: -- State: -- Done: -- Now: -- Next: -- Open questions (UNCONFIRMED if needed): -- Working set (files/ids/commands): - ---- - ## Project Overview Pixie is a **succinct data structures library** written in C++20. It provides space-efficient data structures that use close to the theoretical minimum space while supporting efficient queries. The library targets practical performance for data sizes up to 2^64 bits. From 69b650903a7366def41b07c89d95d19449def885 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A0=D0=BE=D0=B1=D0=B5=D1=80=D1=82=20=D0=A1=D0=BC=D0=B0?= =?UTF-8?q?=D0=B9=D1=82?= <111169073+RSMT98@users.noreply.github.com> Date: Mon, 2 Mar 2026 00:09:05 +0300 Subject: [PATCH 5/5] applied bot suggestions --- src/tests/test_rmm.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/tests/test_rmm.cpp b/src/tests/test_rmm.cpp index 194ef98..569a6f7 100644 --- a/src/tests/test_rmm.cpp +++ b/src/tests/test_rmm.cpp @@ -513,7 +513,7 @@ TEST(RmMEdgeCases, EmptyInput) { static void expect_rank_select_equal(const pixie::RmMTree& rm, const NaiveRmM& nv, - const size_t& n) { + size_t n) { for (size_t x = 0; x <= n; ++x) { EXPECT_EQ(rm.rank1(x), nv.rank1(x)) << "rank1 x=" << x; EXPECT_EQ(rm.rank0(x), nv.rank0(x)) << "rank0 x=" << x; @@ -537,12 +537,13 @@ static void expect_rank_select_equal(const pixie::RmMTree& rm, static void expect_range_ops_equal(const pixie::RmMTree& rm, const NaiveRmM& nv, - const size_t& n) { + size_t n) { if (n == 0) { return; } std::mt19937_64 rng(42); std::uniform_int_distribution pos(0, n - 1); + std::uniform_int_distribution k_dist; for (int t = 0; t < 512; ++t) { size_t i = pos(rng); size_t j = pos(rng); @@ -557,7 +558,8 @@ static void expect_range_ops_equal(const pixie::RmMTree& rm, size_t cnt = nv.mincount(i, j); EXPECT_EQ(rm.mincount(i, j), cnt); - size_t k = std::uniform_int_distribution(1, cnt + 1)(rng); + k_dist.param(std::uniform_int_distribution::param_type(1, cnt + 1)); + size_t k = k_dist(rng); EXPECT_EQ(rm.minselect(i, j, k), nv.minselect(i, j, k)); } } @@ -670,7 +672,7 @@ TEST(RmMEdgeCases, WordsConstructorResizesInputStorage) { std::vector words = {0xAAAAAAAAAAAAAAAAull}; const size_t bit_count = 300; - pixie::RmMTree rm(words, bit_count, /*leaf_block_bits=*/64); + pixie::RmMTree rm(words, bit_count); NaiveRmM nv(words, bit_count); expect_rank_select_equal(rm, nv, bit_count);