diff --git a/book/src/drive/count-index-group-by-examples.md b/book/src/drive/count-index-group-by-examples.md index 1f97da6038..29d16c8057 100644 --- a/book/src/drive/count-index-group-by-examples.md +++ b/book/src/drive/count-index-group-by-examples.md @@ -40,13 +40,17 @@ All proof-size and behaviour numbers below come from the same bench helper (`rep | # | Query | Filter + group_by | Complexity | Avg time | Proof size | Verified shape | Notes | |---|-------|-------------------|------------|----------|------------|----------------|-------| | G1 | [`In` on `byBrand`](#g1--in-on-bybrand-grouped-by-brand) | `brand IN ["brand_000", "brand_001"]`
`group_by = [brand]` | O(k · log B) | 38.6 µs | 1 102 B | `Entries(2 groups, sum = 2 000)` | Byte-identical to [Q5](./count-index-examples.md#query-5--in-on-bybrand) | +| G1a | [`In` on `byBrand` with an absent value](#g1a--in-on-bybrand-with-one-absent-value-grouped-by-brand) | `brand IN ["brand_000", "brand_100"]`
`group_by = [brand]` | O(k · log B) | 44.4 µs | 1 357 B | `Entries(1 group, sum = 1 000)` | One In value (`brand_100`) is absent — proof grows by 255 B for the absence subproof; verifier omits the absent branch from entries | +| G1b | [High-fanout `In` on `byBrand` (\|IN\| = B)](#g1b--high-fanout-in-on-bybrand-in--b-grouped-by-brand) | `brand IN [100 values]`
`group_by = [brand]` | O(k · log B) | 1 532 µs | 10 038 B | `Entries(100 groups, sum = 100 000)` | Same shape as G1, scaled from `\|IN\| = 2` → `\|IN\| = 100`; reveals every byBrand entry when `\|IN\| = B` | | G2 | [`In` on `byColor`](#g2--in-on-bycolor-grouped-by-color) | `color IN ["color_00000000", "color_00000001"]`
`group_by = [color]` | O(k · log C) | 62.1 µs | 1 381 B | `Entries(2 groups, sum = 200)` | Byte-identical to [Q6](./count-index-examples.md#query-6--in-on-bycolor-rangecountable) | | G3 | [Compound `In` + Equal](#g3--compound-in--equal-grouped-by-brand) | `brand IN [...] AND color == Y`
`group_by = [brand]` | O(k · (log B + log C')) | 106.2 µs | 2 842 B | `Entries(2 groups, sum = 2)` | Per-In compound resolution; two parallel Q4 descents sharing L1–L6 | | G4 | [Range on `byColor`](#g4--range-on-bycolor-grouped-by-color) | `color > "color_00000500"`
`group_by = [color]` | O(R · log C) | 762.9 µs | 10 992 B | `Entries(100 groups, sum = 10 000)` | `GroupByRange`: enumerates distinct in-range keys instead of Q7's boundary aggregate | -| G5 | [Compound `In` + Range](#g5--compound-in--range-grouped-by-brand-color) | `brand IN [...] AND color > floor`
`group_by = [brand, color]` | O(k · R' · log C') | 737.5 µs | 11 554 B | `Entries(100 groups, sum = 100)` | Compound In-fan-out × in-range distinct keys (G3 outer × G4 inner) | -| G6 | [High-fanout `In` on `byBrand`](#g6--high-fanout-in-on-bybrand) | `brand IN [100 values]`
`group_by = [brand]` | O(k · log B) | 1 532 µs | 10 038 B | `Entries(100 groups, sum = 100 000)` | Scales linearly with `\|IN\|`; reveals every byBrand entry when `\|IN\| = B` | +| G5 | [Compound `In` + Range](#g5--compound-in--range-grouped-by-brand-color) | `brand IN [...] AND color > "color_00000500"`
`group_by = [brand, color]` | O(k · R' · log C') | 737.5 µs | 11 554 B | `Entries(100 groups, sum = 100)` | Compound In-fan-out × in-range distinct keys (G3 outer × G4 inner) | | G7 | [Carrier `In` + Range (`byBrandColor`)](#g7--carrier-in--range-grouped-by-brand) | `brand IN [...] AND color > "color_00000500"`
`group_by = [brand]` | O(k · (log B + log C')) | 255.9 µs | 4 332 B | `Entries(2 groups, sum = 998)` | Per-In aggregate via `AggregateCountOnRange` as a carrier subquery; one `u64` per branch | | G8 | [Carrier outer Range + Range (`byBrandColor`)](#g8--carrier-outer-range--range-grouped-by-brand) | `brand > "brand_050" AND color > "color_00000500"`
`group_by = [brand]` | O(L · (log B + log C')) | 523 µs | 18 022 B | `Entries(10 groups, sum = 4 990)` | Outer-Range carrier with a platform-max `SizedQuery::limit` of 10; caller may pass smaller, can't pass larger | +| G8a | [Bounded carrier + bounded ACOR, descending](#g8a--bounded-carrier--bounded-acor-grouped-by-brand-descending) | `brand > "brand_050" AND brand < "brand_065" AND color > "color_00000200" AND color < "color_00000400"`
`group_by = [brand]`, `order_by = [(brand, desc)]` | O(L · (log B + log C')) | 807 µs | 29 010 B | `Entries(10 groups, sum = 1 990)` | Bounded ranges on both axes + descending walk; same carrier shape as G8, different op variants on both range commitments | +| G8b | [Same carrier `where` but `group_by = [brand, color]`](#g8b--two-range-carrier-with-group_by--brand-color-rejected) | `brand > "brand_050" AND color > "color_00000500"`
`group_by = [brand, color]` | — | — | **rejected** | ``InvalidWhereClauseComponents("count query supports at most one range where-clause; …or use `group_by = [outer_range_field]` with `prove = true`…")`` | Two-range carrier is opened only for `GroupByRange + single-field group_by`; the compound shape can't fan over both ranges | +| G8c | [Same carrier `where` but `group_by = []`](#g8c--two-range-carrier-with-group_by---rejected) | `brand > "brand_050" AND color > "color_00000500"`
`group_by = []` | — | — | **rejected** | ``InvalidWhereClauseComponents("count query supports at most one range where-clause; …or use `group_by = [outer_range_field]` with `prove = true`…")`` | Aggregate (no group_by) can't collapse the carrier's per-branch `u64`s into a single sum at the verifier | **Complexity variables.** `B` = distinct brands in the byBrand merk-tree (≈ 100); `C` = distinct colors in byColor (≈ 1 000); `C'` = distinct colors per brand in byBrandColor (≈ 1 000); `R` = distinct in-range values returned by `GroupByRange` (capped at 100 in this fixture by an implicit response-size limit); `R'` = distinct in-range values per fan-out branch (similarly capped); `k` = `|IN|` for the In-outer carrier shapes; `L` = the effective outer-walk limit for the Range-outer carrier shape (G8). The platform's `MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT = 10` is both the default (when the caller passes no `limit`) and a hard ceiling; callers may pass a smaller `limit` to truncate further. See [G8](#g8--carrier-outer-range--range-grouped-by-brand) for the rationale. As in [chapter 29](./count-index-examples.md#queries-in-this-chapter), the total document count `N` doesn't appear — count proofs read pre-committed `count_value`s rather than enumerating docs. @@ -164,6 +168,328 @@ flowchart TB Identical to [Q5's Layer-5+ diagram](./count-index-examples.md#query-5--in-on-bybrand) — same merk ops, same byBrand binary tree, same two `KVValueHashFeatureTypeWithChildHash` targets. The only difference is what the verifier returns at the end (`Entries(...)` instead of `Aggregate(2000)`); the per-layer structure is unchanged. See chapter 29 for the diagram. +## G1a — `In` on `byBrand` with one absent value, Grouped By `brand` + +```text +select = COUNT +where = brand IN ["brand_000", "brand_100"] +group_by = [brand] +prove = true +``` + +The bench fixture has brands `brand_000` … `brand_099` (`BRAND_COUNT = 100`); `brand_100` is **deliberately outside** that range. 
G1a is G1's same-shape sibling: same path query, same `point_lookup_count_path_query` builder, same `CountMode::GroupByIn` dispatch. The only structural difference is one of the In keys doesn't exist in the byBrand merk tree. + +**Path query** (identical shape to G1; only the second key differs): + +```text +path: ["@", contract_id, 0x01, "widget", "brand"] +query items: [Key("brand_000"), Key("brand_100")] +``` + +**Verified payload** (note: only **one** entry — the absent branch is silently dropped): + +```text +Entries([ + ("brand_000", CountTree { count_value_or_default: 1000 }), +]) +``` + +This is the load-bearing behaviour to know about: grovedb's `verify_query` *without* `absence_proofs_for_non_existing_searched_keys: true` drops absent-Key branches from the elements stream. The drive-side verifier ([`verify_point_lookup_count_proof_v0`](https://github.com/dashpay/platform/blob/v3.1-dev/packages/rs-drive/src/verify/document_count/verify_point_lookup_count_proof/v0/mod.rs)) uses the default (off) and so emits one entry per **present** In value, not one per **requested** In value. Test coverage: [`test_point_lookup_proof_omits_absent_in_branches_from_entries`](https://github.com/dashpay/platform/blob/v3.1-dev/packages/rs-drive/src/query/drive_document_count_query/tests.rs). + +**Caller implication.** Callers MUST NOT assume `entries.len() == |In|`. To check whether a specific In value matched, demux entries by serialized key (the same `serialize_value_for_key(field, value)` the path-query builder uses for outer Keys) — see the test for the canonical pattern. A `0`-count vs absent-key distinction would require passing `absence_proofs_for_non_existing_searched_keys: true` end-to-end, which the platform doesn't expose today. + +**Proof size:** 1 357 B (**+255 B over G1's 1 102 B**). 
The delta is the absence subproof: grovedb walks the byBrand merk tree to commit the rightmost present key (`brand_099`) and the chain of `Child` ops that proves there's nothing between `brand_099` and end-of-tree. Even though the verifier drops the absent entry, the *prover* must cryptographically commit to the absence — otherwise a malicious prover could omit a present branch by claiming it's absent. + +**Mode:** `CountMode::GroupByIn` routed to `DocumentCountMode::PointLookupProof` — same as G1. + +**Proof display:** + +The absence-subproof shape is what makes G1a interesting. The L8 (byBrand value tree) layer commits both: + +1. The present branch (op 0): `Push(KVValueHashFeatureTypeWithChildHash(brand_000, CountTree(636f6c6f72, 1000, …)))` — `brand_000` as a CountTree with count = 1000, exactly as in G1. +2. The absence commitment (op 36): `Push(KVDigest(brand_099, HASH[…]))` — the rightmost present brand in the byBrand merk tree, paired with a chain of `Child` ops (37–42) that the verifier replays to confirm there's no key strictly between `brand_099` and end-of-tree. `brand_100` would have to sort after `brand_099` (which is true: `brand_099` < `brand_100` lexicographically), so the verifier's merk-root recomputation succeeds with **no** `brand_100` element emitted. + +The bench's `[gproof] G1a` output dumps the full 1357-byte proof: + +
+Expand to see the structured proof (L1–L8 for byBrand, with one present CountTree at L8 + one absence subproof at L8) + +```text +GroveDBProofV1 { + LayerProof { // L1: roots merk + proof: Merk( + 0: Push(Hash(HASH[bd29…3b3])) // sibling: contracts subtree + 1: Push(KVValueHash(@, Tree(4ed2…289), HASH[…])) // KVValueHash of `@` (data-contract subtree root) — descend + 2: Parent + 3: Push(Hash(HASH[19c9…b71])) // sibling + 4: Child) + lower_layers: { + @ => { + LayerProof { // L2: `@` subtree + proof: Merk( + 0: Push(KVValueHash(0x4ed2…289, Tree(01), HASH[…]))) // descend into contract-id subtree + lower_layers: { + 0x4ed2…289 => { + LayerProof { // L3: contract-id subtree + proof: Merk( + 0: Push(Hash(HASH[49e7…df8])) // sibling + 1: Push(KVValueHash(0x01, Tree(widget), HASH[…])) // descend into doctype `widget` + 2: Parent) + lower_layers: { + 0x01 => { + LayerProof { // L4: doctype-prefix subtree + proof: Merk( + 0: Push(KVValueHash(widget, Tree(brand), HASH[…]))) // descend into byBrand index + lower_layers: { + widget => { + LayerProof { // L5: widget subtree + proof: Merk( + 0: Push(Hash(HASH[9862…9d9])) // sibling + 1: Push(KVValueHash(brand, Tree(brand_063), HASH[…])) // descend into byBrand value tree (rooted at `brand_063`) + 2: Parent + 3: Push(Hash(HASH[6c36…a86])) + 4: Child) + lower_layers: { + brand => { + LayerProof { // L6+L7+L8: byBrand value tree (binary search down to `brand_000` + absence walk to `brand_099`) + proof: Merk( + 0: Push(KVValueHashFeatureTypeWithChildHash(brand_000, CountTree(color, 1000, flags), HASH[…], BasicMerkNode, HASH[…])) // PRESENT — `brand_000` as CountTree(count=1000) + 1: Push(KVHash(HASH[…])) + 2: Parent + 3: Push(Hash(HASH[…])) + 4: Child + … (30 intermediate `KVHash`/`Hash`/`Parent`/`Child` ops, 5–34, walking the binary search) + 35: Push(KVHash(HASH[…])) + 36: Push(KVDigest(brand_099, HASH[…])) // ABSENCE COMMITMENT — rightmost present brand + 37: Child + 38: Child + 39: Child + 40: Child + 41: Child + 42: Child) + 
}}}}}}}}}}}}}}}}} +``` + +Op 36 (`KVDigest(brand_099, …)`) is the load-bearing piece. The verifier replays ops 37–42 (`Child`s) against the byBrand merk root committed at L5; any tampering — say, an honest `brand_099` swapped for a malicious `brand_100`-shaped commitment — would change the merk root and the verification would fail. + +
+ +### Diagram: conceptual flow (where the absence proof sits) + +```mermaid +flowchart TB + RQ["IN [brand_000, brand_100]"]:::request + RQ --> M["dispatcher → PointLookupProof
(group_by = [brand])"]:::dispatch + M --> P["point_lookup_count_path_query
outer Keys = [brand_000, brand_100]"]:::path + P --> V["grovedb walks byBrand merk tree"]:::engine + V --> P1["brand_000 ✓ present
commit CountTree(count=1000)"]:::present + V --> P2["brand_100 ✗ absent
commit rightmost present (brand_099)
+ Child chain to end-of-tree"]:::absent + P1 --> R["Proof bytes: 1357 B
(1102 B G1 baseline +
~255 B for the absence subproof)"]:::result + P2 --> R + R --> SDK["verify_point_lookup_count_proof
(absence_proofs_for_non_existing_searched_keys = false)"]:::verify + SDK --> OUT["Entries([(brand_000, 1000)])
brand_100 silently dropped"]:::sdk + + classDef request fill:#1f6feb,color:#fff,stroke:#1f6feb,stroke-width:2px; + classDef dispatch fill:#21262d,color:#c9d1d9,stroke:#1f6feb; + classDef path fill:#6e7681,color:#fff,stroke:#1f6feb; + classDef engine fill:#21262d,color:#c9d1d9,stroke:#39c5cf; + classDef present fill:#39c5cf,color:#0d1117,stroke:#39c5cf,stroke-width:3px; + classDef absent fill:#d29922,color:#0d1117,stroke:#d29922,stroke-width:3px,stroke-dasharray: 6 3; + classDef result fill:#21262d,color:#c9d1d9,stroke:#39c5cf,stroke-width:2px; + classDef verify fill:#21262d,color:#c9d1d9,stroke:#a371f7,stroke-width:2px; + classDef sdk fill:#21262d,color:#39c5cf,stroke:#39c5cf,stroke-width:2px,stroke-dasharray: 4 2; +``` + +### Per-layer merk-tree structure (Layer 5+) + +```mermaid +flowchart TB + L5["L5 — widget subtree:
KVValueHash(brand, Tree(brand_063))"]:::path + L5 --> L6["L6 — byBrand value tree root:
brand_063 (binary-search root)"]:::path + L6 --> L7L["brand_031 (left subtree boundary)"]:::sibling + L6 --> L7R["brand_095 (right subtree boundary)"]:::sibling + L7L --> P000["brand_000
(present, CountTree count=1000)"]:::target + L7R --> A099["brand_099
(rightmost present, absence-proof anchor)"]:::boundary + L7R -.-> A100["brand_100 (not in tree — absence proven
by Child chain to end-of-tree)"]:::absent + + classDef path fill:#6e7681,color:#fff,stroke:#1f6feb,stroke-width:2px; + classDef sibling fill:#6e7681,color:#fff,stroke:#6e7681; + classDef target fill:#39c5cf,color:#0d1117,stroke:#39c5cf,stroke-width:3px; + classDef boundary fill:#d29922,color:#0d1117,stroke:#d29922,stroke-width:2px; + classDef absent fill:#21262d,color:#d29922,stroke:#d29922,stroke-width:2px,stroke-dasharray: 6 3; +``` + +**Why absence-proof matters for count queries.** The drive count fast path treats absent branches as 0, but it does NOT trust the SDK to apply that rule on un-committed data — every count *or non-existence* the verifier reports must be cryptographically committed by the prover. If absent branches were silently summed into `0` without a proof, a malicious prover could omit a present branch (with positive count) and claim it's absent, shrinking the result without detection. The 255-B absence-subproof overhead is the price of that integrity — small in absolute terms, but it scales linearly with the number of absent In values, so callers building queries with many speculative In values pay per-absence overhead. + +## G1b — High-fanout `In` on `byBrand` (|IN| = B), Grouped By `brand` + +```text +select = COUNT +where = brand IN ["brand_000", "brand_001", ..., "brand_099"] +group_by = [brand] +prove = true +``` + +**Path query** (same shape as G1, scaled to `|IN| = 100`): + +```text +path: ["@", contract_id, 0x01, "widget", "brand"] +query items: [Key("brand_000"), Key("brand_001"), ..., Key("brand_099")] +``` + +**Verified payload:** + +```text +Entries(100 groups, sum = 100 000) +``` + +Every document in the fixture, partitioned by brand. Each `Entries[i]` carries `(brand_NNN, CountTree count=1000)`. + +**Proof size:** 10 038 B. **Mode:** `CountMode::GroupByIn`. + +Same structural shape as [G1](#g1--in-on-bybrand-grouped-by-brand), scaled from `|IN| = 2` to `|IN| = 100`. 
The byBrand merk binary tree at L6 emits all 100 brands as `KVValueHashFeatureTypeWithChildHash` targets — each ~100 B (key + leaf kv-hash + `CountTree(636f6c6f72, 1000, ...)` + `BasicMerkNode` feature + child-hash) — plus minimal boundary glue at the binary-tree corners. The proof grows linearly with `|IN|`: G1 (`|IN|=2`) was 1 102 B; G1b (`|IN|=100`) is 10 038 B; the slope is ~91 B per additional In value ((10 038 − 1 102) / 98). + +Compare against the `byColor` equivalent (`group_by_color_in_proof_100_rangecountable_branches`, 10 512 B): the `ProvableCountTree` overhead from `byColor`'s `KVHashCount` running counts adds ~5 % to the byBrand baseline, even though those running counts aren't consumed by a point-lookup group_by. This is the same `ProvableCountTree` overhead [G2](#g2--in-on-bycolor-grouped-by-color) carried at the smaller scale (`|IN|=2`). + +**Proof display:** + +
+Expand to see the structured proof (5 layers; bottom layer enumerates 100 brands as `KVValueHashFeatureTypeWithChildHash` targets — 192 merk ops total at L6 including binary-tree glue) — or open interactively in the visualizer ↗ + +```text +GroveDBProofV1 { + LayerProof { + proof: Merk( + 0: Push(Hash(HASH[bd291f29893fb6f6d6201087746ca1f23a178dd08e1346cb6c127e91ae3623b3])) + 1: Push(KVValueHash(@, Tree(4ed22624752972af97fb71abf4067b23e6d296a61a02f35b2098819fde39d289), HASH[4a5a28cb1b40226aa35b2f0d502767df13268bdf4678627dbfde26a557acdf73])) + 2: Parent + 3: Push(Hash(HASH[19c924989e473a90d0848277d0b1498ccc8db3dc870cbc130e773f3d79ea5b71])) + 4: Child) + lower_layers: { + // L2..L4 are byte-identical to every other query in this chapter + // (the @ / contract_id / 0x01 descent into widget); see chapter 29's + // Q1 verbatim for the full L1..L4 chain. + ... + widget => { + LayerProof { + proof: Merk( + // L5 widget doctype — `brand` queried, opaque siblings 9862 / 6c36 + 0: Push(Hash(HASH[9862894b16a0792688fdcf64edcb2ceade5c8b234649bfc6cfc6426869b0e9d9])) + 1: Push(KVValueHash(brand, Tree(6272616e645f303633), HASH[68b697da99d6ea70a83eb41794dca7ba3938d0ba98fbfaeb3cd0c19b3b5d0ff2])) + 2: Parent + 3: Push(Hash(HASH[6c36729e93b1a316cbf60fe282eb630c0ed6e45db088e365110302b6c9caba86])) + 4: Child) + lower_layers: { + brand => { + LayerProof { + proof: Merk( + // L6 byBrand merk-tree — 100 targets + binary-tree glue + // (192 merk ops total; structurally a fully-resolved in-order + // traversal of all 100 brand entries in the byBrand merk tree) + 0: Push(KVValueHashFeatureTypeWithChildHash(brand_000, CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), HASH[90ff6f6d9a3d901195982128130677243bfd27b75736206f3c8400966ef0d37b], BasicMerkNode, HASH[19b58883c492e746861db1e6ad07529a5a91cc8330af522682486db9346d6875])) + 1: Push(KVValueHashFeatureTypeWithChildHash(brand_001, CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), HASH[484ca11fb4ec8f479be1f78af903ce0c9d4fe630517579fb0172c2576d6b9652], 
BasicMerkNode, HASH[0bf12023f8e067c12db4cec1583909a0283878d6d909c76196736299750b5879])) + 2: Parent + 3: Push(KVValueHashFeatureTypeWithChildHash(brand_002, CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), HASH[4c19f047068654e71813dce7839a579edfdcb446e3d70efa1b8592c73259da16], BasicMerkNode, HASH[e8d5372904b7f4ac9334aeb4ddab619d9ad7a308732a4f231416e10208a0a356])) + ... + // 97 more KVValueHashFeatureTypeWithChildHash targets following + // the same template — brand_003 ... brand_099 — interleaved with + // Parent/Child ops glueing them into the byBrand merk binary tree. + // Every target shares the structure: + // Push(KVValueHashFeatureTypeWithChildHash( + // brand_NNN, + // CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), // count_value=1000 + // HASH[], + // BasicMerkNode, // NormalTree (no count on the merk node) + // HASH[] + // )) + ... + 189: Push(KVValueHashFeatureTypeWithChildHash(brand_097, CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), HASH[92adee932cc12927cd76ad9fd25906bbfe547df2bf21e826845bb4d3b47f5314], BasicMerkNode, HASH[34b69e1e424aa023c74f61554db2823da6c19dcbc51bdd5dece32e3f6f9fd219])) + 190: Parent + 191: Push(KVValueHashFeatureTypeWithChildHash(brand_098, CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), HASH[68e02fcf66f86797035fbc8d53290185fe3fed7de897a8654743cae4007c47c3], BasicMerkNode, HASH[acfc3a88b852e8895449b4c7e01f4b1cc25028e6a80e4915cdde578ff6eb029b])) + 192: Push(KVValueHashFeatureTypeWithChildHash(brand_099, CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), HASH[af9667a8f2a10a9402b3d1fb0ac6e0b64d1e3dde5b8829c03b8d2c9cfc94e16d], BasicMerkNode, HASH[d049fe7e250b7dd763a4a5daa4227dcd2e41733dd95fd0758641ac06c63c3b51])) + // + closing Parent/Child ops binding the last few entries + ) + } + } + } + } + } + } + } +} +``` + +The 254-line full verbatim sits in the bench's `[gproof] G1b` output — same template (one `KVValueHashFeatureTypeWithChildHash` per brand, all with `CountTree count=1000` and `BasicMerkNode` feature) repeating 100 times. 
The schematic above shows the first 3 and last 3 targets so the structural pattern is clear without reproducing 100 near-identical lines. + +**Key observation:** `BasicMerkNode` (not `ProvableCountedMerkNode`) is the feature type on each L6 op. byBrand is a `NormalTree`, so its merk binary tree's internal nodes don't carry running counts — only the per-brand `CountTree count=1000` values stored *inside* each brand's element matter. Contrast this with G1b's `byColor` cousin (`group_by_color_in_proof_100_rangecountable_branches`, 10 512 B): there the L6 targets would carry `ProvableCountedMerkNode(...)` features because byColor IS a `ProvableCountTree`. The ~5 % size difference is exactly those count fields × 100 nodes. + +
+ +```mermaid +flowchart TB + WD["@/contract_id/0x01/widget"]:::tree + WD ==> BR["brand: NormalTree (100 entries)"]:::path + BR ==> B000["brand_000: CountTree count=1000"]:::target + BR ==> B001["brand_001: CountTree count=1000"]:::target + BR ==> BMore["... 96 more in-range targets
(brand_002 ... brand_097)"]:::target + BR ==> B098["brand_098: CountTree count=1000"]:::target + BR ==> B099["brand_099: CountTree count=1000"]:::target + + SDK["Entries(100 groups, sum=100 000):
("brand_000", 1000),
("brand_001", 1000),
...
("brand_099", 1000)"]:::sdk + B000 -.-> SDK + B099 -.-> SDK + + classDef tree fill:#21262d,color:#c9d1d9,stroke:#1f6feb,stroke-width:2px; + classDef path fill:#6e7681,color:#fff,stroke:#1f6feb,stroke-width:2px; + classDef target fill:#39c5cf,color:#0d1117,stroke:#39c5cf,stroke-width:3px; + classDef sdk fill:#21262d,color:#39c5cf,stroke:#39c5cf,stroke-width:2px,stroke-dasharray: 4 2; + + linkStyle 0 stroke:#1f6feb,stroke-width:3px; + linkStyle 1 stroke:#1f6feb,stroke-width:3px; + linkStyle 2 stroke:#1f6feb,stroke-width:3px; + linkStyle 3 stroke:#1f6feb,stroke-width:3px; + linkStyle 4 stroke:#1f6feb,stroke-width:3px; + linkStyle 5 stroke:#1f6feb,stroke-width:3px; +``` + +### Diagram: per-layer merk-tree structure (Layer 5+) + +Identical to [G1's L5–L6 shape](#g1--in-on-bybrand-grouped-by-brand), just with all 100 entries in the byBrand merk tree resolved as visible targets rather than just two. The byBrand binary tree has all 100 keys exposed — no opaque sibling subtrees (`Hash` ops) at all, only `KVValueHashFeatureTypeWithChildHash` (full reveal) plus `Parent` / `Child` glue. + +```mermaid +flowchart TB + subgraph L5["Layer 5 — widget doctype merk-tree"] + direction TB + L5_q["brand (queried)
kv_hash=HASH[68b6...]"]:::queried + L5_left["HASH[9862...]"]:::sibling + L5_right["HASH[6c36...]"]:::sibling + L5_q --> L5_left + L5_q --> L5_right + end + + subgraph L6["Layer 6 — byBrand merk-tree (ALL 100 targets fully resolved)"] + direction TB + L6_t0["brand_000
CountTree count=1000
BasicMerkNode"]:::target + L6_t1["brand_001
CountTree count=1000"]:::target + L6_tmid["... 97 more KVValueHashFeatureTypeWithChildHash
targets, each CountTree count=1000
(192 merk ops total: 100 Push + 92 Parent/Child)"]:::target + L6_t99["brand_099
CountTree count=1000"]:::target + + L6_t0 --> L6_t1 + L6_t1 --> L6_tmid + L6_tmid --> L6_t99 + end + + L5_q -. "Tree(merk_root[byBrand])" .-> L6_t0 + + classDef queried fill:#1f6feb,color:#fff,stroke:#1f6feb,stroke-width:2px; + classDef sibling fill:#6e7681,color:#fff,stroke:#6e7681; + classDef target fill:#39c5cf,color:#0d1117,stroke:#39c5cf,stroke-width:3px; +``` + +Because the In set covers *every* brand in the fixture, the proof has zero opaque-sibling subtree commitments at L6 — every binary-tree node is revealed as a `KVValueHashFeatureTypeWithChildHash` target. That's the most efficient byte-per-key shape `GroupByIn` can hit: at `|IN| = B` (where `B` is the total entries in the property tree), the proof bytes ≈ `B × (kv-hash + count + child-hash + glue)` ≈ `B × 100 B`. For `B = 100`, that's ≈ 10 000 B — within 0.4 % of the 10 038 B we observe. + +By contrast, smaller In sets (G1's `|IN| = 2`) pay the boundary-proof tax: the byBrand merk tree has ~98 unresolved entries, which the proof must still commit to via `KVHash` (opaque-key commitment, ~33 B) and `Hash` (opaque-subtree commitment, ~33 B) ops. The asymptotic crossover at which "reveal everything" becomes cheaper than "reveal-some-and-commit-the-rest" depends on the ratio of `|IN|` to `B` — for byBrand with `B = 100`, the crossover is around `|IN| ≈ 50`. + ## G2 — `In` on `byColor`, Grouped By `color` ```text @@ -868,197 +1194,28 @@ flowchart TB The 50-targets-per-brand limit reflects the shared response-size cap. In the 2-brand case the cap kicks in at 50 colors per brand; if the In set had 1 brand it would be 100 colors; if it had 4 brands it would be 25 each. The dispatcher slices the cap evenly across the In fan-out so the *total* number of returned entries equals the limit, regardless of how many In branches share it. That's why the bench's `[matrix]` row for this case shows `Entries(len=100, sum=100)` rather than `len=200, sum=200`. 
-## G6 — High-Fanout `In` on `byBrand` +## G7 — Carrier `In` + Range, Grouped By `brand` ```text select = COUNT -where = brand IN ["brand_000", "brand_001", ..., "brand_099"] +where = brand IN ["brand_000", "brand_001"] AND color > "color_00000500" group_by = [brand] prove = true ``` -**Path query** (same shape as G1, scaled to `|IN| = 100`): +**Path query** (carrier `AggregateCountOnRange` — outer Keys per In value, ACOR subquery over each brand's color subtree): ```text -path: ["@", contract_id, 0x01, "widget", "brand"] -query items: [Key("brand_000"), Key("brand_001"), ..., Key("brand_099")] +path: ["@", contract_id, 0x01, "widget", "brand"] +outer query items: [Key("brand_000"), Key("brand_001")] +subquery_path: ["color"] +subquery items: [AggregateCountOnRange([RangeAfter("color_00000500"..)])] ``` -**Verified payload:** +**Verified payload** (verifier returns one `(in_key, u64)` per resolved In branch via `GroveDb::verify_aggregate_count_query_per_key`): ```text -Entries(100 groups, sum = 100 000) -``` - -Every document in the fixture, partitioned by brand. Each `Entries[i]` carries `(brand_NNN, CountTree count=1000)`. - -**Proof size:** 10 038 B. **Mode:** `CountMode::GroupByIn`. - -Same structural shape as [G1](#g1--in-on-bybrand-grouped-by-brand), scaled from `|IN| = 2` to `|IN| = 100`. The byBrand merk binary tree at L6 emits all 100 brands as `KVValueHashFeatureTypeWithChildHash` targets — each ~100 B (key + leaf kv-hash + `CountTree(00, 1000, ...)` + `BasicMerkNode` feature + child-hash) — plus minimal boundary glue at the binary-tree corners. The proof grows linearly with `|IN|`: G1 (`|IN|=2`) was 1 102 B; G6 (`|IN|=100`) is 10 038 B; the slope is ~99 B per additional In value. 
- -Compare against the `byColor` equivalent (`group_by_color_in_proof_100_rangecountable_branches`, 10 512 B): the `ProvableCountTree` overhead from `byColor`'s `KVHashCount` running counts adds ~5 % to the byBrand baseline, even though those running counts aren't consumed by a point-lookup group_by. This is the same `ProvableCountTree` overhead [G2](#g2--in-on-bycolor-grouped-by-color) carried at the smaller scale (`|IN|=2`). - -**Proof display:** - -
-Expand to see the structured proof (5 layers; bottom layer enumerates 100 brands as `KVValueHashFeatureTypeWithChildHash` targets — 192 merk ops total at L6 including binary-tree glue) — or open interactively in the visualizer ↗ - -```text -GroveDBProofV1 { - LayerProof { - proof: Merk( - 0: Push(Hash(HASH[bd291f29893fb6f6d6201087746ca1f23a178dd08e1346cb6c127e91ae3623b3])) - 1: Push(KVValueHash(@, Tree(4ed22624752972af97fb71abf4067b23e6d296a61a02f35b2098819fde39d289), HASH[4a5a28cb1b40226aa35b2f0d502767df13268bdf4678627dbfde26a557acdf73])) - 2: Parent - 3: Push(Hash(HASH[19c924989e473a90d0848277d0b1498ccc8db3dc870cbc130e773f3d79ea5b71])) - 4: Child) - lower_layers: { - // L2..L4 are byte-identical to every other query in this chapter - // (the @ / contract_id / 0x01 descent into widget); see chapter 29's - // Q1 verbatim for the full L1..L4 chain. - ... - widget => { - LayerProof { - proof: Merk( - // L5 widget doctype — `brand` queried, opaque siblings 9862 / 6c36 - 0: Push(Hash(HASH[9862894b16a0792688fdcf64edcb2ceade5c8b234649bfc6cfc6426869b0e9d9])) - 1: Push(KVValueHash(brand, Tree(6272616e645f303633), HASH[68b697da99d6ea70a83eb41794dca7ba3938d0ba98fbfaeb3cd0c19b3b5d0ff2])) - 2: Parent - 3: Push(Hash(HASH[6c36729e93b1a316cbf60fe282eb630c0ed6e45db088e365110302b6c9caba86])) - 4: Child) - lower_layers: { - brand => { - LayerProof { - proof: Merk( - // L6 byBrand merk-tree — 100 targets + binary-tree glue - // (192 merk ops total; structurally a fully-resolved in-order - // traversal of all 100 brand entries in the byBrand merk tree) - 0: Push(KVValueHashFeatureTypeWithChildHash(brand_000, CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), HASH[90ff6f6d9a3d901195982128130677243bfd27b75736206f3c8400966ef0d37b], BasicMerkNode, HASH[19b58883c492e746861db1e6ad07529a5a91cc8330af522682486db9346d6875])) - 1: Push(KVValueHashFeatureTypeWithChildHash(brand_001, CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), HASH[484ca11fb4ec8f479be1f78af903ce0c9d4fe630517579fb0172c2576d6b9652], 
BasicMerkNode, HASH[0bf12023f8e067c12db4cec1583909a0283878d6d909c76196736299750b5879])) - 2: Parent - 3: Push(KVValueHashFeatureTypeWithChildHash(brand_002, CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), HASH[4c19f047068654e71813dce7839a579edfdcb446e3d70efa1b8592c73259da16], BasicMerkNode, HASH[e8d5372904b7f4ac9334aeb4ddab619d9ad7a308732a4f231416e10208a0a356])) - ... - // 97 more KVValueHashFeatureTypeWithChildHash targets following - // the same template — brand_003 ... brand_099 — interleaved with - // Parent/Child ops glueing them into the byBrand merk binary tree. - // Every target shares the structure: - // Push(KVValueHashFeatureTypeWithChildHash( - // brand_NNN, - // CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), // count_value=1000 - // HASH[], - // BasicMerkNode, // NormalTree (no count on the merk node) - // HASH[] - // )) - ... - 189: Push(KVValueHashFeatureTypeWithChildHash(brand_097, CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), HASH[92adee932cc12927cd76ad9fd25906bbfe547df2bf21e826845bb4d3b47f5314], BasicMerkNode, HASH[34b69e1e424aa023c74f61554db2823da6c19dcbc51bdd5dece32e3f6f9fd219])) - 190: Parent - 191: Push(KVValueHashFeatureTypeWithChildHash(brand_098, CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), HASH[68e02fcf66f86797035fbc8d53290185fe3fed7de897a8654743cae4007c47c3], BasicMerkNode, HASH[acfc3a88b852e8895449b4c7e01f4b1cc25028e6a80e4915cdde578ff6eb029b])) - 192: Push(KVValueHashFeatureTypeWithChildHash(brand_099, CountTree(636f6c6f72, 1000, flags: [0, 0, 0]), HASH[af9667a8f2a10a9402b3d1fb0ac6e0b64d1e3dde5b8829c03b8d2c9cfc94e16d], BasicMerkNode, HASH[d049fe7e250b7dd763a4a5daa4227dcd2e41733dd95fd0758641ac06c63c3b51])) - // + closing Parent/Child ops binding the last few entries - ) - } - } - } - } - } - } - } -} -``` - -The 254-line full verbatim sits in the bench's `[gproof] G6` output — same template (one `KVValueHashFeatureTypeWithChildHash` per brand, all with `CountTree count=1000` and `BasicMerkNode` feature) repeating 100 times. 
The schematic above shows the first 3 and last 3 targets so the structural pattern is clear without reproducing 100 near-identical lines. - -**Key observation:** `BasicMerkNode` (not `ProvableCountedMerkNode`) is the feature type on each L6 op. byBrand is a `NormalTree`, so its merk binary tree's internal nodes don't carry running counts — only the per-brand `CountTree count=1000` values stored *inside* each brand's element matter. Contrast this with G6's `byColor` cousin (`group_by_color_in_proof_100_rangecountable_branches`, 10 512 B): there the L6 targets would carry `ProvableCountedMerkNode(...)` features because byColor IS a `ProvableCountTree`. The ~5 % size difference is exactly those count fields × 100 nodes. - -
- -```mermaid -flowchart TB - WD["@/contract_id/0x01/widget"]:::tree - WD ==> BR["brand: NormalTree (100 entries)"]:::path - BR ==> B000["brand_000: CountTree count=1000"]:::target - BR ==> B001["brand_001: CountTree count=1000"]:::target - BR ==> BMore["... 96 more in-range targets
(brand_002 ... brand_097)"]:::target - BR ==> B098["brand_098: CountTree count=1000"]:::target - BR ==> B099["brand_099: CountTree count=1000"]:::target - - SDK["Entries(100 groups, sum=100 000):
("brand_000", 1000),
("brand_001", 1000),
...
("brand_099", 1000)"]:::sdk - B000 -.-> SDK - B099 -.-> SDK - - classDef tree fill:#21262d,color:#c9d1d9,stroke:#1f6feb,stroke-width:2px; - classDef path fill:#6e7681,color:#fff,stroke:#1f6feb,stroke-width:2px; - classDef target fill:#39c5cf,color:#0d1117,stroke:#39c5cf,stroke-width:3px; - classDef sdk fill:#21262d,color:#39c5cf,stroke:#39c5cf,stroke-width:2px,stroke-dasharray: 4 2; - - linkStyle 0 stroke:#1f6feb,stroke-width:3px; - linkStyle 1 stroke:#1f6feb,stroke-width:3px; - linkStyle 2 stroke:#1f6feb,stroke-width:3px; - linkStyle 3 stroke:#1f6feb,stroke-width:3px; - linkStyle 4 stroke:#1f6feb,stroke-width:3px; - linkStyle 5 stroke:#1f6feb,stroke-width:3px; -``` - -### Diagram: per-layer merk-tree structure (Layer 5+) - -Identical to [G1's L5–L6 shape](#g1--in-on-bybrand-grouped-by-brand), just with all 100 entries in the byBrand merk tree resolved as visible targets rather than just two. The byBrand binary tree has all 100 keys exposed — no opaque sibling subtrees (`Hash` ops) at all, only `KVValueHashFeatureTypeWithChildHash` (full reveal) plus `Parent` / `Child` glue. - -```mermaid -flowchart TB - subgraph L5["Layer 5 — widget doctype merk-tree"] - direction TB - L5_q["brand (queried)
kv_hash=HASH[68b6...]"]:::queried - L5_left["HASH[9862...]"]:::sibling - L5_right["HASH[6c36...]"]:::sibling - L5_q --> L5_left - L5_q --> L5_right - end - - subgraph L6["Layer 6 — byBrand merk-tree (ALL 100 targets fully resolved)"] - direction TB - L6_t0["brand_000
CountTree count=1000
BasicMerkNode"]:::target - L6_t1["brand_001
CountTree count=1000"]:::target - L6_tmid["... 97 more KVValueHashFeatureTypeWithChildHash
targets, each CountTree count=1000
(192 merk ops total: 100 Push + 92 Parent/Child)"]:::target - L6_t99["brand_099
CountTree count=1000"]:::target - - L6_t0 --> L6_t1 - L6_t1 --> L6_tmid - L6_tmid --> L6_t99 - end - - L5_q -. "Tree(merk_root[byBrand])" .-> L6_t0 - - classDef queried fill:#1f6feb,color:#fff,stroke:#1f6feb,stroke-width:2px; - classDef sibling fill:#6e7681,color:#fff,stroke:#6e7681; - classDef target fill:#39c5cf,color:#0d1117,stroke:#39c5cf,stroke-width:3px; -``` - -Because the In set covers *every* brand in the fixture, the proof has zero opaque-sibling subtree commitments at L6 — every binary-tree node is revealed as a `KVValueHashFeatureTypeWithChildHash` target. That's the most efficient byte-per-key shape `GroupByIn` can hit: at `|IN| = B` (where `B` is the total entries in the property tree), the proof bytes ≈ `B × (kv-hash + count + child-hash + glue)` ≈ `B × 100 B`. For `B = 100`, that's exactly the 10 038 B we observe. - -By contrast, smaller In sets (G1's `|IN| = 2`) pay the boundary-proof tax: the byBrand merk tree has ~98 unresolved entries, each contributing one `KVHash` (opaque-key commitment, ~33 B) or `Hash` (opaque-subtree commitment, ~33 B). The asymptotic crossover at which "reveal everything" becomes cheaper than "reveal-some-and-commit-the-rest" depends on the ratio of `|IN|` to `B` — for byBrand with `B = 100`, the crossover is around `|IN| ≈ 50`. 
- -## G7 — Carrier `In` + Range, Grouped By `brand` - -```text -select = COUNT -where = brand IN ["brand_000", "brand_001"] AND color > "color_00000500" -group_by = [brand] -prove = true -``` - -**Path query** (carrier `AggregateCountOnRange` — outer Keys per In value, ACOR subquery over each brand's color subtree): - -```text -path: ["@", contract_id, 0x01, "widget", "brand"] -outer query items: [Key("brand_000"), Key("brand_001")] -subquery_path: ["color"] -subquery items: [AggregateCountOnRange([RangeAfter("color_00000500"..)])] -``` - -**Verified payload** (verifier returns one `(in_key, u64)` per resolved In branch via `GroveDb::verify_aggregate_count_query_per_key`): - -```text -[("brand_000", 499), ("brand_001", 499)] +[("brand_000", 499), ("brand_001", 499)] ``` Each brand has all 1 000 colors in its byBrandColor terminator; the strict `>` cut at `color_00000500` leaves `color_00000501..color_00000999` = 499 in-range colors per brand. Total `sum = 998` documents. @@ -1233,7 +1390,7 @@ G8 is G7's natural extension from "k specific outer keys" to "L outer keys from The cap bounds the prove-path proof size; the *ceiling* is a hardcoded compile-time constant for prover/verifier-agreement reasons. -1. **Proof-size bounding.** Proof bytes scale linearly with the limit (~1 700 B per outer match, exactly as for [G7](#g7--carrier-in--range-grouped-by-brand)). 10 keeps the worst-case proof under 20 KB (Tier-1 for the visualizer's shareable-link guidance) — enough for typical "top-N brands by an outer range" queries while avoiding pathological proof sizes. Callers that want a window above 10 entries call repeatedly with disjoint outer-range bounds; callers that want fewer pass a smaller `limit` (1 through 9). Limit 0 is rejected to keep the response shape non-trivial. +1. **Proof-size bounding.** Proof bytes scale linearly with the limit (~1 700 B per outer match, exactly as for [G7](#g7--carrier-in--range-grouped-by-brand)). 
10 keeps the worst-case proof under 20 KB (Tier-1 for the [GroveDB Proof Visualizer's shareable-link guidance](https://github.com/dashpay/grovedb-proof-visualizer-widget/blob/master/prompts/link-from-platform-book.md#size-guidance) — Tier-1 ≤ 20 KB works in every browser and link-preview surface; Tier-2 of 20–50 KB works in browsers but may be truncated in Slack/Discord previews; Tier-3 above 50 KB risks Safari's URL ceiling) — enough for typical "top-N brands by an outer range" queries while avoiding pathological proof sizes. Callers that want a window above 10 entries call repeatedly with disjoint outer-range bounds; callers that want fewer pass a smaller `limit` (1 through 9). Limit 0 is rejected to keep the response shape non-trivial. 2. **Prover/verifier byte-for-byte agreement.** `SizedQuery::limit` is part of the serialized `PathQuery` and feeds the merk-root reconstruction; both prover and verifier must agree on its value. The caller's request carries `limit` over the wire, so its specific value (1..=10) is fine to vary. What can't vary is the platform's *default* when the caller passes nothing — that's why the ceiling is a hardcoded compile-time constant (`MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT`) rather than an operator-tunable runtime value. Same rationale as `RangeDistinctProof`'s use of `crate::config::DEFAULT_QUERY_LIMIT` rather than `drive_config.default_query_limit`. Caller semantics summary: @@ -1357,6 +1514,207 @@ flowchart TB The slope vs G7 is the proof's whole story: G7's `k = 2` outer matches → ~4 KB; G8's `L = 10` outer matches → ~18 KB. The per-outer-match cost (~1 700 B) is the same; only the outer-walk count changes. The platform max of 10 keeps the worst-case proof under 20 KB (Tier-1 of the visualizer's shareable-link guidance); larger windows are unreachable without changing the constant — callers that want more results call repeatedly with disjoint outer-range windows. 
+## G8a — Bounded carrier + bounded ACOR, grouped by `brand`, descending + +```text +select = COUNT +where = brand > "brand_050" AND brand < "brand_065" + AND color > "color_00000200" AND color < "color_00000400" +group_by = [brand] +order_by = [(brand, desc)] +prove = true +``` + +G8a stresses three carrier-ACOR dimensions G8 didn't: a **bounded** outer range (instead of half-open), a **bounded** inner ACOR (instead of `>` floor), and a **descending** walk (instead of left-to-right ascending). All three orthogonal. Same `RangeAggregateCarrierProof` mode, same path-query builder; the differences live entirely in the per-clause `QueryItem` variants and the carrier's `left_to_right` flag. + +**Path query** (the carrier query items differ from G8 in three ways: outer item is `RangeAfterTo` instead of `RangeAfter`, inner ACOR item is `RangeAfterTo` instead of `RangeAfter`, and `outer_query.left_to_right = false`): + +```text +path: ["@", contract_id, 0x01, "widget", "brand"] +outer query item: RangeAfterTo("brand_050".."brand_065") // exclusive bounds +subquery_path: ["color"] +subquery items: [AggregateCountOnRange([RangeAfterTo("color_00000200".."color_00000400")])] +SizedQuery::limit: 10 // platform default +outer Query.left_to_right: false // from order_by [(brand, desc)] +``` + +**Same-field range merging.** The caller's wire shape carries *four* range clauses (`brand >`, `brand <`, `color >`, `color <`). The dispatcher merges each same-field pair into a single `BetweenExcludeBounds` clause via [`merge_same_field_range_pairs`](https://github.com/dashpay/platform/blob/v3.1-dev/packages/rs-drive/src/query/drive_document_count_query/drive_dispatcher.rs) before mode detection runs. After merging, the structure is identical to G8's two-range shape; mode detection routes to `RangeAggregateCarrierProof` for the same reasons. 
+ +**Verified payload** (descending walk — outer keys come out from highest to lowest, capped at `L = 10`): + +```text +[("brand_064", 199), ("brand_063", 199), …, ("brand_055", 199)] +``` + +The bench's 100-brand fixture has 14 brands strictly between `"brand_050"` and `"brand_065"` (i.e. `brand_051` through `brand_064`). The descending walk starts at `brand_064` and runs left-to-right=false through the byBrand merk tree; the `SizedQuery::limit = 10` halts the walk after 10 outer matches (`brand_064` down to `brand_055`). Each brand's inner ACOR over `color > "color_00000200" AND color < "color_00000400"` sums to **199** documents (199 colors `color_00000201` … `color_00000399`, one document per `(brand, color)` pair in the fixture). Total `sum = 10 × 199 = 1 990`. + +**Proof size:** 29 010 B. **Mode:** `CountMode::GroupByRange` routed to `DocumentCountMode::RangeAggregateCarrierProof`. + +G8a is structurally G8 with three independent variant changes, each adding a small amount of merk-proof overhead but no asymptotic complexity change: + +- **Bounded outer range** → the byBrand merk tree commits both bounds (`brand_050` lower-exclusive + `brand_065` upper-exclusive) as boundary `KVDigest` ops. G8's `>`-only outer commits one boundary; G8a's `>` AND `<` commits two. Modest size delta (~1 extra `KVDigest` per bound × the carrier's tree depth). +- **Bounded inner ACOR** → each per-brand color subtree commits both bounds as `KVDigestCount` ops. G8's `>`-only ACOR walks `O(log C')` boundary nodes for the lower bound; G8a's two-sided ACOR walks `O(log C')` for both bounds. The asymptotic stays `O(L · (log B + log C'))`; the constant roughly doubles for the per-brand boundary walk. +- **Descending walk** → grovedb emits `PushInverted(...)` op variants instead of `Push(...)` and walks the binary merk tree right-to-left. Same op count as ascending, slightly different serialized encoding (~1–2 bytes per op for the `PushInverted` opcode discriminant). 
The verifier's reconstruction is byte-identical given the same `left_to_right` flag in the `PathQuery`. + +Total proof bytes: **29 010 B** vs G8's **18 022 B**. Per-outer-match overhead: ~2 900 B (G8a) vs ~1 700 B (G8). The extra ~1 200 B per branch is the bounded-inner-ACOR cost — every per-brand subtree commits twice as many boundary `KVDigestCount` ops. + +**Proof display:** + +
+Expand to see the structured proof (8 layers; L8 uses two-sided ACOR boundary walks per brand, `PushInverted` outer-walk ops for descending direction) — or open interactively in the visualizer ↗ + +```text +GroveDBProofV1 { + LayerProof { + proof: Merk(... root-level descent, identical to every other chapter query ...) + lower_layers: { + @ => { ... contract_id descent ... } + // L2..L4 byte-identical to every 8-layer carrier query in this chapter + } + } + // L5 widget doctype: brand queried (same as G3 / G5 / G7 / G8) + // L6 byBrand merk-tree: walked LEFT-TO-RIGHT=FALSE (descending). + // Outer query item: RangeAfterTo("brand_050".."brand_065") + // Inlined targets: brand_064 → brand_063 → ... → brand_055 + // via `PushInverted(KVValueHash(brand_NNN, CountTree, ...))` ops. + // Boundary KVDigest nodes name brand_065 (upper-exclusive cut) + // and brand_050 (lower-exclusive cut, capped by SizedQuery::limit). + // L7 brand_NNN's value tree: single key `color` with NonCounted(ProvableCountTree) + // — repeated 10 times, once per resolved outer brand (in descending order). + // L8 brand_NNN's byBrandColor color subtree: + // proof: Merk( + // ... ACOR boundary walk for color > "color_00000200" AND color < "color_00000400" + // (two-sided cut, ~2× the boundary ops of G8's one-sided ACOR), + // summing to count = 199 per brand ... + // ) + // — repeated 10 times in parallel, each with its own per-brand boundary hashes. +} +``` + +The 902-line full verbatim sits in the bench's `[gproof] G8a` output. The schematic compresses the 10 parallel L7+L8 descents and the per-brand boundary commitments — they share the same template (single-key continuation + ~50-op two-sided ACOR boundary walk), differing only in per-brand hashes and the resulting subtree commits. Each per-brand L8 contributes ~2 800 B of ACOR boundary commitments (~1.6× G8's ~1 700 B due to the two-sided range walking both bounds). 
+ +The most visually distinctive feature of the descending-walk proof: every L6 carrier op is `PushInverted(...)` rather than `Push(...)`, signalling grovedb's right-to-left binary-merk-tree iteration. Identical merk-root reconstruction given the same `Query.left_to_right = false` flag — but the wire-level encoding diverges so the verifier knows which direction to walk. + +
+ +```mermaid +flowchart TB + WD["@/contract_id/0x01/widget"]:::tree + WD ==> BR["brand: NormalTree (descending walk, left_to_right=false)"]:::path + BR ==> B064["brand_064: CountTree count=1000"]:::path + BR ==> BMore["brand_063 … brand_056
(8 more in-range brands, descending)"]:::path + BR ==> B055["brand_055: CountTree count=1000"]:::path + BR -.-> BBelow["brand_051 … brand_054
(in range but below cap — beyond limit, opaque)"]:::faded + BR -.-> BAbove["brand_065 (boundary key, excluded by <)"]:::faded + BR -.-> BCapBelow["brand_000 … brand_050
(below floor, opaque)"]:::faded + + B064 ==> B064_C["brand_064/color: NonCounted(ProvableCountTree)
two-sided ACOR (color > 200 AND color < 400)"]:::target + BMore ==> BMore_C["8 parallel two-sided ACOR walks
(color > 200 AND color < 400)"]:::target + B055 ==> B055_C["brand_055/color: NonCounted(ProvableCountTree)
two-sided ACOR (color > 200 AND color < 400)"]:::target + + SDK["Entries(10 groups, sum=1 990) — DESCENDING:
("brand_064", 199)
("brand_063", 199)

("brand_055", 199)"]:::sdk + B064_C -.-> SDK + BMore_C -.-> SDK + B055_C -.-> SDK + + classDef tree fill:#21262d,color:#c9d1d9,stroke:#1f6feb,stroke-width:2px; + classDef path fill:#6e7681,color:#fff,stroke:#1f6feb,stroke-width:2px; + classDef faded fill:#21262d,color:#6e7681,stroke:#484f58; + classDef target fill:#39c5cf,color:#0d1117,stroke:#39c5cf,stroke-width:3px; + classDef sdk fill:#21262d,color:#39c5cf,stroke:#39c5cf,stroke-width:2px,stroke-dasharray: 4 2; + + linkStyle 0 stroke:#1f6feb,stroke-width:3px; + linkStyle 1 stroke:#1f6feb,stroke-width:3px; + linkStyle 2 stroke:#1f6feb,stroke-width:3px; + linkStyle 3 stroke:#1f6feb,stroke-width:3px; + linkStyle 7 stroke:#1f6feb,stroke-width:3px; + linkStyle 8 stroke:#1f6feb,stroke-width:3px; + linkStyle 9 stroke:#1f6feb,stroke-width:3px; +``` + +### Diagram: per-layer merk-tree structure (Layer 5+) + +L5 is identical to G7 / G8 (widget doctype with `brand` queried). L6 differs from G8 in two ways: the outer query item is `RangeAfterTo` (bounded) rather than `RangeAfter` (half-open), and every op is `PushInverted` rather than `Push` because of `left_to_right = false`. L7 + L8 fork into 10 parallel descents, each carrying a **two-sided** ACOR boundary walk over `color > "color_00000200" AND color < "color_00000400"` instead of G8's one-sided `color > "color_00000500"`. + +```mermaid +flowchart TB + subgraph L5["Layer 5 — widget doctype merk-tree"] + direction TB + L5_q["brand (queried)
kv_hash=HASH[68b6...]"]:::queried + end + + subgraph L6["Layer 6 — byBrand merk-tree (bounded outer range, descending walk, 10 targets)"] + direction TB + L6_t064["brand_064
PushInverted(KVValueHash …)
CountTree count=1000"]:::queried + L6_tmid["… 8 more in-range targets …
(brand_063 → brand_056, descending)"]:::queried + L6_t055["brand_055
PushInverted(KVValueHash …)
CountTree count=1000"]:::queried + L6_upper["Upper-bound commitment:
KVDigest(brand_065, …) — excluded by <"]:::boundary + L6_lower["Below-cap + below-floor commitments:
brand_051 … brand_054 (capped)
+ brand_000 … brand_050 (below floor)
(opaque KVHash / Hash ops)"]:::sibling + + L6_t064 --> L6_tmid + L6_tmid --> L6_t055 + L6_t064 --> L6_upper + L6_t055 --> L6_lower + end + + subgraph L7L8["Layers 7+8 — per-brand continuation + two-sided ACOR walk (×10)"] + direction TB + L7L8_each["For each of brand_064 … brand_055 (descending):
L7: single-key `color` continuation (NonCounted(ProvableCountTree))
L8: ~50 merk ops — two-sided ACOR boundary walk
for color > 200 AND color < 400
committing one `u64 = 199` per brand"]:::target + end + + L5_q -. "byBrand" .-> L6_t064 + L6_t064 -. "continuation × 10" .-> L7L8_each + + classDef queried fill:#1f6feb,color:#fff,stroke:#1f6feb,stroke-width:2px; + classDef sibling fill:#6e7681,color:#fff,stroke:#6e7681; + classDef target fill:#39c5cf,color:#0d1117,stroke:#39c5cf,stroke-width:3px; + classDef boundary fill:#d29922,color:#0d1117,stroke:#d29922,stroke-width:2px,stroke-dasharray: 6 3; +``` + +**The size delta between G8 and G8a, per outer match**: ~1 700 B (G8) → ~2 900 B (G8a; 29 010 B across 10 outer matches). The extra ~1 200 B per brand comes almost entirely from the bounded inner ACOR's second boundary walk; the per-op `PushInverted` discriminant overhead on the L6 carrier ops adds only a byte or two per op. Both costs are linear in `L` (the platform-max outer cap), so doubling `L` doubles the delta. The asymptotic complexity stays `O(L · (log B + log C'))` — the bounded-vs-unbounded distinction is a constant-factor change in the per-walk boundary commit count, not a complexity-class change. + +**Reading the descending result**: the SDK returns `Vec<(Vec<u8>, u64)>` in the same wire order grovedb walked the outer dimension. For `left_to_right = false`, that's lex-descending serialized brand keys (`brand_064` before `brand_063` before … before `brand_055`). Callers that expect ascending output sort the result client-side; the prove-path guarantee is on the *contents* (which brands and which counts), not the client-visible ordering — though for chapter-fixture-deterministic proofs the ordering IS visible in the proof bytes via `Push` vs `PushInverted`, so the verifier knows which direction grovedb walked. 
+ +## G8b — Two-range carrier with `group_by = [brand, color]` (rejected) + +```text +select = COUNT +where = brand > "brand_050" AND color > "color_00000500" +group_by = [brand, color] +prove = true +``` + +**Outcome:** `Err(QuerySyntaxError::InvalidWhereClauseComponents("count query supports at most one range where-clause; combine two-sided ranges via `between*` instead of separate `>` / `<` clauses, or use `group_by = [outer_range_field]` with `prove = true` for the carrier-aggregate shape with one outer range and one inner ACOR range on a different field"))` — at [`detect_mode`](https://github.com/dashpay/platform/blob/v3.1-dev/packages/rs-drive/src/query/drive_document_count_query/mode_detection.rs)'s `range_count > 1` short-circuit, before any index picking or path-query building. + +**Why.** The two-range carrier shape (`outer_range AND inner_range` on distinct fields) is opened by mode detection **only** when `mode == GroupByRange` *and* `group_by.len() == 1` *and* `prove = true`. G8b violates the first two: with `group_by = [brand, color]` the request maps to `CountMode::GroupByCompound`, which routes to `distinct_count_path_query` — a builder that knows how to walk an `In + range` fan-out but not a `range + range` cartesian product. Two design points: + +- **`GroupByCompound` is specifically the `(In, range)` shape.** Its path-query builder emits outer `Key(serialized_in_value)` items (one per In branch) and an inner `Range*` subquery; the walk is `|In|`-bounded by construction. Extending it to accept `range + range` would mean replacing the outer `Key`s with an outer `Range*` (and a `SizedQuery::limit` to bound the walk) **and** swapping the inner from "enumerate distinct values" to "single ACOR aggregate" — at which point the result shape stops being "per-distinct-value entries" and becomes "per-outer-key `u64`s," i.e. G8's shape with a redundant second group_by field. 
There's no information gain from adding `color` to the group_by — the carrier already commits one `u64` per outer `brand`, and the inner range collapses into that `u64` rather than being enumerated. +- **The carrier primitive returns one `u64` per outer key, not per `(outer, inner)` pair.** Per-distinct-color counts inside an outer-range brand walk would require the alternative `RangeDistinctProof` shape (the G5 compound-distinct path) running on a `byBrandColor + rangeCountable: true` cartesian fan-out — which works for `In + range` (a finite outer key set) but would explode for `range + range` (potentially `B × C'` distinct entries, dwarfing the `MAX_CARRIER_AGGREGATE_OUTER_RANGE_LIMIT = 10` cap that bounds G8). The dispatcher rejects rather than silently routing to a path that'd produce a proof orders of magnitude larger than the caller likely expected. + +**What to use instead.** + +- If you want per-brand totals across an in-range color window (the most common interpretation of this request), use G8 (`group_by = [brand]`): one `u64` per brand, capped at 10 outer matches. +- If you want per-`(brand, color)` distinct counts across both ranges, the dispatcher has no path today — you'd need a `byBrandColor + rangeCountable: true` index plus a new mode that extends `GroupByCompound` to `range + range` with a per-pair `SizedQuery::limit`. Out of scope for this contract. +- If you want a single sum across the whole `brand > X AND color > Y` window, you'd need to call G8 and sum the returned `u64`s client-side (server-side aggregation across the carrier's per-branch counts isn't supported on the prove path — see G8c below). + +## G8c — Two-range carrier with `group_by = []` (rejected) + +```text +select = COUNT +where = brand > "brand_050" AND color > "color_00000500" +group_by = [] +prove = true +``` + +**Outcome:** same rejection as G8b — `Err(QuerySyntaxError::InvalidWhereClauseComponents("count query supports at most one range where-clause; …"))`. 
Mode-detection's `range_count > 1` short-circuit checks `mode == GroupByRange`, and the dispatcher maps `group_by = []` to `CountMode::Aggregate`, so the check fails for the same structural reason as G8b. + +**Why.** With no `group_by` the request asks for a single scalar `u64` covering every document matching both ranges. The carrier-ACOR primitive emits one `u64` *per outer-range key* (10 brands in G8's case), not a single sum across the whole walk. Two paths to a single sum, neither viable today: + +- **Server-side sum across the carrier's branches.** Would require a new grovedb primitive that takes the carrier shape and emits `Σ branch_counts` as a single ACOR-style aggregate. Not implemented — the carrier's commitment is *per branch*, which is what gives the verifier the cryptographic granularity to verify each entry independently. Summing in the server would lose that and force the verifier to trust the server's sum. +- **Client-side sum after running G8.** Allowed and easy — call G8, get back `Vec<(brand, u64)>`, sum the `u64`s. The proof still cryptographically commits to each branch, and the client's sum is over verified data. This is the pragmatic path for "give me one number" callers; the chapter recommends it instead of opening up `Aggregate` for the two-range carrier shape. + +**The deeper reason `Aggregate` can't shortcut this.** Per [chapter 29's Q7 (Range Aggregate `byColor`)](./count-index-examples.md#query-7--range-aggregate-bycolor), `Aggregate + single range` uses the leaf-level `AggregateCountOnRange` primitive directly, which DOES return a single `u64`. That works because the range is rooted at the index's *terminator* property — there's a single CountTree under which the boundary walk runs. With G8c's two ranges, the *outer* range walks the byBrand merk tree (no `ProvableCountTree` involved) and only the inner range hits the rangeCountable terminator. 
Collapsing across the outer walk would mean a `ProvableCountTree` over CountTrees, which grovedb's primitive set doesn't have. The walk could in principle compute and emit a sum at the outer layer, but the verifier wouldn't be able to recompute the per-branch counts to check the sum — defeating the prove-path's whole point. + ## Future Work This chapter now mirrors chapter 29's per-query structure: every section above carries a path query, verified payload, proof size, verbatim or schematic proof display, narrative, conceptual flowchart, and per-layer merk-tree diagram. @@ -1368,9 +1726,9 @@ Two pieces of infrastructure made this possible: Open follow-ups: -1. **Inline the full G4 / G5 / G6 verbatim** rather than the schematic-with-elision form. The bench captures every byte; the chapter's `<details>` blocks currently summarise the 100-target enumerations because reproducing 100 near-identical `KVValueHashFeatureTypeWithChildHash` lines per case is more noise than signal. If a reader needs byte-exact output, they can run the bench and grep `[gproof]`. +1. **Inline the full G4 / G5 / G1b verbatim** rather than the schematic-with-elision form. The bench captures every byte; the chapter's `<details>` blocks currently summarise the 100-target enumerations because reproducing 100 near-identical `KVValueHashFeatureTypeWithChildHash` lines per case is more noise than signal. If a reader needs byte-exact output, they can run the bench and grep `[gproof]`. 2. **Wire path-query reconstruction + verified-payload printing into `display_group_by_proofs`**. Today it only dumps the proof-display block; chapter 29's `display_proofs` also reconstructs the `PathQuery` and prints the verifier's structured result (the `verified:` block). Adding that to the group_by side would give the chapter parity with chapter 29's `verified:` sections — currently rendered manually from the `[matrix]` output's `Entries(len=N, sum=M)` figures. -3. 
**A high-fanout byColor variant of G6** (`color IN [100 values]`, `group_by = [color]`) — captured implicitly in the bench's existing `group_by_color_in_proof_100_rangecountable_branches` (10 512 B) but not given its own G* section, since it's structurally G6 with `ProvableCountTree` overhead. +3. **A high-fanout byColor variant of G1b** (`color IN [100 values]`, `group_by = [color]`) — captured implicitly in the bench's existing `group_by_color_in_proof_100_rangecountable_branches` (10 512 B) but not given its own G* section, since it's structurally G1b with `ProvableCountTree` overhead. 
## Cross-Reference to Chapter 29 diff --git a/packages/rs-drive-proof-verifier/src/proof/document_count.rs b/packages/rs-drive-proof-verifier/src/proof/document_count.rs index 395feb6de6..b789cbf07b 100644 --- a/packages/rs-drive-proof-verifier/src/proof/document_count.rs +++ b/packages/rs-drive-proof-verifier/src/proof/document_count.rs @@ -267,11 +267,17 @@ pub fn verify_carrier_aggregate_count_proof( proof: &Proof, mtd: &ResponseMetadata, limit: Option, + left_to_right: bool, platform_version: &PlatformVersion, provider: &dyn ContextProvider, ) -> Result, Error> { let (root_hash, per_key_counts) = query - .verify_carrier_aggregate_count_proof(&proof.grovedb_proof, limit, platform_version) + .verify_carrier_aggregate_count_proof( + &proof.grovedb_proof, + limit, + left_to_right, + platform_version, + ) .map_drive_error(proof, mtd)?; verify_tenderdash_proof(proof, mtd, &root_hash, provider)?; diff --git a/packages/rs-drive/benches/document_count_worst_case.rs b/packages/rs-drive/benches/document_count_worst_case.rs index c4b0558cf0..f20248c1b9 100644 --- a/packages/rs-drive/benches/document_count_worst_case.rs +++ b/packages/rs-drive/benches/document_count_worst_case.rs @@ -337,8 +337,9 @@ fn document_count_worst_case(c: &mut Criterion) { display_proofs(&fixture, platform_version); // Decoded display of every `group_by` proof shape in the Count - // Index Group By Examples chapter (G3..G6). G1/G2 omitted — - // their bytes are identical to chapter 29's Q5/Q6. + // Index Group By Examples chapter (G1a, G1b, G3..G5, G7, G8, + // G8a). G1/G2 omitted — their bytes are identical to chapter + // 29's Q5/Q6. display_group_by_proofs(&fixture, platform_version); // Empirical probe of the value-tree element type for the two @@ -583,20 +584,72 @@ fn document_count_worst_case(c: &mut Criterion) { } // Per-query timing for the Count Index Group By Examples chapter - // (G1 through G6). 
Each case exercises one of - // the documented group_by shapes so the chapter's overview table can quote - // wall-clock timings alongside proof-size and complexity columns. + // (G1 through G5, including G1a/G1b, plus G7/G8/G8a). Each case exercises one of + // the documented group_by shapes so the chapter's overview + // table can quote wall-clock timings alongside proof-size and + // complexity columns. let brands_100 = brands_n(BRAND_COUNT); - let groupby_chapter_queries: Vec<(&str, Value, CountMode, Option)> = vec![ + // Order-by-descending wire shape: matches what + // `order_clauses_from_value` parses into a single + // `OrderClause { field: brand, ascending: false }`. The + // dispatcher reads the first order clause's direction to pick + // `left_to_right` for the carrier walk on G8 / G8a. + let order_by_brand_desc = Value::Array(vec![Value::Array(vec![ + Value::Text("brand".to_string()), + Value::Text("desc".to_string()), + ])]); + let groupby_chapter_queries: Vec<(&str, Value, Value, CountMode, Option)> = vec![ ( "query_g1_brand_in_grouped_by_brand", Value::Array(vec![clause("brand", "in", Value::Array(brands_2.clone()))]), + Value::Null, + CountMode::GroupByIn, + None, + ), + ( + // G1a: same `In on byBrand` shape as G1 but one of the + // In values (`brand_100`) is absent from the fixture + // (BRAND_COUNT = 100, so brand labels are + // `brand_000`..`brand_099`). Captures the absent-branch + // proof shape — the grovedb proof still commits an + // absence subproof at the missing key, but + // `verify_query` without + // `absence_proofs_for_non_existing_searched_keys: true` + // drops the absent branch from the returned entries + // (see `test_point_lookup_proof_omits_absent_in_branches_from_entries`). 
+ "query_g1a_brand_in_with_absent_grouped_by_brand", + Value::Array(vec![clause( + "brand", + "in", + Value::Array(vec![ + Value::Text(brand_label(0)), + Value::Text(brand_label(BRAND_COUNT)), + ]), + )]), + Value::Null, + CountMode::GroupByIn, + None, + ), + ( + // G1b: same shape as G1, scaled to |IN| = BRAND_COUNT + // = 100. The proof reveals every byBrand entry as a + // `KVValueHashFeatureTypeWithChildHash` target — the + // most efficient byte-per-key shape `GroupByIn` can + // hit (no opaque-sibling commitments at L6). + "query_g1b_brand_in_100_grouped_by_brand", + Value::Array(vec![clause( + "brand", + "in", + Value::Array(brands_100.clone()), + )]), + Value::Null, CountMode::GroupByIn, None, ), ( "query_g2_color_in_grouped_by_color", Value::Array(vec![clause("color", "in", Value::Array(colors_2.clone()))]), + Value::Null, CountMode::GroupByIn, None, ), @@ -606,12 +659,14 @@ fn document_count_worst_case(c: &mut Criterion) { clause("brand", "in", Value::Array(brands_2.clone())), clause("color", "==", Value::Text(mid_color.clone())), ]), + Value::Null, CountMode::GroupByIn, None, ), ( "query_g4_color_gt_grouped_by_color", Value::Array(vec![clause("color", ">", broad_range_floor.clone())]), + Value::Null, CountMode::GroupByRange, None, ), @@ -621,25 +676,17 @@ fn document_count_worst_case(c: &mut Criterion) { clause("brand", "in", Value::Array(brands_2.clone())), clause("color", ">", broad_range_floor.clone()), ]), + Value::Null, CountMode::GroupByCompound, None, ), - ( - "query_g6_brand_in_100_grouped_by_brand", - Value::Array(vec![clause( - "brand", - "in", - Value::Array(brands_100.clone()), - )]), - CountMode::GroupByIn, - None, - ), ( "query_g7_brand_in_color_gt_grouped_by_brand", Value::Array(vec![ clause("brand", "in", Value::Array(brands_2.clone())), clause("color", ">", broad_range_floor.clone()), ]), + Value::Null, CountMode::GroupByIn, None, ), @@ -649,6 +696,7 @@ fn document_count_worst_case(c: &mut Criterion) { clause("brand", ">", 
Value::Text(brand_label(BRAND_COUNT / 2))), clause("color", ">", broad_range_floor.clone()), ]), + Value::Null, CountMode::GroupByRange, // Range-outer carrier-aggregate enforces a fixed // platform-wide outer-walk cap of @@ -657,12 +705,43 @@ fn document_count_worst_case(c: &mut Criterion) { // shape, so pass `None` here. None, ), + ( + "query_g8a_brand_between_color_between_grouped_by_brand_desc", + Value::Array(vec![ + // Two-sided brand range (brand_050, brand_065), + // exclusive on both sides. The dispatcher merges + // these into a single `BetweenExcludeBounds` clause + // via `merge_same_field_range_pairs`. + clause("brand", ">", Value::Text(brand_label(BRAND_COUNT / 2))), + clause( + "brand", + "<", + Value::Text(brand_label(BRAND_COUNT * 65 / 100)), + ), + // Two-sided color range (color_00000200, + // color_00000400), exclusive on both sides. + clause("color", ">", Value::Text(color_label(200))), + clause("color", "<", Value::Text(color_label(400))), + ]), + order_by_brand_desc.clone(), + CountMode::GroupByRange, + None, + ), ]; - for (name, raw_where, mode, limit) in groupby_chapter_queries { + for (name, raw_where, raw_order_by, mode, limit) in groupby_chapter_queries { group.bench_function(name, |b| { b.iter_batched( - || count_request(&fixture, raw_where.clone(), Value::Null, mode, limit, true), + || { + count_request( + &fixture, + raw_where.clone(), + raw_order_by.clone(), + mode, + limit, + true, + ) + }, |request| match fixture .drive .execute_document_count_request(request, None, platform_version) @@ -831,16 +910,28 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: &'static str, platform_allowed: &'static str, raw_where: Value, + /// Order-by shape; `Value::Null` for the default-ascending + /// path. Threaded through so order-sensitive carrier cases + /// (G8a's descending walk) actually exercise + /// `left_to_right = false` instead of silently defaulting + /// to ascending. 
+ raw_order_by: Value, mode: CountMode, limit: Option, } + let order_by_brand_desc = Value::Array(vec![Value::Array(vec![ + Value::Text("brand".to_string()), + Value::Text("desc".to_string()), + ])]); + let cases: Vec = vec![ // ── group_by = [] (Aggregate) ────────────────────────────── MatrixCase { label: "[] / where=(empty)", platform_allowed: "yes (documentsCountable fast path)", raw_where: where_empty(), + raw_order_by: Value::Null, mode: CountMode::Aggregate, limit: None, }, @@ -848,6 +939,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[] / where=brand==X", platform_allowed: "yes", raw_where: where_brand_eq(), + raw_order_by: Value::Null, mode: CountMode::Aggregate, limit: None, }, @@ -855,6 +947,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[] / where=color==X", platform_allowed: "yes", raw_where: where_color_eq(), + raw_order_by: Value::Null, mode: CountMode::Aggregate, limit: None, }, @@ -862,6 +955,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[] / where=brand==X AND color==Y", platform_allowed: "yes", raw_where: where_brand_eq_color_eq(), + raw_order_by: Value::Null, mode: CountMode::Aggregate, limit: None, }, @@ -869,6 +963,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[] / where=brand IN[2]", platform_allowed: "yes (per-In aggregate fan-out)", raw_where: where_brand_in(), + raw_order_by: Value::Null, mode: CountMode::Aggregate, limit: None, }, @@ -876,6 +971,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[] / where=color IN[2]", platform_allowed: "yes (per-In aggregate fan-out)", raw_where: where_color_in(), + raw_order_by: Value::Null, mode: CountMode::Aggregate, limit: None, }, @@ -883,6 +979,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[] / where=color > floor", 
platform_allowed: "yes (AggregateCountOnRange)", raw_where: where_color_gt(), + raw_order_by: Value::Null, mode: CountMode::Aggregate, limit: None, }, @@ -890,6 +987,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[] / where=brand==X AND color > floor", platform_allowed: "yes (AggregateCountOnRange on byBrandColor terminator)", raw_where: where_brand_eq_color_gt(), + raw_order_by: Value::Null, mode: CountMode::Aggregate, limit: None, }, @@ -897,6 +995,23 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[] / where=brand IN[2] AND color > floor", platform_allowed: "no-proof: yes / prove: no (aggregate proof can't fork)", raw_where: where_brand_in_color_gt(), + raw_order_by: Value::Null, + mode: CountMode::Aggregate, + limit: None, + }, + // G8c: same `where` as G8, but with no group_by. The + // two-range carrier requires `GroupByRange + group_by = + // [outer_range_field]`; with `mode = Aggregate` the + // dispatcher rejects at mode-detection (single-`u64` + // aggregation across two ranges has no defined target — + // the per-branch counts can't be silently summed at the + // verifier). 
+ MatrixCase { + label: "[] / where=brand > floor AND color > floor", + platform_allowed: + "no — two-range carrier requires `GroupByRange + group_by = [outer_range_field]`", + raw_where: where_brand_gt_color_gt(), + raw_order_by: Value::Null, mode: CountMode::Aggregate, limit: None, }, @@ -905,6 +1020,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[color] / where=color IN[2]", platform_allowed: "yes (GroupByIn)", raw_where: where_color_in(), + raw_order_by: Value::Null, mode: CountMode::GroupByIn, limit: None, }, @@ -912,6 +1028,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[color] / where=color > floor", platform_allowed: "yes (GroupByRange — distinct-range walk)", raw_where: where_color_gt(), + raw_order_by: Value::Null, mode: CountMode::GroupByRange, limit: None, }, @@ -919,6 +1036,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[color] / where=color==X", platform_allowed: "no — `color` is constrained by `==`, not `In` or range", raw_where: where_color_eq(), + raw_order_by: Value::Null, mode: CountMode::GroupByIn, limit: None, }, @@ -926,6 +1044,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[color] / where=brand IN[2] AND color > floor", platform_allowed: "no — single-field GROUP BY with both `In` and range", raw_where: where_brand_in_color_gt(), + raw_order_by: Value::Null, mode: CountMode::GroupByRange, limit: None, }, @@ -934,6 +1053,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[brand] / where=brand IN[2]", platform_allowed: "yes (GroupByIn — non-rangeCountable byBrand)", raw_where: where_brand_in(), + raw_order_by: Value::Null, mode: CountMode::GroupByIn, limit: None, }, @@ -941,6 +1061,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[brand] / where=brand IN[2] AND color==Y", 
platform_allowed: "yes (GroupByIn — compound covers byBrandColor)", raw_where: where_brand_in_color_eq(), + raw_order_by: Value::Null, mode: CountMode::GroupByIn, limit: None, }, @@ -948,6 +1069,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[brand] / where=brand IN[2] AND color > floor", platform_allowed: "yes (RangeAggregateCarrierProof — carrier ACOR per In branch)", raw_where: where_brand_in_color_gt(), + raw_order_by: Value::Null, mode: CountMode::GroupByIn, limit: None, }, @@ -956,6 +1078,30 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo platform_allowed: "yes (RangeAggregateCarrierProof — carrier ACOR; platform-max outer limit = 10)", raw_where: where_brand_gt_color_gt(), + raw_order_by: Value::Null, + mode: CountMode::GroupByRange, + limit: None, + }, + // G8a: bounded carrier + bounded ACOR with descending walk. + // Same `RangeAggregateCarrierProof` mode as G8 but the + // dispatcher merges two-sided ranges into `between*` clauses + // via `merge_same_field_range_pairs` and threads + // `left_to_right = false` through the carrier path query. 
+ MatrixCase { + label: "[brand] / where=brand BETWEEN AND color BETWEEN (left_to_right=false)", + platform_allowed: + "yes (RangeAggregateCarrierProof — bounded-range carrier with descending walk)", + raw_where: Value::Array(vec![ + clause("brand", ">", Value::Text(brand_label(BRAND_COUNT / 2))), + clause( + "brand", + "<", + Value::Text(brand_label(BRAND_COUNT * 65 / 100)), + ), + clause("color", ">", Value::Text(color_label(200))), + clause("color", "<", Value::Text(color_label(400))), + ]), + raw_order_by: order_by_brand_desc.clone(), mode: CountMode::GroupByRange, limit: None, }, @@ -963,6 +1109,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[brand] / where=brand==X", platform_allowed: "no — `brand` is `==`, not `In` or range", raw_where: where_brand_eq(), + raw_order_by: Value::Null, mode: CountMode::GroupByIn, limit: None, }, @@ -971,6 +1118,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[brand, color] / where=brand IN[2] AND color > floor", platform_allowed: "yes (GroupByCompound — `(In, range)` shape)", raw_where: where_brand_in_color_gt(), + raw_order_by: Value::Null, mode: CountMode::GroupByCompound, limit: Some(100), }, @@ -978,9 +1126,28 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo label: "[brand, color] / where=brand IN[2] AND color==Y", platform_allowed: "no — `color` must be range, not `==`", raw_where: where_brand_in_color_eq(), + raw_order_by: Value::Null, mode: CountMode::GroupByCompound, limit: Some(100), }, + // G8b: same `where` as G8, but group_by widened to + // [brand, color]. 
The two-range carrier (`brand > floor AND + // color > floor`) is permitted only with + // `GroupByRange + group_by = [outer_range_field]`; with + // `GroupByCompound + group_by = [outer, inner]` the + // dispatcher rejects at mode-detection (the carrier shape + // is single-field only — the compound walk would need a + // distinct enumeration over both ranges, which the carrier + // primitive doesn't express). + MatrixCase { + label: "[brand, color] / where=brand > floor AND color > floor", + platform_allowed: + "no — two-range carrier requires `GroupByRange + group_by = [outer_range_field]`", + raw_where: where_brand_gt_color_gt(), + raw_order_by: Value::Null, + mode: CountMode::GroupByCompound, + limit: None, + }, // ── group_by = [color, brand] (reversed compound) ────────── MatrixCase { label: "[color, brand] / where=color IN[2] AND brand > X", @@ -992,6 +1159,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo clause("color", "in", Value::Array(colors_2.clone())), clause("brand", ">", Value::Text(mid_brand.clone())), ]), + raw_order_by: Value::Null, mode: CountMode::GroupByCompound, limit: Some(100), }, @@ -1001,6 +1169,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo let noproof_result = drive_count_outcome( fixture, case.raw_where.clone(), + case.raw_order_by.clone(), case.mode, case.limit, false, @@ -1009,6 +1178,7 @@ fn report_group_by_matrix(fixture: &CountBenchFixture, platform_version: &Platfo let prove_result = drive_count_outcome( fixture, case.raw_where.clone(), + case.raw_order_by.clone(), case.mode, case.limit, true, @@ -1783,19 +1953,49 @@ fn display_group_by_proofs(fixture: &CountBenchFixture, platform_version: &Platf ]) }; - let cases: Vec<(&str, Value, CountMode, Option)> = vec![ + let cases: Vec<(&str, Value, Value, CountMode, Option)> = vec![ + ( + // G1a renders alongside the rest so the chapter can quote + // the absent-branch proof bytes and demonstrate the + // 
absence subproof commitment. + "G1a [brand] / where=brand IN[brand_000, brand_100] (one absent)", + Value::Array(vec![clause( + "brand", + "in", + Value::Array(vec![ + Value::Text(brand_label(0)), + Value::Text(brand_label(BRAND_COUNT)), + ]), + )]), + Value::Null, + CountMode::GroupByIn, + None, + ), + ( + "G1b [brand] / where=brand IN[100]", + Value::Array(vec![clause( + "brand", + "in", + Value::Array(brands_100.clone()), + )]), + Value::Null, + CountMode::GroupByIn, + None, + ), ( "G3 [brand] / where=brand IN[2] AND color==Y", Value::Array(vec![ clause("brand", "in", Value::Array(brands_2.clone())), clause("color", "==", Value::Text(mid_color.clone())), ]), + Value::Null, CountMode::GroupByIn, None, ), ( "G4 [color] / where=color > floor", Value::Array(vec![clause("color", ">", range_floor.clone())]), + Value::Null, CountMode::GroupByRange, None, ), @@ -1805,25 +2005,17 @@ fn display_group_by_proofs(fixture: &CountBenchFixture, platform_version: &Platf clause("brand", "in", Value::Array(brands_2.clone())), clause("color", ">", range_floor.clone()), ]), + Value::Null, CountMode::GroupByCompound, None, ), - ( - "G6 [brand] / where=brand IN[100]", - Value::Array(vec![clause( - "brand", - "in", - Value::Array(brands_100.clone()), - )]), - CountMode::GroupByIn, - None, - ), ( "G7 [brand] / where=brand IN[2] AND color > floor", Value::Array(vec![ clause("brand", "in", Value::Array(brands_2.clone())), clause("color", ">", range_floor.clone()), ]), + Value::Null, CountMode::GroupByIn, None, ), @@ -1833,6 +2025,26 @@ fn display_group_by_proofs(fixture: &CountBenchFixture, platform_version: &Platf clause("brand", ">", Value::Text(brand_label(BRAND_COUNT / 2))), clause("color", ">", range_floor.clone()), ]), + Value::Null, + CountMode::GroupByRange, + None, + ), + ( + "G8a [brand] / where=brand BETWEEN AND color BETWEEN (desc)", + Value::Array(vec![ + clause("brand", ">", Value::Text(brand_label(BRAND_COUNT / 2))), + clause( + "brand", + "<", + 
Value::Text(brand_label(BRAND_COUNT * 65 / 100)), + ), + clause("color", ">", Value::Text(color_label(200))), + clause("color", "<", Value::Text(color_label(400))), + ]), + Value::Array(vec![Value::Array(vec![ + Value::Text("brand".to_string()), + Value::Text("desc".to_string()), + ])]), CountMode::GroupByRange, None, ), @@ -1843,8 +2055,8 @@ fn display_group_by_proofs(fixture: &CountBenchFixture, platform_version: &Platf .with_big_endian() .with_no_limit(); - for (label, raw_where, mode, limit) in cases { - let request = count_request(fixture, raw_where, Value::Null, mode, limit, true); + for (label, raw_where, raw_order_by, mode, limit) in cases { + let request = count_request(fixture, raw_where, raw_order_by, mode, limit, true); let proof = match fixture .drive @@ -1985,12 +2197,13 @@ fn hex_bytes(bytes: &[u8]) -> String { fn drive_count_outcome( fixture: &CountBenchFixture, raw_where: Value, + raw_order_by: Value, mode: CountMode, limit: Option, prove: bool, platform_version: &PlatformVersion, ) -> String { - let request = count_request(fixture, raw_where, Value::Null, mode, limit, prove); + let request = count_request(fixture, raw_where, raw_order_by, mode, limit, prove); match fixture .drive .execute_document_count_request(request, None, platform_version) diff --git a/packages/rs-drive/src/query/drive_document_count_query/drive_dispatcher.rs b/packages/rs-drive/src/query/drive_document_count_query/drive_dispatcher.rs index a43f5b8f8a..a76c20414c 100644 --- a/packages/rs-drive/src/query/drive_document_count_query/drive_dispatcher.rs +++ b/packages/rs-drive/src/query/drive_document_count_query/drive_dispatcher.rs @@ -194,19 +194,11 @@ pub fn where_clauses_from_value( } /// Run the system-wide where-clause validator on a structured -/// `Vec`. 
Single source of truth for the count-endpoint -/// shape contract; called both from the legacy CBOR-decoded entry -/// [`where_clauses_from_value`] and from the dispatcher's typed -/// entry, [`Drive::execute_document_count_request`]. -/// -/// Despite the name, this function is **validation-only** in the -/// worktree's base — it does not re-shape the clauses (no -/// `> AND <` → `between*` merge). The "canonicalize" suffix is -/// reserved for the eventual carrier-aggregate landing where a -/// same-field range-pair merge becomes load-bearing; on the -/// current code path `WhereClause::group_clauses` only classifies, -/// and the merged form is computed lazily inside the executors -/// when an executor needs it. +/// `Vec` and canonicalize same-field range pairs into +/// their `between*` form. Single source of truth for the +/// count-endpoint shape contract; called both from the legacy +/// CBOR-decoded entry [`where_clauses_from_value`] and from the +/// dispatcher's typed entry, [`Drive::execute_document_count_request`]. /// /// The validator (`WhereClause::group_clauses`) rejects: /// - Duplicate `Equal` clauses on the same field @@ -237,6 +229,21 @@ pub fn where_clauses_from_value( /// `CountMode::GroupByRange`-with-two-ranges and routes to /// `DocumentCountMode::RangeAggregateCarrierProof`); replicating /// it here would be redundant. +/// +/// After validation, [`merge_same_field_range_pairs`] collapses +/// `[field > A, field < B]` (and analogous pairs with `>=` / `<=`) +/// into the canonical `between*` operator that +/// [`DriveDocumentCountQuery::range_clause_to_query_item`] knows +/// how to convert into a single `QueryItem`. The regular-query +/// parser does the same merge before its grouped-triple +/// validation; for count queries we do it explicitly here so +/// callers can pass either the bounded form (e.g. +/// `[brand > A, brand < B]`) or the pre-merged form (e.g. 
+/// `[brand BetweenExcludeBounds [A, B]]`) and get equivalent +/// mode detection downstream. Without this merge, G8a's natural +/// wire shape (four range clauses, two per field) would slip past +/// the catch-`MultipleRangeClauses` block above and then get +/// rejected by `detect_mode`'s `range_count > 1` structural check. pub fn validate_and_canonicalize_where_clauses( clauses: Vec, ) -> Result, Error> { @@ -245,7 +252,106 @@ pub fn validate_and_canonicalize_where_clauses( Err(Error::Query(QuerySyntaxError::MultipleRangeClauses(_))) => {} Err(e) => return Err(e), } - Ok(clauses) + merge_same_field_range_pairs(clauses) +} + +/// Collapse `[field > A, field < B]` (and analogous pairs with +/// `>=` / `<=`) into a single `field between* [A, B]` clause per +/// field. Equality / In clauses pass through unchanged. +/// +/// Returns an error if a field has more than two range clauses +/// (structurally meaningless — a third bound would either +/// contradict an existing one or be redundant) or if the pair +/// isn't one lower-bound + one upper-bound (e.g. two `>` on the +/// same field). 
+fn merge_same_field_range_pairs(clauses: Vec) -> Result, Error> { + use crate::query::conditions::WhereOperator::{ + Between, BetweenExcludeBounds, BetweenExcludeLeft, BetweenExcludeRight, GreaterThan, + GreaterThanOrEquals, LessThan, LessThanOrEquals, + }; + use std::collections::BTreeMap; + + let mut by_field: BTreeMap> = BTreeMap::new(); + let mut non_range: Vec = Vec::new(); + for wc in clauses { + if DriveDocumentCountQuery::is_range_operator(wc.operator) { + by_field.entry(wc.field.clone()).or_default().push(wc); + } else { + non_range.push(wc); + } + } + let mut result = non_range; + for (field, mut ranges) in by_field { + match ranges.len() { + 0 => {} + 1 => result.push(ranges.remove(0)), + 2 => { + let (mut lower, mut upper): (Option, Option) = + (None, None); + for r in ranges { + match r.operator { + GreaterThan | GreaterThanOrEquals => { + if lower.is_some() { + return Err(Error::Query(QuerySyntaxError::MultipleRangeClauses( + "two lower-bound range clauses on the same field cannot be \ + merged; combine via `between*` or remove the redundant clause", + ))); + } + lower = Some(r); + } + LessThan | LessThanOrEquals => { + if upper.is_some() { + return Err(Error::Query(QuerySyntaxError::MultipleRangeClauses( + "two upper-bound range clauses on the same field cannot be \ + merged; combine via `between*` or remove the redundant clause", + ))); + } + upper = Some(r); + } + _ => { + // The other range operators (Between*, + // StartsWith) are themselves bounded + // already; a second range clause on the + // same field is structurally redundant. 
+ return Err(Error::Query(QuerySyntaxError::MultipleRangeClauses( + "cannot pair a `between*`/`startsWith` range clause with \ + another range on the same field; use the pre-merged form", + ))); + } + } + } + let lower = lower.ok_or(Error::Query(QuerySyntaxError::MultipleRangeClauses( + "two range clauses on the same field require one lower bound (> or >=) \ + and one upper bound (< or <=)", + )))?; + let upper = upper.ok_or(Error::Query(QuerySyntaxError::MultipleRangeClauses( + "two range clauses on the same field require one lower bound (> or >=) \ + and one upper bound (< or <=)", + )))?; + let merged_op = match ( + lower.operator == GreaterThanOrEquals, + upper.operator == LessThanOrEquals, + ) { + (true, true) => Between, // [a, b] + (false, false) => BetweenExcludeBounds, // (a, b) + (true, false) => BetweenExcludeRight, // [a, b) + (false, true) => BetweenExcludeLeft, // (a, b] + }; + result.push(WhereClause { + field, + operator: merged_op, + value: dpp::platform_value::Value::Array(vec![lower.value, upper.value]), + }); + } + _ => { + return Err(Error::Query(QuerySyntaxError::MultipleRangeClauses( + "more than two range clauses on the same field are not supported; a \ + bounded range needs exactly one lower bound and one upper bound", + ))); + } + } + } + Ok(result) } /// Parse the decoded `order_by` value into structured [`OrderClause`]s. @@ -322,7 +428,7 @@ impl Drive { // independent of whether the caller arrived via the CBOR- // shaped legacy path or the v1 typed-proto path. See // [`validate_and_canonicalize_where_clauses`]'s docstring - // for the catalog of rejections. + // for the catalog of rejections / canonicalization rules. 
let where_clauses = validate_and_canonicalize_where_clauses(request.where_clauses)?; let order_clauses = request.order_clauses; @@ -567,6 +673,14 @@ impl Drive { } None }; + // Outer-walk direction: ascending by default (the + // grovedb invariant for serialized-key carriers), or + // descending when the caller's `order_by` first + // clause is `desc`. Carried byte-identically through + // `Query::left_to_right` so the verifier rebuilds the + // exact same `PathQuery` — same load-bearing pattern + // as the `RangeDistinctProof` arm above. + let left_to_right = order_by_ascending; Ok(DocumentCountResponse::Proof( self.execute_document_count_range_aggregate_carrier_proof( contract_id, @@ -574,6 +688,7 @@ impl Drive { document_type_name, where_clauses, effective_limit, + left_to_right, transaction, platform_version, )?, diff --git a/packages/rs-drive/src/query/drive_document_count_query/execute_range_count.rs b/packages/rs-drive/src/query/drive_document_count_query/execute_range_count.rs index 41be393b0a..510d80e38e 100644 --- a/packages/rs-drive/src/query/drive_document_count_query/execute_range_count.rs +++ b/packages/rs-drive/src/query/drive_document_count_query/execute_range_count.rs @@ -447,15 +447,32 @@ impl DriveDocumentCountQuery<'_> { /// Verified client-side via /// [`grovedb::GroveDb::verify_aggregate_count_query_per_key`], /// which returns `(RootHash, Vec<(Vec, u64)>)`. + /// + /// # Arguments + /// * `left_to_right` — proof-shaping bit. Threaded into the + /// outer `Query` via `Query::new_with_direction(left_to_right)` + /// on the inner carrier path query (see + /// [`Self::carrier_aggregate_count_path_query`]). `true` walks + /// the outer range ascending and emits the per-branch `u64`s + /// in lex-ascending key order; `false` walks descending and + /// emits them in lex-descending order. 
The serialized + /// `PathQuery` bytes differ between the two — the verifier + /// rebuilds the path query from `(query, limit, left_to_right)` + /// on its side, so the value passed here must match what the + /// caller will pass to + /// [`Self::verify_carrier_aggregate_count_proof`] or the + /// tenderdash root check fails. pub fn execute_carrier_aggregate_count_with_proof( &self, drive: &Drive, limit: Option, + left_to_right: bool, transaction: TransactionArg, platform_version: &PlatformVersion, ) -> Result, Error> { let drive_version = &platform_version.drive; - let path_query = self.carrier_aggregate_count_path_query(limit, platform_version)?; + let path_query = + self.carrier_aggregate_count_path_query(limit, left_to_right, platform_version)?; // Same destructure pattern as the sibling aggregate / distinct // executors. `get_proved_path_query` returns `CostContext`; // ignoring the cost field is the same pattern those use today. diff --git a/packages/rs-drive/src/query/drive_document_count_query/executors/range_aggregate_carrier_proof.rs b/packages/rs-drive/src/query/drive_document_count_query/executors/range_aggregate_carrier_proof.rs index 4b43b7cc2b..3a782e1ae6 100644 --- a/packages/rs-drive/src/query/drive_document_count_query/executors/range_aggregate_carrier_proof.rs +++ b/packages/rs-drive/src/query/drive_document_count_query/executors/range_aggregate_carrier_proof.rs @@ -56,6 +56,7 @@ impl Drive { document_type_name: String, where_clauses: Vec, limit: Option, + left_to_right: bool, transaction: TransactionArg, platform_version: &PlatformVersion, ) -> Result, Error> { @@ -81,6 +82,7 @@ impl Drive { count_query.execute_carrier_aggregate_count_with_proof( self, limit, + left_to_right, transaction, platform_version, ) diff --git a/packages/rs-drive/src/query/drive_document_count_query/path_query.rs b/packages/rs-drive/src/query/drive_document_count_query/path_query.rs index a536c900b0..8018b8367f 100644 --- 
a/packages/rs-drive/src/query/drive_document_count_query/path_query.rs +++ b/packages/rs-drive/src/query/drive_document_count_query/path_query.rs @@ -294,6 +294,7 @@ impl DriveDocumentCountQuery<'_> { pub fn carrier_aggregate_count_path_query( &self, limit: Option, + left_to_right: bool, platform_version: &PlatformVersion, ) -> Result { // The terminator property (last in the index) carries the @@ -405,7 +406,7 @@ impl DriveDocumentCountQuery<'_> { } subquery_path_extension.push(terminator_prop_name.as_bytes().to_vec()); - let mut outer_query = Query::new(); + let mut outer_query = Query::new_with_direction(left_to_right); match carrier { Carrier::Pending => { return Err(Error::Query( diff --git a/packages/rs-drive/src/verify/document_count/verify_carrier_aggregate_count_proof/mod.rs b/packages/rs-drive/src/verify/document_count/verify_carrier_aggregate_count_proof/mod.rs index 79341a9892..c7ee34d567 100644 --- a/packages/rs-drive/src/verify/document_count/verify_carrier_aggregate_count_proof/mod.rs +++ b/packages/rs-drive/src/verify/document_count/verify_carrier_aggregate_count_proof/mod.rs @@ -9,7 +9,9 @@ use dpp::version::PlatformVersion; impl DriveDocumentCountQuery<'_> { /// Verifies a **carrier** `AggregateCountOnRange` proof and /// returns `(root_hash, per_key_counts)` — one `(in_key, u64)` - /// pair per resolved In branch in serialized lex-asc order. + /// pair per resolved In branch. Order depends on + /// `left_to_right`: `true` returns serialized lex-ascending, + /// `false` returns serialized lex-descending. /// /// Counterpart to the prover-side /// [`execute_carrier_aggregate_count_with_proof`](Self::execute_carrier_aggregate_count_with_proof): @@ -24,6 +26,17 @@ impl DriveDocumentCountQuery<'_> { /// /// # Arguments /// * `proof` — raw grovedb proof bytes. + /// * `limit` — per-branch carrier walk cap; must match the + /// prover's `SizedQuery::limit`. + /// * `left_to_right` — proof-shaping bit. 
Must match the value + /// the prover passed to + /// [`Self::execute_carrier_aggregate_count_with_proof`] + /// (typically derived from the request's first + /// `order_by_clauses` entry's `ascending`). The verifier + /// constructs the outer `Query` via + /// `Query::new_with_direction(left_to_right)`; a mismatch + /// produces different `PathQuery` bytes and the tenderdash + /// root check fails. /// * `platform_version` — selects the method version. /// /// The `Vec<(Vec, u64)>` payload mirrors grovedb's per-key @@ -33,6 +46,7 @@ impl DriveDocumentCountQuery<'_> { &self, proof: &[u8], limit: Option, + left_to_right: bool, platform_version: &PlatformVersion, ) -> Result<(RootHash, Vec<(Vec, u64)>), Error> { match platform_version @@ -42,7 +56,12 @@ impl DriveDocumentCountQuery<'_> { .document_count .verify_carrier_aggregate_count_proof { - 0 => self.verify_carrier_aggregate_count_proof_v0(proof, limit, platform_version), + 0 => self.verify_carrier_aggregate_count_proof_v0( + proof, + limit, + left_to_right, + platform_version, + ), version => Err(Error::Drive(DriveError::UnknownVersionMismatch { method: "DriveDocumentCountQuery::verify_carrier_aggregate_count_proof".to_string(), known_versions: vec![0], diff --git a/packages/rs-drive/src/verify/document_count/verify_carrier_aggregate_count_proof/v0/mod.rs b/packages/rs-drive/src/verify/document_count/verify_carrier_aggregate_count_proof/v0/mod.rs index 4a7d195fd4..077b7c439d 100644 --- a/packages/rs-drive/src/verify/document_count/verify_carrier_aggregate_count_proof/v0/mod.rs +++ b/packages/rs-drive/src/verify/document_count/verify_carrier_aggregate_count_proof/v0/mod.rs @@ -34,9 +34,11 @@ impl DriveDocumentCountQuery<'_> { &self, proof: &[u8], limit: Option, + left_to_right: bool, platform_version: &PlatformVersion, ) -> Result<(RootHash, Vec<(Vec, u64)>), Error> { - let path_query = self.carrier_aggregate_count_path_query(limit, platform_version)?; + let path_query = + 
self.carrier_aggregate_count_path_query(limit, left_to_right, platform_version)?; let (root_hash, entries) = GroveDb::verify_aggregate_count_query_per_key( proof, &path_query, diff --git a/packages/rs-sdk/src/platform/documents/count_proof_helpers.rs b/packages/rs-sdk/src/platform/documents/count_proof_helpers.rs index b70e92b407..d00cf8b2fb 100644 --- a/packages/rs-sdk/src/platform/documents/count_proof_helpers.rs +++ b/packages/rs-sdk/src/platform/documents/count_proof_helpers.rs @@ -298,11 +298,17 @@ pub(super) fn verify_count_query( } else { Some(limit_to_u16_or_default(request.limit)?) }; + let left_to_right = request + .order_by_clauses + .first() + .map(|c| c.ascending) + .unwrap_or(true); let entries = verify_carrier_aggregate_count_proof( &count_query, proof, mtd, limit_u16, + left_to_right, platform_version, provider, )?;