From 16e6f1b2da717dad3cd4512914238eab5755f4d3 Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Wed, 14 Jan 2026 13:42:41 -0800 Subject: [PATCH] Derive other indexes directly for binary fuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We manipulate the math and use bit tricks to derive the other two indexes more efficiently during peeling. Apple M1: ``` name old MKeys/s new MKeys/s delta BinaryFusePopulate/8/n=10000-10 43.8 ± 2% 50.3 ± 3% +14.88% (p=0.000 n=8+9) BinaryFusePopulate/8/n=100000-10 38.6 ± 3% 41.3 ± 1% +7.09% (p=0.000 n=9+8) BinaryFusePopulate/8/n=1000000-10 35.0 ± 4% 36.5 ± 7% +4.12% (p=0.013 n=9+10) BinaryFusePopulate/16/n=10000-10 48.6 ± 4% 48.5 ± 6% ~ (p=1.000 n=10+10) BinaryFusePopulate/16/n=100000-10 38.0 ± 3% 41.1 ± 1% +8.35% (p=0.000 n=10+10) BinaryFusePopulate/16/n=1000000-10 33.8 ± 5% 36.6 ± 2% +8.14% (p=0.000 n=10+10) ``` GCE N4D (AMD Turin): ``` name old MKeys/s new MKeys/s delta BinaryFusePopulate/8/n=10000-8 53.2 ± 3% 57.1 ± 1% +7.46% (p=0.000 n=10+10) BinaryFusePopulate/8/n=100000-8 33.0 ± 0% 37.5 ± 1% +13.38% (p=0.000 n=10+10) BinaryFusePopulate/8/n=1000000-8 28.5 ± 2% 31.8 ± 2% +11.59% (p=0.000 n=10+10) BinaryFusePopulate/16/n=10000-8 53.1 ± 1% 56.2 ± 1% +5.93% (p=0.000 n=10+10) BinaryFusePopulate/16/n=100000-8 31.8 ± 1% 37.3 ± 1% +17.35% (p=0.000 n=10+10) BinaryFusePopulate/16/n=1000000-8 27.5 ± 1% 30.9 ± 1% +12.34% (p=0.000 n=10+10) ``` GCE C4 (Intel Emerald Rapids, turbo boost capped at "all core" max): ``` name old MKeys/s new MKeys/s delta BinaryFusePopulate/8/n=10000-8 29.2 ± 1% 32.2 ± 1% +10.00% (p=0.000 n=10+10) BinaryFusePopulate/8/n=100000-8 27.0 ± 3% 29.8 ± 5% +10.22% (p=0.000 n=10+10) BinaryFusePopulate/8/n=1000000-8 25.6 ± 3% 28.2 ± 5% +10.27% (p=0.000 n=10+10) BinaryFusePopulate/16/n=10000-8 28.9 ± 1% 32.0 ± 1% +10.84% (p=0.000 n=10+10) BinaryFusePopulate/16/n=100000-8 26.2 ± 1% 28.8 ± 3% +10.05% (p=0.000 n=10+10) BinaryFusePopulate/16/n=1000000-8 24.8 ± 2% 26.9 ± 2% +8.37% (p=0.000 n=10+10) ``` GCE C4A (Google's Axion ARM64): ``` name old MKeys/s new MKeys/s delta BinaryFusePopulate/8/n=10000-8 45.1 ± 1% 45.1 ± 1% ~ (p=0.511 n=9+10) BinaryFusePopulate/8/n=100000-8 39.8 ± 1% 39.4 ± 1% -0.79% (p=0.018 n=9+10) BinaryFusePopulate/8/n=1000000-8 33.9 ± 3% 34.2 ± 3% ~ (p=0.363 n=10+10) BinaryFusePopulate/16/n=10000-8 44.0 ± 1% 44.7 ± 1% +1.54% (p=0.000 n=9+10) BinaryFusePopulate/16/n=100000-8 37.4 ± 1% 38.4 ± 1% +2.75% (p=0.000 n=10+10) BinaryFusePopulate/16/n=1000000-8 30.9 ± 5% 32.4 ± 1% +4.84% (p=0.000 n=10+10) ``` --- binaryfusefilter.go | 70 +++++++++++++++++++++++++++++++-------------- 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/binaryfusefilter.go b/binaryfusefilter.go index f7093b0..7ab1b46 100644 --- a/binaryfusefilter.go +++ b/binaryfusefilter.go @@ -79,10 +79,6 @@ func buildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (_ BinaryF reverseOrder := reuseBuffer[uint64](&b.reverseOrder, int(size+1)) reverseOrder[size] = 1 - // the array h0, h1, h2, h0, h1, h2 - var h012 [6]uint32 - // this could be used to compute the mod3 - // tabmod3 := [5]uint8{0,1,2,0,1} for { iterations += 1 if iterations > MaxIterations { @@ -194,6 +190,9 @@ func buildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (_ BinaryF } } stacksize := uint32(0) + segLen := filter.SegmentLength + // segLenToMinusSegLenX2 is used to change segLen to -2*segLen via XOR. + segLenToMinusSegLenX2 := segLen ^ (-(2 * segLen)) for Qsize > 0 { Qsize-- index := alone[Qsize] @@ -204,29 +203,63 @@ func buildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (_ BinaryF reverseOrder[stacksize] = hash stacksize++ - index1, index2, index3 := filter.getHashFromHash(hash) - - h012[1] = index2 - h012[2] = index3 - h012[3] = index1 - h012[4] = h012[1] + // Here, we could use filter.getHashFromHash(hash) to obtain the other + // two indexes. But we can manipulate the formulas to derive them more + // efficiently. We use bit tricks to avoid branching. + + h01 := uint32(hash>>18) & filter.SegmentLengthMask + h02 := uint32(hash) & filter.SegmentLengthMask + + // These variables are either 0 or all 1s. + is0 := -uint32((found - 1) >> 7) // all 1s if found==0 (relies on uint8 wrap) + is1 := -uint32(found & 1) // all 1s if found==1 + is2 := -uint32(found >> 1) // all 1s if found==2 + + // First, adjust the segment index. other_index1 is: + // if found<2: index + segLen + // if found=2: index - segLen*2 + other_index1 := index + (segLen ^ (segLenToMinusSegLenX2 & is2)) + // other_index2 is: + // if found>0: index - segLen + // if found=0: index + 2*segLen + other_index2 := index - (segLen ^ (segLenToMinusSegLenX2 & is0)) + + // Now adjust the offset inside the segment. + // Three cases: + // 0: other_index1 ^= h01 other_index2 ^= h02 + // 1: other_index1 ^= h01^h02 other_index2 ^= h01 + // 2: other_index1 ^= h02 other_index2 ^= h01^h02 + other_index1 ^= (h01 &^ is2) ^ (h02 &^ is0) + other_index2 ^= (h01 &^ is0) ^ (h02 &^ is1) + + f1 := uint8(is0&1 | is1&2) // f1 = (found + 1) % 3 + f2 := uint8(is0&2 | is2&1) // f2 = (found + 2) % 3 + + // Verification. Turn on for debugging. + if false { + index1, index2, index3 := filter.getHashFromHash(hash) + if other_index1 != []uint32{index1, index2, index3}[(found+1)%3] { + panic("incorrect other_index1") + } + if other_index2 != []uint32{index1, index2, index3}[(found+2)%3] { + panic("incorrect other_index2") + } + } - other_index1 := h012[found+1] alone[Qsize] = other_index1 if (t2count[other_index1] >> 2) == 2 { Qsize++ } t2count[other_index1] -= 4 - t2count[other_index1] ^= filter.mod3(found + 1) // could use this instead: tabmod3[found+1] + t2count[other_index1] ^= f1 t2hash[other_index1] ^= hash - other_index2 := h012[found+2] alone[Qsize] = other_index2 if (t2count[other_index2] >> 2) == 2 { Qsize++ } t2count[other_index2] -= 4 - t2count[other_index2] ^= filter.mod3(found + 2) // could use this instead: tabmod3[found+2] + t2count[other_index2] ^= f2 t2hash[other_index2] ^= hash } } @@ -255,6 +288,7 @@ func buildBinaryFuse[T Unsigned](b *BinaryFuseBuilder, keys []uint64) (_ BinaryF return filter, iterations, nil } + var h012 [5]uint32 for i := int(size - 1); i >= 0; i-- { // the hash of the key we insert next hash := reverseOrder[i] @@ -297,14 +331,6 @@ func (filter *BinaryFuse[T]) initializeParameters(b *BinaryFuseBuilder, size uin filter.Fingerprints = reuseBuffer[T](&b.fingerprints, int(arrayLength)) } -func (filter *BinaryFuse[T]) mod3(x uint8) uint8 { - if x > 2 { - x -= 3 - } - - return x -} - func (filter *BinaryFuse[T]) getHashFromHash(hash uint64) (uint32, uint32, uint32) { hi, _ := bits.Mul64(hash, uint64(filter.SegmentCountLength)) h0 := uint32(hi)