diff --git a/README.md b/README.md index 57e1094..ddf5fac 100644 --- a/README.md +++ b/README.md @@ -86,10 +86,10 @@ When building many filters, memory can be reused (reducing allocation and GC overhead) with a `BinaryFuseBuilder`: ```Go var builder xorfilter.BinaryFuseBuilder +builder = xorfilter.MakeBinaryFuseBuilder[uint16](initialSize) // Optional for { - filter8, _ := BuildBinaryFuse[uint8](&builder, keys) - filter16, _ := BuildBinaryFuse[uint16](&builder, keys) - ... + filter16, _ := BuildBinaryFuse[uint16](&builder, keys) + ... } ``` diff --git a/binaryfusefilter.go b/binaryfusefilter.go index f7093b0..e4db127 100644 --- a/binaryfusefilter.go +++ b/binaryfusefilter.go @@ -38,20 +38,53 @@ func NewBinaryFuse[T Unsigned](keys []uint64) (*BinaryFuse[T], error) { // BinaryFuseBuilder can be used to reuse memory allocations across multiple // BinaryFuse builds. +// +// An empty BinaryFuseBuilder can be used, and its internal memory will grow as +// needed over time. MakeBinaryFuseBuilder can also be used to pre-initialize +// for a certain size. type BinaryFuseBuilder struct { - alone reusableBuffer - t2hash reusableBuffer - reverseOrder reusableBuffer - t2count reusableBuffer - reverseH reusableBuffer - startPos reusableBuffer - fingerprints reusableBuffer + alone []uint32 + t2hash []uint64 + reverseOrder []uint64 + t2count []uint8 + reverseH []uint8 + startPos []uint32 + fingerprints []uint32 +} + +// MakeBinaryFuseBuilder creates a BinaryFuseBuilder with enough preallocated +// memory to allow building of binary fuse filters with fingerprint type T +// without allocations. +// +// Note that the builder can be used with a smaller fingerprint type without +// reallocations. If it is used with a larger fingerprint type, there will be +// one reallocation for the fingerprints slice. +func MakeBinaryFuseBuilder[T Unsigned](initialSize int) BinaryFuseBuilder { + var b BinaryFuseBuilder + var filter BinaryFuse[T] + size := uint32(initialSize) + filter.initializeParameters(&b, size) + capacity := uint32(len(filter.Fingerprints)) + reuseBuffer(&b.alone, capacity) + reuseBuffer(&b.t2count, capacity) + reuseBuffer(&b.reverseH, size) + + reuseBuffer(&b.t2hash, capacity) + reuseBuffer(&b.reverseOrder, size+1) + // The startPos array needs to be large enough for smaller sizes which use a + // smaller segment length. Also, we dynamically try a smaller segment length + // in some cases. + reuseBuffer(&b.startPos, 2<> blockBits) + startPos[i] = uint32((uint64(i) * uint64(size)) >> blockBits) } for _, key := range keys { hash := mixsplit(key, filter.Seed) @@ -279,22 +312,25 @@ func (filter *BinaryFuse[T]) initializeParameters(b *BinaryFuseBuilder, size uin filter.SegmentLength = 262144 } filter.SegmentLengthMask = filter.SegmentLength - 1 - sizeFactor := calculateSizeFactor(arity, size) capacity := uint32(0) if size > 1 { + sizeFactor := calculateSizeFactor(arity, size) capacity = uint32(math.Round(float64(size) * sizeFactor)) } - initSegmentCount := (capacity+filter.SegmentLength-1)/filter.SegmentLength - (arity - 1) - arrayLength := (initSegmentCount + arity - 1) * filter.SegmentLength - filter.SegmentCount = (arrayLength + filter.SegmentLength - 1) / filter.SegmentLength - if filter.SegmentCount <= arity-1 { - filter.SegmentCount = 1 - } else { - filter.SegmentCount = filter.SegmentCount - (arity - 1) + totalSegmentCount := (capacity + filter.SegmentLength - 1) / filter.SegmentLength + if totalSegmentCount < arity { + totalSegmentCount = arity } - arrayLength = (filter.SegmentCount + arity - 1) * filter.SegmentLength + filter.SegmentCount = totalSegmentCount - (arity - 1) filter.SegmentCountLength = filter.SegmentCount * filter.SegmentLength - filter.Fingerprints = reuseBuffer[T](&b.fingerprints, int(arrayLength)) + + // Allocate fingerprints slice. + numFingerprints := totalSegmentCount * filter.SegmentLength + // Our backing buffer is a []uint32. Figure out how many uint32s we need + // to back a []T of the requested size. + bufSize := (numFingerprints*uint32(unsafe.Sizeof(T(0))) + 3) / 4 + buf := reuseBuffer(&b.fingerprints, bufSize) + filter.Fingerprints = unsafe.Slice((*T)(unsafe.Pointer(unsafe.SliceData(buf))), numFingerprints) } func (filter *BinaryFuse[T]) mod3(x uint8) uint8 { @@ -349,29 +385,11 @@ func calculateSizeFactor(arity uint32, size uint32) float64 { } } -// reusableBuffer allows reuse of a backing buffer to avoid allocations for -// slices of integers. -type reusableBuffer struct { - buf []uint64 -} - -type integer interface { - ~int | ~int8 | ~int16 | ~int32 | ~int64 | ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 -} - -// reuseBuffer returns an empty slice of the given size, reusing the last buffer -// if possible. -func reuseBuffer[T integer](b *reusableBuffer, size int) []T { - const sizeOfUint64 = 8 - // Our backing buffer is a []uint64. Figure out how many uint64s we need - // to back a []T of the requested size. - bufSize := int((uintptr(size)*unsafe.Sizeof(T(0)) + sizeOfUint64 - 1) / sizeOfUint64) - if cap(b.buf) >= bufSize { - clear(b.buf[:bufSize]) - } else { - // We need to allocate a new buffer. Increase by at least 25% to amortize - // allocations; this is what append() does for large enough slices. - b.buf = make([]uint64, max(bufSize, cap(b.buf)+cap(b.buf)/4)) - } - return unsafe.Slice((*T)(unsafe.Pointer(unsafe.SliceData(b.buf))), size) +// reuseBuffer returns a zeroed slice of the given size, reusing the previous +// one if possible. +func reuseBuffer[T uint8 | uint32 | uint64](buf *[]T, size uint32) []T { + // The compiler recognizes this pattern and doesn't allocate a temporary + // slice. This pattern is used in slices.Grow(). + *buf = append((*buf)[:0], make([]T, size)...) + return *buf } diff --git a/binaryfusefilter_test.go b/binaryfusefilter_test.go index a60fe3c..e7fb7a8 100644 --- a/binaryfusefilter_test.go +++ b/binaryfusefilter_test.go @@ -350,10 +350,22 @@ func TestBinaryFuseN_Issue35(t *testing.T) { } } +// TestBinaryFuseBuilder verifies that repeated builds with the same builder +// create the exact same filter as using NewBinaryFuse. func TestBinaryFuseBuilder(t *testing.T) { - // Verify that repeated builds with the same builder create the exact same - // filter as using NewBinaryFuse. var bld BinaryFuseBuilder + // Test with and without pre-allocation. + if rand.IntN(2) == 0 { + maxSize := 1 + rand.IntN(1<