From 948a50483d6042e301cb376684381f839a3a8dc6 Mon Sep 17 00:00:00 2001 From: peter Date: Mon, 23 Mar 2026 23:36:26 +0100 Subject: [PATCH 01/27] Add pac_clip_sum aggregate with clipping support --- CMakeLists.txt | 1 + src/aggregates/pac_clip_sum.cpp | 828 ++++++++++++++++++ src/aggregates/pac_count.cpp | 46 + src/aggregates/pac_min_max.cpp | 51 ++ src/compiler/pac_bitslice_compiler.cpp | 10 + src/core/pac_extension.cpp | 17 + src/include/aggregates/pac_clip_sum.hpp | 403 +++++++++ src/include/aggregates/pac_count.hpp | 2 + src/include/aggregates/pac_min_max.hpp | 4 + .../pac_expression_builder.hpp | 8 + .../query_processing/pac_plan_traversal.hpp | 3 + .../pac_expression_builder.cpp | 268 +++++- src/query_processing/pac_plan_traversal.cpp | 2 +- test/sql/pac_clip_sum.test | 214 +++++ 14 files changed, 1851 insertions(+), 6 deletions(-) create mode 100644 src/aggregates/pac_clip_sum.cpp create mode 100644 src/include/aggregates/pac_clip_sum.hpp create mode 100644 test/sql/pac_clip_sum.test diff --git a/CMakeLists.txt b/CMakeLists.txt index 93e0c917..7aa14f03 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,6 +37,7 @@ set(EXTENSION_SOURCES src/aggregates/pac_count.cpp src/aggregates/pac_min_max.cpp src/aggregates/pac_sum.cpp + src/aggregates/pac_clip_sum.cpp src/compiler/pac_bitslice_compiler.cpp src/compiler/pac_compiler_helpers.cpp src/query_processing/pac_avg_rewriter.cpp diff --git a/src/aggregates/pac_clip_sum.cpp b/src/aggregates/pac_clip_sum.cpp new file mode 100644 index 00000000..26a3ebe6 --- /dev/null +++ b/src/aggregates/pac_clip_sum.cpp @@ -0,0 +1,828 @@ +#include "aggregates/pac_clip_sum.hpp" +#include "categorical/pac_categorical.hpp" +#include "duckdb/common/types/decimal.hpp" +#include "duckdb/parser/parsed_data/create_aggregate_function_info.hpp" +#include + +namespace duckdb { + +// ============================================================================ +// Inner state update: add one unsigned value to the state +// 
============================================================================ +AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, uint64_t key_hash, uint64_t value, + ArenaAllocator &allocator) { + state.key_hash |= key_hash; + + int level = PacClipSumIntState::GetLevel(value); + uint64_t shift = level << 2; + uint16_t shifted_val = static_cast(value >> shift); // max 255 (8 bits) + + state.EnsureLevelAllocated(allocator, level); + uint64_t *buf = state.levels[level]; + + // Set bitmap bit + buf[17] |= (1ULL << (key_hash >> 58)); + + // Update exact_count (may cascade top 4 bits to overflow) + state.AddToExactCount(buf, shifted_val, allocator); + + // Add to SWAR counters + Pac2AddToTotalsSWAR16(buf, shifted_val, key_hash); +} + +// Overload for hugeint_t +AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, uint64_t key_hash, hugeint_t value, + ArenaAllocator &allocator) { + state.key_hash |= key_hash; + + uint64_t upper, lower; + if (value.upper < 0) { + hugeint_t abs_val = -value; + upper = static_cast(abs_val.upper); + lower = abs_val.lower; + } else { + upper = static_cast(value.upper); + lower = value.lower; + } + + int level = PacClipSumIntState::GetLevel128(upper, lower); + uint64_t shift = level << 2; + + // Shift the 128-bit value right by shift bits, take lower 8 bits + uint16_t shifted_val; + if (shift >= 64) { + shifted_val = static_cast(upper >> (shift - 64)); + } else if (shift > 0) { + shifted_val = static_cast((lower >> shift) | (upper << (64 - shift))); + } else { + shifted_val = static_cast(lower); + } + shifted_val &= 0xFF; // max 255 + + state.EnsureLevelAllocated(allocator, level); + uint64_t *buf = state.levels[level]; + buf[17] |= (1ULL << (key_hash >> 58)); + state.AddToExactCount(buf, shifted_val, allocator); + Pac2AddToTotalsSWAR16(buf, shifted_val, key_hash); +} + +// ============================================================================ +// Value routing: two-sided (pos/neg) 
dispatch +// ============================================================================ +// Route a uint64_t value — when SIGNED, the bits represent a signed int64_t (two's complement) +template +inline void PacClipSumRouteValue(PacClipSumStateWrapper &wrapper, PacClipSumIntState *pos_state, uint64_t hash, + uint64_t value, ArenaAllocator &a) { + if (DUCKDB_LIKELY(hash)) { + int64_t sval = static_cast(value); // reinterpret bits as signed + if (SIGNED && sval < 0) { + auto *neg = wrapper.EnsureNegState(a); + PacClipSumUpdateOneInternal(*neg, hash, static_cast(-sval), a); + neg->update_count++; + } else { + PacClipSumUpdateOneInternal(*pos_state, hash, value, a); + pos_state->update_count++; + } + } +} + +// Overload for hugeint routing (signed) +inline void PacClipSumRouteHugeint(PacClipSumStateWrapper &wrapper, PacClipSumIntState *pos_state, uint64_t hash, + hugeint_t value, ArenaAllocator &a, bool is_signed) { + if (DUCKDB_LIKELY(hash)) { + if (is_signed && value.upper < 0) { + auto *neg = wrapper.EnsureNegState(a); + hugeint_t abs_val = -value; + uint64_t upper = static_cast(abs_val.upper); + uint64_t lower = abs_val.lower; + int level = PacClipSumIntState::GetLevel128(upper, lower); + uint64_t shift = level << 2; + uint16_t shifted_val; + if (shift >= 64) { + shifted_val = static_cast(upper >> (shift - 64)); + } else if (shift > 0) { + shifted_val = static_cast((lower >> shift) | (upper << (64 - shift))); + } else { + shifted_val = static_cast(lower); + } + shifted_val &= 0xFF; + neg->key_hash |= hash; + neg->EnsureLevelAllocated(a, level); + uint64_t *lbuf = neg->levels[level]; + lbuf[17] |= (1ULL << (hash >> 58)); + neg->AddToExactCount(lbuf, shifted_val, a); + Pac2AddToTotalsSWAR16(lbuf, shifted_val, hash); + neg->update_count++; + } else { + PacClipSumUpdateOneInternal(*pos_state, hash, value, a); + pos_state->update_count++; + } + } +} + +// ============================================================================ +// Buffer flush +// 
============================================================================ +template +inline void PacClipSumFlushBuffer(PacClipSumStateWrapper &src, PacClipSumStateWrapper &dst, ArenaAllocator &a) { + uint64_t cnt = src.n_buffered & PacClipSumStateWrapper::BUF_MASK; + if (cnt > 0) { + auto *dst_state = dst.EnsureState(a); + for (uint64_t i = 0; i < cnt; i++) { + PacClipSumRouteValue(dst, dst_state, src.hash_buf[i], src.val_buf[i], a); + } + src.n_buffered &= ~PacClipSumStateWrapper::BUF_MASK; + } +} + +// ============================================================================ +// Buffered update +// ============================================================================ +template +AUTOVECTORIZE inline void PacClipSumUpdateOne(PacClipSumStateWrapper &agg, uint64_t key_hash, ValueT value, + ArenaAllocator &a) { + uint64_t cnt = agg.n_buffered & PacClipSumStateWrapper::BUF_MASK; + if (DUCKDB_UNLIKELY(cnt == PacClipSumStateWrapper::BUF_SIZE)) { + auto *dst_state = agg.EnsureState(a); + for (int i = 0; i < PacClipSumStateWrapper::BUF_SIZE; i++) { + PacClipSumRouteValue(agg, dst_state, agg.hash_buf[i], agg.val_buf[i], a); + } + PacClipSumRouteValue(agg, dst_state, key_hash, static_cast(value), a); + agg.n_buffered &= ~PacClipSumStateWrapper::BUF_MASK; + } else { + agg.val_buf[cnt] = static_cast(value); + agg.hash_buf[cnt] = key_hash; + agg.n_buffered++; + } +} + +// Hugeint buffered update — bypass buffer, update directly +template +inline void PacClipSumUpdateOne(PacClipSumStateWrapper &agg, uint64_t key_hash, hugeint_t value, ArenaAllocator &a) { + PacClipSumFlushBuffer(agg, agg, a); // flush any buffered values first + auto *state = agg.EnsureState(a); + PacClipSumRouteHugeint(agg, state, key_hash, value, a, SIGNED); +} + +// ============================================================================ +// Vectorized Update and ScatterUpdate +// ============================================================================ +template +static void 
PacClipSumUpdate(Vector inputs[], PacClipSumStateWrapper &state, idx_t count, ArenaAllocator &allocator) { + UnifiedVectorFormat hash_data, value_data; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + + if (hash_data.validity.AllValid() && value_data.validity.AllValid()) { + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + PacClipSumUpdateOne(state, hashes[h_idx], ConvertValue::convert(values[v_idx]), + allocator); + } + } else { + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + PacClipSumUpdateOne(state, hashes[h_idx], ConvertValue::convert(values[v_idx]), + allocator); + } + } +} + +template +static void PacClipSumScatterUpdate(Vector inputs[], Vector &states, idx_t count, ArenaAllocator &allocator) { + UnifiedVectorFormat hash_data, value_data, sdata; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + states.ToUnifiedFormat(count, sdata); + + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + auto state_ptrs = UnifiedVectorFormat::GetData(sdata); + + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + auto state = state_ptrs[sdata.sel->get_index(i)]; + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + PacClipSumUpdateOne(*state, hashes[h_idx], ConvertValue::convert(values[v_idx]), allocator); + } +} + +// ============================================================================ +// X-macro: generate 
Update/ScatterUpdate for integer types +// ============================================================================ +#define PAC2_INT_TYPES_SIGNED \ + X(TinyInt, int64_t, int8_t, true) \ + X(SmallInt, int64_t, int16_t, true) \ + X(Integer, int64_t, int32_t, true) \ + X(BigInt, int64_t, int64_t, true) + +#define PAC2_INT_TYPES_UNSIGNED \ + X(UTinyInt, uint64_t, uint8_t, false) \ + X(USmallInt, uint64_t, uint16_t, false) \ + X(UInteger, uint64_t, uint32_t, false) \ + X(UBigInt, uint64_t, uint64_t, false) + +#define X(NAME, VALUE_T, INPUT_T, SIGNED) \ + static void PacClipSumUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, \ + idx_t count) { \ + auto &state = *reinterpret_cast(state_p); \ + PacClipSumUpdate(inputs, state, count, aggr.allocator); \ + } \ + static void PacClipSumScatterUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &states, \ + idx_t count) { \ + PacClipSumScatterUpdate(inputs, states, count, aggr.allocator); \ + } +PAC2_INT_TYPES_SIGNED +PAC2_INT_TYPES_UNSIGNED +#undef X + +// HugeInt update (signed, via hugeint routing) +static void PacClipSumUpdateHugeInt(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, idx_t count) { + auto &state = *reinterpret_cast(state_p); + UnifiedVectorFormat hash_data, value_data; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + PacClipSumUpdateOne(state, hashes[h_idx], values[v_idx], aggr.allocator); + } +} +static void PacClipSumScatterUpdateHugeInt(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &states, + idx_t count) { + UnifiedVectorFormat 
hash_data, value_data, sdata; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + states.ToUnifiedFormat(count, sdata); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + auto state_ptrs = UnifiedVectorFormat::GetData(sdata); + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + auto state = state_ptrs[sdata.sel->get_index(i)]; + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + PacClipSumUpdateOne(*state, hashes[h_idx], values[v_idx], aggr.allocator); + } +} + +// UHugeInt update (unsigned, convert to hugeint for routing) +static void PacClipSumUpdateUHugeInt(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, + idx_t count) { + auto &state = *reinterpret_cast(state_p); + UnifiedVectorFormat hash_data, value_data; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + // uhugeint_t is always positive; treat as 128-bit unsigned + auto &v = values[v_idx]; + auto *pos_state = state.EnsureState(aggr.allocator); + if (DUCKDB_LIKELY(hashes[h_idx])) { + uint64_t upper = static_cast(v.upper); + uint64_t lower = v.lower; + int level = PacClipSumIntState::GetLevel128(upper, lower); + uint64_t shift = level << 2; + uint16_t shifted_val; + if (shift >= 64) { + shifted_val = static_cast(upper >> (shift - 64)); + } else if (shift > 0) { + shifted_val = static_cast((lower >> shift) | (upper << (64 - shift))); + } else { + shifted_val = 
static_cast(lower); + } + shifted_val &= 0xFF; + pos_state->key_hash |= hashes[h_idx]; + pos_state->EnsureLevelAllocated(aggr.allocator, level); + uint64_t *buf = pos_state->levels[level]; + buf[17] |= (1ULL << (hashes[h_idx] >> 58)); + pos_state->AddToExactCount(buf, shifted_val, aggr.allocator); + Pac2AddToTotalsSWAR16(buf, shifted_val, hashes[h_idx]); + pos_state->update_count++; + } + } +} +static void PacClipSumScatterUpdateUHugeInt(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &states, + idx_t count) { + UnifiedVectorFormat hash_data, value_data, sdata; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + states.ToUnifiedFormat(count, sdata); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + auto state_ptrs = UnifiedVectorFormat::GetData(sdata); + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + auto state = state_ptrs[sdata.sel->get_index(i)]; + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + auto &v = values[v_idx]; + auto *pos_state = state->EnsureState(aggr.allocator); + if (DUCKDB_LIKELY(hashes[h_idx])) { + uint64_t upper = static_cast(v.upper); + uint64_t lower = v.lower; + int level = PacClipSumIntState::GetLevel128(upper, lower); + uint64_t shift = level << 2; + uint16_t shifted_val; + if (shift >= 64) { + shifted_val = static_cast(upper >> (shift - 64)); + } else if (shift > 0) { + shifted_val = static_cast((lower >> shift) | (upper << (64 - shift))); + } else { + shifted_val = static_cast(lower); + } + shifted_val &= 0xFF; + pos_state->key_hash |= hashes[h_idx]; + pos_state->EnsureLevelAllocated(aggr.allocator, level); + uint64_t *buf = pos_state->levels[level]; + buf[17] |= (1ULL << (hashes[h_idx] >> 58)); + pos_state->AddToExactCount(buf, shifted_val, aggr.allocator); + 
Pac2AddToTotalsSWAR16(buf, shifted_val, hashes[h_idx]); + pos_state->update_count++; + } + } +} + +// ============================================================================ +// Combine +// ============================================================================ +AUTOVECTORIZE static void PacClipSumCombineInt(Vector &src, Vector &dst, idx_t count, ArenaAllocator &allocator) { + auto src_wrapper = FlatVector::GetData(src); + auto dst_wrapper = FlatVector::GetData(dst); + + for (idx_t i = 0; i < count; i++) { + // Flush src's buffer into dst + PacClipSumFlushBuffer(*src_wrapper[i], *dst_wrapper[i], allocator); + + auto *s = src_wrapper[i]->GetState(); + if (!s) { + continue; + } + auto *d = dst_wrapper[i]->EnsureState(allocator); + d->CombineFrom(s, allocator); + + // Combine neg states + auto *s_neg = src_wrapper[i]->GetNegState(); + if (s_neg) { + auto *d_neg = dst_wrapper[i]->GetNegState(); + if (!d_neg) { + dst_wrapper[i]->neg_state = s_neg; // steal + } else { + d_neg->CombineFrom(s_neg, allocator); + } + } + } +} + +static void PacClipSumCombine(Vector &src, Vector &dst, AggregateInputData &aggr, idx_t count) { + PacClipSumCombineInt(src, dst, count, aggr.allocator); +} + +// ============================================================================ +// Bind data with clip_support threshold +// ============================================================================ +struct PacClipSumBindData : public PacBindData { + int clip_support_threshold; // levels with fewer estimated distinct contributors are zeroed out + + PacClipSumBindData(ClientContext &ctx, double mi_val, double correction_val, int clip_support) + : PacBindData(ctx, mi_val, correction_val, 1.0), clip_support_threshold(clip_support) { + } + + unique_ptr Copy() const override { + auto copy = make_uniq(*this); + copy->total_update_count = 0; + copy->suspicious_count = 0; + copy->nonsuspicious_count = 0; + return copy; + } + bool Equals(const FunctionData &other) const override { + if 
(!PacBindData::Equals(other)) { + return false; + } + auto *o = dynamic_cast(&other); + return o && clip_support_threshold == o->clip_support_threshold; + } +}; + +// ============================================================================ +// Finalize +// ============================================================================ +template +static void PacClipSumFinalize(Vector &states, AggregateInputData &input, Vector &result, idx_t count, idx_t offset) { + auto state_ptrs = FlatVector::GetData(states); + auto data = FlatVector::GetData(result); + auto &result_mask = FlatVector::Validity(result); + auto &bind = static_cast(*input.bind_data); + double mi = bind.mi; + double correction = bind.correction; + uint64_t query_hash = bind.query_hash; + auto pstate = bind.pstate; + int clip_support = bind.clip_support_threshold; + + for (idx_t i = 0; i < count; i++) { + PacClipSumFlushBuffer(*state_ptrs[i], *state_ptrs[i], input.allocator); + + PAC_FLOAT buf[64] = {0}; + auto *pos = state_ptrs[i]->GetState(); + if (!pos) { + result_mask.SetInvalid(offset + i); + continue; + } + uint64_t key_hash = pos->key_hash; + std::mt19937_64 gen(bind.seed); + if (PacNoiseInNull(key_hash, mi, correction, gen)) { + result_mask.SetInvalid(offset + i); + continue; + } + + // Non-mutating: just read totals with clip_support filtering + pos->GetTotals(buf, clip_support); + uint64_t update_count = pos->update_count; + + // Subtract neg state + auto *neg = state_ptrs[i]->GetNegState(); + if (neg) { + PAC_FLOAT neg_buf[64] = {0}; + neg->GetTotals(neg_buf, clip_support); + key_hash |= neg->key_hash; + for (int j = 0; j < 64; j++) { + buf[j] -= neg_buf[j]; + } + update_count += neg->update_count; + } + + CheckPacSampleDiversity(key_hash, buf, update_count, "pac_clip_sum", bind); + PAC_FLOAT result_val = PacNoisySampleFrom64Counters(buf, mi, correction, gen, ~key_hash, query_hash, pstate); + result_val *= PAC_FLOAT(2.0); // 2x compensation for ~50% sampling + data[offset + i] = 
FromDouble(result_val); + } +} + +// Instantiate noised finalize (scalar output for pac_noised_clip_sum) +static void PacClipSumNoisedFinalizeSigned(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalize(states, input, result, count, offset); +} +static void PacClipSumNoisedFinalizeUnsigned(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalize(states, input, result, count, offset); +} +// BIGINT output variant — used for count→sum conversion where the original returned BIGINT +static void PacClipSumNoisedFinalizeBigInt(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalize(states, input, result, count, offset); +} + +// ============================================================================ +// Counters finalize (LIST output for pac_clip_sum) +// ============================================================================ +template +static void PacClipSumFinalizeCounters(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + auto state_ptrs = FlatVector::GetData(states); + auto &bind = static_cast(*input.bind_data); + int clip_support = bind.clip_support_threshold; + double correction = bind.correction; + + // Result is LIST + auto list_entries = FlatVector::GetData(result); + auto &child_vec = ListVector::GetEntry(result); + + idx_t total_elements = count * 64; + ListVector::Reserve(result, total_elements); + ListVector::SetListSize(result, total_elements); + + auto child_data = FlatVector::GetData(child_vec); + + for (idx_t i = 0; i < count; i++) { + PacClipSumFlushBuffer(*state_ptrs[i], *state_ptrs[i], input.allocator); + + list_entries[offset + i].offset = i * 64; + list_entries[offset + i].length = 64; + + PAC_FLOAT buf[64] = {0}; + uint64_t key_hash = 0; + uint64_t update_count = 0; + + auto *pos = state_ptrs[i]->GetState(); + if (pos) { + key_hash = 
pos->key_hash; + update_count = pos->update_count; + pos->GetTotals(buf, clip_support); + + auto *neg = state_ptrs[i]->GetNegState(); + if (neg) { + PAC_FLOAT neg_buf[64] = {0}; + neg->GetTotals(neg_buf, clip_support); + key_hash |= neg->key_hash; + for (int j = 0; j < 64; j++) { + buf[j] -= neg_buf[j]; + } + update_count += neg->update_count; + } + } + + CheckPacSampleDiversity(key_hash, buf, update_count, "pac_clip_sum", bind); + + idx_t base = i * 64; + for (int j = 0; j < 64; j++) { + if ((key_hash >> j) & 1ULL) { + child_data[base + j] = static_cast(buf[j] * 2.0 * correction); + } else { + child_data[base + j] = 0.0; + } + } + } +} + +static void PacClipSumFinalizeCountersSigned(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalizeCounters(states, input, result, count, offset); +} +static void PacClipSumFinalizeCountersUnsigned(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalizeCounters(states, input, result, count, offset); +} + +// ============================================================================ +// State size / init / bind +// ============================================================================ +static idx_t PacClipSumStateSize(const AggregateFunction &) { + return sizeof(PacClipSumStateWrapper); +} + +static void PacClipSumInitialize(const AggregateFunction &, data_ptr_t state_p) { + memset(state_p, 0, sizeof(PacClipSumStateWrapper)); +} + +static unique_ptr PacClipSumBind(ClientContext &ctx, AggregateFunction &, + vector> &args) { + double mi = GetPacMiFromSetting(ctx); + double correction = 1.0; + if (2 < args.size()) { + if (!args[2]->IsFoldable()) { + throw InvalidInputException("pac_clip_sum: correction parameter must be a constant"); + } + auto val = ExpressionExecutor::EvaluateScalar(ctx, *args[2]); + correction = val.GetValue(); + if (correction < 0.0) { + throw InvalidInputException("pac_clip_sum: correction must 
be >= 0"); + } + } + // Read pac_clip_support threshold + int clip_support = 0; + Value dc_val; + if (ctx.TryGetCurrentSetting("pac_clip_support", dc_val) && !dc_val.IsNull()) { + clip_support = static_cast(dc_val.GetValue()); + } + return make_uniq(ctx, mi, correction, clip_support); +} + +// ============================================================================ +// DECIMAL support: dispatch by physical type, same pattern as pac_noised_sum +// ============================================================================ +static AggregateFunction GetPacClipSumNoisedAggregate(PhysicalType type) { + switch (type) { + case PhysicalType::INT16: + return AggregateFunction("pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::SMALLINT}, + LogicalType::HUGEINT, PacClipSumStateSize, PacClipSumInitialize, + PacClipSumScatterUpdateSmallInt, PacClipSumCombine, PacClipSumNoisedFinalizeSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateSmallInt); + case PhysicalType::INT32: + return AggregateFunction("pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::INTEGER}, + LogicalType::HUGEINT, PacClipSumStateSize, PacClipSumInitialize, + PacClipSumScatterUpdateInteger, PacClipSumCombine, PacClipSumNoisedFinalizeSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateInteger); + case PhysicalType::INT64: + return AggregateFunction("pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::BIGINT}, + LogicalType::HUGEINT, PacClipSumStateSize, PacClipSumInitialize, + PacClipSumScatterUpdateBigInt, PacClipSumCombine, PacClipSumNoisedFinalizeSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateBigInt); + case PhysicalType::INT128: + return AggregateFunction("pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::HUGEINT}, + LogicalType::HUGEINT, PacClipSumStateSize, PacClipSumInitialize, + PacClipSumScatterUpdateHugeInt, PacClipSumCombine, PacClipSumNoisedFinalizeSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, 
PacClipSumUpdateHugeInt); + default: + throw InternalException("pac_noised_clip_sum: unsupported decimal physical type"); + } +} + +static AggregateFunction GetPacClipSumCountersAggregate(PhysicalType type) { + auto list_type = LogicalType::LIST(PacFloatLogicalType()); + switch (type) { + case PhysicalType::INT16: + return AggregateFunction("pac_clip_sum", {LogicalType::UBIGINT, LogicalType::SMALLINT}, list_type, + PacClipSumStateSize, PacClipSumInitialize, PacClipSumScatterUpdateSmallInt, + PacClipSumCombine, PacClipSumFinalizeCountersSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateSmallInt); + case PhysicalType::INT32: + return AggregateFunction("pac_clip_sum", {LogicalType::UBIGINT, LogicalType::INTEGER}, list_type, + PacClipSumStateSize, PacClipSumInitialize, PacClipSumScatterUpdateInteger, + PacClipSumCombine, PacClipSumFinalizeCountersSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateInteger); + case PhysicalType::INT64: + return AggregateFunction("pac_clip_sum", {LogicalType::UBIGINT, LogicalType::BIGINT}, list_type, + PacClipSumStateSize, PacClipSumInitialize, PacClipSumScatterUpdateBigInt, + PacClipSumCombine, PacClipSumFinalizeCountersSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateBigInt); + case PhysicalType::INT128: + return AggregateFunction("pac_clip_sum", {LogicalType::UBIGINT, LogicalType::HUGEINT}, list_type, + PacClipSumStateSize, PacClipSumInitialize, PacClipSumScatterUpdateHugeInt, + PacClipSumCombine, PacClipSumFinalizeCountersSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateHugeInt); + default: + throw InternalException("pac_clip_sum: unsupported decimal physical type"); + } +} + +static unique_ptr BindDecimalPacNoisedClipSum(ClientContext &ctx, AggregateFunction &function, + vector> &args) { + auto decimal_type = args[1]->return_type; + function = GetPacClipSumNoisedAggregate(decimal_type.InternalType()); + function.name = "pac_noised_clip_sum"; + 
function.arguments[1] = decimal_type; + function.return_type = LogicalType::DECIMAL(Decimal::MAX_WIDTH_DECIMAL, DecimalType::GetScale(decimal_type)); + return PacClipSumBind(ctx, function, args); +} + +static unique_ptr BindDecimalPacClipSum(ClientContext &ctx, AggregateFunction &function, + vector> &args) { + auto decimal_type = args[1]->return_type; + function = GetPacClipSumCountersAggregate(decimal_type.InternalType()); + function.name = "pac_clip_sum"; + function.arguments[1] = decimal_type; + // counters always return LIST, no DECIMAL return type needed + return PacClipSumBind(ctx, function, args); +} + +// ============================================================================ +// Registration helpers +// ============================================================================ +static void AddClipSumCountersFcn(AggregateFunctionSet &set, const string &name, const LogicalType &value_type, + aggregate_update_t scatter, aggregate_finalize_t finalize, + aggregate_simple_update_t update) { + auto list_type = LogicalType::LIST(PacFloatLogicalType()); + set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type}, list_type, PacClipSumStateSize, + PacClipSumInitialize, scatter, PacClipSumCombine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipSumBind)); + set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type, LogicalType::DOUBLE}, list_type, + PacClipSumStateSize, PacClipSumInitialize, scatter, PacClipSumCombine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipSumBind)); +} + +static void AddNoisedClipSumFcn(AggregateFunctionSet &set, const string &name, const LogicalType &value_type, + const LogicalType &result_type, aggregate_update_t scatter, + aggregate_finalize_t finalize, aggregate_simple_update_t update) { + set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type}, result_type, PacClipSumStateSize, + PacClipSumInitialize, scatter, PacClipSumCombine, 
finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipSumBind)); + set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type, LogicalType::DOUBLE}, result_type, + PacClipSumStateSize, PacClipSumInitialize, scatter, PacClipSumCombine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipSumBind)); +} + +// Helper to register all type overloads for a clip sum function set +static void RegisterClipSumTypeOverloads(AggregateFunctionSet &set, const string &name, bool counters) { + if (counters) { + // Counters (LIST) variants + AddClipSumCountersFcn(set, name, LogicalType::TINYINT, PacClipSumScatterUpdateTinyInt, + PacClipSumFinalizeCountersSigned, PacClipSumUpdateTinyInt); + AddClipSumCountersFcn(set, name, LogicalType::BOOLEAN, PacClipSumScatterUpdateTinyInt, + PacClipSumFinalizeCountersSigned, PacClipSumUpdateTinyInt); + AddClipSumCountersFcn(set, name, LogicalType::SMALLINT, PacClipSumScatterUpdateSmallInt, + PacClipSumFinalizeCountersSigned, PacClipSumUpdateSmallInt); + AddClipSumCountersFcn(set, name, LogicalType::INTEGER, PacClipSumScatterUpdateInteger, + PacClipSumFinalizeCountersSigned, PacClipSumUpdateInteger); + AddClipSumCountersFcn(set, name, LogicalType::BIGINT, PacClipSumScatterUpdateBigInt, + PacClipSumFinalizeCountersSigned, PacClipSumUpdateBigInt); + AddClipSumCountersFcn(set, name, LogicalType::UTINYINT, PacClipSumScatterUpdateUTinyInt, + PacClipSumFinalizeCountersUnsigned, PacClipSumUpdateUTinyInt); + AddClipSumCountersFcn(set, name, LogicalType::USMALLINT, PacClipSumScatterUpdateUSmallInt, + PacClipSumFinalizeCountersUnsigned, PacClipSumUpdateUSmallInt); + AddClipSumCountersFcn(set, name, LogicalType::UINTEGER, PacClipSumScatterUpdateUInteger, + PacClipSumFinalizeCountersUnsigned, PacClipSumUpdateUInteger); + AddClipSumCountersFcn(set, name, LogicalType::UBIGINT, PacClipSumScatterUpdateUBigInt, + PacClipSumFinalizeCountersUnsigned, PacClipSumUpdateUBigInt); + AddClipSumCountersFcn(set, name, 
LogicalType::HUGEINT, PacClipSumScatterUpdateHugeInt, + PacClipSumFinalizeCountersSigned, PacClipSumUpdateHugeInt); + AddClipSumCountersFcn(set, name, LogicalType::UHUGEINT, PacClipSumScatterUpdateUHugeInt, + PacClipSumFinalizeCountersUnsigned, PacClipSumUpdateUHugeInt); + } else { + // Noised (scalar HUGEINT) variants + AddNoisedClipSumFcn(set, name, LogicalType::TINYINT, LogicalType::HUGEINT, PacClipSumScatterUpdateTinyInt, + PacClipSumNoisedFinalizeSigned, PacClipSumUpdateTinyInt); + AddNoisedClipSumFcn(set, name, LogicalType::BOOLEAN, LogicalType::HUGEINT, PacClipSumScatterUpdateTinyInt, + PacClipSumNoisedFinalizeSigned, PacClipSumUpdateTinyInt); + AddNoisedClipSumFcn(set, name, LogicalType::SMALLINT, LogicalType::HUGEINT, PacClipSumScatterUpdateSmallInt, + PacClipSumNoisedFinalizeSigned, PacClipSumUpdateSmallInt); + AddNoisedClipSumFcn(set, name, LogicalType::INTEGER, LogicalType::HUGEINT, PacClipSumScatterUpdateInteger, + PacClipSumNoisedFinalizeSigned, PacClipSumUpdateInteger); + AddNoisedClipSumFcn(set, name, LogicalType::BIGINT, LogicalType::HUGEINT, PacClipSumScatterUpdateBigInt, + PacClipSumNoisedFinalizeSigned, PacClipSumUpdateBigInt); + AddNoisedClipSumFcn(set, name, LogicalType::UTINYINT, LogicalType::HUGEINT, PacClipSumScatterUpdateUTinyInt, + PacClipSumNoisedFinalizeUnsigned, PacClipSumUpdateUTinyInt); + AddNoisedClipSumFcn(set, name, LogicalType::USMALLINT, LogicalType::HUGEINT, PacClipSumScatterUpdateUSmallInt, + PacClipSumNoisedFinalizeUnsigned, PacClipSumUpdateUSmallInt); + AddNoisedClipSumFcn(set, name, LogicalType::UINTEGER, LogicalType::HUGEINT, PacClipSumScatterUpdateUInteger, + PacClipSumNoisedFinalizeUnsigned, PacClipSumUpdateUInteger); + AddNoisedClipSumFcn(set, name, LogicalType::UBIGINT, LogicalType::HUGEINT, PacClipSumScatterUpdateUBigInt, + PacClipSumNoisedFinalizeUnsigned, PacClipSumUpdateUBigInt); + AddNoisedClipSumFcn(set, name, LogicalType::HUGEINT, LogicalType::HUGEINT, PacClipSumScatterUpdateHugeInt, + 
PacClipSumNoisedFinalizeSigned, PacClipSumUpdateHugeInt); + AddNoisedClipSumFcn(set, name, LogicalType::UHUGEINT, LogicalType::HUGEINT, PacClipSumScatterUpdateUHugeInt, + PacClipSumNoisedFinalizeUnsigned, PacClipSumUpdateUHugeInt); + } +} + +// ============================================================================ +// Registration: pac_clip_sum (counters, LIST) +// ============================================================================ +void RegisterPacClipSumFunctions(ExtensionLoader &loader) { + AggregateFunctionSet fcn_set("pac_clip_sum"); + RegisterClipSumTypeOverloads(fcn_set, "pac_clip_sum", true); + + // DECIMAL overloads + auto list_type = LogicalType::LIST(PacFloatLogicalType()); + fcn_set.AddFunction(AggregateFunction({LogicalType::UBIGINT, LogicalTypeId::DECIMAL}, list_type, nullptr, nullptr, + nullptr, nullptr, nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, + nullptr, BindDecimalPacClipSum)); + fcn_set.AddFunction(AggregateFunction({LogicalType::UBIGINT, LogicalTypeId::DECIMAL, LogicalType::DOUBLE}, + list_type, nullptr, nullptr, nullptr, nullptr, nullptr, + FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, BindDecimalPacClipSum)); + + // Add list aggregate overload (LIST → LIST) for categorical/subquery + AddPacListAggregateOverload(fcn_set, "clip_sum"); + + CreateAggregateFunctionInfo info(fcn_set); + FunctionDescription desc; + desc.description = "[INTERNAL] Returns 64 PAC subsample counters with per-level clipping as LIST."; + desc.examples = {"SELECT c_mktsegment, pac_clip_sum(pac_hash(hash(c_custkey)), c_acctbal) FROM customer GROUP BY " + "c_mktsegment"}; + info.descriptions.push_back(std::move(desc)); + loader.RegisterFunction(std::move(info)); +} + +// ============================================================================ +// Registration: pac_noised_clip_sum (fused noised, scalar HUGEINT) +// ============================================================================ +void 
RegisterPacNoisedClipSumFunctions(ExtensionLoader &loader) { + AggregateFunctionSet fcn_set("pac_noised_clip_sum"); + RegisterClipSumTypeOverloads(fcn_set, "pac_noised_clip_sum", false); + + // DECIMAL overloads + fcn_set.AddFunction(AggregateFunction( + {LogicalType::UBIGINT, LogicalTypeId::DECIMAL}, LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr, nullptr, + nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, BindDecimalPacNoisedClipSum)); + fcn_set.AddFunction(AggregateFunction( + {LogicalType::UBIGINT, LogicalTypeId::DECIMAL, LogicalType::DOUBLE}, LogicalTypeId::DECIMAL, nullptr, nullptr, + nullptr, nullptr, nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, BindDecimalPacNoisedClipSum)); + + CreateAggregateFunctionInfo info(fcn_set); + FunctionDescription desc; + desc.description = "Privacy-preserving SUM with per-level clipping and noising. Supports 128-bit."; + desc.examples = {"SELECT c_mktsegment, pac_noised_clip_sum(pac_hash(hash(c_custkey)), c_acctbal) FROM customer " + "GROUP BY c_mktsegment"}; + info.descriptions.push_back(std::move(desc)); + loader.RegisterFunction(std::move(info)); +} + +// ============================================================================ +// Registration: pac_noised_clip_sumcount (sum-of-counts, BIGINT → BIGINT) +// Used when count→sum conversion needs to preserve BIGINT return type. 
+// ============================================================================ +void RegisterPacNoisedClipSumCountFunctions(ExtensionLoader &loader) { + AggregateFunctionSet fcn_set("pac_noised_clip_sumcount"); + // Only BIGINT input → BIGINT output (counts are always BIGINT) + AddNoisedClipSumFcn(fcn_set, "pac_noised_clip_sumcount", LogicalType::BIGINT, LogicalType::BIGINT, + PacClipSumScatterUpdateBigInt, PacClipSumNoisedFinalizeBigInt, PacClipSumUpdateBigInt); + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + +} // namespace duckdb diff --git a/src/aggregates/pac_count.cpp b/src/aggregates/pac_count.cpp index 86e86de9..80376c27 100644 --- a/src/aggregates/pac_count.cpp +++ b/src/aggregates/pac_count.cpp @@ -341,4 +341,50 @@ void RegisterPacAvgFunctions(ExtensionLoader &loader) { loader.RegisterFunction(std::move(avg_counters_info)); } +// ============================================================================ +// Clip synonyms: pac_noised_clip_count = pac_noised_count, +// pac_clip_count = pac_count +// ============================================================================ +void RegisterPacNoisedClipCountFunctions(ExtensionLoader &loader) { + AggregateFunctionSet fcn_set("pac_noised_clip_count"); + + fcn_set.AddFunction(AggregateFunction("pac_noised_clip_count", {LogicalType::UBIGINT}, LogicalType::BIGINT, + PacCountStateSize, PacCountInitialize, PacCountScatterUpdate, PacCountCombine, + PacCountFinalize, FunctionNullHandling::SPECIAL_HANDLING, PacCountUpdate, + PacCountBind)); + fcn_set.AddFunction(AggregateFunction("pac_noised_clip_count", {LogicalType::UBIGINT, LogicalType::DOUBLE}, + LogicalType::BIGINT, PacCountStateSize, PacCountInitialize, + PacCountScatterUpdate, PacCountCombine, PacCountFinalize, + FunctionNullHandling::SPECIAL_HANDLING, PacCountUpdate, PacCountBind)); + fcn_set.AddFunction(AggregateFunction("pac_noised_clip_count", {LogicalType::UBIGINT, LogicalType::ANY}, + LogicalType::BIGINT, 
PacCountStateSize, PacCountInitialize, + PacCountColumnScatterUpdate, PacCountCombine, PacCountFinalize, + FunctionNullHandling::SPECIAL_HANDLING, PacCountColumnUpdate, PacCountBind)); + fcn_set.AddFunction(AggregateFunction( + "pac_noised_clip_count", {LogicalType::UBIGINT, LogicalType::ANY, LogicalType::DOUBLE}, LogicalType::BIGINT, + PacCountStateSize, PacCountInitialize, PacCountColumnScatterUpdate, PacCountCombine, PacCountFinalize, + FunctionNullHandling::SPECIAL_HANDLING, PacCountColumnUpdate, PacCountBind)); + + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + +void RegisterPacClipCountFunctions(ExtensionLoader &loader) { + auto list_double_type = LogicalType::LIST(PacFloatLogicalType()); + AggregateFunctionSet fcn_set("pac_clip_count"); + + fcn_set.AddFunction(AggregateFunction("pac_clip_count", {LogicalType::UBIGINT}, list_double_type, PacCountStateSize, + PacCountInitialize, PacCountScatterUpdate, PacCountCombine, + PacCountFinalizeCounters, FunctionNullHandling::DEFAULT_NULL_HANDLING, + PacCountUpdate, PacCountBind)); + fcn_set.AddFunction(AggregateFunction( + "pac_clip_count", {LogicalType::UBIGINT, LogicalType::ANY}, list_double_type, PacCountStateSize, + PacCountInitialize, PacCountColumnScatterUpdate, PacCountCombine, PacCountFinalizeCounters, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacCountColumnUpdate, PacCountBind)); + AddPacListAggregateOverload(fcn_set, "clip_count"); + + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + } // namespace duckdb diff --git a/src/aggregates/pac_min_max.cpp b/src/aggregates/pac_min_max.cpp index 8fdfec10..60f5bdac 100644 --- a/src/aggregates/pac_min_max.cpp +++ b/src/aggregates/pac_min_max.cpp @@ -371,6 +371,57 @@ void RegisterPacMaxCountersFunctions(ExtensionLoader &loader) { loader.RegisterFunction(std::move(info)); } +// ============================================================================ +// Clip synonyms: 
pac_noised_clip_min/max = pac_noised_min/max, +// pac_clip_min/max = pac_min/max +// ============================================================================ +void RegisterPacNoisedClipMinFunctions(ExtensionLoader &loader) { + AggregateFunctionSet fcn_set("pac_noised_clip_min"); + fcn_set.AddFunction(AggregateFunction("pac_noised_clip_min", {LogicalType::UBIGINT, LogicalType::ANY}, + LogicalType::ANY, nullptr, nullptr, nullptr, nullptr, nullptr, + FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); + fcn_set.AddFunction(AggregateFunction("pac_noised_clip_min", + {LogicalType::UBIGINT, LogicalType::ANY, LogicalType::DOUBLE}, + LogicalType::ANY, nullptr, nullptr, nullptr, nullptr, nullptr, + FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + +void RegisterPacNoisedClipMaxFunctions(ExtensionLoader &loader) { + AggregateFunctionSet fcn_set("pac_noised_clip_max"); + fcn_set.AddFunction(AggregateFunction("pac_noised_clip_max", {LogicalType::UBIGINT, LogicalType::ANY}, + LogicalType::ANY, nullptr, nullptr, nullptr, nullptr, nullptr, + FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); + fcn_set.AddFunction(AggregateFunction( + "pac_noised_clip_max", {LogicalType::UBIGINT, LogicalType::ANY, LogicalType::DOUBLE}, LogicalType::ANY, nullptr, + nullptr, nullptr, nullptr, nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + +void RegisterPacClipMinFunctions(ExtensionLoader &loader) { + auto list_double_type = LogicalType::LIST(PacFloatLogicalType()); + AggregateFunctionSet fcn_set("pac_clip_min"); + fcn_set.AddFunction(AggregateFunction( + "pac_clip_min", {LogicalType::UBIGINT, LogicalType::ANY}, list_double_type, nullptr, nullptr, nullptr, nullptr, + nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, 
nullptr, PacMinMaxCountersBind)); + AddPacListAggregateOverload(fcn_set, "clip_min"); + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + +void RegisterPacClipMaxFunctions(ExtensionLoader &loader) { + auto list_double_type = LogicalType::LIST(PacFloatLogicalType()); + AggregateFunctionSet fcn_set("pac_clip_max"); + fcn_set.AddFunction(AggregateFunction( + "pac_clip_max", {LogicalType::UBIGINT, LogicalType::ANY}, list_double_type, nullptr, nullptr, nullptr, nullptr, + nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxCountersBind)); + AddPacListAggregateOverload(fcn_set, "clip_max"); + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + // Explicit template instantiations #define INST_ALL(T) \ template void PacMinMaxUpdate(Vector[], AggregateInputData &, idx_t, data_ptr_t, idx_t); \ diff --git a/src/compiler/pac_bitslice_compiler.cpp b/src/compiler/pac_bitslice_compiler.cpp index 6e93db68..e2e45784 100644 --- a/src/compiler/pac_bitslice_compiler.cpp +++ b/src/compiler/pac_bitslice_compiler.cpp @@ -880,6 +880,16 @@ void CompilePacBitsliceQuery(const PACCompatibilityResult &check, OptimizerExten // errors. The post-optimizer still handles user-written pac_avg() in SQL. RewritePacAvgToDiv(input, plan); + // Clip rewrite: when pac_clip_support is set, refine PAC aggregates to use + // clipping variants with per-PU pre-aggregation below. + { + Value clip_val; + if (input.context.TryGetCurrentSetting("pac_clip_support", clip_val) && !clip_val.IsNull()) { + auto &pu_names = (pu_present_in_tree && !pu_via_cte) ? 
check.scanned_pu_tables : privacy_units; + RewriteClipAggregates(input, plan, check, pu_names); + } + } + #if PAC_DEBUG PAC_DEBUG_PRINT("=== PAC-OPTIMIZED PLAN ==="); plan->Print(); diff --git a/src/core/pac_extension.cpp b/src/core/pac_extension.cpp index 2393fe80..f4153dfa 100644 --- a/src/core/pac_extension.cpp +++ b/src/core/pac_extension.cpp @@ -18,6 +18,7 @@ #include "aggregates/pac_aggregate.hpp" #include "aggregates/pac_count.hpp" #include "aggregates/pac_sum.hpp" +#include "aggregates/pac_clip_sum.hpp" #include "aggregates/pac_min_max.hpp" #include "categorical/pac_categorical.hpp" #include "parser/pac_parser.hpp" @@ -247,17 +248,33 @@ static void LoadInternal(ExtensionLoader &loader) { db.config.AddExtensionOption("pac_ptracking", "[INTERNAL] Enable persistent secret p-tracking for query-level MIA", LogicalType::BOOLEAN, Value::BOOLEAN(true)); + db.config.AddExtensionOption("pac_clip_support", + "Dynamic outlier clipping threshold for pac_clip_sum. " + "Levels with fewer than this many estimated distinct contributors are zeroed out. " + "NULL (default) disables pac_clip_sum; set to e.g. 
64 to enable.", + LogicalType::BIGINT, Value()); + // Register pac_sum aggregate functions RegisterPacSumFunctions(loader); RegisterPacSumCountersFunctions(loader); + RegisterPacClipSumFunctions(loader); + RegisterPacNoisedClipSumFunctions(loader); + RegisterPacNoisedClipSumCountFunctions(loader); RegisterPacCountFunctions(loader); RegisterPacCountCountersFunctions(loader); + RegisterPacClipCountFunctions(loader); + RegisterPacNoisedClipCountFunctions(loader); // Register pac_min/pac_max aggregate functions RegisterPacMinFunctions(loader); RegisterPacMaxFunctions(loader); // Register _counters variants for categorical queries RegisterPacMinCountersFunctions(loader); RegisterPacMaxCountersFunctions(loader); + // Register clip synonyms for min/max + RegisterPacClipMinFunctions(loader); + RegisterPacClipMaxFunctions(loader); + RegisterPacNoisedClipMinFunctions(loader); + RegisterPacNoisedClipMaxFunctions(loader); // Register dummy pac_noised_avg / pac_avg (replaced by RewritePacAvgToDiv before execution) RegisterPacAvgFunctions(loader); diff --git a/src/include/aggregates/pac_clip_sum.hpp b/src/include/aggregates/pac_clip_sum.hpp new file mode 100644 index 00000000..7bb0c87b --- /dev/null +++ b/src/include/aggregates/pac_clip_sum.hpp @@ -0,0 +1,403 @@ +// +// pac_clip_sum: Approximate sum with per-level overflow + distinct bitmaps +// Always: buffered, approximate, two-sided (unsigned pos/neg), 31 levels covering 128-bit +// +#ifndef PAC_CLIP_SUM_HPP +#define PAC_CLIP_SUM_HPP + +#include "duckdb.hpp" +#include "pac_aggregate.hpp" +#include + +namespace duckdb { + +void RegisterPacClipSumFunctions(ExtensionLoader &loader); +void RegisterPacNoisedClipSumFunctions(ExtensionLoader &loader); +void RegisterPacNoisedClipSumCountFunctions(ExtensionLoader &loader); + +// ============================================================================ +// Constants +// ============================================================================ +constexpr int PAC2_NUM_LEVELS = 31; 
+constexpr int PAC2_NORMAL_SWAR = 16; // 16 x uint64_t = 64 x uint16_t SWAR counters +constexpr int PAC2_NORMAL_ELEMENTS = 18; // 16 SWAR + 1 packed ptr/ec + 1 bitmap +constexpr int PAC2_OVERFLOW_SWAR = 32; // 32 x uint64_t = 64 x uint32_t SWAR counters +constexpr int PAC2_OVERFLOW_ELEMENTS = 33; // 32 SWAR + 1 exact_count +constexpr int PAC2_LEVEL_SHIFT = 4; +constexpr uint64_t PAC2_SWAR_MASK_16 = 0x0001000100010001ULL; + +// ============================================================================ +// Packed pointer + exact_count helpers +// Normal level[16] stores: upper 16 bits = exact_count, lower 48 bits = overflow pointer +// ============================================================================ +static inline uint64_t *Pac2GetOverflowPtr(uint64_t packed) { + return reinterpret_cast(packed & 0x0000FFFFFFFFFFFFULL); +} +static inline uint16_t Pac2GetExactCount(uint64_t packed) { + return static_cast(packed >> 48); +} +static inline void Pac2SetExactCount(uint64_t &packed, uint16_t count) { + packed = (packed & 0x0000FFFFFFFFFFFFULL) | (static_cast(count) << 48); +} +static inline void Pac2SetOverflowPtr(uint64_t &packed, uint64_t *ptr) { + packed = (packed & 0xFFFF000000000000ULL) | (reinterpret_cast(ptr) & 0x0000FFFFFFFFFFFFULL); +} + +// ============================================================================ +// SWAR kernel — identical to pac_sum's AddToTotalsSWAR for uint16_t +// ============================================================================ +AUTOVECTORIZE static inline void Pac2AddToTotalsSWAR16(uint64_t *PAC_RESTRICT total, uint64_t value, + uint64_t key_hash) { + uint64_t val_packed = static_cast(value) * PAC2_SWAR_MASK_16; + for (int i = 0; i < 16; i++) { + uint64_t bits = (key_hash >> i) & PAC2_SWAR_MASK_16; + uint64_t expanded = (bits << 16) - bits; + total[i] += val_packed & expanded; + } +} + +// ============================================================================ +// PacClipSumIntState — core state for one 
unsigned accumulator +// ============================================================================ +struct PacClipSumIntState { + uint64_t key_hash; + uint64_t update_count; + int8_t max_level_used; // -1 if none + int8_t inline_level_idx; // which level uses inline, -1 if none + + // 31 level pointers = 248 bytes. + // Inline optimization: last 18 slots (indices 13..30) = 144 bytes = one normal level. + // Levels 0-12 can use inline storage without overlapping their own pointer slot. + union { + uint64_t *levels[PAC2_NUM_LEVELS]; // 248 bytes + struct { + uint64_t *_ptrs[13]; // levels 0-12 pointers (104 bytes) + uint64_t inline_level[PAC2_NORMAL_ELEMENTS]; // 144 bytes for one inline level + }; + }; + + // ======================================================================== + // GetLevel: route value to lowest level where shifted value fits in 8 bits + // ======================================================================== + static inline int GetLevel(uint64_t abs_val) { + if (abs_val < 256) { + return 0; + } + int bit_pos = 63 - pac_clzll(abs_val); + return (bit_pos - 4) >> 2; + } + + // For 128-bit (hugeint) values + static inline int GetLevel128(uint64_t upper, uint64_t lower) { + if (upper == 0) { + return GetLevel(lower); + } + int bit_pos = 127 - pac_clzll(upper); + return (bit_pos - 4) >> 2; + } + + // ======================================================================== + // Level allocation + // ======================================================================== + inline void AllocateLevel(ArenaAllocator &allocator, int k) { + if (k >= 13 && inline_level_idx >= 0) { + // Evict inline level to arena + auto *ext = reinterpret_cast(allocator.Allocate(PAC2_NORMAL_ELEMENTS * sizeof(uint64_t))); + memcpy(ext, inline_level, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); + levels[inline_level_idx] = ext; + inline_level_idx = -1; + // Clear inline area so levels[13..30] read as nullptr + memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * 
sizeof(uint64_t)); + } + if (k < 13 && inline_level_idx < 0) { + // Use inline storage + levels[k] = inline_level; + memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); + inline_level_idx = static_cast(k); + } else { + auto *buf = reinterpret_cast(allocator.Allocate(PAC2_NORMAL_ELEMENTS * sizeof(uint64_t))); + memset(buf, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); + levels[k] = buf; + } + } + + inline void EnsureLevelAllocated(ArenaAllocator &allocator, int k) { + if (DUCKDB_LIKELY(k <= max_level_used)) { + return; + } + for (int i = max_level_used + 1; i <= k; i++) { + AllocateLevel(allocator, i); + } + max_level_used = static_cast(k); + } + + // ======================================================================== + // CascadeTop4: extract top 4 bits of 16-bit SWAR → add to 32-bit overflow + // ======================================================================== + void CascadeTop4(uint64_t *normal_buf, ArenaAllocator &allocator) { + // 1. Ensure overflow level allocated + uint64_t *overflow = Pac2GetOverflowPtr(normal_buf[16]); + if (!overflow) { + overflow = reinterpret_cast(allocator.Allocate(PAC2_OVERFLOW_ELEMENTS * sizeof(uint64_t))); + memset(overflow, 0, PAC2_OVERFLOW_ELEMENTS * sizeof(uint64_t)); + Pac2SetOverflowPtr(normal_buf[16], overflow); + } + + // 2. Extract top 4 bits of each 16-bit counter → add to 32-bit overflow + // SWAR 16-bit element i holds bit positions: i, i+16, i+32, i+48 + // SWAR 32-bit element i holds: i, i+32; element i+16 holds: i+16, i+48 + for (int i = 0; i < 16; i++) { + uint64_t swar = normal_buf[i]; + uint64_t top4 = (swar >> 12) & 0x000F000F000F000FULL; + normal_buf[i] = swar & 0x0FFF0FFF0FFF0FFFULL; + + auto *t = reinterpret_cast(&top4); + auto *o1 = reinterpret_cast(&overflow[i]); // bits i, i+32 + auto *o2 = reinterpret_cast(&overflow[i + 16]); // bits i+16, i+48 + o1[0] += t[0]; // bit i + o1[1] += t[2]; // bit i+32 + o2[0] += t[1]; // bit i+16 + o2[1] += t[3]; // bit i+48 + } + + // 3. 
Cascade exact_count top 4 bits + uint16_t ec = Pac2GetExactCount(normal_buf[16]); + auto *overflow_ec = reinterpret_cast(&overflow[32]); + *overflow_ec += (ec >> 12); + Pac2SetExactCount(normal_buf[16], ec & 0x0FFF); + } + + // ======================================================================== + // AddValue: overflow-aware exact_count update + // ======================================================================== + inline void AddToExactCount(uint64_t *normal_buf, uint16_t shifted_val, ArenaAllocator &allocator) { + uint16_t ec = Pac2GetExactCount(normal_buf[16]); + uint32_t new_ec = static_cast(ec) + shifted_val; + if (DUCKDB_UNLIKELY(new_ec > 0xFFFF)) { + CascadeTop4(normal_buf, allocator); + ec = Pac2GetExactCount(normal_buf[16]); // now ≤ 0x0FFF + new_ec = static_cast(ec) + shifted_val; + } + Pac2SetExactCount(normal_buf[16], static_cast(new_ec)); + } + + // ======================================================================== + // Estimate distinct count from 64-bit bitmap using birthday-paradox formula + // ======================================================================== + static inline int EstimateDistinct(uint64_t bitmap) { + int k = pac_popcount64(bitmap); + if (k >= 64) { + return 256; // saturated — could be any large number + } + if (k == 0) { + return 0; + } + // n ≈ -64 * ln(1 - k/64) + return static_cast(-64.0 * std::log(1.0 - k / 64.0)); + } + + // ======================================================================== + // GetTotals: non-mutating finalization — sums all levels + // clip_support_threshold: soft clamping of under-supported levels (0 = no clipping) + // + // Levels with fewer estimated distinct contributors than the threshold are + // attenuated rather than zeroed: + // - Prefix (below first supported level): scaled UP by 16^distance to + // clamp small under-supported values toward the supported range. 
+ // - Suffix (above last supported level): scaled DOWN by 16^distance to + // attenuate outlier levels toward the supported range. + // - Interior unsupported levels (between first and last supported): full + // contribution, no attenuation. + // ======================================================================== + void GetTotals(PAC_FLOAT *dst, int clip_support_threshold = 0) const { + memset(dst, 0, 64 * sizeof(PAC_FLOAT)); + + // Pass 1: find first and last supported levels + int first_supported = -1; + int last_supported = -1; + if (clip_support_threshold > 0) { + for (int k = 0; k <= max_level_used; k++) { + if (levels[k] && EstimateDistinct(levels[k][17]) >= clip_support_threshold) { + if (first_supported < 0) { + first_supported = k; + } + last_supported = k; + } + } + } + + // Pass 2: accumulate contributions with soft clamping + for (int k = 0; k <= max_level_used; k++) { + if (!levels[k]) { + continue; + } + + // Determine effective scale: clamp under-supported prefix/suffix levels + int effective_level = k; + if (clip_support_threshold > 0 && first_supported >= 0) { + if (k < first_supported) { + // Prefix: clamp scale up to first supported level + effective_level = first_supported; + } else if (k > last_supported) { + // Suffix: clamp scale down to last supported level + effective_level = last_supported; + } + } else if (clip_support_threshold > 0 && first_supported < 0) { + // No supported levels at all — zero everything + continue; + } + + PAC_FLOAT scale = std::exp2(static_cast(PAC2_LEVEL_SHIFT * effective_level)); + + // Add normal 16-bit SWAR contribution + auto *counters = reinterpret_cast(levels[k]); + for (int j = 0; j < 64; j++) { + int swar_idx = (j % 16) * 4 + (j / 16); + dst[j] += static_cast(counters[swar_idx]) * scale; + } + + // Add overflow 32-bit SWAR contribution (scaled by 2^12 relative to normal) + uint64_t *overflow = Pac2GetOverflowPtr(levels[k][16]); + if (overflow) { + PAC_FLOAT overflow_scale = scale * 
std::exp2(static_cast(12)); + auto *ocounters = reinterpret_cast(overflow); + for (int j = 0; j < 64; j++) { + int swar_idx = (j % 32) * 2 + (j / 32); + dst[j] += static_cast(ocounters[swar_idx]) * overflow_scale; + } + } + } + } + + // ======================================================================== + // CombineFrom: merge another state into this one + // ======================================================================== + void CombineFrom(PacClipSumIntState *src, ArenaAllocator &allocator) { + if (!src) { + return; + } + key_hash |= src->key_hash; + update_count += src->update_count; + + for (int k = 0; k <= src->max_level_used; k++) { + if (!src->levels[k]) { + continue; + } + + // If dst doesn't have this level: steal src's pointer + if (k > max_level_used || !levels[k]) { + EnsureLevelAllocated(allocator, k); // ensures max_level_used >= k, allocates if needed + // If we just allocated a fresh level, steal src's data over it + if (k != src->inline_level_idx) { + // src level is arena-allocated, can steal + levels[k] = src->levels[k]; + src->levels[k] = nullptr; + } else { + // src is using inline — copy instead + memcpy(levels[k], src->levels[k], PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); + } + continue; + } + + // Both have this level: merge + // Add SWAR counters + for (int i = 0; i < PAC2_NORMAL_SWAR; i++) { + levels[k][i] += src->levels[k][i]; + } + // OR bitmaps + levels[k][17] |= src->levels[k][17]; + + // Merge exact_counts (check overflow) + uint16_t dst_ec = Pac2GetExactCount(levels[k][16]); + uint16_t src_ec = Pac2GetExactCount(src->levels[k][16]); + uint32_t sum_ec = static_cast(dst_ec) + src_ec; + if (sum_ec > 0xFFFF) { + CascadeTop4(levels[k], allocator); + } + dst_ec = Pac2GetExactCount(levels[k][16]); + Pac2SetExactCount(levels[k][16], dst_ec + src_ec); + + // Merge overflow levels + uint64_t *src_overflow = Pac2GetOverflowPtr(src->levels[k][16]); + uint64_t *dst_overflow = Pac2GetOverflowPtr(levels[k][16]); + if (src_overflow && 
!dst_overflow) { + // Steal overflow from src + Pac2SetOverflowPtr(levels[k][16], src_overflow); + Pac2SetOverflowPtr(src->levels[k][16], nullptr); + } else if (src_overflow && dst_overflow) { + // Merge overflow SWAR counters + for (int i = 0; i < PAC2_OVERFLOW_SWAR; i++) { + dst_overflow[i] += src_overflow[i]; + } + // Merge overflow exact_counts + auto *dec = reinterpret_cast(&dst_overflow[32]); + auto *sec = reinterpret_cast(&src_overflow[32]); + *dec += *sec; + } + } + } + + // Interface methods + PacClipSumIntState *GetState() { + return this; + } + PacClipSumIntState *EnsureState(ArenaAllocator &) { + return this; + } +}; + +// ============================================================================ +// PacClipSumStateWrapper: buffering wrapper with two-sided pos/neg +// ============================================================================ +struct PacClipSumStateWrapper { + using State = PacClipSumIntState; + using Value = uint64_t; + static constexpr int BUF_SIZE = 2; + static constexpr uint64_t BUF_MASK = 3ULL; + + uint64_t val_buf[BUF_SIZE]; + uint64_t hash_buf[BUF_SIZE]; + union { + uint64_t n_buffered; // lower 2 bits: count, upper bits: state pointer + PacClipSumIntState *state; + }; + PacClipSumIntState *neg_state; // separate state for negatives (stores absolute values) + + PacClipSumIntState *GetState() const { + return reinterpret_cast(reinterpret_cast(state) & ~7ULL); + } + + PacClipSumIntState *EnsureState(ArenaAllocator &a) { + PacClipSumIntState *s = GetState(); + if (!s) { + s = reinterpret_cast(a.Allocate(sizeof(PacClipSumIntState))); + memset(s, 0, sizeof(PacClipSumIntState)); + s->max_level_used = -1; + s->inline_level_idx = -1; + state = s; + } + return s; + } + + PacClipSumIntState *GetNegState() const { + return neg_state; + } + + PacClipSumIntState *EnsureNegState(ArenaAllocator &a) { + if (!neg_state) { + neg_state = reinterpret_cast(a.Allocate(sizeof(PacClipSumIntState))); + memset(neg_state, 0, sizeof(PacClipSumIntState)); 
+ neg_state->max_level_used = -1; + neg_state->inline_level_idx = -1; + } + return neg_state; + } + + static idx_t StateSize() { + return sizeof(PacClipSumStateWrapper); + } +}; + +} // namespace duckdb + +#endif // PAC_CLIP_SUM_HPP diff --git a/src/include/aggregates/pac_count.hpp b/src/include/aggregates/pac_count.hpp index ea7069da..aa0f34d0 100644 --- a/src/include/aggregates/pac_count.hpp +++ b/src/include/aggregates/pac_count.hpp @@ -25,6 +25,8 @@ namespace duckdb { void RegisterPacCountFunctions(ExtensionLoader &); void RegisterPacCountCountersFunctions(ExtensionLoader &); void RegisterPacAvgFunctions(ExtensionLoader &); +void RegisterPacNoisedClipCountFunctions(ExtensionLoader &); +void RegisterPacClipCountFunctions(ExtensionLoader &); // PAC_COUNT(key_hash) implements a COUNT aggregate that for each privacy-unit (identified by a key_hash) // computes 64 independent counts, where each independent count randomly (50% chance) includes a PU or not. diff --git a/src/include/aggregates/pac_min_max.hpp b/src/include/aggregates/pac_min_max.hpp index e6cf177e..a825f503 100644 --- a/src/include/aggregates/pac_min_max.hpp +++ b/src/include/aggregates/pac_min_max.hpp @@ -32,6 +32,10 @@ void RegisterPacMinFunctions(ExtensionLoader &loader); void RegisterPacMaxFunctions(ExtensionLoader &loader); void RegisterPacMinCountersFunctions(ExtensionLoader &loader); void RegisterPacMaxCountersFunctions(ExtensionLoader &loader); +void RegisterPacNoisedClipMinFunctions(ExtensionLoader &loader); +void RegisterPacNoisedClipMaxFunctions(ExtensionLoader &loader); +void RegisterPacClipMinFunctions(ExtensionLoader &loader); +void RegisterPacClipMaxFunctions(ExtensionLoader &loader); // ============================================================================ // PAC_MIN/PAC_MAX(hash_key, value) aggregate functions diff --git a/src/include/query_processing/pac_expression_builder.hpp b/src/include/query_processing/pac_expression_builder.hpp index 168fe183..d1ad9c48 100644 --- 
a/src/include/query_processing/pac_expression_builder.hpp +++ b/src/include/query_processing/pac_expression_builder.hpp @@ -10,6 +10,7 @@ #include "duckdb/planner/operator/logical_get.hpp" #include "duckdb/planner/operator/logical_aggregate.hpp" #include "duckdb/planner/operator/logical_cteref.hpp" +#include "metadata/pac_compatibility_check.hpp" namespace duckdb { @@ -67,6 +68,13 @@ void ModifyAggregatesWithPacFunctions(OptimizerExtensionInput &input, LogicalAgg unique_ptr &hash_input_expr, unique_ptr &plan, double correction = 1.0); +// Rewrite PAC aggregates to use clipping variants when pac_clip_support is set. +// Inserts a lower aggregate with plain DuckDB aggregates (GROUP BY groups + PU hash), +// and rewrites the top aggregate to use pac_noised_clip_* / pac_clip_* functions. +// Skips insertion if child already groups by PU key (Q13 exception). +void RewriteClipAggregates(OptimizerExtensionInput &input, unique_ptr &plan, + const PACCompatibilityResult &check, const vector &privacy_units); + } // namespace duckdb #endif // PAC_EXPRESSION_BUILDER_HPP diff --git a/src/include/query_processing/pac_plan_traversal.hpp b/src/include/query_processing/pac_plan_traversal.hpp index fbb778aa..a4be99e3 100644 --- a/src/include/query_processing/pac_plan_traversal.hpp +++ b/src/include/query_processing/pac_plan_traversal.hpp @@ -115,6 +115,9 @@ vector FilterTargetAggregatesWithPUKeyCheck(const vector &privacy_units); +// Find the first aggregate in a subtree (depth-first). +LogicalAggregate *FindFirstChildAggregate(LogicalOperator *op); + // Check if a target node is inside a DELIM_JOIN's subquery branch (children[1]). // This is important for correlated subqueries where nodes in the subquery branch // cannot directly access tables from the outer query. 
diff --git a/src/query_processing/pac_expression_builder.cpp b/src/query_processing/pac_expression_builder.cpp index c1c2740e..1cf09243 100644 --- a/src/query_processing/pac_expression_builder.cpp +++ b/src/query_processing/pac_expression_builder.cpp @@ -24,6 +24,7 @@ #include "duckdb/planner/operator/logical_comparison_join.hpp" #include "duckdb/planner/operator/logical_cross_product.hpp" #include "utils/pac_helpers.hpp" +#include "categorical/pac_categorical_detection.hpp" namespace duckdb { @@ -444,7 +445,7 @@ unique_ptr BindBitOrAggregate(OptimizerExtensionInput &input, unique } // Map aggregate function name to PAC function name -static string GetPacAggregateFunctionName(const string &function_name) { +static string GetPacAggregateFunctionName(const string &function_name, ClientContext *ctx = nullptr) { string pac_function_name; if (function_name == "sum" || function_name == "sum_no_overflow") { pac_function_name = "pac_noised_sum"; @@ -521,7 +522,7 @@ static void InsertDistinctPreAggregation(OptimizerExtensionInput &input, Logical for (idx_t i = 0; i < agg->expressions.size(); i++) { auto &old_aggr = agg->expressions[i]->Cast(); string function_name = old_aggr.function.name; - string pac_name = GetPacAggregateFunctionName(function_name); + string pac_name = GetPacAggregateFunctionName(function_name, &input.context); auto hash_ref = make_uniq(LogicalType::UBIGINT, combined_hash_binding); unique_ptr value_ref; @@ -594,7 +595,7 @@ BuildDistinctBranch(OptimizerExtensionInput &input, unique_ptr vector> outer_expressions; for (auto &spec : agg_specs) { - string pac_name = GetPacAggregateFunctionName(spec.second); + string pac_name = GetPacAggregateFunctionName(spec.second, &input.context); auto hash_ref = make_uniq(LogicalType::UBIGINT, combined_hash_binding); unique_ptr value_ref; if (spec.second == "count" || spec.second == "count_star") { @@ -633,7 +634,7 @@ static unique_ptr BuildNonDistinctBranch( for (auto &spec : agg_specs) { auto &old_aggr = *spec.second; 
string function_name = old_aggr.function.name; - string pac_name = GetPacAggregateFunctionName(function_name); + string pac_name = GetPacAggregateFunctionName(function_name, &input.context); unique_ptr value_child; if (old_aggr.children.empty()) { @@ -1110,7 +1111,7 @@ void ModifyAggregatesWithPacFunctions(OptimizerExtensionInput &input, LogicalAgg value_child = old_aggr.children[0]->Copy(); } - string pac_function_name = GetPacAggregateFunctionName(function_name); + string pac_function_name = GetPacAggregateFunctionName(function_name, &input.context); unique_ptr correction_expr; if (correction != 1.0) { correction_expr = make_uniq_base(Value::DOUBLE(correction)); @@ -1122,4 +1123,261 @@ void ModifyAggregatesWithPacFunctions(OptimizerExtensionInput &input, LogicalAgg agg->ResolveOperatorTypes(); } +// ============================================================================ +// Clip aggregate rewrite: pac_noised_* → pac_noised_clip_* / pac_clip_* +// with optional lower aggregate insertion for per-PU pre-aggregation +// ============================================================================ + +// Map pac function names to their clip variants +static string GetClipVariant(const string &name) { + if (name == "pac_noised_sum") { + return "pac_noised_clip_sum"; + } + if (name == "pac_noised_count") { + return "pac_noised_clip_count"; + } + if (name == "pac_noised_min") { + return "pac_noised_clip_min"; + } + if (name == "pac_noised_max") { + return "pac_noised_clip_max"; + } + if (name == "pac_sum") { + return "pac_clip_sum"; + } + if (name == "pac_count") { + return "pac_clip_count"; + } + if (name == "pac_min") { + return "pac_clip_min"; + } + if (name == "pac_max") { + return "pac_clip_max"; + } + return ""; // not a pac aggregate +} + +// Map pac function names to their original DuckDB aggregate +static string GetOriginalAggregate(const string &name) { + if (name == "pac_noised_sum" || name == "pac_sum") { + return "sum"; + } + if (name == 
"pac_noised_count" || name == "pac_count") { + return "count"; + } + if (name == "pac_noised_min" || name == "pac_min") { + return "min"; + } + if (name == "pac_noised_max" || name == "pac_max") { + return "max"; + } + return ""; +} + +// Is this a noised (scalar) variant? If so, top aggregate uses pac_noised_clip_* +static bool IsNoisedVariant(const string &name) { + return name.find("pac_noised_") == 0; +} + +// Bind a plain DuckDB aggregate function (sum, count, min, max) +static unique_ptr BindPlainAggregate(OptimizerExtensionInput &input, const string &func_name, + vector> children) { + FunctionBinder function_binder(input.context); + ErrorData error; + vector arg_types; + for (auto &child : children) { + arg_types.push_back(child->return_type); + } + auto &entry = Catalog::GetSystemCatalog(input.context) + .GetEntry(input.context, DEFAULT_SCHEMA, func_name); + auto best = function_binder.BindFunction(entry.name, entry.functions, arg_types, error); + if (!best.IsValid()) { + throw InternalException("PAC clip rewrite: failed to bind " + func_name); + } + auto func = entry.functions.GetFunctionByOffset(best.GetIndex()); + return function_binder.BindAggregateFunction(func, std::move(children), nullptr, AggregateType::NON_DISTINCT); +} + +// Check if an aggregate contains pac_noised_* or pac_* (counters) expressions +static bool IsPacAggregate(LogicalAggregate *agg) { + for (auto &expr : agg->expressions) { + if (expr->GetExpressionClass() != ExpressionClass::BOUND_AGGREGATE) { + continue; + } + auto &aggr = expr->Cast(); + if (!GetClipVariant(aggr.function.name).empty()) { + return true; + } + } + return false; +} + +void RewriteClipAggregates(OptimizerExtensionInput &input, unique_ptr &plan, + const PACCompatibilityResult &check, const vector &privacy_units) { + // Find all aggregate nodes + vector all_aggregates; + FindAllAggregates(plan, all_aggregates); + + for (auto *agg : all_aggregates) { + if (!IsPacAggregate(agg)) { + continue; + } + + // Check Q13 
exception: does the child aggregate already group by PU key? + bool child_groups_by_pu = false; + for (auto &child : agg->children) { + auto *inner_agg = FindFirstChildAggregate(child.get()); + if (inner_agg && AggregateGroupsByPUKey(inner_agg, check, privacy_units)) { + child_groups_by_pu = true; + break; + } + } + + if (child_groups_by_pu) { + // Q13 exception: just rename pac_noised_* → pac_noised_clip_* in place + for (idx_t i = 0; i < agg->expressions.size(); i++) { + if (agg->expressions[i]->GetExpressionClass() != ExpressionClass::BOUND_AGGREGATE) { + continue; + } + auto &aggr = agg->expressions[i]->Cast(); + string clip_name = GetClipVariant(aggr.function.name); + if (clip_name.empty()) { + continue; + } + // Rebind with the clip variant name + vector> children; + for (auto &child : aggr.children) { + children.push_back(child->Copy()); + } + agg->expressions[i] = RebindAggregate(input.context, clip_name, std::move(children), false); + } + agg->ResolveOperatorTypes(); + continue; + } + + // Normal path: insert lower aggregate + auto &binder = input.optimizer.binder; + idx_t lower_group_index = binder.GenerateTableIndex(); + idx_t lower_agg_index = binder.GenerateTableIndex(); + + // Identify the PU hash expression from the first pac aggregate's first child (hash arg) + unique_ptr pu_hash_expr; + for (auto &expr : agg->expressions) { + if (expr->GetExpressionClass() != ExpressionClass::BOUND_AGGREGATE) { + continue; + } + auto &aggr = expr->Cast(); + if (!GetClipVariant(aggr.function.name).empty() && !aggr.children.empty()) { + pu_hash_expr = aggr.children[0]->Copy(); + break; + } + } + if (!pu_hash_expr) { + continue; // shouldn't happen + } + + idx_t num_original_groups = agg->groups.size(); + + // Build lower aggregate expressions (plain DuckDB aggregates) + vector> lower_expressions; + for (idx_t i = 0; i < agg->expressions.size(); i++) { + auto &aggr = agg->expressions[i]->Cast(); + string orig_name = GetOriginalAggregate(aggr.function.name); + if 
(orig_name.empty()) { + throw InternalException("PAC clip rewrite: unexpected aggregate " + aggr.function.name); + } + + vector> plain_children; + if (orig_name == "count" && (aggr.children.size() <= 1)) { + // pac_noised_count(hash) or pac_count(hash) → count_star() + // pac_noised_count(hash, col) → count(col) — but children[1] might be constant 1 + if (aggr.children.size() >= 2) { + auto &val_child = aggr.children[1]; + // Check if it's a constant 1 (from count_star rewrite) + if (val_child->type == ExpressionType::VALUE_CONSTANT) { + auto &const_expr = val_child->Cast(); + if (const_expr.value.IsNull() || const_expr.value == Value::BIGINT(1)) { + // count_star — no children + } else { + plain_children.push_back(val_child->Copy()); + } + } else { + plain_children.push_back(val_child->Copy()); + } + } + lower_expressions.push_back(BindPlainAggregate(input, "count_star", std::move(plain_children))); + } else if (orig_name == "count" && aggr.children.size() > 1) { + // count with column reference + plain_children.push_back(aggr.children[1]->Copy()); + lower_expressions.push_back(BindPlainAggregate(input, "count", std::move(plain_children))); + } else { + // sum, min, max — extract the value child (children[1]) + if (aggr.children.size() >= 2) { + plain_children.push_back(aggr.children[1]->Copy()); + } + lower_expressions.push_back(BindPlainAggregate(input, orig_name, std::move(plain_children))); + } + } + + // Create lower aggregate node + auto lower_agg = make_uniq(lower_group_index, lower_agg_index, std::move(lower_expressions)); + + // Copy original groups + add PU hash as extra group + for (auto &g : agg->groups) { + lower_agg->groups.push_back(g->Copy()); + } + lower_agg->groups.push_back(pu_hash_expr->Copy()); + + // Steal top's child → lower's child + lower_agg->children.push_back(std::move(agg->children[0])); + lower_agg->ResolveOperatorTypes(); + + // Rewrite top aggregate's groups to reference lower's group output + for (idx_t i = 0; i < 
num_original_groups; i++) { + auto gtype = agg->groups[i]->return_type; + agg->groups[i] = make_uniq(gtype, ColumnBinding(lower_group_index, i)); + } + + // PU hash ref from lower's group output + auto pu_hash_ref = make_uniq(pu_hash_expr->return_type, + ColumnBinding(lower_group_index, num_original_groups)); + + // Rewrite top aggregate's expressions to clip variants + for (idx_t i = 0; i < agg->expressions.size(); i++) { + auto &aggr = agg->expressions[i]->Cast(); + string pac_name = aggr.function.name; + bool noised = IsNoisedVariant(pac_name); + string orig = GetOriginalAggregate(pac_name); + + // Reference to lower aggregate's result + auto lower_type = lower_agg->types[num_original_groups + 1 + i]; + unique_ptr lower_ref = + make_uniq(lower_type, ColumnBinding(lower_agg_index, i)); + + // pac_clip_sum has integer + DECIMAL overloads but no FLOAT/DOUBLE. + // Cast FLOAT/DOUBLE to BIGINT so binding succeeds. + if ((orig == "sum" || orig == "count") && + (lower_type.id() == LogicalTypeId::FLOAT || lower_type.id() == LogicalTypeId::DOUBLE)) { + lower_ref = + BoundCastExpression::AddCastToType(input.context, std::move(lower_ref), LogicalType::BIGINT); + } + + // count → sumcount (preserves BIGINT return type), others → clip variant + string clip_func; + if (orig == "count") { + clip_func = noised ? 
"pac_noised_clip_sumcount" : "pac_clip_sum"; + } else { + clip_func = GetClipVariant(pac_name); + } + + agg->expressions[i] = + BindPacAggregate(input, clip_func, pu_hash_ref->Copy(), std::move(lower_ref), nullptr); + } + + // Set lower as top's child + agg->children[0] = std::move(lower_agg); + agg->ResolveOperatorTypes(); + } +} + } // namespace duckdb diff --git a/src/query_processing/pac_plan_traversal.cpp b/src/query_processing/pac_plan_traversal.cpp index 24bec2ac..fbbb3113 100644 --- a/src/query_processing/pac_plan_traversal.cpp +++ b/src/query_processing/pac_plan_traversal.cpp @@ -812,7 +812,7 @@ bool AggregateGroupsByPUKey(LogicalAggregate *agg, const PACCompatibilityResult } // Find the first aggregate in a subtree (depth-first). -static LogicalAggregate *FindFirstChildAggregate(LogicalOperator *op) { +LogicalAggregate *FindFirstChildAggregate(LogicalOperator *op) { if (!op) { return nullptr; } diff --git a/test/sql/pac_clip_sum.test b/test/sql/pac_clip_sum.test new file mode 100644 index 00000000..dd20e698 --- /dev/null +++ b/test/sql/pac_clip_sum.test @@ -0,0 +1,214 @@ +# name: test/sql/pac_clip_sum.test +# description: Test pac_clip_sum and pac_noised_clip_sum aggregate functions with clipping +# group: [sql] + +require pac + +statement ok +PRAGMA clear_pac_metadata; + +statement ok +SET pac_seed = 42 + +statement ok +SET threads = 1 + +statement ok +SET pac_mi = 0 + +# ============================================================================ +# Basic pac_clip_sum correctness (returns LIST) +# ============================================================================ + +statement ok +CREATE TABLE test_data AS +SELECT i AS rowid, i % 3 AS grp, (i % 100) AS value +FROM range(4000) t(i) + +# pac_clip_sum returns LIST (64 counters) +query I +SELECT typeof(pac_clip_sum(hash(rowid)::UBIGINT, value::INTEGER)) FROM test_data +---- +FLOAT[] + +# pac_noised_clip_sum returns HUGEINT (fused noised scalar) +query I +SELECT 
typeof(pac_noised_clip_sum(hash(rowid)::UBIGINT, value::INTEGER)) FROM test_data +---- +HUGEINT + +# Works with different types +query I +SELECT pac_noised_clip_sum(hash(rowid)::UBIGINT, value::BIGINT) IS NOT NULL FROM test_data +---- +true + +query I +SELECT pac_noised_clip_sum(hash(rowid)::UBIGINT, value::SMALLINT) IS NOT NULL FROM test_data +---- +true + +# Grouped aggregation +query I +SELECT count(*) FROM ( + SELECT grp, pac_noised_clip_sum(hash(rowid)::UBIGINT, value::INTEGER) as s + FROM test_data GROUP BY grp +) t WHERE s IS NOT NULL +---- +3 + +# NULL handling +query I +SELECT pac_noised_clip_sum(hash(rowid)::UBIGINT, CASE WHEN rowid % 2 = 0 THEN value ELSE NULL END) IS NOT NULL +FROM test_data +---- +true + +# ============================================================================ +# Clipping: outlier elimination via pac_noised_clip_sum +# ============================================================================ + +# Create data with 1000 normal rows and 1 huge outlier +statement ok +CREATE TABLE outlier_test AS +SELECT i as id, + CASE WHEN i <= 1000 THEN (i % 10) + 1 + ELSE 1000000 + END as value +FROM range(1, 1002) t(i) + +# Without clip_support: result includes the outlier (expect ~1M+ range) +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) > 100000 FROM outlier_test +---- +true + +# With clip_support=5: outlier level has only 1 contributor, gets clipped +# Result should be close to sum without outlier = 5500 +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) < 100000 FROM outlier_test +---- +true + +# Verify the clipped result is in the right ballpark (within 5x of 5500) +query I +SELECT abs(pac_noised_clip_sum(hash(id)::UBIGINT, value) - 5500) < 5500 * 5 FROM outlier_test +---- +true + +# Soft clamp: outlier contributes a small nonzero amount (not hard-zeroed) +# Compare with a baseline that has no outlier at all +query I +SELECT (SELECT 
pac_noised_clip_sum(hash(id)::UBIGINT, value) FROM outlier_test) + > (SELECT pac_noised_clip_sum(hash(i)::UBIGINT, (i % 10) + 1) FROM range(1, 1001) t(i)) +---- +true + +# ============================================================================ +# Clipping with grouped data: outlier in one group, normal in another +# ============================================================================ + +statement ok +CREATE TABLE grouped_outlier AS +SELECT i as id, i % 2 as grp, + CASE WHEN i <= 1000 THEN (i % 10) + 1 + WHEN i = 1001 THEN 10000000 -- massive outlier in group 1 + ELSE (i % 10) + 1 + END as value +FROM range(1, 1003) t(i) + +# Group 0 (even ids): ~500 normal values, no outlier +# Group 1 (odd ids): ~500 normal values + one 10M outlier +# With clip_support: the outlier's level should be clipped in group 1 +query IT +SELECT grp, pac_noised_clip_sum(hash(id)::UBIGINT, value) < 100000 as reasonable +FROM grouped_outlier +GROUP BY grp +ORDER BY grp +---- +0 true +1 true + +# ============================================================================ +# Clipping with negative values (two-sided) +# ============================================================================ + +statement ok +CREATE TABLE neg_outlier AS +SELECT i as id, + CASE WHEN i <= 1000 THEN i - 500 -- normal: -499 to 500 + ELSE -5000000 -- single negative outlier + END as value +FROM range(1, 1002) t(i) + +# Without clip_support: outlier drags result very negative +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) < -100000 FROM neg_outlier +---- +true + +# With clip_support: outlier clipped, result near 500 (sum of 1..500 - sum of 1..499 = 500) +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) > -100000 FROM neg_outlier +---- +true + +# ============================================================================ +# HUGEINT support +# 
============================================================================ + +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_sum(hash(i)::UBIGINT, i::HUGEINT) IS NOT NULL FROM range(1, 101) t(i) +---- +true + +# ============================================================================ +# Clip synonyms: pac_clip_count, pac_clip_min, pac_clip_max exist +# ============================================================================ + +query I +SELECT typeof(pac_clip_count(hash(i)::UBIGINT)) FROM range(1, 101) t(i) +---- +FLOAT[] + +query I +SELECT typeof(pac_noised_clip_count(hash(i)::UBIGINT)) FROM range(1, 101) t(i) +---- +BIGINT + +# ============================================================================ +# pac_clip_support setting via compiler (sum → pac_noised_clip_sum) +# ============================================================================ + +statement ok +SET pac_clip_support = 64 + +statement ok +CREATE TABLE compiler_test (id INTEGER, value INTEGER) + +statement ok +ALTER TABLE compiler_test ADD PAC_KEY (id) + +statement ok +ALTER TABLE compiler_test SET PU + +statement ok +INSERT INTO compiler_test SELECT i, i % 100 FROM range(1, 1001) t(i) + +# When pac_clip_support is set, regular SUM should go through clip rewrite +query I +SELECT sum(value) IS NOT NULL FROM compiler_test +---- +true From 96da519833314164c162a525794c91b3f89290f2 Mon Sep 17 00:00:00 2001 From: peter Date: Tue, 24 Mar 2026 10:50:31 +0100 Subject: [PATCH 02/27] instructions for genai --- CLAUDE.md | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..e497ad57 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,90 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. 
+ +## Working with the user + +When you're stuck — either unable to fix a bug after 2-3 attempts, or tempted to work around the actual problem by redefining the objective — **stop and ask the user for directions**. Explain clearly what the specific problem is (e.g., "pac_clip_sum(UBIGINT, DOUBLE) has no matching overload — should I add a DOUBLE overload or cast?"). The user knows this codebase deeply and can often point you to the right solution in one sentence. Do not silently change the goal, declare something impossible, or add bloated workarounds without consulting first. We work as a team. + +Always test your changes with real queries (e.g., TPC-H on sf1) before declaring success, not just unit tests. Unit tests with wide boolean thresholds can pass even when the code is fundamentally broken. + +Never execute git commands that could lose code. Always ask the user for permission on those. + +## What is PAC? + +PAC (Pretty Accurate Counting) is a DuckDB extension that automatically privatizes SQL aggregate queries. It protects against Membership Inference Attacks by maintaining 64 parallel counters per aggregate (one per "world" bit), adding calibrated noise at finalization. Queries are rewritten transparently — users write normal SQL and PAC transforms it. + +## Build & Test + +```bash +GEN=ninja make # build (release) +make test # run all tests (~20 tests, ~1600 assertions) + +# single test +build/release/test/unittest "test/sql/pac_sum.test" + +# C++ unit tests (parser, traversal, compiler) +build/release/extension/pac/pac_test_runner +``` + +Build outputs go to `build/release/`. DuckDB is a git submodule in `duckdb/`. + +## Compilation Pipeline + +PAC runs in `pre_optimize_function` — BEFORE DuckDB's built-in optimizers (join order, filter pushdown, column lifetime). 
This means: +- `plan->ResolveOperatorTypes()` must be called before accessing `LogicalProjection::types` (they're empty in raw plans) +- WHERE filters are still separate FILTER nodes +- LIMIT is a separate root node +- DuckDB's optimizers run automatically on the PAC-transformed plan + +### Pipeline phases (in order) + +1. **Compatibility check** (`pac_compatibility_check.cpp`) — decides if query needs PAC rewrite +2. **FK join injection** (`pac_bitslice_add_fkjoins.cpp`) — adds missing joins to reach PU tables +3. **Aggregate transformation** (`pac_bitslice_compiler.cpp` → `pac_expression_builder.cpp`) — replaces SUM/COUNT/etc. with pac_noised_sum/pac_noised_count/etc., inserts pac_hash projections above scans +4. **Categorical rewrite** (`pac_categorical_rewriter.cpp`) — when PAC aggregates appear in filters/comparisons, converts to counter lists (LIST\) with pac_filter/pac_select terminals +5. **AVG decomposition** (`pac_avg_rewriter.cpp`) — rewrites pac_noised_avg into pac_noised_div(pac_sum, pac_count) +6. **Clip rewrite** (`pac_expression_builder.cpp:RewriteClipAggregates`) — when `pac_clip_support` is set, inserts lower aggregate for per-PU pre-aggregation with clipping + +### Aggregate naming convention + +| Name pattern | Returns | Purpose | +|---|---|---| +| `pac_sum/count/min/max` | LIST\ | 64 counters (used by categorical/clip rewrites) | +| `pac_noised_sum/count/min/max` | scalar | Fused counters + noise (direct query output) | +| `pac_clip_sum/count/min/max` | LIST\ | Counters with per-level support clipping | +| `pac_noised_clip_sum/count/min/max` | scalar | Fused clip + noise | + +pac_noised_* is the fused version of pac_noised(pac_*()). The unfused form is used when expressions operate on counters (list_transform/lambdas in categorical queries). 
+ +### Key architectural rules + +- **pac_hash is always computed in a Projection above the scan**, never inside an aggregate +- **Pre-computed bindings become stale after RewriteBottomUp** — always re-compute at point of use +- **Use `binder.GenerateTableIndex()`** for new table indices, never manual tracking +- **Always call `ResolveOperatorTypes()`** after creating or modifying a LogicalAggregate +- **pac_noised_sum on DECIMAL** uses `BindDecimalPacSum` to dispatch by physical type and set return_type to DECIMAL(38, scale) — any new sum variant needs the same pattern + +## Key source files + +- `src/core/pac_optimizer.cpp` — optimizer hook entry point +- `src/compiler/pac_bitslice_compiler.cpp` — main compilation orchestrator (`CompilePacBitsliceQuery`) +- `src/query_processing/pac_expression_builder.cpp` — aggregate modification, clip rewrite, expression binding +- `src/query_processing/pac_plan_traversal.cpp` — plan traversal utilities (FindAllAggregates, AggregateGroupsByPUKey, etc.) +- `src/include/aggregates/pac_aggregate.hpp` — PacBindData, noise calibration, p-tracking +- `src/categorical/pac_categorical_rewriter.cpp` — categorical query transformation (~1770 lines) + +## Debugging + +Set `#define PAC_DEBUG 1` in `src/include/pac_debug.hpp` for stderr trace output. Use `EXPLAIN` to see the transformed plan. 
+ +## PAC DDL examples + +```sql +ALTER TABLE customer ADD PAC_KEY (c_custkey); +ALTER TABLE customer SET PU; +ALTER TABLE orders ADD PAC_LINK (o_custkey) REFERENCES customer (c_custkey); +SET pac_mi = 0; -- disable noise for testing +SET pac_seed = 42; -- reproducible results +SET pac_clip_support = 40; -- enable clip rewrite with support threshold +``` From 5067f4657e166a14cb1bbb9a379e1e74ea7d3765 Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 14:35:52 +0100 Subject: [PATCH 03/27] Hard-zero unsupported outlier levels in pac_clip_sum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change suffix attenuation from soft-clamp (scale by 16^distance) to hard-zero (skip entirely). Unsupported magnitude levels now contribute nothing to the result, fully eliminating the variance side-channel. Attack results with clip_support=2: - Small filter (3-4 users): 96% → 47% (random) - 20K small items: 96% → 53% (random) - Std ratio in/out: 90x → 0.87x Co-Authored-By: Claude Opus 4.6 (1M context) --- src/include/aggregates/pac_clip_sum.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/include/aggregates/pac_clip_sum.hpp b/src/include/aggregates/pac_clip_sum.hpp index 7bb0c87b..ddd9d80f 100644 --- a/src/include/aggregates/pac_clip_sum.hpp +++ b/src/include/aggregates/pac_clip_sum.hpp @@ -239,8 +239,8 @@ struct PacClipSumIntState { // Prefix: clamp scale up to first supported level effective_level = first_supported; } else if (k > last_supported) { - // Suffix: clamp scale down to last supported level - effective_level = last_supported; + // Suffix: hard zero — unsupported outlier levels contribute nothing + continue; } } else if (clip_support_threshold > 0 && first_supported < 0) { // No supported levels at all — zero everything From a377f0781112069ddc1ef6fcda5c9507d3275bb8 Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 15:48:37 +0100 Subject: [PATCH 04/27] 
=?UTF-8?q?Reduce=20pac=5Fclip=5Fsum=20level=20width?= =?UTF-8?q?=20from=2016x=20to=204x=20(shift=3D4=20=E2=86=92=20shift=3D2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Finer-grained magnitude levels (2-bit bands, 4x per level) allow the clipping mechanism to catch moderate outliers that were previously invisible within the same 16x-wide level. A 10x outlier (50k vs 5k normal) now lands in a different level and gets hard-zeroed. Changes: - PAC2_LEVEL_SHIFT: 4 → 2 - PAC2_NUM_LEVELS: 31 → 32 (covers int64; HUGEINT clamps to level 31) - GetLevel/GetLevel128: divide by 2 instead of 4, clamp to max level - Inline optimization threshold: 13 → 14 - All shift extraction: level << 2 → level << 1 Memory: +8 bytes per state (256 vs 248 byte pointer array). Negligible. Performance: no regression on TPCH Q01 SF1 (1.38s → 1.31s). Security: moderate outlier attack drops from 76.5% to 52.9% (random). Co-Authored-By: Claude Opus 4.6 (1M context) --- src/aggregates/pac_clip_sum.cpp | 10 +++++----- src/include/aggregates/pac_clip_sum.hpp | 26 ++++++++++++------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/aggregates/pac_clip_sum.cpp b/src/aggregates/pac_clip_sum.cpp index 26a3ebe6..88d7f5ff 100644 --- a/src/aggregates/pac_clip_sum.cpp +++ b/src/aggregates/pac_clip_sum.cpp @@ -14,7 +14,7 @@ AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, state.key_hash |= key_hash; int level = PacClipSumIntState::GetLevel(value); - uint64_t shift = level << 2; + uint64_t shift = level << 1; uint16_t shifted_val = static_cast(value >> shift); // max 255 (8 bits) state.EnsureLevelAllocated(allocator, level); @@ -46,7 +46,7 @@ AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, } int level = PacClipSumIntState::GetLevel128(upper, lower); - uint64_t shift = level << 2; + uint64_t shift = level << 1; // Shift the 128-bit value right by shift bits, take lower 8 bits 
uint16_t shifted_val; @@ -96,7 +96,7 @@ inline void PacClipSumRouteHugeint(PacClipSumStateWrapper &wrapper, PacClipSumIn uint64_t upper = static_cast(abs_val.upper); uint64_t lower = abs_val.lower; int level = PacClipSumIntState::GetLevel128(upper, lower); - uint64_t shift = level << 2; + uint64_t shift = level << 1; uint16_t shifted_val; if (shift >= 64) { shifted_val = static_cast(upper >> (shift - 64)); @@ -305,7 +305,7 @@ static void PacClipSumUpdateUHugeInt(Vector inputs[], AggregateInputData &aggr, uint64_t upper = static_cast(v.upper); uint64_t lower = v.lower; int level = PacClipSumIntState::GetLevel128(upper, lower); - uint64_t shift = level << 2; + uint64_t shift = level << 1; uint16_t shifted_val; if (shift >= 64) { shifted_val = static_cast(upper >> (shift - 64)); @@ -347,7 +347,7 @@ static void PacClipSumScatterUpdateUHugeInt(Vector inputs[], AggregateInputData uint64_t upper = static_cast(v.upper); uint64_t lower = v.lower; int level = PacClipSumIntState::GetLevel128(upper, lower); - uint64_t shift = level << 2; + uint64_t shift = level << 1; uint16_t shifted_val; if (shift >= 64) { shifted_val = static_cast(upper >> (shift - 64)); diff --git a/src/include/aggregates/pac_clip_sum.hpp b/src/include/aggregates/pac_clip_sum.hpp index ddd9d80f..00456bcd 100644 --- a/src/include/aggregates/pac_clip_sum.hpp +++ b/src/include/aggregates/pac_clip_sum.hpp @@ -18,12 +18,12 @@ void RegisterPacNoisedClipSumCountFunctions(ExtensionLoader &loader); // ============================================================================ // Constants // ============================================================================ -constexpr int PAC2_NUM_LEVELS = 31; +constexpr int PAC2_NUM_LEVELS = 32; // 32 levels × 2-bit bands covers 64-bit; HUGEINT clamps to level 31 constexpr int PAC2_NORMAL_SWAR = 16; // 16 x uint64_t = 64 x uint16_t SWAR counters constexpr int PAC2_NORMAL_ELEMENTS = 18; // 16 SWAR + 1 packed ptr/ec + 1 bitmap constexpr int PAC2_OVERFLOW_SWAR = 32; // 32 
x uint64_t = 64 x uint32_t SWAR counters constexpr int PAC2_OVERFLOW_ELEMENTS = 33; // 32 SWAR + 1 exact_count -constexpr int PAC2_LEVEL_SHIFT = 4; +constexpr int PAC2_LEVEL_SHIFT = 2; // 2^2 = 4x per level (was 4 = 16x per level) constexpr uint64_t PAC2_SWAR_MASK_16 = 0x0001000100010001ULL; // ============================================================================ @@ -65,13 +65,13 @@ struct PacClipSumIntState { int8_t max_level_used; // -1 if none int8_t inline_level_idx; // which level uses inline, -1 if none - // 31 level pointers = 248 bytes. - // Inline optimization: last 18 slots (indices 13..30) = 144 bytes = one normal level. - // Levels 0-12 can use inline storage without overlapping their own pointer slot. + // 32 level pointers = 256 bytes. + // Inline optimization: last 18 slots (indices 14..31) = 144 bytes = one normal level. + // Levels 0-13 can use inline storage without overlapping their own pointer slot. union { - uint64_t *levels[PAC2_NUM_LEVELS]; // 248 bytes + uint64_t *levels[PAC2_NUM_LEVELS]; // 256 bytes struct { - uint64_t *_ptrs[13]; // levels 0-12 pointers (104 bytes) + uint64_t *_ptrs[14]; // levels 0-13 pointers (112 bytes) uint64_t inline_level[PAC2_NORMAL_ELEMENTS]; // 144 bytes for one inline level }; }; @@ -84,32 +84,32 @@ struct PacClipSumIntState { return 0; } int bit_pos = 63 - pac_clzll(abs_val); - return (bit_pos - 4) >> 2; + return std::min((bit_pos - 4) >> 1, PAC2_NUM_LEVELS - 1); } - // For 128-bit (hugeint) values + // For 128-bit (hugeint) values — clamps to max level for very large values static inline int GetLevel128(uint64_t upper, uint64_t lower) { if (upper == 0) { return GetLevel(lower); } int bit_pos = 127 - pac_clzll(upper); - return (bit_pos - 4) >> 2; + return std::min((bit_pos - 4) >> 1, PAC2_NUM_LEVELS - 1); } // ======================================================================== // Level allocation // ======================================================================== inline void 
AllocateLevel(ArenaAllocator &allocator, int k) { - if (k >= 13 && inline_level_idx >= 0) { + if (k >= 14 && inline_level_idx >= 0) { // Evict inline level to arena auto *ext = reinterpret_cast(allocator.Allocate(PAC2_NORMAL_ELEMENTS * sizeof(uint64_t))); memcpy(ext, inline_level, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); levels[inline_level_idx] = ext; inline_level_idx = -1; - // Clear inline area so levels[13..30] read as nullptr + // Clear inline area so levels[14..31] read as nullptr memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); } - if (k < 13 && inline_level_idx < 0) { + if (k < 14 && inline_level_idx < 0) { // Use inline storage levels[k] = inline_level; memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); From f87f9f86a522e5c8b3181788aa6317c451d8fe59 Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 15:50:05 +0100 Subject: [PATCH 05/27] Fix pac_clip_sum test: adjust soft-clamp assertion for hard-zero behavior With hard-zero, unsupported outlier levels contribute nothing, so the clipped result equals (not exceeds) the no-outlier baseline. Change > to >=. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- test/sql/pac_clip_sum.test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/sql/pac_clip_sum.test b/test/sql/pac_clip_sum.test index dd20e698..0a4b1ee9 100644 --- a/test/sql/pac_clip_sum.test +++ b/test/sql/pac_clip_sum.test @@ -99,11 +99,11 @@ SELECT abs(pac_noised_clip_sum(hash(id)::UBIGINT, value) - 5500) < 5500 * 5 FROM ---- true -# Soft clamp: outlier contributes a small nonzero amount (not hard-zeroed) -# Compare with a baseline that has no outlier at all +# Hard-zero: outlier level is unsupported and contributes nothing, +# so result should be >= no-outlier baseline (equal or greater due to noise) query I SELECT (SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) FROM outlier_test) - > (SELECT pac_noised_clip_sum(hash(i)::UBIGINT, (i % 10) + 1) FROM range(1, 1001) t(i)) + >= (SELECT pac_noised_clip_sum(hash(i)::UBIGINT, (i % 10) + 1) FROM range(1, 1001) t(i)) ---- true From 160bff1301e5c2e46e1d3f880eb77b4ad86618c1 Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 16:23:01 +0100 Subject: [PATCH 06/27] Extend pac_clip_sum to 62 levels for full HUGEINT support Increase PAC2_NUM_LEVELS from 32 to 62 to cover the full 128-bit range without clamping. int64 values naturally use only levels 0-29 (the extra pointer slots remain NULL, no per-level data is allocated). The inline optimization threshold moves from 14 to 44 accordingly. Memory: +240 bytes per state for the pointer array (496 vs 256 bytes). Per-level data allocations are unchanged for int64 workloads. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/include/aggregates/pac_clip_sum.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/include/aggregates/pac_clip_sum.hpp b/src/include/aggregates/pac_clip_sum.hpp index 00456bcd..9f15a5b4 100644 --- a/src/include/aggregates/pac_clip_sum.hpp +++ b/src/include/aggregates/pac_clip_sum.hpp @@ -18,7 +18,7 @@ void RegisterPacNoisedClipSumCountFunctions(ExtensionLoader &loader); // ============================================================================ // Constants // ============================================================================ -constexpr int PAC2_NUM_LEVELS = 32; // 32 levels × 2-bit bands covers 64-bit; HUGEINT clamps to level 31 +constexpr int PAC2_NUM_LEVELS = 62; // 62 levels × 2-bit bands covers full 128-bit (int64 uses ≤30) constexpr int PAC2_NORMAL_SWAR = 16; // 16 x uint64_t = 64 x uint16_t SWAR counters constexpr int PAC2_NORMAL_ELEMENTS = 18; // 16 SWAR + 1 packed ptr/ec + 1 bitmap constexpr int PAC2_OVERFLOW_SWAR = 32; // 32 x uint64_t = 64 x uint32_t SWAR counters @@ -65,13 +65,13 @@ struct PacClipSumIntState { int8_t max_level_used; // -1 if none int8_t inline_level_idx; // which level uses inline, -1 if none - // 32 level pointers = 256 bytes. - // Inline optimization: last 18 slots (indices 14..31) = 144 bytes = one normal level. - // Levels 0-13 can use inline storage without overlapping their own pointer slot. + // 62 level pointers = 496 bytes. + // Inline optimization: last 18 slots (indices 44..61) = 144 bytes = one normal level. + // Levels 0-43 can use inline storage without overlapping their own pointer slot. 
union { - uint64_t *levels[PAC2_NUM_LEVELS]; // 256 bytes + uint64_t *levels[PAC2_NUM_LEVELS]; // 496 bytes struct { - uint64_t *_ptrs[14]; // levels 0-13 pointers (112 bytes) + uint64_t *_ptrs[44]; // levels 0-43 pointers (352 bytes) uint64_t inline_level[PAC2_NORMAL_ELEMENTS]; // 144 bytes for one inline level }; }; @@ -100,16 +100,16 @@ struct PacClipSumIntState { // Level allocation // ======================================================================== inline void AllocateLevel(ArenaAllocator &allocator, int k) { - if (k >= 14 && inline_level_idx >= 0) { + if (k >= 44 && inline_level_idx >= 0) { // Evict inline level to arena auto *ext = reinterpret_cast(allocator.Allocate(PAC2_NORMAL_ELEMENTS * sizeof(uint64_t))); memcpy(ext, inline_level, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); levels[inline_level_idx] = ext; inline_level_idx = -1; - // Clear inline area so levels[14..31] read as nullptr + // Clear inline area so levels[44..61] read as nullptr memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); } - if (k < 14 && inline_level_idx < 0) { + if (k < 44 && inline_level_idx < 0) { // Use inline storage levels[k] = inline_level; memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); From 3d60b741b501805e3b2d6c5a0867e1da6150c964 Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 16:25:46 +0100 Subject: [PATCH 07/27] Add tests for level boundaries, HUGEINT clipping, over-clipping, multi-group MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New test cases: - Level boundary routing (same-level vs cross-level with 4x bands) - HUGEINT outlier clipping (values at 2^70, beyond int64 range) - Negative HUGEINT outlier via neg_state - Over-clipping (clip_support > group size → zero result) - Multi-group with outlier isolated to one group Co-Authored-By: Claude Opus 4.6 (1M context) --- test/sql/pac_clip_sum.test | 155 +++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) diff 
--git a/test/sql/pac_clip_sum.test b/test/sql/pac_clip_sum.test index 0a4b1ee9..1f94a454 100644 --- a/test/sql/pac_clip_sum.test +++ b/test/sql/pac_clip_sum.test @@ -212,3 +212,158 @@ query I SELECT sum(value) IS NOT NULL FROM compiler_test ---- true + +# ============================================================================ +# Level boundary tests (4x bands with shift=2) +# Values at exact level boundaries should be correctly routed +# ============================================================================ + +statement ok +SET pac_clip_support = 5 + +# Level 0: 0-255, Level 1: 256-1023, Level 2: 1024-4095, Level 3: 4096-16383 +# 1000 rows at value=100 (level 0), 1 outlier at 4096 (level 3) → different level → clipped +statement ok +CREATE TABLE boundary_test AS +SELECT i as id, + CASE WHEN i <= 1000 THEN 100 + ELSE 4096 + END as value +FROM range(1, 1002) t(i) + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) < 200000 FROM boundary_test +---- +true + +# Outlier at 255 (max level 0) — same level as value=100 → NOT clipped +statement ok +CREATE TABLE same_level_test AS +SELECT i as id, + CASE WHEN i <= 1000 THEN 100 + ELSE 255 + END as value +FROM range(1, 1002) t(i) + +# Without clipping: sum = 1000*100 + 255 = 100255 +statement ok +SET pac_clip_support = NULL + +query I +SELECT abs(pac_noised_clip_sum(hash(id)::UBIGINT, value) - 100255) < 50000 FROM same_level_test +---- +true + +# With clipping: 255 is same level as 100, so it's NOT clipped — result similar +statement ok +SET pac_clip_support = 5 + +query I +SELECT abs(pac_noised_clip_sum(hash(id)::UBIGINT, value) - 100255) < 50000 FROM same_level_test +---- +true + +# ============================================================================ +# HUGEINT outlier clipping (levels 30+, beyond int64 range) +# ============================================================================ + +statement ok +SET pac_clip_support = 5 + +# Normal values + one HUGEINT outlier at 2^70 +statement ok 
+CREATE TABLE hugeint_outlier AS +SELECT i as id, + CASE WHEN i <= 1000 THEN i::HUGEINT + ELSE (1::HUGEINT << 70) + END as value +FROM range(1, 1002) t(i) + +# Without clip_support: result includes the huge outlier +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) > 1000000000000 FROM hugeint_outlier +---- +true + +# With clip_support: outlier at high level gets hard-zeroed +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) < 1000000000000 FROM hugeint_outlier +---- +true + +# ============================================================================ +# Negative HUGEINT outlier +# ============================================================================ + +statement ok +CREATE TABLE neg_hugeint_outlier AS +SELECT i as id, + CASE WHEN i <= 1000 THEN (i * 100)::HUGEINT + ELSE -(1::HUGEINT << 70) + END as value +FROM range(1, 1002) t(i) + +# Without clip_support: huge negative outlier dominates +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) < -1000000000000 FROM neg_hugeint_outlier +---- +true + +# With clip_support: negative outlier clipped via neg_state +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) > -1000000000000 FROM neg_hugeint_outlier +---- +true + +# ============================================================================ +# Over-clipping: pac_clip_support higher than group size → zero result +# ============================================================================ + +statement ok +CREATE TABLE tiny_group AS SELECT i as id, i * 10 as value FROM range(1, 4) t(i) + +# 3 rows, clip_support=10 → no level has 10 contributors → all zeroed +statement ok +SET pac_clip_support = 10 + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) FROM tiny_group +---- +0 + +# 
============================================================================ +# Multi-group with outlier in one group only +# ============================================================================ + +statement ok +SET pac_clip_support = 5 + +statement ok +CREATE TABLE multigroup AS +SELECT i as id, i % 4 as grp, + CASE WHEN i = 997 THEN 9999999 -- outlier in group 1 (997 % 4 = 1) + ELSE (i % 50) + 1 + END as value +FROM range(1, 1001) t(i) + +# Group 1 has the outlier — should be clipped to reasonable range +# Other groups are normal +query I +SELECT count(*) FROM ( + SELECT grp, pac_noised_clip_sum(hash(id)::UBIGINT, value) as s + FROM multigroup GROUP BY grp +) t WHERE abs(s) < 500000 +---- +4 From a1422ded0eb5c923f84ebb589760a57790348be2 Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 16:32:53 +0100 Subject: [PATCH 08/27] Add CLAUDE.md with development rules and project guidance Fetched from main and added: - Development rules: test coverage, no test removal, codebase-first search, helper function reuse, duckdb submodule is read-only - Reference to the PAC paper (arXiv:2603.15023) - PAC_DEBUG_PRINT usage guidance Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 CLAUDE.md diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 00000000..21b11dba --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,100 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Working with the user + +When you're stuck — either unable to fix a bug after 2-3 attempts, or tempted to work around the actual problem by redefining the objective — **stop and ask the user for directions**. Explain clearly what the specific problem is (e.g., "pac_clip_sum(UBIGINT, DOUBLE) has no matching overload — should I add a DOUBLE overload or cast?"). 
The user knows this codebase deeply and can often point you to the right solution in one sentence. Do not silently change the goal, declare something impossible, or add bloated workarounds without consulting first. We work as a team. + +Always test your changes with real queries (e.g., TPC-H on sf1) before declaring success, not just unit tests. Unit tests with wide boolean thresholds can pass even when the code is fundamentally broken. + +Never execute git commands that could lose code. Always ask the user for permission on those. + +## Development rules + +- **New features must have tests.** Ask the user whether to create a new test file or extend an existing one in `test/sql/`. +- **Never remove a failing test to "fix" a failure.** If a test fails, fix the underlying bug. Tests exist for a reason. +- **Before implementing anything, search the existing codebase** for similar patterns or solutions. Check if a helper function, utility, or prior approach already addresses the problem. Reuse before reinventing. +- **Use helper functions.** Factor shared logic into helpers rather than duplicating code. Check `src/include/utils/` and existing helpers in the file you're editing. +- **Never edit the `duckdb/` submodule.** The DuckDB source is read-only. All PAC logic lives in `src/` and `test/`. If you need DuckDB internals, use the public API or ask the user. +- **Keep the paper in mind.** The PAC mechanism is described in [SIMD-PAC-DB: Pretty Performant PAC Privacy](https://arxiv.org/abs/2603.15023). Refer to it for the theoretical foundations (noise calibration, mutual information bounds, counter semantics) before making changes to core aggregate logic. +- **Add `PAC_DEBUG_PRINT` statements** at major code flow points (entry/exit of compilation phases, aggregate rewrites, clipping decisions). Use the existing `PAC_DEBUG_PRINT` macro from `src/include/pac_debug.hpp` — it's compiled out when `PAC_DEBUG` is 0. + +## What is PAC? 
+ +PAC (Probably Approximately Correct) is a DuckDB extension that automatically privatizes SQL aggregate queries. It protects against Membership Inference Attacks by maintaining 64 parallel counters per aggregate (one per "world" bit), adding calibrated noise at finalization. Queries are rewritten transparently — users write normal SQL and PAC transforms it. + +## Build & Test + +```bash +GEN=ninja make # build (release) +make test # run all tests (~20 tests, ~1600 assertions) + +# single test +build/release/test/unittest "test/sql/pac_sum.test" + +# C++ unit tests (parser, traversal, compiler) +build/release/extension/pac/pac_test_runner +``` + +Build outputs go to `build/release/`. DuckDB is a git submodule in `duckdb/`. + +## Compilation Pipeline + +PAC runs in `pre_optimize_function` — BEFORE DuckDB's built-in optimizers (join order, filter pushdown, column lifetime). This means: +- `plan->ResolveOperatorTypes()` must be called before accessing `LogicalProjection::types` (they're empty in raw plans) +- WHERE filters are still separate FILTER nodes +- LIMIT is a separate root node +- DuckDB's optimizers run automatically on the PAC-transformed plan + +### Pipeline phases (in order) + +1. **Compatibility check** (`pac_compatibility_check.cpp`) — decides if query needs PAC rewrite +2. **FK join injection** (`pac_bitslice_add_fkjoins.cpp`) — adds missing joins to reach PU tables +3. **Aggregate transformation** (`pac_bitslice_compiler.cpp` → `pac_expression_builder.cpp`) — replaces SUM/COUNT/etc. with pac_noised_sum/pac_noised_count/etc., inserts pac_hash projections above scans +4. **Categorical rewrite** (`pac_categorical_rewriter.cpp`) — when PAC aggregates appear in filters/comparisons, converts to counter lists (LIST\) with pac_filter/pac_select terminals +5. **AVG decomposition** (`pac_avg_rewriter.cpp`) — rewrites pac_noised_avg into pac_noised_div(pac_sum, pac_count) +6. 
**Clip rewrite** (`pac_expression_builder.cpp:RewriteClipAggregates`) — when `pac_clip_support` is set, inserts lower aggregate for per-PU pre-aggregation with clipping + +### Aggregate naming convention + +| Name pattern | Returns | Purpose | +|---|---|---| +| `pac_sum/count/min/max` | LIST\ | 64 counters (used by categorical/clip rewrites) | +| `pac_noised_sum/count/min/max` | scalar | Fused counters + noise (direct query output) | +| `pac_clip_sum/count/min/max` | LIST\ | Counters with per-level support clipping | +| `pac_noised_clip_sum/count/min/max` | scalar | Fused clip + noise | + +pac_noised_* is the fused version of pac_noised(pac_*()). The unfused form is used when expressions operate on counters (list_transform/lambdas in categorical queries). + +### Key architectural rules + +- **pac_hash is always computed in a Projection above the scan**, never inside an aggregate +- **Pre-computed bindings become stale after RewriteBottomUp** — always re-compute at point of use +- **Use `binder.GenerateTableIndex()`** for new table indices, never manual tracking +- **Always call `ResolveOperatorTypes()`** after creating or modifying a LogicalAggregate +- **pac_noised_sum on DECIMAL** uses `BindDecimalPacSum` to dispatch by physical type and set return_type to DECIMAL(38, scale) — any new sum variant needs the same pattern + +## Key source files + +- `src/core/pac_optimizer.cpp` — optimizer hook entry point +- `src/compiler/pac_bitslice_compiler.cpp` — main compilation orchestrator (`CompilePacBitsliceQuery`) +- `src/query_processing/pac_expression_builder.cpp` — aggregate modification, clip rewrite, expression binding +- `src/query_processing/pac_plan_traversal.cpp` — plan traversal utilities (FindAllAggregates, AggregateGroupsByPUKey, etc.) 
+- `src/include/aggregates/pac_aggregate.hpp` — PacBindData, noise calibration, p-tracking +- `src/categorical/pac_categorical_rewriter.cpp` — categorical query transformation (~1770 lines) + +## Debugging + +Set `#define PAC_DEBUG 1` in `src/include/pac_debug.hpp` for stderr trace output. Use `EXPLAIN` to see the transformed plan. + +## PAC DDL examples + +```sql +ALTER TABLE customer ADD PAC_KEY (c_custkey); +ALTER TABLE customer SET PU; +ALTER TABLE orders ADD PAC_LINK (o_custkey) REFERENCES customer (c_custkey); +SET pac_mi = 0; -- disable noise for testing +SET pac_seed = 42; -- reproducible results +SET pac_clip_support = 40; -- enable clip rewrite with support threshold +``` From 864a6961ff9df0a83c9663864f6f5af74fd72402 Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 16:51:33 +0100 Subject: [PATCH 09/27] Add attack scripts and evaluation results for pac_clip_sum Attack scripts testing the variance side-channel MIA against pac_clip_sum: - clip_attack_test.sh: main suite (small filter, wide filter, 10K users, etc.) 
- clip_multirow_test.sh: 20K small items user (tests pre-aggregation) - clip_hardzero_stress.sh: stress tests (high trials, composed queries, collusion) - clip_shift2_stress.sh: tests with 4x magnitude levels (shift=2) - clipping_experiment.sh: input clipping (Winsorization) baseline - output_clipping_experiment.sh: post-hoc output clipping baseline - output_clipping_v2_experiment.sh: output clipping before noise - clip_attack_results.md: full evaluation with findings Co-Authored-By: Claude Opus 4.6 (1M context) --- attacks/clip_attack_results.md | 314 +++++++++++++++++++++++ attacks/clip_attack_test.sh | 196 ++++++++++++++ attacks/clip_hardzero_stress.sh | 297 +++++++++++++++++++++ attacks/clip_multirow_test.sh | 136 ++++++++++ attacks/clip_shift2_stress.sh | 241 +++++++++++++++++ attacks/clipping_experiment.sh | 215 ++++++++++++++++ attacks/output_clipping_experiment.sh | 244 ++++++++++++++++++ attacks/output_clipping_v2_experiment.sh | 220 ++++++++++++++++ 8 files changed, 1863 insertions(+) create mode 100644 attacks/clip_attack_results.md create mode 100755 attacks/clip_attack_test.sh create mode 100755 attacks/clip_hardzero_stress.sh create mode 100755 attacks/clip_multirow_test.sh create mode 100755 attacks/clip_shift2_stress.sh create mode 100755 attacks/clipping_experiment.sh create mode 100755 attacks/output_clipping_experiment.sh create mode 100644 attacks/output_clipping_v2_experiment.sh diff --git a/attacks/clip_attack_results.md b/attacks/clip_attack_results.md new file mode 100644 index 00000000..5baf8847 --- /dev/null +++ b/attacks/clip_attack_results.md @@ -0,0 +1,314 @@ +# pac_clip_sum Attack Evaluation + +Evaluates whether `pac_clip_sum`'s support-based outlier attenuation defeats the +variance side-channel membership inference attack (MIA). + +## Background + +`pac_clip_sum` (commit 948a504) introduces a two-level aggregation: + +1. **Lower aggregate**: `GROUP BY pu_hash` with plain `SUM` — pre-aggregates per user +2. 
**Upper aggregate**: `pac_noised_clip_sum` — decomposes values into magnitude levels + (each 16x the previous), tracks a 64-bit bitmap of distinct contributors per level, + and attenuates levels with fewer contributors than `pac_clip_support` + +Supported clip aggregates: SUM, COUNT, MIN, MAX (MIN/MAX not fully implemented). +float/double types not yet supported. + +## Parameters + +- `pac_mi = 0.0078125` (1/128) +- 30 trials per condition unless noted +- Random guess baseline: 50% + +--- + +## Attack 1: Single-query variance classifier (small filter) + +**Setup**: N=1000 users, target=999999, filter<=3 (3-4 users in aggregation) + +| clip_support | Var>200k accuracy | Standard accuracy | std_in | std_out | std ratio | +|-------------|-------------------|-------------------|--------|---------|-----------| +| off | **96.0%** | 80.0% | 10,688,008 | 117,662 | 90.8x | +| 2 | **72.5%** | 62.7% | 613,511 | 122,026 | 5.0x | +| 3 | **72.5%** | 62.7% | 613,511 | 122,026 | 5.0x | + +**Finding**: Clipping reduces attack accuracy from 96% to 72.5% and collapses the +variance ratio from 91x to 5x. The outlier's contribution is attenuated by ~16x +(one magnitude level). However, 72.5% is still well above random — the residual +5x variance gap remains exploitable. + +clip=2 and clip=3 produce identical results because with only 3-4 users per level, +the bitmap support is at most 2-3 regardless of threshold. 
+ +--- + +## Attack 2: Wide filter (clipping best-case) + +**Setup**: N=1000 users, target=999999, filter<=999 (all users in aggregation) + +| clip_support | Var>200k accuracy | Standard accuracy | std_in | std_out | std ratio | +|-------------|-------------------|-------------------|--------|---------|-----------| +| off | 53.3% | 60.0% | 10,424,631 | 1,822,936 | 5.7x | +| 2 | **55.0%** | 53.3% | 1,816,604 | 1,875,047 | **1.0x** | +| 5 | **55.0%** | 53.3% | 1,816,604 | 1,875,047 | **1.0x** | +| 10 | **55.0%** | 53.3% | 1,816,604 | 1,875,047 | **1.0x** | + +**Finding**: With many users in the aggregation, clipping completely eliminates the +variance side-channel (std ratio goes to 1.0x). Attack accuracy drops to ~53-55%, near +random. The outlier's magnitude level has only 1 bitmap contributor vs hundreds at +normal levels — it is cleanly identified and attenuated. + +Note: even without clipping, the attack barely works (53%) because the outlier +signal (1M) is diluted by the large background (5M from 999 users). Clipping +further equalizes the means (5.1M vs 4.9M). + +clip=2, 5, and 10 all produce identical results — with ~1000 users, all normal +levels have saturated bitmaps (est. distinct >> 10), so only the outlier level +is affected. + +--- + +## Attack 3: 10K users, extreme outlier + +**Setup**: N=10000 users, target=9,999,999, filter<=2 (2-3 users in aggregation) + +| clip_support | Var>200k accuracy | Standard accuracy | std_in | std_out | std ratio | +|-------------|-------------------|-------------------|--------|---------|-----------| +| off | **97.8%** | 77.8% | 104,410,859 | 110,259 | 947x | +| 2 | **75.0%** | 47.9% | 396,383 | 112,778 | 3.5x | +| 3 | **47.9%** | 47.9% | 0 | 0 | — | + +**Finding**: clip=2 reduces accuracy from 97.8% to 75% (variance ratio 947x to 3.5x). +clip=3 zeroes ALL results (returns 0 for both in and out) — with only 2-3 users, +no level reaches 3 distinct contributors, so everything is zeroed. 
The attack is +"defeated" at 47.9% but utility is completely destroyed. + +--- + +## Attack 4: Over-clipping + +**Setup**: N=1000 users, target=999999, filter<=3, 15 trials + +| clip_support | Var>200k accuracy | mean_in | mean_out | +|-------------|-------------------|---------|----------| +| off | 91.3% | -2,067,562 | 42,892 | +| 5 | **50.0%** | 0 | 0 | +| 10 | **50.0%** | 0 | 0 | + +**Finding**: With only 3-4 users in the filter, clip_support >= 5 zeroes all output. +Attack accuracy = 50% (random), but every query returns 0. This is not a useful +defense — it's equivalent to refusing to answer. + +**Takeaway**: `pac_clip_support` must be set below the minimum expected number of +users in any aggregation group. For small filters, this severely limits the +clipping threshold. + +--- + +## Attack 5: Wide filter + aggressive clipping + +**Setup**: N=1000 users, target=999999, filter<=999, 15 trials + +| clip_support | Var>200k accuracy | Standard accuracy | std_in | std_out | +|-------------|-------------------|-------------------|--------|---------| +| off | 53.3% | 63.3% | 11,391,246 | 2,172,216 | +| 50 | **50.0%** | 50.0% | 1,219,189 | 2,049,795 | +| 100 | **50.0%** | 50.0% | 1,219,189 | 2,049,795 | + +**Finding**: With a wide filter (1000 users), even aggressive clipping (support=50, 100) +works perfectly — attack accuracy = 50% (random) and noise stds are equalized. +Normal magnitude levels have hundreds of bitmap contributors, far exceeding the +threshold. Only the outlier level (1 contributor) is affected. + +--- + +## Attack 6: Clip-after-filter vs clip-full-table (Dandan's hypothesis) + +Dandan's concern: clipping applied after filtering may leak more than clipping +applied to the entire dataset, because the filter changes which users contribute +to the bitmap, affecting which levels appear "supported." 
+ +**Setup**: N=1000 users, target=999999, filter<=3 + +| Method | Var>200k accuracy | std_in | std_out | std ratio | +|--------|-------------------|--------|---------|-----------| +| No clipping | **96.0%** | 10,688,008 | 117,662 | 90.8x | +| clip-after-filter (pac_clip_support=2) | **72.5%** | 613,511 | 122,026 | 5.0x | +| clip-full-table (pre-clip to mu+3sigma) | **56.9%** | 180,457 | 124,641 | 1.4x | + +**Finding: Dandan is correct.** Pre-clipping the full table then filtering gives +significantly better protection (56.9% vs 72.5%). The reasons: + +1. **Full-table pre-clipping** clamps the billionaire to 13,661 BEFORE PAC sees it. + PAC computes noise from the clamped range [1, 13661], and the noise is nearly + the same for in/out (std ratio 1.4x). + +2. **Clip-after-filter** only sees 3-4 rows. The bitmap has very few bits set per + level, making it harder to distinguish the outlier level from normal levels. + The attenuation is only ~16x (one level), leaving a 5x variance gap. + +However, clip-after-filter is still much better than no clipping (72.5% vs 96%), +confirming Dandan's second point: "this approach is still significantly better +than not applying clipping at all." + +--- + +## Summary + +| Scenario | No clip | pac_clip_support=2 | Pre-clip full table | +|----------|---------|-------------------|---------------------| +| Small filter (3-4 users) | 96% | 72.5% | 56.9% | +| Wide filter (1000 users) | 53% | 55% (side-channel gone) | — | +| 10K users, filter<=2 | 97.8% | 75% | — | + +## Attack 7: 20K small items user (multi-row outlier) + +The core argument for per-user pre-aggregation: a user with 20,000 purchases of +$50 each has normal individual values but a total contribution of $1,000,000. +Per-row Winsorization won't catch this. Does pac_clip_sum's GROUP BY pu_hash? + +**Setup**: N=1000 background users (1 row each, acctbal in [1,10000]). +Target user_id=0: 20,000 rows x $50 = $1,000,000 total. filter<=3. 
+ +| Method | Var>200k accuracy | std_in | std_out | std ratio | +|--------|-------------------|--------|---------|-----------| +| No clipping | **96.0%** | 10,686,722 | 117,662 | 90.8x | +| Winsorization (per-row clip to 13661) | **94.1%** | 9,436,439 | 124,641 | 75.7x | +| pac_clip_sum (clip_support=2) | **72.5%** | 613,511 | 122,026 | 5.0x | + +**Finding: Winsorization completely fails.** Each $50 value is well within the +[0, 13661] clip bounds, so nothing gets clipped. The 20,000 small rows pass +through untouched and the attack succeeds at 94.1% (barely below the 96% baseline). + +**pac_clip_sum catches it** because the pre-aggregation step (`GROUP BY pu_hash`) +sums user_id=0's 20,000 rows into a single $1,000,000 entry. This lands at an +outlier magnitude level with only 1 bitmap contributor, and gets attenuated. +Attack accuracy drops to 72.5%. + +This is precisely Peter's argument for why per-user contribution clipping (via +pre-aggregation) is needed instead of per-row value clipping. It validates the +two-level aggregation design from Wilson et al. 2019. + +--- + +## Suffix attenuation modes compared + +We tested three suffix attenuation strategies for unsupported outlier levels. +"Soft-clamp" is Peter's original (scale by 16^distance). "Bitmap-proportional" +adds a factor of estimated_distinct/threshold. "Hard-zero" skips the level entirely. + +**Attack 1 results (N=1000, filter<=3, clip=2):** + +| Mode | Var>200k | std_in | std_out | std ratio | +|------|----------|--------|---------|-----------| +| No clipping | **96.0%** | 10,688,008 | 117,662 | 90.8x | +| Soft-clamp | **72.5%** | 613,511 | 122,026 | 5.0x | +| Bitmap-proportional | **66.7%** | 327,625 | 122,026 | 2.7x | +| **Hard zero** | **47.1%** | 106,223 | 122,026 | **0.87x** | + +--- + +## Hard-zero stress tests + +Comprehensive adversarial evaluation of the hard-zero mode. 
+ +### TEST 1: High trial count (60 trials, best-threshold search) + +| truth | mean | std | n | +|-------|------|-----|---| +| in | 12,397 | 87,366 | 49 | +| out | 29,758 | 109,140 | 54 | + +Best threshold accuracy (searched 10k-500k in 10k steps): **52.4%**. +Midpoint classifier: **52.4%**. Likelihood ratio: **52.4%**. +All classifiers are indistinguishable from random. + +### TEST 2: Composed queries (30 trials x 10 queries) + +| n_queries | accuracy | +|-----------|----------| +| 1 | 43.4% | +| 5 | 50.9% | +| 10 | 48.1% | +| Majority vote | **50.0%** | + +Per-trial variance: in_std=83,747, out_std=85,116, **ratio=0.98**. +Composing 10 queries and averaging does not help the attacker. + +### TEST 3: Moderate outlier (target=50,000, same magnitude level as normal) + +**THIS BREAKS IT.** Target 50,000 is in level 2 (4096-65535), same as normal users. +The bitmap shows this level as supported → no clipping occurs. + +| truth | mean | std | +|-------|------|-----| +| in | 139,946 | 497,518 | +| out | 20,093 | 122,026 | + +Best threshold accuracy: **76.5%**. Std ratio: 4.1x. + +**Implication**: pac_clip_sum only clips outliers that are at a DIFFERENT magnitude +level than normal users. A 10x outlier within the same level passes through. + +### TEST 4: Two colluding outliers + +Two users with 999,999 — level 3 has 2 bitmap bits, meeting threshold=2. + +| truth | mean | std | +|-------|------|-----| +| in | 1,783,465 | 12,547,986 | +| out | 20,093 | 122,026 | + +Best threshold accuracy: **100.0%**. Attack fully succeeds. + +**Implication**: Two colluding users at the same magnitude level make that level +"supported." The clipping mechanism assumes outlier levels have few contributors. +Collusion (or any scenario with 2+ users at the same extreme level) defeats it. + +### TEST 5: Dandan's filter probing + +Attacker uses two queries with different filters to probe clipping behavior. 
+ +| Query | Best accuracy | +|-------|--------------| +| Filter<=3 (narrow) | 52.9% | +| Filter<=999 (wide) | 51.7% | +| Cross-filter differential | **51.0%** | + +**Dandan's concern is NOT exploitable with hard-zero.** The narrow query zeroes the +outlier level, giving identical counter distributions for in/out. The wide query +has the outlier's level zeroed too (1 contributor < threshold). The cross-filter +differential reveals nothing. + +### TEST 6: 20K small items ($50 x 20,000) + +Best threshold accuracy: **52.9%**. Attack defeated. +Pre-aggregation collapses 20K rows into one $1M entry at level 3, which is zeroed. + +### TEST 7: Borderline outlier (target=65,536, exactly level 3 boundary) + +Best threshold accuracy: **52.9%**. Attack defeated. +Even the minimum level-3 value is zeroed when it's the sole contributor. + +--- + +### Key takeaways + +1. **Hard-zero fully defeats the variance side-channel** for outliers at unsupported + magnitude levels. Attack accuracy = 50% across all classifiers, even with + composed queries, different thresholds, and cross-filter probing. + +2. **Moderate outliers within the same magnitude level are NOT caught.** A 10x outlier + (50,000 vs normal ~5,000) sits in the same level and passes through unclipped. + Attack accuracy: 76.5%. This is a fundamental limitation of the magnitude-level + granularity (each level spans 16x). + +3. **Two colluding outliers defeat the clipping** by making their level "supported" + (2 contributors >= threshold 2). Attack accuracy: 100%. + +4. **Dandan's filter-probing concern does not apply with hard-zero.** The zeroed level + contributes nothing regardless of filter, so different filters reveal no info. + +5. **The pre-aggregation step remains essential** — 20K small items are correctly + collapsed and clipped. 
diff --git a/attacks/clip_attack_test.sh b/attacks/clip_attack_test.sh new file mode 100755 index 00000000..e37d47cb --- /dev/null +++ b/attacks/clip_attack_test.sh @@ -0,0 +1,196 @@ +#!/usr/bin/env bash +# Test pac_clip_sum against membership inference attacks. +set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" + +run_sum() { + local cond=$1 seed=$2 n_users=$3 target_val=$4 filter=$5 clip=$6 + local insert="" + [ "$cond" = "in" ] && insert="INSERT INTO users VALUES (0, ${target_val});" + local clip_sql="" + [ "$clip" != "off" ] && clip_sql="SET pac_clip_support = ${clip};" + $DUCKDB -noheader -list 2>/dev/null <> "$IN_F" + echo "out,$(run_sum out $seed $n $tv $filt $clip)" >> "$OUT_F" + done + + echo "=== $label | N=$n filt<=$filt tv=$tv clip=$clip ===" + $DUCKDB -markdown < ${FBG} + ${tv}/2.0 THEN 1 + WHEN truth='out' AND v <= ${FBG} + ${tv}/2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM raw WHERE v IS NOT NULL +UNION ALL +SELECT 'Var>200k', + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND ABS(v - ${FBG}) > 200000 THEN 1 + WHEN truth='out' AND ABS(v - ${FBG}) <= 200000 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) +FROM raw WHERE v IS NOT NULL; +SQL + echo "" + rm -f "$IN_F" "$OUT_F" +} + +NT=30 + +echo "==========================================" +echo " pac_clip_sum ATTACK EVALUATION" +echo "==========================================" +echo "" + +# --- Attack 1: Baseline variance classifier (simplest) --- +echo "## ATTACK 1: Single-query variance classifier" +echo "N=1000, target=999999, filter<=3, $NT trials" +echo "" +for CLIP in off 2 3; do + run_scenario "atk1" 1000 999999 3 "$CLIP" $NT +done + +# --- Attack 2: Wide filter (clipping best-case) --- +echo "## ATTACK 2: Wide filter (all users in aggregation)" +echo "N=1000, target=999999, filter<=999, $NT trials" +echo "" +for CLIP in off 2 5 10; do + run_scenario "atk2" 1000 999999 999 "$CLIP" 
$NT +done + +# --- Attack 3: 10K users --- +echo "## ATTACK 3: 10K users, extreme outlier" +echo "N=10000, target=9999999, filter<=2, $NT trials" +echo "" +for CLIP in off 2 3; do + run_scenario "atk3" 10000 9999999 2 "$CLIP" $NT +done + +# --- Attack 4: Over-clipping --- +echo "## ATTACK 4: Over-clipping (too aggressive)" +echo "N=1000, target=999999, filter<=3, 15 trials" +echo "clip_support=10 with only 3-4 users => no supported levels" +echo "" +for CLIP in off 5 10; do + run_scenario "atk4" 1000 999999 3 "$CLIP" 15 +done + +# --- Attack 5: Wide filter + over-clipping --- +echo "## ATTACK 5: Wide filter + aggressive clipping" +echo "N=1000, target=999999, filter<=999, 15 trials" +echo "" +for CLIP in off 50 100; do + run_scenario "atk5" 1000 999999 999 "$CLIP" 15 +done + +# --- Attack 6: Clip after filter vs clip on full table (Dandan's concern) --- +# pac_clip_sum clips AFTER filtering (only filtered rows enter the aggregate). +# An adversary might exploit this: the clipping behavior differs depending on +# which users are in the filter. Compare filter-then-clip (what pac_clip_sum does) +# vs clip-all-then-filter (manual pre-clipping of the full table, then query). +echo "## ATTACK 6: Clip-after-filter vs clip-full-table" +echo "N=1000, target=999999, filter<=3, $NT trials" +echo "Tests Dandan's hypothesis: clipping after filtering leaks more than" +echo "clipping the entire dataset. We compare pac_clip_sum (clips filtered rows)" +echo "vs manual pre-clipping of all rows then querying without clip_support." 
+echo "" + +# 6a: pac_clip_sum (clip after filter) — already covered in atk1 clip=2 +echo "### 6a: clip-after-filter (pac_clip_support=2)" +echo "(Same as Attack 1 clip=2)" +echo "" + +# 6b: clip-full-table-then-query (no pac_clip_support, but data is pre-clipped) +run_sum_preclipped() { + local cond=$1 seed=$2 n_users=$3 target_val=$4 filter=$5 clip_support=$6 + local insert="" + [ "$cond" = "in" ] && insert="INSERT INTO users VALUES (0, ${target_val});" + # Pre-clip ALL rows at percentile bounds (simulating clip-on-full-table) + # Use the same magnitude-level idea: clip values to level-2 max (65535) + # This ensures the billionaire is clipped BEFORE filtering. + $DUCKDB -noheader -list 2>/dev/null < 13661; +ALTER TABLE users ADD PAC_KEY(user_id); +ALTER TABLE users SET PU; +SET pac_mi = 0.0078125; +SET pac_seed = ${seed}; +SELECT SUM(acctbal) FROM users WHERE user_id <= ${filter} OR user_id = 0; +SQL +} + +echo "### 6b: clip-full-table-then-query (pre-clip all to mu+3sigma=13661)" +echo "" +FBG_CLIP=$($DUCKDB -noheader -list -c \ + "SELECT SUM(LEAST((hash(i*31+7)%10000+1)::INTEGER, 13661)) FROM generate_series(1,3) t(i);" | tr -d '[:space:]') +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum_preclipped in $seed 1000 999999 3 off)" >> "$IN_F" + echo "out,$(run_sum_preclipped out $seed 1000 999999 3 off)" >> "$OUT_F" +done +echo "=== atk6b | N=1000 filt<=3 tv=999999 pre-clip=13661 ===" +$DUCKDB -markdown < ${FBG_CLIP} + 999999/2.0 THEN 1 + WHEN truth='out' AND v <= ${FBG_CLIP} + 999999/2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM raw WHERE v IS NOT NULL +UNION ALL +SELECT 'Var>200k', + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND ABS(v - ${FBG_CLIP}) > 200000 THEN 1 + WHEN truth='out' AND ABS(v - ${FBG_CLIP}) <= 200000 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) +FROM raw WHERE v IS NOT NULL; +SQL +echo "" +rm -f "$IN_F" "$OUT_F" diff --git a/attacks/clip_hardzero_stress.sh b/attacks/clip_hardzero_stress.sh 
new file mode 100755 index 00000000..33e78154 --- /dev/null +++ b/attacks/clip_hardzero_stress.sh @@ -0,0 +1,297 @@ +#!/usr/bin/env bash +# Stress-test hard-zero clipping: try every attack angle we can think of. +set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" +CLIP=2 + +run_sum() { + local cond=$1 seed=$2 n_users=$3 target_val=$4 filter=$5 extra_sql="${6:-}" + local insert="" + [ "$cond" = "in" ] && insert="$target_val" + $DUCKDB -noheader -list 2>/dev/null < threshold THEN 1 + WHEN truth='out' AND ABS(v - ${fbg}) <= threshold THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS acc + FROM raw, generate_series(10000, 500000, 10000) thresholds(threshold) + WHERE v IS NOT NULL + GROUP BY threshold +); + +-- Mean-based classifier (v > midpoint) +SELECT 'Midpoint clf' AS clf, + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND v > (${fbg} + ${fbg} + ${tv}) / 2.0 THEN 1 + WHEN truth='out' AND v <= (${fbg} + ${fbg} + ${tv}) / 2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM raw WHERE v IS NOT NULL; + +-- Likelihood ratio: compare distance to expected in vs expected out +SELECT 'LR clf' AS clf, + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND ABS(v - ${fbg}::DOUBLE - ${tv}) < ABS(v - ${fbg}::DOUBLE) THEN 1 + WHEN truth='out' AND ABS(v - ${fbg}::DOUBLE - ${tv}) >= ABS(v - ${fbg}::DOUBLE) THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM raw WHERE v IS NOT NULL; +SQL + echo "" +} + +analyze_composed() { + local label=$1 in_f=$2 out_f=$3 fbg=$4 tv=$5 nq=$6 + echo "=== $label ===" + $DUCKDB -markdown < ${fbg} + ${tv}/2.0 THEN 1 + WHEN truth='out' AND ravg <= ${fbg} + ${tv}/2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM cum GROUP BY qid ORDER BY qid; + +-- Majority vote +WITH votes AS ( + SELECT truth, trial, + SUM(CASE WHEN v > ${fbg} + ${tv}/2.0 THEN 1 ELSE 0 END) AS yes, COUNT(*) AS total + FROM raw WHERE v IS NOT NULL GROUP BY truth, 
trial +) +SELECT 'Majority vote (${nq}q)' AS method, + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND yes > total/2.0 THEN 1 + WHEN truth='out' AND yes <= total/2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM votes; + +-- Variance-based: use per-trial variance across queries +WITH trial_stats AS ( + SELECT truth, trial, STDDEV(v) AS trial_std + FROM raw WHERE v IS NOT NULL GROUP BY truth, trial +) +SELECT 'Variance of queries' AS method, + printf('in_std=%.0f out_std=%.0f ratio=%.2f', + AVG(CASE WHEN truth='in' THEN trial_std END), + AVG(CASE WHEN truth='out' THEN trial_std END), + AVG(CASE WHEN truth='in' THEN trial_std END) / + NULLIF(AVG(CASE WHEN truth='out' THEN trial_std END), 0)) AS stats +FROM trial_stats; +SQL + echo "" +} + +FBG=$($DUCKDB -noheader -list -c \ + "SELECT SUM((hash(i*31+7)%10000+1)::INTEGER) FROM generate_series(1,3) t(i);" | tr -d '[:space:]') +FBG999=$($DUCKDB -noheader -list -c \ + "SELECT SUM((hash(i*31+7)%10000+1)::INTEGER) FROM generate_series(1,999) t(i);" | tr -d '[:space:]') + +echo "=============================================" +echo " HARD-ZERO STRESS TEST" +echo " pac_clip_support=$CLIP, trying to break it" +echo "=============================================" +echo "Background: filter<=3 sum=$FBG, filter<=999 sum=$FBG999" +echo "" + +# --------------------------------------------------------------- +# TEST 1: High trial count (60 trials for statistical power) +# --------------------------------------------------------------- +echo "## TEST 1: High trial count (60 trials)" +echo "" +NT=60 +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 999999);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "60 trials, N=1000, tv=999999, filt<=3" "$IN_F" "$OUT_F" "$FBG" 999999 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 2: Composed queries (10
queries, reduce noise by averaging) +# --------------------------------------------------------------- +echo "## TEST 2: Composed queries (10 queries)" +echo "" +NT=30; NQ=10 +IN_F=$(mktemp); OUT_F=$(mktemp) +for trial in $(seq 1 $NT); do + for q in $(seq 1 $NQ); do + s=$((trial * 1000 + q)) + echo "in,${trial},${q},$(run_sum in $s 1000 "INSERT INTO users VALUES (0, 999999);" 3)" >> "$IN_F" + echo "out,${trial},${q},$(run_sum out $s 1000 "" 3)" >> "$OUT_F" + done +done +analyze_composed "30 trials x 10 queries" "$IN_F" "$OUT_F" "$FBG" 999999 10 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 3: Moderate outlier (50000 — same magnitude level as normal) +# --------------------------------------------------------------- +echo "## TEST 3: Moderate outlier (target=50000, same magnitude level)" +echo "Normal users ~5000, target ~50000 — both in level 2 (4096-65535)" +echo "The bitmap should show this level as supported, so NO clipping occurs" +echo "" +NT=30 +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 50000);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "Moderate outlier tv=50000" "$IN_F" "$OUT_F" "$FBG" 50000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 4: Two colluding outliers +# --------------------------------------------------------------- +echo "## TEST 4: Two colluding outliers" +echo "Two users with 999999 — level 3 now has 2 bitmap bits (meets threshold=2)" +echo "Hard-zero might NOT clip because level has enough support!"
+echo "" +NT=30 +IN_F=$(mktemp); OUT_F=$(mktemp) +TWO_INSERT="INSERT INTO users VALUES (0, 999999); INSERT INTO users VALUES (-1, 999999);" +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "$TWO_INSERT" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +# For two outliers: "in" filter catches user 0 (user -1 is NOT in filter <= 3) +# But user -1's value still goes into the table and affects the bitmap! +analyze "Two outliers (0 and -1), filt<=3" "$IN_F" "$OUT_F" "$FBG" 999999 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 5: Dandan's filter probing attack +# --------------------------------------------------------------- +echo "## TEST 5: Dandan's filter probing" +echo "Attacker tries different filters to see if clipping behavior changes." +echo "If the outlier is present, the bitmap at level 3 has a bit set." +echo "Query 1: filter<=3 (includes user 0 if present)" +echo "Query 2: filter<=999 (includes everyone)" +echo "Difference in results might reveal membership." 
+echo "" +NT=30 +IN_F1=$(mktemp); OUT_F1=$(mktemp) +IN_F2=$(mktemp); OUT_F2=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 999999);" 3)" >> "$IN_F1" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F1" + echo "in,$(run_sum in $((seed+10000)) 1000 "INSERT INTO users VALUES (0, 999999);" 999)" >> "$IN_F2" + echo "out,$(run_sum out $((seed+10000)) 1000 "" 999)" >> "$OUT_F2" +done +analyze "Filter<=3 (narrow)" "$IN_F1" "$OUT_F1" "$FBG" 999999 +analyze "Filter<=999 (wide)" "$IN_F2" "$OUT_F2" "$FBG999" 999999 + +echo "=== Cross-filter differential ===" +$DUCKDB -markdown < 0 THEN 1 + WHEN n.truth='out' AND w.v - n.v <= 0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM narrow n JOIN wide w ON n.truth = w.truth AND n.trial = w.trial +WHERE n.v IS NOT NULL AND w.v IS NOT NULL; +SQL +echo "" +rm -f "$IN_F1" "$OUT_F1" "$IN_F2" "$OUT_F2" + +# --------------------------------------------------------------- +# TEST 6: 20K small items with high trial count +# --------------------------------------------------------------- +echo "## TEST 6: 20K small items, 30 trials" +echo "" +NT=30 +IN_F=$(mktemp); OUT_F=$(mktemp) +MULTI_INSERT="INSERT INTO users SELECT 0, 50 FROM generate_series(1,20000) t(i);" +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "$MULTI_INSERT" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "20K items x \$50, filt<=3" "$IN_F" "$OUT_F" "$FBG" 1000000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 7: Borderline outlier (value at level boundary) +# --------------------------------------------------------------- +echo "## TEST 7: Borderline outlier (target=65536, exactly level 3 boundary)" +echo "Just barely crosses into level 3 — minimum unsupported value" +echo "" +NT=30 +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users
VALUES (0, 65536);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "Borderline tv=65536" "$IN_F" "$OUT_F" "$FBG" 65536 +rm -f "$IN_F" "$OUT_F" + +echo "=============================================" +echo " STRESS TEST COMPLETE" +echo "=============================================" diff --git a/attacks/clip_multirow_test.sh b/attacks/clip_multirow_test.sh new file mode 100755 index 00000000..c818360f --- /dev/null +++ b/attacks/clip_multirow_test.sh @@ -0,0 +1,136 @@ +#!/usr/bin/env bash +# Attack 7: "20K small items" user — tests whether clipping catches +# a user whose individual rows are normal but total contribution is huge. +set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" +NT=30 +N=1000; FILT=3; TV_PER_ROW=50; TV_ROWS=20000 +# Total contribution: 20000 * 50 = 1,000,000 + +echo "=============================================" +echo " ATTACK 7: 20K small items user" +echo "=============================================" +echo " N=$N background users (1 row each)" +echo " Target user_id=0: $TV_ROWS rows x \$$TV_PER_ROW = \$$(( TV_ROWS * TV_PER_ROW ))" +echo " filter<=3, $NT trials" +echo "" + +FBG=$($DUCKDB -noheader -list -c \ + "SELECT SUM((hash(i*31+7)%10000+1)::INTEGER) FROM generate_series(1,${FILT}) t(i);" | tr -d '[:space:]') +TV_TOTAL=$((TV_ROWS * TV_PER_ROW)) +echo "Background SUM=$FBG, target total=$TV_TOTAL" +echo "" + +# --- 7a: No clipping (baseline) --- +run_noprotection() { + local cond=$1 seed=$2 insert="" + [ "$cond" = "in" ] && insert="INSERT INTO users SELECT 0, ${TV_PER_ROW} FROM generate_series(1,${TV_ROWS}) t(i);" + $DUCKDB -noheader -list 2>/dev/null </dev/null < 13661; +ALTER TABLE users ADD PAC_KEY(user_id); +ALTER TABLE users SET PU; +SET pac_mi = 0.0078125; +SET pac_seed = ${seed}; +SELECT SUM(acctbal) FROM users WHERE user_id <= ${FILT} OR user_id = 0; +SQL +} + +# --- 7c: pac_clip_sum (clip after 
filter, with pre-aggregation) --- +run_clipsum() { + local cond=$1 seed=$2 clip=$3 insert="" + [ "$cond" = "in" ] && insert="INSERT INTO users SELECT 0, ${TV_PER_ROW} FROM generate_series(1,${TV_ROWS}) t(i);" + $DUCKDB -noheader -list 2>/dev/null < ${FBG} + ${TV_TOTAL}/2.0 THEN 1 + WHEN truth='out' AND v <= ${FBG} + ${TV_TOTAL}/2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM raw WHERE v IS NOT NULL +UNION ALL +SELECT 'Var>200k', + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND ABS(v - ${FBG}) > 200000 THEN 1 + WHEN truth='out' AND ABS(v - ${FBG}) <= 200000 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) +FROM raw WHERE v IS NOT NULL; +SQL + echo "" +} + +# 7a: No clipping +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_noprotection in $seed)" >> "$IN_F" + echo "out,$(run_noprotection out $seed)" >> "$OUT_F" +done +analyze "7a: No clipping (baseline)" "$IN_F" "$OUT_F" +rm -f "$IN_F" "$OUT_F" + +# 7b: Full-table Winsorization +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_winsorized in $seed)" >> "$IN_F" + echo "out,$(run_winsorized out $seed)" >> "$OUT_F" +done +analyze "7b: Winsorization (per-row clip to 13661)" "$IN_F" "$OUT_F" +rm -f "$IN_F" "$OUT_F" + +# 7c: pac_clip_sum with clip_support=2 +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_clipsum in $seed 2)" >> "$IN_F" + echo "out,$(run_clipsum out $seed 2)" >> "$OUT_F" +done +analyze "7c: pac_clip_sum (clip_support=2)" "$IN_F" "$OUT_F" +rm -f "$IN_F" "$OUT_F" diff --git a/attacks/clip_shift2_stress.sh b/attacks/clip_shift2_stress.sh new file mode 100755 index 00000000..88567708 --- /dev/null +++ b/attacks/clip_shift2_stress.sh @@ -0,0 +1,241 @@ +#!/usr/bin/env bash +# Stress-test shift=2 (4x levels) with hard-zero clipping. +# Focus on edge cases that 4x granularity might miss. 
+set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" +CLIP=2 + +run_sum() { + local cond=$1 seed=$2 n_users=$3 target_insert="$4" filter=$5 + $DUCKDB -noheader -list 2>/dev/null < threshold THEN 1 + WHEN truth='out' AND ABS(v - ${fbg}) <= threshold THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS acc + FROM raw, generate_series(5000, 500000, 5000) thresholds(threshold) + WHERE v IS NOT NULL + GROUP BY threshold +); +SQL + echo "" +} + +FBG=$($DUCKDB -noheader -list -c \ + "SELECT SUM((hash(i*31+7)%10000+1)::INTEGER) FROM generate_series(1,3) t(i);" | tr -d '[:space:]') + +echo "=============================================" +echo " SHIFT=2 STRESS TEST (4x levels, hard-zero)" +echo " pac_clip_support=$CLIP" +echo "=============================================" +echo "" + +NT=30 + +# --------------------------------------------------------------- +# TEST 1: 3.5x outlier (within 4x boundary — should NOT be caught) +# Normal ~5000, target=17000 (3.4x) +# Both in level 3 (4096-16383)? Let's check: +# 5000: bit_pos=12, (12-4)>>1 = 4. Level 4. +# 17000: bit_pos=14, (14-4)>>1 = 5. Level 5. DIFFERENT! +# Actually 17000 might be caught. Let's try 15000: +# 15000: bit_pos=13, (13-4)>>1 = 4. Level 4. SAME as 5000! +# --------------------------------------------------------------- +echo "## TEST 1: 3x outlier (target=15000, same level as normal)" +echo "5000→level 4, 15000→level 4 (bit_pos 13, (13-4)/2=4). Same level." +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 15000);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "3x outlier tv=15000" "$IN_F" "$OUT_F" "$FBG" 15000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 2: Just above 4x (target=20000) +# 20000: bit_pos=14, (14-4)>>1 = 5. Level 5. 
Different from 5000 (level 4). +# --------------------------------------------------------------- +echo "## TEST 2: 4x outlier (target=20000, different level)" +echo "5000→level 4, 20000→level 5. Should be caught." +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 20000);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "4x outlier tv=20000" "$IN_F" "$OUT_F" "$FBG" 20000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 3: Two colluding outliers (still breaks it?) +# --------------------------------------------------------------- +echo "## TEST 3: Two colluding outliers (999999)" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 999999); INSERT INTO users VALUES (-1, 999999);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "Two colluders tv=999999" "$IN_F" "$OUT_F" "$FBG" 999999 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 4: Outlier at exact level boundary (16384 = start of level 5) +# 16384: bit_pos=14, (14-4)>>1 = 5. Normal at level 4. +# --------------------------------------------------------------- +echo "## TEST 4: Boundary outlier (target=16384, exact level 5 start)" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 16384);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "Boundary tv=16384" "$IN_F" "$OUT_F" "$FBG" 16384 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 5: Outlier just below boundary (16383 = max of level 4) +# 16383: bit_pos=13, (13-4)>>1 = 4. Same level as 5000. 
+# --------------------------------------------------------------- +echo "## TEST 5: Just-below-boundary (target=16383, still level 4)" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 16383);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "Just-below tv=16383" "$IN_F" "$OUT_F" "$FBG" 16383 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 6: Many small outliers (10 users at 15000, all in same level) +# They all go to level 4 like normal users → supported → no clipping +# --------------------------------------------------------------- +echo "## TEST 6: 10 users at 15000 (3x, same level, all 'supported')" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +MULTI="INSERT INTO users SELECT -(i+1), 15000 FROM generate_series(1,10) t(i); INSERT INTO users VALUES (0, 15000);" +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "$MULTI" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "10 users at 15000" "$IN_F" "$OUT_F" "$FBG" 15000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 7: Wide filter + moderate outlier (best case for clipping) +# --------------------------------------------------------------- +FBG999=$($DUCKDB -noheader -list -c \ + "SELECT SUM((hash(i*31+7)%10000+1)::INTEGER) FROM generate_series(1,999) t(i);" | tr -d '[:space:]') +echo "## TEST 7: Wide filter + moderate outlier (tv=50000)" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 50000);" 999)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 999)" >> "$OUT_F" +done +analyze "Wide filter tv=50000" "$IN_F" "$OUT_F" "$FBG999" 50000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 8: 20K small 
items (pre-aggregation test) +# --------------------------------------------------------------- +echo "## TEST 8: 20K small items (\$50 x 20000 = \$1M)" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users SELECT 0, 50 FROM generate_series(1,20000) t(i);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "20K items" "$IN_F" "$OUT_F" "$FBG" 1000000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 9: Composed queries (10 queries averaged) +# --------------------------------------------------------------- +echo "## TEST 9: 10 composed queries, tv=999999" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for trial in $(seq 1 $NT); do + for q in $(seq 1 10); do + s=$((trial * 1000 + q)) + echo "in,${trial},${q},$(run_sum in $s 1000 "INSERT INTO users VALUES (0, 999999);" 3)" >> "$IN_F" + echo "out,${trial},${q},$(run_sum out $s 1000 "" 3)" >> "$OUT_F" + done +done +echo "=== 10 composed queries ===" +$DUCKDB -markdown < ${FBG} + 999999/2.0 THEN 1 ELSE 0 END) AS yes, COUNT(*) AS total + FROM raw WHERE v IS NOT NULL GROUP BY truth, trial +) +SELECT 'Majority vote (10q)' AS method, + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND yes > total/2.0 THEN 1 + WHEN truth='out' AND yes <= total/2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM votes; + +WITH trial_stats AS ( + SELECT truth, trial, STDDEV(v) AS trial_std + FROM raw WHERE v IS NOT NULL GROUP BY truth, trial +) +SELECT 'Per-trial variance' AS method, + printf('in_std=%.0f out_std=%.0f ratio=%.2f', + AVG(CASE WHEN truth='in' THEN trial_std END), + AVG(CASE WHEN truth='out' THEN trial_std END), + AVG(CASE WHEN truth='in' THEN trial_std END) / + NULLIF(AVG(CASE WHEN truth='out' THEN trial_std END), 0)) AS stats +FROM trial_stats; +SQL +echo "" +rm -f "$IN_F" "$OUT_F" + +echo "=============================================" +echo " STRESS TEST 
COMPLETE" +echo "=============================================" diff --git a/attacks/clipping_experiment.sh b/attacks/clipping_experiment.sh new file mode 100755 index 00000000..ef2c7842 --- /dev/null +++ b/attacks/clipping_experiment.sh @@ -0,0 +1,215 @@ +#!/usr/bin/env bash +# Clipping experiment: does pre-PAC outlier clipping reduce attack success + improve utility? +# +# For each clipping threshold t in {1, 2, 3, 5, inf}, clips data at μ ± t·σ +# (recursive until convergence), then runs PAC and measures attack accuracy + utility. +set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" + +N=1000; TV=999999; FILT=3; NTRIALS=30 +CLIP_ITERS=20 +T_VALUES="1 2 3 5 inf" + +echo "============================================================" +echo " CLIPPING EXPERIMENT" +echo " Does pre-PAC outlier clipping reduce attack accuracy?" +echo "============================================================" +echo " N=$N users, target=$TV, filter<=$FILT, $NTRIALS trials" +echo " Clipping thresholds (t): $T_VALUES" +echo "============================================================" +echo "" + +# Generate SQL for recursive clipping: CLIP_ITERS rounds of UPDATE at μ ± t·σ. +# Each round recomputes μ,σ from the current data. The WHERE clause ensures +# convergence — once all values are within bounds, subsequent rounds are no-ops. 
+gen_clip_sql() { + local t=$1 sql="" i + for i in $(seq 1 $CLIP_ITERS); do + sql+="UPDATE users SET acctbal = LEAST(GREATEST(acctbal, + (SELECT (AVG(acctbal) - ${t} * STDDEV_POP(acctbal))::INTEGER FROM users)), + (SELECT (AVG(acctbal) + ${t} * STDDEV_POP(acctbal))::INTEGER FROM users)) +WHERE acctbal < (SELECT (AVG(acctbal) - ${t} * STDDEV_POP(acctbal))::INTEGER FROM users) + OR acctbal > (SELECT (AVG(acctbal) + ${t} * STDDEV_POP(acctbal))::INTEGER FROM users); +" + done + printf '%s' "$sql" +} + +# Ground truth: no clipping, no PAC noise +run_true_unclipped() { + local cond=$1 insert="" + [ "$cond" = "in" ] && insert="INSERT INTO users VALUES (0, ${TV});" + $DUCKDB -noheader -list </dev/null <> "$GROUND_F" + echo "${t},out,${CLIP_OUT}" >> "$GROUND_F" + + # Run PAC trials + for seed in $(seq 1 $NTRIALS); do + v_in=$(run_clipped_pac in "$seed" "$t" | tr -d '[:space:]') + v_out=$(run_clipped_pac out "$seed" "$t" | tr -d '[:space:]') + echo "${t},in,${seed},${v_in}" >> "$RESULTS_F" + echo "${t},out,${seed},${v_out}" >> "$RESULTS_F" + printf "." 
+ done + echo " done" +done + +echo "" +echo "============================================================" +echo " RESULTS" +echo "============================================================" +echo "" + +# --- Analysis --- +$DUCKDB -markdown < 200000 + 100.0 * SUM(CASE + WHEN r.truth='in' AND ABS(r.v - ${TRUE_OUT}) > 200000 THEN 1 + WHEN r.truth='out' AND ABS(r.v - ${TRUE_OUT}) <= 200000 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS var_acc, + -- Midpoint classifier: v > (clipped_in + clipped_out) / 2 + 100.0 * SUM(CASE + WHEN r.truth='in' AND r.v > (g_in.clipped_true + g_out.clipped_true) / 2.0 THEN 1 + WHEN r.truth='out' AND r.v <= (g_in.clipped_true + g_out.clipped_true) / 2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS mid_acc, + STDDEV(CASE WHEN r.truth='in' THEN r.v END) AS std_in, + STDDEV(CASE WHEN r.truth='out' THEN r.v END) AS std_out + FROM results r + JOIN ground g_in ON r.t_val = g_in.t_val AND g_in.truth = 'in' + JOIN ground g_out ON r.t_val = g_out.t_val AND g_out.truth = 'out' + WHERE r.v IS NOT NULL + GROUP BY r.t_val, g_in.clipped_true, g_out.clipped_true +), +utility AS ( + SELECT r.t_val, + AVG(CASE WHEN r.truth='in' + THEN ABS(r.v - ${TRUE_IN}::DOUBLE) / NULLIF(ABS(${TRUE_IN}::DOUBLE), 0) * 100 + ELSE ABS(r.v - ${TRUE_OUT}::DOUBLE) / NULLIF(ABS(${TRUE_OUT}::DOUBLE), 0) * 100 + END) AS mape + FROM results r + WHERE r.v IS NOT NULL + GROUP BY r.t_val +), +bias AS ( + SELECT g.t_val, + MAX(CASE WHEN g.truth='in' THEN ABS(g.clipped_true - ${TRUE_IN}::DOUBLE) END) AS bias_in, + MAX(CASE WHEN g.truth='out' THEN ABS(g.clipped_true - ${TRUE_OUT}::DOUBLE) END) AS bias_out + FROM ground g + GROUP BY g.t_val +) +SELECT a.t_val AS t, + printf('%.1f%%', a.var_acc) AS attack_acc_200k, + printf('%.1f%%', a.mid_acc) AS attack_acc_mid, + printf('%.1f%%', ut.mape) AS mape_vs_true, + printf('%.0f', a.std_in) AS noise_std_in, + printf('%.0f', a.std_out) AS noise_std_out, + printf('%.0f', b.bias_in) AS clip_bias_in, + printf('%.0f', b.bias_out) AS clip_bias_out +FROM 
attack a +JOIN utility ut ON a.t_val = ut.t_val +JOIN bias b ON a.t_val = b.t_val +ORDER BY CASE a.t_val WHEN 'inf' THEN 999 ELSE a.t_val::INT END; + +-- Detailed per-condition stats +SELECT r.t_val AS t, r.truth, + printf('%.0f', AVG(r.v)) AS mean_pac, + printf('%.0f', STDDEV(r.v)) AS std_pac, + printf('%.0f', g.clipped_true) AS truth_clipped, + COUNT(*) AS n +FROM results r +JOIN ground g ON r.t_val = g.t_val AND r.truth = g.truth +WHERE r.v IS NOT NULL +GROUP BY r.t_val, r.truth, g.clipped_true +ORDER BY CASE r.t_val WHEN 'inf' THEN 999 ELSE r.t_val::INT END, r.truth; +SQL + +rm -f "$RESULTS_F" "$GROUND_F" + +echo "" +echo "============================================================" +echo " INTERPRETATION" +echo "============================================================" +echo " attack_acc_200k: variance classifier (|v - bg| > 200k), 50% = random" +echo " attack_acc_mid: midpoint classifier (optimal threshold), 50% = random" +echo " mape_vs_true: mean |noised - unclipped_truth| / |unclipped_truth|" +echo " clip_bias_in/out: |clipped_truth - unclipped_truth|" +echo " Unclipped truths: in=$TRUE_IN out=$TRUE_OUT" +echo "============================================================" diff --git a/attacks/output_clipping_experiment.sh b/attacks/output_clipping_experiment.sh new file mode 100755 index 00000000..2fb161a4 --- /dev/null +++ b/attacks/output_clipping_experiment.sh @@ -0,0 +1,244 @@ +#!/usr/bin/env bash +# Output clipping experiment: clip the PAC query RESULT (not input data) at bounds +# derived from baseline column statistics. +# +# Unlike input clipping (which modifies stored values before PAC), output clipping +# leaves the data untouched. After PAC returns a noised result, we clamp it to +# [n·(μ - t·σ), n·(μ + t·σ)] where μ,σ are pre-computed column stats and n is the +# expected number of users in the filter. +# +# Key property: if the billionaire is NOT in the filter, the result is already +# within bounds and nothing changes. 
Clipping only fires when an outlier inflates +# the result beyond the expected range. +set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" + +N=1000; TV=999999; FILT=3; NTRIALS=30 +T_VALUES="1 2 3 5 inf" + +echo "============================================================" +echo " OUTPUT CLIPPING EXPERIMENT" +echo " Clip the PAC result post-hoc at n·(μ ± t·σ)" +echo "============================================================" +echo " N=$N users, target=$TV, filter<=$FILT, $NTRIALS trials" +echo " Clipping thresholds (t): $T_VALUES" +echo "============================================================" +echo "" + +# --- Baseline column statistics (from N users, NO target) --- +# These represent the "known" column distribution used for clipping bounds. +read MU SIGMA <<< "$($DUCKDB -noheader -csv -separator ' ' </dev/null <> "$RESULTS_F" + echo "out,${seed},${v_out}" >> "$RESULTS_F" + printf "." 
+done +echo " done" + +echo "" +echo "============================================================" +echo " RESULTS" +echo "============================================================" +echo "" + +# --- Analysis: for each t, apply output clipping and compute metrics --- +$DUCKDB -markdown < 200000 + 100.0 * SUM(CASE + WHEN c.truth='in' AND ABS(c.v - ${TRUE_OUT}) > 200000 THEN 1 + WHEN c.truth='out' AND ABS(c.v - ${TRUE_OUT}) <= 200000 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS var_acc, + -- Midpoint classifier: v > (clipped_in_truth + clipped_out_truth) / 2 + 100.0 * SUM(CASE + WHEN c.truth='in' AND c.v > (g_in.clipped_true + g_out.clipped_true) / 2.0 THEN 1 + WHEN c.truth='out' AND c.v <= (g_in.clipped_true + g_out.clipped_true) / 2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS mid_acc, + STDDEV(CASE WHEN c.truth='in' THEN c.v END) AS std_in, + STDDEV(CASE WHEN c.truth='out' THEN c.v END) AS std_out, + -- Fraction of results that hit a clip bound + 100.0 * SUM(CASE WHEN c.truth='in' AND (c.v = c.lo OR c.v = c.hi) THEN 1 ELSE 0 END)::DOUBLE + / NULLIF(SUM(CASE WHEN c.truth='in' THEN 1 ELSE 0 END), 0) AS pct_clipped_in, + 100.0 * SUM(CASE WHEN c.truth='out' AND (c.v = c.lo OR c.v = c.hi) THEN 1 ELSE 0 END)::DOUBLE + / NULLIF(SUM(CASE WHEN c.truth='out' THEN 1 ELSE 0 END), 0) AS pct_clipped_out + FROM clipped c + JOIN ground g_in ON c.t_val = g_in.t_val AND g_in.truth = 'in' + JOIN ground g_out ON c.t_val = g_out.t_val AND g_out.truth = 'out' + GROUP BY c.t_val, g_in.clipped_true, g_out.clipped_true +), +utility AS ( + SELECT c.t_val, + AVG(CASE WHEN c.truth='in' + THEN ABS(c.v - ${TRUE_IN}::DOUBLE) / NULLIF(ABS(${TRUE_IN}::DOUBLE), 0) * 100 + ELSE ABS(c.v - ${TRUE_OUT}::DOUBLE) / NULLIF(ABS(${TRUE_OUT}::DOUBLE), 0) * 100 + END) AS mape + FROM clipped c + GROUP BY c.t_val +), +bias AS ( + SELECT g.t_val, + MAX(CASE WHEN g.truth='in' THEN ABS(g.clipped_true - ${TRUE_IN}::DOUBLE) END) AS bias_in, + MAX(CASE WHEN g.truth='out' THEN ABS(g.clipped_true - 
${TRUE_OUT}::DOUBLE) END) AS bias_out + FROM ground g + GROUP BY g.t_val +) +SELECT CASE WHEN a.t_val = 999 THEN 'inf' ELSE a.t_val::VARCHAR END AS t, + printf('%.1f%%', a.var_acc) AS attack_acc_200k, + printf('%.1f%%', a.mid_acc) AS attack_acc_mid, + printf('%.1f%%', ut.mape) AS mape_vs_true, + printf('%.0f', a.std_in) AS noise_std_in, + printf('%.0f', a.std_out) AS noise_std_out, + printf('%.0f', b.bias_in) AS clip_bias_in, + printf('%.0f', b.bias_out) AS clip_bias_out, + printf('%.0f%%', a.pct_clipped_in) AS pct_clip_in, + printf('%.0f%%', a.pct_clipped_out) AS pct_clip_out +FROM attack a +JOIN utility ut ON a.t_val = ut.t_val +JOIN bias b ON a.t_val = b.t_val +ORDER BY CASE WHEN a.t_val = 999 THEN 999 ELSE a.t_val END; + +-- Detailed per-condition stats +SELECT CASE WHEN c.t_val = 999 THEN 'inf' ELSE c.t_val::VARCHAR END AS t, + c.truth, + printf('%.0f', AVG(c.v)) AS mean_clipped, + printf('%.0f', STDDEV(c.v)) AS std_clipped, + printf('%.0f', AVG(c.raw_v)) AS mean_raw, + printf('%.0f', STDDEV(c.raw_v)) AS std_raw, + printf('%.0f', g.clipped_true) AS truth_clipped, + COUNT(*) AS n +FROM clipped c +JOIN ground g ON c.t_val = g.t_val AND c.truth = g.truth +GROUP BY c.t_val, c.truth, g.clipped_true +ORDER BY CASE WHEN c.t_val = 999 THEN 999 ELSE c.t_val END, c.truth; +SQL + +rm -f "$RESULTS_F" + +echo "" +echo "============================================================" +echo " INTERPRETATION" +echo "============================================================" +echo " Output clipping: CLAMP(pac_result, n·(μ-tσ), n·(μ+tσ))" +echo " Bounds use baseline stats (μ=$MU, σ=$SIGMA, n=$N_BASE)" +echo " attack_acc_200k: variance classifier, 50% = random" +echo " attack_acc_mid: midpoint classifier, 50% = random" +echo " pct_clip_in/out: fraction of results hitting a bound" +echo " Unclipped truths: in=$TRUE_IN out=$TRUE_OUT" +echo "============================================================" diff --git a/attacks/output_clipping_v2_experiment.sh 
b/attacks/output_clipping_v2_experiment.sh new file mode 100644 index 00000000..9fb3ac71 --- /dev/null +++ b/attacks/output_clipping_v2_experiment.sh @@ -0,0 +1,220 @@ +#!/usr/bin/env bash +# Output clipping v2: clip values at QUERY TIME using pre-computed baseline bounds, +# BEFORE PAC computes sensitivity. +# +# Pipeline per query: +# 1. Data is stored unmodified (billionaire's 999999 stays as-is) +# 2. At query time, clamp each value to [μ-tσ, μ+tσ] (baseline stats, single pass) +# 3. PAC computes sensitivity from the CLIPPED range → noise ∝ 2tσ +# 4. Return noised result +# +# Key property: bounds are identical for in/out (derived from baseline, not current data), +# so PAC calibrates the SAME noise regardless of membership. No side-channel from +# differing sensitivities. +# +# Simulated by: UPDATE + PAC_KEY in the same session. The UPDATE models the query-time +# clamping; PAC then sees the clipped range for sensitivity. +set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" + +N=1000; TV=999999; FILT=3; NTRIALS=30 +T_VALUES="1 2 3 5 inf" + +echo "============================================================" +echo " OUTPUT CLIPPING v2" +echo " Clip at query time, BEFORE PAC sensitivity computation" +echo "============================================================" +echo " N=$N users, target=$TV, filter<=$FILT, $NTRIALS trials" +echo " Clipping thresholds (t): $T_VALUES" +echo "============================================================" +echo "" + +# --- Baseline column stats (from N users, NO target) --- +read MU SIGMA <<< "$($DUCKDB -noheader -csv -separator ' ' </dev/null <> "$GROUND_F" + echo "${t},out,${CLIP_OUT}" >> "$GROUND_F" + + # Run PAC trials + for seed in $(seq 1 $NTRIALS); do + v_in=$(run_clipped_pac in "$seed" "$t" | tr -d '[:space:]') + v_out=$(run_clipped_pac out "$seed" "$t" | tr -d '[:space:]') + echo "${t},in,${seed},${v_in}" >> "$RESULTS_F" + 
echo "${t},out,${seed},${v_out}" >> "$RESULTS_F" + printf "." + done + echo " done" +done + +echo "" +echo "============================================================" +echo " RESULTS" +echo "============================================================" +echo "" + +# --- Analysis --- +$DUCKDB -markdown < 200000 THEN 1 + WHEN r.truth='out' AND ABS(r.v - ${TRUE_OUT}) <= 200000 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS var_acc, + 100.0 * SUM(CASE + WHEN r.truth='in' AND r.v > (g_in.clipped_true + g_out.clipped_true) / 2.0 THEN 1 + WHEN r.truth='out' AND r.v <= (g_in.clipped_true + g_out.clipped_true) / 2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS mid_acc, + STDDEV(CASE WHEN r.truth='in' THEN r.v END) AS std_in, + STDDEV(CASE WHEN r.truth='out' THEN r.v END) AS std_out + FROM results r + JOIN ground g_in ON r.t_val = g_in.t_val AND g_in.truth = 'in' + JOIN ground g_out ON r.t_val = g_out.t_val AND g_out.truth = 'out' + WHERE r.v IS NOT NULL + GROUP BY r.t_val, g_in.clipped_true, g_out.clipped_true +), +utility AS ( + SELECT r.t_val, + AVG(CASE WHEN r.truth='in' + THEN ABS(r.v - ${TRUE_IN}::DOUBLE) / NULLIF(ABS(${TRUE_IN}::DOUBLE), 0) * 100 + ELSE ABS(r.v - ${TRUE_OUT}::DOUBLE) / NULLIF(ABS(${TRUE_OUT}::DOUBLE), 0) * 100 + END) AS mape + FROM results r + WHERE r.v IS NOT NULL + GROUP BY r.t_val +), +bias AS ( + SELECT g.t_val, + MAX(CASE WHEN g.truth='in' THEN ABS(g.clipped_true - ${TRUE_IN}::DOUBLE) END) AS bias_in, + MAX(CASE WHEN g.truth='out' THEN ABS(g.clipped_true - ${TRUE_OUT}::DOUBLE) END) AS bias_out + FROM ground g + GROUP BY g.t_val +) +SELECT a.t_val AS t, + printf('%.1f%%', a.var_acc) AS attack_acc_200k, + printf('%.1f%%', a.mid_acc) AS attack_acc_mid, + printf('%.1f%%', ut.mape) AS mape_vs_true, + printf('%.0f', a.std_in) AS noise_std_in, + printf('%.0f', a.std_out) AS noise_std_out, + printf('%.1fx', a.std_in / NULLIF(a.std_out, 0)) AS std_ratio, + printf('%.0f', b.bias_in) AS clip_bias_in, + printf('%.0f', b.bias_out) AS clip_bias_out +FROM attack a 
+JOIN utility ut ON a.t_val = ut.t_val +JOIN bias b ON a.t_val = b.t_val +ORDER BY CASE a.t_val WHEN 'inf' THEN 999 ELSE a.t_val::INT END; + +-- Detailed per-condition stats +SELECT r.t_val AS t, r.truth, + printf('%.0f', AVG(r.v)) AS mean_pac, + printf('%.0f', STDDEV(r.v)) AS std_pac, + printf('%.0f', g.clipped_true) AS truth_clipped, + COUNT(*) AS n +FROM results r +JOIN ground g ON r.t_val = g.t_val AND r.truth = g.truth +WHERE r.v IS NOT NULL +GROUP BY r.t_val, r.truth, g.clipped_true +ORDER BY CASE r.t_val WHEN 'inf' THEN 999 ELSE r.t_val::INT END, r.truth; +SQL + +rm -f "$RESULTS_F" "$GROUND_F" + +echo "" +echo "============================================================" +echo " INTERPRETATION" +echo "============================================================" +echo " Single-pass clip at [μ-tσ, μ+tσ] using baseline stats" +echo " μ=$MU, σ=$SIGMA (from $N background users)" +echo " PAC sees clipped range → sensitivity = 2tσ for both in/out" +echo " std_ratio: noise_std_in / noise_std_out (1.0 = no side-channel)" +echo " Unclipped truths: in=$TRUE_IN out=$TRUE_OUT" +echo "============================================================" From d5d2243e39af2bef014d3d2248450e99ba0133ae Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 16:51:43 +0100 Subject: [PATCH 10/27] Update CLAUDE.md, add hooks, skills, and permissions - CLAUDE.md: added code style rules (clang-tidy naming, clang-format style), attack evaluation section, development rules - .claude/settings.json: PostToolUse hook to auto-run make format-fix after edits - Skills: /run-attacks, /test-clip, /explain-pac, /explain-dp, /explain-pac-ddl Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/settings.json | 17 ++++++ .claude/skills/explain-dp/SKILL.md | 55 ++++++++++++++++++ .claude/skills/explain-pac-ddl/SKILL.md | 74 +++++++++++++++++++++++++ .claude/skills/explain-pac/SKILL.md | 49 ++++++++++++++++ .claude/skills/run-attacks/SKILL.md | 23 ++++++++ .claude/skills/test-clip/SKILL.md | 11 
++++ CLAUDE.md | 33 +++++++++++ 7 files changed, 262 insertions(+) create mode 100644 .claude/settings.json create mode 100644 .claude/skills/explain-dp/SKILL.md create mode 100644 .claude/skills/explain-pac-ddl/SKILL.md create mode 100644 .claude/skills/explain-pac/SKILL.md create mode 100644 .claude/skills/run-attacks/SKILL.md create mode 100644 .claude/skills/test-clip/SKILL.md diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 00000000..786c761c --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,17 @@ +{ + "hooks": { + "PostToolUse": [ + { + "matcher": "Edit", + "hooks": [ + { + "type": "command", + "command": "make format-fix 2>/dev/null || true", + "timeout": 30, + "statusMessage": "Running format-fix..." + } + ] + } + ] + } +} diff --git a/.claude/skills/explain-dp/SKILL.md b/.claude/skills/explain-dp/SKILL.md new file mode 100644 index 00000000..103f0d2a --- /dev/null +++ b/.claude/skills/explain-dp/SKILL.md @@ -0,0 +1,55 @@ +--- +name: explain-dp +description: Reference material for differential privacy concepts. Auto-loaded when discussing privacy, attacks, sensitivity, or clipping. +--- + +## Differential Privacy (DP) + +### Definition + +A randomized mechanism M satisfies (ε,δ)-differential privacy if for all +neighboring datasets D, D' (differing in one individual) and all outputs S: + + P[M(D) ∈ S] ≤ e^ε · P[M(D') ∈ S] + δ + +Smaller ε = stronger privacy. δ is the probability of catastrophic failure. + +### Key concepts + +- **Sensitivity**: Maximum change in query output when one individual is + added/removed. For SUM with values in [L,U]: sensitivity = U-L. +- **Laplace mechanism**: Add Laplace(0, sensitivity/ε) noise. Standard for counting queries. +- **Gaussian mechanism**: Add N(0, sensitivity²·2ln(1.25/δ)/ε²) noise. Better for composition. +- **Composition**: Running k queries on the same data costs k·ε total (basic), + or O(√k·ε) with advanced composition. 
+- **Post-processing**: Any function of a DP output is still DP. Free to clip/transform after noise. + +### Membership Inference Attack (MIA) + +The adversary's game: given a query result, determine whether a specific individual +is in the dataset. Attack accuracy = fraction of correct guesses across trials. +50% = random (DP working). >50% = information leakage. + +### Bounded user contribution (Wilson et al. 2019) + +Standard approach for DP SQL: +1. GROUP BY user_id → compute per-user contribution +2. Clip each user's contribution to [L, U] +3. Sum clipped contributions +4. Add noise calibrated to U-L + +This handles both single-large-value outliers and many-small-values users. +Reference: "Differentially Private SQL with Bounded User Contribution" (Google). + +### How PAC differs from DP + +- PAC bounds **mutual information** (pac_mi), not ε-divergence +- PAC does NOT compute sensitivity — noise is calibrated differently +- PAC uses 64 parallel counters + bitslice encoding for efficient aggregation +- pac_clip_sum uses **support-based magnitude clipping** instead of hard [L,U] bounds + +### Input clipping (Winsorization) + +Clip individual values to [μ-tσ, μ+tσ] before aggregation. Reduces sensitivity. +Well-established in DP literature. Limitations: doesn't catch users with many +small values (need per-user contribution clipping instead). diff --git a/.claude/skills/explain-pac-ddl/SKILL.md b/.claude/skills/explain-pac-ddl/SKILL.md new file mode 100644 index 00000000..da2cac75 --- /dev/null +++ b/.claude/skills/explain-pac-ddl/SKILL.md @@ -0,0 +1,74 @@ +--- +name: explain-pac-ddl +description: Reference for PAC DDL syntax — PAC_KEY, PAC_LINK, PROTECTED, SET PU, and the parser. Auto-loaded when discussing table setup, privacy units, or protected columns. +--- + +## PAC DDL Overview + +PAC extends SQL DDL with privacy annotations. 
The parser (`src/parser/pac_parser.cpp`, +`src/parser/pac_parser_helpers.cpp`) intercepts CREATE TABLE and ALTER TABLE statements +to extract PAC-specific clauses before forwarding to DuckDB. + +### Privacy Unit (PU) table + +The PU table is the entity being protected (e.g., customer). One row = one individual. + +```sql +-- Mark a table as the privacy unit +ALTER TABLE customer ADD PAC_KEY (c_custkey); +ALTER TABLE customer SET PU; + +-- Protect specific columns from direct projection +ALTER PU TABLE customer ADD PROTECTED (c_acctbal, c_name, c_address); +``` + +- `PAC_KEY (col)`: Designates the column(s) that uniquely identify a privacy unit. + Must be set before `SET PU`. +- `SET PU`: Marks the table as the privacy unit. After this, aggregates on linked + tables get PAC noise. +- `PROTECTED (col1, col2, ...)`: Columns that cannot be directly projected. + Aggregates (SUM, COUNT, AVG) on protected columns go through PAC. + +### Linking tables to the PU + +Non-PU tables reference the PU table via foreign-key-like links: + +```sql +ALTER TABLE orders ADD PAC_LINK (o_custkey) REFERENCES customer (c_custkey); +ALTER TABLE lineitem ADD PAC_LINK (l_orderkey) REFERENCES orders (o_orderkey); +``` + +- `PAC_LINK (local_col) REFERENCES table(ref_col)`: Declares how to join this + table back to the PU. The compiler uses these links to inject the PU hash + into the query plan. +- Links can be chained: `lineitem → orders → customer`. + +### CREATE TABLE syntax (inline) + +PAC clauses can be inlined in CREATE TABLE: + +```sql +CREATE PU TABLE employees ( + id INTEGER, + department VARCHAR, + salary DECIMAL(10,2), + PAC_KEY (id), + PROTECTED (salary) +); +``` + +The parser strips PAC_KEY, PAC_LINK, and PROTECTED clauses from the CREATE +statement, forwards the clean SQL to DuckDB, then applies the PAC metadata +via ALTER TABLE internally. + +### Common mistakes + +- `PAC_LINK(col, table, ref)` — wrong. Use `PAC_LINK (col) REFERENCES table(ref)`. +- `PROTECTED salary` — wrong. 
Must have parentheses: `PROTECTED (salary)`. +- ALTER TABLE on a PU table requires `ALTER PU TABLE`, not `ALTER TABLE`. + +### Key source files + +- `src/parser/pac_parser.cpp` — main parser hook (intercepts SQL statements) +- `src/parser/pac_parser_helpers.cpp` — extraction of PAC_KEY, PAC_LINK, PROTECTED +- `src/core/pac_metadata.cpp` — in-memory metadata storage for PU/link/protected info diff --git a/.claude/skills/explain-pac/SKILL.md b/.claude/skills/explain-pac/SKILL.md new file mode 100644 index 00000000..8982ca47 --- /dev/null +++ b/.claude/skills/explain-pac/SKILL.md @@ -0,0 +1,49 @@ +--- +name: explain-pac +description: Reference material for PAC privacy internals. Auto-loaded when discussing PAC mechanism, noise, counters, or clipping. +--- + +## PAC Privacy Overview + +PAC (Probably Approximately Correct) privacy is a framework for privatizing SQL +aggregates, described in [SIMD-PAC-DB](https://arxiv.org/abs/2603.15023). + +### Core mechanism + +- Each aggregate maintains **64 parallel counters** (one per bit of a hashed key) +- Each row's value is added to ~32 counters (determined by pac_hash of the PU key) +- At finalization, noise calibrated to a **mutual information bound** (pac_mi) is + added, and the result is estimated from the counters +- PAC does NOT compute sensitivity (unlike differential privacy) + +### SWAR bitslice encoding + +- Counters are packed as 4 × uint16_t per uint64_t (SWAR = SIMD Within A Register) +- This enables processing 4 counters per instruction without actual SIMD intrinsics +- Overflow cascades to 32-bit overflow counters when 16-bit counters saturate + +### pac_clip_sum (contribution clipping) + +- **Pre-aggregation**: Query rewriter inserts `GROUP BY pu_hash` to sum each user's + rows into a single contribution (handles the "50K small items" case) +- **Magnitude levels**: Values decomposed into levels (4x per level, 2-bit shift). + Level 0: 0-255, Level 1: 256-1023, Level 2: 1024-4095, etc. 
+- **Bitmap tracking**: Each level maintains a 64-bit bitmap of distinct contributors + (using birthday-paradox estimation from popcount) +- **Hard-zero**: Levels with fewer distinct contributors than `pac_clip_support` + contribute nothing to the result (prevents variance side-channel attacks) + +### Key settings + +- `pac_mi`: Mutual information bound (0 = deterministic/no noise) +- `pac_seed`: RNG seed for reproducible noise +- `pac_clip_support`: Minimum distinct contributors per magnitude level (NULL = disabled) +- `pac_hash_repair`: Ensure pac_hash outputs exactly 32 bits set + +### DDL + +```sql +ALTER TABLE customer ADD PAC_KEY (c_custkey); +ALTER TABLE customer SET PU; +ALTER TABLE orders ADD PAC_LINK (o_custkey) REFERENCES customer (c_custkey); +``` diff --git a/.claude/skills/run-attacks/SKILL.md b/.claude/skills/run-attacks/SKILL.md new file mode 100644 index 00000000..a01e7c13 --- /dev/null +++ b/.claude/skills/run-attacks/SKILL.md @@ -0,0 +1,23 @@ +--- +name: run-attacks +description: Run the pac_clip_sum membership inference attack test suite and summarize results. +--- + +## Context + +PAC (Probably Approximately Correct) privacy privatizes SQL aggregates via 64 parallel +SWAR bitslice counters with MI-bounded noise. pac_clip_sum adds per-user contribution +clipping using magnitude-level decomposition (4x bands, 2-bit shift) with distinct-contributor +bitmaps. Unsupported outlier levels are hard-zeroed to prevent variance side-channel attacks. + +## Instructions + +1. Build if needed: `GEN=ninja make 2>&1 | tail -5` +2. Run the main attack suite: `bash attacks/clip_attack_test.sh 2>/dev/null` +3. Run the multi-row attack: `bash attacks/clip_multirow_test.sh 2>/dev/null` +4. 
Run stress tests if available: `bash attacks/clip_hardzero_stress.sh 2>/dev/null` + +Summarize results as a table: +- Attack scenario, clip_support value, attack accuracy, std_in, std_out, std ratio +- Flag any accuracy above 60% as a potential regression +- Compare to baselines in `attacks/clip_attack_results.md` diff --git a/.claude/skills/test-clip/SKILL.md b/.claude/skills/test-clip/SKILL.md new file mode 100644 index 00000000..66f7dc8a --- /dev/null +++ b/.claude/skills/test-clip/SKILL.md @@ -0,0 +1,11 @@ +--- +name: test-clip +description: Build and run pac_clip_sum unit tests. +--- + +## Instructions + +1. Build: `GEN=ninja make 2>&1 | tail -5` +2. Run clip_sum tests: `build/release/test/unittest "test/sql/pac_clip_sum*" 2>&1` +3. Report: number of assertions passed/failed +4. If any fail, show the failing test name and expected vs actual values diff --git a/CLAUDE.md b/CLAUDE.md index 21b11dba..acf449f3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -98,3 +98,36 @@ SET pac_mi = 0; -- disable noise for testing SET pac_seed = 42; -- reproducible results SET pac_clip_support = 40; -- enable clip rewrite with support threshold ``` + +## Code style (clang-tidy) + +The project uses clang-tidy with DuckDB's configuration (`.clang-tidy`). 
Key naming rules: + +- **Classes/Enums**: `CamelCase` (e.g., `PacClipSumIntState`) +- **Functions**: `CamelCase` (e.g., `GetLevel`, `AllocateLevel`) +- **Variables/parameters/members**: `lower_case` (e.g., `max_level_used`, `key_hash`) +- **Constants/static/constexpr**: `UPPER_CASE` (e.g., `PAC2_NUM_LEVELS`, `PAC2_LEVEL_SHIFT`) +- **Macros**: `UPPER_CASE` (e.g., `PAC_DEBUG_PRINT`) +- **Typedefs**: `lower_case_t` suffix (e.g., `aggregate_update_t`) + +Other style rules (from `.clang-format`, based on LLVM): + +- **Tabs for indentation**, width 4 +- **Column limit**: 120 +- **Braces**: same line as statement (K&R / Allman-attached) +- **Pointers**: right-aligned (`int *ptr`, not `int* ptr`) +- **No short functions on single line** +- **Templates**: always break after `template<...>` +- **Long arguments**: align after open bracket + +Run `make format-fix` to auto-format. Formatting runs automatically via hook after edits. + +## Attack evaluation + +Attack scripts live in `attacks/`. Results are documented in `attacks/clip_attack_results.md`. 
+ +```bash +bash attacks/clip_attack_test.sh 2>/dev/null # main attack suite +bash attacks/clip_multirow_test.sh 2>/dev/null # 20K small items test +bash attacks/clip_hardzero_stress.sh 2>/dev/null # stress tests +``` From 524e461a90907f4eff04ac245d526ab9a7452e78 Mon Sep 17 00:00:00 2001 From: peter Date: Mon, 23 Mar 2026 23:36:26 +0100 Subject: [PATCH 11/27] Add pac_clip_sum aggregate with clipping support --- CMakeLists.txt | 1 + src/aggregates/pac_clip_sum.cpp | 828 ++++++++++++++++++ src/aggregates/pac_count.cpp | 46 + src/aggregates/pac_min_max.cpp | 51 ++ src/compiler/pac_bitslice_compiler.cpp | 10 + src/core/pac_extension.cpp | 17 + src/include/aggregates/pac_clip_sum.hpp | 403 +++++++++ src/include/aggregates/pac_count.hpp | 2 + src/include/aggregates/pac_min_max.hpp | 4 + .../pac_expression_builder.hpp | 8 + .../query_processing/pac_plan_traversal.hpp | 3 + .../pac_expression_builder.cpp | 268 +++++- src/query_processing/pac_plan_traversal.cpp | 2 +- test/sql/pac_clip_sum.test | 214 +++++ 14 files changed, 1851 insertions(+), 6 deletions(-) create mode 100644 src/aggregates/pac_clip_sum.cpp create mode 100644 src/include/aggregates/pac_clip_sum.hpp create mode 100644 test/sql/pac_clip_sum.test diff --git a/CMakeLists.txt b/CMakeLists.txt index 93e0c917..7aa14f03 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,6 +37,7 @@ set(EXTENSION_SOURCES src/aggregates/pac_count.cpp src/aggregates/pac_min_max.cpp src/aggregates/pac_sum.cpp + src/aggregates/pac_clip_sum.cpp src/compiler/pac_bitslice_compiler.cpp src/compiler/pac_compiler_helpers.cpp src/query_processing/pac_avg_rewriter.cpp diff --git a/src/aggregates/pac_clip_sum.cpp b/src/aggregates/pac_clip_sum.cpp new file mode 100644 index 00000000..26a3ebe6 --- /dev/null +++ b/src/aggregates/pac_clip_sum.cpp @@ -0,0 +1,828 @@ +#include "aggregates/pac_clip_sum.hpp" +#include "categorical/pac_categorical.hpp" +#include "duckdb/common/types/decimal.hpp" +#include 
"duckdb/parser/parsed_data/create_aggregate_function_info.hpp" +#include + +namespace duckdb { + +// ============================================================================ +// Inner state update: add one unsigned value to the state +// ============================================================================ +AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, uint64_t key_hash, uint64_t value, + ArenaAllocator &allocator) { + state.key_hash |= key_hash; + + int level = PacClipSumIntState::GetLevel(value); + uint64_t shift = level << 2; + uint16_t shifted_val = static_cast(value >> shift); // max 255 (8 bits) + + state.EnsureLevelAllocated(allocator, level); + uint64_t *buf = state.levels[level]; + + // Set bitmap bit + buf[17] |= (1ULL << (key_hash >> 58)); + + // Update exact_count (may cascade top 4 bits to overflow) + state.AddToExactCount(buf, shifted_val, allocator); + + // Add to SWAR counters + Pac2AddToTotalsSWAR16(buf, shifted_val, key_hash); +} + +// Overload for hugeint_t +AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, uint64_t key_hash, hugeint_t value, + ArenaAllocator &allocator) { + state.key_hash |= key_hash; + + uint64_t upper, lower; + if (value.upper < 0) { + hugeint_t abs_val = -value; + upper = static_cast(abs_val.upper); + lower = abs_val.lower; + } else { + upper = static_cast(value.upper); + lower = value.lower; + } + + int level = PacClipSumIntState::GetLevel128(upper, lower); + uint64_t shift = level << 2; + + // Shift the 128-bit value right by shift bits, take lower 8 bits + uint16_t shifted_val; + if (shift >= 64) { + shifted_val = static_cast(upper >> (shift - 64)); + } else if (shift > 0) { + shifted_val = static_cast((lower >> shift) | (upper << (64 - shift))); + } else { + shifted_val = static_cast(lower); + } + shifted_val &= 0xFF; // max 255 + + state.EnsureLevelAllocated(allocator, level); + uint64_t *buf = state.levels[level]; + buf[17] |= (1ULL << 
(key_hash >> 58)); + state.AddToExactCount(buf, shifted_val, allocator); + Pac2AddToTotalsSWAR16(buf, shifted_val, key_hash); +} + +// ============================================================================ +// Value routing: two-sided (pos/neg) dispatch +// ============================================================================ +// Route a uint64_t value — when SIGNED, the bits represent a signed int64_t (two's complement) +template +inline void PacClipSumRouteValue(PacClipSumStateWrapper &wrapper, PacClipSumIntState *pos_state, uint64_t hash, + uint64_t value, ArenaAllocator &a) { + if (DUCKDB_LIKELY(hash)) { + int64_t sval = static_cast(value); // reinterpret bits as signed + if (SIGNED && sval < 0) { + auto *neg = wrapper.EnsureNegState(a); + PacClipSumUpdateOneInternal(*neg, hash, static_cast(-sval), a); + neg->update_count++; + } else { + PacClipSumUpdateOneInternal(*pos_state, hash, value, a); + pos_state->update_count++; + } + } +} + +// Overload for hugeint routing (signed) +inline void PacClipSumRouteHugeint(PacClipSumStateWrapper &wrapper, PacClipSumIntState *pos_state, uint64_t hash, + hugeint_t value, ArenaAllocator &a, bool is_signed) { + if (DUCKDB_LIKELY(hash)) { + if (is_signed && value.upper < 0) { + auto *neg = wrapper.EnsureNegState(a); + hugeint_t abs_val = -value; + uint64_t upper = static_cast(abs_val.upper); + uint64_t lower = abs_val.lower; + int level = PacClipSumIntState::GetLevel128(upper, lower); + uint64_t shift = level << 2; + uint16_t shifted_val; + if (shift >= 64) { + shifted_val = static_cast(upper >> (shift - 64)); + } else if (shift > 0) { + shifted_val = static_cast((lower >> shift) | (upper << (64 - shift))); + } else { + shifted_val = static_cast(lower); + } + shifted_val &= 0xFF; + neg->key_hash |= hash; + neg->EnsureLevelAllocated(a, level); + uint64_t *lbuf = neg->levels[level]; + lbuf[17] |= (1ULL << (hash >> 58)); + neg->AddToExactCount(lbuf, shifted_val, a); + Pac2AddToTotalsSWAR16(lbuf, shifted_val, hash); + 
neg->update_count++; + } else { + PacClipSumUpdateOneInternal(*pos_state, hash, value, a); + pos_state->update_count++; + } + } +} + +// ============================================================================ +// Buffer flush +// ============================================================================ +template +inline void PacClipSumFlushBuffer(PacClipSumStateWrapper &src, PacClipSumStateWrapper &dst, ArenaAllocator &a) { + uint64_t cnt = src.n_buffered & PacClipSumStateWrapper::BUF_MASK; + if (cnt > 0) { + auto *dst_state = dst.EnsureState(a); + for (uint64_t i = 0; i < cnt; i++) { + PacClipSumRouteValue(dst, dst_state, src.hash_buf[i], src.val_buf[i], a); + } + src.n_buffered &= ~PacClipSumStateWrapper::BUF_MASK; + } +} + +// ============================================================================ +// Buffered update +// ============================================================================ +template +AUTOVECTORIZE inline void PacClipSumUpdateOne(PacClipSumStateWrapper &agg, uint64_t key_hash, ValueT value, + ArenaAllocator &a) { + uint64_t cnt = agg.n_buffered & PacClipSumStateWrapper::BUF_MASK; + if (DUCKDB_UNLIKELY(cnt == PacClipSumStateWrapper::BUF_SIZE)) { + auto *dst_state = agg.EnsureState(a); + for (int i = 0; i < PacClipSumStateWrapper::BUF_SIZE; i++) { + PacClipSumRouteValue(agg, dst_state, agg.hash_buf[i], agg.val_buf[i], a); + } + PacClipSumRouteValue(agg, dst_state, key_hash, static_cast(value), a); + agg.n_buffered &= ~PacClipSumStateWrapper::BUF_MASK; + } else { + agg.val_buf[cnt] = static_cast(value); + agg.hash_buf[cnt] = key_hash; + agg.n_buffered++; + } +} + +// Hugeint buffered update — bypass buffer, update directly +template +inline void PacClipSumUpdateOne(PacClipSumStateWrapper &agg, uint64_t key_hash, hugeint_t value, ArenaAllocator &a) { + PacClipSumFlushBuffer(agg, agg, a); // flush any buffered values first + auto *state = agg.EnsureState(a); + PacClipSumRouteHugeint(agg, state, key_hash, value, a, SIGNED); +} + +// 
============================================================================ +// Vectorized Update and ScatterUpdate +// ============================================================================ +template +static void PacClipSumUpdate(Vector inputs[], PacClipSumStateWrapper &state, idx_t count, ArenaAllocator &allocator) { + UnifiedVectorFormat hash_data, value_data; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + + if (hash_data.validity.AllValid() && value_data.validity.AllValid()) { + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + PacClipSumUpdateOne(state, hashes[h_idx], ConvertValue::convert(values[v_idx]), + allocator); + } + } else { + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + PacClipSumUpdateOne(state, hashes[h_idx], ConvertValue::convert(values[v_idx]), + allocator); + } + } +} + +template +static void PacClipSumScatterUpdate(Vector inputs[], Vector &states, idx_t count, ArenaAllocator &allocator) { + UnifiedVectorFormat hash_data, value_data, sdata; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + states.ToUnifiedFormat(count, sdata); + + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + auto state_ptrs = UnifiedVectorFormat::GetData(sdata); + + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + auto state = state_ptrs[sdata.sel->get_index(i)]; + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + 
continue; + } + PacClipSumUpdateOne(*state, hashes[h_idx], ConvertValue::convert(values[v_idx]), allocator); + } +} + +// ============================================================================ +// X-macro: generate Update/ScatterUpdate for integer types +// ============================================================================ +#define PAC2_INT_TYPES_SIGNED \ + X(TinyInt, int64_t, int8_t, true) \ + X(SmallInt, int64_t, int16_t, true) \ + X(Integer, int64_t, int32_t, true) \ + X(BigInt, int64_t, int64_t, true) + +#define PAC2_INT_TYPES_UNSIGNED \ + X(UTinyInt, uint64_t, uint8_t, false) \ + X(USmallInt, uint64_t, uint16_t, false) \ + X(UInteger, uint64_t, uint32_t, false) \ + X(UBigInt, uint64_t, uint64_t, false) + +#define X(NAME, VALUE_T, INPUT_T, SIGNED) \ + static void PacClipSumUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, \ + idx_t count) { \ + auto &state = *reinterpret_cast(state_p); \ + PacClipSumUpdate(inputs, state, count, aggr.allocator); \ + } \ + static void PacClipSumScatterUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &states, \ + idx_t count) { \ + PacClipSumScatterUpdate(inputs, states, count, aggr.allocator); \ + } +PAC2_INT_TYPES_SIGNED +PAC2_INT_TYPES_UNSIGNED +#undef X + +// HugeInt update (signed, via hugeint routing) +static void PacClipSumUpdateHugeInt(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, idx_t count) { + auto &state = *reinterpret_cast(state_p); + UnifiedVectorFormat hash_data, value_data; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + 
PacClipSumUpdateOne(state, hashes[h_idx], values[v_idx], aggr.allocator); + } +} +static void PacClipSumScatterUpdateHugeInt(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &states, + idx_t count) { + UnifiedVectorFormat hash_data, value_data, sdata; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + states.ToUnifiedFormat(count, sdata); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + auto state_ptrs = UnifiedVectorFormat::GetData(sdata); + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + auto state = state_ptrs[sdata.sel->get_index(i)]; + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + PacClipSumUpdateOne(*state, hashes[h_idx], values[v_idx], aggr.allocator); + } +} + +// UHugeInt update (unsigned, convert to hugeint for routing) +static void PacClipSumUpdateUHugeInt(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, + idx_t count) { + auto &state = *reinterpret_cast(state_p); + UnifiedVectorFormat hash_data, value_data; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + // uhugeint_t is always positive; treat as 128-bit unsigned + auto &v = values[v_idx]; + auto *pos_state = state.EnsureState(aggr.allocator); + if (DUCKDB_LIKELY(hashes[h_idx])) { + uint64_t upper = static_cast(v.upper); + uint64_t lower = v.lower; + int level = PacClipSumIntState::GetLevel128(upper, lower); + uint64_t shift = level << 2; 
+ uint16_t shifted_val; + if (shift >= 64) { + shifted_val = static_cast(upper >> (shift - 64)); + } else if (shift > 0) { + shifted_val = static_cast((lower >> shift) | (upper << (64 - shift))); + } else { + shifted_val = static_cast(lower); + } + shifted_val &= 0xFF; + pos_state->key_hash |= hashes[h_idx]; + pos_state->EnsureLevelAllocated(aggr.allocator, level); + uint64_t *buf = pos_state->levels[level]; + buf[17] |= (1ULL << (hashes[h_idx] >> 58)); + pos_state->AddToExactCount(buf, shifted_val, aggr.allocator); + Pac2AddToTotalsSWAR16(buf, shifted_val, hashes[h_idx]); + pos_state->update_count++; + } + } +} +static void PacClipSumScatterUpdateUHugeInt(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &states, + idx_t count) { + UnifiedVectorFormat hash_data, value_data, sdata; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + states.ToUnifiedFormat(count, sdata); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + auto state_ptrs = UnifiedVectorFormat::GetData(sdata); + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + auto state = state_ptrs[sdata.sel->get_index(i)]; + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + auto &v = values[v_idx]; + auto *pos_state = state->EnsureState(aggr.allocator); + if (DUCKDB_LIKELY(hashes[h_idx])) { + uint64_t upper = static_cast(v.upper); + uint64_t lower = v.lower; + int level = PacClipSumIntState::GetLevel128(upper, lower); + uint64_t shift = level << 2; + uint16_t shifted_val; + if (shift >= 64) { + shifted_val = static_cast(upper >> (shift - 64)); + } else if (shift > 0) { + shifted_val = static_cast((lower >> shift) | (upper << (64 - shift))); + } else { + shifted_val = static_cast(lower); + } + shifted_val &= 0xFF; + pos_state->key_hash |= hashes[h_idx]; + 
pos_state->EnsureLevelAllocated(aggr.allocator, level); + uint64_t *buf = pos_state->levels[level]; + buf[17] |= (1ULL << (hashes[h_idx] >> 58)); + pos_state->AddToExactCount(buf, shifted_val, aggr.allocator); + Pac2AddToTotalsSWAR16(buf, shifted_val, hashes[h_idx]); + pos_state->update_count++; + } + } +} + +// ============================================================================ +// Combine +// ============================================================================ +AUTOVECTORIZE static void PacClipSumCombineInt(Vector &src, Vector &dst, idx_t count, ArenaAllocator &allocator) { + auto src_wrapper = FlatVector::GetData(src); + auto dst_wrapper = FlatVector::GetData(dst); + + for (idx_t i = 0; i < count; i++) { + // Flush src's buffer into dst + PacClipSumFlushBuffer(*src_wrapper[i], *dst_wrapper[i], allocator); + + auto *s = src_wrapper[i]->GetState(); + if (!s) { + continue; + } + auto *d = dst_wrapper[i]->EnsureState(allocator); + d->CombineFrom(s, allocator); + + // Combine neg states + auto *s_neg = src_wrapper[i]->GetNegState(); + if (s_neg) { + auto *d_neg = dst_wrapper[i]->GetNegState(); + if (!d_neg) { + dst_wrapper[i]->neg_state = s_neg; // steal + } else { + d_neg->CombineFrom(s_neg, allocator); + } + } + } +} + +static void PacClipSumCombine(Vector &src, Vector &dst, AggregateInputData &aggr, idx_t count) { + PacClipSumCombineInt(src, dst, count, aggr.allocator); +} + +// ============================================================================ +// Bind data with clip_support threshold +// ============================================================================ +struct PacClipSumBindData : public PacBindData { + int clip_support_threshold; // levels with fewer estimated distinct contributors are zeroed out + + PacClipSumBindData(ClientContext &ctx, double mi_val, double correction_val, int clip_support) + : PacBindData(ctx, mi_val, correction_val, 1.0), clip_support_threshold(clip_support) { + } + + unique_ptr Copy() const override { 
+ auto copy = make_uniq(*this); + copy->total_update_count = 0; + copy->suspicious_count = 0; + copy->nonsuspicious_count = 0; + return copy; + } + bool Equals(const FunctionData &other) const override { + if (!PacBindData::Equals(other)) { + return false; + } + auto *o = dynamic_cast(&other); + return o && clip_support_threshold == o->clip_support_threshold; + } +}; + +// ============================================================================ +// Finalize +// ============================================================================ +template +static void PacClipSumFinalize(Vector &states, AggregateInputData &input, Vector &result, idx_t count, idx_t offset) { + auto state_ptrs = FlatVector::GetData(states); + auto data = FlatVector::GetData(result); + auto &result_mask = FlatVector::Validity(result); + auto &bind = static_cast(*input.bind_data); + double mi = bind.mi; + double correction = bind.correction; + uint64_t query_hash = bind.query_hash; + auto pstate = bind.pstate; + int clip_support = bind.clip_support_threshold; + + for (idx_t i = 0; i < count; i++) { + PacClipSumFlushBuffer(*state_ptrs[i], *state_ptrs[i], input.allocator); + + PAC_FLOAT buf[64] = {0}; + auto *pos = state_ptrs[i]->GetState(); + if (!pos) { + result_mask.SetInvalid(offset + i); + continue; + } + uint64_t key_hash = pos->key_hash; + std::mt19937_64 gen(bind.seed); + if (PacNoiseInNull(key_hash, mi, correction, gen)) { + result_mask.SetInvalid(offset + i); + continue; + } + + // Non-mutating: just read totals with clip_support filtering + pos->GetTotals(buf, clip_support); + uint64_t update_count = pos->update_count; + + // Subtract neg state + auto *neg = state_ptrs[i]->GetNegState(); + if (neg) { + PAC_FLOAT neg_buf[64] = {0}; + neg->GetTotals(neg_buf, clip_support); + key_hash |= neg->key_hash; + for (int j = 0; j < 64; j++) { + buf[j] -= neg_buf[j]; + } + update_count += neg->update_count; + } + + CheckPacSampleDiversity(key_hash, buf, update_count, "pac_clip_sum", bind); + 
PAC_FLOAT result_val = PacNoisySampleFrom64Counters(buf, mi, correction, gen, ~key_hash, query_hash, pstate); + result_val *= PAC_FLOAT(2.0); // 2x compensation for ~50% sampling + data[offset + i] = FromDouble(result_val); + } +} + +// Instantiate noised finalize (scalar output for pac_noised_clip_sum) +static void PacClipSumNoisedFinalizeSigned(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalize(states, input, result, count, offset); +} +static void PacClipSumNoisedFinalizeUnsigned(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalize(states, input, result, count, offset); +} +// BIGINT output variant — used for count→sum conversion where the original returned BIGINT +static void PacClipSumNoisedFinalizeBigInt(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalize(states, input, result, count, offset); +} + +// ============================================================================ +// Counters finalize (LIST output for pac_clip_sum) +// ============================================================================ +template +static void PacClipSumFinalizeCounters(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + auto state_ptrs = FlatVector::GetData(states); + auto &bind = static_cast(*input.bind_data); + int clip_support = bind.clip_support_threshold; + double correction = bind.correction; + + // Result is LIST + auto list_entries = FlatVector::GetData(result); + auto &child_vec = ListVector::GetEntry(result); + + idx_t total_elements = count * 64; + ListVector::Reserve(result, total_elements); + ListVector::SetListSize(result, total_elements); + + auto child_data = FlatVector::GetData(child_vec); + + for (idx_t i = 0; i < count; i++) { + PacClipSumFlushBuffer(*state_ptrs[i], *state_ptrs[i], input.allocator); + + list_entries[offset + i].offset = i 
* 64; + list_entries[offset + i].length = 64; + + PAC_FLOAT buf[64] = {0}; + uint64_t key_hash = 0; + uint64_t update_count = 0; + + auto *pos = state_ptrs[i]->GetState(); + if (pos) { + key_hash = pos->key_hash; + update_count = pos->update_count; + pos->GetTotals(buf, clip_support); + + auto *neg = state_ptrs[i]->GetNegState(); + if (neg) { + PAC_FLOAT neg_buf[64] = {0}; + neg->GetTotals(neg_buf, clip_support); + key_hash |= neg->key_hash; + for (int j = 0; j < 64; j++) { + buf[j] -= neg_buf[j]; + } + update_count += neg->update_count; + } + } + + CheckPacSampleDiversity(key_hash, buf, update_count, "pac_clip_sum", bind); + + idx_t base = i * 64; + for (int j = 0; j < 64; j++) { + if ((key_hash >> j) & 1ULL) { + child_data[base + j] = static_cast(buf[j] * 2.0 * correction); + } else { + child_data[base + j] = 0.0; + } + } + } +} + +static void PacClipSumFinalizeCountersSigned(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalizeCounters(states, input, result, count, offset); +} +static void PacClipSumFinalizeCountersUnsigned(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalizeCounters(states, input, result, count, offset); +} + +// ============================================================================ +// State size / init / bind +// ============================================================================ +static idx_t PacClipSumStateSize(const AggregateFunction &) { + return sizeof(PacClipSumStateWrapper); +} + +static void PacClipSumInitialize(const AggregateFunction &, data_ptr_t state_p) { + memset(state_p, 0, sizeof(PacClipSumStateWrapper)); +} + +static unique_ptr PacClipSumBind(ClientContext &ctx, AggregateFunction &, + vector> &args) { + double mi = GetPacMiFromSetting(ctx); + double correction = 1.0; + if (2 < args.size()) { + if (!args[2]->IsFoldable()) { + throw InvalidInputException("pac_clip_sum: correction parameter must be a 
constant"); + } + auto val = ExpressionExecutor::EvaluateScalar(ctx, *args[2]); + correction = val.GetValue(); + if (correction < 0.0) { + throw InvalidInputException("pac_clip_sum: correction must be >= 0"); + } + } + // Read pac_clip_support threshold + int clip_support = 0; + Value dc_val; + if (ctx.TryGetCurrentSetting("pac_clip_support", dc_val) && !dc_val.IsNull()) { + clip_support = static_cast(dc_val.GetValue()); + } + return make_uniq(ctx, mi, correction, clip_support); +} + +// ============================================================================ +// DECIMAL support: dispatch by physical type, same pattern as pac_noised_sum +// ============================================================================ +static AggregateFunction GetPacClipSumNoisedAggregate(PhysicalType type) { + switch (type) { + case PhysicalType::INT16: + return AggregateFunction("pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::SMALLINT}, + LogicalType::HUGEINT, PacClipSumStateSize, PacClipSumInitialize, + PacClipSumScatterUpdateSmallInt, PacClipSumCombine, PacClipSumNoisedFinalizeSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateSmallInt); + case PhysicalType::INT32: + return AggregateFunction("pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::INTEGER}, + LogicalType::HUGEINT, PacClipSumStateSize, PacClipSumInitialize, + PacClipSumScatterUpdateInteger, PacClipSumCombine, PacClipSumNoisedFinalizeSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateInteger); + case PhysicalType::INT64: + return AggregateFunction("pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::BIGINT}, + LogicalType::HUGEINT, PacClipSumStateSize, PacClipSumInitialize, + PacClipSumScatterUpdateBigInt, PacClipSumCombine, PacClipSumNoisedFinalizeSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateBigInt); + case PhysicalType::INT128: + return AggregateFunction("pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::HUGEINT}, + 
LogicalType::HUGEINT, PacClipSumStateSize, PacClipSumInitialize, + PacClipSumScatterUpdateHugeInt, PacClipSumCombine, PacClipSumNoisedFinalizeSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateHugeInt); + default: + throw InternalException("pac_noised_clip_sum: unsupported decimal physical type"); + } +} + +static AggregateFunction GetPacClipSumCountersAggregate(PhysicalType type) { + auto list_type = LogicalType::LIST(PacFloatLogicalType()); + switch (type) { + case PhysicalType::INT16: + return AggregateFunction("pac_clip_sum", {LogicalType::UBIGINT, LogicalType::SMALLINT}, list_type, + PacClipSumStateSize, PacClipSumInitialize, PacClipSumScatterUpdateSmallInt, + PacClipSumCombine, PacClipSumFinalizeCountersSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateSmallInt); + case PhysicalType::INT32: + return AggregateFunction("pac_clip_sum", {LogicalType::UBIGINT, LogicalType::INTEGER}, list_type, + PacClipSumStateSize, PacClipSumInitialize, PacClipSumScatterUpdateInteger, + PacClipSumCombine, PacClipSumFinalizeCountersSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateInteger); + case PhysicalType::INT64: + return AggregateFunction("pac_clip_sum", {LogicalType::UBIGINT, LogicalType::BIGINT}, list_type, + PacClipSumStateSize, PacClipSumInitialize, PacClipSumScatterUpdateBigInt, + PacClipSumCombine, PacClipSumFinalizeCountersSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateBigInt); + case PhysicalType::INT128: + return AggregateFunction("pac_clip_sum", {LogicalType::UBIGINT, LogicalType::HUGEINT}, list_type, + PacClipSumStateSize, PacClipSumInitialize, PacClipSumScatterUpdateHugeInt, + PacClipSumCombine, PacClipSumFinalizeCountersSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateHugeInt); + default: + throw InternalException("pac_clip_sum: unsupported decimal physical type"); + } +} + +static unique_ptr BindDecimalPacNoisedClipSum(ClientContext &ctx, 
AggregateFunction &function, + vector> &args) { + auto decimal_type = args[1]->return_type; + function = GetPacClipSumNoisedAggregate(decimal_type.InternalType()); + function.name = "pac_noised_clip_sum"; + function.arguments[1] = decimal_type; + function.return_type = LogicalType::DECIMAL(Decimal::MAX_WIDTH_DECIMAL, DecimalType::GetScale(decimal_type)); + return PacClipSumBind(ctx, function, args); +} + +static unique_ptr BindDecimalPacClipSum(ClientContext &ctx, AggregateFunction &function, + vector> &args) { + auto decimal_type = args[1]->return_type; + function = GetPacClipSumCountersAggregate(decimal_type.InternalType()); + function.name = "pac_clip_sum"; + function.arguments[1] = decimal_type; + // counters always return LIST, no DECIMAL return type needed + return PacClipSumBind(ctx, function, args); +} + +// ============================================================================ +// Registration helpers +// ============================================================================ +static void AddClipSumCountersFcn(AggregateFunctionSet &set, const string &name, const LogicalType &value_type, + aggregate_update_t scatter, aggregate_finalize_t finalize, + aggregate_simple_update_t update) { + auto list_type = LogicalType::LIST(PacFloatLogicalType()); + set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type}, list_type, PacClipSumStateSize, + PacClipSumInitialize, scatter, PacClipSumCombine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipSumBind)); + set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type, LogicalType::DOUBLE}, list_type, + PacClipSumStateSize, PacClipSumInitialize, scatter, PacClipSumCombine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipSumBind)); +} + +static void AddNoisedClipSumFcn(AggregateFunctionSet &set, const string &name, const LogicalType &value_type, + const LogicalType &result_type, aggregate_update_t scatter, + aggregate_finalize_t 
finalize, aggregate_simple_update_t update) { + set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type}, result_type, PacClipSumStateSize, + PacClipSumInitialize, scatter, PacClipSumCombine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipSumBind)); + set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type, LogicalType::DOUBLE}, result_type, + PacClipSumStateSize, PacClipSumInitialize, scatter, PacClipSumCombine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipSumBind)); +} + +// Helper to register all type overloads for a clip sum function set +static void RegisterClipSumTypeOverloads(AggregateFunctionSet &set, const string &name, bool counters) { + if (counters) { + // Counters (LIST) variants + AddClipSumCountersFcn(set, name, LogicalType::TINYINT, PacClipSumScatterUpdateTinyInt, + PacClipSumFinalizeCountersSigned, PacClipSumUpdateTinyInt); + AddClipSumCountersFcn(set, name, LogicalType::BOOLEAN, PacClipSumScatterUpdateTinyInt, + PacClipSumFinalizeCountersSigned, PacClipSumUpdateTinyInt); + AddClipSumCountersFcn(set, name, LogicalType::SMALLINT, PacClipSumScatterUpdateSmallInt, + PacClipSumFinalizeCountersSigned, PacClipSumUpdateSmallInt); + AddClipSumCountersFcn(set, name, LogicalType::INTEGER, PacClipSumScatterUpdateInteger, + PacClipSumFinalizeCountersSigned, PacClipSumUpdateInteger); + AddClipSumCountersFcn(set, name, LogicalType::BIGINT, PacClipSumScatterUpdateBigInt, + PacClipSumFinalizeCountersSigned, PacClipSumUpdateBigInt); + AddClipSumCountersFcn(set, name, LogicalType::UTINYINT, PacClipSumScatterUpdateUTinyInt, + PacClipSumFinalizeCountersUnsigned, PacClipSumUpdateUTinyInt); + AddClipSumCountersFcn(set, name, LogicalType::USMALLINT, PacClipSumScatterUpdateUSmallInt, + PacClipSumFinalizeCountersUnsigned, PacClipSumUpdateUSmallInt); + AddClipSumCountersFcn(set, name, LogicalType::UINTEGER, PacClipSumScatterUpdateUInteger, + PacClipSumFinalizeCountersUnsigned, 
PacClipSumUpdateUInteger); + AddClipSumCountersFcn(set, name, LogicalType::UBIGINT, PacClipSumScatterUpdateUBigInt, + PacClipSumFinalizeCountersUnsigned, PacClipSumUpdateUBigInt); + AddClipSumCountersFcn(set, name, LogicalType::HUGEINT, PacClipSumScatterUpdateHugeInt, + PacClipSumFinalizeCountersSigned, PacClipSumUpdateHugeInt); + AddClipSumCountersFcn(set, name, LogicalType::UHUGEINT, PacClipSumScatterUpdateUHugeInt, + PacClipSumFinalizeCountersUnsigned, PacClipSumUpdateUHugeInt); + } else { + // Noised (scalar HUGEINT) variants + AddNoisedClipSumFcn(set, name, LogicalType::TINYINT, LogicalType::HUGEINT, PacClipSumScatterUpdateTinyInt, + PacClipSumNoisedFinalizeSigned, PacClipSumUpdateTinyInt); + AddNoisedClipSumFcn(set, name, LogicalType::BOOLEAN, LogicalType::HUGEINT, PacClipSumScatterUpdateTinyInt, + PacClipSumNoisedFinalizeSigned, PacClipSumUpdateTinyInt); + AddNoisedClipSumFcn(set, name, LogicalType::SMALLINT, LogicalType::HUGEINT, PacClipSumScatterUpdateSmallInt, + PacClipSumNoisedFinalizeSigned, PacClipSumUpdateSmallInt); + AddNoisedClipSumFcn(set, name, LogicalType::INTEGER, LogicalType::HUGEINT, PacClipSumScatterUpdateInteger, + PacClipSumNoisedFinalizeSigned, PacClipSumUpdateInteger); + AddNoisedClipSumFcn(set, name, LogicalType::BIGINT, LogicalType::HUGEINT, PacClipSumScatterUpdateBigInt, + PacClipSumNoisedFinalizeSigned, PacClipSumUpdateBigInt); + AddNoisedClipSumFcn(set, name, LogicalType::UTINYINT, LogicalType::HUGEINT, PacClipSumScatterUpdateUTinyInt, + PacClipSumNoisedFinalizeUnsigned, PacClipSumUpdateUTinyInt); + AddNoisedClipSumFcn(set, name, LogicalType::USMALLINT, LogicalType::HUGEINT, PacClipSumScatterUpdateUSmallInt, + PacClipSumNoisedFinalizeUnsigned, PacClipSumUpdateUSmallInt); + AddNoisedClipSumFcn(set, name, LogicalType::UINTEGER, LogicalType::HUGEINT, PacClipSumScatterUpdateUInteger, + PacClipSumNoisedFinalizeUnsigned, PacClipSumUpdateUInteger); + AddNoisedClipSumFcn(set, name, LogicalType::UBIGINT, LogicalType::HUGEINT, 
PacClipSumScatterUpdateUBigInt, + PacClipSumNoisedFinalizeUnsigned, PacClipSumUpdateUBigInt); + AddNoisedClipSumFcn(set, name, LogicalType::HUGEINT, LogicalType::HUGEINT, PacClipSumScatterUpdateHugeInt, + PacClipSumNoisedFinalizeSigned, PacClipSumUpdateHugeInt); + AddNoisedClipSumFcn(set, name, LogicalType::UHUGEINT, LogicalType::HUGEINT, PacClipSumScatterUpdateUHugeInt, + PacClipSumNoisedFinalizeUnsigned, PacClipSumUpdateUHugeInt); + } +} + +// ============================================================================ +// Registration: pac_clip_sum (counters, LIST) +// ============================================================================ +void RegisterPacClipSumFunctions(ExtensionLoader &loader) { + AggregateFunctionSet fcn_set("pac_clip_sum"); + RegisterClipSumTypeOverloads(fcn_set, "pac_clip_sum", true); + + // DECIMAL overloads + auto list_type = LogicalType::LIST(PacFloatLogicalType()); + fcn_set.AddFunction(AggregateFunction({LogicalType::UBIGINT, LogicalTypeId::DECIMAL}, list_type, nullptr, nullptr, + nullptr, nullptr, nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, + nullptr, BindDecimalPacClipSum)); + fcn_set.AddFunction(AggregateFunction({LogicalType::UBIGINT, LogicalTypeId::DECIMAL, LogicalType::DOUBLE}, + list_type, nullptr, nullptr, nullptr, nullptr, nullptr, + FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, BindDecimalPacClipSum)); + + // Add list aggregate overload (LIST → LIST) for categorical/subquery + AddPacListAggregateOverload(fcn_set, "clip_sum"); + + CreateAggregateFunctionInfo info(fcn_set); + FunctionDescription desc; + desc.description = "[INTERNAL] Returns 64 PAC subsample counters with per-level clipping as LIST."; + desc.examples = {"SELECT c_mktsegment, pac_clip_sum(pac_hash(hash(c_custkey)), c_acctbal) FROM customer GROUP BY " + "c_mktsegment"}; + info.descriptions.push_back(std::move(desc)); + loader.RegisterFunction(std::move(info)); +} + +// 
============================================================================ +// Registration: pac_noised_clip_sum (fused noised, scalar HUGEINT) +// ============================================================================ +void RegisterPacNoisedClipSumFunctions(ExtensionLoader &loader) { + AggregateFunctionSet fcn_set("pac_noised_clip_sum"); + RegisterClipSumTypeOverloads(fcn_set, "pac_noised_clip_sum", false); + + // DECIMAL overloads + fcn_set.AddFunction(AggregateFunction( + {LogicalType::UBIGINT, LogicalTypeId::DECIMAL}, LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr, nullptr, + nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, BindDecimalPacNoisedClipSum)); + fcn_set.AddFunction(AggregateFunction( + {LogicalType::UBIGINT, LogicalTypeId::DECIMAL, LogicalType::DOUBLE}, LogicalTypeId::DECIMAL, nullptr, nullptr, + nullptr, nullptr, nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, BindDecimalPacNoisedClipSum)); + + CreateAggregateFunctionInfo info(fcn_set); + FunctionDescription desc; + desc.description = "Privacy-preserving SUM with per-level clipping and noising. Supports 128-bit."; + desc.examples = {"SELECT c_mktsegment, pac_noised_clip_sum(pac_hash(hash(c_custkey)), c_acctbal) FROM customer " + "GROUP BY c_mktsegment"}; + info.descriptions.push_back(std::move(desc)); + loader.RegisterFunction(std::move(info)); +} + +// ============================================================================ +// Registration: pac_noised_clip_sumcount (sum-of-counts, BIGINT → BIGINT) +// Used when count→sum conversion needs to preserve BIGINT return type. 
+// ============================================================================ +void RegisterPacNoisedClipSumCountFunctions(ExtensionLoader &loader) { + AggregateFunctionSet fcn_set("pac_noised_clip_sumcount"); + // Only BIGINT input → BIGINT output (counts are always BIGINT) + AddNoisedClipSumFcn(fcn_set, "pac_noised_clip_sumcount", LogicalType::BIGINT, LogicalType::BIGINT, + PacClipSumScatterUpdateBigInt, PacClipSumNoisedFinalizeBigInt, PacClipSumUpdateBigInt); + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + +} // namespace duckdb diff --git a/src/aggregates/pac_count.cpp b/src/aggregates/pac_count.cpp index 86e86de9..80376c27 100644 --- a/src/aggregates/pac_count.cpp +++ b/src/aggregates/pac_count.cpp @@ -341,4 +341,50 @@ void RegisterPacAvgFunctions(ExtensionLoader &loader) { loader.RegisterFunction(std::move(avg_counters_info)); } +// ============================================================================ +// Clip synonyms: pac_noised_clip_count = pac_noised_count, +// pac_clip_count = pac_count +// ============================================================================ +void RegisterPacNoisedClipCountFunctions(ExtensionLoader &loader) { + AggregateFunctionSet fcn_set("pac_noised_clip_count"); + + fcn_set.AddFunction(AggregateFunction("pac_noised_clip_count", {LogicalType::UBIGINT}, LogicalType::BIGINT, + PacCountStateSize, PacCountInitialize, PacCountScatterUpdate, PacCountCombine, + PacCountFinalize, FunctionNullHandling::SPECIAL_HANDLING, PacCountUpdate, + PacCountBind)); + fcn_set.AddFunction(AggregateFunction("pac_noised_clip_count", {LogicalType::UBIGINT, LogicalType::DOUBLE}, + LogicalType::BIGINT, PacCountStateSize, PacCountInitialize, + PacCountScatterUpdate, PacCountCombine, PacCountFinalize, + FunctionNullHandling::SPECIAL_HANDLING, PacCountUpdate, PacCountBind)); + fcn_set.AddFunction(AggregateFunction("pac_noised_clip_count", {LogicalType::UBIGINT, LogicalType::ANY}, + LogicalType::BIGINT, 
PacCountStateSize, PacCountInitialize, + PacCountColumnScatterUpdate, PacCountCombine, PacCountFinalize, + FunctionNullHandling::SPECIAL_HANDLING, PacCountColumnUpdate, PacCountBind)); + fcn_set.AddFunction(AggregateFunction( + "pac_noised_clip_count", {LogicalType::UBIGINT, LogicalType::ANY, LogicalType::DOUBLE}, LogicalType::BIGINT, + PacCountStateSize, PacCountInitialize, PacCountColumnScatterUpdate, PacCountCombine, PacCountFinalize, + FunctionNullHandling::SPECIAL_HANDLING, PacCountColumnUpdate, PacCountBind)); + + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + +void RegisterPacClipCountFunctions(ExtensionLoader &loader) { + auto list_double_type = LogicalType::LIST(PacFloatLogicalType()); + AggregateFunctionSet fcn_set("pac_clip_count"); + + fcn_set.AddFunction(AggregateFunction("pac_clip_count", {LogicalType::UBIGINT}, list_double_type, PacCountStateSize, + PacCountInitialize, PacCountScatterUpdate, PacCountCombine, + PacCountFinalizeCounters, FunctionNullHandling::DEFAULT_NULL_HANDLING, + PacCountUpdate, PacCountBind)); + fcn_set.AddFunction(AggregateFunction( + "pac_clip_count", {LogicalType::UBIGINT, LogicalType::ANY}, list_double_type, PacCountStateSize, + PacCountInitialize, PacCountColumnScatterUpdate, PacCountCombine, PacCountFinalizeCounters, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacCountColumnUpdate, PacCountBind)); + AddPacListAggregateOverload(fcn_set, "clip_count"); + + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + } // namespace duckdb diff --git a/src/aggregates/pac_min_max.cpp b/src/aggregates/pac_min_max.cpp index 8fdfec10..60f5bdac 100644 --- a/src/aggregates/pac_min_max.cpp +++ b/src/aggregates/pac_min_max.cpp @@ -371,6 +371,57 @@ void RegisterPacMaxCountersFunctions(ExtensionLoader &loader) { loader.RegisterFunction(std::move(info)); } +// ============================================================================ +// Clip synonyms: 
pac_noised_clip_min/max = pac_noised_min/max, +// pac_clip_min/max = pac_min/max +// ============================================================================ +void RegisterPacNoisedClipMinFunctions(ExtensionLoader &loader) { + AggregateFunctionSet fcn_set("pac_noised_clip_min"); + fcn_set.AddFunction(AggregateFunction("pac_noised_clip_min", {LogicalType::UBIGINT, LogicalType::ANY}, + LogicalType::ANY, nullptr, nullptr, nullptr, nullptr, nullptr, + FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); + fcn_set.AddFunction(AggregateFunction("pac_noised_clip_min", + {LogicalType::UBIGINT, LogicalType::ANY, LogicalType::DOUBLE}, + LogicalType::ANY, nullptr, nullptr, nullptr, nullptr, nullptr, + FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + +void RegisterPacNoisedClipMaxFunctions(ExtensionLoader &loader) { + AggregateFunctionSet fcn_set("pac_noised_clip_max"); + fcn_set.AddFunction(AggregateFunction("pac_noised_clip_max", {LogicalType::UBIGINT, LogicalType::ANY}, + LogicalType::ANY, nullptr, nullptr, nullptr, nullptr, nullptr, + FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); + fcn_set.AddFunction(AggregateFunction( + "pac_noised_clip_max", {LogicalType::UBIGINT, LogicalType::ANY, LogicalType::DOUBLE}, LogicalType::ANY, nullptr, + nullptr, nullptr, nullptr, nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + +void RegisterPacClipMinFunctions(ExtensionLoader &loader) { + auto list_double_type = LogicalType::LIST(PacFloatLogicalType()); + AggregateFunctionSet fcn_set("pac_clip_min"); + fcn_set.AddFunction(AggregateFunction( + "pac_clip_min", {LogicalType::UBIGINT, LogicalType::ANY}, list_double_type, nullptr, nullptr, nullptr, nullptr, + nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, 
nullptr, PacMinMaxCountersBind)); + AddPacListAggregateOverload(fcn_set, "clip_min"); + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + +void RegisterPacClipMaxFunctions(ExtensionLoader &loader) { + auto list_double_type = LogicalType::LIST(PacFloatLogicalType()); + AggregateFunctionSet fcn_set("pac_clip_max"); + fcn_set.AddFunction(AggregateFunction( + "pac_clip_max", {LogicalType::UBIGINT, LogicalType::ANY}, list_double_type, nullptr, nullptr, nullptr, nullptr, + nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxCountersBind)); + AddPacListAggregateOverload(fcn_set, "clip_max"); + CreateAggregateFunctionInfo info(fcn_set); + loader.RegisterFunction(std::move(info)); +} + // Explicit template instantiations #define INST_ALL(T) \ template void PacMinMaxUpdate(Vector[], AggregateInputData &, idx_t, data_ptr_t, idx_t); \ diff --git a/src/compiler/pac_bitslice_compiler.cpp b/src/compiler/pac_bitslice_compiler.cpp index 6e93db68..e2e45784 100644 --- a/src/compiler/pac_bitslice_compiler.cpp +++ b/src/compiler/pac_bitslice_compiler.cpp @@ -880,6 +880,16 @@ void CompilePacBitsliceQuery(const PACCompatibilityResult &check, OptimizerExten // errors. The post-optimizer still handles user-written pac_avg() in SQL. RewritePacAvgToDiv(input, plan); + // Clip rewrite: when pac_clip_support is set, refine PAC aggregates to use + // clipping variants with per-PU pre-aggregation below. + { + Value clip_val; + if (input.context.TryGetCurrentSetting("pac_clip_support", clip_val) && !clip_val.IsNull()) { + auto &pu_names = (pu_present_in_tree && !pu_via_cte) ? 
check.scanned_pu_tables : privacy_units; + RewriteClipAggregates(input, plan, check, pu_names); + } + } + #if PAC_DEBUG PAC_DEBUG_PRINT("=== PAC-OPTIMIZED PLAN ==="); plan->Print(); diff --git a/src/core/pac_extension.cpp b/src/core/pac_extension.cpp index 2393fe80..f4153dfa 100644 --- a/src/core/pac_extension.cpp +++ b/src/core/pac_extension.cpp @@ -18,6 +18,7 @@ #include "aggregates/pac_aggregate.hpp" #include "aggregates/pac_count.hpp" #include "aggregates/pac_sum.hpp" +#include "aggregates/pac_clip_sum.hpp" #include "aggregates/pac_min_max.hpp" #include "categorical/pac_categorical.hpp" #include "parser/pac_parser.hpp" @@ -247,17 +248,33 @@ static void LoadInternal(ExtensionLoader &loader) { db.config.AddExtensionOption("pac_ptracking", "[INTERNAL] Enable persistent secret p-tracking for query-level MIA", LogicalType::BOOLEAN, Value::BOOLEAN(true)); + db.config.AddExtensionOption("pac_clip_support", + "Dynamic outlier clipping threshold for pac_clip_sum. " + "Levels with fewer than this many estimated distinct contributors are zeroed out. " + "NULL (default) disables pac_clip_sum; set to e.g. 
64 to enable.", + LogicalType::BIGINT, Value()); + // Register pac_sum aggregate functions RegisterPacSumFunctions(loader); RegisterPacSumCountersFunctions(loader); + RegisterPacClipSumFunctions(loader); + RegisterPacNoisedClipSumFunctions(loader); + RegisterPacNoisedClipSumCountFunctions(loader); RegisterPacCountFunctions(loader); RegisterPacCountCountersFunctions(loader); + RegisterPacClipCountFunctions(loader); + RegisterPacNoisedClipCountFunctions(loader); // Register pac_min/pac_max aggregate functions RegisterPacMinFunctions(loader); RegisterPacMaxFunctions(loader); // Register _counters variants for categorical queries RegisterPacMinCountersFunctions(loader); RegisterPacMaxCountersFunctions(loader); + // Register clip synonyms for min/max + RegisterPacClipMinFunctions(loader); + RegisterPacClipMaxFunctions(loader); + RegisterPacNoisedClipMinFunctions(loader); + RegisterPacNoisedClipMaxFunctions(loader); // Register dummy pac_noised_avg / pac_avg (replaced by RewritePacAvgToDiv before execution) RegisterPacAvgFunctions(loader); diff --git a/src/include/aggregates/pac_clip_sum.hpp b/src/include/aggregates/pac_clip_sum.hpp new file mode 100644 index 00000000..7bb0c87b --- /dev/null +++ b/src/include/aggregates/pac_clip_sum.hpp @@ -0,0 +1,403 @@ +// +// pac_clip_sum: Approximate sum with per-level overflow + distinct bitmaps +// Always: buffered, approximate, two-sided (unsigned pos/neg), 31 levels covering 128-bit +// +#ifndef PAC_CLIP_SUM_HPP +#define PAC_CLIP_SUM_HPP + +#include "duckdb.hpp" +#include "pac_aggregate.hpp" +#include + +namespace duckdb { + +void RegisterPacClipSumFunctions(ExtensionLoader &loader); +void RegisterPacNoisedClipSumFunctions(ExtensionLoader &loader); +void RegisterPacNoisedClipSumCountFunctions(ExtensionLoader &loader); + +// ============================================================================ +// Constants +// ============================================================================ +constexpr int PAC2_NUM_LEVELS = 31; 
+constexpr int PAC2_NORMAL_SWAR = 16; // 16 x uint64_t = 64 x uint16_t SWAR counters +constexpr int PAC2_NORMAL_ELEMENTS = 18; // 16 SWAR + 1 packed ptr/ec + 1 bitmap +constexpr int PAC2_OVERFLOW_SWAR = 32; // 32 x uint64_t = 64 x uint32_t SWAR counters +constexpr int PAC2_OVERFLOW_ELEMENTS = 33; // 32 SWAR + 1 exact_count +constexpr int PAC2_LEVEL_SHIFT = 4; +constexpr uint64_t PAC2_SWAR_MASK_16 = 0x0001000100010001ULL; + +// ============================================================================ +// Packed pointer + exact_count helpers +// Normal level[16] stores: upper 16 bits = exact_count, lower 48 bits = overflow pointer +// ============================================================================ +static inline uint64_t *Pac2GetOverflowPtr(uint64_t packed) { + return reinterpret_cast(packed & 0x0000FFFFFFFFFFFFULL); +} +static inline uint16_t Pac2GetExactCount(uint64_t packed) { + return static_cast(packed >> 48); +} +static inline void Pac2SetExactCount(uint64_t &packed, uint16_t count) { + packed = (packed & 0x0000FFFFFFFFFFFFULL) | (static_cast(count) << 48); +} +static inline void Pac2SetOverflowPtr(uint64_t &packed, uint64_t *ptr) { + packed = (packed & 0xFFFF000000000000ULL) | (reinterpret_cast(ptr) & 0x0000FFFFFFFFFFFFULL); +} + +// ============================================================================ +// SWAR kernel — identical to pac_sum's AddToTotalsSWAR for uint16_t +// ============================================================================ +AUTOVECTORIZE static inline void Pac2AddToTotalsSWAR16(uint64_t *PAC_RESTRICT total, uint64_t value, + uint64_t key_hash) { + uint64_t val_packed = static_cast(value) * PAC2_SWAR_MASK_16; + for (int i = 0; i < 16; i++) { + uint64_t bits = (key_hash >> i) & PAC2_SWAR_MASK_16; + uint64_t expanded = (bits << 16) - bits; + total[i] += val_packed & expanded; + } +} + +// ============================================================================ +// PacClipSumIntState — core state for one 
unsigned accumulator +// ============================================================================ +struct PacClipSumIntState { + uint64_t key_hash; + uint64_t update_count; + int8_t max_level_used; // -1 if none + int8_t inline_level_idx; // which level uses inline, -1 if none + + // 31 level pointers = 248 bytes. + // Inline optimization: last 18 slots (indices 13..30) = 144 bytes = one normal level. + // Levels 0-12 can use inline storage without overlapping their own pointer slot. + union { + uint64_t *levels[PAC2_NUM_LEVELS]; // 248 bytes + struct { + uint64_t *_ptrs[13]; // levels 0-12 pointers (104 bytes) + uint64_t inline_level[PAC2_NORMAL_ELEMENTS]; // 144 bytes for one inline level + }; + }; + + // ======================================================================== + // GetLevel: route value to lowest level where shifted value fits in 8 bits + // ======================================================================== + static inline int GetLevel(uint64_t abs_val) { + if (abs_val < 256) { + return 0; + } + int bit_pos = 63 - pac_clzll(abs_val); + return (bit_pos - 4) >> 2; + } + + // For 128-bit (hugeint) values + static inline int GetLevel128(uint64_t upper, uint64_t lower) { + if (upper == 0) { + return GetLevel(lower); + } + int bit_pos = 127 - pac_clzll(upper); + return (bit_pos - 4) >> 2; + } + + // ======================================================================== + // Level allocation + // ======================================================================== + inline void AllocateLevel(ArenaAllocator &allocator, int k) { + if (k >= 13 && inline_level_idx >= 0) { + // Evict inline level to arena + auto *ext = reinterpret_cast(allocator.Allocate(PAC2_NORMAL_ELEMENTS * sizeof(uint64_t))); + memcpy(ext, inline_level, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); + levels[inline_level_idx] = ext; + inline_level_idx = -1; + // Clear inline area so levels[13..30] read as nullptr + memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * 
sizeof(uint64_t)); + } + if (k < 13 && inline_level_idx < 0) { + // Use inline storage + levels[k] = inline_level; + memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); + inline_level_idx = static_cast(k); + } else { + auto *buf = reinterpret_cast(allocator.Allocate(PAC2_NORMAL_ELEMENTS * sizeof(uint64_t))); + memset(buf, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); + levels[k] = buf; + } + } + + inline void EnsureLevelAllocated(ArenaAllocator &allocator, int k) { + if (DUCKDB_LIKELY(k <= max_level_used)) { + return; + } + for (int i = max_level_used + 1; i <= k; i++) { + AllocateLevel(allocator, i); + } + max_level_used = static_cast(k); + } + + // ======================================================================== + // CascadeTop4: extract top 4 bits of 16-bit SWAR → add to 32-bit overflow + // ======================================================================== + void CascadeTop4(uint64_t *normal_buf, ArenaAllocator &allocator) { + // 1. Ensure overflow level allocated + uint64_t *overflow = Pac2GetOverflowPtr(normal_buf[16]); + if (!overflow) { + overflow = reinterpret_cast(allocator.Allocate(PAC2_OVERFLOW_ELEMENTS * sizeof(uint64_t))); + memset(overflow, 0, PAC2_OVERFLOW_ELEMENTS * sizeof(uint64_t)); + Pac2SetOverflowPtr(normal_buf[16], overflow); + } + + // 2. Extract top 4 bits of each 16-bit counter → add to 32-bit overflow + // SWAR 16-bit element i holds bit positions: i, i+16, i+32, i+48 + // SWAR 32-bit element i holds: i, i+32; element i+16 holds: i+16, i+48 + for (int i = 0; i < 16; i++) { + uint64_t swar = normal_buf[i]; + uint64_t top4 = (swar >> 12) & 0x000F000F000F000FULL; + normal_buf[i] = swar & 0x0FFF0FFF0FFF0FFFULL; + + auto *t = reinterpret_cast(&top4); + auto *o1 = reinterpret_cast(&overflow[i]); // bits i, i+32 + auto *o2 = reinterpret_cast(&overflow[i + 16]); // bits i+16, i+48 + o1[0] += t[0]; // bit i + o1[1] += t[2]; // bit i+32 + o2[0] += t[1]; // bit i+16 + o2[1] += t[3]; // bit i+48 + } + + // 3. 
Cascade exact_count top 4 bits + uint16_t ec = Pac2GetExactCount(normal_buf[16]); + auto *overflow_ec = reinterpret_cast<uint32_t *>(&overflow[32]); + *overflow_ec += (ec >> 12); + Pac2SetExactCount(normal_buf[16], ec & 0x0FFF); + } + + // ======================================================================== + // AddToExactCount: overflow-aware exact_count update + // ======================================================================== + inline void AddToExactCount(uint64_t *normal_buf, uint16_t shifted_val, ArenaAllocator &allocator) { + uint16_t ec = Pac2GetExactCount(normal_buf[16]); + uint32_t new_ec = static_cast<uint32_t>(ec) + shifted_val; + if (DUCKDB_UNLIKELY(new_ec > 0xFFFF)) { + CascadeTop4(normal_buf, allocator); + ec = Pac2GetExactCount(normal_buf[16]); // now ≤ 0x0FFF + new_ec = static_cast<uint32_t>(ec) + shifted_val; + } + Pac2SetExactCount(normal_buf[16], static_cast<uint16_t>(new_ec)); + } + + // ======================================================================== + // Estimate distinct count from 64-bit bitmap using birthday-paradox formula + // ======================================================================== + static inline int EstimateDistinct(uint64_t bitmap) { + int k = pac_popcount64(bitmap); + if (k >= 64) { + return 256; // saturated — could be any large number + } + if (k == 0) { + return 0; + } + // n ≈ -64 * ln(1 - k/64) + return static_cast<int>(-64.0 * std::log(1.0 - k / 64.0)); + } + + // ======================================================================== + // GetTotals: non-mutating finalization — sums all levels + // clip_support_threshold: soft clamping of under-supported levels (0 = no clipping) + // + // Levels with fewer estimated distinct contributors than the threshold are + // attenuated rather than zeroed: + // - Prefix (below first supported level): scaled UP by 16^distance to + // clamp small under-supported values toward the supported range. 
+ // - Suffix (above last supported level): scaled DOWN by 16^distance to + // attenuate outlier levels toward the supported range. + // - Interior unsupported levels (between first and last supported): full + // contribution, no attenuation. + // ======================================================================== + void GetTotals(PAC_FLOAT *dst, int clip_support_threshold = 0) const { + memset(dst, 0, 64 * sizeof(PAC_FLOAT)); + + // Pass 1: find first and last supported levels + int first_supported = -1; + int last_supported = -1; + if (clip_support_threshold > 0) { + for (int k = 0; k <= max_level_used; k++) { + if (levels[k] && EstimateDistinct(levels[k][17]) >= clip_support_threshold) { + if (first_supported < 0) { + first_supported = k; + } + last_supported = k; + } + } + } + + // Pass 2: accumulate contributions with soft clamping + for (int k = 0; k <= max_level_used; k++) { + if (!levels[k]) { + continue; + } + + // Determine effective scale: clamp under-supported prefix/suffix levels + int effective_level = k; + if (clip_support_threshold > 0 && first_supported >= 0) { + if (k < first_supported) { + // Prefix: clamp scale up to first supported level + effective_level = first_supported; + } else if (k > last_supported) { + // Suffix: clamp scale down to last supported level + effective_level = last_supported; + } + } else if (clip_support_threshold > 0 && first_supported < 0) { + // No supported levels at all — zero everything + continue; + } + + PAC_FLOAT scale = std::exp2(static_cast(PAC2_LEVEL_SHIFT * effective_level)); + + // Add normal 16-bit SWAR contribution + auto *counters = reinterpret_cast(levels[k]); + for (int j = 0; j < 64; j++) { + int swar_idx = (j % 16) * 4 + (j / 16); + dst[j] += static_cast(counters[swar_idx]) * scale; + } + + // Add overflow 32-bit SWAR contribution (scaled by 2^12 relative to normal) + uint64_t *overflow = Pac2GetOverflowPtr(levels[k][16]); + if (overflow) { + PAC_FLOAT overflow_scale = scale * 
std::exp2(static_cast(12)); + auto *ocounters = reinterpret_cast(overflow); + for (int j = 0; j < 64; j++) { + int swar_idx = (j % 32) * 2 + (j / 32); + dst[j] += static_cast(ocounters[swar_idx]) * overflow_scale; + } + } + } + } + + // ======================================================================== + // CombineFrom: merge another state into this one + // ======================================================================== + void CombineFrom(PacClipSumIntState *src, ArenaAllocator &allocator) { + if (!src) { + return; + } + key_hash |= src->key_hash; + update_count += src->update_count; + + for (int k = 0; k <= src->max_level_used; k++) { + if (!src->levels[k]) { + continue; + } + + // If dst doesn't have this level: steal src's pointer + if (k > max_level_used || !levels[k]) { + EnsureLevelAllocated(allocator, k); // ensures max_level_used >= k, allocates if needed + // If we just allocated a fresh level, steal src's data over it + if (k != src->inline_level_idx) { + // src level is arena-allocated, can steal + levels[k] = src->levels[k]; + src->levels[k] = nullptr; + } else { + // src is using inline — copy instead + memcpy(levels[k], src->levels[k], PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); + } + continue; + } + + // Both have this level: merge + // Add SWAR counters + for (int i = 0; i < PAC2_NORMAL_SWAR; i++) { + levels[k][i] += src->levels[k][i]; + } + // OR bitmaps + levels[k][17] |= src->levels[k][17]; + + // Merge exact_counts (check overflow) + uint16_t dst_ec = Pac2GetExactCount(levels[k][16]); + uint16_t src_ec = Pac2GetExactCount(src->levels[k][16]); + uint32_t sum_ec = static_cast(dst_ec) + src_ec; + if (sum_ec > 0xFFFF) { + CascadeTop4(levels[k], allocator); + } + dst_ec = Pac2GetExactCount(levels[k][16]); + Pac2SetExactCount(levels[k][16], dst_ec + src_ec); + + // Merge overflow levels + uint64_t *src_overflow = Pac2GetOverflowPtr(src->levels[k][16]); + uint64_t *dst_overflow = Pac2GetOverflowPtr(levels[k][16]); + if (src_overflow && 
!dst_overflow) { + // Steal overflow from src + Pac2SetOverflowPtr(levels[k][16], src_overflow); + Pac2SetOverflowPtr(src->levels[k][16], nullptr); + } else if (src_overflow && dst_overflow) { + // Merge overflow SWAR counters + for (int i = 0; i < PAC2_OVERFLOW_SWAR; i++) { + dst_overflow[i] += src_overflow[i]; + } + // Merge overflow exact_counts + auto *dec = reinterpret_cast(&dst_overflow[32]); + auto *sec = reinterpret_cast(&src_overflow[32]); + *dec += *sec; + } + } + } + + // Interface methods + PacClipSumIntState *GetState() { + return this; + } + PacClipSumIntState *EnsureState(ArenaAllocator &) { + return this; + } +}; + +// ============================================================================ +// PacClipSumStateWrapper: buffering wrapper with two-sided pos/neg +// ============================================================================ +struct PacClipSumStateWrapper { + using State = PacClipSumIntState; + using Value = uint64_t; + static constexpr int BUF_SIZE = 2; + static constexpr uint64_t BUF_MASK = 3ULL; + + uint64_t val_buf[BUF_SIZE]; + uint64_t hash_buf[BUF_SIZE]; + union { + uint64_t n_buffered; // lower 2 bits: count, upper bits: state pointer + PacClipSumIntState *state; + }; + PacClipSumIntState *neg_state; // separate state for negatives (stores absolute values) + + PacClipSumIntState *GetState() const { + return reinterpret_cast(reinterpret_cast(state) & ~7ULL); + } + + PacClipSumIntState *EnsureState(ArenaAllocator &a) { + PacClipSumIntState *s = GetState(); + if (!s) { + s = reinterpret_cast(a.Allocate(sizeof(PacClipSumIntState))); + memset(s, 0, sizeof(PacClipSumIntState)); + s->max_level_used = -1; + s->inline_level_idx = -1; + state = s; + } + return s; + } + + PacClipSumIntState *GetNegState() const { + return neg_state; + } + + PacClipSumIntState *EnsureNegState(ArenaAllocator &a) { + if (!neg_state) { + neg_state = reinterpret_cast(a.Allocate(sizeof(PacClipSumIntState))); + memset(neg_state, 0, sizeof(PacClipSumIntState)); 
+ neg_state->max_level_used = -1; + neg_state->inline_level_idx = -1; + } + return neg_state; + } + + static idx_t StateSize() { + return sizeof(PacClipSumStateWrapper); + } +}; + +} // namespace duckdb + +#endif // PAC_CLIP_SUM_HPP diff --git a/src/include/aggregates/pac_count.hpp b/src/include/aggregates/pac_count.hpp index ea7069da..aa0f34d0 100644 --- a/src/include/aggregates/pac_count.hpp +++ b/src/include/aggregates/pac_count.hpp @@ -25,6 +25,8 @@ namespace duckdb { void RegisterPacCountFunctions(ExtensionLoader &); void RegisterPacCountCountersFunctions(ExtensionLoader &); void RegisterPacAvgFunctions(ExtensionLoader &); +void RegisterPacNoisedClipCountFunctions(ExtensionLoader &); +void RegisterPacClipCountFunctions(ExtensionLoader &); // PAC_COUNT(key_hash) implements a COUNT aggregate that for each privacy-unit (identified by a key_hash) // computes 64 independent counts, where each independent count randomly (50% chance) includes a PU or not. diff --git a/src/include/aggregates/pac_min_max.hpp b/src/include/aggregates/pac_min_max.hpp index e6cf177e..a825f503 100644 --- a/src/include/aggregates/pac_min_max.hpp +++ b/src/include/aggregates/pac_min_max.hpp @@ -32,6 +32,10 @@ void RegisterPacMinFunctions(ExtensionLoader &loader); void RegisterPacMaxFunctions(ExtensionLoader &loader); void RegisterPacMinCountersFunctions(ExtensionLoader &loader); void RegisterPacMaxCountersFunctions(ExtensionLoader &loader); +void RegisterPacNoisedClipMinFunctions(ExtensionLoader &loader); +void RegisterPacNoisedClipMaxFunctions(ExtensionLoader &loader); +void RegisterPacClipMinFunctions(ExtensionLoader &loader); +void RegisterPacClipMaxFunctions(ExtensionLoader &loader); // ============================================================================ // PAC_MIN/PAC_MAX(hash_key, value) aggregate functions diff --git a/src/include/query_processing/pac_expression_builder.hpp b/src/include/query_processing/pac_expression_builder.hpp index 168fe183..d1ad9c48 100644 --- 
a/src/include/query_processing/pac_expression_builder.hpp +++ b/src/include/query_processing/pac_expression_builder.hpp @@ -10,6 +10,7 @@ #include "duckdb/planner/operator/logical_get.hpp" #include "duckdb/planner/operator/logical_aggregate.hpp" #include "duckdb/planner/operator/logical_cteref.hpp" +#include "metadata/pac_compatibility_check.hpp" namespace duckdb { @@ -67,6 +68,13 @@ void ModifyAggregatesWithPacFunctions(OptimizerExtensionInput &input, LogicalAgg unique_ptr &hash_input_expr, unique_ptr &plan, double correction = 1.0); +// Rewrite PAC aggregates to use clipping variants when pac_clip_support is set. +// Inserts a lower aggregate with plain DuckDB aggregates (GROUP BY groups + PU hash), +// and rewrites the top aggregate to use pac_noised_clip_* / pac_clip_* functions. +// Skips insertion if child already groups by PU key (Q13 exception). +void RewriteClipAggregates(OptimizerExtensionInput &input, unique_ptr &plan, + const PACCompatibilityResult &check, const vector &privacy_units); + } // namespace duckdb #endif // PAC_EXPRESSION_BUILDER_HPP diff --git a/src/include/query_processing/pac_plan_traversal.hpp b/src/include/query_processing/pac_plan_traversal.hpp index fbb778aa..a4be99e3 100644 --- a/src/include/query_processing/pac_plan_traversal.hpp +++ b/src/include/query_processing/pac_plan_traversal.hpp @@ -115,6 +115,9 @@ vector FilterTargetAggregatesWithPUKeyCheck(const vector &privacy_units); +// Find the first aggregate in a subtree (depth-first). +LogicalAggregate *FindFirstChildAggregate(LogicalOperator *op); + // Check if a target node is inside a DELIM_JOIN's subquery branch (children[1]). // This is important for correlated subqueries where nodes in the subquery branch // cannot directly access tables from the outer query. 
diff --git a/src/query_processing/pac_expression_builder.cpp b/src/query_processing/pac_expression_builder.cpp index c1c2740e..1cf09243 100644 --- a/src/query_processing/pac_expression_builder.cpp +++ b/src/query_processing/pac_expression_builder.cpp @@ -24,6 +24,7 @@ #include "duckdb/planner/operator/logical_comparison_join.hpp" #include "duckdb/planner/operator/logical_cross_product.hpp" #include "utils/pac_helpers.hpp" +#include "categorical/pac_categorical_detection.hpp" namespace duckdb { @@ -444,7 +445,7 @@ unique_ptr BindBitOrAggregate(OptimizerExtensionInput &input, unique } // Map aggregate function name to PAC function name -static string GetPacAggregateFunctionName(const string &function_name) { +static string GetPacAggregateFunctionName(const string &function_name, ClientContext *ctx = nullptr) { string pac_function_name; if (function_name == "sum" || function_name == "sum_no_overflow") { pac_function_name = "pac_noised_sum"; @@ -521,7 +522,7 @@ static void InsertDistinctPreAggregation(OptimizerExtensionInput &input, Logical for (idx_t i = 0; i < agg->expressions.size(); i++) { auto &old_aggr = agg->expressions[i]->Cast(); string function_name = old_aggr.function.name; - string pac_name = GetPacAggregateFunctionName(function_name); + string pac_name = GetPacAggregateFunctionName(function_name, &input.context); auto hash_ref = make_uniq(LogicalType::UBIGINT, combined_hash_binding); unique_ptr value_ref; @@ -594,7 +595,7 @@ BuildDistinctBranch(OptimizerExtensionInput &input, unique_ptr vector> outer_expressions; for (auto &spec : agg_specs) { - string pac_name = GetPacAggregateFunctionName(spec.second); + string pac_name = GetPacAggregateFunctionName(spec.second, &input.context); auto hash_ref = make_uniq(LogicalType::UBIGINT, combined_hash_binding); unique_ptr value_ref; if (spec.second == "count" || spec.second == "count_star") { @@ -633,7 +634,7 @@ static unique_ptr BuildNonDistinctBranch( for (auto &spec : agg_specs) { auto &old_aggr = *spec.second; 
string function_name = old_aggr.function.name; - string pac_name = GetPacAggregateFunctionName(function_name); + string pac_name = GetPacAggregateFunctionName(function_name, &input.context); unique_ptr value_child; if (old_aggr.children.empty()) { @@ -1110,7 +1111,7 @@ void ModifyAggregatesWithPacFunctions(OptimizerExtensionInput &input, LogicalAgg value_child = old_aggr.children[0]->Copy(); } - string pac_function_name = GetPacAggregateFunctionName(function_name); + string pac_function_name = GetPacAggregateFunctionName(function_name, &input.context); unique_ptr correction_expr; if (correction != 1.0) { correction_expr = make_uniq_base(Value::DOUBLE(correction)); @@ -1122,4 +1123,261 @@ void ModifyAggregatesWithPacFunctions(OptimizerExtensionInput &input, LogicalAgg agg->ResolveOperatorTypes(); } +// ============================================================================ +// Clip aggregate rewrite: pac_noised_* → pac_noised_clip_* / pac_clip_* +// with optional lower aggregate insertion for per-PU pre-aggregation +// ============================================================================ + +// Map pac function names to their clip variants +static string GetClipVariant(const string &name) { + if (name == "pac_noised_sum") { + return "pac_noised_clip_sum"; + } + if (name == "pac_noised_count") { + return "pac_noised_clip_count"; + } + if (name == "pac_noised_min") { + return "pac_noised_clip_min"; + } + if (name == "pac_noised_max") { + return "pac_noised_clip_max"; + } + if (name == "pac_sum") { + return "pac_clip_sum"; + } + if (name == "pac_count") { + return "pac_clip_count"; + } + if (name == "pac_min") { + return "pac_clip_min"; + } + if (name == "pac_max") { + return "pac_clip_max"; + } + return ""; // not a pac aggregate +} + +// Map pac function names to their original DuckDB aggregate +static string GetOriginalAggregate(const string &name) { + if (name == "pac_noised_sum" || name == "pac_sum") { + return "sum"; + } + if (name == 
"pac_noised_count" || name == "pac_count") { + return "count"; + } + if (name == "pac_noised_min" || name == "pac_min") { + return "min"; + } + if (name == "pac_noised_max" || name == "pac_max") { + return "max"; + } + return ""; +} + +// Is this a noised (scalar) variant? If so, top aggregate uses pac_noised_clip_* +static bool IsNoisedVariant(const string &name) { + return name.find("pac_noised_") == 0; +} + +// Bind a plain DuckDB aggregate function (sum, count, min, max) +static unique_ptr BindPlainAggregate(OptimizerExtensionInput &input, const string &func_name, + vector> children) { + FunctionBinder function_binder(input.context); + ErrorData error; + vector arg_types; + for (auto &child : children) { + arg_types.push_back(child->return_type); + } + auto &entry = Catalog::GetSystemCatalog(input.context) + .GetEntry(input.context, DEFAULT_SCHEMA, func_name); + auto best = function_binder.BindFunction(entry.name, entry.functions, arg_types, error); + if (!best.IsValid()) { + throw InternalException("PAC clip rewrite: failed to bind " + func_name); + } + auto func = entry.functions.GetFunctionByOffset(best.GetIndex()); + return function_binder.BindAggregateFunction(func, std::move(children), nullptr, AggregateType::NON_DISTINCT); +} + +// Check if an aggregate contains pac_noised_* or pac_* (counters) expressions +static bool IsPacAggregate(LogicalAggregate *agg) { + for (auto &expr : agg->expressions) { + if (expr->GetExpressionClass() != ExpressionClass::BOUND_AGGREGATE) { + continue; + } + auto &aggr = expr->Cast(); + if (!GetClipVariant(aggr.function.name).empty()) { + return true; + } + } + return false; +} + +void RewriteClipAggregates(OptimizerExtensionInput &input, unique_ptr &plan, + const PACCompatibilityResult &check, const vector &privacy_units) { + // Find all aggregate nodes + vector all_aggregates; + FindAllAggregates(plan, all_aggregates); + + for (auto *agg : all_aggregates) { + if (!IsPacAggregate(agg)) { + continue; + } + + // Check Q13 
exception: does the child aggregate already group by PU key? + bool child_groups_by_pu = false; + for (auto &child : agg->children) { + auto *inner_agg = FindFirstChildAggregate(child.get()); + if (inner_agg && AggregateGroupsByPUKey(inner_agg, check, privacy_units)) { + child_groups_by_pu = true; + break; + } + } + + if (child_groups_by_pu) { + // Q13 exception: just rename pac_noised_* → pac_noised_clip_* in place + for (idx_t i = 0; i < agg->expressions.size(); i++) { + if (agg->expressions[i]->GetExpressionClass() != ExpressionClass::BOUND_AGGREGATE) { + continue; + } + auto &aggr = agg->expressions[i]->Cast(); + string clip_name = GetClipVariant(aggr.function.name); + if (clip_name.empty()) { + continue; + } + // Rebind with the clip variant name + vector> children; + for (auto &child : aggr.children) { + children.push_back(child->Copy()); + } + agg->expressions[i] = RebindAggregate(input.context, clip_name, std::move(children), false); + } + agg->ResolveOperatorTypes(); + continue; + } + + // Normal path: insert lower aggregate + auto &binder = input.optimizer.binder; + idx_t lower_group_index = binder.GenerateTableIndex(); + idx_t lower_agg_index = binder.GenerateTableIndex(); + + // Identify the PU hash expression from the first pac aggregate's first child (hash arg) + unique_ptr pu_hash_expr; + for (auto &expr : agg->expressions) { + if (expr->GetExpressionClass() != ExpressionClass::BOUND_AGGREGATE) { + continue; + } + auto &aggr = expr->Cast(); + if (!GetClipVariant(aggr.function.name).empty() && !aggr.children.empty()) { + pu_hash_expr = aggr.children[0]->Copy(); + break; + } + } + if (!pu_hash_expr) { + continue; // shouldn't happen + } + + idx_t num_original_groups = agg->groups.size(); + + // Build lower aggregate expressions (plain DuckDB aggregates) + vector> lower_expressions; + for (idx_t i = 0; i < agg->expressions.size(); i++) { + auto &aggr = agg->expressions[i]->Cast(); + string orig_name = GetOriginalAggregate(aggr.function.name); + if 
(orig_name.empty()) { + throw InternalException("PAC clip rewrite: unexpected aggregate " + aggr.function.name); + } + + vector> plain_children; + if (orig_name == "count" && (aggr.children.size() <= 1)) { + // pac_noised_count(hash) or pac_count(hash) → count_star() + // pac_noised_count(hash, col) → count(col) — but children[1] might be constant 1 + if (aggr.children.size() >= 2) { + auto &val_child = aggr.children[1]; + // Check if it's a constant 1 (from count_star rewrite) + if (val_child->type == ExpressionType::VALUE_CONSTANT) { + auto &const_expr = val_child->Cast(); + if (const_expr.value.IsNull() || const_expr.value == Value::BIGINT(1)) { + // count_star — no children + } else { + plain_children.push_back(val_child->Copy()); + } + } else { + plain_children.push_back(val_child->Copy()); + } + } + lower_expressions.push_back(BindPlainAggregate(input, "count_star", std::move(plain_children))); + } else if (orig_name == "count" && aggr.children.size() > 1) { + // count with column reference + plain_children.push_back(aggr.children[1]->Copy()); + lower_expressions.push_back(BindPlainAggregate(input, "count", std::move(plain_children))); + } else { + // sum, min, max — extract the value child (children[1]) + if (aggr.children.size() >= 2) { + plain_children.push_back(aggr.children[1]->Copy()); + } + lower_expressions.push_back(BindPlainAggregate(input, orig_name, std::move(plain_children))); + } + } + + // Create lower aggregate node + auto lower_agg = make_uniq(lower_group_index, lower_agg_index, std::move(lower_expressions)); + + // Copy original groups + add PU hash as extra group + for (auto &g : agg->groups) { + lower_agg->groups.push_back(g->Copy()); + } + lower_agg->groups.push_back(pu_hash_expr->Copy()); + + // Steal top's child → lower's child + lower_agg->children.push_back(std::move(agg->children[0])); + lower_agg->ResolveOperatorTypes(); + + // Rewrite top aggregate's groups to reference lower's group output + for (idx_t i = 0; i < 
num_original_groups; i++) { + auto gtype = agg->groups[i]->return_type; + agg->groups[i] = make_uniq(gtype, ColumnBinding(lower_group_index, i)); + } + + // PU hash ref from lower's group output + auto pu_hash_ref = make_uniq(pu_hash_expr->return_type, + ColumnBinding(lower_group_index, num_original_groups)); + + // Rewrite top aggregate's expressions to clip variants + for (idx_t i = 0; i < agg->expressions.size(); i++) { + auto &aggr = agg->expressions[i]->Cast(); + string pac_name = aggr.function.name; + bool noised = IsNoisedVariant(pac_name); + string orig = GetOriginalAggregate(pac_name); + + // Reference to lower aggregate's result + auto lower_type = lower_agg->types[num_original_groups + 1 + i]; + unique_ptr lower_ref = + make_uniq(lower_type, ColumnBinding(lower_agg_index, i)); + + // pac_clip_sum has integer + DECIMAL overloads but no FLOAT/DOUBLE. + // Cast FLOAT/DOUBLE to BIGINT so binding succeeds. + if ((orig == "sum" || orig == "count") && + (lower_type.id() == LogicalTypeId::FLOAT || lower_type.id() == LogicalTypeId::DOUBLE)) { + lower_ref = + BoundCastExpression::AddCastToType(input.context, std::move(lower_ref), LogicalType::BIGINT); + } + + // count → sumcount (preserves BIGINT return type), others → clip variant + string clip_func; + if (orig == "count") { + clip_func = noised ? 
"pac_noised_clip_sumcount" : "pac_clip_sum"; + } else { + clip_func = GetClipVariant(pac_name); + } + + agg->expressions[i] = + BindPacAggregate(input, clip_func, pu_hash_ref->Copy(), std::move(lower_ref), nullptr); + } + + // Set lower as top's child + agg->children[0] = std::move(lower_agg); + agg->ResolveOperatorTypes(); + } +} + } // namespace duckdb diff --git a/src/query_processing/pac_plan_traversal.cpp b/src/query_processing/pac_plan_traversal.cpp index 24bec2ac..fbbb3113 100644 --- a/src/query_processing/pac_plan_traversal.cpp +++ b/src/query_processing/pac_plan_traversal.cpp @@ -812,7 +812,7 @@ bool AggregateGroupsByPUKey(LogicalAggregate *agg, const PACCompatibilityResult } // Find the first aggregate in a subtree (depth-first). -static LogicalAggregate *FindFirstChildAggregate(LogicalOperator *op) { +LogicalAggregate *FindFirstChildAggregate(LogicalOperator *op) { if (!op) { return nullptr; } diff --git a/test/sql/pac_clip_sum.test b/test/sql/pac_clip_sum.test new file mode 100644 index 00000000..dd20e698 --- /dev/null +++ b/test/sql/pac_clip_sum.test @@ -0,0 +1,214 @@ +# name: test/sql/pac_clip_sum.test +# description: Test pac_clip_sum and pac_noised_clip_sum aggregate functions with clipping +# group: [sql] + +require pac + +statement ok +PRAGMA clear_pac_metadata; + +statement ok +SET pac_seed = 42 + +statement ok +SET threads = 1 + +statement ok +SET pac_mi = 0 + +# ============================================================================ +# Basic pac_clip_sum correctness (returns LIST) +# ============================================================================ + +statement ok +CREATE TABLE test_data AS +SELECT i AS rowid, i % 3 AS grp, (i % 100) AS value +FROM range(4000) t(i) + +# pac_clip_sum returns LIST (64 counters) +query I +SELECT typeof(pac_clip_sum(hash(rowid)::UBIGINT, value::INTEGER)) FROM test_data +---- +FLOAT[] + +# pac_noised_clip_sum returns HUGEINT (fused noised scalar) +query I +SELECT 
typeof(pac_noised_clip_sum(hash(rowid)::UBIGINT, value::INTEGER)) FROM test_data +---- +HUGEINT + +# Works with different types +query I +SELECT pac_noised_clip_sum(hash(rowid)::UBIGINT, value::BIGINT) IS NOT NULL FROM test_data +---- +true + +query I +SELECT pac_noised_clip_sum(hash(rowid)::UBIGINT, value::SMALLINT) IS NOT NULL FROM test_data +---- +true + +# Grouped aggregation +query I +SELECT count(*) FROM ( + SELECT grp, pac_noised_clip_sum(hash(rowid)::UBIGINT, value::INTEGER) as s + FROM test_data GROUP BY grp +) t WHERE s IS NOT NULL +---- +3 + +# NULL handling +query I +SELECT pac_noised_clip_sum(hash(rowid)::UBIGINT, CASE WHEN rowid % 2 = 0 THEN value ELSE NULL END) IS NOT NULL +FROM test_data +---- +true + +# ============================================================================ +# Clipping: outlier elimination via pac_noised_clip_sum +# ============================================================================ + +# Create data with 1000 normal rows and 1 huge outlier +statement ok +CREATE TABLE outlier_test AS +SELECT i as id, + CASE WHEN i <= 1000 THEN (i % 10) + 1 + ELSE 1000000 + END as value +FROM range(1, 1002) t(i) + +# Without clip_support: result includes the outlier (expect ~1M+ range) +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) > 100000 FROM outlier_test +---- +true + +# With clip_support=5: outlier level has only 1 contributor, gets clipped +# Result should be close to sum without outlier = 5500 +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) < 100000 FROM outlier_test +---- +true + +# Verify the clipped result is in the right ballpark (within 5x of 5500) +query I +SELECT abs(pac_noised_clip_sum(hash(id)::UBIGINT, value) - 5500) < 5500 * 5 FROM outlier_test +---- +true + +# Soft clamp: outlier contributes a small nonzero amount (not hard-zeroed) +# Compare with a baseline that has no outlier at all +query I +SELECT (SELECT 
pac_noised_clip_sum(hash(id)::UBIGINT, value) FROM outlier_test) + > (SELECT pac_noised_clip_sum(hash(i)::UBIGINT, (i % 10) + 1) FROM range(1, 1001) t(i)) +---- +true + +# ============================================================================ +# Clipping with grouped data: outlier in one group, normal in another +# ============================================================================ + +statement ok +CREATE TABLE grouped_outlier AS +SELECT i as id, i % 2 as grp, + CASE WHEN i <= 1000 THEN (i % 10) + 1 + WHEN i = 1001 THEN 10000000 -- massive outlier in group 1 + ELSE (i % 10) + 1 + END as value +FROM range(1, 1003) t(i) + +# Group 0 (even ids): ~500 normal values, no outlier +# Group 1 (odd ids): ~500 normal values + one 10M outlier +# With clip_support: the outlier's level should be clipped in group 1 +query IT +SELECT grp, pac_noised_clip_sum(hash(id)::UBIGINT, value) < 100000 as reasonable +FROM grouped_outlier +GROUP BY grp +ORDER BY grp +---- +0 true +1 true + +# ============================================================================ +# Clipping with negative values (two-sided) +# ============================================================================ + +statement ok +CREATE TABLE neg_outlier AS +SELECT i as id, + CASE WHEN i <= 1000 THEN i - 500 -- normal: -499 to 500 + ELSE -5000000 -- single negative outlier + END as value +FROM range(1, 1002) t(i) + +# Without clip_support: outlier drags result very negative +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) < -100000 FROM neg_outlier +---- +true + +# With clip_support: outlier clipped, result near 500 (sum of 1..500 - sum of 1..499 = 500) +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) > -100000 FROM neg_outlier +---- +true + +# ============================================================================ +# HUGEINT support +# 
============================================================================ + +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_sum(hash(i)::UBIGINT, i::HUGEINT) IS NOT NULL FROM range(1, 101) t(i) +---- +true + +# ============================================================================ +# Clip synonyms: pac_clip_count, pac_clip_min, pac_clip_max exist +# ============================================================================ + +query I +SELECT typeof(pac_clip_count(hash(i)::UBIGINT)) FROM range(1, 101) t(i) +---- +FLOAT[] + +query I +SELECT typeof(pac_noised_clip_count(hash(i)::UBIGINT)) FROM range(1, 101) t(i) +---- +BIGINT + +# ============================================================================ +# pac_clip_support setting via compiler (sum → pac_noised_clip_sum) +# ============================================================================ + +statement ok +SET pac_clip_support = 64 + +statement ok +CREATE TABLE compiler_test (id INTEGER, value INTEGER) + +statement ok +ALTER TABLE compiler_test ADD PAC_KEY (id) + +statement ok +ALTER TABLE compiler_test SET PU + +statement ok +INSERT INTO compiler_test SELECT i, i % 100 FROM range(1, 1001) t(i) + +# When pac_clip_support is set, regular SUM should go through clip rewrite +query I +SELECT sum(value) IS NOT NULL FROM compiler_test +---- +true From c848241362d7c928c8e62edc09744b4baad56f02 Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 14:35:52 +0100 Subject: [PATCH 12/27] Hard-zero unsupported outlier levels in pac_clip_sum MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change suffix attenuation from soft-clamp (scale by 16^distance) to hard-zero (skip entirely). Unsupported magnitude levels now contribute nothing to the result, fully eliminating the variance side-channel. 
Attack results with clip_support=2: - Small filter (3-4 users): 96% → 47% (random) - 20K small items: 96% → 53% (random) - Std ratio in/out: 90x → 0.87x Co-Authored-By: Claude Opus 4.6 (1M context) --- src/include/aggregates/pac_clip_sum.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/include/aggregates/pac_clip_sum.hpp b/src/include/aggregates/pac_clip_sum.hpp index 7bb0c87b..ddd9d80f 100644 --- a/src/include/aggregates/pac_clip_sum.hpp +++ b/src/include/aggregates/pac_clip_sum.hpp @@ -239,8 +239,8 @@ struct PacClipSumIntState { // Prefix: clamp scale up to first supported level effective_level = first_supported; } else if (k > last_supported) { - // Suffix: clamp scale down to last supported level - effective_level = last_supported; + // Suffix: hard zero — unsupported outlier levels contribute nothing + continue; } } else if (clip_support_threshold > 0 && first_supported < 0) { // No supported levels at all — zero everything From 9d6bb04c4e71f6b9457d216adf02ef85ffb18c2a Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 15:48:37 +0100 Subject: [PATCH 13/27] =?UTF-8?q?Reduce=20pac=5Fclip=5Fsum=20level=20width?= =?UTF-8?q?=20from=2016x=20to=204x=20(shift=3D4=20=E2=86=92=20shift=3D2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Finer-grained magnitude levels (2-bit bands, 4x per level) allow the clipping mechanism to catch moderate outliers that were previously invisible within the same 16x-wide level. A 10x outlier (50k vs 5k normal) now lands in a different level and gets hard-zeroed. Changes: - PAC2_LEVEL_SHIFT: 4 → 2 - PAC2_NUM_LEVELS: 31 → 32 (covers int64; HUGEINT clamps to level 31) - GetLevel/GetLevel128: divide by 2 instead of 4, clamp to max level - Inline optimization threshold: 13 → 14 - All shift extraction: level << 2 → level << 1 Memory: +8 bytes per state (256 vs 248 byte pointer array). Negligible. Performance: no regression on TPCH Q01 SF1 (1.38s → 1.31s). 
Security: moderate outlier attack drops from 76.5% to 52.9% (random). Co-Authored-By: Claude Opus 4.6 (1M context) --- src/aggregates/pac_clip_sum.cpp | 10 +++++----- src/include/aggregates/pac_clip_sum.hpp | 26 ++++++++++++------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/aggregates/pac_clip_sum.cpp b/src/aggregates/pac_clip_sum.cpp index 26a3ebe6..88d7f5ff 100644 --- a/src/aggregates/pac_clip_sum.cpp +++ b/src/aggregates/pac_clip_sum.cpp @@ -14,7 +14,7 @@ AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, state.key_hash |= key_hash; int level = PacClipSumIntState::GetLevel(value); - uint64_t shift = level << 2; + uint64_t shift = level << 1; uint16_t shifted_val = static_cast(value >> shift); // max 255 (8 bits) state.EnsureLevelAllocated(allocator, level); @@ -46,7 +46,7 @@ AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, } int level = PacClipSumIntState::GetLevel128(upper, lower); - uint64_t shift = level << 2; + uint64_t shift = level << 1; // Shift the 128-bit value right by shift bits, take lower 8 bits uint16_t shifted_val; @@ -96,7 +96,7 @@ inline void PacClipSumRouteHugeint(PacClipSumStateWrapper &wrapper, PacClipSumIn uint64_t upper = static_cast(abs_val.upper); uint64_t lower = abs_val.lower; int level = PacClipSumIntState::GetLevel128(upper, lower); - uint64_t shift = level << 2; + uint64_t shift = level << 1; uint16_t shifted_val; if (shift >= 64) { shifted_val = static_cast(upper >> (shift - 64)); @@ -305,7 +305,7 @@ static void PacClipSumUpdateUHugeInt(Vector inputs[], AggregateInputData &aggr, uint64_t upper = static_cast(v.upper); uint64_t lower = v.lower; int level = PacClipSumIntState::GetLevel128(upper, lower); - uint64_t shift = level << 2; + uint64_t shift = level << 1; uint16_t shifted_val; if (shift >= 64) { shifted_val = static_cast(upper >> (shift - 64)); @@ -347,7 +347,7 @@ static void PacClipSumScatterUpdateUHugeInt(Vector inputs[], 
AggregateInputData uint64_t upper = static_cast(v.upper); uint64_t lower = v.lower; int level = PacClipSumIntState::GetLevel128(upper, lower); - uint64_t shift = level << 2; + uint64_t shift = level << 1; uint16_t shifted_val; if (shift >= 64) { shifted_val = static_cast(upper >> (shift - 64)); diff --git a/src/include/aggregates/pac_clip_sum.hpp b/src/include/aggregates/pac_clip_sum.hpp index ddd9d80f..00456bcd 100644 --- a/src/include/aggregates/pac_clip_sum.hpp +++ b/src/include/aggregates/pac_clip_sum.hpp @@ -18,12 +18,12 @@ void RegisterPacNoisedClipSumCountFunctions(ExtensionLoader &loader); // ============================================================================ // Constants // ============================================================================ -constexpr int PAC2_NUM_LEVELS = 31; +constexpr int PAC2_NUM_LEVELS = 32; // 32 levels × 2-bit bands covers 64-bit; HUGEINT clamps to level 31 constexpr int PAC2_NORMAL_SWAR = 16; // 16 x uint64_t = 64 x uint16_t SWAR counters constexpr int PAC2_NORMAL_ELEMENTS = 18; // 16 SWAR + 1 packed ptr/ec + 1 bitmap constexpr int PAC2_OVERFLOW_SWAR = 32; // 32 x uint64_t = 64 x uint32_t SWAR counters constexpr int PAC2_OVERFLOW_ELEMENTS = 33; // 32 SWAR + 1 exact_count -constexpr int PAC2_LEVEL_SHIFT = 4; +constexpr int PAC2_LEVEL_SHIFT = 2; // 2^2 = 4x per level (was 4 = 16x per level) constexpr uint64_t PAC2_SWAR_MASK_16 = 0x0001000100010001ULL; // ============================================================================ @@ -65,13 +65,13 @@ struct PacClipSumIntState { int8_t max_level_used; // -1 if none int8_t inline_level_idx; // which level uses inline, -1 if none - // 31 level pointers = 248 bytes. - // Inline optimization: last 18 slots (indices 13..30) = 144 bytes = one normal level. - // Levels 0-12 can use inline storage without overlapping their own pointer slot. + // 32 level pointers = 256 bytes. + // Inline optimization: last 18 slots (indices 14..31) = 144 bytes = one normal level. 
+ // Levels 0-13 can use inline storage without overlapping their own pointer slot. union { - uint64_t *levels[PAC2_NUM_LEVELS]; // 248 bytes + uint64_t *levels[PAC2_NUM_LEVELS]; // 256 bytes struct { - uint64_t *_ptrs[13]; // levels 0-12 pointers (104 bytes) + uint64_t *_ptrs[14]; // levels 0-13 pointers (112 bytes) uint64_t inline_level[PAC2_NORMAL_ELEMENTS]; // 144 bytes for one inline level }; }; @@ -84,32 +84,32 @@ struct PacClipSumIntState { return 0; } int bit_pos = 63 - pac_clzll(abs_val); - return (bit_pos - 4) >> 2; + return std::min((bit_pos - 4) >> 1, PAC2_NUM_LEVELS - 1); } - // For 128-bit (hugeint) values + // For 128-bit (hugeint) values — clamps to max level for very large values static inline int GetLevel128(uint64_t upper, uint64_t lower) { if (upper == 0) { return GetLevel(lower); } int bit_pos = 127 - pac_clzll(upper); - return (bit_pos - 4) >> 2; + return std::min((bit_pos - 4) >> 1, PAC2_NUM_LEVELS - 1); } // ======================================================================== // Level allocation // ======================================================================== inline void AllocateLevel(ArenaAllocator &allocator, int k) { - if (k >= 13 && inline_level_idx >= 0) { + if (k >= 14 && inline_level_idx >= 0) { // Evict inline level to arena auto *ext = reinterpret_cast(allocator.Allocate(PAC2_NORMAL_ELEMENTS * sizeof(uint64_t))); memcpy(ext, inline_level, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); levels[inline_level_idx] = ext; inline_level_idx = -1; - // Clear inline area so levels[13..30] read as nullptr + // Clear inline area so levels[14..31] read as nullptr memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); } - if (k < 13 && inline_level_idx < 0) { + if (k < 14 && inline_level_idx < 0) { // Use inline storage levels[k] = inline_level; memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); From 4aa4e8ffe23fb540164158dd456a480e503fc04d Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 15:50:05 
+0100 Subject: [PATCH 14/27] Fix pac_clip_sum test: adjust soft-clamp assertion for hard-zero behavior With hard-zero, unsupported outlier levels contribute nothing, so the clipped result equals (not exceeds) the no-outlier baseline. Change > to >=. Co-Authored-By: Claude Opus 4.6 (1M context) --- test/sql/pac_clip_sum.test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/sql/pac_clip_sum.test b/test/sql/pac_clip_sum.test index dd20e698..0a4b1ee9 100644 --- a/test/sql/pac_clip_sum.test +++ b/test/sql/pac_clip_sum.test @@ -99,11 +99,11 @@ SELECT abs(pac_noised_clip_sum(hash(id)::UBIGINT, value) - 5500) < 5500 * 5 FROM ---- true -# Soft clamp: outlier contributes a small nonzero amount (not hard-zeroed) -# Compare with a baseline that has no outlier at all +# Hard-zero: outlier level is unsupported and contributes nothing, +# so result should be >= no-outlier baseline (equal or greater due to noise) query I SELECT (SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) FROM outlier_test) - > (SELECT pac_noised_clip_sum(hash(i)::UBIGINT, (i % 10) + 1) FROM range(1, 1001) t(i)) + >= (SELECT pac_noised_clip_sum(hash(i)::UBIGINT, (i % 10) + 1) FROM range(1, 1001) t(i)) ---- true From ce5696310916742eb039f17d63f515d510085d03 Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 16:23:01 +0100 Subject: [PATCH 15/27] Extend pac_clip_sum to 62 levels for full HUGEINT support Increase PAC2_NUM_LEVELS from 32 to 62 to cover the full 128-bit range without clamping. int64 values naturally use only levels 0-29 (the extra pointer slots remain NULL, no per-level data is allocated). The inline optimization threshold moves from 14 to 44 accordingly. Memory: +240 bytes per state for the pointer array (496 vs 256 bytes). Per-level data allocations are unchanged for int64 workloads. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- src/include/aggregates/pac_clip_sum.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/include/aggregates/pac_clip_sum.hpp b/src/include/aggregates/pac_clip_sum.hpp index 00456bcd..9f15a5b4 100644 --- a/src/include/aggregates/pac_clip_sum.hpp +++ b/src/include/aggregates/pac_clip_sum.hpp @@ -18,7 +18,7 @@ void RegisterPacNoisedClipSumCountFunctions(ExtensionLoader &loader); // ============================================================================ // Constants // ============================================================================ -constexpr int PAC2_NUM_LEVELS = 32; // 32 levels × 2-bit bands covers 64-bit; HUGEINT clamps to level 31 +constexpr int PAC2_NUM_LEVELS = 62; // 62 levels × 2-bit bands covers full 128-bit (int64 uses ≤30) constexpr int PAC2_NORMAL_SWAR = 16; // 16 x uint64_t = 64 x uint16_t SWAR counters constexpr int PAC2_NORMAL_ELEMENTS = 18; // 16 SWAR + 1 packed ptr/ec + 1 bitmap constexpr int PAC2_OVERFLOW_SWAR = 32; // 32 x uint64_t = 64 x uint32_t SWAR counters @@ -65,13 +65,13 @@ struct PacClipSumIntState { int8_t max_level_used; // -1 if none int8_t inline_level_idx; // which level uses inline, -1 if none - // 32 level pointers = 256 bytes. - // Inline optimization: last 18 slots (indices 14..31) = 144 bytes = one normal level. - // Levels 0-13 can use inline storage without overlapping their own pointer slot. + // 62 level pointers = 496 bytes. + // Inline optimization: last 18 slots (indices 44..61) = 144 bytes = one normal level. + // Levels 0-43 can use inline storage without overlapping their own pointer slot. 
union { - uint64_t *levels[PAC2_NUM_LEVELS]; // 256 bytes + uint64_t *levels[PAC2_NUM_LEVELS]; // 496 bytes struct { - uint64_t *_ptrs[14]; // levels 0-13 pointers (112 bytes) + uint64_t *_ptrs[44]; // levels 0-43 pointers (352 bytes) uint64_t inline_level[PAC2_NORMAL_ELEMENTS]; // 144 bytes for one inline level }; }; @@ -100,16 +100,16 @@ struct PacClipSumIntState { // Level allocation // ======================================================================== inline void AllocateLevel(ArenaAllocator &allocator, int k) { - if (k >= 14 && inline_level_idx >= 0) { + if (k >= 44 && inline_level_idx >= 0) { // Evict inline level to arena auto *ext = reinterpret_cast(allocator.Allocate(PAC2_NORMAL_ELEMENTS * sizeof(uint64_t))); memcpy(ext, inline_level, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); levels[inline_level_idx] = ext; inline_level_idx = -1; - // Clear inline area so levels[14..31] read as nullptr + // Clear inline area so levels[44..61] read as nullptr memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); } - if (k < 14 && inline_level_idx < 0) { + if (k < 44 && inline_level_idx < 0) { // Use inline storage levels[k] = inline_level; memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); From cd38190736737893cb82c117ab523d7ae8e6dfb2 Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 16:25:46 +0100 Subject: [PATCH 16/27] Add tests for level boundaries, HUGEINT clipping, over-clipping, multi-group MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New test cases: - Level boundary routing (same-level vs cross-level with 4x bands) - HUGEINT outlier clipping (values at 2^70, beyond int64 range) - Negative HUGEINT outlier via neg_state - Over-clipping (clip_support > group size → zero result) - Multi-group with outlier isolated to one group Co-Authored-By: Claude Opus 4.6 (1M context) --- test/sql/pac_clip_sum.test | 155 +++++++++++++++++++++++++++++++++++++ 1 file changed, 155 insertions(+) diff 
--git a/test/sql/pac_clip_sum.test b/test/sql/pac_clip_sum.test index 0a4b1ee9..1f94a454 100644 --- a/test/sql/pac_clip_sum.test +++ b/test/sql/pac_clip_sum.test @@ -212,3 +212,158 @@ query I SELECT sum(value) IS NOT NULL FROM compiler_test ---- true + +# ============================================================================ +# Level boundary tests (4x bands with shift=2) +# Values at exact level boundaries should be correctly routed +# ============================================================================ + +statement ok +SET pac_clip_support = 5 + +# Level 0: 0-255, Level 1: 256-1023, Level 2: 1024-4095, Level 3: 4096-16383 +# 1000 rows at value=100 (level 0), 1 outlier at 4096 (level 3) → different level → clipped +statement ok +CREATE TABLE boundary_test AS +SELECT i as id, + CASE WHEN i <= 1000 THEN 100 + ELSE 4096 + END as value +FROM range(1, 1002) t(i) + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) < 200000 FROM boundary_test +---- +true + +# Outlier at 255 (max level 0) — same level as value=100 → NOT clipped +statement ok +CREATE TABLE same_level_test AS +SELECT i as id, + CASE WHEN i <= 1000 THEN 100 + ELSE 255 + END as value +FROM range(1, 1002) t(i) + +# Without clipping: sum = 1000*100 + 255 = 100255 +statement ok +SET pac_clip_support = NULL + +query I +SELECT abs(pac_noised_clip_sum(hash(id)::UBIGINT, value) - 100255) < 50000 FROM same_level_test +---- +true + +# With clipping: 255 is same level as 100, so it's NOT clipped — result similar +statement ok +SET pac_clip_support = 5 + +query I +SELECT abs(pac_noised_clip_sum(hash(id)::UBIGINT, value) - 100255) < 50000 FROM same_level_test +---- +true + +# ============================================================================ +# HUGEINT outlier clipping (levels 30+, beyond int64 range) +# ============================================================================ + +statement ok +SET pac_clip_support = 5 + +# Normal values + one HUGEINT outlier at 2^70 +statement ok 
+CREATE TABLE hugeint_outlier AS +SELECT i as id, + CASE WHEN i <= 1000 THEN i::HUGEINT + ELSE (1::HUGEINT << 70) + END as value +FROM range(1, 1002) t(i) + +# Without clip_support: result includes the huge outlier +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) > 1000000000000 FROM hugeint_outlier +---- +true + +# With clip_support: outlier at high level gets hard-zeroed +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) < 1000000000000 FROM hugeint_outlier +---- +true + +# ============================================================================ +# Negative HUGEINT outlier +# ============================================================================ + +statement ok +CREATE TABLE neg_hugeint_outlier AS +SELECT i as id, + CASE WHEN i <= 1000 THEN (i * 100)::HUGEINT + ELSE -(1::HUGEINT << 70) + END as value +FROM range(1, 1002) t(i) + +# Without clip_support: huge negative outlier dominates +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) < -1000000000000 FROM neg_hugeint_outlier +---- +true + +# With clip_support: negative outlier clipped via neg_state +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) > -1000000000000 FROM neg_hugeint_outlier +---- +true + +# ============================================================================ +# Over-clipping: pac_clip_support higher than group size → zero result +# ============================================================================ + +statement ok +CREATE TABLE tiny_group AS SELECT i as id, i * 10 as value FROM range(1, 4) t(i) + +# 3 rows, clip_support=10 → no level has 10 contributors → all zeroed +statement ok +SET pac_clip_support = 10 + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) FROM tiny_group +---- +0 + +# 
============================================================================ +# Multi-group with outlier in one group only +# ============================================================================ + +statement ok +SET pac_clip_support = 5 + +statement ok +CREATE TABLE multigroup AS +SELECT i as id, i % 4 as grp, + CASE WHEN i = 997 THEN 9999999 -- outlier in group 1 (997 % 4 = 1) + ELSE (i % 50) + 1 + END as value +FROM range(1, 1001) t(i) + +# Group 1 has the outlier — should be clipped to reasonable range +# Other groups are normal +query I +SELECT count(*) FROM ( + SELECT grp, pac_noised_clip_sum(hash(id)::UBIGINT, value) as s + FROM multigroup GROUP BY grp +) t WHERE abs(s) < 500000 +---- +4 From 051dc4adafc0cae89c55e113c3d41e2f9588a399 Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 16:32:53 +0100 Subject: [PATCH 17/27] Add CLAUDE.md with development rules and project guidance Fetched from main and added: - Development rules: test coverage, no test removal, codebase-first search, helper function reuse, duckdb submodule is read-only - Reference to the PAC paper (arXiv:2603.15023) - PAC_DEBUG_PRINT usage guidance Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index e497ad57..21b11dba 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -10,9 +10,19 @@ Always test your changes with real queries (e.g., TPC-H on sf1) before declaring Never execute git commands that could lose code. Always ask the user for permission on those. +## Development rules + +- **New features must have tests.** Ask the user whether to create a new test file or extend an existing one in `test/sql/`. +- **Never remove a failing test to "fix" a failure.** If a test fails, fix the underlying bug. Tests exist for a reason. +- **Before implementing anything, search the existing codebase** for similar patterns or solutions. 
Check if a helper function, utility, or prior approach already addresses the problem. Reuse before reinventing. +- **Use helper functions.** Factor shared logic into helpers rather than duplicating code. Check `src/include/utils/` and existing helpers in the file you're editing. +- **Never edit the `duckdb/` submodule.** The DuckDB source is read-only. All PAC logic lives in `src/` and `test/`. If you need DuckDB internals, use the public API or ask the user. +- **Keep the paper in mind.** The PAC mechanism is described in [SIMD-PAC-DB: Pretty Performant PAC Privacy](https://arxiv.org/abs/2603.15023). Refer to it for the theoretical foundations (noise calibration, mutual information bounds, counter semantics) before making changes to core aggregate logic. +- **Add `PAC_DEBUG_PRINT` statements** at major code flow points (entry/exit of compilation phases, aggregate rewrites, clipping decisions). Use the existing `PAC_DEBUG_PRINT` macro from `src/include/pac_debug.hpp` — it's compiled out when `PAC_DEBUG` is 0. + ## What is PAC? -PAC (Pretty Accurate Counting) is a DuckDB extension that automatically privatizes SQL aggregate queries. It protects against Membership Inference Attacks by maintaining 64 parallel counters per aggregate (one per "world" bit), adding calibrated noise at finalization. Queries are rewritten transparently — users write normal SQL and PAC transforms it. +PAC (Probably Approximately Correct) is a DuckDB extension that automatically privatizes SQL aggregate queries. It protects against Membership Inference Attacks by maintaining 64 parallel counters per aggregate (one per "world" bit), adding calibrated noise at finalization. Queries are rewritten transparently — users write normal SQL and PAC transforms it. 
## Build & Test From 318870963025c7de2814eb403cc2493ae193ec9a Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 16:51:33 +0100 Subject: [PATCH 18/27] Add attack scripts and evaluation results for pac_clip_sum Attack scripts testing the variance side-channel MIA against pac_clip_sum: - clip_attack_test.sh: main suite (small filter, wide filter, 10K users, etc.) - clip_multirow_test.sh: 20K small items user (tests pre-aggregation) - clip_hardzero_stress.sh: stress tests (high trials, composed queries, collusion) - clip_shift2_stress.sh: tests with 4x magnitude levels (shift=2) - clipping_experiment.sh: input clipping (Winsorization) baseline - output_clipping_experiment.sh: post-hoc output clipping baseline - output_clipping_v2_experiment.sh: output clipping before noise - clip_attack_results.md: full evaluation with findings Co-Authored-By: Claude Opus 4.6 (1M context) --- attacks/clip_attack_results.md | 314 +++++++++++++++++++++++ attacks/clip_attack_test.sh | 196 ++++++++++++++ attacks/clip_hardzero_stress.sh | 297 +++++++++++++++++++++ attacks/clip_multirow_test.sh | 136 ++++++++++ attacks/clip_shift2_stress.sh | 241 +++++++++++++++++ attacks/clipping_experiment.sh | 215 ++++++++++++++++ attacks/output_clipping_experiment.sh | 244 ++++++++++++++++++ attacks/output_clipping_v2_experiment.sh | 220 ++++++++++++++++ 8 files changed, 1863 insertions(+) create mode 100644 attacks/clip_attack_results.md create mode 100755 attacks/clip_attack_test.sh create mode 100755 attacks/clip_hardzero_stress.sh create mode 100755 attacks/clip_multirow_test.sh create mode 100755 attacks/clip_shift2_stress.sh create mode 100755 attacks/clipping_experiment.sh create mode 100755 attacks/output_clipping_experiment.sh create mode 100644 attacks/output_clipping_v2_experiment.sh diff --git a/attacks/clip_attack_results.md b/attacks/clip_attack_results.md new file mode 100644 index 00000000..5baf8847 --- /dev/null +++ b/attacks/clip_attack_results.md @@ -0,0 +1,314 @@ +# 
pac_clip_sum Attack Evaluation + +Evaluates whether `pac_clip_sum`'s support-based outlier attenuation defeats the +variance side-channel membership inference attack (MIA). + +## Background + +`pac_clip_sum` (commit 948a504) introduces a two-level aggregation: + +1. **Lower aggregate**: `GROUP BY pu_hash` with plain `SUM` — pre-aggregates per user +2. **Upper aggregate**: `pac_noised_clip_sum` — decomposes values into magnitude levels + (each 16x the previous), tracks a 64-bit bitmap of distinct contributors per level, + and attenuates levels with fewer contributors than `pac_clip_support` + +Supported clip aggregates: SUM, COUNT, MIN, MAX (MIN/MAX not fully implemented). +float/double types not yet supported. + +## Parameters + +- `pac_mi = 0.0078125` (1/128) +- 30 trials per condition unless noted +- Random guess baseline: 50% + +--- + +## Attack 1: Single-query variance classifier (small filter) + +**Setup**: N=1000 users, target=999999, filter<=3 (3-4 users in aggregation) + +| clip_support | Var>200k accuracy | Standard accuracy | std_in | std_out | std ratio | +|-------------|-------------------|-------------------|--------|---------|-----------| +| off | **96.0%** | 80.0% | 10,688,008 | 117,662 | 90.8x | +| 2 | **72.5%** | 62.7% | 613,511 | 122,026 | 5.0x | +| 3 | **72.5%** | 62.7% | 613,511 | 122,026 | 5.0x | + +**Finding**: Clipping reduces attack accuracy from 96% to 72.5% and collapses the +variance ratio from 91x to 5x. The outlier's contribution is attenuated by ~16x +(one magnitude level). However, 72.5% is still well above random — the residual +5x variance gap remains exploitable. + +clip=2 and clip=3 produce identical results because with only 3-4 users per level, +the bitmap support is at most 2-3 regardless of threshold. 
+ +--- + +## Attack 2: Wide filter (clipping best-case) + +**Setup**: N=1000 users, target=999999, filter<=999 (all users in aggregation) + +| clip_support | Var>200k accuracy | Standard accuracy | std_in | std_out | std ratio | +|-------------|-------------------|-------------------|--------|---------|-----------| +| off | 53.3% | 60.0% | 10,424,631 | 1,822,936 | 5.7x | +| 2 | **55.0%** | 53.3% | 1,816,604 | 1,875,047 | **1.0x** | +| 5 | **55.0%** | 53.3% | 1,816,604 | 1,875,047 | **1.0x** | +| 10 | **55.0%** | 53.3% | 1,816,604 | 1,875,047 | **1.0x** | + +**Finding**: With many users in the aggregation, clipping completely eliminates the +variance side-channel (std ratio goes to 1.0x). Attack accuracy drops to ~53-55%, near +random. The outlier's magnitude level has only 1 bitmap contributor vs hundreds at +normal levels — it is cleanly identified and attenuated. + +Note: even without clipping, the attack barely works (53%) because the outlier +signal (1M) is diluted by the large background (5M from 999 users). Clipping +further equalizes the means (5.1M vs 4.9M). + +clip=2, 5, and 10 all produce identical results — with ~1000 users, all normal +levels have saturated bitmaps (est. distinct >> 10), so only the outlier level +is affected. + +--- + +## Attack 3: 10K users, extreme outlier + +**Setup**: N=10000 users, target=9,999,999, filter<=2 (2-3 users in aggregation) + +| clip_support | Var>200k accuracy | Standard accuracy | std_in | std_out | std ratio | +|-------------|-------------------|-------------------|--------|---------|-----------| +| off | **97.8%** | 77.8% | 104,410,859 | 110,259 | 947x | +| 2 | **75.0%** | 47.9% | 396,383 | 112,778 | 3.5x | +| 3 | **47.9%** | 47.9% | 0 | 0 | — | + +**Finding**: clip=2 reduces accuracy from 97.8% to 75% (variance ratio 947x to 3.5x). +clip=3 zeroes ALL results (returns 0 for both in and out) — with only 2-3 users, +no level reaches 3 distinct contributors, so everything is zeroed. 
The attack is +"defeated" at 47.9% but utility is completely destroyed. + +--- + +## Attack 4: Over-clipping + +**Setup**: N=1000 users, target=999999, filter<=3, 15 trials + +| clip_support | Var>200k accuracy | mean_in | mean_out | +|-------------|-------------------|---------|----------| +| off | 91.3% | -2,067,562 | 42,892 | +| 5 | **50.0%** | 0 | 0 | +| 10 | **50.0%** | 0 | 0 | + +**Finding**: With only 3-4 users in the filter, clip_support >= 5 zeroes all output. +Attack accuracy = 50% (random), but every query returns 0. This is not a useful +defense — it's equivalent to refusing to answer. + +**Takeaway**: `pac_clip_support` must be set below the minimum expected number of +users in any aggregation group. For small filters, this severely limits the +clipping threshold. + +--- + +## Attack 5: Wide filter + aggressive clipping + +**Setup**: N=1000 users, target=999999, filter<=999, 15 trials + +| clip_support | Var>200k accuracy | Standard accuracy | std_in | std_out | +|-------------|-------------------|-------------------|--------|---------| +| off | 53.3% | 63.3% | 11,391,246 | 2,172,216 | +| 50 | **50.0%** | 50.0% | 1,219,189 | 2,049,795 | +| 100 | **50.0%** | 50.0% | 1,219,189 | 2,049,795 | + +**Finding**: With a wide filter (1000 users), even aggressive clipping (support=50, 100) +works perfectly — attack accuracy = 50% (random) and noise stds are equalized. +Normal magnitude levels have hundreds of bitmap contributors, far exceeding the +threshold. Only the outlier level (1 contributor) is affected. + +--- + +## Attack 6: Clip-after-filter vs clip-full-table (Dandan's hypothesis) + +Dandan's concern: clipping applied after filtering may leak more than clipping +applied to the entire dataset, because the filter changes which users contribute +to the bitmap, affecting which levels appear "supported." 
+ +**Setup**: N=1000 users, target=999999, filter<=3 + +| Method | Var>200k accuracy | std_in | std_out | std ratio | +|--------|-------------------|--------|---------|-----------| +| No clipping | **96.0%** | 10,688,008 | 117,662 | 90.8x | +| clip-after-filter (pac_clip_support=2) | **72.5%** | 613,511 | 122,026 | 5.0x | +| clip-full-table (pre-clip to mu+3sigma) | **56.9%** | 180,457 | 124,641 | 1.4x | + +**Finding: Dandan is correct.** Pre-clipping the full table then filtering gives +significantly better protection (56.9% vs 72.5%). The reasons: + +1. **Full-table pre-clipping** clamps the billionaire to 13,661 BEFORE PAC sees it. + PAC computes noise from the clamped range [1, 13661], and the noise is nearly + the same for in/out (std ratio 1.4x). + +2. **Clip-after-filter** only sees 3-4 rows. The bitmap has very few bits set per + level, making it harder to distinguish the outlier level from normal levels. + The attenuation is only ~16x (one level), leaving a 5x variance gap. + +However, clip-after-filter is still much better than no clipping (72.5% vs 96%), +confirming Dandan's second point: "this approach is still significantly better +than not applying clipping at all." + +--- + +## Summary + +| Scenario | No clip | pac_clip_support=2 | Pre-clip full table | +|----------|---------|-------------------|---------------------| +| Small filter (3-4 users) | 96% | 72.5% | 56.9% | +| Wide filter (1000 users) | 53% | 55% (side-channel gone) | — | +| 10K users, filter<=2 | 97.8% | 75% | — | + +## Attack 7: 20K small items user (multi-row outlier) + +The core argument for per-user pre-aggregation: a user with 20,000 purchases of +$50 each has normal individual values but a total contribution of $1,000,000. +Per-row Winsorization won't catch this. Does pac_clip_sum's GROUP BY pu_hash? + +**Setup**: N=1000 background users (1 row each, acctbal in [1,10000]). +Target user_id=0: 20,000 rows x $50 = $1,000,000 total. filter<=3. 
+ +| Method | Var>200k accuracy | std_in | std_out | std ratio | +|--------|-------------------|--------|---------|-----------| +| No clipping | **96.0%** | 10,686,722 | 117,662 | 90.8x | +| Winsorization (per-row clip to 13661) | **94.1%** | 9,436,439 | 124,641 | 75.7x | +| pac_clip_sum (clip_support=2) | **72.5%** | 613,511 | 122,026 | 5.0x | + +**Finding: Winsorization completely fails.** Each $50 value is well within the +[0, 13661] clip bounds, so nothing gets clipped. The 20,000 small rows pass +through untouched and the attack succeeds at 94.1% (barely below the 96% baseline). + +**pac_clip_sum catches it** because the pre-aggregation step (`GROUP BY pu_hash`) +sums user_id=0's 20,000 rows into a single $1,000,000 entry. This lands at an +outlier magnitude level with only 1 bitmap contributor, and gets attenuated. +Attack accuracy drops to 72.5%. + +This is precisely Peter's argument for why per-user contribution clipping (via +pre-aggregation) is needed instead of per-row value clipping. It validates the +two-level aggregation design from Wilson et al. 2019. + +--- + +## Suffix attenuation modes compared + +We tested three suffix attenuation strategies for unsupported outlier levels. +"Soft-clamp" is Peter's original (scale by 16^distance). "Bitmap-proportional" +adds a factor of estimated_distinct/threshold. "Hard-zero" skips the level entirely. + +**Attack 1 results (N=1000, filter<=3, clip=2):** + +| Mode | Var>200k | std_in | std_out | std ratio | +|------|----------|--------|---------|-----------| +| No clipping | **96.0%** | 10,688,008 | 117,662 | 90.8x | +| Soft-clamp | **72.5%** | 613,511 | 122,026 | 5.0x | +| Bitmap-proportional | **66.7%** | 327,625 | 122,026 | 2.7x | +| **Hard zero** | **47.1%** | 106,223 | 122,026 | **0.87x** | + +--- + +## Hard-zero stress tests + +Comprehensive adversarial evaluation of the hard-zero mode. 
+ +### TEST 1: High trial count (60 trials, best-threshold search) + +| truth | mean | std | n | +|-------|------|-----|---| +| in | 12,397 | 87,366 | 49 | +| out | 29,758 | 109,140 | 54 | + +Best threshold accuracy (searched 10k-500k in 10k steps): **52.4%**. +Midpoint classifier: **52.4%**. Likelihood ratio: **52.4%**. +All classifiers are indistinguishable from random. + +### TEST 2: Composed queries (30 trials x 10 queries) + +| n_queries | accuracy | +|-----------|----------| +| 1 | 43.4% | +| 5 | 50.9% | +| 10 | 48.1% | +| Majority vote | **50.0%** | + +Per-trial variance: in_std=83,747, out_std=85,116, **ratio=0.98**. +Composing 10 queries and averaging does not help the attacker. + +### TEST 3: Moderate outlier (target=50,000, same magnitude level as normal) + +**THIS BREAKS IT.** Target 50,000 is in level 2 (4096-65535), same as normal users. +The bitmap shows this level as supported → no clipping occurs. + +| truth | mean | std | +|-------|------|-----| +| in | 139,946 | 497,518 | +| out | 20,093 | 122,026 | + +Best threshold accuracy: **76.5%**. Std ratio: 4.1x. + +**Implication**: pac_clip_sum only clips outliers that are at a DIFFERENT magnitude +level than normal users. A 10x outlier within the same level passes through. + +### TEST 4: Two colluding outliers + +Two users with 999,999 — level 3 has 2 bitmap bits, meeting threshold=2. + +| truth | mean | std | +|-------|------|-----| +| in | 1,783,465 | 12,547,986 | +| out | 20,093 | 122,026 | + +Best threshold accuracy: **100.0%**. Attack fully succeeds. + +**Implication**: Two colluding users at the same magnitude level make that level +"supported." The clipping mechanism assumes outlier levels have few contributors. +Collusion (or any scenario with 2+ users at the same extreme level) defeats it. + +### TEST 5: Dandan's filter probing + +Attacker uses two queries with different filters to probe clipping behavior. 
+ +| Query | Best accuracy | +|-------|--------------| +| Filter<=3 (narrow) | 52.9% | +| Filter<=999 (wide) | 51.7% | +| Cross-filter differential | **51.0%** | + +**Dandan's concern is NOT exploitable with hard-zero.** The narrow query zeroes the +outlier level, giving identical counter distributions for in/out. The wide query +has the outlier's level zeroed too (1 contributor < threshold). The cross-filter +differential reveals nothing. + +### TEST 6: 20K small items ($50 x 20,000) + +Best threshold accuracy: **52.9%**. Attack defeated. +Pre-aggregation collapses 20K rows into one $1M entry at level 3, which is zeroed. + +### TEST 7: Borderline outlier (target=65,536, exactly level 3 boundary) + +Best threshold accuracy: **52.9%**. Attack defeated. +Even the minimum level-3 value is zeroed when it's the sole contributor. + +--- + +### Key takeaways + +1. **Hard-zero fully defeats the variance side-channel** for outliers at unsupported + magnitude levels. Attack accuracy = 50% across all classifiers, even with + composed queries, different thresholds, and cross-filter probing. + +2. **Moderate outliers within the same magnitude level are NOT caught.** A 10x outlier + (50,000 vs normal ~5,000) sits in the same level and passes through unclipped. + Attack accuracy: 76.5%. This is a fundamental limitation of the magnitude-level + granularity (each level spans 16x). + +3. **Two colluding outliers defeat the clipping** by making their level "supported" + (2 contributors >= threshold 2). Attack accuracy: 100%. + +4. **Dandan's filter-probing concern does not apply with hard-zero.** The zeroed level + contributes nothing regardless of filter, so different filters reveal no info. + +5. **The pre-aggregation step remains essential** — 20K small items are correctly + collapsed and clipped. 
diff --git a/attacks/clip_attack_test.sh b/attacks/clip_attack_test.sh new file mode 100755 index 00000000..e37d47cb --- /dev/null +++ b/attacks/clip_attack_test.sh @@ -0,0 +1,196 @@ +#!/usr/bin/env bash +# Test pac_clip_sum against membership inference attacks. +set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" + +run_sum() { + local cond=$1 seed=$2 n_users=$3 target_val=$4 filter=$5 clip=$6 + local insert="" + [ "$cond" = "in" ] && insert="INSERT INTO users VALUES (0, ${target_val});" + local clip_sql="" + [ "$clip" != "off" ] && clip_sql="SET pac_clip_support = ${clip};" + $DUCKDB -noheader -list 2>/dev/null <> "$IN_F" + echo "out,$(run_sum out $seed $n $tv $filt $clip)" >> "$OUT_F" + done + + echo "=== $label | N=$n filt<=$filt tv=$tv clip=$clip ===" + $DUCKDB -markdown < ${FBG} + ${tv}/2.0 THEN 1 + WHEN truth='out' AND v <= ${FBG} + ${tv}/2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM raw WHERE v IS NOT NULL +UNION ALL +SELECT 'Var>200k', + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND ABS(v - ${FBG}) > 200000 THEN 1 + WHEN truth='out' AND ABS(v - ${FBG}) <= 200000 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) +FROM raw WHERE v IS NOT NULL; +SQL + echo "" + rm -f "$IN_F" "$OUT_F" +} + +NT=30 + +echo "==========================================" +echo " pac_clip_sum ATTACK EVALUATION" +echo "==========================================" +echo "" + +# --- Attack 1: Baseline variance classifier (simplest) --- +echo "## ATTACK 1: Single-query variance classifier" +echo "N=1000, target=999999, filter<=3, $NT trials" +echo "" +for CLIP in off 2 3; do + run_scenario "atk1" 1000 999999 3 "$CLIP" $NT +done + +# --- Attack 2: Wide filter (clipping best-case) --- +echo "## ATTACK 2: Wide filter (all users in aggregation)" +echo "N=1000, target=999999, filter<=999, $NT trials" +echo "" +for CLIP in off 2 5 10; do + run_scenario "atk2" 1000 999999 999 "$CLIP" 
$NT +done + +# --- Attack 3: 10K users --- +echo "## ATTACK 3: 10K users, extreme outlier" +echo "N=10000, target=9999999, filter<=2, $NT trials" +echo "" +for CLIP in off 2 3; do + run_scenario "atk3" 10000 9999999 2 "$CLIP" $NT +done + +# --- Attack 4: Over-clipping --- +echo "## ATTACK 4: Over-clipping (too aggressive)" +echo "N=1000, target=999999, filter<=3, 15 trials" +echo "clip_support=10 with only 3-4 users => no supported levels" +echo "" +for CLIP in off 5 10; do + run_scenario "atk4" 1000 999999 3 "$CLIP" 15 +done + +# --- Attack 5: Wide filter + over-clipping --- +echo "## ATTACK 5: Wide filter + aggressive clipping" +echo "N=1000, target=999999, filter<=999, 15 trials" +echo "" +for CLIP in off 50 100; do + run_scenario "atk5" 1000 999999 999 "$CLIP" 15 +done + +# --- Attack 6: Clip after filter vs clip on full table (Dandan's concern) --- +# pac_clip_sum clips AFTER filtering (only filtered rows enter the aggregate). +# An adversary might exploit this: the clipping behavior differs depending on +# which users are in the filter. Compare filter-then-clip (what pac_clip_sum does) +# vs clip-all-then-filter (manual pre-clipping of the full table, then query). +echo "## ATTACK 6: Clip-after-filter vs clip-full-table" +echo "N=1000, target=999999, filter<=3, $NT trials" +echo "Tests Dandan's hypothesis: clipping after filtering leaks more than" +echo "clipping the entire dataset. We compare pac_clip_sum (clips filtered rows)" +echo "vs manual pre-clipping of all rows then querying without clip_support." 
+echo "" + +# 6a: pac_clip_sum (clip after filter) — already covered in atk1 clip=2 +echo "### 6a: clip-after-filter (pac_clip_support=2)" +echo "(Same as Attack 1 clip=2)" +echo "" + +# 6b: clip-full-table-then-query (no pac_clip_support, but data is pre-clipped) +run_sum_preclipped() { + local cond=$1 seed=$2 n_users=$3 target_val=$4 filter=$5 clip_support=$6 + local insert="" + [ "$cond" = "in" ] && insert="INSERT INTO users VALUES (0, ${target_val});" + # Pre-clip ALL rows at percentile bounds (simulating clip-on-full-table) + # Use the same magnitude-level idea: clip values to level-2 max (65535) + # This ensures the billionaire is clipped BEFORE filtering. + $DUCKDB -noheader -list 2>/dev/null < 13661; +ALTER TABLE users ADD PAC_KEY(user_id); +ALTER TABLE users SET PU; +SET pac_mi = 0.0078125; +SET pac_seed = ${seed}; +SELECT SUM(acctbal) FROM users WHERE user_id <= ${filter} OR user_id = 0; +SQL +} + +echo "### 6b: clip-full-table-then-query (pre-clip all to mu+3sigma=13661)" +echo "" +FBG_CLIP=$($DUCKDB -noheader -list -c \ + "SELECT SUM(LEAST((hash(i*31+7)%10000+1)::INTEGER, 13661)) FROM generate_series(1,3) t(i);" | tr -d '[:space:]') +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum_preclipped in $seed 1000 999999 3 off)" >> "$IN_F" + echo "out,$(run_sum_preclipped out $seed 1000 999999 3 off)" >> "$OUT_F" +done +echo "=== atk6b | N=1000 filt<=3 tv=999999 pre-clip=13661 ===" +$DUCKDB -markdown < ${FBG_CLIP} + 999999/2.0 THEN 1 + WHEN truth='out' AND v <= ${FBG_CLIP} + 999999/2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM raw WHERE v IS NOT NULL +UNION ALL +SELECT 'Var>200k', + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND ABS(v - ${FBG_CLIP}) > 200000 THEN 1 + WHEN truth='out' AND ABS(v - ${FBG_CLIP}) <= 200000 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) +FROM raw WHERE v IS NOT NULL; +SQL +echo "" +rm -f "$IN_F" "$OUT_F" diff --git a/attacks/clip_hardzero_stress.sh b/attacks/clip_hardzero_stress.sh 
new file mode 100755 index 00000000..33e78154 --- /dev/null +++ b/attacks/clip_hardzero_stress.sh @@ -0,0 +1,297 @@ +#!/usr/bin/env bash +# Stress-test hard-zero clipping: try every attack angle we can think of. +set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" +CLIP=2 + +run_sum() { + local cond=$1 seed=$2 n_users=$3 target_val=$4 filter=$5 extra_sql="${6:-}" + local insert="" + [ "$cond" = "in" ] && insert="$target_val" + $DUCKDB -noheader -list 2>/dev/null < threshold THEN 1 + WHEN truth='out' AND ABS(v - ${fbg}) <= threshold THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS acc + FROM raw, generate_series(10000, 500000, 10000) thresholds(threshold) + WHERE v IS NOT NULL + GROUP BY threshold +); + +-- Mean-based classifier (v > midpoint) +SELECT 'Midpoint clf' AS clf, + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND v > (${fbg} + ${fbg} + ${tv}) / 2.0 THEN 1 + WHEN truth='out' AND v <= (${fbg} + ${fbg} + ${tv}) / 2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM raw WHERE v IS NOT NULL; + +-- Likelihood ratio: compare distance to expected in vs expected out +SELECT 'LR clf' AS clf, + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND ABS(v - ${fbg}::DOUBLE - ${tv}) < ABS(v - ${fbg}::DOUBLE) THEN 1 + WHEN truth='out' AND ABS(v - ${fbg}::DOUBLE - ${tv}) >= ABS(v - ${fbg}::DOUBLE) THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM raw WHERE v IS NOT NULL; +SQL + echo "" +} + +analyze_composed() { + local label=$1 in_f=$2 out_f=$3 fbg=$4 tv=$5 nq=$6 + echo "=== $label ===" + $DUCKDB -markdown < ${fbg} + ${tv}/2.0 THEN 1 + WHEN truth='out' AND ravg <= ${fbg} + ${tv}/2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM cum GROUP BY qid ORDER BY qid; + +-- Majority vote +WITH votes AS ( + SELECT truth, trial, + SUM(CASE WHEN v > ${fbg} + ${tv}/2.0 THEN 1 ELSE 0 END) AS yes, COUNT(*) AS total + FROM raw WHERE v IS NOT NULL GROUP BY truth, 
trial +) +SELECT 'Majority vote (${nq}q)' AS method, + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND yes > total/2.0 THEN 1 + WHEN truth='out' AND yes <= total/2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM votes; + +-- Variance-based: use per-trial variance across queries +WITH trial_stats AS ( + SELECT truth, trial, STDDEV(v) AS trial_std + FROM raw WHERE v IS NOT NULL GROUP BY truth, trial +) +SELECT 'Variance of queries' AS method, + printf('in_std=%.0f out_std=%.0f ratio=%.2f', + AVG(CASE WHEN truth='in' THEN trial_std END), + AVG(CASE WHEN truth='out' THEN trial_std END), + AVG(CASE WHEN truth='in' THEN trial_std END) / + NULLIF(AVG(CASE WHEN truth='out' THEN trial_std END), 0)) AS stats +FROM trial_stats; +SQL + echo "" +} + +FBG=$($DUCKDB -noheader -list -c \ + "SELECT SUM((hash(i*31+7)%10000+1)::INTEGER) FROM generate_series(1,3) t(i);" | tr -d '[:space:]') +FBG999=$($DUCKDB -noheader -list -c \ + "SELECT SUM((hash(i*31+7)%10000+1)::INTEGER) FROM generate_series(1,999) t(i);" | tr -d '[:space:]') + +echo "=============================================" +echo " HARD-ZERO STRESS TEST" +echo " pac_clip_support=$CLIP, trying to break it" +echo "=============================================" +echo "Background: filter<=3 sum=$FBG, filter<=999 sum=$FBG999" +echo "" + +# --------------------------------------------------------------- +# TEST 1: High trial count (100 trials for statistical power) +# --------------------------------------------------------------- +echo "## TEST 1: High trial count (60 trials)" +echo "" +NT=60 +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 999999);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "100 trials, N=1000, tv=999999, filt<=3" "$IN_F" "$OUT_F" "$FBG" 999999 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 2: Composed queries (10 
queries, reduce noise by averaging) +# --------------------------------------------------------------- +echo "## TEST 2: Composed queries (10 queries)" +echo "" +NT=30; NQ=10 +IN_F=$(mktemp); OUT_F=$(mktemp) +for trial in $(seq 1 $NT); do + for q in $(seq 1 $NQ); do + s=$((trial * 1000 + q)) + echo "in,${trial},${q},$(run_sum in $s 1000 "INSERT INTO users VALUES (0, 999999);" 3)" >> "$IN_F" + echo "out,${trial},${q},$(run_sum out $s 1000 "" 3)" >> "$OUT_F" + done +done +analyze_composed "50 trials x 10 queries" "$IN_F" "$OUT_F" "$FBG" 999999 10 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 3: Moderate outlier (50000 — same magnitude level as normal) +# --------------------------------------------------------------- +echo "## TEST 3: Moderate outlier (target=50000, same magnitude level)" +echo "Normal users ~5000, target ~50000 — both in level 2 (4096-65535)" +echo "The bitmap should show this level as supported, so NO clipping occurs" +echo "" +NT=30 +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 50000);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "Moderate outlier tv=50000" "$IN_F" "$OUT_F" "$FBG" 50000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 4: Two colluding outliers +# --------------------------------------------------------------- +echo "## TEST 4: Two colluding outliers" +echo "Two users with 999999 — level 3 now has 2 bitmap bits (meets threshold=2)" +echo "Hard-zero might NOT clip because level has enough support!" 
+echo "" +NT=30 +IN_F=$(mktemp); OUT_F=$(mktemp) +TWO_INSERT="INSERT INTO users VALUES (0, 999999); INSERT INTO users VALUES (-1, 999999);" +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "$TWO_INSERT" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +# For two outliers: "in" filter catches user 0 (user -1 is NOT in filter <= 3) +# But user -1's value still goes into the table and affects the bitmap! +analyze "Two outliers (0 and -1), filt<=3" "$IN_F" "$OUT_F" "$FBG" 999999 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 5: Dandan's filter probing attack +# --------------------------------------------------------------- +echo "## TEST 5: Dandan's filter probing" +echo "Attacker tries different filters to see if clipping behavior changes." +echo "If the outlier is present, the bitmap at level 3 has a bit set." +echo "Query 1: filter<=3 (includes user 0 if present)" +echo "Query 2: filter<=999 (includes everyone)" +echo "Difference in results might reveal membership." 
+echo "" +NT=30 +IN_F1=$(mktemp); OUT_F1=$(mktemp) +IN_F2=$(mktemp); OUT_F2=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 999999);" 3)" >> "$IN_F1" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F1" + echo "in,$(run_sum in $((seed+10000)) 1000 "INSERT INTO users VALUES (0, 999999);" 999)" >> "$IN_F2" + echo "out,$(run_sum out $((seed+10000)) 1000 "" 999)" >> "$OUT_F2" +done +analyze "Filter<=3 (narrow)" "$IN_F1" "$OUT_F1" "$FBG" 999999 +analyze "Filter<=999 (wide)" "$IN_F2" "$OUT_F2" "$FBG999" 999999 + +echo "=== Cross-filter differential ===" +$DUCKDB -markdown < 0 THEN 1 + WHEN n.truth='out' AND w.v - n.v <= 0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM narrow n JOIN wide w ON n.truth = w.truth AND n.trial = w.trial +WHERE n.v IS NOT NULL AND w.v IS NOT NULL; +SQL +echo "" +rm -f "$IN_F1" "$OUT_F1" "$IN_F2" "$OUT_F2" + +# --------------------------------------------------------------- +# TEST 6: 20K small items with high trial count +# --------------------------------------------------------------- +echo "## TEST 6: 20K small items, 50 trials" +echo "" +NT=30 +IN_F=$(mktemp); OUT_F=$(mktemp) +MULTI_INSERT="INSERT INTO users SELECT 0, 50 FROM generate_series(1,20000) t(i);" +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "$MULTI_INSERT" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "20K items x \$50, filt<=3" "$IN_F" "$OUT_F" "$FBG" 1000000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 7: Borderline outlier (value at level boundary) +# --------------------------------------------------------------- +echo "## TEST 7: Borderline outlier (target=65536, exactly level 3 boundary)" +echo "Just barely crosses into level 3 — minimum unsupported value" +echo "" +NT=30 +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users 
VALUES (0, 65536);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "Borderline tv=65536" "$IN_F" "$OUT_F" "$FBG" 65536 +rm -f "$IN_F" "$OUT_F" + +echo "=============================================" +echo " STRESS TEST COMPLETE" +echo "=============================================" diff --git a/attacks/clip_multirow_test.sh b/attacks/clip_multirow_test.sh new file mode 100755 index 00000000..c818360f --- /dev/null +++ b/attacks/clip_multirow_test.sh @@ -0,0 +1,136 @@ +#!/usr/bin/env bash +# Attack 7: "20K small items" user — tests whether clipping catches +# a user whose individual rows are normal but total contribution is huge. +set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" +NT=30 +N=1000; FILT=3; TV_PER_ROW=50; TV_ROWS=20000 +# Total contribution: 20000 * 50 = 1,000,000 + +echo "=============================================" +echo " ATTACK 7: 20K small items user" +echo "=============================================" +echo " N=$N background users (1 row each)" +echo " Target user_id=0: $TV_ROWS rows x \$$TV_PER_ROW = \$$(( TV_ROWS * TV_PER_ROW ))" +echo " filter<=3, $NT trials" +echo "" + +FBG=$($DUCKDB -noheader -list -c \ + "SELECT SUM((hash(i*31+7)%10000+1)::INTEGER) FROM generate_series(1,${FILT}) t(i);" | tr -d '[:space:]') +TV_TOTAL=$((TV_ROWS * TV_PER_ROW)) +echo "Background SUM=$FBG, target total=$TV_TOTAL" +echo "" + +# --- 7a: No clipping (baseline) --- +run_noprotection() { + local cond=$1 seed=$2 insert="" + [ "$cond" = "in" ] && insert="INSERT INTO users SELECT 0, ${TV_PER_ROW} FROM generate_series(1,${TV_ROWS}) t(i);" + $DUCKDB -noheader -list 2>/dev/null </dev/null < 13661; +ALTER TABLE users ADD PAC_KEY(user_id); +ALTER TABLE users SET PU; +SET pac_mi = 0.0078125; +SET pac_seed = ${seed}; +SELECT SUM(acctbal) FROM users WHERE user_id <= ${FILT} OR user_id = 0; +SQL +} + +# --- 7c: pac_clip_sum (clip after 
filter, with pre-aggregation) --- +run_clipsum() { + local cond=$1 seed=$2 clip=$3 insert="" + [ "$cond" = "in" ] && insert="INSERT INTO users SELECT 0, ${TV_PER_ROW} FROM generate_series(1,${TV_ROWS}) t(i);" + $DUCKDB -noheader -list 2>/dev/null < ${FBG} + ${TV_TOTAL}/2.0 THEN 1 + WHEN truth='out' AND v <= ${FBG} + ${TV_TOTAL}/2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM raw WHERE v IS NOT NULL +UNION ALL +SELECT 'Var>200k', + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND ABS(v - ${FBG}) > 200000 THEN 1 + WHEN truth='out' AND ABS(v - ${FBG}) <= 200000 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) +FROM raw WHERE v IS NOT NULL; +SQL + echo "" +} + +# 7a: No clipping +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_noprotection in $seed)" >> "$IN_F" + echo "out,$(run_noprotection out $seed)" >> "$OUT_F" +done +analyze "7a: No clipping (baseline)" "$IN_F" "$OUT_F" +rm -f "$IN_F" "$OUT_F" + +# 7b: Full-table Winsorization +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_winsorized in $seed)" >> "$IN_F" + echo "out,$(run_winsorized out $seed)" >> "$OUT_F" +done +analyze "7b: Winsorization (per-row clip to 13661)" "$IN_F" "$OUT_F" +rm -f "$IN_F" "$OUT_F" + +# 7c: pac_clip_sum with clip_support=2 +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_clipsum in $seed 2)" >> "$IN_F" + echo "out,$(run_clipsum out $seed 2)" >> "$OUT_F" +done +analyze "7c: pac_clip_sum (clip_support=2)" "$IN_F" "$OUT_F" +rm -f "$IN_F" "$OUT_F" diff --git a/attacks/clip_shift2_stress.sh b/attacks/clip_shift2_stress.sh new file mode 100755 index 00000000..88567708 --- /dev/null +++ b/attacks/clip_shift2_stress.sh @@ -0,0 +1,241 @@ +#!/usr/bin/env bash +# Stress-test shift=2 (4x levels) with hard-zero clipping. +# Focus on edge cases that 4x granularity might miss. 
+set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" +CLIP=2 + +run_sum() { + local cond=$1 seed=$2 n_users=$3 target_insert="$4" filter=$5 + $DUCKDB -noheader -list 2>/dev/null < threshold THEN 1 + WHEN truth='out' AND ABS(v - ${fbg}) <= threshold THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS acc + FROM raw, generate_series(5000, 500000, 5000) thresholds(threshold) + WHERE v IS NOT NULL + GROUP BY threshold +); +SQL + echo "" +} + +FBG=$($DUCKDB -noheader -list -c \ + "SELECT SUM((hash(i*31+7)%10000+1)::INTEGER) FROM generate_series(1,3) t(i);" | tr -d '[:space:]') + +echo "=============================================" +echo " SHIFT=2 STRESS TEST (4x levels, hard-zero)" +echo " pac_clip_support=$CLIP" +echo "=============================================" +echo "" + +NT=30 + +# --------------------------------------------------------------- +# TEST 1: 3.5x outlier (within 4x boundary — should NOT be caught) +# Normal ~5000, target=17000 (3.4x) +# Both in level 3 (4096-16383)? Let's check: +# 5000: bit_pos=12, (12-4)>>1 = 4. Level 4. +# 17000: bit_pos=14, (14-4)>>1 = 5. Level 5. DIFFERENT! +# Actually 17000 might be caught. Let's try 15000: +# 15000: bit_pos=13, (13-4)>>1 = 4. Level 4. SAME as 5000! +# --------------------------------------------------------------- +echo "## TEST 1: 3x outlier (target=15000, same level as normal)" +echo "5000→level 4, 15000→level 4 (bit_pos 13, (13-4)/2=4). Same level." +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 15000);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "3x outlier tv=15000" "$IN_F" "$OUT_F" "$FBG" 15000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 2: Just above 4x (target=20000) +# 20000: bit_pos=14, (14-4)>>1 = 5. Level 5. 
Different from 5000 (level 4). +# --------------------------------------------------------------- +echo "## TEST 2: 4x outlier (target=20000, different level)" +echo "5000→level 4, 20000→level 5. Should be caught." +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 20000);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "4x outlier tv=20000" "$IN_F" "$OUT_F" "$FBG" 20000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 3: Two colluding outliers (still breaks it?) +# --------------------------------------------------------------- +echo "## TEST 3: Two colluding outliers (999999)" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 999999); INSERT INTO users VALUES (-1, 999999);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "Two colluders tv=999999" "$IN_F" "$OUT_F" "$FBG" 999999 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 4: Outlier at exact level boundary (16384 = start of level 5) +# 16384: bit_pos=14, (14-4)>>1 = 5. Normal at level 4. +# --------------------------------------------------------------- +echo "## TEST 4: Boundary outlier (target=16384, exact level 5 start)" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 16384);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "Boundary tv=16384" "$IN_F" "$OUT_F" "$FBG" 16384 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 5: Outlier just below boundary (16383 = max of level 4) +# 16383: bit_pos=13, (13-4)>>1 = 4. Same level as 5000. 
+# --------------------------------------------------------------- +echo "## TEST 5: Just-below-boundary (target=16383, still level 4)" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 16383);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "Just-below tv=16383" "$IN_F" "$OUT_F" "$FBG" 16383 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 6: Many small outliers (10 users at 15000, all in same level) +# They all go to level 4 like normal users → supported → no clipping +# --------------------------------------------------------------- +echo "## TEST 6: 10 users at 15000 (3x, same level, all 'supported')" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +MULTI="INSERT INTO users SELECT -(i+1), 15000 FROM generate_series(1,10) t(i); INSERT INTO users VALUES (0, 15000);" +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "$MULTI" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "10 users at 15000" "$IN_F" "$OUT_F" "$FBG" 15000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 7: Wide filter + moderate outlier (best case for clipping) +# --------------------------------------------------------------- +FBG999=$($DUCKDB -noheader -list -c \ + "SELECT SUM((hash(i*31+7)%10000+1)::INTEGER) FROM generate_series(1,999) t(i);" | tr -d '[:space:]') +echo "## TEST 7: Wide filter + moderate outlier (tv=50000)" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users VALUES (0, 50000);" 999)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 999)" >> "$OUT_F" +done +analyze "Wide filter tv=50000" "$IN_F" "$OUT_F" "$FBG999" 50000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 8: 20K small 
items (pre-aggregation test) +# --------------------------------------------------------------- +echo "## TEST 8: 20K small items (\$50 x 20000 = \$1M)" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for seed in $(seq 1 $NT); do + echo "in,$(run_sum in $seed 1000 "INSERT INTO users SELECT 0, 50 FROM generate_series(1,20000) t(i);" 3)" >> "$IN_F" + echo "out,$(run_sum out $seed 1000 "" 3)" >> "$OUT_F" +done +analyze "20K items" "$IN_F" "$OUT_F" "$FBG" 1000000 +rm -f "$IN_F" "$OUT_F" + +# --------------------------------------------------------------- +# TEST 9: Composed queries (10 queries averaged) +# --------------------------------------------------------------- +echo "## TEST 9: 10 composed queries, tv=999999" +echo "" +IN_F=$(mktemp); OUT_F=$(mktemp) +for trial in $(seq 1 $NT); do + for q in $(seq 1 10); do + s=$((trial * 1000 + q)) + echo "in,${trial},${q},$(run_sum in $s 1000 "INSERT INTO users VALUES (0, 999999);" 3)" >> "$IN_F" + echo "out,${trial},${q},$(run_sum out $s 1000 "" 3)" >> "$OUT_F" + done +done +echo "=== 10 composed queries ===" +$DUCKDB -markdown < ${FBG} + 999999/2.0 THEN 1 ELSE 0 END) AS yes, COUNT(*) AS total + FROM raw WHERE v IS NOT NULL GROUP BY truth, trial +) +SELECT 'Majority vote (10q)' AS method, + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND yes > total/2.0 THEN 1 + WHEN truth='out' AND yes <= total/2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM votes; + +WITH trial_stats AS ( + SELECT truth, trial, STDDEV(v) AS trial_std + FROM raw WHERE v IS NOT NULL GROUP BY truth, trial +) +SELECT 'Per-trial variance' AS method, + printf('in_std=%.0f out_std=%.0f ratio=%.2f', + AVG(CASE WHEN truth='in' THEN trial_std END), + AVG(CASE WHEN truth='out' THEN trial_std END), + AVG(CASE WHEN truth='in' THEN trial_std END) / + NULLIF(AVG(CASE WHEN truth='out' THEN trial_std END), 0)) AS stats +FROM trial_stats; +SQL +echo "" +rm -f "$IN_F" "$OUT_F" + +echo "=============================================" +echo " STRESS TEST 
COMPLETE" +echo "=============================================" diff --git a/attacks/clipping_experiment.sh b/attacks/clipping_experiment.sh new file mode 100755 index 00000000..ef2c7842 --- /dev/null +++ b/attacks/clipping_experiment.sh @@ -0,0 +1,215 @@ +#!/usr/bin/env bash +# Clipping experiment: does pre-PAC outlier clipping reduce attack success + improve utility? +# +# For each clipping threshold t in {1, 2, 3, 5, inf}, clips data at μ ± t·σ +# (recursive until convergence), then runs PAC and measures attack accuracy + utility. +set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" + +N=1000; TV=999999; FILT=3; NTRIALS=30 +CLIP_ITERS=20 +T_VALUES="1 2 3 5 inf" + +echo "============================================================" +echo " CLIPPING EXPERIMENT" +echo " Does pre-PAC outlier clipping reduce attack accuracy?" +echo "============================================================" +echo " N=$N users, target=$TV, filter<=$FILT, $NTRIALS trials" +echo " Clipping thresholds (t): $T_VALUES" +echo "============================================================" +echo "" + +# Generate SQL for recursive clipping: CLIP_ITERS rounds of UPDATE at μ ± t·σ. +# Each round recomputes μ,σ from the current data. The WHERE clause ensures +# convergence — once all values are within bounds, subsequent rounds are no-ops. 
+gen_clip_sql() { + local t=$1 sql="" i + for i in $(seq 1 $CLIP_ITERS); do + sql+="UPDATE users SET acctbal = LEAST(GREATEST(acctbal, + (SELECT (AVG(acctbal) - ${t} * STDDEV_POP(acctbal))::INTEGER FROM users)), + (SELECT (AVG(acctbal) + ${t} * STDDEV_POP(acctbal))::INTEGER FROM users)) +WHERE acctbal < (SELECT (AVG(acctbal) - ${t} * STDDEV_POP(acctbal))::INTEGER FROM users) + OR acctbal > (SELECT (AVG(acctbal) + ${t} * STDDEV_POP(acctbal))::INTEGER FROM users); +" + done + printf '%s' "$sql" +} + +# Ground truth: no clipping, no PAC noise +run_true_unclipped() { + local cond=$1 insert="" + [ "$cond" = "in" ] && insert="INSERT INTO users VALUES (0, ${TV});" + $DUCKDB -noheader -list </dev/null <> "$GROUND_F" + echo "${t},out,${CLIP_OUT}" >> "$GROUND_F" + + # Run PAC trials + for seed in $(seq 1 $NTRIALS); do + v_in=$(run_clipped_pac in "$seed" "$t" | tr -d '[:space:]') + v_out=$(run_clipped_pac out "$seed" "$t" | tr -d '[:space:]') + echo "${t},in,${seed},${v_in}" >> "$RESULTS_F" + echo "${t},out,${seed},${v_out}" >> "$RESULTS_F" + printf "." 
+ done + echo " done" +done + +echo "" +echo "============================================================" +echo " RESULTS" +echo "============================================================" +echo "" + +# --- Analysis --- +$DUCKDB -markdown < 200000 + 100.0 * SUM(CASE + WHEN r.truth='in' AND ABS(r.v - ${TRUE_OUT}) > 200000 THEN 1 + WHEN r.truth='out' AND ABS(r.v - ${TRUE_OUT}) <= 200000 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS var_acc, + -- Midpoint classifier: v > (clipped_in + clipped_out) / 2 + 100.0 * SUM(CASE + WHEN r.truth='in' AND r.v > (g_in.clipped_true + g_out.clipped_true) / 2.0 THEN 1 + WHEN r.truth='out' AND r.v <= (g_in.clipped_true + g_out.clipped_true) / 2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS mid_acc, + STDDEV(CASE WHEN r.truth='in' THEN r.v END) AS std_in, + STDDEV(CASE WHEN r.truth='out' THEN r.v END) AS std_out + FROM results r + JOIN ground g_in ON r.t_val = g_in.t_val AND g_in.truth = 'in' + JOIN ground g_out ON r.t_val = g_out.t_val AND g_out.truth = 'out' + WHERE r.v IS NOT NULL + GROUP BY r.t_val, g_in.clipped_true, g_out.clipped_true +), +utility AS ( + SELECT r.t_val, + AVG(CASE WHEN r.truth='in' + THEN ABS(r.v - ${TRUE_IN}::DOUBLE) / NULLIF(ABS(${TRUE_IN}::DOUBLE), 0) * 100 + ELSE ABS(r.v - ${TRUE_OUT}::DOUBLE) / NULLIF(ABS(${TRUE_OUT}::DOUBLE), 0) * 100 + END) AS mape + FROM results r + WHERE r.v IS NOT NULL + GROUP BY r.t_val +), +bias AS ( + SELECT g.t_val, + MAX(CASE WHEN g.truth='in' THEN ABS(g.clipped_true - ${TRUE_IN}::DOUBLE) END) AS bias_in, + MAX(CASE WHEN g.truth='out' THEN ABS(g.clipped_true - ${TRUE_OUT}::DOUBLE) END) AS bias_out + FROM ground g + GROUP BY g.t_val +) +SELECT a.t_val AS t, + printf('%.1f%%', a.var_acc) AS attack_acc_200k, + printf('%.1f%%', a.mid_acc) AS attack_acc_mid, + printf('%.1f%%', ut.mape) AS mape_vs_true, + printf('%.0f', a.std_in) AS noise_std_in, + printf('%.0f', a.std_out) AS noise_std_out, + printf('%.0f', b.bias_in) AS clip_bias_in, + printf('%.0f', b.bias_out) AS clip_bias_out +FROM 
attack a +JOIN utility ut ON a.t_val = ut.t_val +JOIN bias b ON a.t_val = b.t_val +ORDER BY CASE a.t_val WHEN 'inf' THEN 999 ELSE a.t_val::INT END; + +-- Detailed per-condition stats +SELECT r.t_val AS t, r.truth, + printf('%.0f', AVG(r.v)) AS mean_pac, + printf('%.0f', STDDEV(r.v)) AS std_pac, + printf('%.0f', g.clipped_true) AS truth_clipped, + COUNT(*) AS n +FROM results r +JOIN ground g ON r.t_val = g.t_val AND r.truth = g.truth +WHERE r.v IS NOT NULL +GROUP BY r.t_val, r.truth, g.clipped_true +ORDER BY CASE r.t_val WHEN 'inf' THEN 999 ELSE r.t_val::INT END, r.truth; +SQL + +rm -f "$RESULTS_F" "$GROUND_F" + +echo "" +echo "============================================================" +echo " INTERPRETATION" +echo "============================================================" +echo " attack_acc_200k: variance classifier (|v - bg| > 200k), 50% = random" +echo " attack_acc_mid: midpoint classifier (optimal threshold), 50% = random" +echo " mape_vs_true: mean |noised - unclipped_truth| / |unclipped_truth|" +echo " clip_bias_in/out: |clipped_truth - unclipped_truth|" +echo " Unclipped truths: in=$TRUE_IN out=$TRUE_OUT" +echo "============================================================" diff --git a/attacks/output_clipping_experiment.sh b/attacks/output_clipping_experiment.sh new file mode 100755 index 00000000..2fb161a4 --- /dev/null +++ b/attacks/output_clipping_experiment.sh @@ -0,0 +1,244 @@ +#!/usr/bin/env bash +# Output clipping experiment: clip the PAC query RESULT (not input data) at bounds +# derived from baseline column statistics. +# +# Unlike input clipping (which modifies stored values before PAC), output clipping +# leaves the data untouched. After PAC returns a noised result, we clamp it to +# [n·(μ - t·σ), n·(μ + t·σ)] where μ,σ are pre-computed column stats and n is the +# expected number of users in the filter. +# +# Key property: if the billionaire is NOT in the filter, the result is already +# within bounds and nothing changes. 
Clipping only fires when an outlier inflates +# the result beyond the expected range. +set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" + +N=1000; TV=999999; FILT=3; NTRIALS=30 +T_VALUES="1 2 3 5 inf" + +echo "============================================================" +echo " OUTPUT CLIPPING EXPERIMENT" +echo " Clip the PAC result post-hoc at n·(μ ± t·σ)" +echo "============================================================" +echo " N=$N users, target=$TV, filter<=$FILT, $NTRIALS trials" +echo " Clipping thresholds (t): $T_VALUES" +echo "============================================================" +echo "" + +# --- Baseline column statistics (from N users, NO target) --- +# These represent the "known" column distribution used for clipping bounds. +read MU SIGMA <<< "$($DUCKDB -noheader -csv -separator ' ' </dev/null <> "$RESULTS_F" + echo "out,${seed},${v_out}" >> "$RESULTS_F" + printf "." 
+done +echo " done" + +echo "" +echo "============================================================" +echo " RESULTS" +echo "============================================================" +echo "" + +# --- Analysis: for each t, apply output clipping and compute metrics --- +$DUCKDB -markdown < 200000 + 100.0 * SUM(CASE + WHEN c.truth='in' AND ABS(c.v - ${TRUE_OUT}) > 200000 THEN 1 + WHEN c.truth='out' AND ABS(c.v - ${TRUE_OUT}) <= 200000 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS var_acc, + -- Midpoint classifier: v > (clipped_in_truth + clipped_out_truth) / 2 + 100.0 * SUM(CASE + WHEN c.truth='in' AND c.v > (g_in.clipped_true + g_out.clipped_true) / 2.0 THEN 1 + WHEN c.truth='out' AND c.v <= (g_in.clipped_true + g_out.clipped_true) / 2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS mid_acc, + STDDEV(CASE WHEN c.truth='in' THEN c.v END) AS std_in, + STDDEV(CASE WHEN c.truth='out' THEN c.v END) AS std_out, + -- Fraction of results that hit a clip bound + 100.0 * SUM(CASE WHEN c.truth='in' AND (c.v = c.lo OR c.v = c.hi) THEN 1 ELSE 0 END)::DOUBLE + / NULLIF(SUM(CASE WHEN c.truth='in' THEN 1 ELSE 0 END), 0) AS pct_clipped_in, + 100.0 * SUM(CASE WHEN c.truth='out' AND (c.v = c.lo OR c.v = c.hi) THEN 1 ELSE 0 END)::DOUBLE + / NULLIF(SUM(CASE WHEN c.truth='out' THEN 1 ELSE 0 END), 0) AS pct_clipped_out + FROM clipped c + JOIN ground g_in ON c.t_val = g_in.t_val AND g_in.truth = 'in' + JOIN ground g_out ON c.t_val = g_out.t_val AND g_out.truth = 'out' + GROUP BY c.t_val, g_in.clipped_true, g_out.clipped_true +), +utility AS ( + SELECT c.t_val, + AVG(CASE WHEN c.truth='in' + THEN ABS(c.v - ${TRUE_IN}::DOUBLE) / NULLIF(ABS(${TRUE_IN}::DOUBLE), 0) * 100 + ELSE ABS(c.v - ${TRUE_OUT}::DOUBLE) / NULLIF(ABS(${TRUE_OUT}::DOUBLE), 0) * 100 + END) AS mape + FROM clipped c + GROUP BY c.t_val +), +bias AS ( + SELECT g.t_val, + MAX(CASE WHEN g.truth='in' THEN ABS(g.clipped_true - ${TRUE_IN}::DOUBLE) END) AS bias_in, + MAX(CASE WHEN g.truth='out' THEN ABS(g.clipped_true - 
${TRUE_OUT}::DOUBLE) END) AS bias_out + FROM ground g + GROUP BY g.t_val +) +SELECT CASE WHEN a.t_val = 999 THEN 'inf' ELSE a.t_val::VARCHAR END AS t, + printf('%.1f%%', a.var_acc) AS attack_acc_200k, + printf('%.1f%%', a.mid_acc) AS attack_acc_mid, + printf('%.1f%%', ut.mape) AS mape_vs_true, + printf('%.0f', a.std_in) AS noise_std_in, + printf('%.0f', a.std_out) AS noise_std_out, + printf('%.0f', b.bias_in) AS clip_bias_in, + printf('%.0f', b.bias_out) AS clip_bias_out, + printf('%.0f%%', a.pct_clipped_in) AS pct_clip_in, + printf('%.0f%%', a.pct_clipped_out) AS pct_clip_out +FROM attack a +JOIN utility ut ON a.t_val = ut.t_val +JOIN bias b ON a.t_val = b.t_val +ORDER BY CASE WHEN a.t_val = 999 THEN 999 ELSE a.t_val END; + +-- Detailed per-condition stats +SELECT CASE WHEN c.t_val = 999 THEN 'inf' ELSE c.t_val::VARCHAR END AS t, + c.truth, + printf('%.0f', AVG(c.v)) AS mean_clipped, + printf('%.0f', STDDEV(c.v)) AS std_clipped, + printf('%.0f', AVG(c.raw_v)) AS mean_raw, + printf('%.0f', STDDEV(c.raw_v)) AS std_raw, + printf('%.0f', g.clipped_true) AS truth_clipped, + COUNT(*) AS n +FROM clipped c +JOIN ground g ON c.t_val = g.t_val AND c.truth = g.truth +GROUP BY c.t_val, c.truth, g.clipped_true +ORDER BY CASE WHEN c.t_val = 999 THEN 999 ELSE c.t_val END, c.truth; +SQL + +rm -f "$RESULTS_F" + +echo "" +echo "============================================================" +echo " INTERPRETATION" +echo "============================================================" +echo " Output clipping: CLAMP(pac_result, n·(μ-tσ), n·(μ+tσ))" +echo " Bounds use baseline stats (μ=$MU, σ=$SIGMA, n=$N_BASE)" +echo " attack_acc_200k: variance classifier, 50% = random" +echo " attack_acc_mid: midpoint classifier, 50% = random" +echo " pct_clip_in/out: fraction of results hitting a bound" +echo " Unclipped truths: in=$TRUE_IN out=$TRUE_OUT" +echo "============================================================" diff --git a/attacks/output_clipping_v2_experiment.sh 
b/attacks/output_clipping_v2_experiment.sh new file mode 100644 index 00000000..9fb3ac71 --- /dev/null +++ b/attacks/output_clipping_v2_experiment.sh @@ -0,0 +1,220 @@ +#!/usr/bin/env bash +# Output clipping v2: clip values at QUERY TIME using pre-computed baseline bounds, +# BEFORE PAC computes sensitivity. +# +# Pipeline per query: +# 1. Data is stored unmodified (billionaire's 999999 stays as-is) +# 2. At query time, clamp each value to [μ-tσ, μ+tσ] (baseline stats, single pass) +# 3. PAC computes sensitivity from the CLIPPED range → noise ∝ 2tσ +# 4. Return noised result +# +# Key property: bounds are identical for in/out (derived from baseline, not current data), +# so PAC calibrates the SAME noise regardless of membership. No side-channel from +# differing sensitivities. +# +# Simulated by: UPDATE + PAC_KEY in the same session. The UPDATE models the query-time +# clamping; PAC then sees the clipped range for sensitivity. +set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" + +N=1000; TV=999999; FILT=3; NTRIALS=30 +T_VALUES="1 2 3 5 inf" + +echo "============================================================" +echo " OUTPUT CLIPPING v2" +echo " Clip at query time, BEFORE PAC sensitivity computation" +echo "============================================================" +echo " N=$N users, target=$TV, filter<=$FILT, $NTRIALS trials" +echo " Clipping thresholds (t): $T_VALUES" +echo "============================================================" +echo "" + +# --- Baseline column stats (from N users, NO target) --- +read MU SIGMA <<< "$($DUCKDB -noheader -csv -separator ' ' </dev/null <> "$GROUND_F" + echo "${t},out,${CLIP_OUT}" >> "$GROUND_F" + + # Run PAC trials + for seed in $(seq 1 $NTRIALS); do + v_in=$(run_clipped_pac in "$seed" "$t" | tr -d '[:space:]') + v_out=$(run_clipped_pac out "$seed" "$t" | tr -d '[:space:]') + echo "${t},in,${seed},${v_in}" >> "$RESULTS_F" + 
echo "${t},out,${seed},${v_out}" >> "$RESULTS_F" + printf "." + done + echo " done" +done + +echo "" +echo "============================================================" +echo " RESULTS" +echo "============================================================" +echo "" + +# --- Analysis --- +$DUCKDB -markdown < 200000 THEN 1 + WHEN r.truth='out' AND ABS(r.v - ${TRUE_OUT}) <= 200000 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS var_acc, + 100.0 * SUM(CASE + WHEN r.truth='in' AND r.v > (g_in.clipped_true + g_out.clipped_true) / 2.0 THEN 1 + WHEN r.truth='out' AND r.v <= (g_in.clipped_true + g_out.clipped_true) / 2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS mid_acc, + STDDEV(CASE WHEN r.truth='in' THEN r.v END) AS std_in, + STDDEV(CASE WHEN r.truth='out' THEN r.v END) AS std_out + FROM results r + JOIN ground g_in ON r.t_val = g_in.t_val AND g_in.truth = 'in' + JOIN ground g_out ON r.t_val = g_out.t_val AND g_out.truth = 'out' + WHERE r.v IS NOT NULL + GROUP BY r.t_val, g_in.clipped_true, g_out.clipped_true +), +utility AS ( + SELECT r.t_val, + AVG(CASE WHEN r.truth='in' + THEN ABS(r.v - ${TRUE_IN}::DOUBLE) / NULLIF(ABS(${TRUE_IN}::DOUBLE), 0) * 100 + ELSE ABS(r.v - ${TRUE_OUT}::DOUBLE) / NULLIF(ABS(${TRUE_OUT}::DOUBLE), 0) * 100 + END) AS mape + FROM results r + WHERE r.v IS NOT NULL + GROUP BY r.t_val +), +bias AS ( + SELECT g.t_val, + MAX(CASE WHEN g.truth='in' THEN ABS(g.clipped_true - ${TRUE_IN}::DOUBLE) END) AS bias_in, + MAX(CASE WHEN g.truth='out' THEN ABS(g.clipped_true - ${TRUE_OUT}::DOUBLE) END) AS bias_out + FROM ground g + GROUP BY g.t_val +) +SELECT a.t_val AS t, + printf('%.1f%%', a.var_acc) AS attack_acc_200k, + printf('%.1f%%', a.mid_acc) AS attack_acc_mid, + printf('%.1f%%', ut.mape) AS mape_vs_true, + printf('%.0f', a.std_in) AS noise_std_in, + printf('%.0f', a.std_out) AS noise_std_out, + printf('%.1fx', a.std_in / NULLIF(a.std_out, 0)) AS std_ratio, + printf('%.0f', b.bias_in) AS clip_bias_in, + printf('%.0f', b.bias_out) AS clip_bias_out +FROM attack a 
+JOIN utility ut ON a.t_val = ut.t_val +JOIN bias b ON a.t_val = b.t_val +ORDER BY CASE a.t_val WHEN 'inf' THEN 999 ELSE a.t_val::INT END; + +-- Detailed per-condition stats +SELECT r.t_val AS t, r.truth, + printf('%.0f', AVG(r.v)) AS mean_pac, + printf('%.0f', STDDEV(r.v)) AS std_pac, + printf('%.0f', g.clipped_true) AS truth_clipped, + COUNT(*) AS n +FROM results r +JOIN ground g ON r.t_val = g.t_val AND r.truth = g.truth +WHERE r.v IS NOT NULL +GROUP BY r.t_val, r.truth, g.clipped_true +ORDER BY CASE r.t_val WHEN 'inf' THEN 999 ELSE r.t_val::INT END, r.truth; +SQL + +rm -f "$RESULTS_F" "$GROUND_F" + +echo "" +echo "============================================================" +echo " INTERPRETATION" +echo "============================================================" +echo " Single-pass clip at [μ-tσ, μ+tσ] using baseline stats" +echo " μ=$MU, σ=$SIGMA (from $N background users)" +echo " PAC sees clipped range → sensitivity = 2tσ for both in/out" +echo " std_ratio: noise_std_in / noise_std_out (1.0 = no side-channel)" +echo " Unclipped truths: in=$TRUE_IN out=$TRUE_OUT" +echo "============================================================" From 8d7b944068018a1ad3eb37674457940e6e52e8e9 Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 16:51:43 +0100 Subject: [PATCH 19/27] Update CLAUDE.md, add hooks, skills, and permissions - CLAUDE.md: added code style rules (clang-tidy naming, clang-format style), attack evaluation section, development rules - .claude/settings.json: PostToolUse hook to auto-run make format-fix after edits - Skills: /run-attacks, /test-clip, /explain-pac, /explain-dp, /explain-pac-ddl Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/settings.json | 17 ++++++ .claude/skills/explain-dp/SKILL.md | 55 ++++++++++++++++++ .claude/skills/explain-pac-ddl/SKILL.md | 74 +++++++++++++++++++++++++ .claude/skills/explain-pac/SKILL.md | 49 ++++++++++++++++ .claude/skills/run-attacks/SKILL.md | 23 ++++++++ .claude/skills/test-clip/SKILL.md | 11 
++++ CLAUDE.md | 33 +++++++++++ 7 files changed, 262 insertions(+) create mode 100644 .claude/settings.json create mode 100644 .claude/skills/explain-dp/SKILL.md create mode 100644 .claude/skills/explain-pac-ddl/SKILL.md create mode 100644 .claude/skills/explain-pac/SKILL.md create mode 100644 .claude/skills/run-attacks/SKILL.md create mode 100644 .claude/skills/test-clip/SKILL.md diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 00000000..786c761c --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,17 @@ +{ + "hooks": { + "PostToolUse": [ + { + "matcher": "Edit", + "hooks": [ + { + "type": "command", + "command": "make format-fix 2>/dev/null || true", + "timeout": 30, + "statusMessage": "Running format-fix..." + } + ] + } + ] + } +} diff --git a/.claude/skills/explain-dp/SKILL.md b/.claude/skills/explain-dp/SKILL.md new file mode 100644 index 00000000..103f0d2a --- /dev/null +++ b/.claude/skills/explain-dp/SKILL.md @@ -0,0 +1,55 @@ +--- +name: explain-dp +description: Reference material for differential privacy concepts. Auto-loaded when discussing privacy, attacks, sensitivity, or clipping. +--- + +## Differential Privacy (DP) + +### Definition + +A randomized mechanism M satisfies (ε,δ)-differential privacy if for all +neighboring datasets D, D' (differing in one individual) and all outputs S: + + P[M(D) ∈ S] ≤ e^ε · P[M(D') ∈ S] + δ + +Smaller ε = stronger privacy. δ is the probability of catastrophic failure. + +### Key concepts + +- **Sensitivity**: Maximum change in query output when one individual is + added/removed. For SUM with values in [L,U]: sensitivity = U-L. +- **Laplace mechanism**: Add Laplace(0, sensitivity/ε) noise. Standard for counting queries. +- **Gaussian mechanism**: Add N(0, sensitivity²·2ln(1.25/δ)/ε²) noise. Better for composition. +- **Composition**: Running k queries on the same data costs k·ε total (basic), + or O(√k·ε) with advanced composition. 
+- **Post-processing**: Any function of a DP output is still DP. Free to clip/transform after noise. + +### Membership Inference Attack (MIA) + +The adversary's game: given a query result, determine whether a specific individual +is in the dataset. Attack accuracy = fraction of correct guesses across trials. +50% = random (DP working). >50% = information leakage. + +### Bounded user contribution (Wilson et al. 2019) + +Standard approach for DP SQL: +1. GROUP BY user_id → compute per-user contribution +2. Clip each user's contribution to [L, U] +3. Sum clipped contributions +4. Add noise calibrated to U-L + +This handles both single-large-value outliers and many-small-values users. +Reference: "Differentially Private SQL with Bounded User Contribution" (Google). + +### How PAC differs from DP + +- PAC bounds **mutual information** (pac_mi), not ε-divergence +- PAC does NOT compute sensitivity — noise is calibrated differently +- PAC uses 64 parallel counters + bitslice encoding for efficient aggregation +- pac_clip_sum uses **support-based magnitude clipping** instead of hard [L,U] bounds + +### Input clipping (Winsorization) + +Clip individual values to [μ-tσ, μ+tσ] before aggregation. Reduces sensitivity. +Well-established in DP literature. Limitations: doesn't catch users with many +small values (need per-user contribution clipping instead). diff --git a/.claude/skills/explain-pac-ddl/SKILL.md b/.claude/skills/explain-pac-ddl/SKILL.md new file mode 100644 index 00000000..da2cac75 --- /dev/null +++ b/.claude/skills/explain-pac-ddl/SKILL.md @@ -0,0 +1,74 @@ +--- +name: explain-pac-ddl +description: Reference for PAC DDL syntax — PAC_KEY, PAC_LINK, PROTECTED, SET PU, and the parser. Auto-loaded when discussing table setup, privacy units, or protected columns. +--- + +## PAC DDL Overview + +PAC extends SQL DDL with privacy annotations. 
The parser (`src/parser/pac_parser.cpp`, +`src/parser/pac_parser_helpers.cpp`) intercepts CREATE TABLE and ALTER TABLE statements +to extract PAC-specific clauses before forwarding to DuckDB. + +### Privacy Unit (PU) table + +The PU table is the entity being protected (e.g., customer). One row = one individual. + +```sql +-- Mark a table as the privacy unit +ALTER TABLE customer ADD PAC_KEY (c_custkey); +ALTER TABLE customer SET PU; + +-- Protect specific columns from direct projection +ALTER PU TABLE customer ADD PROTECTED (c_acctbal, c_name, c_address); +``` + +- `PAC_KEY (col)`: Designates the column(s) that uniquely identify a privacy unit. + Must be set before `SET PU`. +- `SET PU`: Marks the table as the privacy unit. After this, aggregates on linked + tables get PAC noise. +- `PROTECTED (col1, col2, ...)`: Columns that cannot be directly projected. + Aggregates (SUM, COUNT, AVG) on protected columns go through PAC. + +### Linking tables to the PU + +Non-PU tables reference the PU table via foreign-key-like links: + +```sql +ALTER TABLE orders ADD PAC_LINK (o_custkey) REFERENCES customer (c_custkey); +ALTER TABLE lineitem ADD PAC_LINK (l_orderkey) REFERENCES orders (o_orderkey); +``` + +- `PAC_LINK (local_col) REFERENCES table(ref_col)`: Declares how to join this + table back to the PU. The compiler uses these links to inject the PU hash + into the query plan. +- Links can be chained: `lineitem → orders → customer`. + +### CREATE TABLE syntax (inline) + +PAC clauses can be inlined in CREATE TABLE: + +```sql +CREATE PU TABLE employees ( + id INTEGER, + department VARCHAR, + salary DECIMAL(10,2), + PAC_KEY (id), + PROTECTED (salary) +); +``` + +The parser strips PAC_KEY, PAC_LINK, and PROTECTED clauses from the CREATE +statement, forwards the clean SQL to DuckDB, then applies the PAC metadata +via ALTER TABLE internally. + +### Common mistakes + +- `PAC_LINK(col, table, ref)` — wrong. Use `PAC_LINK (col) REFERENCES table(ref)`. +- `PROTECTED salary` — wrong. 
Must have parentheses: `PROTECTED (salary)`. +- ALTER TABLE on a PU table requires `ALTER PU TABLE`, not `ALTER TABLE`. + +### Key source files + +- `src/parser/pac_parser.cpp` — main parser hook (intercepts SQL statements) +- `src/parser/pac_parser_helpers.cpp` — extraction of PAC_KEY, PAC_LINK, PROTECTED +- `src/core/pac_metadata.cpp` — in-memory metadata storage for PU/link/protected info diff --git a/.claude/skills/explain-pac/SKILL.md b/.claude/skills/explain-pac/SKILL.md new file mode 100644 index 00000000..8982ca47 --- /dev/null +++ b/.claude/skills/explain-pac/SKILL.md @@ -0,0 +1,49 @@ +--- +name: explain-pac +description: Reference material for PAC privacy internals. Auto-loaded when discussing PAC mechanism, noise, counters, or clipping. +--- + +## PAC Privacy Overview + +PAC (Probably Approximately Correct) privacy is a framework for privatizing SQL +aggregates, described in [SIMD-PAC-DB](https://arxiv.org/abs/2603.15023). + +### Core mechanism + +- Each aggregate maintains **64 parallel counters** (one per bit of a hashed key) +- Each row's value is added to ~32 counters (determined by pac_hash of the PU key) +- At finalization, noise calibrated to a **mutual information bound** (pac_mi) is + added, and the result is estimated from the counters +- PAC does NOT compute sensitivity (unlike differential privacy) + +### SWAR bitslice encoding + +- Counters are packed as 4 × uint16_t per uint64_t (SWAR = SIMD Within A Register) +- This enables processing 4 counters per instruction without actual SIMD intrinsics +- Overflow cascades to 32-bit overflow counters when 16-bit counters saturate + +### pac_clip_sum (contribution clipping) + +- **Pre-aggregation**: Query rewriter inserts `GROUP BY pu_hash` to sum each user's + rows into a single contribution (handles the "50K small items" case) + +- **Magnitude levels**: Values decomposed into levels (4x per level, 2-bit shift; + level = (bit_pos - 4) >> 1). Level 0: 0-63, Level 1: 64-255, Level 2: 256-1023, + Level 3: 1024-4095, Level 4: 4096-16383, etc.
+- **Bitmap tracking**: Each level maintains a 64-bit bitmap of distinct contributors + (using birthday-paradox estimation from popcount) +- **Hard-zero**: Levels with fewer distinct contributors than `pac_clip_support` + contribute nothing to the result (prevents variance side-channel attacks) + +### Key settings + +- `pac_mi`: Mutual information bound (0 = deterministic/no noise) +- `pac_seed`: RNG seed for reproducible noise +- `pac_clip_support`: Minimum distinct contributors per magnitude level (NULL = disabled) +- `pac_hash_repair`: Ensure pac_hash outputs exactly 32 bits set + +### DDL + +```sql +ALTER TABLE customer ADD PAC_KEY (c_custkey); +ALTER TABLE customer SET PU; +ALTER TABLE orders ADD PAC_LINK (o_custkey) REFERENCES customer (c_custkey); +``` diff --git a/.claude/skills/run-attacks/SKILL.md b/.claude/skills/run-attacks/SKILL.md new file mode 100644 index 00000000..a01e7c13 --- /dev/null +++ b/.claude/skills/run-attacks/SKILL.md @@ -0,0 +1,23 @@ +--- +name: run-attacks +description: Run the pac_clip_sum membership inference attack test suite and summarize results. +--- + +## Context + +PAC (Probably Approximately Correct) privacy privatizes SQL aggregates via 64 parallel +SWAR bitslice counters with MI-bounded noise. pac_clip_sum adds per-user contribution +clipping using magnitude-level decomposition (4x bands, 2-bit shift) with distinct-contributor +bitmaps. Unsupported outlier levels are hard-zeroed to prevent variance side-channel attacks. + +## Instructions + +1. Build if needed: `GEN=ninja make 2>&1 | tail -5` +2. Run the main attack suite: `bash attacks/clip_attack_test.sh 2>/dev/null` +3. Run the multi-row attack: `bash attacks/clip_multirow_test.sh 2>/dev/null` +4. 
Run stress tests if available: `bash attacks/clip_hardzero_stress.sh 2>/dev/null` + +Summarize results as a table: +- Attack scenario, clip_support value, attack accuracy, std_in, std_out, std ratio +- Flag any accuracy above 60% as a potential regression +- Compare to baselines in `attacks/clip_attack_results.md` diff --git a/.claude/skills/test-clip/SKILL.md b/.claude/skills/test-clip/SKILL.md new file mode 100644 index 00000000..66f7dc8a --- /dev/null +++ b/.claude/skills/test-clip/SKILL.md @@ -0,0 +1,11 @@ +--- +name: test-clip +description: Build and run pac_clip_sum unit tests. +--- + +## Instructions + +1. Build: `GEN=ninja make 2>&1 | tail -5` +2. Run clip_sum tests: `build/release/test/unittest "test/sql/pac_clip_sum*" 2>&1` +3. Report: number of assertions passed/failed +4. If any fail, show the failing test name and expected vs actual values diff --git a/CLAUDE.md b/CLAUDE.md index 21b11dba..acf449f3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -98,3 +98,36 @@ SET pac_mi = 0; -- disable noise for testing SET pac_seed = 42; -- reproducible results SET pac_clip_support = 40; -- enable clip rewrite with support threshold ``` + +## Code style (clang-tidy) + +The project uses clang-tidy with DuckDB's configuration (`.clang-tidy`). 
Key naming rules: + +- **Classes/Enums**: `CamelCase` (e.g., `PacClipSumIntState`) +- **Functions**: `CamelCase` (e.g., `GetLevel`, `AllocateLevel`) +- **Variables/parameters/members**: `lower_case` (e.g., `max_level_used`, `key_hash`) +- **Constants/static/constexpr**: `UPPER_CASE` (e.g., `PAC2_NUM_LEVELS`, `PAC2_LEVEL_SHIFT`) +- **Macros**: `UPPER_CASE` (e.g., `PAC_DEBUG_PRINT`) +- **Typedefs**: `lower_case_t` suffix (e.g., `aggregate_update_t`) + +Other style rules (from `.clang-format`, based on LLVM): + +- **Tabs for indentation**, width 4 +- **Column limit**: 120 +- **Braces**: same line as statement (K&R / Allman-attached) +- **Pointers**: right-aligned (`int *ptr`, not `int* ptr`) +- **No short functions on single line** +- **Templates**: always break after `template<...>` +- **Long arguments**: align after open bracket + +Run `make format-fix` to auto-format. Formatting runs automatically via hook after edits. + +## Attack evaluation + +Attack scripts live in `attacks/`. Results are documented in `attacks/clip_attack_results.md`. + +```bash +bash attacks/clip_attack_test.sh 2>/dev/null # main attack suite +bash attacks/clip_multirow_test.sh 2>/dev/null # 20K small items test +bash attacks/clip_hardzero_stress.sh 2>/dev/null # stress tests +``` From 783008c7ba88f72b2ff791ec8c98fafa378874ea Mon Sep 17 00:00:00 2001 From: ila Date: Tue, 24 Mar 2026 17:07:50 +0100 Subject: [PATCH 20/27] Add metadata file documentation to explain-pac-ddl skill Document the pac_metadata JSON sidecar files: naming convention, auto-loading, save/clear pragmas, and the important note to delete metadata when recreating DBs. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/skills/explain-pac-ddl/SKILL.md | 34 +++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/.claude/skills/explain-pac-ddl/SKILL.md b/.claude/skills/explain-pac-ddl/SKILL.md index da2cac75..110fbcda 100644 --- a/.claude/skills/explain-pac-ddl/SKILL.md +++ b/.claude/skills/explain-pac-ddl/SKILL.md @@ -67,8 +67,42 @@ via ALTER TABLE internally. - `PROTECTED salary` — wrong. Must have parentheses: `PROTECTED (salary)`. - ALTER TABLE on a PU table requires `ALTER PU TABLE`, not `ALTER TABLE`. +### Metadata files + +PAC metadata (PU tables, links, protected columns) is stored in JSON sidecar files +next to the database file. The naming convention is: + +``` +pac_metadata__.json +``` + +For example, `tpch_sf1.db` produces `pac_metadata_tpch_sf1_main.json` in the same +directory. + +**Auto-loading**: When the PAC extension loads (`LOAD pac`), it automatically looks +for a matching metadata file next to the attached database and loads it. No manual +`PRAGMA load_pac_metadata` needed for persistent databases. + +**Saving**: After setting up PAC_KEY/PAC_LINK/PROTECTED, save with: +```sql +PRAGMA save_pac_metadata('/path/to/pac_metadata_mydb_main.json'); +``` + +**Clearing**: Reset all in-memory PAC metadata: +```sql +PRAGMA clear_pac_metadata; +``` + +**Important**: If you delete or recreate a database file, also delete the +corresponding `pac_metadata_*.json` file. Stale metadata causes confusing errors +(references to tables/columns that no longer exist). + +For in-memory databases, metadata file is named `pac_metadata_memory_main.json` +in the current working directory. 
+ ### Key source files - `src/parser/pac_parser.cpp` — main parser hook (intercepts SQL statements) - `src/parser/pac_parser_helpers.cpp` — extraction of PAC_KEY, PAC_LINK, PROTECTED - `src/core/pac_metadata.cpp` — in-memory metadata storage for PU/link/protected info +- `src/core/pac_extension.cpp` — auto-loading of metadata on extension load (LoadInternal) From f8e192b902f91d460b05e7777666073b93215af6 Mon Sep 17 00:00:00 2001 From: ila Date: Wed, 25 Mar 2026 10:09:11 +0100 Subject: [PATCH 21/27] Update PAC and DP skills with formal definitions and theory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit explain-pac: added formal PAC definition, 4-step privatization template, MI-to-posterior success rate table, composition theorem, PAC vs DP comparison, and SIMD-PAC-DB implementation details. explain-dp: added PAC vs DP comparison table, loose bounds insight, privacy-conscious design (MSE = Bias² + (1/(2B)+1)·Var), and implications for clipping (reducing variance improves privacy-utility tradeoff). Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/skills/explain-dp/SKILL.md | 35 +++++++++++++++--- .claude/skills/explain-pac/SKILL.md | 55 +++++++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/.claude/skills/explain-dp/SKILL.md b/.claude/skills/explain-dp/SKILL.md index 103f0d2a..352a2437 100644 --- a/.claude/skills/explain-dp/SKILL.md +++ b/.claude/skills/explain-dp/SKILL.md @@ -43,13 +43,40 @@ Reference: "Differentially Private SQL with Bounded User Contribution" (Google). 
### How PAC differs from DP
 
-- PAC bounds **mutual information** (pac_mi), not ε-divergence
-- PAC does NOT compute sensitivity — noise is calibrated differently
-- PAC uses 64 parallel counters + bitslice encoding for efficient aggregation
-- pac_clip_sum uses **support-based magnitude clipping** instead of hard [L,U] bounds
+| | DP | PAC |
+|---|---|---|
+| **Guarantee type** | Input-independent (worst-case) | Instance-dependent (distribution D) |
+| **Noise calibration** | Sensitivity s → noise ∝ s/ε | Variance σ² → noise ∝ σ²/(2β) |
+| **White-boxing** | Required (analyze algorithm) | Not needed (black-box simulation) |
+| **Composition** | k queries → k·ε (basic) | k queries → Σ MIᵢ (linear, Theorem 2) |
+| **Privacy metric** | ε (log-likelihood ratio) | MI (mutual information, in nats) |
+| **Conversion** | MI=1/128 ≈ ε=0.25 for prior=50% | See Table 3.2 in thesis |
+| **Stable algorithms** | Same noise regardless | Less noise automatically |
+| **Outlier impact** | Sensitivity explodes | Variance explodes (same practical problem) |
+
+Key insight: PAC guarantees are **loose** — the theoretical bound on MIA success
+rate is conservative. Empirical attacks achieve lower success than the bound
+predicts. This means the bounds are hard to violate.
 
 ### Input clipping (Winsorization)
 
 Clip individual values to [μ-tσ, μ+tσ] before aggregation. Reduces sensitivity.
 Well-established in DP literature. Limitations: doesn't catch users with many
 small values (need per-user contribution clipping instead).
+
+### Privacy-conscious design
+
+Rather than post-hoc privatization (build algorithm, then add noise), PAC enables
+**privacy-conscious design**: optimize algorithm parameters jointly with
+the privacy budget.
+
+Key result: For a privatized estimator with budget B:
+  MSE = Bias² + (1/(2B) + 1) · Var + error
+
+This means privatization inflates the variance term by a factor of (1/(2B) + 1).
At tight budgets +(small B), the optimal algorithm shifts toward lower-variance (higher-bias) +models. E.g., stronger regularization in ridge regression. + +For databases: this suggests that queries producing high-variance outputs (due to +outliers, small groups, etc.) are inherently harder to privatize. Clipping reduces +variance and thus the noise needed, improving the privacy-utility tradeoff. diff --git a/.claude/skills/explain-pac/SKILL.md b/.claude/skills/explain-pac/SKILL.md index 8982ca47..19ea9124 100644 --- a/.claude/skills/explain-pac/SKILL.md +++ b/.claude/skills/explain-pac/SKILL.md @@ -5,16 +5,65 @@ description: Reference material for PAC privacy internals. Auto-loaded when disc ## PAC Privacy Overview -PAC (Probably Approximately Correct) privacy is a framework for privatizing SQL -aggregates, described in [SIMD-PAC-DB](https://arxiv.org/abs/2603.15023). +PAC (Probably Approximately Correct) privacy is a framework for privatizing +algorithms with provable guarantees, described in [SIMD-PAC-DB](https://arxiv.org/abs/2603.15023). -### Core mechanism +### Formal definition + +Given a data distribution D, a query Q satisfies (δ, ρ, D)-PAC Privacy if no +adversary who knows D can, after observing Q(X) where X ~ D, produce an +estimate X̂ such that ρ(X̂, X) = 1 with probability ≥ (1-δ). + +The key insight: **noise scales with the variance of the algorithm's output across +random subsamples** of the data. Stable algorithms (low variance) need less noise. + +### The 4-step privatization template + +1. **Subsample**: Draw m random 50%-subsets X₁...Xₘ from the full dataset +2. **Compute**: Run the query Q on each subset → outputs y₁...yₘ +3. **Estimate noise**: Compute variance σ² across the yᵢ. Required noise: Δ = σ²/(2β) + where β is the MI budget +4. **Release**: Pick a random subset Xⱼ, return Q(Xⱼ) + N(0, Δ) + +This is the theoretical foundation. SIMD-PAC-DB encodes this efficiently using +64 parallel counters (one per possible subset assignment bit). 
+ +### MI → posterior success rate + +| MI | Max posterior (prior=50%) | Max posterior (prior=25%) | +|----|--------------------------|--------------------------| +| 1/128 | 56.2% | 30.5% | +| 1/64 | 58.8% | 32.9% | +| 1/32 | 62.4% | 36.3% | +| 1/16 | 67.5% | 41.2% | +| 1/8 | 74.5% | 48.2% | +| 1/4 | 83.8% | 58.4% | +| 1/2 | 95.2% | 72.7% | +| 1 | 100% | 91.4% | + +### PAC Composition + +For T adaptive queries with independent random sampling per query, the total +MI is bounded by the sum: MI(total) ≤ Σᵢ MIᵢ. This is linear composition — +each query's MI adds to the budget. The key requirement: **independent random +sampling per query** (each query uses a fresh random subset). + +### PAC vs DP + +- **DP**: input-independent guarantee. Requires white-boxing to compute sensitivity. + Noise ∝ sensitivity/ε. Works for worst-case neighboring datasets. +- **PAC**: instance-dependent guarantee. No white-boxing needed. Noise ∝ Var[Q(X)]/β. + Stable queries get less noise automatically. But the guarantee depends on the + data distribution D. + +### Core mechanism (SIMD-PAC-DB implementation) - Each aggregate maintains **64 parallel counters** (one per bit of a hashed key) - Each row's value is added to ~32 counters (determined by pac_hash of the PU key) - At finalization, noise calibrated to a **mutual information bound** (pac_mi) is added, and the result is estimated from the counters - PAC does NOT compute sensitivity (unlike differential privacy) +- The 64 counters encode m=64 possible subsets in one pass (SIMD-efficient) ### SWAR bitslice encoding From 7e7688f9259fe94bbc9b77c9df76bf839f9e74c9 Mon Sep 17 00:00:00 2001 From: ila Date: Wed, 1 Apr 2026 14:48:56 +0200 Subject: [PATCH 22/27] Add pac_clip_min_max and float/double support for clip aggregates Implements level-based clipping for MIN/MAX aggregates (pac_clip_min, pac_clip_max, pac_noised_clip_min, pac_noised_clip_max) using int8_t extremes with per-level bitmaps for support estimation. 
Replaces the previous alias-only stubs with a real implementation that reuses UpdateExtremesSIMD from pac_min_max.hpp. Adds native FLOAT/DOUBLE overloads for pac_clip_sum and pac_clip_min_max using power-of-2 scale factors (2^20 for float, 2^27 for double) to convert to int64 before entering the integer-based level machinery. Removes the lossy BIGINT cast workaround from the expression builder. Includes BOUNDOPT (per-level bound optimization), AllValid fast paths, and shared ScaleFloatToInt64 helper with branchless clamping. Co-Authored-By: Claude Opus 4.6 (1M context) --- CMakeLists.txt | 1 + src/aggregates/pac_clip_min_max.cpp | 816 ++++++++++++++++++ src/aggregates/pac_clip_sum.cpp | 150 +++- src/aggregates/pac_min_max.cpp | 51 -- src/core/pac_extension.cpp | 1 + src/include/aggregates/pac_clip_min_max.hpp | 297 +++++++ src/include/aggregates/pac_clip_sum.hpp | 16 + src/include/aggregates/pac_min_max.hpp | 4 - .../pac_expression_builder.cpp | 8 - test/sql/pac_clip_min_max.test | 282 ++++++ test/sql/pac_clip_sum.test | 116 +++ 11 files changed, 1674 insertions(+), 68 deletions(-) create mode 100644 src/aggregates/pac_clip_min_max.cpp create mode 100644 src/include/aggregates/pac_clip_min_max.hpp create mode 100644 test/sql/pac_clip_min_max.test diff --git a/CMakeLists.txt b/CMakeLists.txt index 7aa14f03..fac7d1b2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,6 +38,7 @@ set(EXTENSION_SOURCES src/aggregates/pac_min_max.cpp src/aggregates/pac_sum.cpp src/aggregates/pac_clip_sum.cpp + src/aggregates/pac_clip_min_max.cpp src/compiler/pac_bitslice_compiler.cpp src/compiler/pac_compiler_helpers.cpp src/query_processing/pac_avg_rewriter.cpp diff --git a/src/aggregates/pac_clip_min_max.cpp b/src/aggregates/pac_clip_min_max.cpp new file mode 100644 index 00000000..d5e51c12 --- /dev/null +++ b/src/aggregates/pac_clip_min_max.cpp @@ -0,0 +1,816 @@ +#include "aggregates/pac_clip_min_max.hpp" +#include "aggregates/pac_clip_sum.hpp" // for PAC2_FLOAT_SCALE, 
PAC2_DOUBLE_SCALE +#include "categorical/pac_categorical.hpp" +#include "duckdb/common/types/decimal.hpp" +#include "duckdb/parser/parsed_data/create_aggregate_function_info.hpp" +#include + +namespace duckdb { + +// ============================================================================ +// Inner state update: route one signed int64 value to the correct level +// ============================================================================ +template +AUTOVECTORIZE inline void PacClipMinMaxUpdateOneInternal(PacClipMinMaxIntState &state, uint64_t key_hash, + int64_t value, ArenaAllocator &allocator) { + state.key_hash |= key_hash; + + uint64_t abs_val = static_cast(value >= 0 ? value : -value); + int level = PacClipMinMaxIntState::GetLevel(abs_val); + int shift = level << 1; + // Arithmetic right shift preserves sign; fits in int8_t due to GetLevel threshold 128 + int8_t shifted_val = static_cast(value >> shift); + + state.EnsureLevelAllocated(allocator, level); + uint64_t *buf = state.levels[level]; + + // Set bitmap bit (always, even if BOUNDOPT skips the extreme update) + buf[PCMM_SWAR] |= (1ULL << (key_hash >> 58)); + + // BOUNDOPT: skip expensive SIMD update if value can't improve any extreme at this level + if (!PAC_IS_BETTER(shifted_val, state.level_bounds[level])) { + return; + } + state.UpdateExtreme(buf, shifted_val, key_hash); + + // Periodically recompute bound + if ((state.update_count & (BOUND_RECOMPUTE_INTERVAL - 1)) == 0) { + state.RecomputeBound(level); + } +} + +// Overload for unsigned int64 (always positive, shifted fits in 0..127 → int8_t safe) +template +AUTOVECTORIZE inline void PacClipMinMaxUpdateOneInternalUnsigned(PacClipMinMaxIntState &state, + uint64_t key_hash, uint64_t value, + ArenaAllocator &allocator) { + state.key_hash |= key_hash; + + int level = PacClipMinMaxIntState::GetLevel(value); + int shift = level << 1; + int8_t shifted_val = static_cast((value >> shift) & 0x7F); // mask to 7 bits (0-127) + + 
state.EnsureLevelAllocated(allocator, level); + uint64_t *buf = state.levels[level]; + buf[PCMM_SWAR] |= (1ULL << (key_hash >> 58)); + + // BOUNDOPT + if (!PAC_IS_BETTER(shifted_val, state.level_bounds[level])) { + return; + } + state.UpdateExtreme(buf, shifted_val, key_hash); + if ((state.update_count & (BOUND_RECOMPUTE_INTERVAL - 1)) == 0) { + state.RecomputeBound(level); + } +} + +// ============================================================================ +// Buffered update (single-sided, no pos/neg split) +// ============================================================================ +template +AUTOVECTORIZE inline void PacClipMinMaxUpdateOne(PacClipMinMaxStateWrapper &agg, uint64_t key_hash, + ValueT value, ArenaAllocator &a) { + uint64_t cnt = agg.n_buffered & PacClipMinMaxStateWrapper::BUF_MASK; + if (DUCKDB_UNLIKELY(cnt == PacClipMinMaxStateWrapper::BUF_SIZE)) { + auto *dst_state = agg.EnsureState(a); + for (int i = 0; i < PacClipMinMaxStateWrapper::BUF_SIZE; i++) { + if constexpr (SIGNED) { + PacClipMinMaxUpdateOneInternal(*dst_state, agg.hash_buf[i], agg.val_buf[i], a); + } else { + PacClipMinMaxUpdateOneInternalUnsigned(*dst_state, agg.hash_buf[i], + static_cast(agg.val_buf[i]), a); + } + dst_state->update_count++; + } + if constexpr (SIGNED) { + PacClipMinMaxUpdateOneInternal(*dst_state, key_hash, static_cast(value), a); + } else { + PacClipMinMaxUpdateOneInternalUnsigned(*dst_state, key_hash, static_cast(value), a); + } + dst_state->update_count++; + agg.n_buffered &= ~PacClipMinMaxStateWrapper::BUF_MASK; + } else { + agg.val_buf[cnt] = static_cast(value); + agg.hash_buf[cnt] = key_hash; + agg.n_buffered++; + } +} + +// ============================================================================ +// Buffer flush +// ============================================================================ +template +inline void PacClipMinMaxFlushBuffer(PacClipMinMaxStateWrapper &src, PacClipMinMaxStateWrapper &dst, + ArenaAllocator &a) { + uint64_t cnt = 
src.n_buffered & PacClipMinMaxStateWrapper::BUF_MASK; + if (cnt > 0) { + auto *dst_state = dst.EnsureState(a); + for (uint64_t i = 0; i < cnt; i++) { + if constexpr (SIGNED) { + PacClipMinMaxUpdateOneInternal(*dst_state, src.hash_buf[i], src.val_buf[i], a); + } else { + PacClipMinMaxUpdateOneInternalUnsigned(*dst_state, src.hash_buf[i], + static_cast(src.val_buf[i]), a); + } + dst_state->update_count++; + } + src.n_buffered &= ~PacClipMinMaxStateWrapper::BUF_MASK; + } +} + +// ============================================================================ +// Vectorized Update and ScatterUpdate +// ============================================================================ +template +static void PacClipMinMaxUpdate(Vector inputs[], PacClipMinMaxStateWrapper &state, idx_t count, + ArenaAllocator &allocator) { + UnifiedVectorFormat hash_data, value_data; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + + if (hash_data.validity.AllValid() && value_data.validity.AllValid()) { + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + PacClipMinMaxUpdateOne(state, hashes[h_idx], + ConvertValue::convert(values[v_idx]), allocator); + } + } else { + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + PacClipMinMaxUpdateOne(state, hashes[h_idx], + ConvertValue::convert(values[v_idx]), allocator); + } + } +} + +template +static void PacClipMinMaxScatterUpdate(Vector inputs[], Vector &states, idx_t count, ArenaAllocator &allocator) { + UnifiedVectorFormat hash_data, value_data, sdata; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, 
value_data); + states.ToUnifiedFormat(count, sdata); + + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + auto state_ptrs = UnifiedVectorFormat::GetData *>(sdata); + + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + auto state = state_ptrs[sdata.sel->get_index(i)]; + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + PacClipMinMaxUpdateOne(*state, hashes[h_idx], ConvertValue::convert(values[v_idx]), + allocator); + } +} + +// ============================================================================ +// X-macro: generate Update/ScatterUpdate for integer types +// ============================================================================ +#define PCMM_INT_TYPES_SIGNED \ + X(TinyInt, int64_t, int8_t, true) \ + X(SmallInt, int64_t, int16_t, true) \ + X(Integer, int64_t, int32_t, true) \ + X(BigInt, int64_t, int64_t, true) + +#define PCMM_INT_TYPES_UNSIGNED \ + X(UTinyInt, uint64_t, uint8_t, false) \ + X(USmallInt, uint64_t, uint16_t, false) \ + X(UInteger, uint64_t, uint32_t, false) \ + X(UBigInt, uint64_t, uint64_t, false) + +// Generate for IS_MAX=true (MAX) +#define X(NAME, VALUE_T, INPUT_T, SIGNED_VAL) \ + static void PacClipMaxUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, \ + idx_t count) { \ + auto &state = *reinterpret_cast *>(state_p); \ + PacClipMinMaxUpdate(inputs, state, count, aggr.allocator); \ + } \ + static void PacClipMaxScatterUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &states, \ + idx_t count) { \ + PacClipMinMaxScatterUpdate(inputs, states, count, aggr.allocator); \ + } +PCMM_INT_TYPES_SIGNED +PCMM_INT_TYPES_UNSIGNED +#undef X + +// Generate for IS_MAX=false (MIN) +#define X(NAME, VALUE_T, INPUT_T, SIGNED_VAL) \ + static void PacClipMinUpdate##NAME(Vector inputs[], AggregateInputData 
&aggr, idx_t, data_ptr_t state_p, \ + idx_t count) { \ + auto &state = *reinterpret_cast *>(state_p); \ + PacClipMinMaxUpdate(inputs, state, count, aggr.allocator); \ + } \ + static void PacClipMinScatterUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &states, \ + idx_t count) { \ + PacClipMinMaxScatterUpdate(inputs, states, count, aggr.allocator); \ + } +PCMM_INT_TYPES_SIGNED +PCMM_INT_TYPES_UNSIGNED +#undef X + +// ============================================================================ +// Float/double update: scale to int64, route through signed path +// ============================================================================ +template +static void PacClipMinMaxUpdateFloat(Vector inputs[], PacClipMinMaxStateWrapper &state, idx_t count, + ArenaAllocator &allocator) { + UnifiedVectorFormat hash_data, value_data; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + + if (hash_data.validity.AllValid() && value_data.validity.AllValid()) { + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + PacClipMinMaxUpdateOne(state, hashes[h_idx], + ScaleFloatToInt64(values[v_idx]), allocator); + } + } else { + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + PacClipMinMaxUpdateOne(state, hashes[h_idx], + ScaleFloatToInt64(values[v_idx]), allocator); + } + } +} + +template +static void PacClipMinMaxScatterUpdateFloat(Vector inputs[], Vector &states, idx_t count, ArenaAllocator &allocator) { + UnifiedVectorFormat hash_data, value_data, sdata; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + 
states.ToUnifiedFormat(count, sdata); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + auto state_ptrs = UnifiedVectorFormat::GetData *>(sdata); + + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + auto state = state_ptrs[sdata.sel->get_index(i)]; + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + PacClipMinMaxUpdateOne(*state, hashes[h_idx], ScaleFloatToInt64(values[v_idx]), + allocator); + } +} + +// X-macro: generate float/double Update/ScatterUpdate for MAX and MIN +#define PCMM_FLOAT_TYPES \ + XF(SingleFloat, float, PCMM_FLOAT_SHIFT) \ + XF(SingleDouble, double, PCMM_DOUBLE_SHIFT) + +#define XF(NAME, FLOAT_T, SHIFT_VAL) \ + static void PacClipMaxUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, \ + idx_t count) { \ + auto &state = *reinterpret_cast *>(state_p); \ + PacClipMinMaxUpdateFloat(inputs, state, count, aggr.allocator); \ + } \ + static void PacClipMaxScatterUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &states, \ + idx_t count) { \ + PacClipMinMaxScatterUpdateFloat(inputs, states, count, aggr.allocator); \ + } \ + static void PacClipMinUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, \ + idx_t count) { \ + auto &state = *reinterpret_cast *>(state_p); \ + PacClipMinMaxUpdateFloat(inputs, state, count, aggr.allocator); \ + } \ + static void PacClipMinScatterUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &states, \ + idx_t count) { \ + PacClipMinMaxScatterUpdateFloat(inputs, states, count, aggr.allocator); \ + } +PCMM_FLOAT_TYPES +#undef XF + +// ============================================================================ +// Combine +// ============================================================================ +template +static void 
PacClipMinMaxCombineInt(Vector &src, Vector &dst, idx_t count, ArenaAllocator &allocator) { + auto src_wrapper = FlatVector::GetData *>(src); + auto dst_wrapper = FlatVector::GetData *>(dst); + + for (idx_t i = 0; i < count; i++) { + // Flush src's buffer into dst (always signed — values stored as int64 in buffer) + PacClipMinMaxFlushBuffer(*src_wrapper[i], *dst_wrapper[i], allocator); + + auto *s = src_wrapper[i]->GetState(); + if (!s) { + continue; + } + auto *d = dst_wrapper[i]->EnsureState(allocator); + d->CombineFrom(s, allocator); + } +} + +static void PacClipMaxCombine(Vector &src, Vector &dst, AggregateInputData &aggr, idx_t count) { + PacClipMinMaxCombineInt(src, dst, count, aggr.allocator); +} +static void PacClipMinCombine(Vector &src, Vector &dst, AggregateInputData &aggr, idx_t count) { + PacClipMinMaxCombineInt(src, dst, count, aggr.allocator); +} + +// ============================================================================ +// Bind data +// ============================================================================ +struct PacClipMinMaxBindData : public PacBindData { + int clip_support_threshold; + double float_scale; + + PacClipMinMaxBindData(ClientContext &ctx, double mi_val, double correction_val, int clip_support, + double float_scale_val = 1.0) + : PacBindData(ctx, mi_val, correction_val, 1.0), clip_support_threshold(clip_support), + float_scale(float_scale_val) { + } + + unique_ptr Copy() const override { + auto copy = make_uniq(*this); + copy->total_update_count = 0; + copy->suspicious_count = 0; + copy->nonsuspicious_count = 0; + return copy; + } + bool Equals(const FunctionData &other) const override { + if (!PacBindData::Equals(other)) { + return false; + } + auto *o = dynamic_cast(&other); + return o && clip_support_threshold == o->clip_support_threshold && float_scale == o->float_scale; + } +}; + +// ============================================================================ +// Finalize: noised scalar output +// 
============================================================================ +template +static void PacClipMinMaxFinalize(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + auto state_ptrs = FlatVector::GetData *>(states); + auto data = FlatVector::GetData(result); + auto &result_mask = FlatVector::Validity(result); + auto &bind = static_cast(*input.bind_data); + double mi = bind.mi; + double correction = bind.correction; + uint64_t query_hash = bind.query_hash; + auto pstate = bind.pstate; + int clip_support = bind.clip_support_threshold; + + for (idx_t i = 0; i < count; i++) { + PacClipMinMaxFlushBuffer(*state_ptrs[i], *state_ptrs[i], input.allocator); + + PAC_FLOAT buf[64] = {0}; + auto *s = state_ptrs[i]->GetState(); + if (!s) { + result_mask.SetInvalid(offset + i); + continue; + } + uint64_t key_hash = s->key_hash; + std::mt19937_64 gen(bind.seed); + if (PacNoiseInNull(key_hash, mi, correction, gen)) { + result_mask.SetInvalid(offset + i); + continue; + } + + s->GetTotals(buf, clip_support); + uint64_t update_count = s->update_count; + + CheckPacSampleDiversity(key_hash, buf, update_count, IS_MAX ? "pac_noised_clip_max" : "pac_noised_clip_min", + bind); + PAC_FLOAT result_val = PacNoisySampleFrom64Counters(buf, mi, correction, gen, ~key_hash, query_hash, pstate); + result_val /= static_cast(bind.float_scale); + data[offset + i] = FromDouble(result_val); + } +} + +// Noised finalize instantiations — return type matches input type +// Integer inputs: return same type as non-clip min/max (the type itself) +// For clip variants, noised output returns the value type. We use templates to handle all types. + +// Helper to deduce return type from value type. For integers, the noised clip min/max +// returns the same type. For float/double, returns float/double. 
+// X-macro: generate noised finalize wrappers for all output types × MAX/MIN +#define PCMM_FINALIZE_TYPES \ + XFIN(BigInt, int64_t) \ + XFIN(Float, float) \ + XFIN(Double, double) \ + XFIN(HugeInt, hugeint_t) + +#define XFIN(NAME, ACC_T) \ + static void PacClipMaxNoisedFinalize##NAME(Vector &s, AggregateInputData &i, Vector &r, idx_t c, idx_t o) { \ + PacClipMinMaxFinalize(s, i, r, c, o); \ + } \ + static void PacClipMinNoisedFinalize##NAME(Vector &s, AggregateInputData &i, Vector &r, idx_t c, idx_t o) { \ + PacClipMinMaxFinalize(s, i, r, c, o); \ + } +PCMM_FINALIZE_TYPES +#undef XFIN + +// ============================================================================ +// Counters finalize (LIST output) +// ============================================================================ +template +static void PacClipMinMaxFinalizeCounters(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + auto state_ptrs = FlatVector::GetData *>(states); + auto &bind = static_cast(*input.bind_data); + int clip_support = bind.clip_support_threshold; + double correction = bind.correction; + double float_scale = bind.float_scale; + + auto list_entries = FlatVector::GetData(result); + auto &child_vec = ListVector::GetEntry(result); + + idx_t total_elements = count * 64; + ListVector::Reserve(result, total_elements); + ListVector::SetListSize(result, total_elements); + + auto child_data = FlatVector::GetData(child_vec); + + for (idx_t i = 0; i < count; i++) { + PacClipMinMaxFlushBuffer(*state_ptrs[i], *state_ptrs[i], input.allocator); + + list_entries[offset + i].offset = i * 64; + list_entries[offset + i].length = 64; + + PAC_FLOAT buf[64] = {0}; + uint64_t key_hash = 0; + uint64_t update_count = 0; + + auto *s = state_ptrs[i]->GetState(); + if (s) { + key_hash = s->key_hash; + update_count = s->update_count; + s->GetTotals(buf, clip_support); + } + + CheckPacSampleDiversity(key_hash, buf, update_count, IS_MAX ? 
"pac_clip_max" : "pac_clip_min", bind); + + idx_t base = i * 64; + for (int j = 0; j < 64; j++) { + if ((key_hash >> j) & 1ULL) { + child_data[base + j] = static_cast(buf[j] * correction / float_scale); + } else { + child_data[base + j] = 0.0; + } + } + } +} + +static void PacClipMaxFinalizeCounters(Vector &s, AggregateInputData &i, Vector &r, idx_t c, idx_t o) { + PacClipMinMaxFinalizeCounters(s, i, r, c, o); +} +static void PacClipMinFinalizeCounters(Vector &s, AggregateInputData &i, Vector &r, idx_t c, idx_t o) { + PacClipMinMaxFinalizeCounters(s, i, r, c, o); +} + +// ============================================================================ +// State size / init / bind +// ============================================================================ +template +static idx_t PacClipMinMaxStateSize(const AggregateFunction &) { + return sizeof(PacClipMinMaxStateWrapper); +} + +template +static void PacClipMinMaxInitialize(const AggregateFunction &, data_ptr_t state_p) { + memset(state_p, 0, sizeof(PacClipMinMaxStateWrapper)); +} + +static unique_ptr PacClipMinMaxBindWithScale(ClientContext &ctx, vector> &args, + double float_scale = 1.0) { + double mi = GetPacMiFromSetting(ctx); + double correction = 1.0; + if (2 < args.size()) { + if (!args[2]->IsFoldable()) { + throw InvalidInputException("pac_clip_min/max: correction parameter must be a constant"); + } + auto val = ExpressionExecutor::EvaluateScalar(ctx, *args[2]); + correction = val.GetValue(); + if (correction < 0.0) { + throw InvalidInputException("pac_clip_min/max: correction must be >= 0"); + } + } + int clip_support = 0; + Value dc_val; + if (ctx.TryGetCurrentSetting("pac_clip_support", dc_val) && !dc_val.IsNull()) { + clip_support = static_cast(dc_val.GetValue()); + } + return make_uniq(ctx, mi, correction, clip_support, float_scale); +} +static unique_ptr PacClipMinMaxBind(ClientContext &ctx, AggregateFunction &, + vector> &args) { + return PacClipMinMaxBindWithScale(ctx, args); +} +static unique_ptr 
PacClipMinMaxBindFloat(ClientContext &ctx, AggregateFunction &, + vector> &args) { + return PacClipMinMaxBindWithScale(ctx, args, PCMM_FLOAT_SCALE); +} +static unique_ptr PacClipMinMaxBindDouble(ClientContext &ctx, AggregateFunction &, + vector> &args) { + return PacClipMinMaxBindWithScale(ctx, args, PCMM_DOUBLE_SCALE); +} + +// ============================================================================ +// DECIMAL support: dispatch by physical type +// ============================================================================ +template +static AggregateFunction GetPacClipMinMaxNoisedAggregate(PhysicalType type) { + const char *name = IS_MAX ? "pac_noised_clip_max" : "pac_noised_clip_min"; + auto finalize = IS_MAX ? PacClipMaxNoisedFinalizeBigInt : PacClipMinNoisedFinalizeBigInt; + auto combine = IS_MAX ? PacClipMaxCombine : PacClipMinCombine; + auto state_size = PacClipMinMaxStateSize; + auto init = PacClipMinMaxInitialize; + + switch (type) { + case PhysicalType::INT16: + return AggregateFunction(name, {LogicalType::UBIGINT, LogicalType::SMALLINT}, LogicalType::HUGEINT, state_size, + init, IS_MAX ? PacClipMaxScatterUpdateSmallInt : PacClipMinScatterUpdateSmallInt, + combine, finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateSmallInt : PacClipMinUpdateSmallInt); + case PhysicalType::INT32: + return AggregateFunction(name, {LogicalType::UBIGINT, LogicalType::INTEGER}, LogicalType::HUGEINT, state_size, + init, IS_MAX ? PacClipMaxScatterUpdateInteger : PacClipMinScatterUpdateInteger, + combine, finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateInteger : PacClipMinUpdateInteger); + case PhysicalType::INT64: + return AggregateFunction(name, {LogicalType::UBIGINT, LogicalType::BIGINT}, LogicalType::HUGEINT, state_size, + init, IS_MAX ? PacClipMaxScatterUpdateBigInt : PacClipMinScatterUpdateBigInt, combine, + finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? 
PacClipMaxUpdateBigInt : PacClipMinUpdateBigInt); + case PhysicalType::INT128: + return AggregateFunction(name, {LogicalType::UBIGINT, LogicalType::HUGEINT}, LogicalType::HUGEINT, state_size, + init, IS_MAX ? PacClipMaxScatterUpdateBigInt : PacClipMinScatterUpdateBigInt, combine, + IS_MAX ? PacClipMaxNoisedFinalizeHugeInt : PacClipMinNoisedFinalizeHugeInt, + FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateBigInt : PacClipMinUpdateBigInt); + default: + throw InternalException("pac_noised_clip_min/max: unsupported decimal physical type"); + } +} + +template +static unique_ptr BindDecimalPacNoisedClipMinMax(ClientContext &ctx, AggregateFunction &function, + vector> &args) { + auto decimal_type = args[1]->return_type; + function = GetPacClipMinMaxNoisedAggregate(decimal_type.InternalType()); + function.name = IS_MAX ? "pac_noised_clip_max" : "pac_noised_clip_min"; + function.arguments[1] = decimal_type; + function.return_type = LogicalType::DECIMAL(Decimal::MAX_WIDTH_DECIMAL, DecimalType::GetScale(decimal_type)); + return PacClipMinMaxBind(ctx, function, args); +} + +// ============================================================================ +// Registration helpers +// ============================================================================ +template +static void AddClipMinMaxCountersFcn(AggregateFunctionSet &set, const string &name, const LogicalType &value_type, + aggregate_update_t scatter, aggregate_finalize_t finalize, + aggregate_simple_update_t update) { + auto list_type = LogicalType::LIST(PacFloatLogicalType()); + set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type}, list_type, + PacClipMinMaxStateSize, PacClipMinMaxInitialize, scatter, + IS_MAX ? 
PacClipMaxCombine : PacClipMinCombine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipMinMaxBind)); + set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type, LogicalType::DOUBLE}, list_type, + PacClipMinMaxStateSize, PacClipMinMaxInitialize, scatter, + IS_MAX ? PacClipMaxCombine : PacClipMinCombine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipMinMaxBind)); +} + +template +static void AddNoisedClipMinMaxFcn(AggregateFunctionSet &set, const string &name, const LogicalType &value_type, + const LogicalType &result_type, aggregate_update_t scatter, + aggregate_finalize_t finalize, aggregate_simple_update_t update) { + set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type}, result_type, + PacClipMinMaxStateSize, PacClipMinMaxInitialize, scatter, + IS_MAX ? PacClipMaxCombine : PacClipMinCombine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipMinMaxBind)); + set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type, LogicalType::DOUBLE}, result_type, + PacClipMinMaxStateSize, PacClipMinMaxInitialize, scatter, + IS_MAX ? PacClipMaxCombine : PacClipMinCombine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipMinMaxBind)); +} + +// Helper to register all type overloads +template +static void RegisterClipMinMaxTypeOverloads(AggregateFunctionSet &set, const string &name, bool counters) { + auto counters_finalize = IS_MAX ? PacClipMaxFinalizeCounters : PacClipMinFinalizeCounters; + auto noised_finalize = IS_MAX ? PacClipMaxNoisedFinalizeBigInt : PacClipMinNoisedFinalizeBigInt; + + if (counters) { + // Counters (LIST) variants — signed types + AddClipMinMaxCountersFcn(set, name, LogicalType::TINYINT, + IS_MAX ? PacClipMaxScatterUpdateTinyInt : PacClipMinScatterUpdateTinyInt, + counters_finalize, IS_MAX ? PacClipMaxUpdateTinyInt : PacClipMinUpdateTinyInt); + AddClipMinMaxCountersFcn(set, name, LogicalType::BOOLEAN, + IS_MAX ? 
PacClipMaxScatterUpdateTinyInt : PacClipMinScatterUpdateTinyInt, + counters_finalize, IS_MAX ? PacClipMaxUpdateTinyInt : PacClipMinUpdateTinyInt); + AddClipMinMaxCountersFcn(set, name, LogicalType::SMALLINT, + IS_MAX ? PacClipMaxScatterUpdateSmallInt : PacClipMinScatterUpdateSmallInt, + counters_finalize, + IS_MAX ? PacClipMaxUpdateSmallInt : PacClipMinUpdateSmallInt); + AddClipMinMaxCountersFcn(set, name, LogicalType::INTEGER, + IS_MAX ? PacClipMaxScatterUpdateInteger : PacClipMinScatterUpdateInteger, + counters_finalize, IS_MAX ? PacClipMaxUpdateInteger : PacClipMinUpdateInteger); + AddClipMinMaxCountersFcn(set, name, LogicalType::BIGINT, + IS_MAX ? PacClipMaxScatterUpdateBigInt : PacClipMinScatterUpdateBigInt, + counters_finalize, IS_MAX ? PacClipMaxUpdateBigInt : PacClipMinUpdateBigInt); + // Unsigned types + AddClipMinMaxCountersFcn(set, name, LogicalType::UTINYINT, + IS_MAX ? PacClipMaxScatterUpdateUTinyInt : PacClipMinScatterUpdateUTinyInt, + counters_finalize, + IS_MAX ? PacClipMaxUpdateUTinyInt : PacClipMinUpdateUTinyInt); + AddClipMinMaxCountersFcn(set, name, LogicalType::USMALLINT, + IS_MAX ? PacClipMaxScatterUpdateUSmallInt : PacClipMinScatterUpdateUSmallInt, + counters_finalize, + IS_MAX ? PacClipMaxUpdateUSmallInt : PacClipMinUpdateUSmallInt); + AddClipMinMaxCountersFcn(set, name, LogicalType::UINTEGER, + IS_MAX ? PacClipMaxScatterUpdateUInteger : PacClipMinScatterUpdateUInteger, + counters_finalize, + IS_MAX ? PacClipMaxUpdateUInteger : PacClipMinUpdateUInteger); + AddClipMinMaxCountersFcn(set, name, LogicalType::UBIGINT, + IS_MAX ? PacClipMaxScatterUpdateUBigInt : PacClipMinScatterUpdateUBigInt, + counters_finalize, IS_MAX ? PacClipMaxUpdateUBigInt : PacClipMinUpdateUBigInt); + } else { + // Noised (scalar) variants — signed types + AddNoisedClipMinMaxFcn(set, name, LogicalType::TINYINT, LogicalType::BIGINT, + IS_MAX ? PacClipMaxScatterUpdateTinyInt : PacClipMinScatterUpdateTinyInt, + noised_finalize, IS_MAX ? 
PacClipMaxUpdateTinyInt : PacClipMinUpdateTinyInt); + AddNoisedClipMinMaxFcn(set, name, LogicalType::BOOLEAN, LogicalType::BIGINT, + IS_MAX ? PacClipMaxScatterUpdateTinyInt : PacClipMinScatterUpdateTinyInt, + noised_finalize, IS_MAX ? PacClipMaxUpdateTinyInt : PacClipMinUpdateTinyInt); + AddNoisedClipMinMaxFcn(set, name, LogicalType::SMALLINT, LogicalType::BIGINT, + IS_MAX ? PacClipMaxScatterUpdateSmallInt : PacClipMinScatterUpdateSmallInt, + noised_finalize, IS_MAX ? PacClipMaxUpdateSmallInt : PacClipMinUpdateSmallInt); + AddNoisedClipMinMaxFcn(set, name, LogicalType::INTEGER, LogicalType::BIGINT, + IS_MAX ? PacClipMaxScatterUpdateInteger : PacClipMinScatterUpdateInteger, + noised_finalize, IS_MAX ? PacClipMaxUpdateInteger : PacClipMinUpdateInteger); + AddNoisedClipMinMaxFcn(set, name, LogicalType::BIGINT, LogicalType::BIGINT, + IS_MAX ? PacClipMaxScatterUpdateBigInt : PacClipMinScatterUpdateBigInt, + noised_finalize, IS_MAX ? PacClipMaxUpdateBigInt : PacClipMinUpdateBigInt); + // Unsigned types + AddNoisedClipMinMaxFcn(set, name, LogicalType::UTINYINT, LogicalType::BIGINT, + IS_MAX ? PacClipMaxScatterUpdateUTinyInt : PacClipMinScatterUpdateUTinyInt, + noised_finalize, IS_MAX ? PacClipMaxUpdateUTinyInt : PacClipMinUpdateUTinyInt); + AddNoisedClipMinMaxFcn(set, name, LogicalType::USMALLINT, LogicalType::BIGINT, + IS_MAX ? PacClipMaxScatterUpdateUSmallInt : PacClipMinScatterUpdateUSmallInt, + noised_finalize, IS_MAX ? PacClipMaxUpdateUSmallInt : PacClipMinUpdateUSmallInt); + AddNoisedClipMinMaxFcn(set, name, LogicalType::UINTEGER, LogicalType::BIGINT, + IS_MAX ? PacClipMaxScatterUpdateUInteger : PacClipMinScatterUpdateUInteger, + noised_finalize, IS_MAX ? PacClipMaxUpdateUInteger : PacClipMinUpdateUInteger); + AddNoisedClipMinMaxFcn(set, name, LogicalType::UBIGINT, LogicalType::BIGINT, + IS_MAX ? PacClipMaxScatterUpdateUBigInt : PacClipMinScatterUpdateUBigInt, + noised_finalize, IS_MAX ? 
PacClipMaxUpdateUBigInt : PacClipMinUpdateUBigInt); + } +} + +// ============================================================================ +// Add float/double overloads to a function set +// ============================================================================ +template +static void AddFloatDoubleOverloads(AggregateFunctionSet &set, const string &name, bool counters) { + auto combine = IS_MAX ? PacClipMaxCombine : PacClipMinCombine; + auto state_size = PacClipMinMaxStateSize; + auto init = PacClipMinMaxInitialize; + + if (counters) { + auto finalize = IS_MAX ? PacClipMaxFinalizeCounters : PacClipMinFinalizeCounters; + auto list_type = LogicalType::LIST(PacFloatLogicalType()); + + // FLOAT + set.AddFunction(AggregateFunction( + name, {LogicalType::UBIGINT, LogicalType::FLOAT}, list_type, state_size, init, + IS_MAX ? PacClipMaxScatterUpdateSingleFloat : PacClipMinScatterUpdateSingleFloat, combine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateSingleFloat : PacClipMinUpdateSingleFloat, PacClipMinMaxBindFloat)); + set.AddFunction(AggregateFunction( + name, {LogicalType::UBIGINT, LogicalType::FLOAT, LogicalType::DOUBLE}, list_type, state_size, init, + IS_MAX ? PacClipMaxScatterUpdateSingleFloat : PacClipMinScatterUpdateSingleFloat, combine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateSingleFloat : PacClipMinUpdateSingleFloat, PacClipMinMaxBindFloat)); + + // DOUBLE + set.AddFunction(AggregateFunction( + name, {LogicalType::UBIGINT, LogicalType::DOUBLE}, list_type, state_size, init, + IS_MAX ? PacClipMaxScatterUpdateSingleDouble : PacClipMinScatterUpdateSingleDouble, combine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateSingleDouble : PacClipMinUpdateSingleDouble, PacClipMinMaxBindDouble)); + set.AddFunction(AggregateFunction( + name, {LogicalType::UBIGINT, LogicalType::DOUBLE, LogicalType::DOUBLE}, list_type, state_size, init, + IS_MAX ? 
PacClipMaxScatterUpdateSingleDouble : PacClipMinScatterUpdateSingleDouble, combine, finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateSingleDouble : PacClipMinUpdateSingleDouble, PacClipMinMaxBindDouble)); + } else { + auto float_finalize = IS_MAX ? PacClipMaxNoisedFinalizeFloat : PacClipMinNoisedFinalizeFloat; + auto double_finalize = IS_MAX ? PacClipMaxNoisedFinalizeDouble : PacClipMinNoisedFinalizeDouble; + + // FLOAT → FLOAT + set.AddFunction(AggregateFunction( + name, {LogicalType::UBIGINT, LogicalType::FLOAT}, LogicalType::FLOAT, state_size, init, + IS_MAX ? PacClipMaxScatterUpdateSingleFloat : PacClipMinScatterUpdateSingleFloat, combine, float_finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateSingleFloat : PacClipMinUpdateSingleFloat, PacClipMinMaxBindFloat)); + set.AddFunction(AggregateFunction( + name, {LogicalType::UBIGINT, LogicalType::FLOAT, LogicalType::DOUBLE}, LogicalType::FLOAT, state_size, init, + IS_MAX ? PacClipMaxScatterUpdateSingleFloat : PacClipMinScatterUpdateSingleFloat, combine, float_finalize, + FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateSingleFloat : PacClipMinUpdateSingleFloat, PacClipMinMaxBindFloat)); + + // DOUBLE → DOUBLE + set.AddFunction(AggregateFunction( + name, {LogicalType::UBIGINT, LogicalType::DOUBLE}, LogicalType::DOUBLE, state_size, init, + IS_MAX ? PacClipMaxScatterUpdateSingleDouble : PacClipMinScatterUpdateSingleDouble, combine, + double_finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateSingleDouble : PacClipMinUpdateSingleDouble, PacClipMinMaxBindDouble)); + set.AddFunction(AggregateFunction( + name, {LogicalType::UBIGINT, LogicalType::DOUBLE, LogicalType::DOUBLE}, LogicalType::DOUBLE, state_size, + init, IS_MAX ? PacClipMaxScatterUpdateSingleDouble : PacClipMinScatterUpdateSingleDouble, combine, + double_finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? 
PacClipMaxUpdateSingleDouble : PacClipMinUpdateSingleDouble, PacClipMinMaxBindDouble)); + } +} + +// ============================================================================ +// Registration: templated helpers to avoid duplicating MIN/MAX registration +// ============================================================================ +template +static void RegisterPacClipMinMaxCountersFunctions(ExtensionLoader &loader) { + const string name = IS_MAX ? "pac_clip_max" : "pac_clip_min"; + const string short_name = IS_MAX ? "clip_max" : "clip_min"; + AggregateFunctionSet fcn_set(name); + RegisterClipMinMaxTypeOverloads(fcn_set, name, true); + + // DECIMAL overloads + auto list_type = LogicalType::LIST(PacFloatLogicalType()); + fcn_set.AddFunction(AggregateFunction({LogicalType::UBIGINT, LogicalTypeId::DECIMAL}, list_type, nullptr, nullptr, + nullptr, nullptr, nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, + nullptr, BindDecimalPacNoisedClipMinMax)); + + AddFloatDoubleOverloads(fcn_set, name, true); + AddPacListAggregateOverload(fcn_set, short_name); + + CreateAggregateFunctionInfo info(fcn_set); + FunctionDescription desc; + desc.description = IS_MAX ? "[INTERNAL] Returns 64 PAC subsample max values with per-level clipping as LIST." + : "[INTERNAL] Returns 64 PAC subsample min values with per-level clipping as LIST."; + info.descriptions.push_back(std::move(desc)); + loader.RegisterFunction(std::move(info)); +} + +template +static void RegisterPacNoisedClipMinMaxFunctions(ExtensionLoader &loader) { + const string name = IS_MAX ? 
"pac_noised_clip_max" : "pac_noised_clip_min"; + AggregateFunctionSet fcn_set(name); + RegisterClipMinMaxTypeOverloads(fcn_set, name, false); + + // DECIMAL overloads + fcn_set.AddFunction(AggregateFunction( + {LogicalType::UBIGINT, LogicalTypeId::DECIMAL}, LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr, nullptr, + nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, BindDecimalPacNoisedClipMinMax)); + fcn_set.AddFunction(AggregateFunction({LogicalType::UBIGINT, LogicalTypeId::DECIMAL, LogicalType::DOUBLE}, + LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr, nullptr, nullptr, + FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, + BindDecimalPacNoisedClipMinMax)); + + AddFloatDoubleOverloads(fcn_set, name, false); + + CreateAggregateFunctionInfo info(fcn_set); + FunctionDescription desc; + desc.description = IS_MAX ? "Privacy-preserving MAX with per-level clipping and noising." + : "Privacy-preserving MIN with per-level clipping and noising."; + info.descriptions.push_back(std::move(desc)); + loader.RegisterFunction(std::move(info)); +} + +// Public registration functions (called from pac_extension.cpp) +void RegisterPacClipMinFunctions(ExtensionLoader &loader) { + RegisterPacClipMinMaxCountersFunctions(loader); +} +void RegisterPacClipMaxFunctions(ExtensionLoader &loader) { + RegisterPacClipMinMaxCountersFunctions(loader); +} +void RegisterPacNoisedClipMinFunctions(ExtensionLoader &loader) { + RegisterPacNoisedClipMinMaxFunctions(loader); +} +void RegisterPacNoisedClipMaxFunctions(ExtensionLoader &loader) { + RegisterPacNoisedClipMinMaxFunctions(loader); +} + +} // namespace duckdb diff --git a/src/aggregates/pac_clip_sum.cpp b/src/aggregates/pac_clip_sum.cpp index 88d7f5ff..4979ed88 100644 --- a/src/aggregates/pac_clip_sum.cpp +++ b/src/aggregates/pac_clip_sum.cpp @@ -368,6 +368,79 @@ static void PacClipSumScatterUpdateUHugeInt(Vector inputs[], AggregateInputData } } +// ============================================================================ 
+// Float/Double update: scale to int64, route through signed path +// ============================================================================ +template +static void PacClipSumUpdateFloat(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, idx_t count) { + auto &state = *reinterpret_cast(state_p); + UnifiedVectorFormat hash_data, value_data; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + + if (hash_data.validity.AllValid() && value_data.validity.AllValid()) { + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + PacClipSumUpdateOne(state, hashes[h_idx], ScaleFloatToInt64(values[v_idx]), + aggr.allocator); + } + } else { + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + continue; + } + PacClipSumUpdateOne(state, hashes[h_idx], ScaleFloatToInt64(values[v_idx]), + aggr.allocator); + } + } +} + +template +static void PacClipSumScatterUpdateFloat(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &states, + idx_t count) { + UnifiedVectorFormat hash_data, value_data, sdata; + inputs[0].ToUnifiedFormat(count, hash_data); + inputs[1].ToUnifiedFormat(count, value_data); + states.ToUnifiedFormat(count, sdata); + auto hashes = UnifiedVectorFormat::GetData(hash_data); + auto values = UnifiedVectorFormat::GetData(value_data); + auto state_ptrs = UnifiedVectorFormat::GetData(sdata); + + for (idx_t i = 0; i < count; i++) { + auto h_idx = hash_data.sel->get_index(i); + auto v_idx = value_data.sel->get_index(i); + auto state = state_ptrs[sdata.sel->get_index(i)]; + if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { + 
continue; + } + PacClipSumUpdateOne(*state, hashes[h_idx], ScaleFloatToInt64(values[v_idx]), + aggr.allocator); + } +} + +// Instantiate float/double update functions +static void PacClipSumUpdateSingleFloat(Vector inputs[], AggregateInputData &aggr, idx_t n, data_ptr_t state_p, + idx_t count) { + PacClipSumUpdateFloat(inputs, aggr, n, state_p, count); +} +static void PacClipSumScatterUpdateSingleFloat(Vector inputs[], AggregateInputData &aggr, idx_t n, Vector &states, + idx_t count) { + PacClipSumScatterUpdateFloat(inputs, aggr, n, states, count); +} +static void PacClipSumUpdateSingleDouble(Vector inputs[], AggregateInputData &aggr, idx_t n, data_ptr_t state_p, + idx_t count) { + PacClipSumUpdateFloat(inputs, aggr, n, state_p, count); +} +static void PacClipSumScatterUpdateSingleDouble(Vector inputs[], AggregateInputData &aggr, idx_t n, Vector &states, + idx_t count) { + PacClipSumScatterUpdateFloat(inputs, aggr, n, states, count); +} + // ============================================================================ // Combine // ============================================================================ @@ -408,9 +481,12 @@ static void PacClipSumCombine(Vector &src, Vector &dst, AggregateInputData &aggr // ============================================================================ struct PacClipSumBindData : public PacBindData { int clip_support_threshold; // levels with fewer estimated distinct contributors are zeroed out + double float_scale; // scale factor for float/double→int64 conversion (1.0 for integer types) - PacClipSumBindData(ClientContext &ctx, double mi_val, double correction_val, int clip_support) - : PacBindData(ctx, mi_val, correction_val, 1.0), clip_support_threshold(clip_support) { + PacClipSumBindData(ClientContext &ctx, double mi_val, double correction_val, int clip_support, + double float_scale_val = 1.0) + : PacBindData(ctx, mi_val, correction_val, 1.0), clip_support_threshold(clip_support), + float_scale(float_scale_val) { } unique_ptr 
Copy() const override { @@ -425,7 +501,7 @@ struct PacClipSumBindData : public PacBindData { return false; } auto *o = dynamic_cast(&other); - return o && clip_support_threshold == o->clip_support_threshold; + return o && clip_support_threshold == o->clip_support_threshold && float_scale == o->float_scale; } }; @@ -478,7 +554,8 @@ static void PacClipSumFinalize(Vector &states, AggregateInputData &input, Vector CheckPacSampleDiversity(key_hash, buf, update_count, "pac_clip_sum", bind); PAC_FLOAT result_val = PacNoisySampleFrom64Counters(buf, mi, correction, gen, ~key_hash, query_hash, pstate); - result_val *= PAC_FLOAT(2.0); // 2x compensation for ~50% sampling + result_val *= PAC_FLOAT(2.0); // 2x compensation for ~50% sampling + result_val /= static_cast(bind.float_scale); // undo float→int64 scaling (1.0 for integers) data[offset + i] = FromDouble(result_val); } } @@ -497,6 +574,15 @@ static void PacClipSumNoisedFinalizeBigInt(Vector &states, AggregateInputData &i idx_t offset) { PacClipSumFinalize(states, input, result, count, offset); } +// Float/double output variants +static void PacClipSumNoisedFinalizeFloat(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalize(states, input, result, count, offset); +} +static void PacClipSumNoisedFinalizeDouble(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalize(states, input, result, count, offset); +} // ============================================================================ // Counters finalize (LIST output for pac_clip_sum) @@ -508,6 +594,7 @@ static void PacClipSumFinalizeCounters(Vector &states, AggregateInputData &input auto &bind = static_cast(*input.bind_data); int clip_support = bind.clip_support_threshold; double correction = bind.correction; + double float_scale = bind.float_scale; // Result is LIST auto list_entries = FlatVector::GetData(result); @@ -552,7 +639,7 @@ static void 
PacClipSumFinalizeCounters(Vector &states, AggregateInputData &input idx_t base = i * 64; for (int j = 0; j < 64; j++) { if ((key_hash >> j) & 1ULL) { - child_data[base + j] = static_cast(buf[j] * 2.0 * correction); + child_data[base + j] = static_cast(buf[j] * 2.0 * correction / float_scale); } else { child_data[base + j] = 0.0; } @@ -681,6 +768,20 @@ static unique_ptr BindDecimalPacClipSum(ClientContext &ctx, Aggreg return PacClipSumBind(ctx, function, args); } +// Float/double bind: thin wrappers around PacClipSumBind logic with float_scale +static unique_ptr PacClipSumBindFloat(ClientContext &ctx, AggregateFunction &f, + vector> &args) { + auto result = PacClipSumBind(ctx, f, args); + static_cast(*result).float_scale = PAC2_FLOAT_SCALE; + return result; +} +static unique_ptr PacClipSumBindDouble(ClientContext &ctx, AggregateFunction &f, + vector> &args) { + auto result = PacClipSumBind(ctx, f, args); + static_cast(*result).float_scale = PAC2_DOUBLE_SCALE; + return result; +} + // ============================================================================ // Registration helpers // ============================================================================ @@ -776,6 +877,25 @@ void RegisterPacClipSumFunctions(ExtensionLoader &loader) { list_type, nullptr, nullptr, nullptr, nullptr, nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, BindDecimalPacClipSum)); + // FLOAT/DOUBLE overloads (scale to int64 internally) + fcn_set.AddFunction(AggregateFunction( + "pac_clip_sum", {LogicalType::UBIGINT, LogicalType::FLOAT}, list_type, PacClipSumStateSize, + PacClipSumInitialize, PacClipSumScatterUpdateSingleFloat, PacClipSumCombine, PacClipSumFinalizeCountersSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateSingleFloat, PacClipSumBindFloat)); + fcn_set.AddFunction(AggregateFunction( + "pac_clip_sum", {LogicalType::UBIGINT, LogicalType::FLOAT, LogicalType::DOUBLE}, list_type, PacClipSumStateSize, + PacClipSumInitialize, 
PacClipSumScatterUpdateSingleFloat, PacClipSumCombine, PacClipSumFinalizeCountersSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateSingleFloat, PacClipSumBindFloat)); + fcn_set.AddFunction(AggregateFunction( + "pac_clip_sum", {LogicalType::UBIGINT, LogicalType::DOUBLE}, list_type, PacClipSumStateSize, + PacClipSumInitialize, PacClipSumScatterUpdateSingleDouble, PacClipSumCombine, PacClipSumFinalizeCountersSigned, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateSingleDouble, PacClipSumBindDouble)); + fcn_set.AddFunction(AggregateFunction( + "pac_clip_sum", {LogicalType::UBIGINT, LogicalType::DOUBLE, LogicalType::DOUBLE}, list_type, + PacClipSumStateSize, PacClipSumInitialize, PacClipSumScatterUpdateSingleDouble, PacClipSumCombine, + PacClipSumFinalizeCountersSigned, FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateSingleDouble, + PacClipSumBindDouble)); + // Add list aggregate overload (LIST → LIST) for categorical/subquery AddPacListAggregateOverload(fcn_set, "clip_sum"); @@ -803,6 +923,26 @@ void RegisterPacNoisedClipSumFunctions(ExtensionLoader &loader) { {LogicalType::UBIGINT, LogicalTypeId::DECIMAL, LogicalType::DOUBLE}, LogicalTypeId::DECIMAL, nullptr, nullptr, nullptr, nullptr, nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, BindDecimalPacNoisedClipSum)); + // FLOAT/DOUBLE overloads (return FLOAT/DOUBLE respectively) + fcn_set.AddFunction(AggregateFunction( + "pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::FLOAT}, LogicalType::FLOAT, PacClipSumStateSize, + PacClipSumInitialize, PacClipSumScatterUpdateSingleFloat, PacClipSumCombine, PacClipSumNoisedFinalizeFloat, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateSingleFloat, PacClipSumBindFloat)); + fcn_set.AddFunction(AggregateFunction( + "pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::FLOAT, LogicalType::DOUBLE}, LogicalType::FLOAT, + PacClipSumStateSize, PacClipSumInitialize, PacClipSumScatterUpdateSingleFloat, 
PacClipSumCombine, + PacClipSumNoisedFinalizeFloat, FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateSingleFloat, + PacClipSumBindFloat)); + fcn_set.AddFunction(AggregateFunction( + "pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::DOUBLE}, LogicalType::DOUBLE, PacClipSumStateSize, + PacClipSumInitialize, PacClipSumScatterUpdateSingleDouble, PacClipSumCombine, PacClipSumNoisedFinalizeDouble, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateSingleDouble, PacClipSumBindDouble)); + fcn_set.AddFunction(AggregateFunction( + "pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::DOUBLE, LogicalType::DOUBLE}, LogicalType::DOUBLE, + PacClipSumStateSize, PacClipSumInitialize, PacClipSumScatterUpdateSingleDouble, PacClipSumCombine, + PacClipSumNoisedFinalizeDouble, FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateSingleDouble, + PacClipSumBindDouble)); + CreateAggregateFunctionInfo info(fcn_set); FunctionDescription desc; desc.description = "Privacy-preserving SUM with per-level clipping and noising. 
Supports 128-bit."; diff --git a/src/aggregates/pac_min_max.cpp b/src/aggregates/pac_min_max.cpp index 60f5bdac..8fdfec10 100644 --- a/src/aggregates/pac_min_max.cpp +++ b/src/aggregates/pac_min_max.cpp @@ -371,57 +371,6 @@ void RegisterPacMaxCountersFunctions(ExtensionLoader &loader) { loader.RegisterFunction(std::move(info)); } -// ============================================================================ -// Clip synonyms: pac_noised_clip_min/max = pac_noised_min/max, -// pac_clip_min/max = pac_min/max -// ============================================================================ -void RegisterPacNoisedClipMinFunctions(ExtensionLoader &loader) { - AggregateFunctionSet fcn_set("pac_noised_clip_min"); - fcn_set.AddFunction(AggregateFunction("pac_noised_clip_min", {LogicalType::UBIGINT, LogicalType::ANY}, - LogicalType::ANY, nullptr, nullptr, nullptr, nullptr, nullptr, - FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); - fcn_set.AddFunction(AggregateFunction("pac_noised_clip_min", - {LogicalType::UBIGINT, LogicalType::ANY, LogicalType::DOUBLE}, - LogicalType::ANY, nullptr, nullptr, nullptr, nullptr, nullptr, - FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); - CreateAggregateFunctionInfo info(fcn_set); - loader.RegisterFunction(std::move(info)); -} - -void RegisterPacNoisedClipMaxFunctions(ExtensionLoader &loader) { - AggregateFunctionSet fcn_set("pac_noised_clip_max"); - fcn_set.AddFunction(AggregateFunction("pac_noised_clip_max", {LogicalType::UBIGINT, LogicalType::ANY}, - LogicalType::ANY, nullptr, nullptr, nullptr, nullptr, nullptr, - FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); - fcn_set.AddFunction(AggregateFunction( - "pac_noised_clip_max", {LogicalType::UBIGINT, LogicalType::ANY, LogicalType::DOUBLE}, LogicalType::ANY, nullptr, - nullptr, nullptr, nullptr, nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); - CreateAggregateFunctionInfo info(fcn_set); - 
loader.RegisterFunction(std::move(info)); -} - -void RegisterPacClipMinFunctions(ExtensionLoader &loader) { - auto list_double_type = LogicalType::LIST(PacFloatLogicalType()); - AggregateFunctionSet fcn_set("pac_clip_min"); - fcn_set.AddFunction(AggregateFunction( - "pac_clip_min", {LogicalType::UBIGINT, LogicalType::ANY}, list_double_type, nullptr, nullptr, nullptr, nullptr, - nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxCountersBind)); - AddPacListAggregateOverload(fcn_set, "clip_min"); - CreateAggregateFunctionInfo info(fcn_set); - loader.RegisterFunction(std::move(info)); -} - -void RegisterPacClipMaxFunctions(ExtensionLoader &loader) { - auto list_double_type = LogicalType::LIST(PacFloatLogicalType()); - AggregateFunctionSet fcn_set("pac_clip_max"); - fcn_set.AddFunction(AggregateFunction( - "pac_clip_max", {LogicalType::UBIGINT, LogicalType::ANY}, list_double_type, nullptr, nullptr, nullptr, nullptr, - nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxCountersBind)); - AddPacListAggregateOverload(fcn_set, "clip_max"); - CreateAggregateFunctionInfo info(fcn_set); - loader.RegisterFunction(std::move(info)); -} - // Explicit template instantiations #define INST_ALL(T) \ template void PacMinMaxUpdate(Vector[], AggregateInputData &, idx_t, data_ptr_t, idx_t); \ diff --git a/src/core/pac_extension.cpp b/src/core/pac_extension.cpp index f4153dfa..644b0ee4 100644 --- a/src/core/pac_extension.cpp +++ b/src/core/pac_extension.cpp @@ -20,6 +20,7 @@ #include "aggregates/pac_sum.hpp" #include "aggregates/pac_clip_sum.hpp" #include "aggregates/pac_min_max.hpp" +#include "aggregates/pac_clip_min_max.hpp" #include "categorical/pac_categorical.hpp" #include "parser/pac_parser.hpp" #include "diff/pac_utility_diff.hpp" diff --git a/src/include/aggregates/pac_clip_min_max.hpp b/src/include/aggregates/pac_clip_min_max.hpp new file mode 100644 index 00000000..af86be9c --- /dev/null +++ b/src/include/aggregates/pac_clip_min_max.hpp 
@@ -0,0 +1,297 @@ +// +// pac_clip_min_max: Approximate min/max with per-level int8_t extremes + distinct bitmaps +// Single-sided (signed arithmetic shift preserves sign), 62 levels covering 128-bit +// +#ifndef PAC_CLIP_MIN_MAX_HPP +#define PAC_CLIP_MIN_MAX_HPP + +#include "duckdb.hpp" +#include "pac_aggregate.hpp" +#include "pac_min_max.hpp" // for UpdateExtremesSIMD +#include +#include + +namespace duckdb { + +void RegisterPacClipMinFunctions(ExtensionLoader &loader); +void RegisterPacClipMaxFunctions(ExtensionLoader &loader); +void RegisterPacNoisedClipMinFunctions(ExtensionLoader &loader); +void RegisterPacNoisedClipMaxFunctions(ExtensionLoader &loader); + +// ============================================================================ +// Constants (same level structure as pac_clip_sum) +// ============================================================================ +constexpr int PCMM_NUM_LEVELS = 62; // 62 levels × 2-bit bands covers full 128-bit +constexpr int PCMM_LEVEL_SHIFT = 2; // 2^2 = 4x per level +constexpr int PCMM_SWAR = 8; // 8 × uint64_t = 64 × int8_t extremes (SWAR packed) +constexpr int PCMM_ELEMENTS = 9; // 8 SWAR + 1 bitmap +constexpr int PCMM_INLINE_THRESHOLD = 53; // levels 0-52 can use inline (53 pointers + 9 inline = 62) + +// Float/double scale factors (same as pac_clip_sum) +constexpr int PCMM_FLOAT_SHIFT = 20; +constexpr int PCMM_DOUBLE_SHIFT = 27; +constexpr double PCMM_FLOAT_SCALE = static_cast(1 << PCMM_FLOAT_SHIFT); +constexpr double PCMM_DOUBLE_SCALE = static_cast(1 << PCMM_DOUBLE_SHIFT); + +// ============================================================================ +// PacClipMinMaxIntState: core state with int8_t extremes per level +// ============================================================================ +template +struct PacClipMinMaxIntState { + uint64_t key_hash; + uint64_t update_count; + int8_t max_level_used; // -1 if none + int8_t inline_level_idx; // which level uses inline, -1 if none + int8_t 
level_bounds[PCMM_NUM_LEVELS]; // BOUNDOPT: worst-of-64 per level for early skip + + // 62 level pointers = 496 bytes. + // Inline optimization: last PCMM_ELEMENTS slots = 72 bytes = one level. + union { + uint64_t *levels[PCMM_NUM_LEVELS]; // 496 bytes + struct { + uint64_t *_ptrs[PCMM_INLINE_THRESHOLD]; // levels 0-52 pointers (424 bytes) + uint64_t inline_level[PCMM_ELEMENTS]; // 72 bytes for one inline level + }; + }; + + // ======================================================================== + // GetLevel: route value to lowest level where shifted value fits in int8_t [-128,127] + // Threshold 128 (not 256 like sum's uint8_t): abs_val < 128 → level 0 + // 2-bit bands, same structure as clip_sum but with 7-bit magnitude range + // ======================================================================== + static inline int GetLevel(uint64_t abs_val) { + if (abs_val < 128) { + return 0; + } + int bit_pos = 63 - pac_clzll(abs_val); + return std::min((bit_pos - 5) >> 1, PCMM_NUM_LEVELS - 1); + } + + static inline int GetLevel128(uint64_t upper, uint64_t lower) { + if (upper == 0) { + return GetLevel(lower); + } + int bit_pos = 127 - pac_clzll(upper); + return std::min((bit_pos - 5) >> 1, PCMM_NUM_LEVELS - 1); + } + + // ======================================================================== + // Level allocation + // ======================================================================== + inline void AllocateLevel(ArenaAllocator &allocator, int k) { + if (k >= PCMM_INLINE_THRESHOLD && inline_level_idx >= 0) { + // Evict inline level to arena + auto *ext = reinterpret_cast(allocator.Allocate(PCMM_ELEMENTS * sizeof(uint64_t))); + memcpy(ext, inline_level, PCMM_ELEMENTS * sizeof(uint64_t)); + levels[inline_level_idx] = ext; + inline_level_idx = -1; + memset(inline_level, 0, PCMM_ELEMENTS * sizeof(uint64_t)); + } + uint64_t *buf; + if (k < PCMM_INLINE_THRESHOLD && inline_level_idx < 0) { + buf = inline_level; + inline_level_idx = static_cast(k); + } else { + buf 
= reinterpret_cast(allocator.Allocate(PCMM_ELEMENTS * sizeof(uint64_t))); + } + // Plain int8_t: IS_MAX init to 0x80 (-128, worst max), IS_MIN init to 0x7F (+127, worst min) + if constexpr (IS_MAX) { + memset(buf, 0x80, PCMM_SWAR * sizeof(uint64_t)); + } else { + memset(buf, 0x7F, PCMM_SWAR * sizeof(uint64_t)); + } + buf[PCMM_SWAR] = 0; // bitmap starts empty + levels[k] = buf; + level_bounds[k] = IS_MAX ? INT8_MIN : INT8_MAX; // init bound to worst case + } + + inline void EnsureLevelAllocated(ArenaAllocator &allocator, int k) { + if (DUCKDB_LIKELY(k <= max_level_used)) { + return; + } + for (int i = max_level_used + 1; i <= k; i++) { + AllocateLevel(allocator, i); + } + max_level_used = static_cast(k); + } + + // ======================================================================== + // BOUNDOPT: recompute worst-of-64 bound for level k + // ======================================================================== + void RecomputeBound(int k) { + auto *extremes = reinterpret_cast(levels[k]); + int8_t worst = extremes[0]; + for (int i = 1; i < 64; i++) { + worst = PAC_WORSE(worst, extremes[i]); + } + level_bounds[k] = worst; + } + + // ======================================================================== + // EstimateDistinct: birthday-paradox formula from 64-bit bitmap + // ======================================================================== + static inline int EstimateDistinct(uint64_t bitmap) { + int k = pac_popcount64(bitmap); + if (k >= 64) { + return 256; + } + if (k == 0) { + return 0; + } + return static_cast(-64.0 * std::log(1.0 - k / 64.0)); + } + + // ======================================================================== + // UpdateExtreme: reuse the SIMD kernel from pac_min_max.hpp + // int8_t: SHIFTS=8, MASK=0x0101..., SIGNED=true, FLOAT=false + // ======================================================================== + inline void UpdateExtreme(uint64_t *buf, int8_t shifted_val, uint64_t kh) { + auto *extremes = reinterpret_cast(buf); + 
UpdateExtremesSIMD(extremes, kh, + shifted_val); + } + + // ======================================================================== + // GetTotals: non-mutating finalization — compute min/max across supported levels + // ======================================================================== + void GetTotals(PAC_FLOAT *dst, int clip_support_threshold = 0) const { + // Initialize to worst-case: -INF for MAX, +INF for MIN + for (int j = 0; j < 64; j++) { + if constexpr (IS_MAX) { + dst[j] = -std::numeric_limits::infinity(); + } else { + dst[j] = std::numeric_limits::infinity(); + } + } + + for (int k = 0; k <= max_level_used; k++) { + if (!levels[k]) { + continue; + } + + // Check clipping support + if (clip_support_threshold > 0 && EstimateDistinct(levels[k][PCMM_SWAR]) < clip_support_threshold) { + continue; // skip unsupported levels + } + + PAC_FLOAT scale = std::exp2(static_cast(PCMM_LEVEL_SHIFT * k)); + auto *extremes = reinterpret_cast(levels[k]); + + // Undo SWAR interleaving from UpdateExtremesSIMD (int8_t: ELEMS=8, SHIFTS=8) + for (int bit = 0; bit < 64; bit++) { + int swar_pos = (bit % 8) * 8 + bit / 8; + PAC_FLOAT reconstructed = static_cast(extremes[swar_pos]) * scale; + if constexpr (IS_MAX) { + if (reconstructed > dst[bit]) { + dst[bit] = reconstructed; + } + } else { + if (reconstructed < dst[bit]) { + dst[bit] = reconstructed; + } + } + } + } + + // Replace infinities with 0 for bits that had no supported contribution + for (int j = 0; j < 64; j++) { + if (std::isinf(dst[j])) { + dst[j] = 0.0; + } + } + } + + // ======================================================================== + // CombineFrom: merge another state into this one + // ======================================================================== + void CombineFrom(PacClipMinMaxIntState *src, ArenaAllocator &allocator) { + if (!src) { + return; + } + key_hash |= src->key_hash; + update_count += src->update_count; + + for (int k = 0; k <= src->max_level_used; k++) { + if 
(!src->levels[k]) { + continue; + } + + if (k > max_level_used || !levels[k]) { + EnsureLevelAllocated(allocator, k); + // Steal or copy src level + if (k != src->inline_level_idx) { + levels[k] = src->levels[k]; + src->levels[k] = nullptr; + } else { + memcpy(levels[k], src->levels[k], PCMM_ELEMENTS * sizeof(uint64_t)); + } + continue; + } + + // Both have this level: merge extremes element-wise + auto *dst_ext = reinterpret_cast(levels[k]); + auto *src_ext = reinterpret_cast(src->levels[k]); + for (int j = 0; j < 64; j++) { + if constexpr (IS_MAX) { + if (src_ext[j] > dst_ext[j]) { + dst_ext[j] = src_ext[j]; + } + } else { + if (src_ext[j] < dst_ext[j]) { + dst_ext[j] = src_ext[j]; + } + } + } + // OR bitmaps + levels[k][PCMM_SWAR] |= src->levels[k][PCMM_SWAR]; + } + } + + PacClipMinMaxIntState *GetState() { + return this; + } + PacClipMinMaxIntState *EnsureState(ArenaAllocator &) { + return this; + } +}; + +// ============================================================================ +// PacClipMinMaxStateWrapper: buffering wrapper (no two-sided for min/max) +// ============================================================================ +template +struct PacClipMinMaxStateWrapper { + using State = PacClipMinMaxIntState; + static constexpr int BUF_SIZE = 2; + static constexpr uint64_t BUF_MASK = 3ULL; + + int64_t val_buf[BUF_SIZE]; + uint64_t hash_buf[BUF_SIZE]; + union { + uint64_t n_buffered; // lower 2 bits: count, upper bits: state pointer + State *state; + }; + + State *GetState() const { + return reinterpret_cast(reinterpret_cast(state) & ~7ULL); + } + + State *EnsureState(ArenaAllocator &a) { + State *s = GetState(); + if (!s) { + s = reinterpret_cast(a.Allocate(sizeof(State))); + memset(s, 0, sizeof(State)); + s->max_level_used = -1; + s->inline_level_idx = -1; + state = s; + } + return s; + } + + static idx_t StateSize() { + return sizeof(PacClipMinMaxStateWrapper); + } +}; + +} // namespace duckdb + +#endif // PAC_CLIP_MIN_MAX_HPP diff --git 
a/src/include/aggregates/pac_clip_sum.hpp b/src/include/aggregates/pac_clip_sum.hpp index 9f15a5b4..df62b3cc 100644 --- a/src/include/aggregates/pac_clip_sum.hpp +++ b/src/include/aggregates/pac_clip_sum.hpp @@ -26,6 +26,22 @@ constexpr int PAC2_OVERFLOW_ELEMENTS = 33; // 32 SWAR + 1 exact_count constexpr int PAC2_LEVEL_SHIFT = 2; // 2^2 = 4x per level (was 4 = 16x per level) constexpr uint64_t PAC2_SWAR_MASK_16 = 0x0001000100010001ULL; +// Float/double → int64 scale factors (powers of 2 for exact FP arithmetic) +constexpr int PAC2_FLOAT_SHIFT = 20; // 2^20 ≈ 1M — float has ~7 decimal digits, preserves ~6 +constexpr int PAC2_DOUBLE_SHIFT = 27; // 2^27 ≈ 100M — double has ~15 decimal digits, preserves ~8 +constexpr double PAC2_FLOAT_SCALE = static_cast(1 << PAC2_FLOAT_SHIFT); // 1048576.0 +constexpr double PAC2_DOUBLE_SCALE = static_cast(1 << PAC2_DOUBLE_SHIFT); // 134217728.0 + +// Scale float/double to int64 with branchless clamping (shared by clip_sum and clip_min_max) +template +static inline int64_t ScaleFloatToInt64(FLOAT_TYPE value) { + constexpr FLOAT_TYPE scale = static_cast(1 << SHIFT); + FLOAT_TYPE scaled = value * scale; + scaled = std::max(scaled, static_cast(INT64_MIN)); + scaled = std::min(scaled, static_cast(INT64_MAX)); + return static_cast(scaled); +} + // ============================================================================ // Packed pointer + exact_count helpers // Normal level[16] stores: upper 16 bits = exact_count, lower 48 bits = overflow pointer diff --git a/src/include/aggregates/pac_min_max.hpp b/src/include/aggregates/pac_min_max.hpp index a825f503..e6cf177e 100644 --- a/src/include/aggregates/pac_min_max.hpp +++ b/src/include/aggregates/pac_min_max.hpp @@ -32,10 +32,6 @@ void RegisterPacMinFunctions(ExtensionLoader &loader); void RegisterPacMaxFunctions(ExtensionLoader &loader); void RegisterPacMinCountersFunctions(ExtensionLoader &loader); void RegisterPacMaxCountersFunctions(ExtensionLoader &loader); -void 
RegisterPacNoisedClipMinFunctions(ExtensionLoader &loader); -void RegisterPacNoisedClipMaxFunctions(ExtensionLoader &loader); -void RegisterPacClipMinFunctions(ExtensionLoader &loader); -void RegisterPacClipMaxFunctions(ExtensionLoader &loader); // ============================================================================ // PAC_MIN/PAC_MAX(hash_key, value) aggregate functions diff --git a/src/query_processing/pac_expression_builder.cpp b/src/query_processing/pac_expression_builder.cpp index 1cf09243..06980403 100644 --- a/src/query_processing/pac_expression_builder.cpp +++ b/src/query_processing/pac_expression_builder.cpp @@ -1354,14 +1354,6 @@ void RewriteClipAggregates(OptimizerExtensionInput &input, unique_ptr lower_ref = make_uniq(lower_type, ColumnBinding(lower_agg_index, i)); - // pac_clip_sum has integer + DECIMAL overloads but no FLOAT/DOUBLE. - // Cast FLOAT/DOUBLE to BIGINT so binding succeeds. - if ((orig == "sum" || orig == "count") && - (lower_type.id() == LogicalTypeId::FLOAT || lower_type.id() == LogicalTypeId::DOUBLE)) { - lower_ref = - BoundCastExpression::AddCastToType(input.context, std::move(lower_ref), LogicalType::BIGINT); - } - // count → sumcount (preserves BIGINT return type), others → clip variant string clip_func; if (orig == "count") { diff --git a/test/sql/pac_clip_min_max.test b/test/sql/pac_clip_min_max.test new file mode 100644 index 00000000..63e1e811 --- /dev/null +++ b/test/sql/pac_clip_min_max.test @@ -0,0 +1,282 @@ +# name: test/sql/pac_clip_min_max.test +# description: Test pac_clip_min/max with level-based clipping +# group: [sql] + +require pac + +statement ok +PRAGMA clear_pac_metadata; + +statement ok +SET pac_seed = 42 + +statement ok +SET threads = 1 + +statement ok +SET pac_mi = 0 + +# ============================================================================ +# Basic type checks +# ============================================================================ + +query I +SELECT typeof(pac_clip_max(hash(i)::UBIGINT, 
i::INTEGER)) FROM range(1, 101) t(i) +---- +FLOAT[] + +query I +SELECT typeof(pac_noised_clip_max(hash(i)::UBIGINT, i::INTEGER)) FROM range(1, 101) t(i) +---- +BIGINT + +query I +SELECT typeof(pac_clip_min(hash(i)::UBIGINT, i::INTEGER)) FROM range(1, 101) t(i) +---- +FLOAT[] + +query I +SELECT typeof(pac_noised_clip_min(hash(i)::UBIGINT, i::INTEGER)) FROM range(1, 101) t(i) +---- +BIGINT + +query I +SELECT typeof(pac_noised_clip_max(hash(i)::UBIGINT, (i*0.5)::FLOAT)) FROM range(1, 101) t(i) +---- +FLOAT + +query I +SELECT typeof(pac_noised_clip_min(hash(i)::UBIGINT, (i*0.5)::DOUBLE)) FROM range(1, 101) t(i) +---- +DOUBLE + +# ============================================================================ +# MAX outlier clipping: 1000 normal rows (1-100), 1 outlier at 1000000 +# Without clipping the outlier should dominate (~999424) +# With clipping the outlier level has 1 contributor → zeroed out, max → ~100 +# ============================================================================ + +statement ok +CREATE TABLE max_outlier AS +SELECT i as id, + CASE WHEN i <= 1000 THEN (i % 100) + 1 + ELSE 1000000 + END as value +FROM range(1, 1002) t(i) + +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_max(hash(id)::UBIGINT, value) FROM max_outlier +---- +999424 + +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_max(hash(id)::UBIGINT, value) FROM max_outlier +---- +100 + +# ============================================================================ +# MIN outlier clipping: 1000 normal rows (10-60), 1 negative outlier at -999999 +# Without clipping: min ~ -1015808 (approximate -999999) +# With clipping: negative outlier zeroed, min → ~10 +# ============================================================================ + +statement ok +CREATE TABLE min_outlier AS +SELECT i as id, + CASE WHEN i <= 1000 THEN (i % 50) + 10 + ELSE -999999 + END as value +FROM range(1, 1002) t(i) + +statement ok +SET pac_clip_support = NULL + 
+query I +SELECT pac_noised_clip_min(hash(id)::UBIGINT, value) FROM min_outlier +---- +-1015808 + +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_min(hash(id)::UBIGINT, value) FROM min_outlier +---- +10 + +# ============================================================================ +# Same-level values should NOT be clipped +# Level 0 covers [-128, 127], so 50 and 120 are in the same level +# ============================================================================ + +statement ok +CREATE TABLE same_level AS +SELECT i as id, + CASE WHEN i <= 1000 THEN 50 + ELSE 120 + END as value +FROM range(1, 1002) t(i) + +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_max(hash(id)::UBIGINT, value) FROM same_level +---- +120 + +# With clipping: 120 is same level as 50, NOT clipped +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_max(hash(id)::UBIGINT, value) FROM same_level +---- +120 + +# ============================================================================ +# Cross-level outlier: value at level 3 (4096+) among level 0 values (1-100) +# ============================================================================ + +statement ok +CREATE TABLE cross_level AS +SELECT i as id, + CASE WHEN i <= 1000 THEN (i % 100) + 1 + ELSE 5000 + END as value +FROM range(1, 1002) t(i) + +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_max(hash(id)::UBIGINT, value) < 1000 FROM cross_level +---- +true + +# ============================================================================ +# Grouped aggregation +# ============================================================================ + +statement ok +SET pac_clip_support = NULL + +query I +SELECT count(*) FROM ( + SELECT id % 3 AS grp, pac_noised_clip_max(hash(id)::UBIGINT, value) AS m + FROM max_outlier GROUP BY grp +) t WHERE m IS NOT NULL +---- +3 + +query I +SELECT count(*) FROM ( + SELECT id % 3 AS grp, 
pac_noised_clip_min(hash(id)::UBIGINT, value) AS m + FROM min_outlier GROUP BY grp +) t WHERE m IS NOT NULL +---- +3 + +# ============================================================================ +# FLOAT max clipping +# ============================================================================ + +statement ok +CREATE TABLE float_mm AS +SELECT i as id, + CASE WHEN i <= 1000 THEN ((i % 100) * 0.5 + 0.5)::FLOAT + ELSE 50000.0::FLOAT + END as value +FROM range(1, 1002) t(i) + +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_max(hash(id)::UBIGINT, value) > 10000.0 FROM float_mm +---- +true + +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_max(hash(id)::UBIGINT, value) < 200.0 FROM float_mm +---- +true + +# ============================================================================ +# DOUBLE min clipping +# ============================================================================ + +statement ok +CREATE TABLE double_mm AS +SELECT i as id, + CASE WHEN i <= 1000 THEN ((i % 100) * 0.01 + 1.0)::DOUBLE + ELSE -99999.99::DOUBLE + END as value +FROM range(1, 1002) t(i) + +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_min(hash(id)::UBIGINT, value) < -50000.0 FROM double_mm +---- +true + +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_min(hash(id)::UBIGINT, value) BETWEEN 0.0 AND 5.0 FROM double_mm +---- +true + +# ============================================================================ +# NULL handling +# ============================================================================ + +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_max(hash(id)::UBIGINT, CASE WHEN id % 2 = 0 THEN value ELSE NULL END) IS NOT NULL +FROM max_outlier +---- +true + +# ============================================================================ +# Different integer types +# 
============================================================================ + +query I +SELECT pac_noised_clip_max(hash(i)::UBIGINT, i::SMALLINT) BETWEEN 50 AND 128 FROM range(1, 100) t(i) +---- +true + +query I +SELECT pac_noised_clip_max(hash(i)::UBIGINT, i::BIGINT) BETWEEN 50 AND 128 FROM range(1, 100) t(i) +---- +true + +query I +SELECT pac_noised_clip_min(hash(i)::UBIGINT, i::TINYINT) BETWEEN 1 AND 10 FROM range(1, 100) t(i) +---- +true + +query I +SELECT pac_noised_clip_max(hash(i)::UBIGINT, i::UTINYINT) BETWEEN 50 AND 255 FROM range(1, 100) t(i) +---- +true + +query I +SELECT pac_noised_clip_min(hash(i)::UBIGINT, (i + 10)::UINTEGER) BETWEEN 1 AND 20 FROM range(1, 100) t(i) +---- +true diff --git a/test/sql/pac_clip_sum.test b/test/sql/pac_clip_sum.test index 1f94a454..de1c2e1a 100644 --- a/test/sql/pac_clip_sum.test +++ b/test/sql/pac_clip_sum.test @@ -327,6 +327,122 @@ SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value) > -1000000000000 FROM neg_h ---- true +# ============================================================================ +# FLOAT support (fractional values preserved via 2^20 scaling) +# ============================================================================ + +statement ok +SET pac_clip_support = NULL + +statement ok +SET pac_mi = 0 + +# Type checks +query I +SELECT typeof(pac_clip_sum(hash(i)::UBIGINT, (i * 0.5)::FLOAT)) FROM range(1, 101) t(i) +---- +FLOAT[] + +query I +SELECT typeof(pac_noised_clip_sum(hash(i)::UBIGINT, (i * 0.5)::FLOAT)) FROM range(1, 101) t(i) +---- +FLOAT + +# Basic FLOAT sum: 4000 rows each 0.5 → true sum = 2000 +# pac is approximate, but should be in the right ballpark +query I +SELECT abs(pac_noised_clip_sum(hash(i)::UBIGINT, 0.5::FLOAT) - 2000.0) < 1000.0 FROM range(1, 4001) t(i) +---- +true + +# FLOAT with clipping: outlier at 100000.0 among 1000 values of 1.5 +# True sum = 1000*1.5 + 100000 = 101500 +statement ok +CREATE TABLE float_outlier AS +SELECT i as id, + CASE WHEN i <= 1000 THEN 1.5::FLOAT + ELSE 
100000.0::FLOAT + END as value +FROM range(1, 1002) t(i) + +# Without clip: outlier included → ~198039 +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value)::INTEGER FROM float_outlier +---- +198039 + +# With clip: outlier removed → ~1431 (close to 1000*1.5=1500) +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value)::INTEGER FROM float_outlier +---- +1431 + +# ============================================================================ +# DOUBLE support (fractional values preserved via 2^27 scaling) +# ============================================================================ + +statement ok +SET pac_clip_support = NULL + +# Type checks +query I +SELECT typeof(pac_clip_sum(hash(i)::UBIGINT, (i * 0.001)::DOUBLE)) FROM range(1, 101) t(i) +---- +FLOAT[] + +query I +SELECT typeof(pac_noised_clip_sum(hash(i)::UBIGINT, (i * 0.001)::DOUBLE)) FROM range(1, 101) t(i) +---- +DOUBLE + +# Basic DOUBLE sum: 4000 rows each 0.25 → true sum = 1000 +query I +SELECT abs(pac_noised_clip_sum(hash(i)::UBIGINT, 0.25::DOUBLE) - 1000.0) < 500.0 FROM range(1, 4001) t(i) +---- +true + +# DOUBLE with clipping: true sum = 1000*2.718 + 999999.99 = 1002717.99 +statement ok +CREATE TABLE double_outlier AS +SELECT i as id, + CASE WHEN i <= 1000 THEN 2.718::DOUBLE + ELSE 999999.99::DOUBLE + END as value +FROM range(1, 1002) t(i) + +# Without clip: outlier included → ~1968584 +statement ok +SET pac_clip_support = NULL + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value)::INTEGER FROM double_outlier +---- +1968584 + +# With clip: outlier removed → ~2504 (close to 1000*2.718=2718) +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_sum(hash(id)::UBIGINT, value)::INTEGER FROM double_outlier +---- +2504 + +# Grouped float/double aggregation +statement ok +SET pac_clip_support = NULL + +query I +SELECT count(*) FROM ( + SELECT id % 3 AS grp, pac_noised_clip_sum(hash(id)::UBIGINT, value) AS s + FROM 
float_outlier GROUP BY grp +) t WHERE s IS NOT NULL +---- +3 + # ============================================================================ # Over-clipping: pac_clip_support higher than group size → zero result # ============================================================================ From ee63b59406ec20115b245886682e8c428e34e1d3 Mon Sep 17 00:00:00 2001 From: ila Date: Wed, 1 Apr 2026 14:53:16 +0200 Subject: [PATCH 23/27] Add shared Claude Code skills submodule Adds github.com/ila/duckdb-claude-skills at .claude/skills/shared/ with 7 generic DuckDB extension skills: best-practices, code-review, plan-feature, project-review, duckdb-internals, write-docs, run-tests. Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/skills/shared | 1 + .gitmodules | 3 +++ 2 files changed, 4 insertions(+) create mode 160000 .claude/skills/shared diff --git a/.claude/skills/shared b/.claude/skills/shared new file mode 160000 index 00000000..9d673ac7 --- /dev/null +++ b/.claude/skills/shared @@ -0,0 +1 @@ +Subproject commit 9d673ac7a0eade0ae0d729e9d6080d5172810728 diff --git a/.gitmodules b/.gitmodules index e2e8a723..a809e05e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -9,3 +9,6 @@ [submodule "benchmark/sqlstorm/SQLStorm"] path = benchmark/sqlstorm/SQLStorm url = https://github.com/SQL-Storm/SQLStorm.git +[submodule ".claude/skills/shared"] + path = .claude/skills/shared + url = https://github.com/ila/duckdb-claude-skills.git From b0c3f012c30c63572888bcbf7449fa4c2ad9e79d Mon Sep 17 00:00:00 2001 From: peter Date: Thu, 2 Apr 2026 16:19:45 +0200 Subject: [PATCH 24/27] (I made some git mess and some of the below changes were part of the previous merge commit already -- apologies).
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor pac_clip: shared code, two-sided unsigned min/max, unified outlier clipping - Factor shared code into pac_clip_aggr.hpp: CLIP_* constants, ScaleFloatToInt64, ClipEstimateDistinct, PacClipBindData, PacClipBind functions. Remove duplicates from pac_clip_sum.hpp/cpp and pac_clip_min_max.hpp/cpp. - Convert pac_clip_min_max from signed int8_t to two-sided unsigned uint8_t: positive values in pos_state, absolute negatives in neg_state (with !IS_MAX). GetLevel threshold 128→256, giving 8-bit precision instead of 7-bit. Lazy neg_state allocation: positive-only data never allocates it. - Unify outlier elimination across sum and min/max using shared ClipFindSupportedRange and ClipEffectiveLevel helpers. Both now use first/last supported boundary logic (min/max previously did per-level independent filtering, missing interior-level preservation). - Add pac_clip_scale setting (BOOLEAN, default false). When false, unsupported prefix/suffix levels are omitted. When true, they are scaled to the nearest supported boundary (4^distance). This replaces sum's previous asymmetric behavior (prefix scaled, suffix omitted) with a symmetric policy. - Remove stale clip min/max stub registrations from pac_min_max.cpp (superseded by real implementations in pac_clip_min_max.cpp). - Remove C++17 if constexpr usage from pac_clip_min_max. - Add tests for negative values, mixed pos/neg, negative-only, and neg-outlier clipping in pac_clip_min_max.test. 
--- src/aggregates/pac_clip_min_max.cpp | 283 +++++++++----------- src/aggregates/pac_min_max.cpp | 50 +--- src/core/pac_extension.cpp | 5 + src/include/aggregates/pac_clip_min_max.hpp | 151 +++++------ test/sql/pac_clip_min_max.test | 81 +++++- 5 files changed, 288 insertions(+), 282 deletions(-) diff --git a/src/aggregates/pac_clip_min_max.cpp b/src/aggregates/pac_clip_min_max.cpp index d5e51c12..e1228563 100644 --- a/src/aggregates/pac_clip_min_max.cpp +++ b/src/aggregates/pac_clip_min_max.cpp @@ -1,5 +1,4 @@ #include "aggregates/pac_clip_min_max.hpp" -#include "aggregates/pac_clip_sum.hpp" // for PAC2_FLOAT_SCALE, PAC2_DOUBLE_SCALE #include "categorical/pac_categorical.hpp" #include "duckdb/common/types/decimal.hpp" #include "duckdb/parser/parsed_data/create_aggregate_function_info.hpp" @@ -8,18 +7,16 @@ namespace duckdb { // ============================================================================ -// Inner state update: route one signed int64 value to the correct level +// Inner state update: always unsigned (caller provides abs value) // ============================================================================ template AUTOVECTORIZE inline void PacClipMinMaxUpdateOneInternal(PacClipMinMaxIntState &state, uint64_t key_hash, - int64_t value, ArenaAllocator &allocator) { + uint64_t value, ArenaAllocator &allocator) { state.key_hash |= key_hash; - uint64_t abs_val = static_cast(value >= 0 ? 
value : -value); - int level = PacClipMinMaxIntState::GetLevel(abs_val); + int level = PacClipMinMaxIntState::GetLevel(value); int shift = level << 1; - // Arithmetic right shift preserves sign; fits in int8_t due to GetLevel threshold 128 - int8_t shifted_val = static_cast(value >> shift); + uint8_t shifted_val = static_cast((value >> shift) & 0xFF); state.EnsureLevelAllocated(allocator, level); uint64_t *buf = state.levels[level]; @@ -39,33 +36,25 @@ AUTOVECTORIZE inline void PacClipMinMaxUpdateOneInternal(PacClipMinMaxIntState -AUTOVECTORIZE inline void PacClipMinMaxUpdateOneInternalUnsigned(PacClipMinMaxIntState &state, - uint64_t key_hash, uint64_t value, - ArenaAllocator &allocator) { - state.key_hash |= key_hash; - - int level = PacClipMinMaxIntState::GetLevel(value); - int shift = level << 1; - int8_t shifted_val = static_cast((value >> shift) & 0x7F); // mask to 7 bits (0-127) - - state.EnsureLevelAllocated(allocator, level); - uint64_t *buf = state.levels[level]; - buf[PCMM_SWAR] |= (1ULL << (key_hash >> 58)); - - // BOUNDOPT - if (!PAC_IS_BETTER(shifted_val, state.level_bounds[level])) { - return; - } - state.UpdateExtreme(buf, shifted_val, key_hash); - if ((state.update_count & (BOUND_RECOMPUTE_INTERVAL - 1)) == 0) { - state.RecomputeBound(level); +inline void PacClipMinMaxRouteValue(PacClipMinMaxStateWrapper &wrapper, + PacClipMinMaxIntState *pos_state, uint64_t hash, int64_t value, + ArenaAllocator &a) { + if (value < 0) { + auto *neg = wrapper.EnsureNegState(a); + PacClipMinMaxUpdateOneInternal(*neg, hash, static_cast(-value), a); + neg->update_count++; + } else { + PacClipMinMaxUpdateOneInternal(*pos_state, hash, static_cast(value), a); + pos_state->update_count++; } } // ============================================================================ -// Buffered update (single-sided, no pos/neg split) +// Buffered update (two-sided: SIGNED routes to pos/neg, !SIGNED always pos) // 
============================================================================ template AUTOVECTORIZE inline void PacClipMinMaxUpdateOne(PacClipMinMaxStateWrapper &agg, uint64_t key_hash, @@ -74,20 +63,20 @@ AUTOVECTORIZE inline void PacClipMinMaxUpdateOne(PacClipMinMaxStateWrapper::BUF_SIZE)) { auto *dst_state = agg.EnsureState(a); for (int i = 0; i < PacClipMinMaxStateWrapper::BUF_SIZE; i++) { - if constexpr (SIGNED) { - PacClipMinMaxUpdateOneInternal(*dst_state, agg.hash_buf[i], agg.val_buf[i], a); + if (SIGNED) { + PacClipMinMaxRouteValue(agg, dst_state, agg.hash_buf[i], agg.val_buf[i], a); } else { - PacClipMinMaxUpdateOneInternalUnsigned(*dst_state, agg.hash_buf[i], - static_cast(agg.val_buf[i]), a); + PacClipMinMaxUpdateOneInternal(*dst_state, agg.hash_buf[i], + static_cast(agg.val_buf[i]), a); + dst_state->update_count++; } - dst_state->update_count++; } - if constexpr (SIGNED) { - PacClipMinMaxUpdateOneInternal(*dst_state, key_hash, static_cast(value), a); + if (SIGNED) { + PacClipMinMaxRouteValue(agg, dst_state, key_hash, static_cast(value), a); } else { - PacClipMinMaxUpdateOneInternalUnsigned(*dst_state, key_hash, static_cast(value), a); + PacClipMinMaxUpdateOneInternal(*dst_state, key_hash, static_cast(value), a); + dst_state->update_count++; } - dst_state->update_count++; agg.n_buffered &= ~PacClipMinMaxStateWrapper::BUF_MASK; } else { agg.val_buf[cnt] = static_cast(value); @@ -106,13 +95,13 @@ inline void PacClipMinMaxFlushBuffer(PacClipMinMaxStateWrapper &src, Pac if (cnt > 0) { auto *dst_state = dst.EnsureState(a); for (uint64_t i = 0; i < cnt; i++) { - if constexpr (SIGNED) { - PacClipMinMaxUpdateOneInternal(*dst_state, src.hash_buf[i], src.val_buf[i], a); + if (SIGNED) { + PacClipMinMaxRouteValue(dst, dst_state, src.hash_buf[i], src.val_buf[i], a); } else { - PacClipMinMaxUpdateOneInternalUnsigned(*dst_state, src.hash_buf[i], - static_cast(src.val_buf[i]), a); + PacClipMinMaxUpdateOneInternal(*dst_state, src.hash_buf[i], + 
static_cast(src.val_buf[i]), a); + dst_state->update_count++; } - dst_state->update_count++; } src.n_buffered &= ~PacClipMinMaxStateWrapper::BUF_MASK; } @@ -274,8 +263,8 @@ static void PacClipMinMaxScatterUpdateFloat(Vector inputs[], Vector &states, idx // X-macro: generate float/double Update/ScatterUpdate for MAX and MIN #define PCMM_FLOAT_TYPES \ - XF(SingleFloat, float, PCMM_FLOAT_SHIFT) \ - XF(SingleDouble, double, PCMM_DOUBLE_SHIFT) + XF(SingleFloat, float, CLIP_FLOAT_SHIFT) \ + XF(SingleDouble, double, CLIP_DOUBLE_SHIFT) #define XF(NAME, FLOAT_T, SHIFT_VAL) \ static void PacClipMaxUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, \ @@ -312,11 +301,21 @@ static void PacClipMinMaxCombineInt(Vector &src, Vector &dst, idx_t count, Arena PacClipMinMaxFlushBuffer(*src_wrapper[i], *dst_wrapper[i], allocator); auto *s = src_wrapper[i]->GetState(); - if (!s) { - continue; + if (s) { + auto *d = dst_wrapper[i]->EnsureState(allocator); + d->CombineFrom(s, allocator); + } + + // Combine neg states + auto *s_neg = src_wrapper[i]->GetNegState(); + if (s_neg) { + auto *d_neg = dst_wrapper[i]->GetNegState(); + if (!d_neg) { + dst_wrapper[i]->neg_state = s_neg; // steal + } else { + d_neg->CombineFrom(s_neg, allocator); + } } - auto *d = dst_wrapper[i]->EnsureState(allocator); - d->CombineFrom(s, allocator); } } @@ -327,34 +326,7 @@ static void PacClipMinCombine(Vector &src, Vector &dst, AggregateInputData &aggr PacClipMinMaxCombineInt(src, dst, count, aggr.allocator); } -// ============================================================================ -// Bind data -// ============================================================================ -struct PacClipMinMaxBindData : public PacBindData { - int clip_support_threshold; - double float_scale; - - PacClipMinMaxBindData(ClientContext &ctx, double mi_val, double correction_val, int clip_support, - double float_scale_val = 1.0) - : PacBindData(ctx, mi_val, correction_val, 1.0), 
clip_support_threshold(clip_support), - float_scale(float_scale_val) { - } - - unique_ptr Copy() const override { - auto copy = make_uniq(*this); - copy->total_update_count = 0; - copy->suspicious_count = 0; - copy->nonsuspicious_count = 0; - return copy; - } - bool Equals(const FunctionData &other) const override { - if (!PacBindData::Equals(other)) { - return false; - } - auto *o = dynamic_cast(&other); - return o && clip_support_threshold == o->clip_support_threshold && float_scale == o->float_scale; - } -}; +// PacClipBindData is defined in pac_clip_aggr.hpp // ============================================================================ // Finalize: noised scalar output @@ -365,31 +337,54 @@ static void PacClipMinMaxFinalize(Vector &states, AggregateInputData &input, Vec auto state_ptrs = FlatVector::GetData *>(states); auto data = FlatVector::GetData(result); auto &result_mask = FlatVector::Validity(result); - auto &bind = static_cast(*input.bind_data); + auto &bind = static_cast(*input.bind_data); double mi = bind.mi; double correction = bind.correction; uint64_t query_hash = bind.query_hash; auto pstate = bind.pstate; int clip_support = bind.clip_support_threshold; + bool clip_scale = bind.clip_scale; for (idx_t i = 0; i < count; i++) { PacClipMinMaxFlushBuffer(*state_ptrs[i], *state_ptrs[i], input.allocator); PAC_FLOAT buf[64] = {0}; - auto *s = state_ptrs[i]->GetState(); - if (!s) { + auto *pos = state_ptrs[i]->GetState(); + auto *neg = state_ptrs[i]->GetNegState(); + if (!pos && !neg) { result_mask.SetInvalid(offset + i); continue; } - uint64_t key_hash = s->key_hash; + uint64_t key_hash = (pos ? pos->key_hash : 0) | (neg ? 
neg->key_hash : 0); std::mt19937_64 gen(bind.seed); if (PacNoiseInNull(key_hash, mi, correction, gen)) { result_mask.SetInvalid(offset + i); continue; } - s->GetTotals(buf, clip_support); - uint64_t update_count = s->update_count; + uint64_t update_count = 0; + if (pos) { + pos->GetTotals(buf, clip_support, clip_scale); + update_count = pos->update_count; + } + + // Merge neg state: negate absolute extremes back to negative values + if (neg) { + PAC_FLOAT neg_buf[64] = {0}; + neg->GetTotals(neg_buf, clip_support, clip_scale); + for (int j = 0; j < 64; j++) { + // Only merge if neg had a surviving contribution (not fully clipped) + if (neg_buf[j] != 0) { + PAC_FLOAT neg_val = -neg_buf[j]; + if (IS_MAX) { + buf[j] = std::max(buf[j], neg_val); + } else { + buf[j] = std::min(buf[j], neg_val); + } + } + } + update_count += neg->update_count; + } CheckPacSampleDiversity(key_hash, buf, update_count, IS_MAX ? "pac_noised_clip_max" : "pac_noised_clip_min", bind); @@ -429,10 +424,11 @@ template static void PacClipMinMaxFinalizeCounters(Vector &states, AggregateInputData &input, Vector &result, idx_t count, idx_t offset) { auto state_ptrs = FlatVector::GetData *>(states); - auto &bind = static_cast(*input.bind_data); + auto &bind = static_cast(*input.bind_data); int clip_support = bind.clip_support_threshold; double correction = bind.correction; double float_scale = bind.float_scale; + bool clip_scale = bind.clip_scale; auto list_entries = FlatVector::GetData(result); auto &child_vec = ListVector::GetEntry(result); @@ -453,11 +449,28 @@ static void PacClipMinMaxFinalizeCounters(Vector &states, AggregateInputData &in uint64_t key_hash = 0; uint64_t update_count = 0; - auto *s = state_ptrs[i]->GetState(); - if (s) { - key_hash = s->key_hash; - update_count = s->update_count; - s->GetTotals(buf, clip_support); + auto *pos = state_ptrs[i]->GetState(); + auto *neg = state_ptrs[i]->GetNegState(); + if (pos) { + key_hash = pos->key_hash; + update_count = pos->update_count; + 
pos->GetTotals(buf, clip_support, clip_scale); + } + if (neg) { + PAC_FLOAT neg_buf[64] = {0}; + neg->GetTotals(neg_buf, clip_support, clip_scale); + key_hash |= neg->key_hash; + for (int j = 0; j < 64; j++) { + if (neg_buf[j] != 0) { + PAC_FLOAT neg_val = -neg_buf[j]; + if (IS_MAX) { + buf[j] = std::max(buf[j], neg_val); + } else { + buf[j] = std::min(buf[j], neg_val); + } + } + } + update_count += neg->update_count; } CheckPacSampleDiversity(key_hash, buf, update_count, IS_MAX ? "pac_clip_max" : "pac_clip_min", bind); @@ -493,39 +506,7 @@ static void PacClipMinMaxInitialize(const AggregateFunction &, data_ptr_t state_ memset(state_p, 0, sizeof(PacClipMinMaxStateWrapper)); } -static unique_ptr PacClipMinMaxBindWithScale(ClientContext &ctx, vector> &args, - double float_scale = 1.0) { - double mi = GetPacMiFromSetting(ctx); - double correction = 1.0; - if (2 < args.size()) { - if (!args[2]->IsFoldable()) { - throw InvalidInputException("pac_clip_min/max: correction parameter must be a constant"); - } - auto val = ExpressionExecutor::EvaluateScalar(ctx, *args[2]); - correction = val.GetValue(); - if (correction < 0.0) { - throw InvalidInputException("pac_clip_min/max: correction must be >= 0"); - } - } - int clip_support = 0; - Value dc_val; - if (ctx.TryGetCurrentSetting("pac_clip_support", dc_val) && !dc_val.IsNull()) { - clip_support = static_cast(dc_val.GetValue()); - } - return make_uniq(ctx, mi, correction, clip_support, float_scale); -} -static unique_ptr PacClipMinMaxBind(ClientContext &ctx, AggregateFunction &, - vector> &args) { - return PacClipMinMaxBindWithScale(ctx, args); -} -static unique_ptr PacClipMinMaxBindFloat(ClientContext &ctx, AggregateFunction &, - vector> &args) { - return PacClipMinMaxBindWithScale(ctx, args, PCMM_FLOAT_SCALE); -} -static unique_ptr PacClipMinMaxBindDouble(ClientContext &ctx, AggregateFunction &, - vector> &args) { - return PacClipMinMaxBindWithScale(ctx, args, PCMM_DOUBLE_SCALE); -} +// PacClipBind, PacClipBindFloat, 
PacClipBindDouble are defined in pac_clip_aggr.hpp // ============================================================================ // DECIMAL support: dispatch by physical type @@ -573,7 +554,7 @@ static unique_ptr BindDecimalPacNoisedClipMinMax(ClientContext &ct function.name = IS_MAX ? "pac_noised_clip_max" : "pac_noised_clip_min"; function.arguments[1] = decimal_type; function.return_type = LogicalType::DECIMAL(Decimal::MAX_WIDTH_DECIMAL, DecimalType::GetScale(decimal_type)); - return PacClipMinMaxBind(ctx, function, args); + return PacClipBind(ctx, function, args); } // ============================================================================ @@ -587,11 +568,11 @@ static void AddClipMinMaxCountersFcn(AggregateFunctionSet &set, const string &na set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type}, list_type, PacClipMinMaxStateSize, PacClipMinMaxInitialize, scatter, IS_MAX ? PacClipMaxCombine : PacClipMinCombine, finalize, - FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipMinMaxBind)); + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipBind)); set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type, LogicalType::DOUBLE}, list_type, PacClipMinMaxStateSize, PacClipMinMaxInitialize, scatter, IS_MAX ? PacClipMaxCombine : PacClipMinCombine, finalize, - FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipMinMaxBind)); + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipBind)); } template @@ -601,11 +582,11 @@ static void AddNoisedClipMinMaxFcn(AggregateFunctionSet &set, const string &name set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type}, result_type, PacClipMinMaxStateSize, PacClipMinMaxInitialize, scatter, IS_MAX ? 
PacClipMaxCombine : PacClipMinCombine, finalize, - FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipMinMaxBind)); + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipBind)); set.AddFunction(AggregateFunction(name, {LogicalType::UBIGINT, value_type, LogicalType::DOUBLE}, result_type, PacClipMinMaxStateSize, PacClipMinMaxInitialize, scatter, IS_MAX ? PacClipMaxCombine : PacClipMinCombine, finalize, - FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipMinMaxBind)); + FunctionNullHandling::DEFAULT_NULL_HANDLING, update, PacClipBind)); } // Helper to register all type overloads @@ -695,55 +676,55 @@ static void AddFloatDoubleOverloads(AggregateFunctionSet &set, const string &nam auto list_type = LogicalType::LIST(PacFloatLogicalType()); // FLOAT - set.AddFunction(AggregateFunction( - name, {LogicalType::UBIGINT, LogicalType::FLOAT}, list_type, state_size, init, - IS_MAX ? PacClipMaxScatterUpdateSingleFloat : PacClipMinScatterUpdateSingleFloat, combine, finalize, - FunctionNullHandling::DEFAULT_NULL_HANDLING, - IS_MAX ? PacClipMaxUpdateSingleFloat : PacClipMinUpdateSingleFloat, PacClipMinMaxBindFloat)); + set.AddFunction( + AggregateFunction(name, {LogicalType::UBIGINT, LogicalType::FLOAT}, list_type, state_size, init, + IS_MAX ? PacClipMaxScatterUpdateSingleFloat : PacClipMinScatterUpdateSingleFloat, combine, + finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateSingleFloat : PacClipMinUpdateSingleFloat, PacClipBindFloat)); set.AddFunction(AggregateFunction( name, {LogicalType::UBIGINT, LogicalType::FLOAT, LogicalType::DOUBLE}, list_type, state_size, init, IS_MAX ? PacClipMaxScatterUpdateSingleFloat : PacClipMinScatterUpdateSingleFloat, combine, finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, - IS_MAX ? PacClipMaxUpdateSingleFloat : PacClipMinUpdateSingleFloat, PacClipMinMaxBindFloat)); + IS_MAX ? 
PacClipMaxUpdateSingleFloat : PacClipMinUpdateSingleFloat, PacClipBindFloat)); // DOUBLE - set.AddFunction(AggregateFunction( - name, {LogicalType::UBIGINT, LogicalType::DOUBLE}, list_type, state_size, init, - IS_MAX ? PacClipMaxScatterUpdateSingleDouble : PacClipMinScatterUpdateSingleDouble, combine, finalize, - FunctionNullHandling::DEFAULT_NULL_HANDLING, - IS_MAX ? PacClipMaxUpdateSingleDouble : PacClipMinUpdateSingleDouble, PacClipMinMaxBindDouble)); + set.AddFunction( + AggregateFunction(name, {LogicalType::UBIGINT, LogicalType::DOUBLE}, list_type, state_size, init, + IS_MAX ? PacClipMaxScatterUpdateSingleDouble : PacClipMinScatterUpdateSingleDouble, + combine, finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateSingleDouble : PacClipMinUpdateSingleDouble, PacClipBindDouble)); set.AddFunction(AggregateFunction( name, {LogicalType::UBIGINT, LogicalType::DOUBLE, LogicalType::DOUBLE}, list_type, state_size, init, IS_MAX ? PacClipMaxScatterUpdateSingleDouble : PacClipMinScatterUpdateSingleDouble, combine, finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, - IS_MAX ? PacClipMaxUpdateSingleDouble : PacClipMinUpdateSingleDouble, PacClipMinMaxBindDouble)); + IS_MAX ? PacClipMaxUpdateSingleDouble : PacClipMinUpdateSingleDouble, PacClipBindDouble)); } else { auto float_finalize = IS_MAX ? PacClipMaxNoisedFinalizeFloat : PacClipMinNoisedFinalizeFloat; auto double_finalize = IS_MAX ? PacClipMaxNoisedFinalizeDouble : PacClipMinNoisedFinalizeDouble; // FLOAT → FLOAT - set.AddFunction(AggregateFunction( - name, {LogicalType::UBIGINT, LogicalType::FLOAT}, LogicalType::FLOAT, state_size, init, - IS_MAX ? PacClipMaxScatterUpdateSingleFloat : PacClipMinScatterUpdateSingleFloat, combine, float_finalize, - FunctionNullHandling::DEFAULT_NULL_HANDLING, - IS_MAX ? 
PacClipMaxUpdateSingleFloat : PacClipMinUpdateSingleFloat, PacClipMinMaxBindFloat)); + set.AddFunction( + AggregateFunction(name, {LogicalType::UBIGINT, LogicalType::FLOAT}, LogicalType::FLOAT, state_size, init, + IS_MAX ? PacClipMaxScatterUpdateSingleFloat : PacClipMinScatterUpdateSingleFloat, combine, + float_finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateSingleFloat : PacClipMinUpdateSingleFloat, PacClipBindFloat)); set.AddFunction(AggregateFunction( name, {LogicalType::UBIGINT, LogicalType::FLOAT, LogicalType::DOUBLE}, LogicalType::FLOAT, state_size, init, IS_MAX ? PacClipMaxScatterUpdateSingleFloat : PacClipMinScatterUpdateSingleFloat, combine, float_finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, - IS_MAX ? PacClipMaxUpdateSingleFloat : PacClipMinUpdateSingleFloat, PacClipMinMaxBindFloat)); + IS_MAX ? PacClipMaxUpdateSingleFloat : PacClipMinUpdateSingleFloat, PacClipBindFloat)); // DOUBLE → DOUBLE - set.AddFunction(AggregateFunction( - name, {LogicalType::UBIGINT, LogicalType::DOUBLE}, LogicalType::DOUBLE, state_size, init, - IS_MAX ? PacClipMaxScatterUpdateSingleDouble : PacClipMinScatterUpdateSingleDouble, combine, - double_finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, - IS_MAX ? PacClipMaxUpdateSingleDouble : PacClipMinUpdateSingleDouble, PacClipMinMaxBindDouble)); + set.AddFunction( + AggregateFunction(name, {LogicalType::UBIGINT, LogicalType::DOUBLE}, LogicalType::DOUBLE, state_size, init, + IS_MAX ? PacClipMaxScatterUpdateSingleDouble : PacClipMinScatterUpdateSingleDouble, + combine, double_finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, + IS_MAX ? PacClipMaxUpdateSingleDouble : PacClipMinUpdateSingleDouble, PacClipBindDouble)); set.AddFunction(AggregateFunction( name, {LogicalType::UBIGINT, LogicalType::DOUBLE, LogicalType::DOUBLE}, LogicalType::DOUBLE, state_size, init, IS_MAX ? 
PacClipMaxScatterUpdateSingleDouble : PacClipMinScatterUpdateSingleDouble, combine, double_finalize, FunctionNullHandling::DEFAULT_NULL_HANDLING, - IS_MAX ? PacClipMaxUpdateSingleDouble : PacClipMinUpdateSingleDouble, PacClipMinMaxBindDouble)); + IS_MAX ? PacClipMaxUpdateSingleDouble : PacClipMinUpdateSingleDouble, PacClipBindDouble)); } } diff --git a/src/aggregates/pac_min_max.cpp b/src/aggregates/pac_min_max.cpp index 60f5bdac..125eadf9 100644 --- a/src/aggregates/pac_min_max.cpp +++ b/src/aggregates/pac_min_max.cpp @@ -372,55 +372,7 @@ void RegisterPacMaxCountersFunctions(ExtensionLoader &loader) { } // ============================================================================ -// Clip synonyms: pac_noised_clip_min/max = pac_noised_min/max, -// pac_clip_min/max = pac_min/max -// ============================================================================ -void RegisterPacNoisedClipMinFunctions(ExtensionLoader &loader) { - AggregateFunctionSet fcn_set("pac_noised_clip_min"); - fcn_set.AddFunction(AggregateFunction("pac_noised_clip_min", {LogicalType::UBIGINT, LogicalType::ANY}, - LogicalType::ANY, nullptr, nullptr, nullptr, nullptr, nullptr, - FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); - fcn_set.AddFunction(AggregateFunction("pac_noised_clip_min", - {LogicalType::UBIGINT, LogicalType::ANY, LogicalType::DOUBLE}, - LogicalType::ANY, nullptr, nullptr, nullptr, nullptr, nullptr, - FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); - CreateAggregateFunctionInfo info(fcn_set); - loader.RegisterFunction(std::move(info)); -} - -void RegisterPacNoisedClipMaxFunctions(ExtensionLoader &loader) { - AggregateFunctionSet fcn_set("pac_noised_clip_max"); - fcn_set.AddFunction(AggregateFunction("pac_noised_clip_max", {LogicalType::UBIGINT, LogicalType::ANY}, - LogicalType::ANY, nullptr, nullptr, nullptr, nullptr, nullptr, - FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); - 
fcn_set.AddFunction(AggregateFunction( - "pac_noised_clip_max", {LogicalType::UBIGINT, LogicalType::ANY, LogicalType::DOUBLE}, LogicalType::ANY, nullptr, - nullptr, nullptr, nullptr, nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxBind)); - CreateAggregateFunctionInfo info(fcn_set); - loader.RegisterFunction(std::move(info)); -} - -void RegisterPacClipMinFunctions(ExtensionLoader &loader) { - auto list_double_type = LogicalType::LIST(PacFloatLogicalType()); - AggregateFunctionSet fcn_set("pac_clip_min"); - fcn_set.AddFunction(AggregateFunction( - "pac_clip_min", {LogicalType::UBIGINT, LogicalType::ANY}, list_double_type, nullptr, nullptr, nullptr, nullptr, - nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxCountersBind)); - AddPacListAggregateOverload(fcn_set, "clip_min"); - CreateAggregateFunctionInfo info(fcn_set); - loader.RegisterFunction(std::move(info)); -} - -void RegisterPacClipMaxFunctions(ExtensionLoader &loader) { - auto list_double_type = LogicalType::LIST(PacFloatLogicalType()); - AggregateFunctionSet fcn_set("pac_clip_max"); - fcn_set.AddFunction(AggregateFunction( - "pac_clip_max", {LogicalType::UBIGINT, LogicalType::ANY}, list_double_type, nullptr, nullptr, nullptr, nullptr, - nullptr, FunctionNullHandling::DEFAULT_NULL_HANDLING, nullptr, PacMinMaxCountersBind)); - AddPacListAggregateOverload(fcn_set, "clip_max"); - CreateAggregateFunctionInfo info(fcn_set); - loader.RegisterFunction(std::move(info)); -} +// Clip min/max registration moved to pac_clip_min_max.cpp // Explicit template instantiations #define INST_ALL(T) \ diff --git a/src/core/pac_extension.cpp b/src/core/pac_extension.cpp index 644b0ee4..40c2e584 100644 --- a/src/core/pac_extension.cpp +++ b/src/core/pac_extension.cpp @@ -255,6 +255,11 @@ static void LoadInternal(ExtensionLoader &loader) { "NULL (default) disables pac_clip_sum; set to e.g. 
64 to enable.", LogicalType::BIGINT, Value()); + db.config.AddExtensionOption("pac_clip_scale", + "Scale unsupported outlier levels to nearest supported level instead of omitting. " + "Default false (omit).", + LogicalType::BOOLEAN, Value::BOOLEAN(false)); + // Register pac_sum aggregate functions RegisterPacSumFunctions(loader); RegisterPacSumCountersFunctions(loader); diff --git a/src/include/aggregates/pac_clip_min_max.hpp b/src/include/aggregates/pac_clip_min_max.hpp index af86be9c..9a80eff6 100644 --- a/src/include/aggregates/pac_clip_min_max.hpp +++ b/src/include/aggregates/pac_clip_min_max.hpp @@ -1,15 +1,13 @@ // -// pac_clip_min_max: Approximate min/max with per-level int8_t extremes + distinct bitmaps -// Single-sided (signed arithmetic shift preserves sign), 62 levels covering 128-bit +// pac_clip_min_max: Approximate min/max with per-level uint8_t extremes + distinct bitmaps +// Two-sided (unsigned pos/neg), 62 levels covering 128-bit // #ifndef PAC_CLIP_MIN_MAX_HPP #define PAC_CLIP_MIN_MAX_HPP #include "duckdb.hpp" -#include "pac_aggregate.hpp" +#include "pac_clip_aggr.hpp" #include "pac_min_max.hpp" // for UpdateExtremesSIMD -#include -#include namespace duckdb { @@ -19,35 +17,29 @@ void RegisterPacNoisedClipMinFunctions(ExtensionLoader &loader); void RegisterPacNoisedClipMaxFunctions(ExtensionLoader &loader); // ============================================================================ -// Constants (same level structure as pac_clip_sum) +// Min/max-specific constants (shared constants in pac_clip_aggr.hpp) // ============================================================================ -constexpr int PCMM_NUM_LEVELS = 62; // 62 levels × 2-bit bands covers full 128-bit -constexpr int PCMM_LEVEL_SHIFT = 2; // 2^2 = 4x per level -constexpr int PCMM_SWAR = 8; // 8 × uint64_t = 64 × int8_t extremes (SWAR packed) +constexpr int PCMM_SWAR = 8; // 8 × uint64_t = 64 × uint8_t extremes (SWAR packed) constexpr int PCMM_ELEMENTS = 9; // 8 SWAR + 1 bitmap 
constexpr int PCMM_INLINE_THRESHOLD = 53; // levels 0-52 can use inline (53 pointers + 9 inline = 62) -// Float/double scale factors (same as pac_clip_sum) -constexpr int PCMM_FLOAT_SHIFT = 20; -constexpr int PCMM_DOUBLE_SHIFT = 27; -constexpr double PCMM_FLOAT_SCALE = static_cast(1 << PCMM_FLOAT_SHIFT); -constexpr double PCMM_DOUBLE_SCALE = static_cast(1 << PCMM_DOUBLE_SHIFT); - // ============================================================================ -// PacClipMinMaxIntState: core state with int8_t extremes per level +// PacClipMinMaxIntState: core state with uint8_t extremes per level +// Unsigned: stores absolute values only. Caller routes negatives to a +// separate state with !IS_MAX (two-sided approach, same as clip_sum). // ============================================================================ template struct PacClipMinMaxIntState { uint64_t key_hash; uint64_t update_count; - int8_t max_level_used; // -1 if none - int8_t inline_level_idx; // which level uses inline, -1 if none - int8_t level_bounds[PCMM_NUM_LEVELS]; // BOUNDOPT: worst-of-64 per level for early skip + int8_t max_level_used; // -1 if none + int8_t inline_level_idx; // which level uses inline, -1 if none + uint8_t level_bounds[CLIP_NUM_LEVELS]; // BOUNDOPT: worst-of-64 per level for early skip // 62 level pointers = 496 bytes. // Inline optimization: last PCMM_ELEMENTS slots = 72 bytes = one level. 
union { - uint64_t *levels[PCMM_NUM_LEVELS]; // 496 bytes + uint64_t *levels[CLIP_NUM_LEVELS]; // 496 bytes struct { uint64_t *_ptrs[PCMM_INLINE_THRESHOLD]; // levels 0-52 pointers (424 bytes) uint64_t inline_level[PCMM_ELEMENTS]; // 72 bytes for one inline level @@ -55,16 +47,16 @@ struct PacClipMinMaxIntState { }; // ======================================================================== - // GetLevel: route value to lowest level where shifted value fits in int8_t [-128,127] - // Threshold 128 (not 256 like sum's uint8_t): abs_val < 128 → level 0 - // 2-bit bands, same structure as clip_sum but with 7-bit magnitude range + // GetLevel: route value to lowest level where shifted value fits in uint8_t [0,255] + // Threshold 256: abs_val < 256 → level 0 + // Same as clip_sum (8-bit unsigned range) // ======================================================================== static inline int GetLevel(uint64_t abs_val) { - if (abs_val < 128) { + if (abs_val < 256) { return 0; } int bit_pos = 63 - pac_clzll(abs_val); - return std::min((bit_pos - 5) >> 1, PCMM_NUM_LEVELS - 1); + return std::min((bit_pos - 4) >> 1, CLIP_NUM_LEVELS - 1); } static inline int GetLevel128(uint64_t upper, uint64_t lower) { @@ -72,7 +64,7 @@ struct PacClipMinMaxIntState { return GetLevel(lower); } int bit_pos = 127 - pac_clzll(upper); - return std::min((bit_pos - 5) >> 1, PCMM_NUM_LEVELS - 1); + return std::min((bit_pos - 4) >> 1, CLIP_NUM_LEVELS - 1); } // ======================================================================== @@ -94,15 +86,15 @@ struct PacClipMinMaxIntState { } else { buf = reinterpret_cast(allocator.Allocate(PCMM_ELEMENTS * sizeof(uint64_t))); } - // Plain int8_t: IS_MAX init to 0x80 (-128, worst max), IS_MIN init to 0x7F (+127, worst min) - if constexpr (IS_MAX) { - memset(buf, 0x80, PCMM_SWAR * sizeof(uint64_t)); + // Unsigned uint8_t: IS_MAX init to 0x00 (worst max=0), IS_MIN init to 0xFF (worst min=255) + if (IS_MAX) { + memset(buf, 0x00, PCMM_SWAR * sizeof(uint64_t)); 
} else { - memset(buf, 0x7F, PCMM_SWAR * sizeof(uint64_t)); + memset(buf, 0xFF, PCMM_SWAR * sizeof(uint64_t)); } buf[PCMM_SWAR] = 0; // bitmap starts empty levels[k] = buf; - level_bounds[k] = IS_MAX ? INT8_MIN : INT8_MAX; // init bound to worst case + level_bounds[k] = IS_MAX ? 0 : UINT8_MAX; // init bound to worst case } inline void EnsureLevelAllocated(ArenaAllocator &allocator, int k) { @@ -119,86 +111,68 @@ struct PacClipMinMaxIntState { // BOUNDOPT: recompute worst-of-64 bound for level k // ======================================================================== void RecomputeBound(int k) { - auto *extremes = reinterpret_cast(levels[k]); - int8_t worst = extremes[0]; + auto *extremes = reinterpret_cast(levels[k]); + uint8_t worst = extremes[0]; for (int i = 1; i < 64; i++) { worst = PAC_WORSE(worst, extremes[i]); } level_bounds[k] = worst; } - // ======================================================================== - // EstimateDistinct: birthday-paradox formula from 64-bit bitmap - // ======================================================================== - static inline int EstimateDistinct(uint64_t bitmap) { - int k = pac_popcount64(bitmap); - if (k >= 64) { - return 256; - } - if (k == 0) { - return 0; - } - return static_cast(-64.0 * std::log(1.0 - k / 64.0)); - } - // ======================================================================== // UpdateExtreme: reuse the SIMD kernel from pac_min_max.hpp - // int8_t: SHIFTS=8, MASK=0x0101..., SIGNED=true, FLOAT=false + // uint8_t: SHIFTS=8, MASK=0x0101..., SIGNED=false, FLOAT=false // ======================================================================== - inline void UpdateExtreme(uint64_t *buf, int8_t shifted_val, uint64_t kh) { - auto *extremes = reinterpret_cast(buf); - UpdateExtremesSIMD(extremes, kh, - shifted_val); + inline void UpdateExtreme(uint64_t *buf, uint8_t shifted_val, uint64_t kh) { + auto *extremes = reinterpret_cast(buf); + UpdateExtremesSIMD(extremes, kh, + shifted_val); } // 
======================================================================== - // GetTotals: non-mutating finalization — compute min/max across supported levels + // GetTotals: non-mutating finalization — reconstruct unsigned extremes + // Uses same boundary logic as clip_sum (shared helpers in pac_clip_aggr.hpp) // ======================================================================== - void GetTotals(PAC_FLOAT *dst, int clip_support_threshold = 0) const { - // Initialize to worst-case: -INF for MAX, +INF for MIN - for (int j = 0; j < 64; j++) { - if constexpr (IS_MAX) { - dst[j] = -std::numeric_limits::infinity(); - } else { - dst[j] = std::numeric_limits::infinity(); - } + void GetTotals(PAC_FLOAT *dst, int clip_support_threshold = 0, bool clip_scale = false) const { + memset(dst, 0, 64 * sizeof(PAC_FLOAT)); + + // Pass 1: find first and last supported levels + int first_supported = -1, last_supported = -1; + if (clip_support_threshold > 0) { + ClipFindSupportedRange(levels, max_level_used, PCMM_SWAR, clip_support_threshold, first_supported, + last_supported); } + // Pass 2: accumulate extremes for (int k = 0; k <= max_level_used; k++) { if (!levels[k]) { continue; } - // Check clipping support - if (clip_support_threshold > 0 && EstimateDistinct(levels[k][PCMM_SWAR]) < clip_support_threshold) { - continue; // skip unsupported levels + int eff = + (clip_support_threshold > 0) ? 
ClipEffectiveLevel(k, first_supported, last_supported, clip_scale) : k; + if (eff < 0) { + continue; } - PAC_FLOAT scale = std::exp2(static_cast(PCMM_LEVEL_SHIFT * k)); - auto *extremes = reinterpret_cast(levels[k]); + PAC_FLOAT scale = std::exp2(static_cast(CLIP_LEVEL_SHIFT * eff)); + auto *extremes = reinterpret_cast(levels[k]); - // Undo SWAR interleaving from UpdateExtremesSIMD (int8_t: ELEMS=8, SHIFTS=8) + // Undo SWAR interleaving from UpdateExtremesSIMD (uint8_t: ELEMS=8, SHIFTS=8) for (int bit = 0; bit < 64; bit++) { int swar_pos = (bit % 8) * 8 + bit / 8; PAC_FLOAT reconstructed = static_cast(extremes[swar_pos]) * scale; - if constexpr (IS_MAX) { + if (IS_MAX) { if (reconstructed > dst[bit]) { dst[bit] = reconstructed; } } else { - if (reconstructed < dst[bit]) { + if (reconstructed < dst[bit] || dst[bit] == 0) { dst[bit] = reconstructed; } } } } - - // Replace infinities with 0 for bits that had no supported contribution - for (int j = 0; j < 64; j++) { - if (std::isinf(dst[j])) { - dst[j] = 0.0; - } - } } // ======================================================================== @@ -229,10 +203,10 @@ struct PacClipMinMaxIntState { } // Both have this level: merge extremes element-wise - auto *dst_ext = reinterpret_cast(levels[k]); - auto *src_ext = reinterpret_cast(src->levels[k]); + auto *dst_ext = reinterpret_cast(levels[k]); + auto *src_ext = reinterpret_cast(src->levels[k]); for (int j = 0; j < 64; j++) { - if constexpr (IS_MAX) { + if (IS_MAX) { if (src_ext[j] > dst_ext[j]) { dst_ext[j] = src_ext[j]; } @@ -256,11 +230,13 @@ struct PacClipMinMaxIntState { }; // ============================================================================ -// PacClipMinMaxStateWrapper: buffering wrapper (no two-sided for min/max) +// PacClipMinMaxStateWrapper: buffering wrapper with two-sided pos/neg +// neg_state uses !IS_MAX (opposite direction on absolute values) // ============================================================================ template struct 
PacClipMinMaxStateWrapper { using State = PacClipMinMaxIntState; + using NegState = PacClipMinMaxIntState; static constexpr int BUF_SIZE = 2; static constexpr uint64_t BUF_MASK = 3ULL; @@ -270,6 +246,7 @@ struct PacClipMinMaxStateWrapper { uint64_t n_buffered; // lower 2 bits: count, upper bits: state pointer State *state; }; + NegState *neg_state; // separate state for negatives (stores absolute values, opposite direction) State *GetState() const { return reinterpret_cast(reinterpret_cast(state) & ~7ULL); @@ -287,6 +264,20 @@ struct PacClipMinMaxStateWrapper { return s; } + NegState *GetNegState() const { + return neg_state; + } + + NegState *EnsureNegState(ArenaAllocator &a) { + if (!neg_state) { + neg_state = reinterpret_cast(a.Allocate(sizeof(NegState))); + memset(neg_state, 0, sizeof(NegState)); + neg_state->max_level_used = -1; + neg_state->inline_level_idx = -1; + } + return neg_state; + } + static idx_t StateSize() { return sizeof(PacClipMinMaxStateWrapper); } diff --git a/test/sql/pac_clip_min_max.test b/test/sql/pac_clip_min_max.test index 63e1e811..c8284047 100644 --- a/test/sql/pac_clip_min_max.test +++ b/test/sql/pac_clip_min_max.test @@ -100,7 +100,7 @@ SET pac_clip_support = NULL query I SELECT pac_noised_clip_min(hash(id)::UBIGINT, value) FROM min_outlier ---- --1015808 +-999424 statement ok SET pac_clip_support = 5 @@ -112,7 +112,7 @@ SELECT pac_noised_clip_min(hash(id)::UBIGINT, value) FROM min_outlier # ============================================================================ # Same-level values should NOT be clipped -# Level 0 covers [-128, 127], so 50 and 120 are in the same level +# Level 0 covers [0, 255], so 50 and 120 are in the same level # ============================================================================ statement ok @@ -280,3 +280,80 @@ query I SELECT pac_noised_clip_min(hash(i)::UBIGINT, (i + 10)::UINTEGER) BETWEEN 1 AND 20 FROM range(1, 100) t(i) ---- true + +# 
============================================================================ +# Two-sided: negative-only values +# ============================================================================ + +# MAX of all-negative values: should be close to -1 (the largest) +query I +SELECT pac_noised_clip_max(hash(i)::UBIGINT, -i) BETWEEN -10 AND 0 FROM range(1, 1001) t(i) +---- +true + +# MIN of all-negative values: should be close to -1000 (the smallest) +query I +SELECT pac_noised_clip_min(hash(i)::UBIGINT, -i) BETWEEN -1100 AND -900 FROM range(1, 1001) t(i) +---- +true + +# ============================================================================ +# Two-sided: mixed positive and negative values +# ============================================================================ + +# MAX of mixed: positives 1..500, negatives -1..-500 → max should be ~500 +query I +SELECT pac_noised_clip_max(hash(i)::UBIGINT, + CASE WHEN i <= 500 THEN i ELSE -(i - 500) END +) BETWEEN 400 AND 600 FROM range(1, 1001) t(i) +---- +true + +# MIN of mixed: positives 1..500, negatives -1..-500 → min should be ~-500 +query I +SELECT pac_noised_clip_min(hash(i)::UBIGINT, + CASE WHEN i <= 500 THEN i ELSE -(i - 500) END +) BETWEEN -600 AND -400 FROM range(1, 1001) t(i) +---- +true + +# ============================================================================ +# Two-sided: positive-only should not allocate neg state (just verify correctness) +# ============================================================================ + +query I +SELECT pac_noised_clip_max(hash(i)::UBIGINT, i) BETWEEN 50 AND 128 FROM range(1, 100) t(i) +---- +true + +query I +SELECT pac_noised_clip_min(hash(i)::UBIGINT, i) BETWEEN 1 AND 10 FROM range(1, 100) t(i) +---- +true + +# ============================================================================ +# Two-sided: negative outlier clipped from mixed data +# ============================================================================ + +statement ok +CREATE TABLE neg_clip AS 
+SELECT i as id, + CASE WHEN i <= 1000 THEN 50 + (i % 20) + ELSE -999999 + END as value +FROM range(1, 1002) t(i) + +# Without clip: min dominated by outlier +query I +SELECT pac_noised_clip_min(hash(id)::UBIGINT, value) < -500000 FROM neg_clip +---- +true + +# With clip: outlier removed, min should be close to 50 +statement ok +SET pac_clip_support = 5 + +query I +SELECT pac_noised_clip_min(hash(id)::UBIGINT, value) BETWEEN 40 AND 70 FROM neg_clip +---- +true From 513555356db34824970bbeb466ce8a8a48d70358 Mon Sep 17 00:00:00 2001 From: peter Date: Thu, 2 Apr 2026 19:04:24 +0200 Subject: [PATCH 25/27] rename PAC2_ into CLIP_ memory optimizations for clipping: - save second state pointer for unsigned types (one-sided) - only hugeint needs 62 levels, int64 can do with 30 use templating to make both variants possible in the same code - we do not reduce below int64 because if we would, inlining would not work and there would be no memory savings anyway --- src/aggregates/pac_clip_sum.cpp | 244 ++++++++++++-------- src/include/aggregates/pac_clip_aggr.hpp | 5 +- src/include/aggregates/pac_clip_min_max.hpp | 50 ++-- src/include/aggregates/pac_clip_sum.hpp | 97 ++++---- 4 files changed, 231 insertions(+), 165 deletions(-) diff --git a/src/aggregates/pac_clip_sum.cpp b/src/aggregates/pac_clip_sum.cpp index 79a8061e..69dc3209 100644 --- a/src/aggregates/pac_clip_sum.cpp +++ b/src/aggregates/pac_clip_sum.cpp @@ -9,11 +9,12 @@ namespace duckdb { // ============================================================================ // Inner state update: add one unsigned value to the state // ============================================================================ -AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, uint64_t key_hash, uint64_t value, +template +AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, uint64_t key_hash, uint64_t value, ArenaAllocator &allocator) { state.key_hash |= key_hash; - int level = 
PacClipSumIntState::GetLevel(value); + int level = PacClipSumIntState::GetLevel(value); uint64_t shift = level << 1; uint16_t shifted_val = static_cast(value >> shift); // max 255 (8 bits) @@ -31,7 +32,8 @@ AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, } // Overload for hugeint_t -AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, uint64_t key_hash, hugeint_t value, +template +AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, uint64_t key_hash, hugeint_t value, ArenaAllocator &allocator) { state.key_hash |= key_hash; @@ -45,7 +47,7 @@ AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, lower = value.lower; } - int level = PacClipSumIntState::GetLevel128(upper, lower); + int level = PacClipSumIntState::GetLevel128(upper, lower); uint64_t shift = level << 1; // Shift the 128-bit value right by shift bits, take lower 8 bits @@ -70,8 +72,8 @@ AUTOVECTORIZE inline void PacClipSumUpdateOneInternal(PacClipSumIntState &state, // Value routing: two-sided (pos/neg) dispatch // ============================================================================ // Route a uint64_t value — when SIGNED, the bits represent a signed int64_t (two's complement) -template -inline void PacClipSumRouteValue(PacClipSumStateWrapper &wrapper, PacClipSumIntState *pos_state, uint64_t hash, +template +inline void PacClipSumRouteValue(PacClipSumStateWrapper &wrapper, PacClipSumIntState *pos_state, uint64_t hash, uint64_t value, ArenaAllocator &a) { if (DUCKDB_LIKELY(hash)) { int64_t sval = static_cast(value); // reinterpret bits as signed @@ -87,15 +89,16 @@ inline void PacClipSumRouteValue(PacClipSumStateWrapper &wrapper, PacClipSumIntS } // Overload for hugeint routing (signed) -inline void PacClipSumRouteHugeint(PacClipSumStateWrapper &wrapper, PacClipSumIntState *pos_state, uint64_t hash, - hugeint_t value, ArenaAllocator &a, bool is_signed) { +template +inline void 
PacClipSumRouteHugeint(PacClipSumStateWrapper &wrapper, PacClipSumIntState *pos_state, + uint64_t hash, hugeint_t value, ArenaAllocator &a, bool is_signed) { if (DUCKDB_LIKELY(hash)) { if (is_signed && value.upper < 0) { auto *neg = wrapper.EnsureNegState(a); hugeint_t abs_val = -value; uint64_t upper = static_cast(abs_val.upper); uint64_t lower = abs_val.lower; - int level = PacClipSumIntState::GetLevel128(upper, lower); + int level = PacClipSumIntState::GetLevel128(upper, lower); uint64_t shift = level << 1; uint16_t shifted_val; if (shift >= 64) { @@ -123,32 +126,32 @@ inline void PacClipSumRouteHugeint(PacClipSumStateWrapper &wrapper, PacClipSumIn // ============================================================================ // Buffer flush // ============================================================================ -template -inline void PacClipSumFlushBuffer(PacClipSumStateWrapper &src, PacClipSumStateWrapper &dst, ArenaAllocator &a) { - uint64_t cnt = src.n_buffered & PacClipSumStateWrapper::BUF_MASK; +template +inline void PacClipSumFlushBuffer(PacClipSumStateWrapper &src, PacClipSumStateWrapper &dst, ArenaAllocator &a) { + uint64_t cnt = src.n_buffered & PacClipSumStateWrapper::BUF_MASK; if (cnt > 0) { auto *dst_state = dst.EnsureState(a); for (uint64_t i = 0; i < cnt; i++) { - PacClipSumRouteValue(dst, dst_state, src.hash_buf[i], src.val_buf[i], a); + PacClipSumRouteValue(dst, dst_state, src.hash_buf[i], src.val_buf[i], a); } - src.n_buffered &= ~PacClipSumStateWrapper::BUF_MASK; + src.n_buffered &= ~PacClipSumStateWrapper::BUF_MASK; } } // ============================================================================ // Buffered update // ============================================================================ -template -AUTOVECTORIZE inline void PacClipSumUpdateOne(PacClipSumStateWrapper &agg, uint64_t key_hash, ValueT value, +template +AUTOVECTORIZE inline void PacClipSumUpdateOne(PacClipSumStateWrapper &agg, uint64_t key_hash, ValueT value, 
ArenaAllocator &a) { - uint64_t cnt = agg.n_buffered & PacClipSumStateWrapper::BUF_MASK; - if (DUCKDB_UNLIKELY(cnt == PacClipSumStateWrapper::BUF_SIZE)) { + uint64_t cnt = agg.n_buffered & PacClipSumStateWrapper::BUF_MASK; + if (DUCKDB_UNLIKELY(cnt == PacClipSumStateWrapper::BUF_SIZE)) { auto *dst_state = agg.EnsureState(a); - for (int i = 0; i < PacClipSumStateWrapper::BUF_SIZE; i++) { - PacClipSumRouteValue(agg, dst_state, agg.hash_buf[i], agg.val_buf[i], a); + for (int i = 0; i < PacClipSumStateWrapper::BUF_SIZE; i++) { + PacClipSumRouteValue(agg, dst_state, agg.hash_buf[i], agg.val_buf[i], a); } - PacClipSumRouteValue(agg, dst_state, key_hash, static_cast(value), a); - agg.n_buffered &= ~PacClipSumStateWrapper::BUF_MASK; + PacClipSumRouteValue(agg, dst_state, key_hash, static_cast(value), a); + agg.n_buffered &= ~PacClipSumStateWrapper::BUF_MASK; } else { agg.val_buf[cnt] = static_cast(value); agg.hash_buf[cnt] = key_hash; @@ -157,9 +160,10 @@ AUTOVECTORIZE inline void PacClipSumUpdateOne(PacClipSumStateWrapper &agg, uint6 } // Hugeint buffered update — bypass buffer, update directly -template -inline void PacClipSumUpdateOne(PacClipSumStateWrapper &agg, uint64_t key_hash, hugeint_t value, ArenaAllocator &a) { - PacClipSumFlushBuffer(agg, agg, a); // flush any buffered values first +template +inline void PacClipSumUpdateOne(PacClipSumStateWrapper &agg, uint64_t key_hash, hugeint_t value, + ArenaAllocator &a) { + PacClipSumFlushBuffer(agg, agg, a); // flush any buffered values first auto *state = agg.EnsureState(a); PacClipSumRouteHugeint(agg, state, key_hash, value, a, SIGNED); } @@ -167,8 +171,9 @@ inline void PacClipSumUpdateOne(PacClipSumStateWrapper &agg, uint64_t key_hash, // ============================================================================ // Vectorized Update and ScatterUpdate // ============================================================================ -template -static void PacClipSumUpdate(Vector inputs[], PacClipSumStateWrapper &state, 
idx_t count, ArenaAllocator &allocator) { +template +static void PacClipSumUpdate(Vector inputs[], PacClipSumStateWrapper &state, idx_t count, + ArenaAllocator &allocator) { UnifiedVectorFormat hash_data, value_data; inputs[0].ToUnifiedFormat(count, hash_data); inputs[1].ToUnifiedFormat(count, value_data); @@ -179,8 +184,8 @@ static void PacClipSumUpdate(Vector inputs[], PacClipSumStateWrapper &state, idx for (idx_t i = 0; i < count; i++) { auto h_idx = hash_data.sel->get_index(i); auto v_idx = value_data.sel->get_index(i); - PacClipSumUpdateOne(state, hashes[h_idx], ConvertValue::convert(values[v_idx]), - allocator); + PacClipSumUpdateOne(state, hashes[h_idx], ConvertValue::convert(values[v_idx]), + allocator); } } else { for (idx_t i = 0; i < count; i++) { @@ -189,13 +194,13 @@ static void PacClipSumUpdate(Vector inputs[], PacClipSumStateWrapper &state, idx if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { continue; } - PacClipSumUpdateOne(state, hashes[h_idx], ConvertValue::convert(values[v_idx]), - allocator); + PacClipSumUpdateOne(state, hashes[h_idx], ConvertValue::convert(values[v_idx]), + allocator); } } } -template +template static void PacClipSumScatterUpdate(Vector inputs[], Vector &states, idx_t count, ArenaAllocator &allocator) { UnifiedVectorFormat hash_data, value_data, sdata; inputs[0].ToUnifiedFormat(count, hash_data); @@ -204,7 +209,7 @@ static void PacClipSumScatterUpdate(Vector inputs[], Vector &states, idx_t count auto hashes = UnifiedVectorFormat::GetData(hash_data); auto values = UnifiedVectorFormat::GetData(value_data); - auto state_ptrs = UnifiedVectorFormat::GetData(sdata); + auto state_ptrs = UnifiedVectorFormat::GetData *>(sdata); for (idx_t i = 0; i < count; i++) { auto h_idx = hash_data.sel->get_index(i); @@ -213,42 +218,41 @@ static void PacClipSumScatterUpdate(Vector inputs[], Vector &states, idx_t count if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { 
continue; } - PacClipSumUpdateOne(*state, hashes[h_idx], ConvertValue::convert(values[v_idx]), allocator); + PacClipSumUpdateOne(*state, hashes[h_idx], ConvertValue::convert(values[v_idx]), + allocator); } } // ============================================================================ // X-macro: generate Update/ScatterUpdate for integer types // ============================================================================ -#define PAC2_INT_TYPES_SIGNED \ +#define CLIP_SUM_INT_TYPES_SIGNED \ X(TinyInt, int64_t, int8_t, true) \ X(SmallInt, int64_t, int16_t, true) \ X(Integer, int64_t, int32_t, true) \ X(BigInt, int64_t, int64_t, true) -#define PAC2_INT_TYPES_UNSIGNED \ +#define CLIP_SUM_INT_TYPES_UNSIGNED \ X(UTinyInt, uint64_t, uint8_t, false) \ X(USmallInt, uint64_t, uint16_t, false) \ X(UInteger, uint64_t, uint32_t, false) \ X(UBigInt, uint64_t, uint64_t, false) -#define X(NAME, VALUE_T, INPUT_T, SIGNED) \ - static void PacClipSumUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, \ - idx_t count) { \ - auto &state = *reinterpret_cast(state_p); \ - PacClipSumUpdate(inputs, state, count, aggr.allocator); \ +#define X(NAME, VALUE_T, INPUT_T, SIGNED) \ + static void PacClipSumUpdate##NAME(Vector input[], AggregateInputData &agg, idx_t, data_ptr_t state_p, idx_t cnt) {\ + auto &state = *reinterpret_cast *>(state_p); \ + PacClipSumUpdate(input, state, cnt, agg.allocator); \ } \ - static void PacClipSumScatterUpdate##NAME(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &states, \ - idx_t count) { \ - PacClipSumScatterUpdate(inputs, states, count, aggr.allocator); \ + static void PacClipSumScatterUpdate##NAME(Vector input[], AggregateInputData &agg, idx_t, Vector &sts, idx_t cnt) {\ + PacClipSumScatterUpdate(input, sts, cnt, agg.allocator); \ } -PAC2_INT_TYPES_SIGNED -PAC2_INT_TYPES_UNSIGNED +CLIP_SUM_INT_TYPES_SIGNED +CLIP_SUM_INT_TYPES_UNSIGNED #undef X -// HugeInt update (signed, via hugeint routing) +// HugeInt update 
(signed, via hugeint routing — 128-bit needs full 62 levels) static void PacClipSumUpdateHugeInt(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, idx_t count) { - auto &state = *reinterpret_cast(state_p); + auto &state = *reinterpret_cast *>(state_p); UnifiedVectorFormat hash_data, value_data; inputs[0].ToUnifiedFormat(count, hash_data); inputs[1].ToUnifiedFormat(count, value_data); @@ -260,7 +264,7 @@ static void PacClipSumUpdateHugeInt(Vector inputs[], AggregateInputData &aggr, i if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { continue; } - PacClipSumUpdateOne(state, hashes[h_idx], values[v_idx], aggr.allocator); + PacClipSumUpdateOne(state, hashes[h_idx], values[v_idx], aggr.allocator); } } static void PacClipSumScatterUpdateHugeInt(Vector inputs[], AggregateInputData &aggr, idx_t, Vector &states, @@ -271,7 +275,7 @@ static void PacClipSumScatterUpdateHugeInt(Vector inputs[], AggregateInputData & states.ToUnifiedFormat(count, sdata); auto hashes = UnifiedVectorFormat::GetData(hash_data); auto values = UnifiedVectorFormat::GetData(value_data); - auto state_ptrs = UnifiedVectorFormat::GetData(sdata); + auto state_ptrs = UnifiedVectorFormat::GetData *>(sdata); for (idx_t i = 0; i < count; i++) { auto h_idx = hash_data.sel->get_index(i); auto v_idx = value_data.sel->get_index(i); @@ -279,14 +283,14 @@ static void PacClipSumScatterUpdateHugeInt(Vector inputs[], AggregateInputData & if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { continue; } - PacClipSumUpdateOne(*state, hashes[h_idx], values[v_idx], aggr.allocator); + PacClipSumUpdateOne(*state, hashes[h_idx], values[v_idx], aggr.allocator); } } -// UHugeInt update (unsigned, convert to hugeint for routing) +// UHugeInt update (unsigned, convert to hugeint for routing — 128-bit needs full 62 levels) static void PacClipSumUpdateUHugeInt(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, idx_t count) { 
- auto &state = *reinterpret_cast(state_p); + auto &state = *reinterpret_cast *>(state_p); UnifiedVectorFormat hash_data, value_data; inputs[0].ToUnifiedFormat(count, hash_data); inputs[1].ToUnifiedFormat(count, value_data); @@ -304,7 +308,7 @@ static void PacClipSumUpdateUHugeInt(Vector inputs[], AggregateInputData &aggr, if (DUCKDB_LIKELY(hashes[h_idx])) { uint64_t upper = static_cast(v.upper); uint64_t lower = v.lower; - int level = PacClipSumIntState::GetLevel128(upper, lower); + int level = PacClipSumIntState::GetLevel128(upper, lower); uint64_t shift = level << 1; uint16_t shifted_val; if (shift >= 64) { @@ -333,7 +337,7 @@ static void PacClipSumScatterUpdateUHugeInt(Vector inputs[], AggregateInputData states.ToUnifiedFormat(count, sdata); auto hashes = UnifiedVectorFormat::GetData(hash_data); auto values = UnifiedVectorFormat::GetData(value_data); - auto state_ptrs = UnifiedVectorFormat::GetData(sdata); + auto state_ptrs = UnifiedVectorFormat::GetData *>(sdata); for (idx_t i = 0; i < count; i++) { auto h_idx = hash_data.sel->get_index(i); auto v_idx = value_data.sel->get_index(i); @@ -346,7 +350,7 @@ static void PacClipSumScatterUpdateUHugeInt(Vector inputs[], AggregateInputData if (DUCKDB_LIKELY(hashes[h_idx])) { uint64_t upper = static_cast(v.upper); uint64_t lower = v.lower; - int level = PacClipSumIntState::GetLevel128(upper, lower); + int level = PacClipSumIntState::GetLevel128(upper, lower); uint64_t shift = level << 1; uint16_t shifted_val; if (shift >= 64) { @@ -373,7 +377,7 @@ static void PacClipSumScatterUpdateUHugeInt(Vector inputs[], AggregateInputData // ============================================================================ template static void PacClipSumUpdateFloat(Vector inputs[], AggregateInputData &aggr, idx_t, data_ptr_t state_p, idx_t count) { - auto &state = *reinterpret_cast(state_p); + auto &state = *reinterpret_cast *>(state_p); UnifiedVectorFormat hash_data, value_data; inputs[0].ToUnifiedFormat(count, hash_data); 
inputs[1].ToUnifiedFormat(count, value_data); @@ -384,8 +388,8 @@ static void PacClipSumUpdateFloat(Vector inputs[], AggregateInputData &aggr, idx for (idx_t i = 0; i < count; i++) { auto h_idx = hash_data.sel->get_index(i); auto v_idx = value_data.sel->get_index(i); - PacClipSumUpdateOne(state, hashes[h_idx], ScaleFloatToInt64(values[v_idx]), - aggr.allocator); + PacClipSumUpdateOne( + state, hashes[h_idx], ScaleFloatToInt64(values[v_idx]), aggr.allocator); } } else { for (idx_t i = 0; i < count; i++) { @@ -394,8 +398,8 @@ static void PacClipSumUpdateFloat(Vector inputs[], AggregateInputData &aggr, idx if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { continue; } - PacClipSumUpdateOne(state, hashes[h_idx], ScaleFloatToInt64(values[v_idx]), - aggr.allocator); + PacClipSumUpdateOne( + state, hashes[h_idx], ScaleFloatToInt64(values[v_idx]), aggr.allocator); } } } @@ -409,7 +413,7 @@ static void PacClipSumScatterUpdateFloat(Vector inputs[], AggregateInputData &ag states.ToUnifiedFormat(count, sdata); auto hashes = UnifiedVectorFormat::GetData(hash_data); auto values = UnifiedVectorFormat::GetData(value_data); - auto state_ptrs = UnifiedVectorFormat::GetData(sdata); + auto state_ptrs = UnifiedVectorFormat::GetData *>(sdata); for (idx_t i = 0; i < count; i++) { auto h_idx = hash_data.sel->get_index(i); @@ -418,8 +422,8 @@ static void PacClipSumScatterUpdateFloat(Vector inputs[], AggregateInputData &ag if (!hash_data.validity.RowIsValid(h_idx) || !value_data.validity.RowIsValid(v_idx)) { continue; } - PacClipSumUpdateOne(*state, hashes[h_idx], ScaleFloatToInt64(values[v_idx]), - aggr.allocator); + PacClipSumUpdateOne( + *state, hashes[h_idx], ScaleFloatToInt64(values[v_idx]), aggr.allocator); } } @@ -444,13 +448,14 @@ static void PacClipSumScatterUpdateSingleDouble(Vector inputs[], AggregateInputD // ============================================================================ // Combine // 
============================================================================ +template AUTOVECTORIZE static void PacClipSumCombineInt(Vector &src, Vector &dst, idx_t count, ArenaAllocator &allocator) { - auto src_wrapper = FlatVector::GetData(src); - auto dst_wrapper = FlatVector::GetData(dst); + auto src_wrapper = FlatVector::GetData *>(src); + auto dst_wrapper = FlatVector::GetData *>(dst); for (idx_t i = 0; i < count; i++) { // Flush src's buffer into dst - PacClipSumFlushBuffer(*src_wrapper[i], *dst_wrapper[i], allocator); + PacClipSumFlushBuffer(*src_wrapper[i], *dst_wrapper[i], allocator); auto *s = src_wrapper[i]->GetState(); if (!s) { @@ -473,7 +478,10 @@ AUTOVECTORIZE static void PacClipSumCombineInt(Vector &src, Vector &dst, idx_t c } static void PacClipSumCombine(Vector &src, Vector &dst, AggregateInputData &aggr, idx_t count) { - PacClipSumCombineInt(src, dst, count, aggr.allocator); + PacClipSumCombineInt<>(src, dst, count, aggr.allocator); +} +static void PacClipSumCombine128(Vector &src, Vector &dst, AggregateInputData &aggr, idx_t count) { + PacClipSumCombineInt(src, dst, count, aggr.allocator); } // PacClipBindData is defined in pac_clip_aggr.hpp @@ -481,9 +489,9 @@ static void PacClipSumCombine(Vector &src, Vector &dst, AggregateInputData &aggr // ============================================================================ // Finalize // ============================================================================ -template +template static void PacClipSumFinalize(Vector &states, AggregateInputData &input, Vector &result, idx_t count, idx_t offset) { - auto state_ptrs = FlatVector::GetData(states); + auto state_ptrs = FlatVector::GetData *>(states); auto data = FlatVector::GetData(result); auto &result_mask = FlatVector::Validity(result); auto &bind = static_cast(*input.bind_data); @@ -495,7 +503,7 @@ static void PacClipSumFinalize(Vector &states, AggregateInputData &input, Vector bool clip_scale = bind.clip_scale; for (idx_t i = 0; i < count; i++) 
{ - PacClipSumFlushBuffer(*state_ptrs[i], *state_ptrs[i], input.allocator); + PacClipSumFlushBuffer(*state_ptrs[i], *state_ptrs[i], input.allocator); PAC_FLOAT buf[64] = {0}; auto *pos = state_ptrs[i]->GetState(); @@ -535,36 +543,46 @@ static void PacClipSumFinalize(Vector &states, AggregateInputData &input, Vector } // Instantiate noised finalize (scalar output for pac_noised_clip_sum) +// 64-bit types static void PacClipSumNoisedFinalizeSigned(Vector &states, AggregateInputData &input, Vector &result, idx_t count, idx_t offset) { - PacClipSumFinalize(states, input, result, count, offset); + PacClipSumFinalize(states, input, result, count, offset); } static void PacClipSumNoisedFinalizeUnsigned(Vector &states, AggregateInputData &input, Vector &result, idx_t count, idx_t offset) { - PacClipSumFinalize(states, input, result, count, offset); + PacClipSumFinalize(states, input, result, count, offset); +} +// 128-bit types +static void PacClipSumNoisedFinalizeSigned128(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalize(states, input, result, count, offset); +} +static void PacClipSumNoisedFinalizeUnsigned128(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalize(states, input, result, count, offset); } // BIGINT output variant — used for count→sum conversion where the original returned BIGINT static void PacClipSumNoisedFinalizeBigInt(Vector &states, AggregateInputData &input, Vector &result, idx_t count, idx_t offset) { - PacClipSumFinalize(states, input, result, count, offset); + PacClipSumFinalize(states, input, result, count, offset); } // Float/double output variants static void PacClipSumNoisedFinalizeFloat(Vector &states, AggregateInputData &input, Vector &result, idx_t count, idx_t offset) { - PacClipSumFinalize(states, input, result, count, offset); + PacClipSumFinalize(states, input, result, count, offset); } static void 
PacClipSumNoisedFinalizeDouble(Vector &states, AggregateInputData &input, Vector &result, idx_t count, idx_t offset) { - PacClipSumFinalize(states, input, result, count, offset); + PacClipSumFinalize(states, input, result, count, offset); } // ============================================================================ // Counters finalize (LIST output for pac_clip_sum) // ============================================================================ -template +template static void PacClipSumFinalizeCounters(Vector &states, AggregateInputData &input, Vector &result, idx_t count, idx_t offset) { - auto state_ptrs = FlatVector::GetData(states); + auto state_ptrs = FlatVector::GetData *>(states); auto &bind = static_cast(*input.bind_data); int clip_support = bind.clip_support_threshold; double correction = bind.correction; @@ -582,7 +600,7 @@ static void PacClipSumFinalizeCounters(Vector &states, AggregateInputData &input auto child_data = FlatVector::GetData(child_vec); for (idx_t i = 0; i < count; i++) { - PacClipSumFlushBuffer(*state_ptrs[i], *state_ptrs[i], input.allocator); + PacClipSumFlushBuffer(*state_ptrs[i], *state_ptrs[i], input.allocator); list_entries[offset + i].offset = i * 64; list_entries[offset + i].length = 64; @@ -622,24 +640,40 @@ static void PacClipSumFinalizeCounters(Vector &states, AggregateInputData &input } } +// 64-bit counters finalize static void PacClipSumFinalizeCountersSigned(Vector &states, AggregateInputData &input, Vector &result, idx_t count, idx_t offset) { - PacClipSumFinalizeCounters(states, input, result, count, offset); + PacClipSumFinalizeCounters(states, input, result, count, offset); } static void PacClipSumFinalizeCountersUnsigned(Vector &states, AggregateInputData &input, Vector &result, idx_t count, idx_t offset) { - PacClipSumFinalizeCounters(states, input, result, count, offset); + PacClipSumFinalizeCounters(states, input, result, count, offset); +} +// 128-bit counters finalize +static void 
PacClipSumFinalizeCountersSigned128(Vector &states, AggregateInputData &input, Vector &result, idx_t count, + idx_t offset) { + PacClipSumFinalizeCounters(states, input, result, count, offset); +} +static void PacClipSumFinalizeCountersUnsigned128(Vector &states, AggregateInputData &input, Vector &result, + idx_t count, idx_t offset) { + PacClipSumFinalizeCounters(states, input, result, count, offset); } // ============================================================================ // State size / init / bind // ============================================================================ static idx_t PacClipSumStateSize(const AggregateFunction &) { - return sizeof(PacClipSumStateWrapper); + return sizeof(PacClipSumStateWrapper<>); +} +static idx_t PacClipSumStateSize128(const AggregateFunction &) { + return sizeof(PacClipSumStateWrapper); } static void PacClipSumInitialize(const AggregateFunction &, data_ptr_t state_p) { - memset(state_p, 0, sizeof(PacClipSumStateWrapper)); + memset(state_p, 0, sizeof(PacClipSumStateWrapper<>)); +} +static void PacClipSumInitialize128(const AggregateFunction &, data_ptr_t state_p) { + memset(state_p, 0, sizeof(PacClipSumStateWrapper)); } // PacClipBind, PacClipBindFloat, PacClipBindDouble are defined in pac_clip_aggr.hpp @@ -665,10 +699,10 @@ static AggregateFunction GetPacClipSumNoisedAggregate(PhysicalType type) { PacClipSumScatterUpdateBigInt, PacClipSumCombine, PacClipSumNoisedFinalizeSigned, FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateBigInt); case PhysicalType::INT128: - return AggregateFunction("pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::HUGEINT}, - LogicalType::HUGEINT, PacClipSumStateSize, PacClipSumInitialize, - PacClipSumScatterUpdateHugeInt, PacClipSumCombine, PacClipSumNoisedFinalizeSigned, - FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateHugeInt); + return AggregateFunction( + "pac_noised_clip_sum", {LogicalType::UBIGINT, LogicalType::HUGEINT}, LogicalType::HUGEINT, + 
PacClipSumStateSize128, PacClipSumInitialize128, PacClipSumScatterUpdateHugeInt, PacClipSumCombine128, + PacClipSumNoisedFinalizeSigned128, FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateHugeInt); default: throw InternalException("pac_noised_clip_sum: unsupported decimal physical type"); } @@ -694,8 +728,8 @@ static AggregateFunction GetPacClipSumCountersAggregate(PhysicalType type) { FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateBigInt); case PhysicalType::INT128: return AggregateFunction("pac_clip_sum", {LogicalType::UBIGINT, LogicalType::HUGEINT}, list_type, - PacClipSumStateSize, PacClipSumInitialize, PacClipSumScatterUpdateHugeInt, - PacClipSumCombine, PacClipSumFinalizeCountersSigned, + PacClipSumStateSize128, PacClipSumInitialize128, PacClipSumScatterUpdateHugeInt, + PacClipSumCombine128, PacClipSumFinalizeCountersSigned128, FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateHugeInt); default: throw InternalException("pac_clip_sum: unsupported decimal physical type"); @@ -770,10 +804,19 @@ static void RegisterClipSumTypeOverloads(AggregateFunctionSet &set, const string PacClipSumFinalizeCountersUnsigned, PacClipSumUpdateUInteger); AddClipSumCountersFcn(set, name, LogicalType::UBIGINT, PacClipSumScatterUpdateUBigInt, PacClipSumFinalizeCountersUnsigned, PacClipSumUpdateUBigInt); - AddClipSumCountersFcn(set, name, LogicalType::HUGEINT, PacClipSumScatterUpdateHugeInt, - PacClipSumFinalizeCountersSigned, PacClipSumUpdateHugeInt); - AddClipSumCountersFcn(set, name, LogicalType::UHUGEINT, PacClipSumScatterUpdateUHugeInt, - PacClipSumFinalizeCountersUnsigned, PacClipSumUpdateUHugeInt); + // HUGEINT/UHUGEINT: use 128-bit state (62 levels) + { + auto lt = LogicalType::LIST(PacFloatLogicalType()); + set.AddFunction(AggregateFunction( + name, {LogicalType::UBIGINT, LogicalType::HUGEINT}, lt, PacClipSumStateSize128, PacClipSumInitialize128, + PacClipSumScatterUpdateHugeInt, PacClipSumCombine128, PacClipSumFinalizeCountersSigned128, 
+ FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateHugeInt, PacClipBind)); + set.AddFunction( + AggregateFunction(name, {LogicalType::UBIGINT, LogicalType::UHUGEINT}, lt, PacClipSumStateSize128, + PacClipSumInitialize128, PacClipSumScatterUpdateUHugeInt, PacClipSumCombine128, + PacClipSumFinalizeCountersUnsigned128, FunctionNullHandling::DEFAULT_NULL_HANDLING, + PacClipSumUpdateUHugeInt, PacClipBind)); + } } else { // Noised (scalar HUGEINT) variants AddNoisedClipSumFcn(set, name, LogicalType::TINYINT, LogicalType::HUGEINT, PacClipSumScatterUpdateTinyInt, @@ -794,10 +837,17 @@ static void RegisterClipSumTypeOverloads(AggregateFunctionSet &set, const string PacClipSumNoisedFinalizeUnsigned, PacClipSumUpdateUInteger); AddNoisedClipSumFcn(set, name, LogicalType::UBIGINT, LogicalType::HUGEINT, PacClipSumScatterUpdateUBigInt, PacClipSumNoisedFinalizeUnsigned, PacClipSumUpdateUBigInt); - AddNoisedClipSumFcn(set, name, LogicalType::HUGEINT, LogicalType::HUGEINT, PacClipSumScatterUpdateHugeInt, - PacClipSumNoisedFinalizeSigned, PacClipSumUpdateHugeInt); - AddNoisedClipSumFcn(set, name, LogicalType::UHUGEINT, LogicalType::HUGEINT, PacClipSumScatterUpdateUHugeInt, - PacClipSumNoisedFinalizeUnsigned, PacClipSumUpdateUHugeInt); + // HUGEINT/UHUGEINT: use 128-bit state (62 levels) + set.AddFunction( + AggregateFunction(name, {LogicalType::UBIGINT, LogicalType::HUGEINT}, LogicalType::HUGEINT, + PacClipSumStateSize128, PacClipSumInitialize128, PacClipSumScatterUpdateHugeInt, + PacClipSumCombine128, PacClipSumNoisedFinalizeSigned128, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateHugeInt, PacClipBind)); + set.AddFunction( + AggregateFunction(name, {LogicalType::UBIGINT, LogicalType::UHUGEINT}, LogicalType::HUGEINT, + PacClipSumStateSize128, PacClipSumInitialize128, PacClipSumScatterUpdateUHugeInt, + PacClipSumCombine128, PacClipSumNoisedFinalizeUnsigned128, + FunctionNullHandling::DEFAULT_NULL_HANDLING, PacClipSumUpdateUHugeInt, PacClipBind)); } } diff 
--git a/src/include/aggregates/pac_clip_aggr.hpp b/src/include/aggregates/pac_clip_aggr.hpp index a9832550..85c07fac 100644 --- a/src/include/aggregates/pac_clip_aggr.hpp +++ b/src/include/aggregates/pac_clip_aggr.hpp @@ -15,8 +15,9 @@ namespace duckdb { // ============================================================================ // Shared clip aggregate constants // ============================================================================ -constexpr int CLIP_NUM_LEVELS = 62; // 62 levels × 2-bit bands covers full 128-bit -constexpr int CLIP_LEVEL_SHIFT = 2; // 2^2 = 4x per level +constexpr int CLIP_NUM_LEVELS = 62; // 62 levels × 2-bit bands covers full 128-bit +constexpr int CLIP_NUM_LEVELS_64 = 30; // 30 levels covers 64-bit (max_level = 29) +constexpr int CLIP_LEVEL_SHIFT = 2; // 2^2 = 4x per level // Float/double → int64 scale factors (powers of 2 for exact FP arithmetic) constexpr int CLIP_FLOAT_SHIFT = 20; // 2^20 ≈ 1M diff --git a/src/include/aggregates/pac_clip_min_max.hpp b/src/include/aggregates/pac_clip_min_max.hpp index 9a80eff6..f528334e 100644 --- a/src/include/aggregates/pac_clip_min_max.hpp +++ b/src/include/aggregates/pac_clip_min_max.hpp @@ -1,6 +1,6 @@ // // pac_clip_min_max: Approximate min/max with per-level uint8_t extremes + distinct bitmaps -// Two-sided (unsigned pos/neg), 62 levels covering 128-bit +// Two-sided (unsigned pos/neg), NUM_LEVELS sized for input type width // #ifndef PAC_CLIP_MIN_MAX_HPP #define PAC_CLIP_MIN_MAX_HPP @@ -19,44 +19,44 @@ void RegisterPacNoisedClipMaxFunctions(ExtensionLoader &loader); // ============================================================================ // Min/max-specific constants (shared constants in pac_clip_aggr.hpp) // ============================================================================ -constexpr int PCMM_SWAR = 8; // 8 × uint64_t = 64 × uint8_t extremes (SWAR packed) -constexpr int PCMM_ELEMENTS = 9; // 8 SWAR + 1 bitmap -constexpr int PCMM_INLINE_THRESHOLD = 53; // levels 0-52 
can use inline (53 pointers + 9 inline = 62) +constexpr int PCMM_SWAR = 8; // 8 × uint64_t = 64 × uint8_t extremes (SWAR packed) +constexpr int PCMM_ELEMENTS = 9; // 8 SWAR + 1 bitmap // ============================================================================ // PacClipMinMaxIntState: core state with uint8_t extremes per level // Unsigned: stores absolute values only. Caller routes negatives to a // separate state with !IS_MAX (two-sided approach, same as clip_sum). +// NUM_LEVELS: 30 for ≤64-bit types, 62 for 128-bit types // ============================================================================ -template +template struct PacClipMinMaxIntState { + static constexpr int INLINE_THRESHOLD = NUM_LEVELS - PCMM_ELEMENTS; + uint64_t key_hash; uint64_t update_count; - int8_t max_level_used; // -1 if none - int8_t inline_level_idx; // which level uses inline, -1 if none - uint8_t level_bounds[CLIP_NUM_LEVELS]; // BOUNDOPT: worst-of-64 per level for early skip + int8_t max_level_used; // -1 if none + int8_t inline_level_idx; // which level uses inline, -1 if none + uint8_t level_bounds[NUM_LEVELS]; // BOUNDOPT: worst-of-64 per level for early skip - // 62 level pointers = 496 bytes. - // Inline optimization: last PCMM_ELEMENTS slots = 72 bytes = one level. + // Level pointers with inline optimization: last PCMM_ELEMENTS slots + // overlap with one inline level buffer, saving one arena allocation. 
union { - uint64_t *levels[CLIP_NUM_LEVELS]; // 496 bytes + uint64_t *levels[NUM_LEVELS]; struct { - uint64_t *_ptrs[PCMM_INLINE_THRESHOLD]; // levels 0-52 pointers (424 bytes) - uint64_t inline_level[PCMM_ELEMENTS]; // 72 bytes for one inline level + uint64_t *_ptrs[INLINE_THRESHOLD]; + uint64_t inline_level[PCMM_ELEMENTS]; }; }; // ======================================================================== // GetLevel: route value to lowest level where shifted value fits in uint8_t [0,255] - // Threshold 256: abs_val < 256 → level 0 - // Same as clip_sum (8-bit unsigned range) // ======================================================================== static inline int GetLevel(uint64_t abs_val) { if (abs_val < 256) { return 0; } int bit_pos = 63 - pac_clzll(abs_val); - return std::min((bit_pos - 4) >> 1, CLIP_NUM_LEVELS - 1); + return std::min((bit_pos - 4) >> 1, NUM_LEVELS - 1); } static inline int GetLevel128(uint64_t upper, uint64_t lower) { @@ -64,14 +64,14 @@ struct PacClipMinMaxIntState { return GetLevel(lower); } int bit_pos = 127 - pac_clzll(upper); - return std::min((bit_pos - 4) >> 1, CLIP_NUM_LEVELS - 1); + return std::min((bit_pos - 4) >> 1, NUM_LEVELS - 1); } // ======================================================================== // Level allocation // ======================================================================== inline void AllocateLevel(ArenaAllocator &allocator, int k) { - if (k >= PCMM_INLINE_THRESHOLD && inline_level_idx >= 0) { + if (k >= INLINE_THRESHOLD && inline_level_idx >= 0) { // Evict inline level to arena auto *ext = reinterpret_cast(allocator.Allocate(PCMM_ELEMENTS * sizeof(uint64_t))); memcpy(ext, inline_level, PCMM_ELEMENTS * sizeof(uint64_t)); @@ -80,7 +80,7 @@ struct PacClipMinMaxIntState { memset(inline_level, 0, PCMM_ELEMENTS * sizeof(uint64_t)); } uint64_t *buf; - if (k < PCMM_INLINE_THRESHOLD && inline_level_idx < 0) { + if (k < INLINE_THRESHOLD && inline_level_idx < 0) { buf = inline_level; inline_level_idx = 
static_cast(k); } else { @@ -232,11 +232,12 @@ struct PacClipMinMaxIntState { // ============================================================================ // PacClipMinMaxStateWrapper: buffering wrapper with two-sided pos/neg // neg_state uses !IS_MAX (opposite direction on absolute values) +// NUM_LEVELS: 30 for ≤64-bit types, 62 for 128-bit types // ============================================================================ -template +template struct PacClipMinMaxStateWrapper { - using State = PacClipMinMaxIntState; - using NegState = PacClipMinMaxIntState; + using State = PacClipMinMaxIntState; + using NegState = PacClipMinMaxIntState; static constexpr int BUF_SIZE = 2; static constexpr uint64_t BUF_MASK = 3ULL; @@ -278,7 +279,12 @@ struct PacClipMinMaxStateWrapper { return neg_state; } + // For unsigned types, neg_state is never used — report smaller size + template static idx_t StateSize() { + if (!SIGNED) { + return sizeof(PacClipMinMaxStateWrapper) - sizeof(NegState *); + } return sizeof(PacClipMinMaxStateWrapper); } }; diff --git a/src/include/aggregates/pac_clip_sum.hpp b/src/include/aggregates/pac_clip_sum.hpp index 40820a76..2eb43b7c 100644 --- a/src/include/aggregates/pac_clip_sum.hpp +++ b/src/include/aggregates/pac_clip_sum.hpp @@ -18,11 +18,11 @@ void RegisterPacNoisedClipSumCountFunctions(ExtensionLoader &loader); // ============================================================================ // Sum-specific constants (shared constants in pac_clip_aggr.hpp) // ============================================================================ -constexpr int PAC2_NORMAL_SWAR = 16; // 16 x uint64_t = 64 x uint16_t SWAR counters -constexpr int PAC2_NORMAL_ELEMENTS = 18; // 16 SWAR + 1 packed ptr/ec + 1 bitmap -constexpr int PAC2_OVERFLOW_SWAR = 32; // 32 x uint64_t = 64 x uint32_t SWAR counters -constexpr int PAC2_OVERFLOW_ELEMENTS = 33; // 32 SWAR + 1 exact_count -constexpr uint64_t PAC2_SWAR_MASK_16 = 0x0001000100010001ULL; +constexpr int 
CLIP_NORMAL_SWAR = 16; // 16 x uint64_t = 64 x uint16_t SWAR counters +constexpr int CLIP_NORMAL_ELEMENTS = 18; // 16 SWAR + 1 packed ptr/ec + 1 bitmap +constexpr int CLIP_OVERFLOW_SWAR = 32; // 32 x uint64_t = 64 x uint32_t SWAR counters +constexpr int CLIP_OVERFLOW_ELEMENTS = 33; // 32 SWAR + 1 exact_count +constexpr uint64_t CLIP_SWAR_MASK_16 = 0x0001000100010001ULL; // ============================================================================ // Packed pointer + exact_count helpers @@ -46,9 +46,9 @@ static inline void Pac2SetOverflowPtr(uint64_t &packed, uint64_t *ptr) { // ============================================================================ AUTOVECTORIZE static inline void Pac2AddToTotalsSWAR16(uint64_t *PAC_RESTRICT total, uint64_t value, uint64_t key_hash) { - uint64_t val_packed = static_cast(value) * PAC2_SWAR_MASK_16; + uint64_t val_packed = static_cast(value) * CLIP_SWAR_MASK_16; for (int i = 0; i < 16; i++) { - uint64_t bits = (key_hash >> i) & PAC2_SWAR_MASK_16; + uint64_t bits = (key_hash >> i) & CLIP_SWAR_MASK_16; uint64_t expanded = (bits << 16) - bits; total[i] += val_packed & expanded; } @@ -56,21 +56,24 @@ AUTOVECTORIZE static inline void Pac2AddToTotalsSWAR16(uint64_t *PAC_RESTRICT to // ============================================================================ // PacClipSumIntState — core state for one unsigned accumulator +// NUM_LEVELS: 30 for ≤64-bit types, 62 for 128-bit types // ============================================================================ +template struct PacClipSumIntState { + static constexpr int INLINE_THRESHOLD = NUM_LEVELS - CLIP_NORMAL_ELEMENTS; + uint64_t key_hash; uint64_t update_count; int8_t max_level_used; // -1 if none int8_t inline_level_idx; // which level uses inline, -1 if none - // 62 level pointers = 496 bytes. - // Inline optimization: last 18 slots (indices 44..61) = 144 bytes = one normal level. - // Levels 0-43 can use inline storage without overlapping their own pointer slot. 
+ // Level pointers with inline optimization: last CLIP_NORMAL_ELEMENTS slots + // overlap with one inline level buffer, saving one arena allocation. union { - uint64_t *levels[CLIP_NUM_LEVELS]; // 496 bytes + uint64_t *levels[NUM_LEVELS]; struct { - uint64_t *_ptrs[44]; // levels 0-43 pointers (352 bytes) - uint64_t inline_level[PAC2_NORMAL_ELEMENTS]; // 144 bytes for one inline level + uint64_t *_ptrs[INLINE_THRESHOLD]; + uint64_t inline_level[CLIP_NORMAL_ELEMENTS]; }; }; @@ -82,7 +85,7 @@ struct PacClipSumIntState { return 0; } int bit_pos = 63 - pac_clzll(abs_val); - return std::min((bit_pos - 4) >> 1, CLIP_NUM_LEVELS - 1); + return std::min((bit_pos - 4) >> 1, NUM_LEVELS - 1); } // For 128-bit (hugeint) values — clamps to max level for very large values @@ -91,30 +94,29 @@ struct PacClipSumIntState { return GetLevel(lower); } int bit_pos = 127 - pac_clzll(upper); - return std::min((bit_pos - 4) >> 1, CLIP_NUM_LEVELS - 1); + return std::min((bit_pos - 4) >> 1, NUM_LEVELS - 1); } // ======================================================================== // Level allocation // ======================================================================== inline void AllocateLevel(ArenaAllocator &allocator, int k) { - if (k >= 44 && inline_level_idx >= 0) { + if (k >= INLINE_THRESHOLD && inline_level_idx >= 0) { // Evict inline level to arena - auto *ext = reinterpret_cast(allocator.Allocate(PAC2_NORMAL_ELEMENTS * sizeof(uint64_t))); - memcpy(ext, inline_level, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); + auto *ext = reinterpret_cast(allocator.Allocate(CLIP_NORMAL_ELEMENTS * sizeof(uint64_t))); + memcpy(ext, inline_level, CLIP_NORMAL_ELEMENTS * sizeof(uint64_t)); levels[inline_level_idx] = ext; inline_level_idx = -1; - // Clear inline area so levels[44..61] read as nullptr - memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); + memset(inline_level, 0, CLIP_NORMAL_ELEMENTS * sizeof(uint64_t)); } - if (k < 44 && inline_level_idx < 0) { + if (k < 
INLINE_THRESHOLD && inline_level_idx < 0) { // Use inline storage levels[k] = inline_level; - memset(inline_level, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); + memset(inline_level, 0, CLIP_NORMAL_ELEMENTS * sizeof(uint64_t)); inline_level_idx = static_cast(k); } else { - auto *buf = reinterpret_cast(allocator.Allocate(PAC2_NORMAL_ELEMENTS * sizeof(uint64_t))); - memset(buf, 0, PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); + auto *buf = reinterpret_cast(allocator.Allocate(CLIP_NORMAL_ELEMENTS * sizeof(uint64_t))); + memset(buf, 0, CLIP_NORMAL_ELEMENTS * sizeof(uint64_t)); levels[k] = buf; } } @@ -136,8 +138,8 @@ struct PacClipSumIntState { // 1. Ensure overflow level allocated uint64_t *overflow = Pac2GetOverflowPtr(normal_buf[16]); if (!overflow) { - overflow = reinterpret_cast(allocator.Allocate(PAC2_OVERFLOW_ELEMENTS * sizeof(uint64_t))); - memset(overflow, 0, PAC2_OVERFLOW_ELEMENTS * sizeof(uint64_t)); + overflow = reinterpret_cast(allocator.Allocate(CLIP_OVERFLOW_ELEMENTS * sizeof(uint64_t))); + memset(overflow, 0, CLIP_OVERFLOW_ELEMENTS * sizeof(uint64_t)); Pac2SetOverflowPtr(normal_buf[16], overflow); } @@ -253,14 +255,14 @@ struct PacClipSumIntState { src->levels[k] = nullptr; } else { // src is using inline — copy instead - memcpy(levels[k], src->levels[k], PAC2_NORMAL_ELEMENTS * sizeof(uint64_t)); + memcpy(levels[k], src->levels[k], CLIP_NORMAL_ELEMENTS * sizeof(uint64_t)); } continue; } // Both have this level: merge // Add SWAR counters - for (int i = 0; i < PAC2_NORMAL_SWAR; i++) { + for (int i = 0; i < CLIP_NORMAL_SWAR; i++) { levels[k][i] += src->levels[k][i]; } // OR bitmaps @@ -285,7 +287,7 @@ struct PacClipSumIntState { Pac2SetOverflowPtr(src->levels[k][16], nullptr); } else if (src_overflow && dst_overflow) { // Merge overflow SWAR counters - for (int i = 0; i < PAC2_OVERFLOW_SWAR; i++) { + for (int i = 0; i < CLIP_OVERFLOW_SWAR; i++) { dst_overflow[i] += src_overflow[i]; } // Merge overflow exact_counts @@ -297,19 +299,21 @@ struct 
PacClipSumIntState { } // Interface methods - PacClipSumIntState *GetState() { + PacClipSumIntState *GetState() { return this; } - PacClipSumIntState *EnsureState(ArenaAllocator &) { + PacClipSumIntState *EnsureState(ArenaAllocator &) { return this; } }; // ============================================================================ // PacClipSumStateWrapper: buffering wrapper with two-sided pos/neg +// NUM_LEVELS: 30 for ≤64-bit types, 62 for 128-bit types // ============================================================================ +template struct PacClipSumStateWrapper { - using State = PacClipSumIntState; + using State = PacClipSumIntState; using Value = uint64_t; static constexpr int BUF_SIZE = 2; static constexpr uint64_t BUF_MASK = 3ULL; @@ -318,19 +322,19 @@ struct PacClipSumStateWrapper { uint64_t hash_buf[BUF_SIZE]; union { uint64_t n_buffered; // lower 2 bits: count, upper bits: state pointer - PacClipSumIntState *state; + State *state; }; - PacClipSumIntState *neg_state; // separate state for negatives (stores absolute values) + State *neg_state; // separate state for negatives (stores absolute values) - PacClipSumIntState *GetState() const { - return reinterpret_cast(reinterpret_cast(state) & ~7ULL); + State *GetState() const { + return reinterpret_cast(reinterpret_cast(state) & ~7ULL); } - PacClipSumIntState *EnsureState(ArenaAllocator &a) { - PacClipSumIntState *s = GetState(); + State *EnsureState(ArenaAllocator &a) { + State *s = GetState(); if (!s) { - s = reinterpret_cast(a.Allocate(sizeof(PacClipSumIntState))); - memset(s, 0, sizeof(PacClipSumIntState)); + s = reinterpret_cast(a.Allocate(sizeof(State))); + memset(s, 0, sizeof(State)); s->max_level_used = -1; s->inline_level_idx = -1; state = s; @@ -338,21 +342,26 @@ struct PacClipSumStateWrapper { return s; } - PacClipSumIntState *GetNegState() const { + State *GetNegState() const { return neg_state; } - PacClipSumIntState *EnsureNegState(ArenaAllocator &a) { + State 
*EnsureNegState(ArenaAllocator &a) { if (!neg_state) { - neg_state = reinterpret_cast(a.Allocate(sizeof(PacClipSumIntState))); - memset(neg_state, 0, sizeof(PacClipSumIntState)); + neg_state = reinterpret_cast(a.Allocate(sizeof(State))); + memset(neg_state, 0, sizeof(State)); neg_state->max_level_used = -1; neg_state->inline_level_idx = -1; } return neg_state; } + // For unsigned types, neg_state is never used — report smaller size + template static idx_t StateSize() { + if (!SIGNED) { + return sizeof(PacClipSumStateWrapper) - sizeof(State *); + } return sizeof(PacClipSumStateWrapper); } }; From 00beaa34ba4a8f3603a6d395fd43481473a5608a Mon Sep 17 00:00:00 2001 From: peter Date: Thu, 2 Apr 2026 19:44:07 +0200 Subject: [PATCH 26/27] make format-fix --- src/aggregates/pac_clip_sum.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/aggregates/pac_clip_sum.cpp b/src/aggregates/pac_clip_sum.cpp index 69dc3209..b9a13ba6 100644 --- a/src/aggregates/pac_clip_sum.cpp +++ b/src/aggregates/pac_clip_sum.cpp @@ -226,25 +226,27 @@ static void PacClipSumScatterUpdate(Vector inputs[], Vector &states, idx_t count // ============================================================================ // X-macro: generate Update/ScatterUpdate for integer types // ============================================================================ -#define CLIP_SUM_INT_TYPES_SIGNED \ +#define CLIP_SUM_INT_TYPES_SIGNED \ X(TinyInt, int64_t, int8_t, true) \ X(SmallInt, int64_t, int16_t, true) \ X(Integer, int64_t, int32_t, true) \ X(BigInt, int64_t, int64_t, true) -#define CLIP_SUM_INT_TYPES_UNSIGNED \ +#define CLIP_SUM_INT_TYPES_UNSIGNED \ X(UTinyInt, uint64_t, uint8_t, false) \ X(USmallInt, uint64_t, uint16_t, false) \ X(UInteger, uint64_t, uint32_t, false) \ X(UBigInt, uint64_t, uint64_t, false) -#define X(NAME, VALUE_T, INPUT_T, SIGNED) \ - static void PacClipSumUpdate##NAME(Vector input[], AggregateInputData &agg, idx_t, data_ptr_t state_p, idx_t cnt) {\ - 
auto &state = *reinterpret_cast *>(state_p); \ - PacClipSumUpdate(input, state, cnt, agg.allocator); \ +#define X(NAME, VALUE_T, INPUT_T, SIGNED) \ + static void PacClipSumUpdate##NAME(Vector input[], AggregateInputData &agg, idx_t, data_ptr_t state_p, \ + idx_t cnt) { \ + auto &state = *reinterpret_cast *>(state_p); \ + PacClipSumUpdate(input, state, cnt, agg.allocator); \ } \ - static void PacClipSumScatterUpdate##NAME(Vector input[], AggregateInputData &agg, idx_t, Vector &sts, idx_t cnt) {\ - PacClipSumScatterUpdate(input, sts, cnt, agg.allocator); \ + static void PacClipSumScatterUpdate##NAME(Vector input[], AggregateInputData &agg, idx_t, Vector &sts, \ + idx_t cnt) { \ + PacClipSumScatterUpdate(input, sts, cnt, agg.allocator); \ } CLIP_SUM_INT_TYPES_SIGNED CLIP_SUM_INT_TYPES_UNSIGNED From bf4f404e6d4131d216170b750c7d8db5b6eaee1f Mon Sep 17 00:00:00 2001 From: ila Date: Thu, 2 Apr 2026 22:41:53 +0200 Subject: [PATCH 27/27] more attacks --- attacks/clip_attack_results.md | 68 +++++++++++++++ attacks/clip_scale_test.sh | 152 +++++++++++++++++++++++++++++++++ 2 files changed, 220 insertions(+) create mode 100644 attacks/clip_scale_test.sh diff --git a/attacks/clip_attack_results.md b/attacks/clip_attack_results.md index 5baf8847..4cd0e8cb 100644 --- a/attacks/clip_attack_results.md +++ b/attacks/clip_attack_results.md @@ -312,3 +312,71 @@ Even the minimum level-3 value is zeroed when it's the sole contributor. 5. **The pre-aggregation step remains essential** — 20K small items are correctly collapsed and clipped. + +--- + +## pac_clip_scale comparison (2026-04-02) + +Tests `pac_clip_scale = true` (scale unsupported outlier levels to nearest supported +level) vs `false` (hard-zero / omit). Peter's hypothesis: scaling should be safe +because outliers become a minority in the already-supported bucket. + +Code version: after Peter's refactoring into `pac_clip_aggr.hpp`, CLIP_LEVEL_SHIFT=2 +(4x per level, was 16x previously). 
+ +### Setup + +- N=1000 background users, acctbal ∈ [1, 10000] +- pac_mi = 0.0078125 (1/128), 30 trials per condition +- Background sum: filter<=3 = 18,347; filter<=999 = 4,871,091 + +### Small filter results (filter<=3, clip_support=2) + +| Test | Outlier | scale=false best% | scale=true best% | scale=false std_in | scale=true std_in | std_out | +|------|---------|-------------------|------------------|--------------------|-------------------|---------| +| Extreme | tv=999,999 | **55.6%** | **64.8%** | 94,882 | 169,449 | 107,976 | +| Moderate | tv=50,000 | **55.6%** | **61.1%** | 94,882 | 147,406 | 107,976 | +| Multi-row | 20K×$50 | **55.6%** | **64.8%** | 94,882 | 169,449 | 107,976 | +| Borderline | tv=65,536 | **55.6%** | **57.4%** | 94,882 | 104,640 | 107,976 | + +With **hard-zero** (scale=false): all outlier contributions are completely zeroed. The +"in" distribution is identical to "out" — attack accuracy ~55% (random). No side-channel. + +With **scaling** (scale=true): outlier values are scaled down to the nearest supported +level. This creates a mild variance side-channel (std ratio ~1.57x). Attack accuracy +rises to 57-65% — measurable but not catastrophic. + +### Small filter results (clip_support=10 and 50) + +All conditions return 0 for both in/out (no level reaches 10 or 50 distinct +contributors with only 3-4 users). Scale mode is irrelevant — both modes identical. + +### Wide filter results (filter<=999, clip_support=2) + +| clip | scale | mean_in | std_in | mean_out | std_out | best% | +|------|-------|---------|--------|----------|---------|-------| +| 2 | false | 4,737,114 | 1,516,576 | 4,973,475 | 1,642,654 | 53.3% | +| 2 | true | 4,750,242 | 1,538,381 | 4,973,475 | 1,642,654 | 53.3% | +| 50 | false | 4,734,354 | 1,516,680 | 4,970,423 | 1,643,039 | 53.3% | +| 50 | true | 4,791,594 | 1,539,287 | 5,019,631 | 1,639,877 | 55.0% | + +No meaningful difference. Both modes near random with wide filters. + +### Key findings + +1. 
**Peter is partially right**: scaling does NOT cause a variance explosion. The leak + is moderate (~10 percentage points above random at worst), not catastrophic. + His intuition that scaled values become a minority in the supported bucket holds. + +2. **Hard-zero is still strictly better for privacy**: it produces zero information + leakage in all tested scenarios. Scaling leaks mildly. + +3. **Major improvement from 4x granularity**: the moderate outlier (tv=50,000) that + previously defeated clipping at 76.5% accuracy (when levels were 16x wide, both + 5000 and 50000 in same level) is now caught by both modes (~55-61%). The shift + from CLIP_LEVEL_SHIFT=4 (16x) to CLIP_LEVEL_SHIFT=2 (4x) dramatically improved + detection of moderate outliers. + +4. **Recommendation**: keep hard-zero as default (no leakage), but scaling is a + reasonable option where utility matters more. The ~10pp accuracy gap may be + acceptable in many threat models. diff --git a/attacks/clip_scale_test.sh b/attacks/clip_scale_test.sh new file mode 100644 index 00000000..a12319e0 --- /dev/null +++ b/attacks/clip_scale_test.sh @@ -0,0 +1,152 @@ +#!/usr/bin/env bash +# Test pac_clip_scale=true (scale outliers to nearest supported level) +# vs default false (hard-zero / omit). Peter's hypothesis: scaling should +# be safe because outliers become a minority in an already-supported bucket. 
+set -euo pipefail + +DUCKDB="/home/ila/Code/pac/build/release/duckdb" +PAC_EXT="/home/ila/Code/pac/build/release/extension/pac/pac.duckdb_extension" + +run_sum() { + local cond=$1 seed=$2 n_users=$3 target_val=$4 filter=$5 clip=$6 scale=$7 + local insert="" + [ "$cond" = "in" ] && insert="$target_val" + $DUCKDB -noheader -list 2>/dev/null < threshold THEN 1 + WHEN truth='out' AND ABS(v - ${fbg}) <= threshold THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*) AS acc + FROM raw, generate_series(10000, 500000, 10000) thresholds(threshold) + WHERE v IS NOT NULL + GROUP BY threshold +); + +-- Midpoint classifier +SELECT 'Midpoint clf' AS clf, + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND v > (${fbg} + ${fbg} + ${tv}) / 2.0 THEN 1 + WHEN truth='out' AND v <= (${fbg} + ${fbg} + ${tv}) / 2.0 THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM raw WHERE v IS NOT NULL; + +-- Likelihood ratio +SELECT 'LR clf' AS clf, + printf('%.1f%%', 100.0*SUM(CASE + WHEN truth='in' AND ABS(v - ${fbg}::DOUBLE - ${tv}) < ABS(v - ${fbg}::DOUBLE) THEN 1 + WHEN truth='out' AND ABS(v - ${fbg}::DOUBLE - ${tv}) >= ABS(v - ${fbg}::DOUBLE) THEN 1 + ELSE 0 END)::DOUBLE / COUNT(*)) AS accuracy +FROM raw WHERE v IS NOT NULL; +SQL + echo "" +} + +FBG=$($DUCKDB -noheader -list -c \ + "SELECT SUM((hash(i*31+7)%10000+1)::INTEGER) FROM generate_series(1,3) t(i);" | tr -d '[:space:]') +FBG999=$($DUCKDB -noheader -list -c \ + "SELECT SUM((hash(i*31+7)%10000+1)::INTEGER) FROM generate_series(1,999) t(i);" | tr -d '[:space:]') + +echo "=================================================================" +echo " pac_clip_scale COMPARISON TEST" +echo " Comparing scale=true (Peter's preference) vs scale=false (hard-zero)" +echo "=================================================================" +echo "Background: filter<=3 sum=$FBG, filter<=999 sum=$FBG999" +echo "" + +run_test() { + local test_name=$1 n_users=$2 target_sql=$3 filter=$4 tv=$5 fbg=$6 + local nt=30 + + for clip in 2 10 50; do + for scale in 
false true; do + local label="${test_name} [clip=${clip}, scale=${scale}]" + IN_F=$(mktemp); OUT_F=$(mktemp) + for seed in $(seq 1 $nt); do + echo "in,$(run_sum in $seed $n_users "$target_sql" $filter $clip $scale)" >> "$IN_F" + echo "out,$(run_sum out $seed $n_users "" $filter $clip $scale)" >> "$OUT_F" + done + analyze "$label" "$IN_F" "$OUT_F" "$fbg" "$tv" + rm -f "$IN_F" "$OUT_F" + done + done +} + +# --------------------------------------------------------------- +# TEST 1: Extreme outlier, small filter +# Hard-zero baseline: 52.4% (random) at clip=2 +# --------------------------------------------------------------- +echo "## TEST 1: Extreme outlier (tv=999999), small filter (<=3), N=1000" +echo "" +run_test "T1" 1000 "INSERT INTO users VALUES (0, 999999);" 3 999999 "$FBG" + +# --------------------------------------------------------------- +# TEST 2: Extreme outlier, wide filter +# Hard-zero baseline: ~55% at clip=2 +# --------------------------------------------------------------- +echo "## TEST 2: Extreme outlier (tv=999999), wide filter (<=999), N=1000" +echo "" +run_test "T2" 1000 "INSERT INTO users VALUES (0, 999999);" 999 999999 "$FBG999" + +# --------------------------------------------------------------- +# TEST 3: Moderate outlier (same magnitude level as normal users) +# Hard-zero baseline: 76.5% at clip=2 (already leaks) +# --------------------------------------------------------------- +echo "## TEST 3: Moderate outlier (tv=50000), small filter (<=3)" +echo "Normal users ~5000, target ~50000 — both in level 2 (4096-65535)" +echo "" +run_test "T3" 1000 "INSERT INTO users VALUES (0, 50000);" 3 50000 "$FBG" + +# --------------------------------------------------------------- +# TEST 4: 20K small items (multi-row outlier) +# Hard-zero baseline: 52.9% at clip=2 +# --------------------------------------------------------------- +echo "## TEST 4: 20K small items (50 x 20000 = 1M), small filter (<=3)" +echo "" +run_test "T4" 1000 "INSERT INTO users SELECT 
0, 50 FROM generate_series(1,20000) t(i);" 3 1000000 "$FBG" + +# --------------------------------------------------------------- +# TEST 5: Borderline outlier (exactly at level boundary) +# Hard-zero baseline: 52.9% at clip=2 +# --------------------------------------------------------------- +echo "## TEST 5: Borderline outlier (tv=65536, level 3 boundary), small filter (<=3)" +echo "" +run_test "T5" 1000 "INSERT INTO users VALUES (0, 65536);" 3 65536 "$FBG" + +echo "=================================================================" +echo " SCALE TEST COMPLETE" +echo "================================================================="