Skip to content

Commit 2645a38

Browse files
committed
space optimizations, fixed some tests
1 parent c141948 commit 2645a38

File tree

3 files changed

+411
-325
lines changed

3 files changed

+411
-325
lines changed

include/sketch/sketch_columns.h

Lines changed: 122 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <hwy/aligned_allocator.h>
1111

1212
#include <gtest/gtest.h>
13+
#include <cstdint>
1314

1415
// #include /* <folly/synchronization/RWSpinLock.h> */
1516
#include "RWSpinLock.h"
@@ -19,10 +20,37 @@
1920
*/
2021
class FixedSizeSketchColumn {
2122
private:
22-
Bucket deterministic_bucket = {0, 0};
23-
Bucket *buckets;
23+
uintptr_t buckets_tagged = 0;
2424
uint64_t seed;
25-
uint8_t capacity;
25+
26+
// Bit layout of buckets_tagged: the low 8 bits (kCapacityMask) hold the
// capacity, every remaining bit (kPointerMask) holds the bucket block address.
static constexpr uintptr_t kCapacityMask = 0xFFu;
27+
static constexpr uintptr_t kPointerMask = ~kCapacityMask;
28+
29+
// Returns the start of the bucket block: buckets_tagged with the capacity bits
// cleared, reinterpreted as a Bucket pointer. Yields nullptr while
// buckets_tagged is still 0.
// NOTE(review): masking only recovers the original address if its low 8 bits
// were zero (256-byte aligned) -- presumably guaranteed by
// allocate_bucket_block; confirm against its definition.
Bucket *buckets_raw() {
30+
return reinterpret_cast<Bucket*>(buckets_tagged & kPointerMask);
31+
}
32+
// Const overload of buckets_raw(); same masking, read-only pointer.
const Bucket *buckets_raw() const {
33+
return reinterpret_cast<const Bucket*>(buckets_tagged & kPointerMask);
34+
}
35+
// Returns a pointer to the second element of the raw block; element 0 is the
// one handed out by deterministic_bucket_ref().
// NOTE(review): when buckets_tagged is 0 this is pointer arithmetic on null --
// confirm every caller checks is_initialized() first.
Bucket *buckets() {
36+
return buckets_raw() + 1;
37+
}
38+
// Const overload of buckets().
const Bucket *buckets() const {
39+
return buckets_raw() + 1;
40+
}
41+
// Returns a reference to element 0 of the raw block, which this layout uses as
// the deterministic bucket.
// NOTE(review): dereferences the raw pointer unconditionally; with
// buckets_tagged == 0 that is a null dereference -- confirm callers guard it.
Bucket &deterministic_bucket_ref() {
42+
return buckets_raw()[0];
43+
}
44+
// Const overload of deterministic_bucket_ref().
const Bucket &deterministic_bucket_ref() const {
45+
return buckets_raw()[0];
46+
}
47+
// Returns the capacity stored in the low 8 bits of buckets_tagged (0 while
// buckets_tagged is 0).
uint8_t capacity() const {
48+
return static_cast<uint8_t>(buckets_tagged & kCapacityMask);
49+
}
50+
51+
// Declarations only; the definitions are not part of this view.
// Presumably packs raw_buckets and capacity into buckets_tagged -- confirm.
void set_tagged_buckets(Bucket *raw_buckets, uint8_t capacity);
52+
// Presumably allocates the raw block (deterministic bucket + `capacity`
// buckets) with the alignment the tag mask needs -- confirm.
static Bucket *allocate_bucket_block(uint8_t capacity);
53+
// Presumably releases a block obtained from allocate_bucket_block -- confirm.
static void free_bucket_block(Bucket *raw_buckets);
2654
public:
2755
void set_seed(uint64_t new_seed) {
2856
seed = new_seed;
@@ -65,7 +93,7 @@ class FixedSizeSketchColumn {
6593

6694

6795
// Reports whether a bucket block is attached. After this commit the check is
// on the pointer bits of buckets_tagged (via buckets_raw()) rather than on the
// removed `buckets` member.
inline bool is_initialized() const {
68-
return buckets != nullptr;
96+
return buckets_raw() != nullptr;
6997
}
7098

7199
[[deprecated]]
@@ -74,21 +102,27 @@ class FixedSizeSketchColumn {
74102
}
75103

76104
// Equality: after this commit, columns are equal when their capacities match,
// every one of the capacity() buckets matches, and the deterministic buckets
// match. The seed is not part of the comparison.
// NOTE(review): if both sides have buckets_tagged == 0, the capacity check and
// the loop pass trivially and deterministic_bucket_ref() then reads through a
// null raw pointer -- confirm uninitialized columns are never compared.
bool operator==(const FixedSizeSketchColumn &other) const {
77-
for (size_t i = 0; i < capacity; ++i) {
78-
if (buckets[i] != other.buckets[i]) {
105+
if (capacity() != other.capacity()) {
106+
return false;
107+
}
108+
for (size_t i = 0; i < capacity(); ++i) {
109+
if (buckets()[i] != other.buckets()[i]) {
79110
return false;
80111
}
81112
}
113+
if (deterministic_bucket_ref() != other.deterministic_bucket_ref()) {
114+
return false;
115+
}
82116
return true;
83117
}
84118

85119
// Debug dump: writes a header line, the capacity, the seed, the deterministic
// bucket and then one line per bucket, returning the stream.
// NOTE(review): `(int)sketch.seed` narrows the uint64_t seed, so the printed
// value can differ from the real seed.
// NOTE(review): deterministic_bucket_ref() is read unconditionally; on a
// column whose buckets_tagged is 0 that goes through a null pointer -- confirm.
friend std::ostream& operator<<(std::ostream &os, const FixedSizeSketchColumn &sketch) {
86120
os << "FixedSizeSketchColumn: " << std::endl;
87-
os << "Capacity: " << (int)sketch.capacity << std::endl;
121+
os << "Capacity: " << (int)sketch.capacity() << std::endl;
88122
os << "Column Seed: " << (int)sketch.seed << std::endl;
89-
os << "Deterministic Bucket: " << sketch.deterministic_bucket << std::endl;
90-
for (size_t i = 0; i < sketch.capacity; ++i) {
91-
os << "Bucket[" << i << "]: " << sketch.buckets[i] << std::endl;
123+
os << "Deterministic Bucket: " << sketch.deterministic_bucket_ref() << std::endl;
124+
for (size_t i = 0; i < sketch.capacity(); ++i) {
125+
os << "Bucket[" << i << "]: " << sketch.buckets()[i] << std::endl;
92126
}
93127
return os;
94128
}
@@ -103,11 +137,38 @@ FRIEND_TEST(SketchColumnTestSuite, TestClear);
103137
FRIEND_TEST(SketchColumnTestSuite, TestClearMerge);
104138
FRIEND_TEST(SketchColumnTestSuite, TestUpdateReallocation);
105139
private:
106-
Bucket deterministic_bucket = {0, 0};
107-
Bucket *buckets;
140+
uintptr_t buckets_tagged = 0;
108141
uint64_t seed;
109142
from_folly::RWSpinLock lock;
110-
uint8_t capacity;
143+
144+
// buckets_tagged packs two values: capacity in the low 8 bits (kCapacityMask)
// and the bucket block address in the remaining bits (kPointerMask).
static constexpr uintptr_t kCapacityMask = 0xFFu;
145+
static constexpr uintptr_t kPointerMask = ~kCapacityMask;
146+
147+
// Strips the capacity bits from buckets_tagged and returns the result as the
// block's base pointer; nullptr while buckets_tagged is 0.
// NOTE(review): correct only if the allocated address has zero low 8 bits
// (256-byte alignment) -- presumably enforced by allocate_bucket_block; confirm.
Bucket *buckets_raw() {
148+
return reinterpret_cast<Bucket*>(buckets_tagged & kPointerMask);
149+
}
150+
// Read-only variant of buckets_raw().
const Bucket *buckets_raw() const {
151+
return reinterpret_cast<const Bucket*>(buckets_tagged & kPointerMask);
152+
}
153+
// Pointer to the buckets that follow element 0 of the raw block (element 0 is
// returned by deterministic_bucket_ref()).
// NOTE(review): adds 1 to a null pointer when buckets_tagged is 0 -- confirm
// callers test is_initialized() beforehand.
Bucket *buckets() {
154+
return buckets_raw() + 1;
155+
}
156+
// Read-only variant of buckets().
const Bucket *buckets() const {
157+
return buckets_raw() + 1;
158+
}
159+
// Reference to element 0 of the raw block, used as the deterministic bucket.
// NOTE(review): no null check; with buckets_tagged == 0 this dereferences
// null -- confirm callers guard it.
Bucket &deterministic_bucket_ref() {
160+
return buckets_raw()[0];
161+
}
162+
// Read-only variant of deterministic_bucket_ref().
const Bucket &deterministic_bucket_ref() const {
163+
return buckets_raw()[0];
164+
}
165+
// Extracts the capacity from the low 8 bits of buckets_tagged.
uint8_t capacity() const {
166+
return static_cast<uint8_t>(buckets_tagged & kCapacityMask);
167+
}
168+
169+
// Declared here, defined outside this view.
// Presumably stores raw_buckets and capacity together in buckets_tagged --
// confirm against the definition.
void set_tagged_buckets(Bucket *raw_buckets, uint8_t capacity);
170+
// Presumably returns a block sized for the deterministic bucket plus
// `capacity` buckets, aligned so the tag bits are free -- confirm.
static Bucket *allocate_bucket_block(uint8_t capacity);
171+
// Presumably frees a block returned by allocate_bucket_block -- confirm.
static void free_bucket_block(Bucket *raw_buckets);
111172
public:
112173
// Overwrites the column's seed with new_seed.
void set_seed(uint64_t new_seed) { seed = new_seed; };
113174
// Returns the column's current seed.
uint64_t get_seed() const { return seed; };
@@ -151,17 +212,17 @@ FRIEND_TEST(SketchColumnTestSuite, TestUpdateReallocation);
151212

152213
// Debug dump: header line, capacity, seed, deterministic bucket, then one line
// per bucket; returns the stream.
// NOTE(review): `(int)sketch.seed` truncates the uint64_t seed.
// NOTE(review): reads deterministic_bucket_ref() without checking
// is_initialized(); with buckets_tagged == 0 that is a null access -- confirm.
friend std::ostream& operator<<(std::ostream &os, const ResizeableSketchColumn&sketch) {
153214
os << "ResizeableSketchColumn: " << std::endl;
154-
os << "Capacity: " << (int)sketch.capacity << std::endl;
215+
os << "Capacity: " << (int)sketch.capacity() << std::endl;
155216
os << "Column Seed: " << (int)sketch.seed << std::endl;
156-
os << "Deterministic Bucket: " << sketch.deterministic_bucket << std::endl;
157-
for (size_t i = 0; i < sketch.capacity; ++i) {
158-
os << "Bucket[" << i << "]: " << sketch.buckets[i] << std::endl;
217+
os << "Deterministic Bucket: " << sketch.deterministic_bucket_ref() << std::endl;
218+
for (size_t i = 0; i < sketch.capacity(); ++i) {
219+
os << "Bucket[" << i << "]: " << sketch.buckets()[i] << std::endl;
159220
}
160221
return os;
161222
}
162223

163224
// True once a bucket block is attached; the commit switches the test from the
// removed `buckets` member to the pointer bits of buckets_tagged.
inline bool is_initialized() const {
164-
return buckets != nullptr;
225+
return buckets_raw() != nullptr;
165226
}
166227

167228
bool operator==(const ResizeableSketchColumn &other) const {
@@ -170,10 +231,13 @@ FRIEND_TEST(SketchColumnTestSuite, TestUpdateReallocation);
170231
return false;
171232
}
172233
for (size_t i = 0; i < other_depth; ++i) {
173-
if (buckets[i] != other.buckets[i]) {
234+
if (buckets()[i] != other.buckets()[i]) {
174235
return false;
175236
}
176237
}
238+
if (deterministic_bucket_ref() != other.deterministic_bucket_ref()) {
239+
return false;
240+
}
177241
return true;
178242
}
179243
private:
@@ -183,10 +247,37 @@ FRIEND_TEST(SketchColumnTestSuite, TestUpdateReallocation);
183247

184248
class ResizeableAlignedSketchColumn {
185249
private:
186-
hwy::AlignedFreeUniquePtr<Bucket[]> aligned_buckets;
187-
Bucket deterministic_bucket = {0, 0};
250+
uintptr_t buckets_tagged = 0;
188251
uint64_t seed;
189-
uint8_t capacity;
252+
253+
// Tag layout for buckets_tagged: low 8 bits = capacity (kCapacityMask), all
// higher bits = address of the bucket block (kPointerMask).
static constexpr uintptr_t kCapacityMask = 0xFFu;
253+
static constexpr uintptr_t kPointerMask = ~kCapacityMask;
254+
255+
// Base pointer of the bucket block, obtained by masking the capacity bits out
// of buckets_tagged; nullptr while buckets_tagged is 0.
// NOTE(review): requires the block address to have zero low 8 bits (256-byte
// alignment) -- presumably provided by allocate_bucket_block; confirm.
Bucket *buckets_raw() {
256+
return reinterpret_cast<Bucket*>(buckets_tagged & kPointerMask);
257+
}
258+
// Const-qualified counterpart of buckets_raw().
const Bucket *buckets_raw() const {
259+
return reinterpret_cast<const Bucket*>(buckets_tagged & kPointerMask);
260+
}
262+
// Pointer one element past the block base, i.e. the buckets after the
// deterministic bucket held in element 0.
// NOTE(review): performs arithmetic on a null pointer when buckets_tagged is
// 0 -- confirm callers check is_initialized() first.
Bucket *buckets() {
262+
return buckets_raw() + 1;
263+
}
264+
// Const-qualified counterpart of buckets().
const Bucket *buckets() const {
265+
return buckets_raw() + 1;
266+
}
268+
// Reference to the block's element 0, which serves as the deterministic
// bucket in this layout.
// NOTE(review): unguarded dereference; null access if buckets_tagged is 0 --
// confirm callers guard it.
Bucket &deterministic_bucket_ref() {
268+
return buckets_raw()[0];
269+
}
270+
// Const-qualified counterpart of deterministic_bucket_ref().
const Bucket &deterministic_bucket_ref() const {
271+
return buckets_raw()[0];
272+
}
274+
// Capacity as stored in the low 8 bits of buckets_tagged.
uint8_t capacity() const {
274+
return static_cast<uint8_t>(buckets_tagged & kCapacityMask);
275+
}
277+
278+
// Declarations only; bodies are not visible in this view.
// Presumably combines raw_buckets and capacity into buckets_tagged -- confirm.
void set_tagged_buckets(Bucket *raw_buckets, uint8_t capacity);
278+
// Presumably allocates an aligned block for the deterministic bucket plus
// `capacity` buckets (this class previously used hwy::AlignedFreeUniquePtr) --
// confirm alignment and ownership in the definition.
static Bucket *allocate_bucket_block(uint8_t capacity);
279+
// Presumably releases a block from allocate_bucket_block -- confirm.
static void free_bucket_block(Bucket *raw_buckets);
190281
public:
191282
// Replaces the stored seed with new_seed.
void set_seed(uint64_t new_seed) { seed = new_seed; };
192283
// Returns the stored seed.
uint64_t get_seed() const { return seed; };
@@ -224,7 +315,7 @@ class ResizeableAlignedSketchColumn {
224315
}
225316

226317
// True once a bucket block is attached; the commit replaces the check on the
// removed `aligned_buckets` smart pointer with one on the pointer bits of
// buckets_tagged.
inline bool is_initialized() const {
227-
return aligned_buckets != nullptr;
318+
return buckets_raw() != nullptr;
228319
}
229320

230321
void reset_sample_state() {
@@ -239,11 +330,11 @@ class ResizeableAlignedSketchColumn {
239330

240331
// Debug dump: header line, capacity, seed, deterministic bucket, then one line
// per bucket; returns the stream.
// NOTE(review): the header text says "ResizeableSketchColumn" although this
// overload prints a ResizeableAlignedSketchColumn -- likely a copy/paste label.
// NOTE(review): `(int)sketch.seed` truncates the uint64_t seed.
// NOTE(review): deterministic_bucket_ref() is read without an
// is_initialized() check; null access if buckets_tagged is 0 -- confirm.
friend std::ostream& operator<<(std::ostream &os, const ResizeableAlignedSketchColumn&sketch) {
241332
os << "ResizeableSketchColumn: " << std::endl;
242-
os << "Capacity: " << (int)sketch.capacity << std::endl;
333+
os << "Capacity: " << (int)sketch.capacity() << std::endl;
243334
os << "Column Seed: " << (int)sketch.seed << std::endl;
244-
os << "Deterministic Bucket: " << sketch.deterministic_bucket << std::endl;
245-
for (size_t i = 0; i < sketch.capacity; ++i) {
246-
os << "Bucket[" << i << "]: " << sketch.aligned_buckets[i] << std::endl;
335+
os << "Deterministic Bucket: " << sketch.deterministic_bucket_ref() << std::endl;
336+
for (size_t i = 0; i < sketch.capacity(); ++i) {
337+
os << "Bucket[" << i << "]: " << sketch.buckets()[i] << std::endl;
247338
}
248339
return os;
249340
}
@@ -254,10 +345,13 @@ class ResizeableAlignedSketchColumn {
254345
return false;
255346
}
256347
for (size_t i = 0; i < other_depth; ++i) {
257-
if (aligned_buckets[i] != other.aligned_buckets[i]) {
348+
if (buckets()[i] != other.buckets()[i]) {
258349
return false;
259350
}
260351
}
352+
if (deterministic_bucket_ref() != other.deterministic_bucket_ref()) {
353+
return false;
354+
}
261355
return true;
262356
}
263357
private:

0 commit comments

Comments
 (0)