1212*/
1313class FixedSizeSketchColumn {
1414private:
15- static uint64_t seed;
1615
17- Bucket * buckets;
16+ std::unique_ptr< Bucket[]> buckets;
1817 Bucket deterministic_bucket = {0 , 0 };
18+ uint16_t col_idx; // determines column seeding
1919 uint8_t capacity;
20- uint8_t col_idx; // determines column seeding
2120public:
21+ static uint64_t seed;
2222 static void set_seed (uint64_t new_seed) {
2323 seed = new_seed;
2424 };
2525 static const uint64_t get_seed () {
2626 return seed;
2727 };
2828
29- FixedSizeSketchColumn (uint8_t capacity, uint8_t col_idx);
29+ FixedSizeSketchColumn (uint8_t capacity, uint16_t col_idx);
30+ FixedSizeSketchColumn (const FixedSizeSketchColumn &other);
3031 ~FixedSizeSketchColumn ();
3132 SketchSample<vec_t > sample () const ;
3233 void clear ();
3334 void update (const vec_t update);
3435 void merge (FixedSizeSketchColumn &other);
3536 uint8_t get_depth () const ;
3637 void serialize (std::ostream &binary_out) const ;
38+ friend std::ostream& operator <<(std::ostream &os, const FixedSizeSketchColumn &sketch) {
39+ os << " FixedSizeSketchColumn: " << std::endl;
40+ os << " Capacity: " << (int )sketch.capacity << std::endl;
41+ os << " Column Index: " << (int )sketch.col_idx << std::endl;
42+ os << " Deterministic Bucket: " << sketch.deterministic_bucket << std::endl;
43+ for (size_t i = 0 ; i < sketch.capacity ; ++i) {
44+ os << " Bucket[" << i << " ]: " << sketch.buckets [i] << std::endl;
45+ }
46+ return os;
47+ }
3748};
3849
39- FixedSizeSketchColumn::FixedSizeSketchColumn (uint8_t capacity, uint8_t col_idx) :
50+ FixedSizeSketchColumn::FixedSizeSketchColumn (uint8_t capacity, uint16_t col_idx) :
4051 capacity(capacity), col_idx(col_idx) {
41- buckets = new Bucket[capacity];
42- // std::memset(buckets, 0, capacity * sizeof(Bucket));
52+ buckets = std::make_unique<Bucket[]>(capacity);
53+ // std::memset(buckets.get(), 0, capacity * sizeof(Bucket));
54+ }
55+
56+ FixedSizeSketchColumn::FixedSizeSketchColumn (const FixedSizeSketchColumn &other) :
57+ capacity(other.capacity), col_idx(other.col_idx), deterministic_bucket(other.deterministic_bucket) {
58+ buckets = std::make_unique<Bucket[]>(capacity);
59+ std::memcpy (buckets.get (), other.buckets .get (), capacity * sizeof (Bucket));
4360}
4461
4562FixedSizeSketchColumn::~FixedSizeSketchColumn () {
46- delete[] buckets;
63+ // delete[] buckets;
4764}
4865
4966uint8_t FixedSizeSketchColumn::get_depth () const {
@@ -56,7 +73,7 @@ uint8_t FixedSizeSketchColumn::get_depth() const {
5673
5774// TODO - implement actual deserialization
5875void FixedSizeSketchColumn::serialize (std::ostream &binary_out) const {
59- binary_out.write ((char *) buckets, capacity * sizeof (Bucket));
76+ binary_out.write ((char *) buckets. get () , capacity * sizeof (Bucket));
6077 binary_out.write ((char *) &deterministic_bucket, sizeof (Bucket));
6178 binary_out.write ((char *) &capacity, sizeof (uint8_t ));
6279 binary_out.write ((char *) &col_idx, sizeof (uint8_t ));
@@ -75,7 +92,7 @@ SketchSample<vec_t> FixedSizeSketchColumn::sample() const {
7592}
7693
7794void FixedSizeSketchColumn::clear () {
78- std::memset (buckets, 0 , capacity * sizeof (Bucket));
95+ std::memset (buckets. get () , 0 , capacity * sizeof (Bucket));
7996 deterministic_bucket = {0 , 0 };
8097}
8198
@@ -89,6 +106,7 @@ void FixedSizeSketchColumn::merge(FixedSizeSketchColumn &other) {
89106void FixedSizeSketchColumn::update (const vec_t update) {
90107 vec_hash_t checksum = Bucket_Boruvka::get_index_hash (update, seed);
91108 col_hash_t depth = Bucket_Boruvka::get_index_depth (update, seed, col_idx, capacity);
109+ assert (depth < capacity);
92110 buckets[depth] ^= {update, checksum};
93111 deterministic_bucket ^= {update, checksum};
94112}
@@ -98,16 +116,16 @@ void FixedSizeSketchColumn::update(const vec_t update) {
98116class ResizeableSketchColumn {
99117private:
100118 static uint64_t seed;
101-
102119 hwy::AlignedFreeUniquePtr<Bucket[]> aligned_buckets;
103120 Bucket deterministic_bucket = {0 , 0 };
121+ uint16_t col_idx; // determines column seeding
104122 uint8_t capacity;
105- uint8_t col_idx; // determines column seeding
106123public:
107124 static void set_seed (uint64_t new_seed) { seed = new_seed; };
108125 static const uint64_t get_seed () { return seed; };
109126
110- ResizeableSketchColumn (uint8_t start_capacity, uint8_t col_idx);
127+ ResizeableSketchColumn (uint8_t start_capacity, uint16_t col_idx);
128+ ResizeableSketchColumn (const ResizeableSketchColumn &other);
111129 ~ResizeableSketchColumn ();
112130 SketchSample<vec_t > sample () const ;
113131 void clear ();
@@ -119,15 +137,27 @@ class ResizeableSketchColumn {
119137 void reallocate (uint8_t new_capacity);
120138};
121139
140+ uint64_t ResizeableSketchColumn::seed = 0 ;
141+ uint64_t FixedSizeSketchColumn::seed = 0 ;
142+
122143
123- ResizeableSketchColumn::ResizeableSketchColumn (uint8_t start_capacity, uint8_t col_idx) :
144+ ResizeableSketchColumn::ResizeableSketchColumn (uint8_t start_capacity, uint16_t col_idx) :
124145 capacity(start_capacity), col_idx(col_idx) {
125146
126147 // auto aligned_memptr = hwy::MakeUniqueAlignedArray<Bucket>(start_capacity);
127148 aligned_buckets = hwy::AllocateAligned<Bucket>(start_capacity);
128149 std::memset (aligned_buckets.get (), 0 , capacity * sizeof (Bucket));
129150}
130151
152+ ResizeableSketchColumn::ResizeableSketchColumn (const ResizeableSketchColumn &other) :
153+ capacity(other.capacity), col_idx(other.col_idx), deterministic_bucket(other.deterministic_bucket) {
154+ aligned_buckets = hwy::AllocateAligned<Bucket>(capacity);
155+ std::memcpy (aligned_buckets.get (), other.aligned_buckets .get (), capacity * sizeof (Bucket));
156+ }
157+
158+ ResizeableSketchColumn::~ResizeableSketchColumn () {
159+ }
160+
131161/*
132162 Note this DROPS the contents if allocated down too much.
133163*/
@@ -154,13 +184,29 @@ void ResizeableSketchColumn::serialize(std::ostream &binary_out) const {
154184 binary_out.write ((char *) &col_idx, sizeof (uint8_t ));
155185}
156186
187+ SketchSample<vec_t > ResizeableSketchColumn::sample () const {
188+ if (Bucket_Boruvka::is_empty (deterministic_bucket)) {
189+ return {0 , ZERO}; // the "first" bucket is deterministic so if all zero then no edges to return
190+ }
191+ for (size_t i = capacity; i > 0 ; --i) {
192+ if (Bucket_Boruvka::is_good (aligned_buckets[i - 1 ], seed)) {
193+ return {aligned_buckets[i - 1 ].alpha , GOOD};
194+ }
195+ }
196+ return {0 , FAIL};
197+ }
198+
157199void ResizeableSketchColumn::update (const vec_t update) {
158200 vec_hash_t checksum = Bucket_Boruvka::get_index_hash (update, seed);
159- col_hash_t depth = Bucket_Boruvka::get_index_depth (update, seed, col_idx, capacity);
201+ // TODO - remove magic number
202+ // TODO - get_index_depth needs to be fixed. hashes need to be longer
203+ // than 32 bits if we're not using the deep bucket buffer idea.
204+ col_hash_t depth = Bucket_Boruvka::get_index_depth (update, seed, col_idx, 32 );
160205 deterministic_bucket ^= {update, checksum};
161206
162- if (depth >= capacity) {
163- reallocate (depth + 4 );
207+ while (depth >= capacity) {
208+ // first multple of 4 larger than or equal to depth
209+ reallocate (capacity + 4 );
164210 }
165211 aligned_buckets[depth] ^= {update, checksum};
166212}
@@ -187,6 +233,7 @@ uint8_t ResizeableSketchColumn::get_depth() const {
187233}
188234
189235
236+
190237static_assert (SketchColumnConcept<FixedSizeSketchColumn, vec_t >,
191238 " FixedSizeSketchColumn does not satisfy SketchColumnConcept" );
192239
0 commit comments