Skip to content

Commit 7fda92c

Browse files
committed
tried to optimize with ll but didn't really work
1 parent 0936d78 commit 7fda92c

File tree

3 files changed

+28
-110
lines changed

3 files changed

+28
-110
lines changed

include/sparse_sketch.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ class SparseSketch {
7878
size_t sparse_data_size = ceil(double(sparse_capacity) * sizeof(SparseBucket) / sizeof(Bucket));
7979
size_t ll_metadata_size = ceil((double(num_columns) + 1) * sizeof(uint8_t) / sizeof(Bucket));
8080

81-
void update_sparse(uint8_t col, SparseBucket to_add, bool realloc_if_needed = true);
81+
void update_sparse(uint8_t col, const SparseBucket &to_add);
8282
SketchSample sample_sparse(size_t first_col, size_t end_col);
8383

8484
inline uint8_t remove_ll_head(size_t col) {
@@ -106,7 +106,8 @@ class SparseSketch {
106106
inline void remove_from_ll(SparseBucket& bkt_to_remove, SparseBucket &prev) {
107107
prev.next = bkt_to_remove.next;
108108
}
109-
inline bool merge_sparse_bkt(uint8_t our_idx, SparseBucket& oth, uint8_t prev_idx, size_t col) {
109+
inline bool merge_sparse_bkt(uint8_t our_idx, const SparseBucket& oth, uint8_t prev_idx,
110+
size_t col) {
110111
SparseBucket &ours = sparse_buckets[our_idx];
111112
ours.bkt.alpha ^= oth.bkt.alpha;
112113
ours.bkt.gamma ^= oth.bkt.gamma;
@@ -161,10 +162,8 @@ class SparseSketch {
161162
}
162163

163164
// given another SparseSketch column, merge it into ours
164-
void merge_sparse_column(SparseBucket *oth_sparse_buckets, uint8_t *oth_ll_metadata, size_t col);
165-
166-
void validate();
167-
165+
void merge_sparse_column(const SparseBucket* oth_sparse_buckets, const uint8_t* oth_ll_metadata,
166+
size_t col);
168167
public:
169168
/**
170169
* The below constructors use vector length as their input. However, in graph sketching our input

src/sparse_sketch.cpp

Lines changed: 18 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -183,15 +183,15 @@ void SparseSketch::reallocate_if_needed(int delta) {
183183
// +1 if we added a new bucket value
184184
// 0 if the bucket was found and updated (but not cleared)
185185
// -1 if the bucket was found and cleared of all content
186-
void SparseSketch::update_sparse(uint8_t col, SparseBucket to_add, bool realloc_if_needed) {
186+
void SparseSketch::update_sparse(uint8_t col, const SparseBucket &to_add) {
187187
uint8_t next_ptr = ll_metadata[col];
188188
uint8_t prev = uint8_t(-1);
189189
while (next_ptr != uint8_t(-1)) {
190190
if (sparse_buckets[next_ptr].row == to_add.row) {
191191
bool removed = merge_sparse_bkt(next_ptr, to_add, prev, col);
192192
if (removed) {
193193
number_of_sparse_buckets -= 1;
194-
if (realloc_if_needed) reallocate_if_needed(-1);
194+
reallocate_if_needed(-1);
195195
}
196196
return;
197197
} else if (sparse_buckets[next_ptr].row > to_add.row) {
@@ -221,7 +221,7 @@ void SparseSketch::update_sparse(uint8_t col, SparseBucket to_add, bool realloc_
221221
// std::cerr << "Placed new bucket in column " << size_t(prev) << "->" << size_t(sparse_buckets[prev].next) << "->" << size_t(sparse_buckets[free_bucket].next) << std::endl;
222222
}
223223

224-
if (realloc_if_needed) reallocate_if_needed(1);
224+
reallocate_if_needed(1);
225225
}
226226

227227
// sample a good bucket from the sparse region if one exists.
@@ -261,8 +261,6 @@ void SparseSketch::update(const vec_t update_idx) {
261261
}
262262
}
263263
}
264-
265-
validate();
266264
}
267265

268266
// TODO: Switch the L0_SAMPLING flag to instead affect query procedure.
@@ -373,38 +371,16 @@ ExhaustiveSketchSample SparseSketch::exhaustive_sample() {
373371
return {ret, GOOD};
374372
}
375373

376-
void SparseSketch::merge_sparse_column(SparseBucket *oth_sparse_buckets, uint8_t *oth_ll_metadata,
377-
size_t col) {
374+
void SparseSketch::merge_sparse_column(const SparseBucket *oth_sparse_buckets,
375+
const uint8_t *oth_ll_metadata, size_t col) {
378376
// std::cerr << "Merging sparse column: " << col << std::endl;
379-
380-
// std::cerr << "Our column" << std::endl;
381-
// uint8_t idx = ll_metadata[col];
382-
// while (idx != uint8_t(-1)) {
383-
// bool good = Bucket_Boruvka::is_good(sparse_buckets[idx].bkt, checksum_seed());
384-
// std::cerr << "i: " << size_t(idx) << " n: " << size_t(sparse_buckets[idx].next) << " r:"
385-
// << size_t(sparse_buckets[idx].row) << " := a:" << sparse_buckets[idx].bkt.alpha
386-
// << " c:" << sparse_buckets[idx].bkt.gamma << (good ? " good" : " bad") << std::endl;
387-
// idx = sparse_buckets[idx].next;
388-
// }
389-
390-
// std::cerr << "Oth column" << std::endl;
391-
// idx = oth_ll_metadata[col];
392-
// while (idx != uint8_t(-1)) {
393-
// bool good = Bucket_Boruvka::is_good(oth_sparse_buckets[idx].bkt, checksum_seed());
394-
// std::cerr << "i: " << size_t(idx) << " n: " << size_t(oth_sparse_buckets[idx].next) << " r:"
395-
// << size_t(oth_sparse_buckets[idx].row) << " := a:" << oth_sparse_buckets[idx].bkt.alpha
396-
// << " c:" << oth_sparse_buckets[idx].bkt.gamma << (good ? " good" : " bad") << std::endl;
397-
// idx = oth_sparse_buckets[idx].next;
398-
// }
399-
400-
401377
uint8_t oth_idx = oth_ll_metadata[col];
402378
uint8_t our_idx = ll_metadata[col];
403379
uint8_t prev = uint8_t(-1);
404380

405381
// merge column until one runs out
406382
while (oth_idx != uint8_t(-1) && our_idx != uint8_t(-1)) {
407-
SparseBucket& oth_sparse = oth_sparse_buckets[oth_idx];
383+
const SparseBucket& oth_sparse = oth_sparse_buckets[oth_idx];
408384
SparseBucket& our_sparse = sparse_buckets[our_idx];
409385

410386
if (oth_sparse.row < num_dense_rows) {
@@ -453,7 +429,14 @@ void SparseSketch::merge_sparse_column(SparseBucket *oth_sparse_buckets, uint8_t
453429

454430
// if there's more in the other column, merge that stuff in
455431
while (oth_idx != uint8_t(-1)) {
456-
SparseBucket& oth_sparse = oth_sparse_buckets[oth_idx];
432+
const SparseBucket& oth_sparse = oth_sparse_buckets[oth_idx];
433+
if (oth_sparse.row < num_dense_rows) {
434+
bucket(col, oth_sparse.row).alpha ^= oth_sparse.bkt.alpha;
435+
bucket(col, oth_sparse.row).gamma ^= oth_sparse.bkt.gamma;
436+
oth_idx = oth_sparse.next;
437+
continue;
438+
}
439+
457440
uint8_t free_bucket = claim_free_bucket();
458441
sparse_buckets[free_bucket] = oth_sparse;
459442
if (prev == uint8_t(-1)) {
@@ -462,53 +445,11 @@ void SparseSketch::merge_sparse_column(SparseBucket *oth_sparse_buckets, uint8_t
462445
insert_to_ll(free_bucket, sparse_buckets[prev]);
463446
}
464447
number_of_sparse_buckets += 1;
465-
reallocate_if_needed(1); // TODO: There could be an edge case where the sparse bucket we're looking at becomes dense
448+
reallocate_if_needed(1);
466449
prev = free_bucket;
467450
if (ll_metadata[col] == uint8_t(-1)) prev = uint8_t(-1);
468451
oth_idx = oth_sparse.next;
469452
}
470-
471-
validate();
472-
}
473-
474-
void SparseSketch::validate() {
475-
size_t num_alloced = 0;
476-
for (size_t c = 0; c < num_columns; c++) {
477-
uint8_t idx = ll_metadata[c];
478-
while (idx != uint8_t(-1)) {
479-
if (Bucket_Boruvka::is_empty(sparse_buckets[idx].bkt)) {
480-
std::cerr << "ERROR: Empty bucket found in column " << c << std::endl;
481-
std::cerr << *this << std::endl;
482-
assert(false);
483-
} else {
484-
num_alloced += 1;
485-
}
486-
idx = sparse_buckets[idx].next;
487-
}
488-
}
489-
size_t num_free = 0;
490-
uint8_t idx = ll_metadata[num_columns];
491-
while (idx != uint8_t(-1)) {
492-
if (!Bucket_Boruvka::is_empty(sparse_buckets[idx].bkt)) {
493-
std::cerr << "ERROR: Non-empty bucket found in free list!" << std::endl;
494-
std::cerr << *this << std::endl;
495-
assert(false);
496-
} else {
497-
num_free += 1;
498-
}
499-
idx = sparse_buckets[idx].next;
500-
}
501-
502-
if (num_alloced != number_of_sparse_buckets) {
503-
std::cerr << "ERROR: number of sparse buckets does not match expectation!" << std::endl;
504-
std::cerr << *this << std::endl;
505-
assert(false);
506-
}
507-
if (num_free != sparse_capacity - number_of_sparse_buckets) {
508-
std::cerr << "ERROR; number of free buckets does not match expectation!" << std::endl;
509-
std::cerr << *this << std::endl;
510-
assert(num_free == sparse_capacity - number_of_sparse_buckets);
511-
}
512453
}
513454

514455
void SparseSketch::merge(const SparseSketch &other) {
@@ -532,8 +473,6 @@ void SparseSketch::merge(const SparseSketch &other) {
532473
SparseBucket sparse_bkt;
533474
sparse_bkt.row = r;
534475
sparse_bkt.bkt = other.bucket(c, r);
535-
536-
// TODO: This could be made a push-front
537476
update_sparse(c, sparse_bkt);
538477
}
539478
}
@@ -585,20 +524,7 @@ void SparseSketch::range_merge(const SparseSketch &other, size_t start_sample, s
585524

586525
// Merge all sparse buckets from other sketch into this one
587526
for (size_t c = start_column; c < end_column; c++) {
588-
uint8_t this_idx = ll_metadata[c];
589-
uint8_t oth_idx = other.ll_metadata[c];
590-
591-
while (oth_idx != uint8_t(-1)) {
592-
if (other.sparse_buckets[oth_idx].row < num_dense_rows) {
593-
auto &bkt = bucket(c, other.sparse_buckets[oth_idx].row);
594-
bkt.alpha ^= other.sparse_buckets[oth_idx].bkt.alpha;
595-
bkt.gamma ^= other.sparse_buckets[oth_idx].bkt.gamma;
596-
} else {
597-
// TODO: This can be made faster by utilizing this_idx and performing a merge operation
598-
update_sparse(c, other.sparse_buckets[oth_idx]);
599-
}
600-
oth_idx = other.sparse_buckets[oth_idx].next;
601-
}
527+
merge_sparse_column(other.sparse_buckets, other.ll_metadata, c);
602528
}
603529
// std::cerr << "SKETCH AFTER MERGE" << std::endl;
604530
// std::cerr << *this << std::endl;
@@ -629,20 +555,7 @@ void SparseSketch::merge_raw_bucket_buffer(const Bucket *raw_buckets, size_t n_r
629555

630556
// Merge all sparse buckets from other sketch into this one
631557
for (size_t c = 0; c < num_columns; c++) {
632-
uint8_t this_idx = ll_metadata[c];
633-
uint8_t oth_idx = raw_metadata[c];
634-
635-
while (oth_idx != uint8_t(-1)) {
636-
if (raw_sparse[oth_idx].row < num_dense_rows) {
637-
auto &bkt = bucket(c, raw_sparse[oth_idx].row);
638-
bkt.alpha ^= raw_sparse[oth_idx].bkt.alpha;
639-
bkt.gamma ^= raw_sparse[oth_idx].bkt.gamma;
640-
} else {
641-
// TODO: This can be made faster by utilizing this_idx and performing a merge operation
642-
update_sparse(c, raw_sparse[oth_idx]);
643-
}
644-
oth_idx = raw_sparse[oth_idx].next;
645-
}
558+
merge_sparse_column(raw_sparse, raw_metadata, c);
646559
}
647560
}
648561

@@ -666,6 +579,7 @@ std::ostream &operator<<(std::ostream &os, const SparseSketch &sketch) {
666579

667580
os << " a:" << a << " c:" << c << (good ? " good" : " bad") << std::endl;
668581

582+
os << "Number of dense rows = " << sketch.num_dense_rows << std::endl;
669583
for (unsigned i = 0; i < sketch.num_columns; ++i) {
670584
for (unsigned j = 0; j < sketch.num_dense_rows; ++j) {
671585
Bucket bkt = sketch.bucket(i, j);

test/sketch_test.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ void test_sketch_merge(unsigned long num_sketches,
167167
sketch2.update(test_vec2.get_update(j));
168168
}
169169
sketch1.merge(sketch2);
170+
Sketch backup(sketch1);
170171
try {
171172
SketchSample query_ret = sketch1.sample();
172173
vec_t res_idx = query_ret.idx;
@@ -177,6 +178,10 @@ void test_sketch_merge(unsigned long num_sketches,
177178
if (test_vec1.get_entry(res_idx) == test_vec2.get_entry(res_idx)) {
178179
sample_incorrect_failures++;
179180
std::cerr << "GOT A SAMPLE INCORRECT ERROR!" << std::endl;
181+
std::cerr << "Got: " << res_idx << std::endl;
182+
std::cerr << sketch1 << std::endl;
183+
std::cerr << backup << std::endl;
184+
std::cerr << sketch2 << std::endl;
180185
}
181186
}
182187
else if (ret_code == ZERO) {

0 commit comments

Comments
 (0)