@@ -183,15 +183,15 @@ void SparseSketch::reallocate_if_needed(int delta) {
183183// +1 if we added a new bucket value
184184// 0 if the bucket was found and updated (but not cleared)
185185// -1 if the bucket was found and cleared of all content
186- void SparseSketch::update_sparse (uint8_t col, SparseBucket to_add, bool realloc_if_needed ) {
186+ void SparseSketch::update_sparse (uint8_t col, const SparseBucket & to_add) {
187187 uint8_t next_ptr = ll_metadata[col];
188188 uint8_t prev = uint8_t (-1 );
189189 while (next_ptr != uint8_t (-1 )) {
190190 if (sparse_buckets[next_ptr].row == to_add.row ) {
191191 bool removed = merge_sparse_bkt (next_ptr, to_add, prev, col);
192192 if (removed) {
193193 number_of_sparse_buckets -= 1 ;
194- if (realloc_if_needed) reallocate_if_needed (-1 );
194+ reallocate_if_needed (-1 );
195195 }
196196 return ;
197197 } else if (sparse_buckets[next_ptr].row > to_add.row ) {
@@ -221,7 +221,7 @@ void SparseSketch::update_sparse(uint8_t col, SparseBucket to_add, bool realloc_
221221 // std::cerr << "Placed new bucket in column " << size_t(prev) << "->" << size_t(sparse_buckets[prev].next) << "->" << size_t(sparse_buckets[free_bucket].next) << std::endl;
222222 }
223223
224- if (realloc_if_needed) reallocate_if_needed (1 );
224+ reallocate_if_needed (1 );
225225}
226226
227227// sample a good bucket from the sparse region if one exists.
@@ -261,8 +261,6 @@ void SparseSketch::update(const vec_t update_idx) {
261261 }
262262 }
263263 }
264-
265- validate ();
266264}
267265
268266// TODO: Switch the L0_SAMPLING flag to instead affect query procedure.
@@ -373,38 +371,16 @@ ExhaustiveSketchSample SparseSketch::exhaustive_sample() {
373371 return {ret, GOOD};
374372}
375373
376- void SparseSketch::merge_sparse_column (SparseBucket *oth_sparse_buckets, uint8_t *oth_ll_metadata ,
377- size_t col) {
374+ void SparseSketch::merge_sparse_column (const SparseBucket *oth_sparse_buckets,
375+ const uint8_t *oth_ll_metadata, size_t col) {
378376 // std::cerr << "Merging sparse column: " << col << std::endl;
379-
380- // std::cerr << "Our column" << std::endl;
381- // uint8_t idx = ll_metadata[col];
382- // while (idx != uint8_t(-1)) {
383- // bool good = Bucket_Boruvka::is_good(sparse_buckets[idx].bkt, checksum_seed());
384- // std::cerr << "i: " << size_t(idx) << " n: " << size_t(sparse_buckets[idx].next) << " r:"
385- // << size_t(sparse_buckets[idx].row) << " := a:" << sparse_buckets[idx].bkt.alpha
386- // << " c:" << sparse_buckets[idx].bkt.gamma << (good ? " good" : " bad") << std::endl;
387- // idx = sparse_buckets[idx].next;
388- // }
389-
390- // std::cerr << "Oth column" << std::endl;
391- // idx = oth_ll_metadata[col];
392- // while (idx != uint8_t(-1)) {
393- // bool good = Bucket_Boruvka::is_good(oth_sparse_buckets[idx].bkt, checksum_seed());
394- // std::cerr << "i: " << size_t(idx) << " n: " << size_t(oth_sparse_buckets[idx].next) << " r:"
395- // << size_t(oth_sparse_buckets[idx].row) << " := a:" << oth_sparse_buckets[idx].bkt.alpha
396- // << " c:" << oth_sparse_buckets[idx].bkt.gamma << (good ? " good" : " bad") << std::endl;
397- // idx = oth_sparse_buckets[idx].next;
398- // }
399-
400-
401377 uint8_t oth_idx = oth_ll_metadata[col];
402378 uint8_t our_idx = ll_metadata[col];
403379 uint8_t prev = uint8_t (-1 );
404380
405381 // merge column until one runs out
406382 while (oth_idx != uint8_t (-1 ) && our_idx != uint8_t (-1 )) {
407- SparseBucket& oth_sparse = oth_sparse_buckets[oth_idx];
383+ const SparseBucket& oth_sparse = oth_sparse_buckets[oth_idx];
408384 SparseBucket& our_sparse = sparse_buckets[our_idx];
409385
410386 if (oth_sparse.row < num_dense_rows) {
@@ -453,7 +429,14 @@ void SparseSketch::merge_sparse_column(SparseBucket *oth_sparse_buckets, uint8_t
453429
454430 // if there's more in the other column, merge that stuff in
455431 while (oth_idx != uint8_t (-1 )) {
456- SparseBucket& oth_sparse = oth_sparse_buckets[oth_idx];
432+ const SparseBucket& oth_sparse = oth_sparse_buckets[oth_idx];
433+ if (oth_sparse.row < num_dense_rows) {
434+ bucket (col, oth_sparse.row ).alpha ^= oth_sparse.bkt .alpha ;
435+ bucket (col, oth_sparse.row ).gamma ^= oth_sparse.bkt .gamma ;
436+ oth_idx = oth_sparse.next ;
437+ continue ;
438+ }
439+
457440 uint8_t free_bucket = claim_free_bucket ();
458441 sparse_buckets[free_bucket] = oth_sparse;
459442 if (prev == uint8_t (-1 )) {
@@ -462,53 +445,11 @@ void SparseSketch::merge_sparse_column(SparseBucket *oth_sparse_buckets, uint8_t
462445 insert_to_ll (free_bucket, sparse_buckets[prev]);
463446 }
464447 number_of_sparse_buckets += 1 ;
465- reallocate_if_needed (1 ); // TODO: There could be an edge case where the sparse bucket we're looking at becomes dense
448+ reallocate_if_needed (1 );
466449 prev = free_bucket;
467450 if (ll_metadata[col] == uint8_t (-1 )) prev = uint8_t (-1 );
468451 oth_idx = oth_sparse.next ;
469452 }
470-
471- validate ();
472- }
473-
474- void SparseSketch::validate () {
475- size_t num_alloced = 0 ;
476- for (size_t c = 0 ; c < num_columns; c++) {
477- uint8_t idx = ll_metadata[c];
478- while (idx != uint8_t (-1 )) {
479- if (Bucket_Boruvka::is_empty (sparse_buckets[idx].bkt )) {
480- std::cerr << " ERROR: Empty bucket found in column " << c << std::endl;
481- std::cerr << *this << std::endl;
482- assert (false );
483- } else {
484- num_alloced += 1 ;
485- }
486- idx = sparse_buckets[idx].next ;
487- }
488- }
489- size_t num_free = 0 ;
490- uint8_t idx = ll_metadata[num_columns];
491- while (idx != uint8_t (-1 )) {
492- if (!Bucket_Boruvka::is_empty (sparse_buckets[idx].bkt )) {
493- std::cerr << " ERROR: Non-empty bucket found in free list!" << std::endl;
494- std::cerr << *this << std::endl;
495- assert (false );
496- } else {
497- num_free += 1 ;
498- }
499- idx = sparse_buckets[idx].next ;
500- }
501-
502- if (num_alloced != number_of_sparse_buckets) {
503- std::cerr << " ERROR: number of sparse buckets does not match expectation!" << std::endl;
504- std::cerr << *this << std::endl;
505- assert (false );
506- }
507- if (num_free != sparse_capacity - number_of_sparse_buckets) {
508- std::cerr << " ERROR; number of free buckets does not match expectation!" << std::endl;
509- std::cerr << *this << std::endl;
510- assert (num_free == sparse_capacity - number_of_sparse_buckets);
511- }
512453}
513454
514455void SparseSketch::merge (const SparseSketch &other) {
@@ -532,8 +473,6 @@ void SparseSketch::merge(const SparseSketch &other) {
532473 SparseBucket sparse_bkt;
533474 sparse_bkt.row = r;
534475 sparse_bkt.bkt = other.bucket (c, r);
535-
536- // TODO: This could be made a push-front
537476 update_sparse (c, sparse_bkt);
538477 }
539478 }
@@ -585,20 +524,7 @@ void SparseSketch::range_merge(const SparseSketch &other, size_t start_sample, s
585524
586525 // Merge all sparse buckets from other sketch into this one
587526 for (size_t c = start_column; c < end_column; c++) {
588- uint8_t this_idx = ll_metadata[c];
589- uint8_t oth_idx = other.ll_metadata [c];
590-
591- while (oth_idx != uint8_t (-1 )) {
592- if (other.sparse_buckets [oth_idx].row < num_dense_rows) {
593- auto &bkt = bucket (c, other.sparse_buckets [oth_idx].row );
594- bkt.alpha ^= other.sparse_buckets [oth_idx].bkt .alpha ;
595- bkt.gamma ^= other.sparse_buckets [oth_idx].bkt .gamma ;
596- } else {
597- // TODO: This can be made faster by utilizing this_idx and performing a merge operation
598- update_sparse (c, other.sparse_buckets [oth_idx]);
599- }
600- oth_idx = other.sparse_buckets [oth_idx].next ;
601- }
527+ merge_sparse_column (other.sparse_buckets , other.ll_metadata , c);
602528 }
603529 // std::cerr << "SKETCH AFTER MERGE" << std::endl;
604530 // std::cerr << *this << std::endl;
@@ -629,20 +555,7 @@ void SparseSketch::merge_raw_bucket_buffer(const Bucket *raw_buckets, size_t n_r
629555
630556 // Merge all sparse buckets from other sketch into this one
631557 for (size_t c = 0 ; c < num_columns; c++) {
632- uint8_t this_idx = ll_metadata[c];
633- uint8_t oth_idx = raw_metadata[c];
634-
635- while (oth_idx != uint8_t (-1 )) {
636- if (raw_sparse[oth_idx].row < num_dense_rows) {
637- auto &bkt = bucket (c, raw_sparse[oth_idx].row );
638- bkt.alpha ^= raw_sparse[oth_idx].bkt .alpha ;
639- bkt.gamma ^= raw_sparse[oth_idx].bkt .gamma ;
640- } else {
641- // TODO: This can be made faster by utilizing this_idx and performing a merge operation
642- update_sparse (c, raw_sparse[oth_idx]);
643- }
644- oth_idx = raw_sparse[oth_idx].next ;
645- }
558+ merge_sparse_column (raw_sparse, raw_metadata, c);
646559 }
647560}
648561
@@ -666,6 +579,7 @@ std::ostream &operator<<(std::ostream &os, const SparseSketch &sketch) {
666579
667580 os << " a:" << a << " c:" << c << (good ? " good" : " bad" ) << std::endl;
668581
582+ os << " Number of dense rows = " << sketch.num_dense_rows << std::endl;
669583 for (unsigned i = 0 ; i < sketch.num_columns ; ++i) {
670584 for (unsigned j = 0 ; j < sketch.num_dense_rows ; ++j) {
671585 Bucket bkt = sketch.bucket (i, j);
0 commit comments