GraphStreamingProject
diff --git a/‎experiment/cache_exp.cpp‎
Lines changed: 2 additions & 2 deletions b/‎experiment/cache_exp.cpp‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎experiment/standalone_exp.cpp‎
Lines changed: 1 addition & 1 deletion b/‎experiment/standalone_exp.cpp‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎include/cache_guttering.h‎
Lines changed: 77 additions & 23 deletions b/‎include/cache_guttering.h‎
Lines changed: 77 additions & 23 deletions
diff --git a/‎include/guttering_system.h‎
Lines changed: 11 additions & 5 deletions b/‎include/guttering_system.h‎
Lines changed: 11 additions & 5 deletions
diff --git a/‎include/work_queue.h‎
Lines changed: 23 additions & 24 deletions b/‎include/work_queue.h‎
Lines changed: 23 additions & 24 deletions
@@ -16,12 +16,12 @@ static std::atomic<size_t> num_updates_processed;
 // queries the guttering system
 // Should be run in a separate thread
 static void querier(GutteringSystem *gts) {
-  WorkQueue<update_batch>::DataNode *data;
+  VertexBatchQueue::DataNode *data;
   while(true) {
     bool valid = gts->get_data(data);
     if (valid) {
       size_t updates = 0;
-      for (auto batch : data->get_batches())
+      for (auto batch : data->get_data())
         updates += batch.upd_vec.size();
       num_updates_processed += updates;
     }
 
@@ -11,7 +11,7 @@ static constexpr uint32_t prime = 100000007;
 // queries the guttering system
 // Should be run in a separate thread
 void querier(GutteringSystem *gts) {
-  WorkQueue<update_batch>::DataNode *data;
+  VertexBatchQueue::DataNode *data;
   while(true) {
     bool valid = gts->get_data(data);
     if(!valid && shutdown)
 
@@ -12,7 +12,7 @@ constexpr int log2_constexpr(size_t num) {
   return power;
 }
 
-class CacheGuttering : public GutteringSystem {
+class PipelineHyperTree {
  private:
   const size_t inserters;
   const node_id_t num_nodes;
@@ -102,7 +102,7 @@ class CacheGuttering : public GutteringSystem {
 
   class SharedGutter {
    private:
-    CacheGuttering &CGsystem;
+    PipelineHyperTree &CGsystem;
    public:
     update_t *data;
     std::atomic<size_t> insert_pos;
@@ -112,7 +112,7 @@ class CacheGuttering : public GutteringSystem {
     const size_t level;
 
     // true init
-    SharedGutter(CacheGuttering &CGsystem, size_t size, size_t level, size_t index)
+    SharedGutter(PipelineHyperTree &CGsystem, size_t size, size_t level, size_t index)
         : CGsystem(CGsystem),
           data(new update_t[size]),
           insert_pos(0),
@@ -124,31 +124,31 @@ class CacheGuttering : public GutteringSystem {
       delete[] data;
     }
 
-    bool batch_insert(CacheGuttering::InsertThread &thr, SharedGutter *&gut_ptr,
+    bool batch_insert(PipelineHyperTree::InsertThread &thr, SharedGutter *&gut_ptr,
                       const std::vector<update_t> &updates);
     void flush(InsertThread &thr, SharedGutter *&gut_ptr, size_t num_upd_flush,
                const std::vector<update_t> &updates);
   };
 
   class LeafGutter {
    private:
-    CacheGuttering &CGsystem;
+    PipelineHyperTree &CGsystem;
    public:
     std::vector<node_id_t> data;
     std::atomic<size_t> insert_pos;
     std::atomic<int> active_inserts;
     node_id_t index;
     const size_t capacity;
 
-    LeafGutter(CacheGuttering &CGsystem, size_t size, size_t index)
+    LeafGutter(PipelineHyperTree &CGsystem, size_t size, size_t index)
         : CGsystem(CGsystem),
           data(size),
           insert_pos(0),
           active_inserts(0),
           index(index),
           capacity(size) {}
 
-    bool batch_insert(CacheGuttering::InsertThread &thr, LeafGutter *&gut_ptr,
+    bool batch_insert(PipelineHyperTree::InsertThread &thr, LeafGutter *&gut_ptr,
                       const std::vector<node_id_t> &updates);
     void flush(InsertThread &thr, LeafGutter *&gut_ptr, size_t num_upd_flush,
                const std::vector<node_id_t> &updates);
@@ -163,15 +163,15 @@ class CacheGuttering : public GutteringSystem {
    private:
     static constexpr size_t root_buffer_capacity = 256;
     size_t root_buffer_size = 0;
-    CacheGuttering &CGsystem; // reference to associated CacheGuttering system
+    PipelineHyperTree &CGsystem; // reference to associated PipelineHyperTree system
 
     // thread local gutters
     update_t root_buffer[root_buffer_capacity];
     std::array<LocalGutter<level1_elms_per_buf>, level1_bufs> level1_gutters;
     std::array<LocalGutter<level2_elms_per_buf>, level2_bufs> level2_gutters;
 
    public:
-    InsertThread(CacheGuttering &CGsystem)
+    InsertThread(PipelineHyperTree &CGsystem)
         : CGsystem(CGsystem),
           l3_insert_bufs(local_fanout),
           l4_insert_bufs(global_fanout),
@@ -239,6 +239,9 @@ class CacheGuttering : public GutteringSystem {
     InsertThread (InsertThread &&) = default;
   };
 
+  void flush_leaf(PipelineHyperTree::InsertThread &thr, LeafGutter *&gut_ptr,
+                  const std::vector<node_id_t> &updates);
+
   // buffers shared amongst all threads
   SharedGutter **level3_gutters = nullptr;
   SharedGutter **level4_gutters = nullptr;
@@ -249,48 +252,44 @@ class CacheGuttering : public GutteringSystem {
   friend class InsertThread;
 
   std::vector<InsertThread> insert_threads; // vector of InsertThreads
+  VertexBatchQueue &wq;
  public:
   /**
    * Constructs a new guttering system using a tree-like structure for cache efficiency.
    * @param nodes       number of nodes in the graph.
    * @param workers     the number of workers which will be removing batches
    * @param inserters   the number of inserter buffers
    */
-  CacheGuttering(node_id_t nodes, uint32_t workers, uint32_t inserters,
-                 GutteringConfiguration conf);
-  CacheGuttering(node_id_t nodes, uint32_t workers, uint32_t inserters) : 
-    CacheGuttering(nodes, workers, inserters, GutteringConfiguration()) {};
+  PipelineHyperTree(node_id_t nodes, size_t inserters, GutteringConfiguration &conf,
+                    VertexBatchQueue &wq);
 
-  ~CacheGuttering();
+  ~PipelineHyperTree();
 
   /**
    * Puts an update into the data structure.
-   * @param upd the edge update.1
+   * @param upd the edge update.
    * @param which, which thread is inserting this update
    * @return nothing.
    */
-  insert_ret_t insert(const update_t &upd, size_t which) override { 
+  insert_ret_t insert(const update_t &upd, size_t which) { 
     assert(which < inserters);
     insert_threads[which].insert(upd);
   }
 
-  insert_ret_t batch_insert(const update_t *batch, size_t num_updates, size_t which) override {
+  insert_ret_t batch_insert(const update_t *batch, size_t num_updates, size_t which) {
     assert(which < inserters);
     insert_threads[which].batch_insert(batch, num_updates);
   }
 
   insert_ret_t process_stream_upd_batch(const GraphStreamUpdate *batch, size_t num_updates,
-                                        size_t which) override {
+                                        size_t which) {
     assert(which < inserters);
     insert_threads[which].process_stream_upd_batch(batch, num_updates);
   }
 
   // pure virtual functions don't like default params, so default to 'which' of 0
   insert_ret_t insert(const update_t &upd) { insert_threads[0].insert(upd); }
 
-  void flush_leaf(CacheGuttering::InsertThread &thr, LeafGutter *&gut_ptr,
-                    const std::vector<node_id_t> &updates);
-
   /**
    * Flushes all pending buffers. When this function returns there are no more updates in the
    * guttering system
@@ -304,14 +303,69 @@ class CacheGuttering : public GutteringSystem {
    * distributed guttering. If you don't know what that means, don't use this function!
    * 
    * @param offset 
-   * @return a reference to the parent CacheGuttering object.
+   * @return nothing.
    */
-  CacheGuttering& set_offset(node_id_t offset) { relabelling_offset = offset; return *this; }
+  void set_offset(node_id_t offset) { relabelling_offset = offset; }
 
   /*
    * Helper function for tracing a root to leaf path. Prints path to stdout
    * @param src   the node id to trace
    */
   void print_r_to_l(node_id_t src);
   void print_fanouts();
+
+  // number of batches per work queue element
+  const size_t wq_batch_per_elm;
+  const size_t leaf_gutter_size;
+};
+
+// The CacheGuttering class adds the GutteringSystem base class to the PipelineHyperTree.
+// Every call is forwarded to the wrapped PipelineHyperTree, which is handed the work queue
+// (wq) that is a member of the GutteringSystem base class.
+class CacheGuttering : public GutteringSystem {
+ private:
+  PipelineHyperTree pht;
+ public:
+  /**
+   * Constructs a new CacheGuttering system.
+   * @param nodes       number of nodes in the graph.
+   * @param workers     forwarded to the GutteringSystem base class.
+   * @param inserters   forwarded to the PipelineHyperTree.
+   * @param conf        configuration handed to both the base class and the PipelineHyperTree.
+   *                    The base class is constructed first and calls conf.set_defaults().
+   */
+  CacheGuttering(node_id_t nodes, size_t workers, size_t inserters, GutteringConfiguration conf)
+      : GutteringSystem(nodes, workers, conf), pht(nodes, inserters, conf, wq) {}
+
+  /**
+   * Constructs a new CacheGuttering system with a default constructed GutteringConfiguration.
+   * Kept so that callers of the three argument constructor this class offered before the
+   * PipelineHyperTree was split out continue to compile.
+   * @param nodes       number of nodes in the graph.
+   * @param workers     forwarded to the GutteringSystem base class.
+   * @param inserters   forwarded to the PipelineHyperTree.
+   */
+  CacheGuttering(node_id_t nodes, size_t workers, size_t inserters)
+      : CacheGuttering(nodes, workers, inserters, GutteringConfiguration()) {}
+
+  /**
+   * Puts an update into the data structure.
+   * @param upd the edge update.
+   * @param which, which thread is inserting this update
+   * @return nothing.
+   */
+  insert_ret_t insert(const update_t &upd, size_t which) override {
+    pht.insert(upd, which);
+  }
+
+  /**
+   * Puts a batch of updates into the data structure.
+   * @param batch        pointer to the updates, forwarded to the PipelineHyperTree.
+   * @param num_updates  forwarded to the PipelineHyperTree together with batch.
+   * @param which, which thread is inserting these updates
+   * @return nothing.
+   */
+  insert_ret_t batch_insert(const update_t *batch, size_t num_updates, size_t which) override {
+    pht.batch_insert(batch, num_updates, which);
+  }
+
+  /**
+   * Forwards a batch of GraphStreamUpdates to the PipelineHyperTree.
+   * @param batch        pointer to the stream updates.
+   * @param num_updates  forwarded to the PipelineHyperTree together with batch.
+   * @param which, which thread is inserting these updates
+   * @return nothing.
+   */
+  insert_ret_t process_stream_upd_batch(const GraphStreamUpdate *batch, size_t num_updates,
+                                        size_t which) override {
+    pht.process_stream_upd_batch(batch, num_updates, which);
+  }
+
+  // pure virtual functions don't like default params, so default to 'which' of 0
+  insert_ret_t insert(const update_t &upd) { pht.insert(upd); }
+
+  /**
+   * Flushes all pending buffers. When this function returns there are no more updates in the
+   * guttering system
+   * @return nothing.
+   */
+  flush_ret_t force_flush() {
+    pht.force_flush();
+  }
+
+  /**
+   * Set the "offset" for incoming edges. That is, if we set an offset of x, an incoming edge
+   * {i,j} will be stored internally as an edge {i - x, j}. Use only for integration with
+   * distributed guttering. If you don't know what that means, don't use this function!
+   * 
+   * @param offset 
+   * @return nothing.
+   */
+  void set_offset(node_id_t offset) { pht.set_offset(offset); }
 };
@@ -12,10 +12,12 @@ struct update_batch {
   std::vector<node_id_t> upd_vec;
 };
 
+// Work queue whose elements are each a vector of update_batch
+using VertexBatchQueue = WorkQueue<std::vector<update_batch>>;
+
 class GutteringSystem {
  public:
   // Constructor for programmatic configuration
-  GutteringSystem(node_id_t num_nodes, int workers, GutteringConfiguration conf,
+  GutteringSystem(node_id_t num_nodes, int workers, GutteringConfiguration &conf,
                   bool page_slots = false)
       : page_size((conf.set_defaults())._page_size),  // set defaults first to default init params
         buffer_size(conf._buffer_size),
@@ -25,7 +27,7 @@ class GutteringSystem {
         wq_batch_per_elm(conf._wq_batch_per_elm),
         num_nodes(num_nodes),
         leaf_gutter_size(conf._gutter_bytes / sizeof(node_id_t)),
-        wq(workers * queue_factor, wq_batch_per_elm) {
+        wq(workers * queue_factor) {
     size_t batch_len =
         page_slots ? leaf_gutter_size + page_size / sizeof(node_id_t) : leaf_gutter_size;
     std::vector<std::vector<update_batch>> wq_data;
@@ -75,8 +77,12 @@ class GutteringSystem {
   size_t gutter_size() { return leaf_gutter_size * sizeof(node_id_t); }
 
   // get data out of the guttering system either one gutter at a time or in a batched fashion
-  bool get_data(WorkQueue<update_batch>::DataNode *&data) { return wq.pop(data); }
-  void get_data_callback(WorkQueue<update_batch>::DataNode *data) { wq.pop_callback(data); }
+  // get_data forwards to wq.pop() and returns its result. data is taken by reference so
+  // that pop() can set the pointer.
+  // NOTE(review): presumably data is only meaningful when true is returned -- confirm in pop()
+  bool get_data(VertexBatchQueue::DataNode *&data) {
+    return wq.pop(data);
+  }
+  // get_data_callback forwards to wq.pop_callback().
+  // NOTE(review): presumably hands a node obtained from get_data back to the queue once the
+  // caller is done with it -- confirm in pop_callback()
+  void get_data_callback(VertexBatchQueue::DataNode *data) {
+    wq.pop_callback(data);
+  }
   void set_non_block(bool block) { wq.set_non_block(block); }  // set non-blocking calls in wq
  protected:
   // parameters of the GutteringSystem, defined by the GutteringConfiguration param or config file
@@ -89,5 +95,5 @@ class GutteringSystem {
 
   const node_id_t num_nodes;
   const node_id_t leaf_gutter_size;
-  WorkQueue<update_batch> wq;
+  VertexBatchQueue wq;
 };
@@ -7,33 +7,38 @@
 #include <exception>
 #include "types.h"
 
-template<class T> // templatized by data type we're storing
+/**
+ * WorkQueue is templatized by the data type we're storing.
+ * This data type must be: 1. default constructible, 2. copy assignable (operator=).
+ * Ideally it should also have fast std::swap() performance (e.g. a std::vector just swaps
+ * metadata/pointers)
+ */
+template<class T> 
 class WorkQueue {
  public:
   class DataNode {
    private:
     // LL next pointer
     DataNode *next = nullptr;
-    std::vector<T> data_batch;
+    // the single element of type T held by this linked list node
+    T data;
 
     friend class WorkQueue;
    public:
-    const std::vector<T>& get_batches() { return data_batch; }
+    // read-only access to the stored element; const so it can be called on a const DataNode
+    const T& get_data() const { return data; }
   };
 
   /**
    * Construct a work queue
-   * @param num_queue_elements   the rough number of batches to have in the queue
-   * @param data_per_elm         number of batches per queue element.
+   * @param num_queue_elements   the rough number of data elements to have in the queue
    */
-  WorkQueue(size_t num_queue_elements, size_t data_per_elm)
-      : len(num_queue_elements), batch_per_elm(data_per_elm) {
+  WorkQueue(size_t num_queue_elements)
+      : len(num_queue_elements) {
     non_block = false;
 
     // place all nodes of linked list in the producer queue and reserve
     // memory for the vectors
     for (size_t i = 0; i < len; i++) {
-      // create and reserve space for updates
+      // create and reserve space for queue elements
       DataNode *node = new DataNode();
       node->next = producer_list;  // next of node is head
       producer_list = node;        // set head to new node
@@ -59,37 +64,32 @@ class WorkQueue {
   }
 
   /**
+   * TODO: Rewrite this description
    * Initialize the queue pointers to point at actual data instead of nullptrs
    * If this function is called, IT MUST be called before performing any operations with the queue
    * The queue can also work without initializing pointers, so long as the pointers returned from
    * push being null is acceptable. (i.e. user initializes after push or does not need the returned
    * pointer)
-   * @param data_batches   a vector of data batches that will start in the queue but is swapped with
+   * @param new_data   a vector of data that will start in the queue but is swapped with
    *                       data that is pushed into the queue.
+   * @throws std::invalid_argument if new_data.size() is not equal to the queue length (len).
    */
-  void populate_queue(std::vector<std::vector<T>> data_batches) {
-    if (data_batches.size() != len) {
+  void populate_queue(const std::vector<T> &new_data) {
+    if (new_data.size() != len) {
       throw std::invalid_argument("WQ: Error number of initialized data batches incorrect");
     }
     DataNode *data = producer_list; // head of producer list
     for (size_t i = 0; i < len; i++) {
-      if (data_batches[i].size() != batch_per_elm) {
-        throw std::invalid_argument("WQ: Error number of data elements per batch incorrect");
-      }
-      data->data_batch = data_batches[i];
+      // copy assign the i-th element into the current node, then step to the next node
+      data->data = new_data[i];
       data = data->next;
     }
   }
 
   /**
-   * Add a data element to the queue
-   * @param upd_vec_batch  vector of graph node id the associated updates
+   * Adds a data element to the queue
+   * @param push_data   the data the user wants to add to the queue. When this function returns,
+   *                    this reference will hold the data that was in the "empty" queue node it replaced
    */
-  void push(std::vector<T> &upd_vec_batch) {
-    if (upd_vec_batch.size() > batch_per_elm) {
-      throw std::runtime_error("WQ: Too many batches in call to push " + 
-        std::to_string(upd_vec_batch.size()) + " > " + std::to_string(batch_per_elm));
-    }
+  void push(T &push_data) {
     std::unique_lock<std::mutex> lk(producer_list_lock);
     producer_condition.wait(lk, [this]{return !full();});
 
@@ -102,7 +102,7 @@ class WorkQueue {
     lk.unlock();
 
     // swap the batch vectors to perform the update
-    std::swap(node->data_batch, upd_vec_batch);
+    std::swap(node->data, push_data);
 
     // add this block to the consumer queue for processing
     consumer_list_lock.lock();
@@ -198,7 +198,6 @@ class WorkQueue {
   DataNode *consumer_list = nullptr; // list of nodes with data for reading
 
   const size_t len;            // number of elments in queue
-  const size_t batch_per_elm;  // number of batches per work queue element
 
   // locks and condition variables for producer list
   std::condition_variable producer_condition;