background worker refactor

DvirDukhan · DvirDukhan · commit 116d1f689b48 · 2021-01-12T10:16:05.000+02:00
diff --git a/src/DAG/dag.c b/src/DAG/dag.c
@@ -91,7 +91,7 @@ static void Dag_LoadInputsToModelRunCtx(RedisAI_RunInfo *rinfo, RAI_DagOp *curre
 
 static void Dag_StoreOutputsFromModelRunCtx(RedisAI_RunInfo *rinfo, RAI_DagOp *currentOp) {
 
-    RAI_ContextReadLock(rinfo);
+    RAI_ContextWriteLock(rinfo);
     const size_t noutputs = RAI_ModelRunCtxNumOutputs(currentOp->mctx);
     for (size_t outputNumber = 0; outputNumber < noutputs; outputNumber++) {
         RAI_Tensor *tensor = RAI_ModelRunCtxOutputTensor(currentOp->mctx, outputNumber);
@@ -348,16 +348,20 @@ int RAI_DagOpBatchable(RAI_DagOp *op1, RedisAI_RunInfo *rinfo1, RAI_DagOp *op2,
     return 1;
 }
 
-int RedisAI_DagDeviceComplete(RedisAI_RunInfo *rinfo) {
+bool RedisAI_DagDeviceComplete(RedisAI_RunInfo *rinfo) {
     return rinfo->dagDeviceCompleteOpCount == rinfo->dagDeviceOpCount;
 }
 
-int RedisAI_DagComplete(RedisAI_RunInfo *rinfo) {
+bool RedisAI_DagComplete(RedisAI_RunInfo *rinfo) {
     int completeOpCount = __atomic_load_n(rinfo->dagCompleteOpCount, __ATOMIC_RELAXED);
 
     return completeOpCount == rinfo->dagOpCount;
 }
 
+bool RedisAI_DagError(RedisAI_RunInfo *rinfo) {
+    return __atomic_load_n(rinfo->dagError, __ATOMIC_RELAXED) != 0;
+}
+
 RAI_DagOp *RedisAI_DagCurrentOp(RedisAI_RunInfo *rinfo) {
     if (rinfo->dagDeviceCompleteOpCount == rinfo->dagDeviceOpCount) {
         return NULL;
@@ -366,21 +370,21 @@ RAI_DagOp *RedisAI_DagCurrentOp(RedisAI_RunInfo *rinfo) {
     return rinfo->dagDeviceOps[rinfo->dagDeviceCompleteOpCount];
 }
 
-void RedisAI_DagCurrentOpInfo(RedisAI_RunInfo *rinfo, int *currentOpReady,
-                              int *currentOpBatchable) {
+void RedisAI_DagCurrentOpInfo(RedisAI_RunInfo *rinfo, bool *currentOpReady,
+                              bool *currentOpBatchable) {
     RAI_DagOp *currentOp_ = RedisAI_DagCurrentOp(rinfo);
 
-    *currentOpReady = 0;
-    *currentOpBatchable = 0;
+    *currentOpReady = false;
+    *currentOpBatchable = false;
 
     if (currentOp_ == NULL) {
         return;
     }
 
     if (currentOp_->mctx && currentOp_->mctx->model->opts.batchsize > 0) {
-        *currentOpBatchable = 1;
+        *currentOpBatchable = true;
     }
-    *currentOpReady = 1;
+    *currentOpReady = true;
     // If this is a single op dag, the op is definitely ready.
     if (rinfo->single_op_dag == 1)
         return;
@@ -391,7 +395,7 @@ void RedisAI_DagCurrentOpInfo(RedisAI_RunInfo *rinfo, int *currentOpReady,
     for (int i = 0; i < n_inkeys; i++) {
         if (AI_dictFind(rinfo->dagTensorsContext, currentOp_->inkeys[i]) == NULL) {
             RAI_ContextUnlock(rinfo);
-            *currentOpReady = 0;
+            *currentOpReady = false;
             return;
         }
     }
@@ -588,7 +592,7 @@ static void _ScriptSingleOp_PersistTensors(RedisModuleCtx *ctx, RAI_DagOp *op) {
     const size_t noutputs = RAI_ScriptRunCtxNumOutputs(op->sctx);
     for (size_t outputNumber = 0; outputNumber < noutputs; outputNumber++) {
         RAI_Tensor *tensor = RAI_ScriptRunCtxOutputTensor(op->sctx, outputNumber);
-        tensor = tensor ? RAI_TensorGetShallowCopy(tensor) : NULL;
+        // tensor = tensor ? RAI_TensorGetShallowCopy(tensor) : NULL;
         if (tensor)
             _StoreTensorInKeySpace(ctx, tensor, op->outkeys[outputNumber], false);
     }
diff --git a/src/DAG/dag.h b/src/DAG/dag.h
@@ -19,17 +19,25 @@
  * successfully. Since rinfo carries information on what queue
  * it has been placed in, there's no need to pass the device identifier.
  * @param rinfo context in which RedisAI blocking commands operate.
- * @return nonzero if all ops are complete for device, 0 otherwise
+ * @return true if all ops are complete for device, false otherwise
  */
-int RedisAI_DagDeviceComplete(RedisAI_RunInfo *rinfo);
+bool RedisAI_DagDeviceComplete(RedisAI_RunInfo *rinfo);
 
 /**
  * Get whether all DAG ops have been executed successfully irrespective
  * of the device, i.e. if the DAG has been completely executed.
  * @param rinfo context in which RedisAI blocking commands operate.
- * @return nonzero of all ops in DAG are complete, 0 otherwise
+ * @return true if all ops in DAG are complete, false otherwise
  */
-int RedisAI_DagComplete(RedisAI_RunInfo *rinfo);
+bool RedisAI_DagComplete(RedisAI_RunInfo *rinfo);
+
+/**
+ * @brief Get an indication of whether an error happened during the DAG run.
+ * 
+ * @param rinfo context in which RedisAI blocking commands operate.
+ * @return true if there was an error, false otherwise
+ */
+bool RedisAI_DagError(RedisAI_RunInfo *rinfo);
 
 /**
  * Get current DAG op for the given device. An op is current if it's
@@ -50,7 +58,7 @@ RAI_DagOp *RedisAI_DagCurrentOp(RedisAI_RunInfo *rinfo);
  *            a MODELRUN and is BATCHSIZE greater than zero
  * @return
  */
-void RedisAI_DagCurrentOpInfo(RedisAI_RunInfo *rinfo, int *currentOpReady, int *currentOpBatchable);
+void RedisAI_DagCurrentOpInfo(RedisAI_RunInfo *rinfo, bool *currentOpReady, bool *currentOpBatchable);
 
 /**
  * Get batching information about a DAG op.
diff --git a/src/background_workers.c b/src/background_workers.c