RedisAI
diff --git a/‎docs/commands.md‎
Lines changed: 4 additions & 1 deletion b/‎docs/commands.md‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎src/redisai.c‎
Lines changed: 4 additions & 1 deletion b/‎src/redisai.c‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎src/serialization/RDB/decoder/current/v2/decode_v2.c‎
Lines changed: 260 additions & 0 deletions b/‎src/serialization/RDB/decoder/current/v2/decode_v2.c‎
Lines changed: 260 additions & 0 deletions
diff --git a/‎src/serialization/RDB/decoder/current/v2/decode_v2.h‎
Lines changed: 8 additions & 0 deletions b/‎src/serialization/RDB/decoder/current/v2/decode_v2.h‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎src/serialization/RDB/decoder/decode_previous.c‎
Lines changed: 8 additions & 0 deletions b/‎src/serialization/RDB/decoder/decode_previous.c‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎…ation/RDB/decoder/current/v1/decode_v1.c‎ ‎…tion/RDB/decoder/previous/v1/decode_v1.c‎src/serialization/RDB/decoder/current/v1/decode_v1.c renamed to src/serialization/RDB/decoder/previous/v1/decode_v1.c b/‎…ation/RDB/decoder/current/v1/decode_v1.c‎ ‎…tion/RDB/decoder/previous/v1/decode_v1.c‎src/serialization/RDB/decoder/current/v1/decode_v1.c renamed to src/serialization/RDB/decoder/previous/v1/decode_v1.c
diff --git a/‎…ation/RDB/decoder/current/v1/decode_v1.h‎ ‎…tion/RDB/decoder/previous/v1/decode_v1.h‎src/serialization/RDB/decoder/current/v1/decode_v1.h renamed to src/serialization/RDB/decoder/previous/v1/decode_v1.h
Lines changed: 1 addition & 1 deletion b/‎…ation/RDB/decoder/current/v1/decode_v1.h‎ ‎…tion/RDB/decoder/previous/v1/decode_v1.h‎src/serialization/RDB/decoder/current/v1/decode_v1.h renamed to src/serialization/RDB/decoder/previous/v1/decode_v1.h
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/serialization/RDB/decoder/rai_rdb_decoder.c‎
Lines changed: 4 additions & 4 deletions b/‎src/serialization/RDB/decoder/rai_rdb_decoder.c‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎src/serialization/RDB/encoder/rai_rdb_encode.c‎
Lines changed: 4 additions & 4 deletions b/‎src/serialization/RDB/encoder/rai_rdb_encode.c‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎src/serialization/RDB/encoder/v1/encode_v1.h‎
Lines changed: 0 additions & 8 deletions b/‎src/serialization/RDB/encoder/v1/encode_v1.h‎
Lines changed: 0 additions & 8 deletions
@@ -222,7 +222,7 @@ AI.MODELGET <key> [META] [BLOB]
 _Arguments
 
 * **key**: the model's key name
-* **META**: will return the model's meta information on backend, device and tag
+* **META**: will return the model's meta information on backend, device, tag and batching parameters
 * **BLOB**: will return the model's blob containing the serialized model
 
 _Return_
@@ -236,6 +236,7 @@ An array of alternating key-value pairs as follows:
 1. **MINBATCHSIZE**: The minimum size of any batch of incoming requests.
 1. **INPUTS**: array reply with one or more names of the model's input nodes (applicable only for TensorFlow models)
 1. **OUTPUTS**: array reply with one or more names of the model's output nodes (applicable only for TensorFlow models)
+1. **MINBATCHTIMEOUT**: The time in milliseconds for which the engine will wait before executing a request to run the model, when the number of incoming requests is lower than `MINBATCHSIZE`. When `MINBATCHTIMEOUT` is 0, the engine will not run the model before it receives at least `MINBATCHSIZE` requests.
 1. **BLOB**: a blob containing the serialized model (when called with the `BLOB` argument) as a String. If the size of the serialized model exceeds `MODEL_CHUNK_SIZE` (see `AI.CONFIG` command), then an array of chunks is returned. The full serialized model can be obtained by concatenating the chunks.
 
 **Examples**
@@ -259,6 +260,8 @@ redis> AI.MODELGET mymodel META
     2) "b"
 13) "outputs"
 14) 1) "c"
+15) "minbatchtimeout"
+16) (integer) 0
 ```
 
 You can also save it to the local file 'model.ext' with [`redis-cli`](https://redis.io/topics/cli) like so:
 
@@ -463,7 +463,7 @@ int RedisAI_ModelGet_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv,
         return REDISMODULE_OK;
     }
 
-    const int outentries = blob ? 16 : 14;
+    const int outentries = blob ? 18 : 16;
     RedisModule_ReplyWithArray(ctx, outentries);
 
     RedisModule_ReplyWithCString(ctx, "backend");
@@ -500,6 +500,9 @@ int RedisAI_ModelGet_RedisCommand(RedisModuleCtx *ctx, RedisModuleString **argv,
         RedisModule_ReplyWithCString(ctx, mto->outputs[i]);
     }
 
+    RedisModule_ReplyWithCString(ctx, "minbatchtimeout");
+    RedisModule_ReplyWithLongLong(ctx, (long)mto->opts.minbatchtimeout);
+
     if (meta && blob) {
         RedisModule_ReplyWithCString(ctx, "blob");
         RAI_ReplyWithChunks(ctx, buffer, len);
 
@@ -0,0 +1,260 @@
+#include "decode_v2.h"
+#include "assert.h"
+
+/**
+ * In case of IO errors, the default return values are:
+ * numbers - 0
+ * strings - null
+ * So IO errors are checked only where it is necessary.
+ */
+
+/**
+ * Decode a tensor that was persisted with RDB encoding version 2.
+ *
+ * The fields are consumed in the following order: device type, device id,
+ * dtype bits, dtype code, dtype lanes, number of dimensions, every shape
+ * entry, every stride entry, the byte offset and, last, the data buffer.
+ *
+ * @param io RDB stream to read from
+ * @return a new RAI_Tensor referencing the loaded shape, strides and data, or
+ *         NULL (after logging an error on io) when an IO error was detected
+ */
+void *RAI_RDBLoadTensor_v2(RedisModuleIO *io) {
+    int64_t *dims = NULL;
+    int64_t *dim_strides = NULL;
+
+    DLDevice dev;
+    dev.device_type = RedisModule_LoadUnsigned(io);
+    dev.device_id = RedisModule_LoadUnsigned(io);
+    if (RedisModule_IsIOError(io)) {
+        goto io_failure;
+    }
+
+    // For now we only support CPU tensors (except during model and script run)
+    assert(dev.device_type == kDLCPU);
+    assert(dev.device_id == 0);
+
+    DLDataType elem_type;
+    elem_type.bits = RedisModule_LoadUnsigned(io);
+    elem_type.code = RedisModule_LoadUnsigned(io);
+    elem_type.lanes = RedisModule_LoadUnsigned(io);
+
+    size_t n_dims = RedisModule_LoadUnsigned(io);
+    if (RedisModule_IsIOError(io)) {
+        goto io_failure;
+    }
+
+    // All shape entries are stored first, followed by all stride entries.
+    dims = RedisModule_Calloc(n_dims, sizeof(*dims));
+    for (size_t d = 0; d != n_dims; d++) {
+        dims[d] = RedisModule_LoadUnsigned(io);
+    }
+
+    dim_strides = RedisModule_Calloc(n_dims, sizeof(*dim_strides));
+    for (size_t d = 0; d != n_dims; d++) {
+        dim_strides[d] = RedisModule_LoadUnsigned(io);
+    }
+
+    size_t offset = RedisModule_LoadUnsigned(io);
+
+    size_t blob_len;
+    char *blob = RedisModule_LoadStringBuffer(io, &blob_len);
+    if (RedisModule_IsIOError(io)) {
+        goto io_failure;
+    }
+
+    RAI_Tensor *tensor = RAI_TensorNew();
+    DLTensor loaded = {.device = dev,
+                       .data = blob,
+                       .ndim = n_dims,
+                       .dtype = elem_type,
+                       .shape = dims,
+                       .strides = dim_strides,
+                       .byte_offset = offset};
+    // No manager context or deleter is attached to the loaded tensor.
+    tensor->tensor =
+        (DLManagedTensor){.dl_tensor = loaded, .manager_ctx = NULL, .deleter = NULL};
+    return tensor;
+
+io_failure:
+    if (dims != NULL) {
+        RedisModule_Free(dims);
+    }
+    if (dim_strides != NULL) {
+        RedisModule_Free(dim_strides);
+    }
+    RedisModule_LogIOError(io, "error", "Experienced a short read while reading a tensor from RDB");
+    return NULL;
+}
+
+/**
+ * Decode a model that was persisted with RDB encoding version 2.
+ *
+ * Read order: backend, device string, tag, batchsize, minbatchsize,
+ * minbatchtimeout, number of inputs and their names, number of outputs and
+ * their names, total model size, number of chunks, and the chunks themselves,
+ * which are concatenated into a single buffer of the declared size.
+ *
+ * If model creation reports that the backend is not loaded, the default
+ * backend is loaded and creation is retried once. On success a stats entry is
+ * added via RAI_AddStatsEntry and stored in model->infokey.
+ *
+ * @param io RDB stream to read from
+ * @return the created RAI_Model, or NULL when an IO error occurred, the chunks
+ *         do not fit in the declared model size, or the model could not be
+ *         created
+ */
+void *RAI_RDBLoadModel_v2(RedisModuleIO *io) {
+
+    char *devicestr = NULL;
+    RedisModuleString *tag = NULL;
+    size_t ninputs = 0;
+    const char **inputs = NULL;
+    size_t noutputs = 0;
+    const char **outputs = NULL;
+    char *buffer = NULL;
+
+    RAI_Backend backend = RedisModule_LoadUnsigned(io);
+    devicestr = RedisModule_LoadStringBuffer(io, NULL);
+    tag = RedisModule_LoadString(io);
+
+    const size_t batchsize = RedisModule_LoadUnsigned(io);
+    const size_t minbatchsize = RedisModule_LoadUnsigned(io);
+    const size_t minbatchtimeout = RedisModule_LoadUnsigned(io);
+
+    ninputs = RedisModule_LoadUnsigned(io);
+    if (RedisModule_IsIOError(io))
+        goto cleanup;
+
+    inputs = RedisModule_Alloc(ninputs * sizeof(char *));
+
+    // Every slot is assigned (NULL on a failed read), so cleanup can free them all.
+    for (size_t i = 0; i < ninputs; i++) {
+        inputs[i] = RedisModule_LoadStringBuffer(io, NULL);
+    }
+
+    noutputs = RedisModule_LoadUnsigned(io);
+    if (RedisModule_IsIOError(io))
+        goto cleanup;
+
+    outputs = RedisModule_Alloc(noutputs * sizeof(char *));
+
+    for (size_t i = 0; i < noutputs; i++) {
+        outputs[i] = RedisModule_LoadStringBuffer(io, NULL);
+    }
+
+    RAI_ModelOpts opts = {
+        .batchsize = batchsize,
+        .minbatchsize = minbatchsize,
+        .minbatchtimeout = minbatchtimeout,
+        .backends_intra_op_parallelism = getBackendsIntraOpParallelism(),
+        .backends_inter_op_parallelism = getBackendsInterOpParallelism(),
+    };
+
+    size_t len = RedisModule_LoadUnsigned(io);
+    if (RedisModule_IsIOError(io))
+        goto cleanup;
+
+    buffer = RedisModule_Alloc(len);
+    const size_t n_chunks = RedisModule_LoadUnsigned(io);
+    // A failed read yields 0 chunks, which would skip the loop below and pass
+    // an unfilled buffer to model creation, so the error is checked here.
+    if (RedisModule_IsIOError(io))
+        goto cleanup;
+
+    size_t chunk_offset = 0;
+    for (size_t i = 0; i < n_chunks; i++) {
+        size_t chunk_len;
+        char *chunk_buffer = RedisModule_LoadStringBuffer(io, &chunk_len);
+        if (RedisModule_IsIOError(io))
+            goto cleanup;
+        // Chunk sizes come from the RDB payload: never copy past the end of
+        // the buffer that was sized from the declared model length.
+        // chunk_offset never exceeds len, so the subtraction cannot wrap.
+        if (chunk_len > len - chunk_offset) {
+            RedisModule_LogIOError(io, "error",
+                                   "Model chunks exceed the declared model size in RDB");
+            RedisModule_Free(chunk_buffer);
+            goto cleanup;
+        }
+        memcpy(buffer + chunk_offset, chunk_buffer, chunk_len);
+        chunk_offset += chunk_len;
+        RedisModule_Free(chunk_buffer);
+    }
+
+    RAI_Error err = {0};
+    RAI_Model *model = RAI_ModelCreate(backend, devicestr, tag, opts, ninputs, inputs, noutputs,
+                                       outputs, buffer, len, &err);
+
+    // The backend may not be loaded yet: load the default one and retry once.
+    if (err.code == RAI_EBACKENDNOTLOADED) {
+        RedisModuleCtx *ctx = RedisModule_GetContextFromIO(io);
+        int ret = RAI_LoadDefaultBackend(ctx, backend);
+        if (ret == REDISMODULE_ERR) {
+            RedisModule_Log(ctx, "warning", "Could not load default backend");
+            RAI_ClearError(&err);
+            goto cleanup;
+        }
+        RAI_ClearError(&err);
+        model = RAI_ModelCreate(backend, devicestr, tag, opts, ninputs, inputs, noutputs, outputs,
+                                buffer, len, &err);
+    }
+
+    if (err.code != RAI_OK) {
+        RedisModuleCtx *ctx = RedisModule_GetContextFromIO(io);
+        RedisModule_Log(ctx, "warning", "%s", err.detail);
+        RAI_ClearError(&err);
+        goto cleanup;
+    }
+
+    RedisModuleCtx *stats_ctx = RedisModule_GetContextFromIO(io);
+    RedisModuleString *stats_keystr =
+        RedisModule_CreateStringFromString(stats_ctx, RedisModule_GetKeyNameFromIO(io));
+
+    model->infokey = RAI_AddStatsEntry(stats_ctx, stats_keystr, RAI_MODEL, backend, devicestr, tag);
+
+    // The temporaries read from the RDB are released here once the model exists.
+    for (size_t i = 0; i < ninputs; i++) {
+        RedisModule_Free((void *)inputs[i]);
+    }
+    RedisModule_Free(inputs);
+    for (size_t i = 0; i < noutputs; i++) {
+        RedisModule_Free((void *)outputs[i]);
+    }
+    RedisModule_Free(outputs);
+    RedisModule_Free(buffer);
+    RedisModule_Free(devicestr);
+    RedisModule_FreeString(NULL, stats_keystr);
+    RedisModule_FreeString(NULL, tag);
+
+    return model;
+
+cleanup:
+    if (devicestr)
+        RedisModule_Free(devicestr);
+    if (tag)
+        RedisModule_FreeString(NULL, tag);
+    if (inputs) {
+        for (size_t i = 0; i < ninputs; i++) {
+            RedisModule_Free((void *)inputs[i]);
+        }
+        RedisModule_Free(inputs);
+    }
+
+    if (outputs) {
+        for (size_t i = 0; i < noutputs; i++) {
+            RedisModule_Free((void *)outputs[i]);
+        }
+        RedisModule_Free(outputs);
+    }
+
+    if (buffer)
+        RedisModule_Free(buffer);
+
+    RedisModule_LogIOError(io, "error", "Experienced a short read while reading a model from RDB");
+    return NULL;
+}
+
+/**
+ * Decode a script that was persisted with RDB encoding version 2.
+ *
+ * Read order: device string, tag, script definition. If script creation
+ * reports that the backend is not loaded, the default TORCH backend is loaded
+ * and creation is retried once. On success a stats entry is added via
+ * RAI_AddStatsEntry and stored in script->infokey.
+ *
+ * @param io RDB stream to read from
+ * @return the created RAI_Script, or NULL when an IO error occurred or the
+ *         script could not be created
+ */
+void *RAI_RDBLoadScript_v2(RedisModuleIO *io) {
+    RedisModuleString *tag = NULL;
+    char *devicestr = NULL;
+    char *scriptdef = NULL;
+    RAI_Error err = {0};
+
+    devicestr = RedisModule_LoadStringBuffer(io, NULL);
+    tag = RedisModule_LoadString(io);
+    scriptdef = RedisModule_LoadStringBuffer(io, NULL);
+    if (RedisModule_IsIOError(io)) {
+        // Report short reads the same way the tensor and model decoders do.
+        RedisModule_LogIOError(io, "error",
+                               "Experienced a short read while reading a script from RDB");
+        goto cleanup;
+    }
+
+    RAI_Script *script = RAI_ScriptCreate(devicestr, tag, scriptdef, &err);
+
+    // The backend may not be loaded yet: load the default one and retry once.
+    if (err.code == RAI_EBACKENDNOTLOADED) {
+        RedisModuleCtx *ctx = RedisModule_GetContextFromIO(io);
+        int ret = RAI_LoadDefaultBackend(ctx, RAI_BACKEND_TORCH);
+        if (ret == REDISMODULE_ERR) {
+            RedisModule_Log(ctx, "warning", "Could not load default TORCH backend");
+            RAI_ClearError(&err);
+            goto cleanup;
+        }
+        RAI_ClearError(&err);
+        script = RAI_ScriptCreate(devicestr, tag, scriptdef, &err);
+    }
+
+    if (err.code != RAI_OK) {
+        // Use the module log, as the model decoder does, rather than stdout.
+        RedisModuleCtx *ctx = RedisModule_GetContextFromIO(io);
+        RedisModule_Log(ctx, "warning", "%s", err.detail);
+        RAI_ClearError(&err);
+        goto cleanup;
+    }
+
+    RedisModuleCtx *stats_ctx = RedisModule_GetContextFromIO(io);
+    RedisModuleString *stats_keystr =
+        RedisModule_CreateStringFromString(stats_ctx, RedisModule_GetKeyNameFromIO(io));
+
+    script->infokey =
+        RAI_AddStatsEntry(stats_ctx, stats_keystr, RAI_SCRIPT, RAI_BACKEND_TORCH, devicestr, tag);
+
+    RedisModule_FreeString(NULL, stats_keystr);
+    RedisModule_FreeString(NULL, tag);
+    RedisModule_Free(devicestr);
+    RedisModule_Free(scriptdef);
+    return script;
+cleanup:
+    if (devicestr)
+        RedisModule_Free(devicestr);
+    if (scriptdef)
+        RedisModule_Free(scriptdef);
+    if (tag)
+        RedisModule_FreeString(NULL, tag);
+    return NULL;
+}
@@ -0,0 +1,8 @@
+#pragma once
+#include "serialization/serialization_include.h"
+
+void *RAI_RDBLoadTensor_v2(RedisModuleIO *io);
+
+void *RAI_RDBLoadModel_v2(RedisModuleIO *io);
+
+void *RAI_RDBLoadScript_v2(RedisModuleIO *io);
@@ -1,9 +1,13 @@
 #include "decode_previous.h"
 #include "previous/v0/decode_v0.h"
+#include "previous/v1/decode_v1.h"
+
 void *Decode_PreviousTensor(RedisModuleIO *rdb, int encver) {
     switch (encver) {
     case 0:
         return RAI_RDBLoadTensor_v0(rdb);
+    case 1:
+        return RAI_RDBLoadTensor_v1(rdb);
     default:
         assert(false && "Invalid encoding version");
     }
@@ -14,6 +18,8 @@ void *Decode_PreviousModel(RedisModuleIO *rdb, int encver) {
     switch (encver) {
     case 0:
         return RAI_RDBLoadModel_v0(rdb);
+    case 1:
+        return RAI_RDBLoadModel_v1(rdb);
     default:
         assert(false && "Invalid encoding version");
     }
@@ -24,6 +30,8 @@ void *Decode_PreviousScript(RedisModuleIO *rdb, int encver) {
     switch (encver) {
     case 0:
         return RAI_RDBLoadScript_v0(rdb);
+    case 1:
+        return RAI_RDBLoadScript_v1(rdb);
     default:
         assert(false && "Invalid encoding version");
     }
 
@@ -1,5 +1,5 @@
 #pragma once
-#include "../../../../serialization_include.h"
+#include "serialization/serialization_include.h"
 
 void *RAI_RDBLoadTensor_v1(RedisModuleIO *io);
 
 
@@ -1,8 +1,8 @@
 #include "rai_rdb_decoder.h"
-#include "current/v1/decode_v1.h"
+#include "current/v2/decode_v2.h"
 
-void *RAI_RDBLoadTensor(RedisModuleIO *io) { return RAI_RDBLoadTensor_v1(io); }
+void *RAI_RDBLoadTensor(RedisModuleIO *io) { return RAI_RDBLoadTensor_v2(io); } // delegates to the v2 tensor decoder
 
-void *RAI_RDBLoadModel(RedisModuleIO *io) { return RAI_RDBLoadModel_v1(io); }
+void *RAI_RDBLoadModel(RedisModuleIO *io) { return RAI_RDBLoadModel_v2(io); } // delegates to the v2 model decoder
 
-void *RAI_RDBLoadScript(RedisModuleIO *io) { return RAI_RDBLoadScript_v1(io); }
+void *RAI_RDBLoadScript(RedisModuleIO *io) { return RAI_RDBLoadScript_v2(io); } // delegates to the v2 script decoder
@@ -1,8 +1,8 @@
 #include "rai_rdb_encode.h"
-#include "v1/encode_v1.h"
+#include "v2/encode_v2.h"
 
-void RAI_RDBSaveTensor(RedisModuleIO *io, void *value) { RAI_RDBSaveTensor_v1(io, value); }
+void RAI_RDBSaveTensor(RedisModuleIO *io, void *value) { RAI_RDBSaveTensor_v2(io, value); } // delegates to the v2 tensor encoder
 
-void RAI_RDBSaveModel(RedisModuleIO *io, void *value) { RAI_RDBSaveModel_v1(io, value); }
+void RAI_RDBSaveModel(RedisModuleIO *io, void *value) { RAI_RDBSaveModel_v2(io, value); } // delegates to the v2 model encoder
 
-void RAI_RDBSaveScript(RedisModuleIO *io, void *value) { RAI_RDBSaveScript_v1(io, value); }
+void RAI_RDBSaveScript(RedisModuleIO *io, void *value) { RAI_RDBSaveScript_v2(io, value); } // delegates to the v2 script encoder