From 7330e846d00a3d1b9502b911e7ce3e0fc96d7f8a Mon Sep 17 00:00:00 2001 From: hyunil park Date: Wed, 16 Jul 2025 17:15:13 +0900 Subject: [PATCH] [ml service] Add Flare as a new nnfw type - Implemented support for 'flare' nnfw type in ML service API - Included test cases to validate flare functionality Signed-off-by: hyunil park --- c/include/nnstreamer-tizen-internal.h | 2 + c/src/ml-api-inference-single.c | 23 ++++++- tests/capi/unittest_capi_service_extension.cc | 60 ++++++++++++++----- .../config/config_single_flare.conf | 11 ++++ tests/test_models/data/flare_input.txt | 1 + 5 files changed, 81 insertions(+), 16 deletions(-) create mode 100644 tests/test_models/config/config_single_flare.conf create mode 100644 tests/test_models/data/flare_input.txt diff --git a/c/include/nnstreamer-tizen-internal.h b/c/include/nnstreamer-tizen-internal.h index 00e74785..a31de19d 100644 --- a/c/include/nnstreamer-tizen-internal.h +++ b/c/include/nnstreamer-tizen-internal.h @@ -19,6 +19,8 @@ extern "C" { #endif /* __cplusplus */ +#define ML_NNFW_TYPE_FLARE 23 /**< FLARE framework, It will be moved to ml-api-common.h after Tizen 10.0 M2 release. */ + /** * @brief Callback for tensor data stream of machine-learning API. * @details Note that the buffer may be deallocated after the return and this is synchronously called. Thus, if you need the data afterwards, copy the data to another buffer and return fast. Do not spend too much time in the callback. 
diff --git a/c/src/ml-api-inference-single.c b/c/src/ml-api-inference-single.c index 7a742d36..fb160ec7 100644 --- a/c/src/ml-api-inference-single.c +++ b/c/src/ml-api-inference-single.c @@ -114,6 +114,7 @@ static const char *ml_nnfw_subplugin_name[] = { [ML_NNFW_TYPE_QNN] = "qnn", [ML_NNFW_TYPE_LLAMACPP] = "llamacpp", [ML_NNFW_TYPE_TIZEN_HAL] = "tizen-hal", + [ML_NNFW_TYPE_FLARE] = "flare", NULL }; @@ -1058,6 +1059,12 @@ ml_single_open_custom (ml_single_h * single, ml_single_preset * info) for (i = 0; i < num_models; i++) g_strstrip (list_models[i]); + /** + * Currently ML_NNFW_TYPE_FLARE is defined temporarily to avoid ACR. + */ + if (info->fw_name && strcasecmp (info->fw_name, "flare") == 0) { + nnfw = ML_NNFW_TYPE_FLARE; + } status = _ml_validate_model_file ((const char **) list_models, num_models, &nnfw); if (status != ML_ERROR_NONE) { @@ -2073,7 +2080,21 @@ _ml_validate_model_file (const char *const *model, file_ext[i] = g_ascii_strdown (pos, -1); } - /** @todo Make sure num_models is correct for each nnfw type */ + /** + * @todo Currently ML_NNFW_TYPE_FLARE is defined temporarily to avoid ACR. + * Move checking ML_NNFW_TYPE_FLARE to below switch statement. + */ + if (*nnfw == ML_NNFW_TYPE_FLARE) { + if (!g_str_equal (file_ext[0], ".bin")) { + _ml_error_report + ("Flare accepts .bin file only. Please support correct file extension. 
You have specified: \"%s\"", + file_ext[0]); + status = ML_ERROR_INVALID_PARAMETER; + } + goto done; + } + + /** @todo Make sure num_models is correct for each nnfw type */ switch (*nnfw) { case ML_NNFW_TYPE_NNFW: case ML_NNFW_TYPE_TVM: diff --git a/tests/capi/unittest_capi_service_extension.cc b/tests/capi/unittest_capi_service_extension.cc index f3322ac3..90e3ab5f 100644 --- a/tests/capi/unittest_capi_service_extension.cc +++ b/tests/capi/unittest_capi_service_extension.cc @@ -9,7 +9,7 @@ #include #include - +#include #include #include #include "ml-api-service-extension.h" @@ -394,9 +394,9 @@ _extension_test_imgclf (ml_service_h handle, gboolean is_pipeline) /** * @brief Macro to skip testcase if model file is not ready. */ -#define skip_llamacpp_tc(tc_name) \ +#define skip_llm_tc(tc_name, model_name) \ do { \ - g_autofree gchar *model_file = _get_model_path ("llama-2-7b-chat.Q2_K.gguf"); \ + g_autofree gchar *model_file = _get_model_path (model_name); \ if (!g_file_test (model_file, G_FILE_TEST_EXISTS)) { \ g_autofree gchar *msg = g_strdup_printf ( \ "Skipping '%s' due to missing model file. " \ @@ -406,12 +406,12 @@ _extension_test_imgclf (ml_service_h handle, gboolean is_pipeline) } \ } while (0) + /** * @brief Callback function for scenario test. 
*/ static void -_extension_test_llamacpp_cb ( - ml_service_event_e event, ml_information_h event_data, void *user_data) +_extension_test_llm_cb (ml_service_event_e event, ml_information_h event_data, void *user_data) { extension_test_data_s *tdata = (extension_test_data_s *) user_data; ml_tensors_data_h data = NULL; @@ -429,7 +429,8 @@ _extension_test_llamacpp_cb ( status = ml_tensors_data_get_tensor_data (data, 0U, &_raw, &_size); EXPECT_EQ (status, ML_ERROR_NONE); - g_print ("%s", (char *) _raw); + std::cout.write (static_cast<const char *> (_raw), _size); /* print raw bytes; output may contain multi-byte (e.g. Korean UTF-8) text */ + std::cout.flush (); if (tdata) tdata->received++; @@ -443,15 +444,25 @@ * @brief Internal function to run test with ml-service extension handle. */ static inline void -_extension_test_llamacpp (const gchar *config, gboolean is_pipeline) +_extension_test_llm (const gchar *config, gchar *input_file, guint sleep_us, gboolean is_pipeline) { extension_test_data_s *tdata; ml_service_h handle; ml_tensors_info_h info; ml_tensors_data_h input; int status; + gsize len = 0; + g_autofree gchar *contents = NULL; - const gchar input_text[] = "Hello my name is"; + if (input_file != NULL) { + + g_autofree gchar *data_file = _get_data_path (input_file); + ASSERT_TRUE (g_file_test (data_file, G_FILE_TEST_EXISTS)); + ASSERT_TRUE (g_file_get_contents (data_file, &contents, &len, NULL)); + } else { + contents = g_strdup ("Hello my name is"); + len = strlen (contents); + } tdata = _create_test_data (is_pipeline); ASSERT_TRUE (tdata != NULL); @@ -459,7 +470,8 @@ _extension_test_llamacpp (const gchar *config, gboolean is_pipeline) status = ml_service_new (config, &handle); ASSERT_EQ (status, ML_ERROR_NONE); - status = ml_service_set_event_cb (handle, _extension_test_llm_cb, tdata); + EXPECT_EQ (status, ML_ERROR_NONE); /* Create and push input data. 
*/ @@ -468,12 +480,12 @@ _extension_test_llamacpp (const gchar *config, gboolean is_pipeline) ml_tensors_data_create (info, &input); - ml_tensors_data_set_tensor_data (input, 0U, input_text, strlen (input_text)); + ml_tensors_data_set_tensor_data (input, 0U, contents, len); status = ml_service_request (handle, NULL, input); EXPECT_EQ (status, ML_ERROR_NONE); - g_usleep (5000000U); + g_usleep (sleep_us); EXPECT_GT (tdata->received, 0); /* Clear callback before releasing tdata. */ @@ -494,11 +506,11 @@ _extension_test_llamacpp (const gchar *config, gboolean is_pipeline) */ TEST (MLServiceExtension, scenarioConfigLlamacpp) { - skip_llamacpp_tc ("scenarioConfigLlamacpp"); + skip_llm_tc ("scenarioConfigLlamacpp", "llama-2-7b-chat.Q2_K.gguf"); g_autofree gchar *config = get_config_path ("config_single_llamacpp.conf"); - _extension_test_llamacpp (config, FALSE); + _extension_test_llm (config, NULL, 5000000U, FALSE); } /** @@ -506,14 +518,32 @@ TEST (MLServiceExtension, scenarioConfigLlamacpp) */ TEST (MLServiceExtension, scenarioConfigLlamacppAsync) { - skip_llamacpp_tc ("scenarioConfigLlamacppAsync"); + skip_llm_tc ("scenarioConfigLlamacppAsync", "llama-2-7b-chat.Q2_K.gguf"); g_autofree gchar *config = get_config_path ("config_single_llamacpp_async.conf"); - _extension_test_llamacpp (config, FALSE); + _extension_test_llm (config, NULL, 5000000U, FALSE); +} + +/** + * @brief Usage of ml-service extension API. + * + * Note: For test, copy modelfile to current dir + * There are some commonly used functions, so Flare is temporarily put into ENABLE_LLAMACPP. + */ +TEST (MLServiceExtension, scenarioConfigFlare) +{ + g_autofree gchar *input_file = g_strdup ("flare_input.txt"); + + skip_llm_tc ("scenarioConfigFlare", "sflare_if_4bit_3b.bin"); + + g_autofree gchar *config = get_config_path ("config_single_flare.conf"); + + _extension_test_llm (config, input_file, 40000000U, FALSE); } #endif /* ENABLE_LLAMACPP */ + /** * @brief Usage of ml-service extension API. 
*/ diff --git a/tests/test_models/config/config_single_flare.conf b/tests/test_models/config/config_single_flare.conf new file mode 100644 index 00000000..10e42e9e --- /dev/null +++ b/tests/test_models/config/config_single_flare.conf @@ -0,0 +1,11 @@ +{ + "single" : + { + "framework" : "flare", + "model" : ["../tests/test_models/models/sflare_if_4bit_3b.bin"], + "adapter" : ["../tests/test_models/models/history_lora.bin"], + "custom" : "tokenizer_path:../tests/test_models/data/tokenizer.json,backend:CPU,output_size:1024,model_type:3B,data_type:W4A32", + "invoke_dynamic" : "true", + "invoke_async" : "false" + } +} diff --git a/tests/test_models/data/flare_input.txt b/tests/test_models/data/flare_input.txt new file mode 100644 index 00000000..c3637da2 --- /dev/null +++ b/tests/test_models/data/flare_input.txt @@ -0,0 +1 @@ +<|begin_of_text|><|turn_start|>System\n<|turn_end|>\n<|turn_start|>User\nYou are a summarization expert.Please read the provided carefully and summarize it in 3 sentences in English. The summary should comprehensively cover the entire content of the original text and be written with the same meaning as the source material.<|begin_of_text|><|turn_start|>System<|turn_end|>국제 연구 컨소시엄이 차세대 재생 에너지 기술 개발에 박차를 가하고 있습니다. 스탠포드 대학을 주도로 MIT, KAIST 등 12개 기관이 참여한 이 프로젝트는 기존 실리콘 기반 태양광 패널의 한계를 극복하기 위해 페로브스카이트-실리콘 탠덤 구조를 적용했습니다. 실험 결과, 이 신소재는 31.2%의 광변환 효율을 달성하며 상용화 가능성을 입증했는데, 이는 기존 단일 접합 태양전지의 최대 효율(26%)을 크게 상회하는 수치입니다. 연구팀은 나노스케일 광포획 구조와 양자점 기술을 접목해 적층형 셀의 내구성을 향상시키는 데 성공했으며, 2025년 상용화를 목표로 대량 생산 공정 개발에 착수했습니다. 현재 캘리포니아의 모하비 사막과 독일 바이에른 지역에 설치된 시범 플랜트에서 실외 테스트가 진행 중이며, 초기 데이터는 일사량 변동 환경에서도 안정적인 성능을 보여주고 있습니다. 산업계 전문가들은 이 기술이 2030년까지 전 세계 태양광 시장의 35%를 점유할 것으로 예상하며, 화석 연료 의존도를 12% 감소시킬 수 있을 것으로 내다보고 있습니다. 특히 개발도상국을 위한 저비용 버전 개발도 병행 중인데, 필리핀과 케냐에서 2024년 말 시범 설치될 예정입니다. 한편 유럽 에너지 위원회는 이 기술이 RE100 목표 달성 시기를 5년 앞당길 수 있을 것으로 평가하며 추가 지원 방안을 검토 중입니다.\n<|turn_end|>\n<|turn_start|>Assistant\n" \ No newline at end of file