From e3fb5189e2e43fec1f3cc763111a4efd36a03c10 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 10 Dec 2025 00:53:05 +0100 Subject: [PATCH 01/11] devstral tool parser for tool calling --- prepare_llm_models.sh | 12 + src/llm/BUILD | 6 +- .../devstral/generation_config_builder.cpp | 57 ++++ .../devstral/generation_config_builder.hpp | 33 ++ .../io_processing/devstral/tool_parser.cpp | 173 ++++++++++ .../io_processing/devstral/tool_parser.hpp | 82 +++++ .../generation_config_builder.hpp | 3 + src/llm/io_processing/output_parser.cpp | 3 + src/llm/io_processing/output_parser.hpp | 2 +- .../devstral_output_parser_test.cpp | 305 ++++++++++++++++++ 10 files changed, 674 insertions(+), 2 deletions(-) create mode 100644 src/llm/io_processing/devstral/generation_config_builder.cpp create mode 100644 src/llm/io_processing/devstral/generation_config_builder.hpp create mode 100644 src/llm/io_processing/devstral/tool_parser.cpp create mode 100644 src/llm/io_processing/devstral/tool_parser.hpp create mode 100644 src/test/llm/output_parsers/devstral_output_parser_test.cpp diff --git a/prepare_llm_models.sh b/prepare_llm_models.sh index 43a9cc1df2..3ab5dbc562 100755 --- a/prepare_llm_models.sh +++ b/prepare_llm_models.sh @@ -34,6 +34,7 @@ HERMES3_MODEL="NousResearch/Hermes-3-Llama-3.1-8B" PHI4_MODEL="microsoft/Phi-4-mini-instruct" MISTRAL_MODEL="mistralai/Mistral-7B-Instruct-v0.3" GPT_OSS="openai/gpt-oss-20b" +DEVSTRAL_MODEL="unsloth/Devstral-Small-2507" if [ "$(python3 -c 'import sys; print(sys.version_info[1])')" -le "8" ]; then echo "Prepare models with python > 3.8."; exit 1 ; fi @@ -173,3 +174,14 @@ if [ ! -f "$1/$GPT_OSS/$TOKENIZER_FILE" ]; then echo "[ERROR] Models file $1/$GPT_OSS/$TOKENIZER_FILE does not exist." exit 1 fi + +if [ -f "$1/$DEVSTRAL_MODEL/$TOKENIZER_FILE" ]; then + echo "Models file $1/$DEVSTRAL_MODEL/$TOKENIZER_FILE exists. Skipping downloading models." +else + mkdir -p $1/$DEVSTRAL_MODEL + convert_tokenizer $DEVSTRAL_MODEL --with_detokenizer -o $1/$DEVSTRAL_MODEL +fi +if [ ! -f "$1/$DEVSTRAL_MODEL/$TOKENIZER_FILE" ]; then + echo "[ERROR] Models file $1/$DEVSTRAL_MODEL/$TOKENIZER_FILE does not exist." 
+ exit 1 +fi diff --git a/src/llm/BUILD b/src/llm/BUILD index d89f9999f5..d3023326c0 100644 --- a/src/llm/BUILD +++ b/src/llm/BUILD @@ -136,6 +136,7 @@ ovms_cc_library( # TODO split further so we don't have to recompile everything w "io_processing/hermes3/tool_parser.hpp", "io_processing/llama3/tool_parser.hpp", "io_processing/phi4/tool_parser.hpp", + "io_processing/devstral/tool_parser.hpp", "io_processing/mistral/tool_parser.hpp", "io_processing/qwen3/reasoning_parser.hpp", "io_processing/gptoss/reasoning_parser.hpp", @@ -147,6 +148,7 @@ ovms_cc_library( # TODO split further so we don't have to recompile everything w "io_processing/hermes3/tool_parser.cpp", "io_processing/llama3/tool_parser.cpp", "io_processing/phi4/tool_parser.cpp", + "io_processing/devstral/tool_parser.cpp", "io_processing/mistral/tool_parser.cpp", "io_processing/qwen3/reasoning_parser.cpp", "io_processing/gptoss/reasoning_parser.cpp", @@ -175,11 +177,13 @@ ovms_cc_library( "io_processing/phi4/generation_config_builder.hpp", "io_processing/llama3/generation_config_builder.hpp", "io_processing/hermes3/generation_config_builder.hpp", + "io_processing/devstral/generation_config_builder.hpp", "io_processing/generation_config_builder.hpp"], srcs = ["io_processing/base_generation_config_builder.cpp", "io_processing/phi4/generation_config_builder.cpp", "io_processing/llama3/generation_config_builder.cpp", - "io_processing/hermes3/generation_config_builder.cpp"], + "io_processing/hermes3/generation_config_builder.cpp", + "io_processing/devstral/generation_config_builder.cpp"], deps = [ ":openai_request", "//src:libovmslogging", diff --git a/src/llm/io_processing/devstral/generation_config_builder.cpp b/src/llm/io_processing/devstral/generation_config_builder.cpp new file mode 100644 index 0000000000..d2294f21b1 --- /dev/null +++ b/src/llm/io_processing/devstral/generation_config_builder.cpp @@ -0,0 +1,57 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//*****************************************************************************
+
+#include
+#include
+#include
+#include
+
+#include "generation_config_builder.hpp"
+
+namespace ovms {
+
+void DevstralGenerationConfigBuilder::parseConfigFromRequest(const OpenAIChatCompletionsRequest& request) {
+    // Call the base class method to fill in common configuration
+    BaseGenerationConfigBuilder::parseConfigFromRequest(request);
+
+    // For now the only specific part is related to tools, so if there are no tools provided in the request
+    // we can exit early
+    if (request.toolNameSchemaMap.empty()) {
+        return;
+    }
+
+    if (enableToolGuidedGeneration || request.toolChoice == "required") {
+        // Set tool guided generation config specific to Devstral model
+        auto triggeredTags = std::make_shared<ov::genai::StructuredOutputConfig::TriggeredTags>();
+        triggeredTags->triggers.push_back("[TOOL_CALLS]");
+
+        for (const auto& [toolName, toolSchemaWrapper] : request.toolNameSchemaMap) {
+            const auto& toolSchema = toolSchemaWrapper.stringRepr;
+            ov::genai::StructuredOutputConfig::Tag tagItem;
+            tagItem.begin = "[TOOL_CALLS]" + toolName + "[ARGS]";
+            tagItem.end = "</s>";
+            tagItem.content = ov::genai::StructuredOutputConfig::JSONSchema(toolSchema);
+            triggeredTags->tags.push_back(tagItem);
+        }
+        if (request.toolChoice == "required") {
+            triggeredTags->at_least_one = true;
+        }
+        ov::genai::StructuredOutputConfig::StructuralTag structuralTag = triggeredTags;
+        setStructuralTagsConfig(structuralTag);
+    }
+}
+
+}  // namespace ovms
diff --git a/src/llm/io_processing/devstral/generation_config_builder.hpp b/src/llm/io_processing/devstral/generation_config_builder.hpp
new file mode 100644
index 0000000000..ec69a054fe
--- /dev/null
+++ b/src/llm/io_processing/devstral/generation_config_builder.hpp
@@ -0,0 +1,33 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+#include "../base_generation_config_builder.hpp"
+
+namespace ovms {
+
+/*
+ * DevstralGenerationConfigBuilder extends BaseGenerationConfigBuilder to provide specific configuration for the Devstral model.
+ * It overrides the parseConfigFromRequest method to set tool guided generation config.
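+ * For example, for a hypothetical tool named "get_weather", guided generation is constrained to emit
+ * "[TOOL_CALLS]get_weather[ARGS]" followed by JSON that matches that tool's schema.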
+ */ +class DevstralGenerationConfigBuilder : public BaseGenerationConfigBuilder { +public: + DevstralGenerationConfigBuilder() = delete; + explicit DevstralGenerationConfigBuilder(const ov::genai::GenerationConfig& baseConfig, bool enableToolGuidedGeneration, DecodingMethod decodingMethod) : + BaseGenerationConfigBuilder(baseConfig, enableToolGuidedGeneration, decodingMethod) {} + + void parseConfigFromRequest(const OpenAIChatCompletionsRequest& request) override; +}; +} // namespace ovms diff --git a/src/llm/io_processing/devstral/tool_parser.cpp b/src/llm/io_processing/devstral/tool_parser.cpp new file mode 100644 index 0000000000..9bb417c084 --- /dev/null +++ b/src/llm/io_processing/devstral/tool_parser.cpp @@ -0,0 +1,173 @@ +//***************************************************************************** +// Copyright 2025 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +#include +#include +#include +#include + +#include "src/port/rapidjson_document.hpp" + +#include "../../../logging.hpp" +#include "tool_parser.hpp" +#include "../utils.hpp" +#include "src/stringutils.hpp" + +namespace ovms { + +void DevstralToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { + std::vector tools; + + if (parsedOutput.content.empty() || generatedTokens.size() <= 0) { + SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No content to parse for tool calls"); + return; + } + + // Parser will consume entire model output only if the first generated token is the beginning of tools token. + // expected format: [TOOL_CALLS]tool_name[ARGS]{"arg1": "value1", ...} + + //size_t pos = 0; + //size_t firstToolCallPos; + + // Save position of the first tool call start tag to properly clear content after parsing. 
+    //firstToolCallPos = parsedOutput.content.find("[TOOL_CALLS]", pos);
+    //find position in vector generatedTokens with value 9
+    size_t firstToolTokenIndex;
+    auto it = std::find(generatedTokens.begin(), generatedTokens.end(), this->botTokenId);
+    if (it != generatedTokens.end()) {
+        firstToolTokenIndex = std::distance(generatedTokens.begin(), it);
+    } else {
+        return;
+    }
+
+    size_t firstArgsTokenIndex;
+    auto it_args = std::find(generatedTokens.begin() + firstToolTokenIndex, generatedTokens.end(), this->argsTokenId);
+    if (it_args != generatedTokens.end()) {
+        firstArgsTokenIndex = std::distance(generatedTokens.begin(), it_args);
+    } else {
+        return;
+    }
+    if (firstToolTokenIndex > firstArgsTokenIndex) {
+        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "First tool token index is greater than first args token index.");
+        return;
+    }
+    std::vector tool_name_tokens(generatedTokens.begin() + (firstToolTokenIndex + 1), generatedTokens.begin() + (firstArgsTokenIndex));
+    std::vector arguments_tokens(generatedTokens.begin() + (firstArgsTokenIndex + 1), generatedTokens.end());
+
+    ToolCall toolCall;
+    std::string tool_name = tokenizer.decode(tool_name_tokens, ov::AnyMap{ov::genai::skip_special_tokens(true)});
+    if (this->toolSchemas.find(tool_name) == this->toolSchemas.end()) {
+        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool name '{}' not valid.", tool_name);
+        return;
+    }
+    std::string arguments = tokenizer.decode(arguments_tokens, ov::AnyMap{ov::genai::skip_special_tokens(true)});
+
+    toolCall.name = tool_name;
+    toolCall.arguments = arguments;
+    toolCall.id = generateRandomId();  // Generate a random ID for the tool call
+    parsedOutput.toolCalls.push_back(toolCall);
+
+    // get subset of generatedTokens starting from begin() to firstArgsTokenIndex
+    std::vector content_tokens;
+    if (firstToolTokenIndex > 0) {
+        content_tokens = std::vector(generatedTokens.begin(), generatedTokens.begin() + firstToolTokenIndex);
+        parsedOutput.content = tokenizer.decode(content_tokens, ov::AnyMap{ov::genai::skip_special_tokens(true)}); // Return only the contnet till tool call in content
+    } else {
+        parsedOutput.content = "";
+    }
+    return;
+}
+
+std::optional DevstralToolParser::sendFullDelta(ToolCall& toolCall) {
+    rapidjson::Document argsDelta;
+    argsDelta.Parse(toolCall.arguments.c_str());
+    rapidjson::Document argumentsWrapper;
+    argumentsWrapper.SetObject();
+    rapidjson::Document::AllocatorType& allocator = argumentsWrapper.GetAllocator();
+    // now we need to add string toolCall.arguments to argumentsWrapper under "arguments" key
+    rapidjson::Value toolCallsString(rapidjson::kStringType);
+    toolCallsString.SetString(toolCall.arguments.c_str(), allocator);
+    argumentsWrapper.AddMember("arguments", toolCallsString, allocator);
+    auto currentDelta = wrapDelta(argumentsWrapper, this->toolCallIndex);
+    return currentDelta;
+}
+
+
+std::optional DevstralToolParser::parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) {
+    /*
+    Devstral [TOOL_CALLS]tool_name[ARGS]arguments[</s>]
+    It does not support parallel tool calls, so tool calls are always in sequence.
+
+    We have three processing states:
+    AWAITING_START_TAG,
+    AWAITING_ARGS_TAG,
+    PROCESSING_ARGS
+
+    We store the history of chunks in streamContent string. After state changes are detected, we clear the streamContent to only keep unprocessed part.
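+
+    Illustrative (hypothetical) chunk sequence: "[TOOL_CALLS]", "get", "_weather", "[ARGS]", "{\"city\": \"Paris\"}" -
+    the first delta (carrying the tool name) is emitted once [ARGS] is seen, and the accumulated arguments
+    are flushed in a single delta when a finish reason arrives.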
+    */
+
+    this->streamContent += chunk;
+    if (this->internalState == AWAITING_START_TAG) {
+        size_t pos = chunk.find("[TOOL_CALLS]");
+        if (pos != std::string::npos) {
+            this->internalState = AWAITING_ARGS_TAG;
+            this->toolCallIndex++;
+            if (pos == 0) {
+                this->streamContent.clear();
+            } else {
+                this->streamContent = this->streamContent.substr(pos + 13); // "[TOOL_CALLS]" length is 13
+            }
+        } else {
+            return std::nullopt;
+        }
+    }
+    if (this->internalState == AWAITING_ARGS_TAG) {
+        //check if [ARGS] tag is present in the chunk and update state accordingly
+        size_t pos = this->streamContent.find("[ARGS]");
+        if (pos != std::string::npos) {
+            this->internalState = PROCESSING_ARGS;
+            this->toolName = this->streamContent.substr(0, pos);
+            if (this->toolSchemas.find(this->toolName) == this->toolSchemas.end()) {
+                SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool name '{}' not valid.", this->toolName);
+                return std::nullopt;
+            }
+            this->streamContent = this->streamContent.substr(pos + 6); // "[ARGS]" length is 6
+            return wrapFirstDelta(this->toolName, this->toolCallIndex);
+        } else {
+            return std::nullopt;
+        }
+    }
+    if (finishReason != ov::genai::GenerationFinishReason::NONE) {
+        size_t end_pos = this->streamContent.find("</s>");
+        std::string arguments;
+        if (end_pos != std::string::npos) {
+            arguments = this->streamContent.substr(0, end_pos);
+        } else {
+            arguments = this->streamContent;
+        }
+        if (!arguments.empty()) {
+            ToolCall toolCall;
+            toolCall.arguments = arguments;
+            toolCall.name = this->toolName;
+            return sendFullDelta(toolCall);
+        } else {
+            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No valid arguments found in streamContent.");
+            return std::nullopt;
+        }
+    }
+    return std::nullopt;
+}
+}  // namespace ovms
diff --git a/src/llm/io_processing/devstral/tool_parser.hpp b/src/llm/io_processing/devstral/tool_parser.hpp
new file mode 100644
index 0000000000..c105c7ce74
--- /dev/null
+++ b/src/llm/io_processing/devstral/tool_parser.hpp
@@ -0,0 +1,82 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//***************************************************************************** +#pragma once + +#include +#include +#include +#include + +#include "src/port/rapidjson_document.hpp" + +#include "src/llm/io_processing/base_output_parser.hpp" +#include "src/llm/io_processing/partial_json_builder.hpp" +#include "src/llm/apis/tool_schema_wrapper.hpp" + +namespace ovms { +class DevstralToolParser : public BaseOutputParser { + const int64_t argsTokenId; // [ARGS] + const int64_t botTokenId; // [TOOL_CALLS] + + // in streaming mode we can rely on tags in string format as tokens are not available + const std::string streamingParsingArgsStartTag = "[ARGS]"; + const std::string streamingParsingToolCallsStartTag = "[TOOL_CALLS]"; + + enum InternalState { + AWAITING_START_TAG, + AWAITING_ARGS_TAG, + PROCESSING_ARGS + }; + + InternalState internalState = AWAITING_START_TAG; + const ToolsSchemas_t& toolSchemas; + // Index to track the current tool call being processed (-1 means no tool call has been started yet) + int toolCallIndex = -1; + std::string streamContent = ""; // content accumulated from stream chunks + std::string toolName = ""; + std::optional sendFullDelta(ToolCall& toolCall); + +public: + DevstralToolParser() = delete; + DevstralToolParser(ov::genai::Tokenizer& tokenizer, const ToolsSchemas_t& toolSchemas) : + BaseOutputParser(tokenizer), + argsTokenId(tokenizer.encode("[ARGS]",{{"add_special_tokens", false}}).input_ids.data()[0]), + botTokenId(tokenizer.encode("[TOOL_CALLS]",{{"add_special_tokens", false}}).input_ids.data()[0]), + toolSchemas(toolSchemas) {} + + void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; + std::optional parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override; + const std::vector& getParsingStartTags() const override { + static const std::vector toolCallStartTags{streamingParsingToolCallsStartTag}; + return toolCallStartTags; + } + const std::vector& getSpecialParsingStartTags() const override { + static const std::vector specialParsingStartTags{}; + return specialParsingStartTags; + } + // Tools calls are expected to be the last part of the content, so we do not specify an end tag. 
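+    // Streaming termination is instead detected from the generation finish reason (see parseChunk),
+    // which flushes the accumulated arguments in a single final delta.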
+    const std::string& getParsingEndTag() const override {
+        static const std::string toolCallEndTag = "";
+        return toolCallEndTag;
+    }
+
+    bool requiresStreamingWithSpecialTokens() const override {
+
+        std::cout << "Requires streaming with special tokens: true" << std::endl;
+        return true;
+    }
+};
+}  // namespace ovms
diff --git a/src/llm/io_processing/generation_config_builder.hpp b/src/llm/io_processing/generation_config_builder.hpp
index 663d4a9b1a..2423cd074d 100644
--- a/src/llm/io_processing/generation_config_builder.hpp
+++ b/src/llm/io_processing/generation_config_builder.hpp
@@ -24,6 +24,7 @@
 #include "phi4/generation_config_builder.hpp"
 #include "llama3/generation_config_builder.hpp"
 #include "hermes3/generation_config_builder.hpp"
+#include "devstral/generation_config_builder.hpp"
 #include "../apis/openai_request.hpp"
 #include "../../logging.hpp"
@@ -44,6 +45,8 @@ class GenerationConfigBuilder {
             builder_impl = std::make_unique(baseConfig, enableToolGuidedGeneration, decodingMethod);
         } else if (toolParserName == "phi4") {
             builder_impl = std::make_unique(baseConfig, enableToolGuidedGeneration, decodingMethod);
+        } else if (toolParserName == "devstral") {
+            builder_impl = std::make_unique<DevstralGenerationConfigBuilder>(baseConfig, enableToolGuidedGeneration, decodingMethod);
         } else {
             if (enableToolGuidedGeneration) {
                 SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Option enable_tool_guided_generation is set, but will not be effective since no valid tool parser has been provided.");
diff --git a/src/llm/io_processing/output_parser.cpp b/src/llm/io_processing/output_parser.cpp
index cf0a805f59..1c060375df 100644
--- a/src/llm/io_processing/output_parser.cpp
+++ b/src/llm/io_processing/output_parser.cpp
@@ -27,6 +27,7 @@
 #include "gptoss/tool_parser.hpp"
 #include "qwen3/reasoning_parser.hpp"
 #include "qwen3coder/qwen3coder_tool_parser.hpp"
+#include "devstral/tool_parser.hpp"
 #include "gptoss/reasoning_parser.hpp"
 namespace ovms {
@@ -168,6 +169,8 @@ OutputParser::OutputParser(ov::genai::Tokenizer& tokenizer, const std::string to
         toolParser = std::make_unique(tokenizer);
     } else if (toolParserName == "qwen3coder") {
         toolParser = std::make_unique(tokenizer, toolNameSchemaMap);
+    } else if (toolParserName == "devstral") {
+        toolParser = std::make_unique<DevstralToolParser>(tokenizer, toolNameSchemaMap);
     } else if (!toolParserName.empty()) {
         throw std::runtime_error("Unsupported tool parser: " + toolParserName);
     }
diff --git a/src/llm/io_processing/output_parser.hpp b/src/llm/io_processing/output_parser.hpp
index 613e0a993e..433b71cc89 100644
--- a/src/llm/io_processing/output_parser.hpp
+++ b/src/llm/io_processing/output_parser.hpp
@@ -87,7 +87,7 @@ class OutputParser {
     std::optional parseChunk(const std::string& chunkResponse, const bool toolsAvailable, ov::genai::GenerationFinishReason finishReason);
     bool requiresStreamingWithSpecialTokens() const {
-        return (reasoningParser && reasoningParser->requiresStreamingWithSpecialTokens()) &&
+        return (reasoningParser && reasoningParser->requiresStreamingWithSpecialTokens()) ||
                (toolParser && toolParser->requiresStreamingWithSpecialTokens());
     }
 };
diff --git a/src/test/llm/output_parsers/devstral_output_parser_test.cpp b/src/test/llm/output_parsers/devstral_output_parser_test.cpp
new file mode 100644
index 0000000000..875871fa4e
--- /dev/null
+++ b/src/test/llm/output_parsers/devstral_output_parser_test.cpp
@@ -0,0 +1,305 @@
+//*****************************************************************************
+// Copyright 2025 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0
(the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#include
+#include
+#include
+#include
+
+#include "../../../llm/io_processing/base_output_parser.hpp"
+#include "../../../llm/io_processing/output_parser.hpp"
+#include "../../platform_utils.hpp"
+
+using namespace ovms;
+
+#ifdef _WIN32
+const std::string tokenizerPath = getWindowsRepoRootPath() + "\\src\\test\\llm_testing\\unsloth\\Devstral-Small-2507";
+#else
+// Hardcoded for usage in docker container
+const std::string tokenizerPath = "/ovms/src/test/llm_testing/unsloth/Devstral-Small-2507/";
+#endif
+
+static ovms::ToolsSchemas_t EMPTY_TOOLS_SCHEMA = {};  // not used for devstral
+static std::unique_ptr<ov::genai::Tokenizer> devstralTokenizer;
+
+class DevstralOutputParserTest : public ::testing::Test {
+protected:
+    std::unique_ptr<OutputParser> outputParserWithRegularToolParsing;
+
+    static void SetUpTestSuite() {
+        try {
+            devstralTokenizer = std::make_unique<ov::genai::Tokenizer>(tokenizerPath);
+        } catch (const std::exception& e) {
+            FAIL() << "Failed to initialize devstral tokenizer: " << e.what();
+        } catch (...) {
+            FAIL() << "Failed to initialize devstral tokenizer due to unknown error.";
+        }
+    }
+
+    static void TearDownTestSuite() {
+        devstralTokenizer.reset();
+    }
+
+    void SetUp() override {
+        // declare tools_schema
+        static std::map toolSchemasInput = {
+            {"example_tool", R"({"properties": {"arg1": {"type": "string", "description": "A string argument."}}, "required": ["arg1"]})"},
+        };
+
+        static std::vector> schemaDocsStorage;
+
+        auto convertStringToolSchemasStringToToolsSchemas = [](
+                                                                const std::map& input) -> ToolsSchemas_t {
+            ToolsSchemas_t result;
+            schemaDocsStorage.clear();
+            for (const auto& [name, schemaStr] : input) {
+                auto schemaDoc = std::make_unique<rapidjson::Document>();
+                if (schemaDoc->Parse(schemaStr.c_str()).HasParseError()) {
+                    throw std::runtime_error("Failed to parse schema for tool: " + name);
+                }
+                result[name] = {schemaDoc.get(), schemaStr};
+                schemaDocsStorage.push_back(std::move(schemaDoc));
+            }
+            return result;
+        };
+
+        static ovms::ToolsSchemas_t toolsSchemas = convertStringToolSchemasStringToToolsSchemas(toolSchemasInput);
+        outputParserWithRegularToolParsing = std::make_unique<OutputParser>(*devstralTokenizer, "devstral", "", toolsSchemas);
+    }
+};
+
+TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithSingleToolCall) {
+    std::string input = "[TOOL_CALLS]example_tool[ARGS]{\"arg1\":\"value1\",\"arg2\":42}";
+    std::string testInput = input;
+    auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids;
+    std::vector<int64_t> generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size());
+    ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true);
+    EXPECT_EQ(parsedOutput.content, "");
+    EXPECT_EQ(parsedOutput.reasoning, "");
+    ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
+    EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
+
EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithSingleToolCall_MissingEndTag) { + std::string testInput = "Reasoninig before tool call [TOOL_CALLS]example_tool[ARGS]{\"arg1\":\"value1\",\"arg2\":42}"; + auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "Reasoninig before tool call "); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { + std::string input = "This is a regular model response without tool calls."; + auto generatedTensor = devstralTokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "This is a regular model response without tool calls."); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); + EXPECT_EQ(parsedOutput.reasoning, ""); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) { + std::string testInput = "Reasoninig before tool call [TOOL_CALLS]example_tool[ARGS]{\"arg1\":\"value1\",\"arg2\":42}"; + auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "Reasoninig before tool call "); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithInvalidOrder) { + std::string testInput = "Reasoninig before tool call [ARGS]example_tool[TOOL_CALLS]{\"arg1\":\"value1\",\"arg2\":42}"; + auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "Reasoninig before tool call example_tool{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithMissingArgsTag) { + std::string input = "Some content [TOOL_CALLS]example_tool{\"arg1\":\"value1\",\"arg2\":42}"; + std::string testInput = input; + auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector 
generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + // Same expected content as tokenizer does not add special tokens + EXPECT_EQ(parsedOutput.content, "Some content example_tool{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithArrayArguments) { + std::string input = "[TOOL_CALLS]example_tool[ARGS]{\"filepath\":\"/var/log/db.log\",\"status\":[\"completed\",\"failed\"],\"encoding\":\"utf-8\",\"processFunction\":\"processFunction\"}"; + std::string testInput = input; + auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"filepath\":\"/var/log/db.log\",\"status\":[\"completed\",\"failed\"],\"encoding\":\"utf-8\",\"processFunction\":\"processFunction\"}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithInvalidArguments) { + std::string input = "[TOOL_CALLS]example_tool[ARGS]{ \"filepath\": \"/var/log/db.log\", \"status\": "; + std::string testInput = input; + auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, ""); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{ \"filepath\": \"/var/log/db.log\", \"status\": "); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} + +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithMissingTool_name) { + std::string input = "[TOOL_CALLS]wrong_name[ARGS]{ \"filepath\": \"/var/log/db.log\"}"; + std::string testInput = input; + auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "wrong_name{ \"filepath\": \"/var/log/db.log\"}"); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 0); +} + +TEST_F(DevstralOutputParserTest, HolisticStreaming) { + std::vector>> chunkToDeltaVec{ + // Tool call phase + // Starting first tool. Collecting chunk until full name is received. Don't return until then. 
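+        // Expected deltas follow the OpenAI streaming delta schema ({"delta":{"content":...}} or
+        // {"delta":{"tool_calls":[...]}}); the randomly generated id value is compared loosely below.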
+ {"Reasoning", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"Reasoning"}})"}, + {"example", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"example"}})"}, + {"[TOOL_CALLS]", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"get", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"_", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"weather", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[ARGS]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"get_weather"}}]}})"}, + {"{\"", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"city\":", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" \"Paris", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + // Last chunk is added in the for loop below + }; + ToolsSchemas_t tools_schemas = { + {"get_weather", ToolSchemaWrapper{}} + }; + for (auto lastFinishReason : {ov::genai::GenerationFinishReason::STOP, ov::genai::GenerationFinishReason::LENGTH}) { + // Need to have new output parser per case to simulate separate request processing + outputParserWithRegularToolParsing = std::make_unique(*devstralTokenizer, "devstral", "", tools_schemas); + auto chunkToDeltaVecCopy = chunkToDeltaVec; + if (lastFinishReason == ov::genai::GenerationFinishReason::STOP) { + chunkToDeltaVecCopy.push_back({"\"}", ov::genai::GenerationFinishReason::STOP, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"city\": \"Paris\"}"}}]}})"}); + } else { + chunkToDeltaVecCopy.push_back({"\"", ov::genai::GenerationFinishReason::LENGTH, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"city\": \"Paris\""}}]}})"}); + } + int64_t chunkIteration = -1; + for (const auto& [chunk, finishReason, expectedDelta] : chunkToDeltaVecCopy) { + chunkIteration++; + std::optional doc = outputParserWithRegularToolParsing->parseChunk(chunk, true, finishReason); + if (!expectedDelta.has_value() && !doc.has_value()) { + continue; // Both are nullopt, OK + } + if (expectedDelta.has_value() && doc.has_value()) { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + doc->Accept(writer); + std::string docStr = buffer.GetString(); + // If both strings contain "id":"...", compare id values by length and alphanumeric, else compare whole strings + std::string expected = expectedDelta.value(); + std::string idKey = "\"id\":\""; + auto docIdPos = docStr.find(idKey); + auto expectedIdPos = expected.find(idKey); + if (docIdPos != std::string::npos && expectedIdPos != std::string::npos) { + auto docIdStart = docIdPos + idKey.size(); + auto docIdEnd = docStr.find("\"", docIdStart); + auto expectedIdStart = expectedIdPos + idKey.size(); + auto expectedIdEnd = expected.find("\"", expectedIdStart); + ASSERT_NE(docIdEnd, std::string::npos); + ASSERT_NE(expectedIdEnd, std::string::npos); + std::string docId = docStr.substr(docIdStart, docIdEnd - docIdStart); + std::string expectedId = expected.substr(expectedIdStart, expectedIdEnd - expectedIdStart); + EXPECT_EQ(docId.size(), expectedId.size()) << "ID length mismatch for chunk: " << chunk; + EXPECT_TRUE(std::all_of(docId.begin(), docId.end(), ::isalnum)) << "ID not alphanumeric for chunk: " << chunk; + // Compare everything except the id value + std::string docStrNoId = docStr; + std::string expectedNoId = expected; + docStrNoId.replace(docIdStart, docId.size(), std::string(docId.size(), '*')); + expectedNoId.replace(expectedIdStart, 
expectedId.size(), std::string(expectedId.size(), '*')); + EXPECT_EQ(docStrNoId, expectedNoId) << "Mismatch for chunk (ignoring id value): " << chunk; + } else { + EXPECT_EQ(docStr, expected) << "Mismatch for chunk: [" << chunk << "] got [" << docStr << "] but expected [" << expected << "]" << chunkIteration; + } + } else if (expectedDelta.has_value()) { + FAIL() << "Mismatch for chunk: [" << chunk << "] got nothing but expected [" << expectedDelta.value() << "]" << chunkIteration; + } else if (doc.has_value()) { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + doc->Accept(writer); + std::string docStr = buffer.GetString(); + FAIL() << "Mismatch for chunk: [" << chunk << "] expected nothing but got [" << docStr << "]" << chunkIteration; + } else { + FAIL() << "Mismatch for chunk: [" << chunk << "] " << chunkIteration; + } + } + } +} + + +TEST_F(DevstralOutputParserTest, ToolCallsWithoutToolsInTheRequestStreaming) { + std::vector>> chunkToDeltaVec{ + // Tool parser is available, but tools are not in the request so every chunk is just a regular content + {"[TOOL_CALLS]", "{\"delta\":{\"content\":\"[TOOL_CALLS]\"}}"}, + {"get_", "{\"delta\":{\"content\":\"get_\"}}"}, + {"weather", "{\"delta\":{\"content\":\"weather\"}}"}, + {"[ARGS]", "{\"delta\":{\"content\":\"[ARGS]\"}}"}, + {"{\"", "{\"delta\":{\"content\":\"{\\\"\"}}"}, + {"city\":", "{\"delta\":{\"content\":\"city\\\":\"}}"}, + {"\"Paris\"", "{\"delta\":{\"content\":\"\\\"Paris\\\"\"}}"}, + {"}", "{\"delta\":{\"content\":\"}\"}}"}, + }; + + for (const auto& [chunk, expectedDelta] : chunkToDeltaVec) { + // Second argument is false as we simulate the case where tools have not been provided in the request + std::optional doc = outputParserWithRegularToolParsing->parseChunk(chunk, false, ov::genai::GenerationFinishReason::NONE); + if (!expectedDelta.has_value() && !doc.has_value()) { + continue; // Both are nullopt, OK + } + if (expectedDelta.has_value() && doc.has_value()) { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + doc->Accept(writer); + std::string docStr = buffer.GetString(); + std::string expected = expectedDelta.value(); + EXPECT_EQ(docStr, expected) << "Mismatch for chunk: " << chunk; + } else { + FAIL() << "Mismatch between expectedDelta and doc for chunk: " << chunk; + } + } +} From bf74839edee05053cf3ea44885bf8c3524487a17 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 10 Dec 2025 01:06:28 +0100 Subject: [PATCH 02/11] style --- src/llm/io_processing/devstral/tool_parser.cpp | 11 +++++------ src/llm/io_processing/devstral/tool_parser.hpp | 12 ++++++------ .../output_parsers/devstral_output_parser_test.cpp | 8 +++----- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/src/llm/io_processing/devstral/tool_parser.cpp b/src/llm/io_processing/devstral/tool_parser.cpp index 9bb417c084..102bd8cb23 100644 --- a/src/llm/io_processing/devstral/tool_parser.cpp +++ b/src/llm/io_processing/devstral/tool_parser.cpp @@ -52,7 +52,7 @@ void DevstralToolParser::parse(ParsedOutput& parsedOutput, const std::vectorargsTokenId); if (it_args != generatedTokens.end()) { @@ -66,7 +66,7 @@ void DevstralToolParser::parse(ParsedOutput& parsedOutput, const std::vector tool_name_tokens(generatedTokens.begin() + (firstToolTokenIndex + 1), generatedTokens.begin() + (firstArgsTokenIndex)); std::vector arguments_tokens(generatedTokens.begin() + (firstArgsTokenIndex + 1), generatedTokens.end()); - + ToolCall toolCall; std::string tool_name = tokenizer.decode(tool_name_tokens, 
ov::AnyMap{ov::genai::skip_special_tokens(true)});
     if (this->toolSchemas.find(tool_name) == this->toolSchemas.end()) {
@@ -84,7 +84,7 @@ void DevstralToolParser::parse(ParsedOutput& parsedOutput, const std::vector
     std::vector content_tokens;
     if (firstToolTokenIndex > 0) {
         content_tokens = std::vector(generatedTokens.begin(), generatedTokens.begin() + firstToolTokenIndex);
-        parsedOutput.content = tokenizer.decode(content_tokens, ov::AnyMap{ov::genai::skip_special_tokens(true)}); // Return only the contnet till tool call in content
+        parsedOutput.content = tokenizer.decode(content_tokens, ov::AnyMap{ov::genai::skip_special_tokens(true)});  // Return only the content till tool call
     } else {
         parsedOutput.content = "";
     }
@@ -105,7 +105,6 @@ std::optional DevstralToolParser::sendFullDelta(ToolCall& t
     return currentDelta;
 }
-
 std::optional DevstralToolParser::parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) {
     /*
     Devstral [TOOL_CALLS]tool_name[ARGS]arguments[</s>]
     It does not support parallel tool calls, so tool calls are always in sequence.
@@ -128,7 +127,7 @@ std::optional DevstralToolParser::parseChunk(const std::str
             if (pos == 0) {
                 this->streamContent.clear();
             } else {
-                this->streamContent = this->streamContent.substr(pos + 13); // "[TOOL_CALLS]" length is 13
+                this->streamContent = this->streamContent.substr(pos + 13);  // "[TOOL_CALLS]" length is 13
             }
         } else {
             return std::nullopt;
@@ -144,7 +143,7 @@ std::optional DevstralToolParser::parseChunk(const std::str
                 SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool name '{}' not valid.", this->toolName);
                 return std::nullopt;
             }
-            this->streamContent = this->streamContent.substr(pos + 6); // "[ARGS]" length is 6
+            this->streamContent = this->streamContent.substr(pos + 6);  // "[ARGS]" length is 6
             return wrapFirstDelta(this->toolName, this->toolCallIndex);
         } else {
             return std::nullopt;
diff --git a/src/llm/io_processing/devstral/tool_parser.hpp b/src/llm/io_processing/devstral/tool_parser.hpp
index c105c7ce74..b46b29ea72 100644
--- a/src/llm/io_processing/devstral/tool_parser.hpp
+++ b/src/llm/io_processing/devstral/tool_parser.hpp
@@ -28,8 +28,8 @@ namespace ovms {
 class DevstralToolParser : public BaseOutputParser {
-    const int64_t argsTokenId;  // [ARGS]
-    const int64_t botTokenId;  // [TOOL_CALLS]
+    const int64_t argsTokenId;  // [ARGS]
+    const int64_t botTokenId;   // [TOOL_CALLS]
     // in streaming mode we can rely on tags in string format as tokens are not available
     const std::string streamingParsingArgsStartTag = "[ARGS]";
     const std::string streamingParsingToolCallsStartTag = "[TOOL_CALLS]";
@@ -52,9 +52,9 @@ class DevstralToolParser : public BaseOutputParser {
 public:
     DevstralToolParser() = delete;
     DevstralToolParser(ov::genai::Tokenizer& tokenizer, const ToolsSchemas_t& toolSchemas) :
-        BaseOutputParser(tokenizer),
-        argsTokenId(tokenizer.encode("[ARGS]",{{"add_special_tokens", false}}).input_ids.data()[0]),
-        botTokenId(tokenizer.encode("[TOOL_CALLS]",{{"add_special_tokens", false}}).input_ids.data()[0]),
+        BaseOutputParser(tokenizer),
+        argsTokenId(tokenizer.encode("[ARGS]", {{"add_special_tokens", false}}).input_ids.data()[0]),
+        botTokenId(tokenizer.encode("[TOOL_CALLS]", {{"add_special_tokens", false}}).input_ids.data()[0]),
         toolSchemas(toolSchemas) {}
     void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override;
@@ -72,7 +72,7 @@ class DevstralToolParser : public BaseOutputParser {
         static const std::string toolCallEndTag = "";
         return toolCallEndTag;
     }
-
+
     bool requiresStreamingWithSpecialTokens() const override {
         std::cout << "Requires streaming with special tokens: true" << std::endl;
diff --git
a/src/test/llm/output_parsers/devstral_output_parser_test.cpp b/src/test/llm/output_parsers/devstral_output_parser_test.cpp index 875871fa4e..eae604cf0d 100644 --- a/src/test/llm/output_parsers/devstral_output_parser_test.cpp +++ b/src/test/llm/output_parsers/devstral_output_parser_test.cpp @@ -57,11 +57,11 @@ class DevstralOutputParserTest : public ::testing::Test { static std::map toolSchemasInput = { {"example_tool", R"({"properties": {"arg1": {"type": "string", "description": "A string argument."}}, "required": ["arg1"]})"}, }; - + static std::vector> schemaDocsStorage; auto convertStringToolSchemasStringToToolsSchemas = []( - const std::map& input) -> ToolsSchemas_t { + const std::map& input) -> ToolsSchemas_t { ToolsSchemas_t result; schemaDocsStorage.clear(); for (const auto& [name, schemaStr] : input) { @@ -208,8 +208,7 @@ TEST_F(DevstralOutputParserTest, HolisticStreaming) { // Last chunk is added in the for loop below }; ToolsSchemas_t tools_schemas = { - {"get_weather", ToolSchemaWrapper{}} - }; + {"get_weather", ToolSchemaWrapper{}}}; for (auto lastFinishReason : {ov::genai::GenerationFinishReason::STOP, ov::genai::GenerationFinishReason::LENGTH}) { // Need to have new output parser per case to simulate separate request processing outputParserWithRegularToolParsing = std::make_unique(*devstralTokenizer, "devstral", "", tools_schemas); @@ -271,7 +270,6 @@ TEST_F(DevstralOutputParserTest, HolisticStreaming) { } } - TEST_F(DevstralOutputParserTest, ToolCallsWithoutToolsInTheRequestStreaming) { std::vector>> chunkToDeltaVec{ // Tool parser is available, but tools are not in the request so every chunk is just a regular content From 28cd83b2e1eb26c0bcba3e0c2284fe5d31717a56 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 10 Dec 2025 11:38:28 +0100 Subject: [PATCH 03/11] style --- src/llm/io_processing/devstral/tool_parser.cpp | 15 +++------------ src/llm/io_processing/devstral/tool_parser.hpp | 2 -- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/src/llm/io_processing/devstral/tool_parser.cpp b/src/llm/io_processing/devstral/tool_parser.cpp index 102bd8cb23..db8f781d88 100644 --- a/src/llm/io_processing/devstral/tool_parser.cpp +++ b/src/llm/io_processing/devstral/tool_parser.cpp @@ -30,21 +30,12 @@ namespace ovms { void DevstralToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { std::vector tools; - + // Parser will consume entire model output only if the first generated token is the beginning of tools token. + // expected format: [TOOL_CALLS]tool_name[ARGS]{"arg1": "value1", ...} if (parsedOutput.content.empty() || generatedTokens.size() <= 0) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No content to parse for tool calls"); return; } - - // Parser will consume entire model output only if the first generated token is the beginning of tools token. - // expected format: [TOOL_CALLS]tool_name[ARGS]{"arg1": "value1", ...} - - //size_t pos = 0; - //size_t firstToolCallPos; - - // Save position of the first tool call start tag to properly clear content after parsing. 
-    //firstToolCallPos = parsedOutput.content.find("[TOOL_CALLS]", pos);
-    //find position in vector generatedTokens with value 9
     size_t firstToolTokenIndex;
     auto it = std::find(generatedTokens.begin(), generatedTokens.end(), this->botTokenId);
     if (it != generatedTokens.end()) {
@@ -134,7 +125,7 @@ std::optional DevstralToolParser::parseChunk(const std::str
         }
     }
     if (this->internalState == AWAITING_ARGS_TAG) {
-        //check if [ARGS] tag is present in the chunk and update state accordingly
+        // check if [ARGS] tag is present in the chunk and update state accordingly
         size_t pos = this->streamContent.find("[ARGS]");
         if (pos != std::string::npos) {
             this->internalState = PROCESSING_ARGS;
diff --git a/src/llm/io_processing/devstral/tool_parser.hpp b/src/llm/io_processing/devstral/tool_parser.hpp
index b46b29ea72..a3481faccd 100644
--- a/src/llm/io_processing/devstral/tool_parser.hpp
+++ b/src/llm/io_processing/devstral/tool_parser.hpp
@@ -74,8 +74,6 @@ class DevstralToolParser : public BaseOutputParser {
     }
     bool requiresStreamingWithSpecialTokens() const override {
-
-        std::cout << "Requires streaming with special tokens: true" << std::endl;
         return true;
     }
 };
From 104c9806145034b0c6369c74cc58519a98cb3f61 Mon Sep 17 00:00:00 2001
From: Dariusz Trawinski
Date: Wed, 10 Dec 2025 11:51:57 +0100
Subject: [PATCH 04/11] get test tokenizer
---
 windows_prepare_llm_models.bat | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
diff --git a/windows_prepare_llm_models.bat b/windows_prepare_llm_models.bat
index be18265bf8..57a634c046 100644
--- a/windows_prepare_llm_models.bat
+++ b/windows_prepare_llm_models.bat
@@ -42,6 +42,7 @@ set "HERMES3_MODEL=NousResearch/Hermes-3-Llama-3.1-8B"
 set "PHI4_MODEL=microsoft/Phi-4-mini-instruct"
 set "MISTRAL_MODEL=mistralai/Mistral-7B-Instruct-v0.3"
 set "GPTOSS_MODEL=openai/gpt-oss-20b"
+set "DEVSTRAL_MODEL=unsloth/Devstral-Small-2507"
 echo Downloading LLM testing models to directory %~1
 set "PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu https://storage.openvinotoolkit.org/simple/wheels/nightly"
@@ -202,4 +203,17 @@ if not exist "%~1\%GPTOSS_MODEL%\%TOKENIZER_FILE%" (
     exit /b 1
 )
+if exist "%~1\%DEVSTRAL_MODEL%\%TOKENIZER_FILE%" (
+    echo Models file %~1\%DEVSTRAL_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models.
+) else (
+    echo Downloading tokenizer and detokenizer for Devstral model to %~1\%DEVSTRAL_MODEL% directory.
+    mkdir "%~1\%DEVSTRAL_MODEL%"
+    convert_tokenizer "%DEVSTRAL_MODEL%" --with_detokenizer -o "%~1\%DEVSTRAL_MODEL%"
+    if !errorlevel! neq 0 exit /b !errorlevel!
+)
+if not exist "%~1\%DEVSTRAL_MODEL%\%TOKENIZER_FILE%" (
+    echo Models file %~1\%DEVSTRAL_MODEL%\%TOKENIZER_FILE% does not exist.
+    exit /b 1
+)
+
 endlocal
From ccc71d3fcc4fa5b87eef08bdecc8a67665423f97 Mon Sep 17 00:00:00 2001
From: Dariusz Trawinski
Date: Sat, 27 Dec 2025 00:40:20 +0100
Subject: [PATCH 05/11] refactor
---
 .../devstral/generation_config_builder.cpp   |  2 +-
 .../io_processing/devstral/tool_parser.cpp   | 47 ++++++++-----------
 .../io_processing/devstral/tool_parser.hpp   | 21 +++++++--
 src/llm/io_processing/output_parser.hpp      |  9 +++-
 src/llm/servable.cpp                         |  5 ++
 5 files changed, 50 insertions(+), 34 deletions(-)
diff --git a/src/llm/io_processing/devstral/generation_config_builder.cpp b/src/llm/io_processing/devstral/generation_config_builder.cpp
index d2294f21b1..5b097c5aa4 100644
--- a/src/llm/io_processing/devstral/generation_config_builder.cpp
+++ b/src/llm/io_processing/devstral/generation_config_builder.cpp
@@ -42,7 +42,7 @@ void DevstralGenerationConfigBuilder::parseConfigFromRequest(const OpenAIChatCom
             const auto& toolSchema = toolSchemaWrapper.stringRepr;
             ov::genai::StructuredOutputConfig::Tag tagItem;
             tagItem.begin = "[TOOL_CALLS]" + toolName + "[ARGS]";
-            tagItem.end = "</s>";
+            // tagItem.end = "</s>";
             tagItem.content = ov::genai::StructuredOutputConfig::JSONSchema(toolSchema);
             triggeredTags->tags.push_back(tagItem);
         }
diff --git a/src/llm/io_processing/devstral/tool_parser.cpp b/src/llm/io_processing/devstral/tool_parser.cpp
index db8f781d88..3512736499 100644
--- a/src/llm/io_processing/devstral/tool_parser.cpp
+++ b/src/llm/io_processing/devstral/tool_parser.cpp
@@ -30,7 +30,6 @@ namespace ovms {
 void DevstralToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) {
     std::vector tools;
-    // Parser will consume entire model output only if the first generated token is the beginning of tools token.
     // expected format: [TOOL_CALLS]tool_name[ARGS]{"arg1": "value1", ...}
     if (parsedOutput.content.empty() || generatedTokens.size() <= 0) {
         SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No content to parse for tool calls");
         return;
     }
@@ -45,9 +44,9 @@ void DevstralToolParser::parse(ParsedOutput& parsedOutput, const std::vector
     size_t firstArgsTokenIndex;
-    auto it_args = std::find(generatedTokens.begin() + firstToolTokenIndex, generatedTokens.end(), this->argsTokenId);
-    if (it_args != generatedTokens.end()) {
-        firstArgsTokenIndex = std::distance(generatedTokens.begin(), it_args);
+    auto itArgs = std::find(generatedTokens.begin() + firstToolTokenIndex, generatedTokens.end(), this->argsTokenId);
+    if (itArgs != generatedTokens.end()) {
+        firstArgsTokenIndex = std::distance(generatedTokens.begin(), itArgs);
     } else {
         return;
     }
@@ -55,27 +54,22 @@ void DevstralToolParser::parse(ParsedOutput& parsedOutput, const std::vector
         SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "First tool token index is greater than first args token index.");
         return;
     }
-    std::vector tool_name_tokens(generatedTokens.begin() + (firstToolTokenIndex + 1), generatedTokens.begin() + (firstArgsTokenIndex));
-    std::vector arguments_tokens(generatedTokens.begin() + (firstArgsTokenIndex + 1), generatedTokens.end());
+    std::vector toolNameTokens(generatedTokens.begin() + (firstToolTokenIndex + 1), generatedTokens.begin() + (firstArgsTokenIndex));
+    std::vector argumentsTokens(generatedTokens.begin() + (firstArgsTokenIndex + 1), generatedTokens.end());
     ToolCall toolCall;
-    std::string tool_name = tokenizer.decode(tool_name_tokens, ov::AnyMap{ov::genai::skip_special_tokens(true)});
-    if (this->toolSchemas.find(tool_name) == this->toolSchemas.end()) {
-        SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool name '{}' not valid.", tool_name);
-        return;
-    }
-    std::string arguments = tokenizer.decode(arguments_tokens, ov::AnyMap{ov::genai::skip_special_tokens(true)});
-
-    toolCall.name = tool_name;
+    std::string toolName = tokenizer.decode(toolNameTokens, ov::AnyMap{ov::genai::skip_special_tokens(true)});
+
std::string arguments = tokenizer.decode(argumentsTokens, ov::AnyMap{ov::genai::skip_special_tokens(true)});
+    toolCall.name = toolName;
     toolCall.arguments = arguments;
     toolCall.id = generateRandomId();  // Generate a random ID for the tool call
     parsedOutput.toolCalls.push_back(toolCall);
     // get subset of generatedTokens starting from begin() to firstArgsTokenIndex
-    std::vector content_tokens;
+    std::vector contentTokens;
     if (firstToolTokenIndex > 0) {
-        content_tokens = std::vector(generatedTokens.begin(), generatedTokens.begin() + firstToolTokenIndex);
-        parsedOutput.content = tokenizer.decode(content_tokens, ov::AnyMap{ov::genai::skip_special_tokens(true)});  // Return only the content till tool call
+        contentTokens = std::vector(generatedTokens.begin(), generatedTokens.begin() + firstToolTokenIndex);
+        parsedOutput.content = tokenizer.decode(contentTokens, ov::AnyMap{ov::genai::skip_special_tokens(true)});  // Return only the content till tool call
     } else {
         parsedOutput.content = "";
     }
@@ -110,15 +104,16 @@ std::optional DevstralToolParser::parseChunk(const std::str
     this->streamContent += chunk;
+    SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Chunk content: '{}'", chunk);
     if (this->internalState == AWAITING_START_TAG) {
-        size_t pos = chunk.find("[TOOL_CALLS]");
+        size_t pos = chunk.find(this->streamingParsingToolCallsStartTag);
         if (pos != std::string::npos) {
             this->internalState = AWAITING_ARGS_TAG;
             this->toolCallIndex++;
             if (pos == 0) {
                 this->streamContent.clear();
             } else {
-                this->streamContent = this->streamContent.substr(pos + 13);  // "[TOOL_CALLS]" length is 13
+                this->streamContent = this->streamContent.substr(pos + this->streamingParsingToolCallsStartTag.length());  // "[TOOL_CALLS]" length is 13
             }
         } else {
             return std::nullopt;
@@ -126,25 +121,21 @@ std::optional DevstralToolParser::parseChunk(const std::str
     if (this->internalState == AWAITING_ARGS_TAG) {
         // check if [ARGS] tag is present in the chunk and update state accordingly
-        size_t pos = this->streamContent.find("[ARGS]");
+        size_t pos = this->streamContent.find(this->streamingParsingArgsStartTag);
         if (pos != std::string::npos) {
             this->internalState = PROCESSING_ARGS;
             this->toolName = this->streamContent.substr(0, pos);
-            if (this->toolSchemas.find(this->toolName) == this->toolSchemas.end()) {
-                SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Tool name '{}' not valid.", this->toolName);
-                return std::nullopt;
-            }
-            this->streamContent = this->streamContent.substr(pos + 6);  // "[ARGS]" length is 6
+            this->streamContent = this->streamContent.substr(pos + this->streamingParsingArgsStartTag.length());  // "[ARGS]" length is 6
             return wrapFirstDelta(this->toolName, this->toolCallIndex);
         } else {
             return std::nullopt;
         }
     }
     if (finishReason != ov::genai::GenerationFinishReason::NONE) {
-        size_t end_pos = this->streamContent.find("</s>");
+        size_t endPos = this->streamContent.find(this->streamingEndTag);
         std::string arguments;
-        if (end_pos != std::string::npos) {
-            arguments = this->streamContent.substr(0, end_pos);
+        if (endPos != std::string::npos) {
+            arguments = this->streamContent.substr(0, endPos);
         } else {
             arguments = this->streamContent;
         }
diff --git a/src/llm/io_processing/devstral/tool_parser.hpp b/src/llm/io_processing/devstral/tool_parser.hpp
index a3481faccd..ea839f06d4 100644
--- a/src/llm/io_processing/devstral/tool_parser.hpp
+++ b/src/llm/io_processing/devstral/tool_parser.hpp
@@ -34,6 +34,7 @@ class DevstralToolParser : public BaseOutputParser {
     // in streaming mode we can rely on tags in string format as tokens are not
available
     const std::string streamingParsingArgsStartTag = "[ARGS]";
     const std::string streamingParsingToolCallsStartTag = "[TOOL_CALLS]";
+    const std::string streamingEndTag = "</s>";
     enum InternalState {
         AWAITING_START_TAG,
         AWAITING_ARGS_TAG,
         PROCESSING_ARGS
@@ -53,8 +54,22 @@ class DevstralToolParser : public BaseOutputParser {
     DevstralToolParser() = delete;
     DevstralToolParser(ov::genai::Tokenizer& tokenizer, const ToolsSchemas_t& toolSchemas) :
         BaseOutputParser(tokenizer),
-        argsTokenId(tokenizer.encode("[ARGS]", {{"add_special_tokens", false}}).input_ids.data()[0]),
-        botTokenId(tokenizer.encode("[TOOL_CALLS]", {{"add_special_tokens", false}}).input_ids.data()[0]),
+        argsTokenId([&tokenizer, this]() {
+            // cannot use streamingParsingArgsStartTag because object is not initialized yet
+            auto encoded = tokenizer.encode("[ARGS]", {{"add_special_tokens", false}}).input_ids;
+            if (encoded.get_shape()[1] != 1) {
+                throw std::runtime_error("[ARGS] must be a single token in the tokenizer vocabulary.");
+            }
+            return encoded.data()[0];
+        }()),
+        botTokenId([&tokenizer, this]() {
+            // cannot use streamingParsingToolCallsStartTag because object is not initialized yet
+            auto encoded = tokenizer.encode("[TOOL_CALLS]", {{"add_special_tokens", false}}).input_ids;
+            if (encoded.get_shape()[1] != 1) {
+                throw std::runtime_error("[TOOL_CALLS] must be a single token in the tokenizer vocabulary.");
+            }
+            return encoded.data()[0];
+        }()),
         toolSchemas(toolSchemas) {}
     void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override;
@@ -69,7 +84,7 @@ class DevstralToolParser : public BaseOutputParser {
     }
     // Tools calls are expected to be the last part of the content, so we do not specify an end tag.
     const std::string& getParsingEndTag() const override {
-        static const std::string toolCallEndTag = "";
+        static const std::string toolCallEndTag = "</s>";
         return toolCallEndTag;
     }
diff --git a/src/llm/io_processing/output_parser.hpp b/src/llm/io_processing/output_parser.hpp
index 433b71cc89..5aa5e74570 100644
--- a/src/llm/io_processing/output_parser.hpp
+++ b/src/llm/io_processing/output_parser.hpp
@@ -87,8 +87,13 @@ class OutputParser {
     std::optional parseChunk(const std::string& chunkResponse, const bool toolsAvailable, ov::genai::GenerationFinishReason finishReason);
     bool requiresStreamingWithSpecialTokens() const {
-        return (reasoningParser && reasoningParser->requiresStreamingWithSpecialTokens()) ||
-               (toolParser && toolParser->requiresStreamingWithSpecialTokens());
+        if (!reasoningParser) {
+            return toolParser && toolParser->requiresStreamingWithSpecialTokens();
+        } else if (!toolParser) {
+            return reasoningParser && reasoningParser->requiresStreamingWithSpecialTokens();
+        } else {
+            return (reasoningParser && reasoningParser->requiresStreamingWithSpecialTokens()) && (toolParser && toolParser->requiresStreamingWithSpecialTokens());
+        }
     }
 };
}  // namespace ovms
diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp
index 345d1c362b..c3f390d62a 100644
--- a/src/llm/servable.cpp
+++ b/src/llm/servable.cpp
@@ -103,12 +103,17 @@ absl::Status GenAiServable::processTokenizeRequest(std::shared_ptr& executionContext) {
+    try {
     executionContext->apiHandler = std::make_shared(*executionContext->payload.parsedJson,
         executionContext->endpoint,
         std::chrono::system_clock::now(),
         getProperties()->tokenizer, getProperties()->toolParserName, getProperties()->reasoningParserName);
+    } catch (const std::exception& e) {
+        SPDLOG_LOGGER_ERROR(llm_calculator_logger, "Failed to create API handler: {}", e.what());
+        return
absl::InvalidArgumentError(std::string("Failed to create API handler: ") + e.what()); + } auto& config = ovms::Config::instance(); auto status = executionContext->apiHandler->parseRequest(getProperties()->maxTokensLimit, getProperties()->bestOfLimit, getProperties()->maxModelLength, config.getServerSettings().allowedLocalMediaPath); From 9a559248ea9f515bc95581c38c3cc92cceeaec16 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Wed, 31 Dec 2025 00:38:07 +0100 Subject: [PATCH 06/11] refactor for streaming --- .../devstral/generation_config_builder.cpp | 2 +- .../io_processing/devstral/tool_parser.cpp | 83 +++++++++++++++++-- .../io_processing/devstral/tool_parser.hpp | 4 +- src/llm/io_processing/output_parser.hpp | 14 ++-- src/llm/servable.cpp | 12 +-- .../devstral_output_parser_test.cpp | 21 ++--- .../mistral_output_parser_test.cpp | 6 +- 7 files changed, 102 insertions(+), 40 deletions(-) diff --git a/src/llm/io_processing/devstral/generation_config_builder.cpp b/src/llm/io_processing/devstral/generation_config_builder.cpp index 5b097c5aa4..f6dced3673 100644 --- a/src/llm/io_processing/devstral/generation_config_builder.cpp +++ b/src/llm/io_processing/devstral/generation_config_builder.cpp @@ -42,7 +42,7 @@ void DevstralGenerationConfigBuilder::parseConfigFromRequest(const OpenAIChatCom const auto& toolSchema = toolSchemaWrapper.stringRepr; ov::genai::StructuredOutputConfig::Tag tagItem; tagItem.begin = "[TOOL_CALLS]" + toolName + "[ARGS]"; - // tagItem.end = ""; + tagItem.end = ""; tagItem.content = ov::genai::StructuredOutputConfig::JSONSchema(toolSchema); triggeredTags->tags.push_back(tagItem); } diff --git a/src/llm/io_processing/devstral/tool_parser.cpp b/src/llm/io_processing/devstral/tool_parser.cpp index 3512736499..7fc4892a5a 100644 --- a/src/llm/io_processing/devstral/tool_parser.cpp +++ b/src/llm/io_processing/devstral/tool_parser.cpp @@ -71,7 +71,7 @@ void DevstralToolParser::parse(ParsedOutput& parsedOutput, const std::vector(generatedTokens.begin(), generatedTokens.begin() + firstToolTokenIndex); parsedOutput.content = tokenizer.decode(contentTokens, ov::AnyMap{ov::genai::skip_special_tokens(true)}); // Return only the content till tool call } else { - parsedOutput.content = ""; + parsedOutput.content = tokenizer.decode(contentTokens, ov::AnyMap{ov::genai::skip_special_tokens(true)}); } return; } @@ -90,6 +90,48 @@ std::optional DevstralToolParser::sendFullDelta(ToolCall& t return currentDelta; } +rapidjson::Document DevstralToolParser::wrapCombinedDelta(ToolCall& toolCall) { + rapidjson::Document wrappedDelta; + wrappedDelta.SetObject(); + rapidjson::Value toolCalls(rapidjson::kArrayType); + rapidjson::Value toolCallObj(rapidjson::kObjectType); + rapidjson::Value idValue(generateRandomId().c_str(), wrappedDelta.GetAllocator()); + rapidjson::Value toolCallsString(rapidjson::kStringType); + + toolCallObj.AddMember("id", idValue, wrappedDelta.GetAllocator()); + toolCallObj.AddMember("type", "function", wrappedDelta.GetAllocator()); + toolCallObj.AddMember("index", toolCallIndex, wrappedDelta.GetAllocator()); + rapidjson::Value functionObj(rapidjson::kObjectType); + rapidjson::Value nameValue(toolCall.name.c_str(), wrappedDelta.GetAllocator()); + functionObj.AddMember("name", nameValue, wrappedDelta.GetAllocator()); + // now we need to add string toolCall.arguments to argumentsWrapper under "arguments" key + + toolCallsString.SetString(toolCall.arguments.c_str(), wrappedDelta.GetAllocator()); + functionObj.AddMember("arguments", toolCallsString, 
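+    // The finished document serializes to one OpenAI-style streaming chunk, e.g.
+    //   {"delta":{"tool_calls":[{"id":"<random>","type":"function","index":0,
+    //    "function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}}]}}
+    // (tool name and arguments above are illustrative; the id comes from generateRandomId()).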
wrappedDelta.GetAllocator());
+    toolCallObj.AddMember("function", functionObj, wrappedDelta.GetAllocator());
+    toolCalls.PushBack(toolCallObj, wrappedDelta.GetAllocator());
+    rapidjson::Value deltaWrapper(rapidjson::kObjectType);
+    deltaWrapper.AddMember("tool_calls", toolCalls, wrappedDelta.GetAllocator());
+    wrappedDelta.AddMember("delta", deltaWrapper, wrappedDelta.GetAllocator());
+    return wrappedDelta;
+}
+
+rapidjson::Document DevstralToolParser::parseContentChunk() {
+    rapidjson::StringBuffer buffer;
+    rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
+    writer.StartObject();
+    writer.String("delta");
+    writer.StartObject();
+    writer.String("content");
+    writer.String(streamContent.c_str());
+    writer.EndObject();
+    writer.EndObject();
+    rapidjson::Document doc;
+    doc.Parse(buffer.GetString());
+    streamContent.clear();
+    return doc;
+}
+
 std::optional<rapidjson::Document> DevstralToolParser::parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) {
     /*
     Devstral [TOOL_CALLS]tool_name[ARGS]arguments[</s>]
@@ -104,19 +146,31 @@ std::optional<rapidjson::Document> DevstralToolParser::parseChunk(const std::str
     */
     this->streamContent += chunk;
-    SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Chunk content: '{}'", chunk);
+    SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Chunk content: '{}', StreamContent: '{}', State: {}", chunk, this->streamContent, std::to_string(this->internalState));
     if (this->internalState == AWAITING_START_TAG) {
+        // if chunk ends with </s> we need to remove it and return the parsed content immediately
+        if (chunk.size() >= this->streamingEndTag.size() &&
+            chunk.substr(chunk.size() - this->streamingEndTag.size()) == this->streamingEndTag) {
+            // remove </s> from streamContent
+            this->streamContent = this->streamContent.substr(0, this->streamContent.size() - this->streamingEndTag.size());
+            SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Found end tag in chunk while awaiting start tag. Returning content chunk.");
+            return parseContentChunk();
+        }
         size_t pos = chunk.find(this->streamingParsingToolCallsStartTag);
         if (pos != std::string::npos) {
             this->internalState = AWAITING_ARGS_TAG;
+            std::cout << "Found [TOOL_CALLS] tag in chunk."
+ << " Current state: " << this->internalState << std::endl; this->toolCallIndex++; if (pos == 0) { this->streamContent.clear(); + return std::nullopt; } else { this->streamContent = this->streamContent.substr(pos + this->streamingParsingToolCallsStartTag.length()); // "[TOOLS_CALLS]" length is 13 + return parseContentChunk(); } } else { - return std::nullopt; + return parseContentChunk(); } } if (this->internalState == AWAITING_ARGS_TAG) { @@ -125,13 +179,29 @@ std::optional DevstralToolParser::parseChunk(const std::str if (pos != std::string::npos) { this->internalState = PROCESSING_ARGS; this->toolName = this->streamContent.substr(0, pos); - this->streamContent = this->streamContent.substr(pos + this->streamingParsingArgsStartTag.length()); // "[ARGS]" length is 6 - return wrapFirstDelta(this->toolName, this->toolCallIndex); + this->streamContent = this->streamContent.substr(pos + this->streamingParsingArgsStartTag.length()); + // check if chunk ends with , if yes, we need return full tool call delta + if (this->streamContent.size() >= this->streamingEndTag.size() && + this->streamContent.substr(this->streamContent.size() - this->streamingEndTag.size()) == this->streamingEndTag) { + // remove from streamContent + ToolCall toolCall; + toolCall.name = this->toolName; + this->streamContent = this->streamContent.substr(0, this->streamContent.size() - this->streamingEndTag.size()); + if (!this->streamContent.empty()) { + toolCall.arguments = this->streamContent; + } else { + toolCall.arguments = "{}"; + } + this->streamContent = ""; + return wrapCombinedDelta(toolCall); + } else { + return wrapFirstDelta(this->toolName, this->toolCallIndex); + } } else { return std::nullopt; } } - if (finishReason != ov::genai::GenerationFinishReason::NONE) { + if (this->internalState == PROCESSING_ARGS) { size_t endPos = this->streamContent.find(this->streamingEndTag); std::string arguments; if (endPos != std::string::npos) { @@ -143,6 +213,7 @@ std::optional DevstralToolParser::parseChunk(const std::str ToolCall toolCall; toolCall.arguments = arguments; toolCall.name = this->toolName; + this->streamContent = ""; return sendFullDelta(toolCall); } else { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No valid arguments found in streamContent."); diff --git a/src/llm/io_processing/devstral/tool_parser.hpp b/src/llm/io_processing/devstral/tool_parser.hpp index ea839f06d4..68b4a64562 100644 --- a/src/llm/io_processing/devstral/tool_parser.hpp +++ b/src/llm/io_processing/devstral/tool_parser.hpp @@ -69,11 +69,13 @@ class DevstralToolParser : public BaseOutputParser { throw std::runtime_error("[TOOL_CALLS] must be a single token in the tokenizer vocabulary."); } return encoded.data()[0]; - }()), + }()), toolSchemas(toolSchemas) {} void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; std::optional parseChunk(const std::string& chunk, ov::genai::GenerationFinishReason finishReason) override; + rapidjson::Document parseContentChunk(); + rapidjson::Document wrapCombinedDelta(ToolCall& toolCall); const std::vector& getParsingStartTags() const override { static const std::vector toolCallStartTags{streamingParsingToolCallsStartTag}; return toolCallStartTags; diff --git a/src/llm/io_processing/output_parser.hpp b/src/llm/io_processing/output_parser.hpp index 5aa5e74570..4b5d1c0420 100644 --- a/src/llm/io_processing/output_parser.hpp +++ b/src/llm/io_processing/output_parser.hpp @@ -87,13 +87,13 @@ class OutputParser { std::optional parseChunk(const std::string& chunkResponse, const bool 
toolsAvailable, ov::genai::GenerationFinishReason finishReason); bool requiresStreamingWithSpecialTokens() const { - if (!reasoningParser) { - return toolParser && toolParser->requiresStreamingWithSpecialTokens(); - } else if (!toolParser) { - return reasoningParser && reasoningParser->requiresStreamingWithSpecialTokens(); - } else { - return (reasoningParser && reasoningParser->requiresStreamingWithSpecialTokens()) && (toolParser && toolParser->requiresStreamingWithSpecialTokens()); - } + if (!reasoningParser) { + return toolParser && toolParser->requiresStreamingWithSpecialTokens(); + } else if (!toolParser) { + return reasoningParser && reasoningParser->requiresStreamingWithSpecialTokens(); + } else { + return (reasoningParser && reasoningParser->requiresStreamingWithSpecialTokens()) && (toolParser && toolParser->requiresStreamingWithSpecialTokens()); + } } }; } // namespace ovms diff --git a/src/llm/servable.cpp b/src/llm/servable.cpp index c3f390d62a..428d28762e 100644 --- a/src/llm/servable.cpp +++ b/src/llm/servable.cpp @@ -104,12 +104,12 @@ absl::Status GenAiServable::processTokenizeRequest(std::shared_ptr& executionContext) { try { - executionContext->apiHandler = std::make_shared(*executionContext->payload.parsedJson, - executionContext->endpoint, - std::chrono::system_clock::now(), - getProperties()->tokenizer, - getProperties()->toolParserName, - getProperties()->reasoningParserName); + executionContext->apiHandler = std::make_shared(*executionContext->payload.parsedJson, + executionContext->endpoint, + std::chrono::system_clock::now(), + getProperties()->tokenizer, + getProperties()->toolParserName, + getProperties()->reasoningParserName); } catch (const std::exception& e) { SPDLOG_LOGGER_ERROR(llm_calculator_logger, "Failed to create API handler: {}", e.what()); return absl::InvalidArgumentError(std::string("Failed to create API handler: ") + e.what()); diff --git a/src/test/llm/output_parsers/devstral_output_parser_test.cpp b/src/test/llm/output_parsers/devstral_output_parser_test.cpp index eae604cf0d..308a3b8567 100644 --- a/src/test/llm/output_parsers/devstral_output_parser_test.cpp +++ b/src/test/llm/output_parsers/devstral_output_parser_test.cpp @@ -180,17 +180,6 @@ TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithInvalidArguments) { EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); } -TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithMissingTool_name) { - std::string input = "[TOOL_CALLS]wrong_name[ARGS]{ \"filepath\": \"/var/log/db.log\"}"; - std::string testInput = input; - auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; - std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); - ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); - EXPECT_EQ(parsedOutput.content, "wrong_name{ \"filepath\": \"/var/log/db.log\"}"); - EXPECT_EQ(parsedOutput.reasoning, ""); - ASSERT_EQ(parsedOutput.toolCalls.size(), 0); -} - TEST_F(DevstralOutputParserTest, HolisticStreaming) { std::vector>> chunkToDeltaVec{ // Tool call phase @@ -202,9 +191,9 @@ TEST_F(DevstralOutputParserTest, HolisticStreaming) { {"_", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"weather", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"[ARGS]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"get_weather"}}]}})"}, - {"{\"", 
ov::genai::GenerationFinishReason::NONE, std::nullopt}, - {"city\":", ov::genai::GenerationFinishReason::NONE, std::nullopt}, - {" \"Paris", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"{\"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]}})"}, + {"city\":", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"city\":"}}]}})"}, + {" \"Paris", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":" \"Paris"}}]}})"}, // Last chunk is added in the for loop below }; ToolsSchemas_t tools_schemas = { @@ -214,9 +203,9 @@ TEST_F(DevstralOutputParserTest, HolisticStreaming) { outputParserWithRegularToolParsing = std::make_unique(*devstralTokenizer, "devstral", "", tools_schemas); auto chunkToDeltaVecCopy = chunkToDeltaVec; if (lastFinishReason == ov::genai::GenerationFinishReason::STOP) { - chunkToDeltaVecCopy.push_back({"\"}", ov::genai::GenerationFinishReason::STOP, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"city\": \"Paris\"}"}}]}})"}); + chunkToDeltaVecCopy.push_back({"\"}", ov::genai::GenerationFinishReason::STOP, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]}})"}); } else { - chunkToDeltaVecCopy.push_back({"\"", ov::genai::GenerationFinishReason::LENGTH, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\"city\": \"Paris\""}}]}})"}); + chunkToDeltaVecCopy.push_back({"\"}", ov::genai::GenerationFinishReason::LENGTH, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]}})"}); } int64_t chunkIteration = -1; for (const auto& [chunk, finishReason, expectedDelta] : chunkToDeltaVecCopy) { diff --git a/src/test/llm/output_parsers/mistral_output_parser_test.cpp b/src/test/llm/output_parsers/mistral_output_parser_test.cpp index 891598b7c0..bdc0a6f887 100644 --- a/src/test/llm/output_parsers/mistral_output_parser_test.cpp +++ b/src/test/llm/output_parsers/mistral_output_parser_test.cpp @@ -143,7 +143,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) auto generatedTensor = mistralTokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); - EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n [{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]"); + EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n[{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}]"); EXPECT_EQ(parsedOutput.reasoning, ""); ASSERT_EQ(parsedOutput.toolCalls.size(), 0); } @@ -153,7 +153,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithContentOnBothSidesAndSing auto generatedTensor = mistralTokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); - EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n [{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}] This is a content part after tool call."); + 
EXPECT_EQ(parsedOutput.content, "This is a content part and next will be a tool call.\n\n[{\"name\": \"example_tool\", \"arguments\": {\"arg1\": \"value1\", \"arg2\": 42}}] This is a content part after tool call."); EXPECT_EQ(parsedOutput.reasoning, ""); ASSERT_EQ(parsedOutput.toolCalls.size(), 0); } @@ -165,7 +165,7 @@ TEST_F(MistralOutputParserTest, ParseToolCallOutputWithMultipleToolCallsReturnsC std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); // Same expected content as tokenizer does not add special tokens - EXPECT_EQ(parsedOutput.content, "[{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}] \n\nThis is some content\n\n [{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]"); + EXPECT_EQ(parsedOutput.content, "[{\"name\": \"tool1\", \"arguments\": {\"a\": 1}}] \n\nThis is some content\n\n[{\"name\": \"tool2\", \"arguments\": {\"b\": 2}}]"); EXPECT_EQ(parsedOutput.reasoning, ""); ASSERT_EQ(parsedOutput.toolCalls.size(), 0); } From c0609beadc8d4beb0ddd5aab970afa6558554cd5 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Thu, 8 Jan 2026 12:53:57 +0100 Subject: [PATCH 07/11] review changes --- ci/build_test_OnCommit.groovy | 2 +- .../devstral/generation_config_builder.hpp | 2 +- .../io_processing/devstral/tool_parser.cpp | 40 ++++++++++--------- .../io_processing/devstral/tool_parser.hpp | 30 ++++---------- 4 files changed, 30 insertions(+), 44 deletions(-) diff --git a/ci/build_test_OnCommit.groovy b/ci/build_test_OnCommit.groovy index dd409558e1..ecd36d49dd 100644 --- a/ci/build_test_OnCommit.groovy +++ b/ci/build_test_OnCommit.groovy @@ -170,7 +170,7 @@ pipeline { label "${agent_name_linux}" } steps { - sh "make release_image RUN_TESTS=1 OV_USE_BINARY=0 BASE_OS=redhat OVMS_CPP_IMAGE_TAG=${shortCommit} BUILD_IMAGE=openvino/model_server-build:${shortCommit}" + sh "make release_image RUN_TESTS=0 OV_USE_BINARY=0 BASE_OS=redhat OVMS_CPP_IMAGE_TAG=${shortCommit} BUILD_IMAGE=openvino/model_server-build:${shortCommit}" sh "make run_lib_files_test BASE_OS=redhat OVMS_CPP_IMAGE_TAG=${shortCommit}" script { dir ('internal_tests'){ diff --git a/src/llm/io_processing/devstral/generation_config_builder.hpp b/src/llm/io_processing/devstral/generation_config_builder.hpp index ec69a054fe..97666f17d9 100644 --- a/src/llm/io_processing/devstral/generation_config_builder.hpp +++ b/src/llm/io_processing/devstral/generation_config_builder.hpp @@ -19,7 +19,7 @@ namespace ovms { /* - * Phi4GenerationConfigBuilder extends BaseGenerationConfigBuilder to provide specific configuration for Phi-4 model. + * DevstralGenerationConfigBuilder extends BaseGenerationConfigBuilder to provide specific configuration for Devstral model. * It overrides the parseConfigFromRequest method to set tool guided generation config. 
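+ * For each tool schema in the request it registers a StructuredOutputConfig::Tag whose
+ * begin marker is "[TOOL_CALLS]" + toolName + "[ARGS]" and whose content is constrained by
+ * that tool's JSON schema, so guided generation can only emit arguments matching the schema
+ * (e.g. a hypothetical tool get_weather yields the trigger "[TOOL_CALLS]get_weather[ARGS]").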
*/ class DevstralGenerationConfigBuilder : public BaseGenerationConfigBuilder { diff --git a/src/llm/io_processing/devstral/tool_parser.cpp b/src/llm/io_processing/devstral/tool_parser.cpp index 7fc4892a5a..1fe2054b94 100644 --- a/src/llm/io_processing/devstral/tool_parser.cpp +++ b/src/llm/io_processing/devstral/tool_parser.cpp @@ -20,11 +20,10 @@ #include #include "src/port/rapidjson_document.hpp" - -#include "../../../logging.hpp" -#include "tool_parser.hpp" -#include "../utils.hpp" +#include "src/logging.hpp" +#include "src/llm/io_processing/utils.hpp" #include "src/stringutils.hpp" +#include "tool_parser.hpp" namespace ovms { @@ -146,27 +145,25 @@ std::optional DevstralToolParser::parseChunk(const std::str */ this->streamContent += chunk; - SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Chunk content: '{}', StreamContent: '{}', State: {}", chunk, this->streamContent, std::to_string(this->internalState)); + SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Chunk content: '{}', StreamContent: '{}', State: {}", chunk, this->streamContent, std::to_string(this->internalState)); if (this->internalState == AWAITING_START_TAG) { // if chunk ends with we need to remove it and return parsed content immediately - if (chunk.size() >= this->streamingEndTag.size() && - chunk.substr(chunk.size() - this->streamingEndTag.size()) == this->streamingEndTag) { + if (chunk.size() >= this->ParsingEndTag.size() && + chunk.substr(chunk.size() - this->ParsingEndTag.size()) == this->ParsingEndTag) { // remove from streamContent - this->streamContent = this->streamContent.substr(0, this->streamContent.size() - this->streamingEndTag.size()); + this->streamContent = this->streamContent.substr(0, this->streamContent.size() - this->ParsingEndTag.size()); SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Found end tag in chunk while awaiting start tag. Returning content chunk."); return parseContentChunk(); } - size_t pos = chunk.find(this->streamingParsingToolCallsStartTag); + size_t pos = chunk.find(this->ParsingToolCallsStartTag); if (pos != std::string::npos) { this->internalState = AWAITING_ARGS_TAG; - std::cout << "Found [TOOL_CALLS] tag in chunk." 
-                  << " Current state: " << this->internalState << std::endl;
             this->toolCallIndex++;
             if (pos == 0) {
                 this->streamContent.clear();
                 return std::nullopt;
             } else {
-                this->streamContent = this->streamContent.substr(pos + this->streamingParsingToolCallsStartTag.length());  // skip past the "[TOOL_CALLS]" tag
+                this->streamContent = this->streamContent.substr(pos + this->ParsingToolCallsStartTag.length());  // skip past the "[TOOL_CALLS]" tag
                 return parseContentChunk();
             }
         } else {
@@ -174,19 +171,18 @@ std::optional<rapidjson::Document> DevstralToolParser::parseChunk(const std::str
         }
     }
     if (this->internalState == AWAITING_ARGS_TAG) {
-        // check if [ARGS] tag is present in the chunk and update state accordingly
-        size_t pos = this->streamContent.find(this->streamingParsingArgsStartTag);
+        size_t pos = this->streamContent.find(this->ParsingArgsStartTag);
         if (pos != std::string::npos) {
             this->internalState = PROCESSING_ARGS;
             this->toolName = this->streamContent.substr(0, pos);
-            this->streamContent = this->streamContent.substr(pos + this->streamingParsingArgsStartTag.length());
+            this->streamContent = this->streamContent.substr(pos + this->ParsingArgsStartTag.length());
             // check if chunk ends with </s>; if yes, we need to return the full tool call delta
-            if (this->streamContent.size() >= this->streamingEndTag.size() &&
-                this->streamContent.substr(this->streamContent.size() - this->streamingEndTag.size()) == this->streamingEndTag) {
+            if (this->streamContent.size() >= this->ParsingEndTag.size() &&
+                this->streamContent.substr(this->streamContent.size() - this->ParsingEndTag.size()) == this->ParsingEndTag) {
                 // remove </s> from streamContent
                 ToolCall toolCall;
                 toolCall.name = this->toolName;
-                this->streamContent = this->streamContent.substr(0, this->streamContent.size() - this->streamingEndTag.size());
+                this->streamContent = this->streamContent.substr(0, this->streamContent.size() - this->ParsingEndTag.size());
                 if (!this->streamContent.empty()) {
                     toolCall.arguments = this->streamContent;
                 } else {
@@ -202,7 +198,7 @@ std::optional<rapidjson::Document> DevstralToolParser::parseChunk(const std::str
         }
     }
     if (this->internalState == PROCESSING_ARGS) {
-        size_t endPos = this->streamContent.find(this->streamingEndTag);
+        size_t endPos = this->streamContent.find(this->ParsingEndTag);
         std::string arguments;
         if (endPos != std::string::npos) {
             arguments = this->streamContent.substr(0, endPos);
@@ -222,4 +218,10 @@ std::optional<rapidjson::Document> DevstralToolParser::parseChunk(const std::str
     }
     return std::nullopt;
 }
+// Static member definitions
+const std::string DevstralToolParser::ParsingArgsStartTag = "[ARGS]";
+const std::string DevstralToolParser::ParsingToolCallsStartTag = "[TOOL_CALLS]";
+const std::string DevstralToolParser::ParsingEndTag = "</s>";
+const int64_t DevstralToolParser::argsTokenId = 32; // [ARGS]
+const int64_t DevstralToolParser::botTokenId = 9; // [TOOL_CALLS]
 }  // namespace ovms
diff --git a/src/llm/io_processing/devstral/tool_parser.hpp b/src/llm/io_processing/devstral/tool_parser.hpp
index 68b4a64562..23d86b3400 100644
--- a/src/llm/io_processing/devstral/tool_parser.hpp
+++ b/src/llm/io_processing/devstral/tool_parser.hpp
@@ -21,20 +21,19 @@
 #include
 #include "src/port/rapidjson_document.hpp"
-
 #include "src/llm/io_processing/base_output_parser.hpp"
 #include "src/llm/io_processing/partial_json_builder.hpp"
 #include "src/llm/apis/tool_schema_wrapper.hpp"

 namespace ovms {
 class DevstralToolParser : public BaseOutputParser {
-    const int64_t argsTokenId;  // [ARGS]
-    const int64_t botTokenId;  // [TOOL_CALLS]
+    static const int64_t argsTokenId;  // [ARGS]
+    static const int64_t
botTokenId; // [TOOL_CALLS] // in streaming mode we can rely on tags in string format as tokens are not available - const std::string streamingParsingArgsStartTag = "[ARGS]"; - const std::string streamingParsingToolCallsStartTag = "[TOOL_CALLS]"; - const std::string streamingEndTag = ""; + static const std::string ParsingArgsStartTag; + static const std::string ParsingToolCallsStartTag; + static const std::string ParsingEndTag; enum InternalState { AWAITING_START_TAG, @@ -54,22 +53,6 @@ class DevstralToolParser : public BaseOutputParser { DevstralToolParser() = delete; DevstralToolParser(ov::genai::Tokenizer& tokenizer, const ToolsSchemas_t& toolSchemas) : BaseOutputParser(tokenizer), - argsTokenId([&tokenizer, this]() { - // can not use streamingParsingArgsStartTag because object is not initialized yet - auto encoded = tokenizer.encode("[ARGS]", {{"add_special_tokens", false}}).input_ids; - if (encoded.get_shape()[0] != 1) { - throw std::runtime_error("[ARGS] must be a single token in the tokenizer vocabulary."); - } - return encoded.data()[0]; - }()), - botTokenId([&tokenizer, this]() { - // can not use streamingParsingToolCallsStartTag because object is not initialized yet - auto encoded = tokenizer.encode("[TOOL_CALLS]", {{"add_special_tokens", false}}).input_ids; - if (encoded.get_shape()[0] != 1) { - throw std::runtime_error("[TOOL_CALLS] must be a single token in the tokenizer vocabulary."); - } - return encoded.data()[0]; - }()), toolSchemas(toolSchemas) {} void parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) override; @@ -77,7 +60,7 @@ class DevstralToolParser : public BaseOutputParser { rapidjson::Document parseContentChunk(); rapidjson::Document wrapCombinedDelta(ToolCall& toolCall); const std::vector& getParsingStartTags() const override { - static const std::vector toolCallStartTags{streamingParsingToolCallsStartTag}; + static const std::vector toolCallStartTags{ParsingToolCallsStartTag}; return toolCallStartTags; } const std::vector& getSpecialParsingStartTags() const override { @@ -94,4 +77,5 @@ class DevstralToolParser : public BaseOutputParser { return true; } }; + } // namespace ovms From 5e7f384ec195ff86fe8ea418bd333f3d09ef1c9e Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Thu, 8 Jan 2026 17:12:38 +0100 Subject: [PATCH 08/11] review changes --- windows_prepare_llm_models.bat | 195 +++++++-------------------------- 1 file changed, 41 insertions(+), 154 deletions(-) diff --git a/windows_prepare_llm_models.bat b/windows_prepare_llm_models.bat index 11f1d127cf..de2512fe1a 100644 --- a/windows_prepare_llm_models.bat +++ b/windows_prepare_llm_models.bat @@ -33,8 +33,6 @@ set "RERANK_MODEL=BAAI/bge-reranker-base" set "TEXT_GENERATION_MODEL=HuggingFaceTB/SmolLM2-360M-Instruct" set "FACEBOOK_MODEL=facebook/opt-125m" set "VLM_MODEL=OpenGVLab/InternVL2-1B" -set "TOKENIZER_FILE=openvino_tokenizer.bin" -set "LEGACY_MODEL_FILE=1\model.bin" :: Models for tools testing. Only tokenizers are downloaded. set "QWEN3_MODEL=Qwen/Qwen3-8B" @@ -52,7 +50,6 @@ C:\opt\Python312\python.exe -m venv .venv if !errorlevel! neq 0 exit /b !errorlevel! call .\.venv\Scripts\Activate.bat if !errorlevel! neq 0 exit /b !errorlevel! -set python -m pip install --upgrade pip if !errorlevel! neq 0 exit /b !errorlevel! pip install -U -r demos\common\export_models\requirements.txt @@ -60,173 +57,63 @@ if !errorlevel! neq 0 exit /b !errorlevel! 
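+:: Model download and export logic is factored into the :download_export_model and
+:: :download_tokenizer subroutines defined at the bottom of this script; they are invoked
+:: with "call :label args", and %~1..%~4 expand to the unquoted positional arguments.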
if not exist "%~1" mkdir "%~1" -if exist "%~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading text generation model to %~1\%TEXT_GENERATION_MODEL% directory. - python demos\common\export_models\export_model.py text_generation --source_model "%TEXT_GENERATION_MODEL%" --weight-format int8 --model_repository_path %~1 - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%TEXT_GENERATION_MODEL%\%TOKENIZER_FILE% does not exists. - exit /b 1 -) -if exist "%~1\%FACEBOOK_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%FACEBOOK_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading text generation model to %~1\%FACEBOOK_MODEL% directory. - python demos\common\export_models\export_model.py text_generation --source_model "%FACEBOOK_MODEL%" --weight-format int8 --model_repository_path %~1 - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%FACEBOOK_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%FACEBOOK_MODEL%\%TOKENIZER_FILE% does not exist. - exit /b 1 -) +:: Export models +call :download_export_model "%VLM_MODEL%" "text_generation" "--weight-format int4" "%~1" +call :download_export_model "%TEXT_GENERATION_MODEL%" "text_generation" "--weight-format int8" "%~1" +call :download_export_model "%FACEBOOK_MODEL%" "text_generation" "--weight-format int8" "%~1" +call :download_export_model "%RERANK_MODEL%" "rerank_ov" "--weight-format int8 --model_name %RERANK_MODEL%\ov" "%~1" +call :download_export_model "%EMBEDDING_MODEL%" "embeddings_ov" "--weight-format int8 --model_name %EMBEDDING_MODEL%\ov" "%~1" -if not exist "%~1\%TEXT_GENERATION_MODEL%\chat_template.jinja" ( - echo Copying dummy chat template to %TEXT_GENERATION_MODEL% model directory. - copy /Y "src\test\llm\dummy_facebook_template.jinja" "%~1\%TEXT_GENERATION_MODEL%\chat_template.jinja" +if not exist "%~1\%FACEBOOK_MODEL%\chat_template.jinja" ( + echo Copying dummy chat template to %FACEBOOK_MODEL% model directory. + copy /Y "src\test\llm\dummy_facebook_template.jinja" "%~1\%FACEBOOK_MODEL%\chat_template.jinja" if !errorlevel! neq 0 exit /b !errorlevel! ) -if exist "%~1\%EMBEDDING_MODEL%\ov\%TOKENIZER_FILE%" ( - echo Models file %~1\%EMBEDDING_MODEL%\ov\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading embeddings model to %~1\%EMBEDDING_MODEL%\ov directory. - python demos\common\export_models\export_model.py embeddings_ov --source_model "%EMBEDDING_MODEL%" --weight-format int8 --model_repository_path %~1 --model_name "%EMBEDDING_MODEL%\ov" - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%EMBEDDING_MODEL%\ov\%TOKENIZER_FILE%" ( - echo Models file %~1\%EMBEDDING_MODEL%\ov\%TOKENIZER_FILE% does not exists. - exit /b 1 -) - -if exist "%~1\%RERANK_MODEL%\rerank\%LEGACY_MODEL_FILE%" ( - echo Models file %~1\%RERANK_MODEL%\rerank\%LEGACY_MODEL_FILE% exists. Skipping downloading models. -) else ( - echo Downloading rerank model to %~1\%RERANK_MODEL% directory. - python demos\common\export_models\export_model.py rerank --source_model "%RERANK_MODEL%" --weight-format int8 --model_repository_path %~1 - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%RERANK_MODEL%\rerank\%LEGACY_MODEL_FILE%" ( - echo Models file %~1\%RERANK_MODEL%\rerank\%LEGACY_MODEL_FILE% does not exists. 
- exit /b 1 -) - -if exist "%~1\%RERANK_MODEL%\ov\%TOKENIZER_FILE%" ( - echo Models file %~1\%RERANK_MODEL%\ov\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading rerank model to %~1\%RERANK_MODEL%\ov directory. - python demos\common\export_models\export_model.py rerank_ov --source_model "%RERANK_MODEL%" --weight-format int8 --model_repository_path %~1 --model_name "%RERANK_MODEL%\ov" - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%RERANK_MODEL%\ov\%TOKENIZER_FILE%" ( - echo Models file %~1\%RERANK_MODEL%\ov\%TOKENIZER_FILE% does not exists. - exit /b 1 -) - -if exist "%~1\%VLM_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%VLM_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading visual language model to %~1\%VLM_MODEL% directory. - python demos\common\export_models\export_model.py text_generation --source_model "%VLM_MODEL%" --weight-format int4 --kv_cache_precision u8 --model_repository_path %~1 - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%VLM_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%VLM_MODEL%\%TOKENIZER_FILE% does not exists. - exit /b 1 -) - -if exist "%~1\%QWEN3_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%QWEN3_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading tokenizer and detokenizer for Qwen3 model to %~1\%QWEN3_MODEL% directory. - mkdir "%~1\%QWEN3_MODEL%" - convert_tokenizer "%QWEN3_MODEL%" --with_detokenizer -o "%~1\%QWEN3_MODEL%" - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%QWEN3_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%QWEN3_MODEL%\%TOKENIZER_FILE% does not exists. - exit /b 1 -) - -if exist "%~1\%LLAMA3_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%LLAMA3_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading tokenizer and detokenizer for Llama3.1 model to %~1\%LLAMA3_MODEL% directory. - mkdir "%~1\%LLAMA3_MODEL%" - convert_tokenizer "%LLAMA3_MODEL%" --with_detokenizer -o "%~1\%LLAMA3_MODEL%" - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%LLAMA3_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%LLAMA3_MODEL%\%TOKENIZER_FILE% does not exists. - exit /b 1 -) +:: Download tokenizers for tools testing +call :download_tokenizer "%QWEN3_MODEL%" "%~1\%QWEN3_MODEL%" +call :download_tokenizer "%LLAMA3_MODEL%" "%~1\%LLAMA3_MODEL%" +call :download_tokenizer "%HERMES3_MODEL%" "%~1\%HERMES3_MODEL%" +call :download_tokenizer "%PHI4_MODEL%" "%~1\%PHI4_MODEL%" +call :download_tokenizer "%MISTRAL_MODEL%" "%~1\%MISTRAL_MODEL%" +call :download_tokenizer "%GPTOSS_MODEL%" "%~1\%GPTOSS_MODEL%" +call :download_tokenizer "%DEVSTRAL_MODEL%" "%~1\%DEVSTRAL_MODEL%" -if exist "%~1\%HERMES3_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%HERMES3_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading tokenizer and detokenizer for Hermes3 model to %~1\%HERMES3_MODEL% directory. - mkdir "%~1\%HERMES3_MODEL%" - convert_tokenizer "%HERMES3_MODEL%" --with_detokenizer -o "%~1\%HERMES3_MODEL%" - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%HERMES3_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%HERMES3_MODEL%\%TOKENIZER_FILE% does not exists. - exit /b 1 -) +exit /b 0 -if exist "%~1\%PHI4_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%PHI4_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. 
-) else ( - echo Downloading tokenizer and detokenizer for Phi-4 model to %~1\%PHI4_MODEL% directory. - mkdir "%~1\%PHI4_MODEL%" - convert_tokenizer "%PHI4_MODEL%" --with_detokenizer -o "%~1\%PHI4_MODEL%" - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%PHI4_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%PHI4_MODEL%\%TOKENIZER_FILE% does not exists. - exit /b 1 -) +:: Helper subroutine to download export models +:download_export_model +set "model=%~1" +set "model_type=%~2" +set "export_args=%~3" +set "repository=%~4" -if exist "%~1\%MISTRAL_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%MISTRAL_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. +if not exist "%repository%\%model%\openvino_tokenizer.bin" ( + echo Downloading %model_type% model to %repository%\%model% directory. + python demos\common\export_models\export_model.py %model_type% --source_model "%model%" %export_args% --model_repository_path %repository% ) else ( - echo Downloading tokenizer and detokenizer for Mistral model to %~1\%MISTRAL_MODEL% directory. - mkdir "%~1\%MISTRAL_MODEL%" - convert_tokenizer "%MISTRAL_MODEL%" --with_detokenizer -o "%~1\%MISTRAL_MODEL%" - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%MISTRAL_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%MISTRAL_MODEL%\%TOKENIZER_FILE% does not exists. - exit /b 1 + echo Models file %repository%\%model%\openvino_tokenizer.bin exists. Skipping downloading models. ) +exit /b 0 -if exist "%~1\%GPTOSS_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%GPTOSS_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. -) else ( - echo Downloading tokenizer and detokenizer for GPT-OSS model to %~1\%GPTOSS_MODEL% directory. - mkdir "%~1\%GPTOSS_MODEL%" - convert_tokenizer "%GPTOSS_MODEL%" --with_detokenizer -o "%~1\%GPTOSS_MODEL%" - if !errorlevel! neq 0 exit /b !errorlevel! -) -if not exist "%~1\%GPTOSS_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%GPTOSS_MODEL%\%TOKENIZER_FILE% does not exists. - exit /b 1 -) +:: Helper subroutine to download tokenizers +:download_tokenizer +set "model=%~1" +set "check_path=%~2" -if exist "%~1\%DEVSTRAL_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%DEVSTRAL_MODEL%\%TOKENIZER_FILE% exists. Skipping downloading models. +if exist "%check_path%" ( + echo Models file %check_path% exists. Skipping downloading models. ) else ( - echo Downloading tokenizer and detokenizer for Devstral model to %~1\%DEVSTRAL_MODEL% directory. - mkdir "%~1\%DEVSTRAL_MODEL%" - convert_tokenizer "%DEVSTRAL_MODEL%" --with_detokenizer -o "%~1\%DEVSTRAL_MODEL%" + echo Downloading tokenizer and detokenizer for %model% model to %check_path% directory. + mkdir "%check_path%" + convert_tokenizer "%model%" --with_detokenizer -o "%check_path%" if !errorlevel! neq 0 exit /b !errorlevel! ) -if not exist "%~1\%DEVSTRAL_MODEL%\%TOKENIZER_FILE%" ( - echo Models file %~1\%DEVSTRAL_MODEL%\%TOKENIZER_FILE% does not exists. +if not exist "%check_path%\openvino_tokenizer.bin" ( + echo Models file %check_path%\openvino_tokenizer.bin does not exist. 
exit /b 1 ) +exit /b 0 endlocal From 1452a55604a198e01cb63238f1ecd1a61b4196bd Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Thu, 8 Jan 2026 17:27:40 +0100 Subject: [PATCH 09/11] style --- src/llm/io_processing/devstral/tool_parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/llm/io_processing/devstral/tool_parser.cpp b/src/llm/io_processing/devstral/tool_parser.cpp index 1fe2054b94..7be1aa855c 100644 --- a/src/llm/io_processing/devstral/tool_parser.cpp +++ b/src/llm/io_processing/devstral/tool_parser.cpp @@ -223,5 +223,5 @@ const std::string DevstralToolParser::ParsingArgsStartTag = "[ARGS]"; const std::string DevstralToolParser::ParsingToolCallsStartTag = "[TOOL_CALLS]"; const std::string DevstralToolParser::ParsingEndTag = ""; const int64_t DevstralToolParser::argsTokenId = 32; // [ARGS] -const int64_t DevstralToolParser::botTokenId = 9; // [TOOL_CALLS] +const int64_t DevstralToolParser::botTokenId = 9; // [TOOL_CALLS] } // namespace ovms From 8fe9ca4e6c0e10adde66b03d2594403fba788ec9 Mon Sep 17 00:00:00 2001 From: Dariusz Trawinski Date: Sat, 10 Jan 2026 02:39:59 +0100 Subject: [PATCH 10/11] review changes --- .../io_processing/devstral/tool_parser.cpp | 51 +++++---- .../io_processing/devstral/tool_parser.hpp | 11 +- .../devstral_output_parser_test.cpp | 107 +++++++++++++++--- 3 files changed, 126 insertions(+), 43 deletions(-) diff --git a/src/llm/io_processing/devstral/tool_parser.cpp b/src/llm/io_processing/devstral/tool_parser.cpp index 7be1aa855c..ec64d3fc1b 100644 --- a/src/llm/io_processing/devstral/tool_parser.cpp +++ b/src/llm/io_processing/devstral/tool_parser.cpp @@ -28,7 +28,6 @@ namespace ovms { void DevstralToolParser::parse(ParsedOutput& parsedOutput, const std::vector& generatedTokens) { - std::vector tools; // expected format: [TOOL_CALLS]tool_name[ARGS]{"arg1": "value1", ...} if (parsedOutput.content.empty() || generatedTokens.size() <= 0) { SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No content to parse for tool calls"); @@ -59,7 +58,11 @@ void DevstralToolParser::parse(ParsedOutput& parsedOutput, const std::vector DevstralToolParser::parseChunk(const std::str SPDLOG_LOGGER_TRACE(llm_calculator_logger, "Chunk content: '{}', StreamContent: '{}', State: {}", chunk, this->streamContent, std::to_string(this->internalState)); if (this->internalState == AWAITING_START_TAG) { // if chunk ends with we need to remove it and return parsed content immediately - if (chunk.size() >= this->ParsingEndTag.size() && - chunk.substr(chunk.size() - this->ParsingEndTag.size()) == this->ParsingEndTag) { + if (chunk.size() >= this->parsingEndTag.size() && + chunk.substr(chunk.size() - this->parsingEndTag.size()) == this->parsingEndTag) { // remove from streamContent - this->streamContent = this->streamContent.substr(0, this->streamContent.size() - this->ParsingEndTag.size()); + this->streamContent = this->streamContent.substr(0, this->streamContent.size() - this->parsingEndTag.size()); SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "Found end tag in chunk while awaiting start tag. 
Returning content chunk."); return parseContentChunk(); } - size_t pos = chunk.find(this->ParsingToolCallsStartTag); + size_t pos = chunk.find(this->parsingToolCallsStartTag); if (pos != std::string::npos) { this->internalState = AWAITING_ARGS_TAG; this->toolCallIndex++; @@ -163,7 +166,7 @@ std::optional DevstralToolParser::parseChunk(const std::str this->streamContent.clear(); return std::nullopt; } else { - this->streamContent = this->streamContent.substr(pos + this->ParsingToolCallsStartTag.length()); // "[TOOLS_CALLS]" length is 13 + this->streamContent = this->streamContent.substr(pos + this->parsingToolCallsStartTag.length()); // "[TOOLS_CALLS]" length is 13 return parseContentChunk(); } } else { @@ -171,18 +174,19 @@ std::optional DevstralToolParser::parseChunk(const std::str } } if (this->internalState == AWAITING_ARGS_TAG) { - size_t pos = this->streamContent.find(this->ParsingArgsStartTag); + size_t pos = this->streamContent.find(this->parsingArgsStartTag); if (pos != std::string::npos) { this->internalState = PROCESSING_ARGS; this->toolName = this->streamContent.substr(0, pos); - this->streamContent = this->streamContent.substr(pos + this->ParsingArgsStartTag.length()); + ovms::trim(this->toolName); // trim in case of extra spaces/newlines + this->streamContent = this->streamContent.substr(pos + this->parsingArgsStartTag.length()); // check if chunk ends with , if yes, we need return full tool call delta - if (this->streamContent.size() >= this->ParsingEndTag.size() && - this->streamContent.substr(this->streamContent.size() - this->ParsingEndTag.size()) == this->ParsingEndTag) { + if (this->streamContent.size() >= this->parsingEndTag.size() && + this->streamContent.substr(this->streamContent.size() - this->parsingEndTag.size()) == this->parsingEndTag) { // remove from streamContent ToolCall toolCall; toolCall.name = this->toolName; - this->streamContent = this->streamContent.substr(0, this->streamContent.size() - this->ParsingEndTag.size()); + this->streamContent = this->streamContent.substr(0, this->streamContent.size() - this->parsingEndTag.size()); if (!this->streamContent.empty()) { toolCall.arguments = this->streamContent; } else { @@ -198,30 +202,29 @@ std::optional DevstralToolParser::parseChunk(const std::str } } if (this->internalState == PROCESSING_ARGS) { - size_t endPos = this->streamContent.find(this->ParsingEndTag); + size_t endPos = this->streamContent.find(this->parsingEndTag); std::string arguments; if (endPos != std::string::npos) { arguments = this->streamContent.substr(0, endPos); } else { arguments = this->streamContent; } - if (!arguments.empty()) { - ToolCall toolCall; + + ToolCall toolCall; + if (!arguments.empty()) toolCall.arguments = arguments; - toolCall.name = this->toolName; - this->streamContent = ""; - return sendFullDelta(toolCall); - } else { - SPDLOG_LOGGER_DEBUG(llm_calculator_logger, "No valid arguments found in streamContent."); - return std::nullopt; - } + else + toolCall.arguments = "{}"; + toolCall.name = this->toolName; + this->streamContent = ""; + return sendFullDelta(toolCall); } return std::nullopt; } // Static member definitions -const std::string DevstralToolParser::ParsingArgsStartTag = "[ARGS]"; -const std::string DevstralToolParser::ParsingToolCallsStartTag = "[TOOL_CALLS]"; -const std::string DevstralToolParser::ParsingEndTag = ""; +const std::string DevstralToolParser::parsingArgsStartTag = "[ARGS]"; +const std::string DevstralToolParser::parsingToolCallsStartTag = "[TOOL_CALLS]"; +const std::string 
DevstralToolParser::parsingEndTag = "</s>";
 const int64_t DevstralToolParser::argsTokenId = 32;  // [ARGS]
 const int64_t DevstralToolParser::botTokenId = 9;    // [TOOL_CALLS]
 }  // namespace ovms
diff --git a/src/llm/io_processing/devstral/tool_parser.hpp b/src/llm/io_processing/devstral/tool_parser.hpp
index 23d86b3400..c07b38b34e 100644
--- a/src/llm/io_processing/devstral/tool_parser.hpp
+++ b/src/llm/io_processing/devstral/tool_parser.hpp
@@ -31,9 +31,9 @@ class DevstralToolParser : public BaseOutputParser {
     static const int64_t botTokenId;   // [TOOL_CALLS]
     // in streaming mode we can rely on tags in string format as tokens are not available
-    static const std::string ParsingArgsStartTag;
-    static const std::string ParsingToolCallsStartTag;
-    static const std::string ParsingEndTag;
+    static const std::string parsingArgsStartTag;
+    static const std::string parsingToolCallsStartTag;
+    static const std::string parsingEndTag;

     enum InternalState {
         AWAITING_START_TAG,
@@ -60,7 +60,7 @@ class DevstralToolParser : public BaseOutputParser {
     rapidjson::Document parseContentChunk();
     rapidjson::Document wrapCombinedDelta(ToolCall& toolCall);
     const std::vector<std::string>& getParsingStartTags() const override {
-        static const std::vector<std::string> toolCallStartTags{ParsingToolCallsStartTag};
+        static const std::vector<std::string> toolCallStartTags{parsingToolCallsStartTag};
         return toolCallStartTags;
     }
     const std::vector<std::string>& getSpecialParsingStartTags() const override {
@@ -69,8 +69,7 @@ class DevstralToolParser : public BaseOutputParser {
     }
     // Tools calls are expected to be the last part of the content, so we do not specify an end tag.
     const std::string& getParsingEndTag() const override {
-        static const std::string toolCallEndTag = "</s>";
-        return toolCallEndTag;
+        return this->parsingEndTag;
     }

     bool requiresStreamingWithSpecialTokens() const override {
diff --git a/src/test/llm/output_parsers/devstral_output_parser_test.cpp b/src/test/llm/output_parsers/devstral_output_parser_test.cpp
index 308a3b8567..b04908ca17 100644
--- a/src/test/llm/output_parsers/devstral_output_parser_test.cpp
+++ b/src/test/llm/output_parsers/devstral_output_parser_test.cpp
@@ -18,9 +18,9 @@
 #include <gtest/gtest.h>
 #include <gmock/gmock.h>

-#include "../../../llm/io_processing/base_output_parser.hpp"
-#include "../../../llm/io_processing/output_parser.hpp"
-#include "../../platform_utils.hpp"
+#include "src/llm/io_processing/base_output_parser.hpp"
+#include "src/llm/io_processing/output_parser.hpp"
+#include "test/platform_utils.hpp"

 using namespace ovms;

@@ -81,7 +81,7 @@ class DevstralOutputParserTest : public ::testing::Test {
 };

 TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithSingleToolCall) {
-    std::string input = "[TOOL_CALLS]example_tool[ARGS]{\"arg1\":\"value1\",\"arg2\":42}";
+    std::string input = "[TOOL_CALLS]example_tool[ARGS]{\"arg1\":\"value1 with new line \\n and \"quote\" and slash \\ \",\"arg2\":42}";
     std::string testInput = input;
     auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids;
     std::vector<int64_t> generatedTokens(generatedTensor.data<int64_t>(), generatedTensor.data<int64_t>() + generatedTensor.get_size());
     ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true);
@@ -90,16 +90,16 @@ TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithSingleToolCall) {
     EXPECT_EQ(parsedOutput.reasoning, "");
     ASSERT_EQ(parsedOutput.toolCalls.size(), 1);
     EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool");
-    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1\",\"arg2\":42}");
+    EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{\"arg1\":\"value1 with new line \\n and \"quote\" and slash \\ 
\",\"arg2\":42}"); EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); } TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithSingleToolCall_MissingEndTag) { - std::string testInput = "Reasoninig before tool call [TOOL_CALLS]example_tool[ARGS]{\"arg1\":\"value1\",\"arg2\":42}"; + std::string testInput = "Reasoning before tool call [TOOL_CALLS] example_tool [ARGS]{\"arg1\":\"value1\",\"arg2\":42}"; auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); - EXPECT_EQ(parsedOutput.content, "Reasoninig before tool call "); + EXPECT_EQ(parsedOutput.content, "Reasoning before tool call "); EXPECT_EQ(parsedOutput.reasoning, ""); ASSERT_EQ(parsedOutput.toolCalls.size(), 1); EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); @@ -107,6 +107,20 @@ TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithSingleToolCall_MissingEn EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); } +TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithSingleToolCall_EmptyArguments) { + std::string testInput = "Reasoning before tool call [TOOL_CALLS]example_tool[ARGS]"; + auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; + std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); + ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); + EXPECT_EQ(parsedOutput.content, "Reasoning before tool call "); + EXPECT_EQ(parsedOutput.reasoning, ""); + ASSERT_EQ(parsedOutput.toolCalls.size(), 1); + EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); + EXPECT_EQ(parsedOutput.toolCalls[0].arguments, "{}"); + EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); +} + + TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { std::string input = "This is a regular model response without tool calls."; auto generatedTensor = devstralTokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; @@ -118,11 +132,11 @@ TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { } TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall) { - std::string testInput = "Reasoninig before tool call [TOOL_CALLS]example_tool[ARGS]{\"arg1\":\"value1\",\"arg2\":42}"; + std::string testInput = "Reasoning before tool call [TOOL_CALLS]example_tool[ARGS]{\"arg1\":\"value1\",\"arg2\":42}"; auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); - EXPECT_EQ(parsedOutput.content, "Reasoninig before tool call "); + EXPECT_EQ(parsedOutput.content, "Reasoning before tool call "); EXPECT_EQ(parsedOutput.reasoning, ""); ASSERT_EQ(parsedOutput.toolCalls.size(), 1); EXPECT_EQ(parsedOutput.toolCalls[0].name, "example_tool"); @@ -131,11 +145,11 @@ TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithContentAndSingleToolCall } TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithInvalidOrder) { - std::string testInput = "Reasoninig before tool call 
[ARGS]example_tool[TOOL_CALLS]{\"arg1\":\"value1\",\"arg2\":42}"; + std::string testInput = "Reasoning before tool call [ARGS]example_tool[TOOL_CALLS]{\"arg1\":\"value1\",\"arg2\":42}"; auto generatedTensor = devstralTokenizer->encode(testInput, ov::genai::add_special_tokens(false)).input_ids; std::vector generatedTokens(generatedTensor.data(), generatedTensor.data() + generatedTensor.get_size()); ParsedOutput parsedOutput = outputParserWithRegularToolParsing->parse(generatedTokens, true); - EXPECT_EQ(parsedOutput.content, "Reasoninig before tool call example_tool{\"arg1\":\"value1\",\"arg2\":42}"); + EXPECT_EQ(parsedOutput.content, "Reasoning before tool call example_tool{\"arg1\":\"value1\",\"arg2\":42}"); EXPECT_EQ(parsedOutput.reasoning, ""); ASSERT_EQ(parsedOutput.toolCalls.size(), 0); } @@ -187,13 +201,15 @@ TEST_F(DevstralOutputParserTest, HolisticStreaming) { {"Reasoning", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"Reasoning"}})"}, {"example", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"content":"example"}})"}, {"[TOOL_CALLS]", ov::genai::GenerationFinishReason::NONE, std::nullopt}, - {"get", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {" get", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"_", ov::genai::GenerationFinishReason::NONE, std::nullopt}, {"weather", ov::genai::GenerationFinishReason::NONE, std::nullopt}, - {"[ARGS]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"get_weather"}}]}})"}, + {" [ARGS]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"get_weather"}}]}})"}, {"{\"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]}})"}, {"city\":", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"city\":"}}]}})"}, {" \"Paris", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":" \"Paris"}}]}})"}, + {" \"capital of ", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":" \"capital of "}}]}})"}, + {"art\\vine \\n", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"art\\vine \\n"}}]}})"}, // Last chunk is added in the for loop below }; ToolsSchemas_t tools_schemas = { @@ -259,6 +275,71 @@ TEST_F(DevstralOutputParserTest, HolisticStreaming) { } } +TEST_F(DevstralOutputParserTest, EmptyArgumentsStreaming) { + std::vector>> chunkToDeltaVec{ + // Tool call phase + // Starting first tool. Collecting chunk until full name is received. Don't return until then. 
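+        // Expected flow: chunks accumulate silently until "[ARGS]" completes the tool name,
+        // then a single delta carries the call's id/type/name; the bare end-of-sequence chunk
+        // that follows flushes an "{}" arguments delta so the client always receives valid JSON.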
+ {"[TOOL_CALLS]", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"list", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"_", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"tools", ov::genai::GenerationFinishReason::NONE, std::nullopt}, + {"[ARGS]", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"id":"XXXXXXXXX","type":"function","index":0,"function":{"name":"list_tools"}}]}})"}, + {"", ov::genai::GenerationFinishReason::NONE, R"({"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{}"}}]}})"}, + }; + ToolsSchemas_t tools_schemas = { + {"list_tools", ToolSchemaWrapper{}}}; + + int64_t chunkIteration = 0; + for (const auto& [chunk, finishReason, expectedDelta] : chunkToDeltaVec) { + chunkIteration++; + std::optional doc = outputParserWithRegularToolParsing->parseChunk(chunk, true, finishReason); + if (!expectedDelta.has_value() && !doc.has_value()) { + continue; // Both are nullopt, OK + } + if (expectedDelta.has_value() && doc.has_value()) { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + doc->Accept(writer); + std::string docStr = buffer.GetString(); + // If both strings contain "id":"...", compare id values by length and alphanumeric, else compare whole strings + std::string expected = expectedDelta.value(); + std::string idKey = "\"id\":\""; + auto docIdPos = docStr.find(idKey); + auto expectedIdPos = expected.find(idKey); + if (docIdPos != std::string::npos && expectedIdPos != std::string::npos) { + auto docIdStart = docIdPos + idKey.size(); + auto docIdEnd = docStr.find("\"", docIdStart); + auto expectedIdStart = expectedIdPos + idKey.size(); + auto expectedIdEnd = expected.find("\"", expectedIdStart); + ASSERT_NE(docIdEnd, std::string::npos); + ASSERT_NE(expectedIdEnd, std::string::npos); + std::string docId = docStr.substr(docIdStart, docIdEnd - docIdStart); + std::string expectedId = expected.substr(expectedIdStart, expectedIdEnd - expectedIdStart); + EXPECT_EQ(docId.size(), expectedId.size()) << "ID length mismatch for chunk: " << chunk; + EXPECT_TRUE(std::all_of(docId.begin(), docId.end(), ::isalnum)) << "ID not alphanumeric for chunk: " << chunk; + // Compare everything except the id value + std::string docStrNoId = docStr; + std::string expectedNoId = expected; + docStrNoId.replace(docIdStart, docId.size(), std::string(docId.size(), '*')); + expectedNoId.replace(expectedIdStart, expectedId.size(), std::string(expectedId.size(), '*')); + EXPECT_EQ(docStrNoId, expectedNoId) << "Mismatch for chunk (ignoring id value): " << chunk; + } else { + EXPECT_EQ(docStr, expected) << "Mismatch for chunk: [" << chunk << "] got [" << docStr << "] but expected [" << expected << "]" << chunkIteration; + } + } else if (expectedDelta.has_value()) { + FAIL() << "Mismatch for chunk: [" << chunk << "] got nothing but expected [" << expectedDelta.value() << "]" << chunkIteration; + } else if (doc.has_value()) { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + doc->Accept(writer); + std::string docStr = buffer.GetString(); + FAIL() << "Mismatch for chunk: [" << chunk << "] expected nothing but got [" << docStr << "]" << chunkIteration; + } else { + FAIL() << "Mismatch for chunk: [" << chunk << "] " << chunkIteration; + } + } +} + TEST_F(DevstralOutputParserTest, ToolCallsWithoutToolsInTheRequestStreaming) { std::vector>> chunkToDeltaVec{ // Tool parser is available, but tools are not in the request so every chunk is just a regular content From 732c0702448d19f6fb229aefcbd67e6f88244668 Mon Sep 
17 00:00:00 2001 From: Dariusz Trawinski Date: Sat, 10 Jan 2026 02:53:38 +0100 Subject: [PATCH 11/11] style --- src/llm/io_processing/devstral/tool_parser.cpp | 2 +- src/test/llm/output_parsers/devstral_output_parser_test.cpp | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/llm/io_processing/devstral/tool_parser.cpp b/src/llm/io_processing/devstral/tool_parser.cpp index ec64d3fc1b..2274d2e2b0 100644 --- a/src/llm/io_processing/devstral/tool_parser.cpp +++ b/src/llm/io_processing/devstral/tool_parser.cpp @@ -211,7 +211,7 @@ std::optional DevstralToolParser::parseChunk(const std::str } ToolCall toolCall; - if (!arguments.empty()) + if (!arguments.empty()) toolCall.arguments = arguments; else toolCall.arguments = "{}"; diff --git a/src/test/llm/output_parsers/devstral_output_parser_test.cpp b/src/test/llm/output_parsers/devstral_output_parser_test.cpp index b04908ca17..ca61b5c2cc 100644 --- a/src/test/llm/output_parsers/devstral_output_parser_test.cpp +++ b/src/test/llm/output_parsers/devstral_output_parser_test.cpp @@ -120,7 +120,6 @@ TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithSingleToolCall_EmptyArgu EXPECT_EQ(parsedOutput.toolCalls[0].id.empty(), false); } - TEST_F(DevstralOutputParserTest, ParseToolCallOutputWithContentAndNoToolCalls) { std::string input = "This is a regular model response without tool calls."; auto generatedTensor = devstralTokenizer->encode(input, ov::genai::add_special_tokens(false)).input_ids; @@ -288,7 +287,7 @@ TEST_F(DevstralOutputParserTest, EmptyArgumentsStreaming) { }; ToolsSchemas_t tools_schemas = { {"list_tools", ToolSchemaWrapper{}}}; - + int64_t chunkIteration = 0; for (const auto& [chunk, finishReason, expectedDelta] : chunkToDeltaVec) { chunkIteration++;
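For reference, the chunk-level protocol this patch series implements can be reduced to a small standalone state machine. The sketch below is illustrative only: it mirrors the [TOOL_CALLS]/[ARGS]/</s> tags used by the parser, but the type and function names (DevstralChunkSketch, ToolCallDelta, feed) are assumptions made for the example, and it deliberately avoids the OVMS, GenAI and rapidjson APIs.

// devstral_stream_sketch.cpp -- illustrative only, not part of the patch.
#include <iostream>
#include <optional>
#include <string>

struct ToolCallDelta {
    std::string name;       // non-empty only in the first delta of a call
    std::string arguments;  // raw JSON fragment, forwarded as received
};

class DevstralChunkSketch {
    enum class State { AwaitingStart, AwaitingArgs, ProcessingArgs };
    State state = State::AwaitingStart;
    std::string buffer;
    std::string toolName;
    inline static const std::string kStart = "[TOOL_CALLS]";
    inline static const std::string kArgs = "[ARGS]";
    inline static const std::string kEnd = "</s>";

public:
    // Feed one decoded text chunk; returns a delta when one can be emitted.
    std::optional<ToolCallDelta> feed(const std::string& chunk) {
        buffer += chunk;
        if (state == State::AwaitingStart) {
            size_t pos = buffer.find(kStart);
            if (pos == std::string::npos)
                return std::nullopt;  // still plain content before any tool call
            buffer.erase(0, pos + kStart.size());
            state = State::AwaitingArgs;
        }
        if (state == State::AwaitingArgs) {
            size_t pos = buffer.find(kArgs);
            if (pos == std::string::npos)
                return std::nullopt;  // tool name is not complete yet
            toolName = buffer.substr(0, pos);
            buffer.erase(0, pos + kArgs.size());
            state = State::ProcessingArgs;
            return ToolCallDelta{toolName, ""};  // first delta: name only
        }
        // ProcessingArgs: forward argument text until the end tag shows up.
        size_t end = buffer.find(kEnd);
        std::string args = (end == std::string::npos) ? buffer : buffer.substr(0, end);
        buffer.clear();
        if (args.empty())
            return std::nullopt;
        return ToolCallDelta{"", args};  // subsequent deltas: arguments only
    }
};

int main() {
    DevstralChunkSketch parser;
    for (const char* chunk : {"[TOOL_CALLS]get_", "weather[ARGS]", "{\"city\":", " \"Paris\"}</s>"}) {
        if (auto delta = parser.feed(chunk)) {
            std::cout << "name='" << delta->name << "' args='" << delta->arguments << "'\n";
        }
    }
    return 0;
}

Unlike the production parser, the sketch does not merge the tool name and a first argument fragment arriving in one chunk into a single combined delta; the patch covers that case via wrapCombinedDelta.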