modAI-systems · guenhter · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026 · Mar 4, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -43,4 +43,11 @@ jobs:
         run: uv sync --all-extras
 
       - name: Run tests
-        run: uv run pytest -v
+        run: uv run pytest -v --junitxml=junit.xml
+
+      - name: Generate CI badges
+        uses: gaelgirodon/ci-badges-action@v1
+        if: ${{ !cancelled() && github.ref == 'refs/heads/main' }}  # also run after test failures so the badge can report them
+        with:
+          gist-id: ${{ env.GIST_ID }}
+          token: ${{ secrets.GIST_TOKEN }}
diff --git a/.gitignore b/.gitignore
@@ -8,3 +8,4 @@ dist/
 *.egg-info/
 .env
 .vscode/
+junit.xml
diff --git a/README.md b/README.md
@@ -1,6 +1,7 @@
 # LLM Mock
 
 [![CI](https://github.com/modAI-systems/llmock/actions/workflows/ci.yml/badge.svg)](https://github.com/modAI-systems/llmock/actions/workflows/ci.yml)
+[![Tests](https://img.shields.io/endpoint?url=https://gist.githubusercontent.com/guenhter/e105735f20b2a01389046b1b6dd9a5e5/raw/llmock-junit-tests.json)](https://github.com/modAI-systems/llmock/actions/workflows/ci.yml)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
 
 OpenAI-compatible mock server for testing LLM integrations.
@@ -94,27 +95,30 @@ print(response.output[0].content[0].text)
 
 ### Tool Calling
 
-When `ToolCallStrategy` is included in the `strategies` list in `config.yaml` and matching `tool-calls` entries exist, llmock responds with tool calls using the configured arguments. If no tools match the config, the next strategy in the chain is tried.
+When `ToolCallStrategy` is included in the `strategies` list, llmock watches the last user message for lines matching the pattern:
+
+```
+call tool '<name>' with '<json>'
+```
+
+- `<name>` must match the name of one of the tools declared in the request's `tools` list.
+- `<json>` is the arguments string passed to the tool (use `'{}'` for no arguments).
+- Multiple matching lines produce multiple tool calls.
+- If no line matches, the strategy falls through to the next one (e.g. `MirrorStrategy`).
+
+No extra config keys are needed — adding `ToolCallStrategy` to the `strategies` list is sufficient.
 
 This works on both `/chat/completions` and `/responses` endpoints.
 
 #### Configuration
 
-Include `ToolCallStrategy` in the `strategies` list and add a `tool-calls` section to `config.yaml`:
-
 ```yaml
 strategies:
   - ErrorStrategy
   - ToolCallStrategy
   - MirrorStrategy
-
-tool-calls:
-  calculate: '{"expression": "2+2"}'
-  get_weather: '{"location": "San Francisco", "unit": "celsius"}'
 ```
 
-When a request includes a tool named `calculate`, the mock responds with a tool call whose arguments are `{"expression": "2+2"}`.
-
 #### Chat Completions API
 
 ```python
@@ -124,7 +128,7 @@ client = OpenAI(base_url="http://localhost:8000", api_key="mock-key")
 
 response = client.chat.completions.create(
     model="gpt-4o",
-    messages=[{"role": "user", "content": "Calculate 6*7"}],
+    messages=[{"role": "user", "content": "call tool 'calculate' with '{\"expression\": \"6*7\"}'"}],
     tools=[{
         "type": "function",
         "function": {
@@ -139,7 +143,7 @@ response = client.chat.completions.create(
 )
 tool_call = response.choices[0].message.tool_calls[0]
 # tool_call.function.name == "calculate"
-# tool_call.function.arguments == '{"expression": "2+2"}'  (from config)
+# tool_call.function.arguments == '{"expression": "6*7"}'  (from trigger phrase)
 ```
 
 #### Responses API
@@ -151,7 +155,7 @@ client = OpenAI(base_url="http://localhost:8000", api_key="mock-key")
 
 response = client.responses.create(
     model="gpt-4o",
-    input="Calculate 6*7",
+    input="call tool 'calculate' with '{\"expression\": \"6*7\"}'",
     tools=[{
         "type": "function",
         "name": "calculate",
@@ -164,7 +168,7 @@ response = client.responses.create(
 )
 function_call = response.output[0]
 # function_call.name == "calculate"
-# function_call.arguments == '{"expression": "2+2"}'  (from config)
+# function_call.arguments == '{"expression": "6*7"}'  (from trigger phrase)
 ```
 
 ### Error Message Simulation
@@ -246,11 +250,6 @@ error-messages:
     type: "server_error"
     code: "internal_error"
 
-# Optional: configure tool call mock responses (used by ToolCallStrategy)
-tool-calls:
-  calculate: '{"expression": "2+2"}'
-  get_weather: '{"location": "San Francisco", "unit": "celsius"}'
-
 ### Environment Variable Overrides
 
 You can override values from `config.yaml` using environment variables with the `LLMOCK_` prefix.

diff --git a/config.yaml b/config.yaml
@@ -26,41 +26,10 @@ models:
 #
 # Available strategies:
 #   MirrorStrategy    - echoes back the last user message
-#   ToolCallStrategy  - returns config-driven tool calls
-#   ErrorStrategy     - returns config-driven error responses
+#   ToolCallStrategy  - triggered by `call tool '<name>' with '<json>'` phrase in last user message
+#   ErrorStrategy     - triggered by `raise error <json>` phrase in last user message
 #
 strategies:
   - ErrorStrategy
   - ToolCallStrategy
   - MirrorStrategy
-
-# Error messages (optional)
-# Maps message content to error responses. When a request's last user
-# message matches one of these keys exactly, the server returns the
-# configured error instead of a normal response. Works on both
-# /chat/completions and /responses.
-#
-# Each entry needs: status-code, message, type, code
-error-messages:
-  "trigger-429":
-    status-code: 429
-    message: "Rate limit exceeded"
-    type: "rate_limit_error"
-    code: "rate_limit_exceeded"
-  "trigger-500":
-    status-code: 500
-    message: "Internal server error"
-    type: "server_error"
-    code: "internal_error"
-
-# Tool call strategy configuration (optional)
-# Maps function names to their mock response arguments (JSON strings).
-# When response-strategy is set to ToolCallStrategy, the strategy looks up
-# each tool by name. If found here, it responds with a tool call using
-# these arguments. Tools not listed here are ignored.
-#
-# Example:
-#   tool-calls:
-#     calculate: '{"expression": "2+2"}'
-#     get_weather: '{"location": "San Francisco", "unit": "celsius"}'
-#     search: '{"query": "example search"}'
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
@@ -66,10 +66,9 @@ ResponseStrategy {
 - `ChatMirrorStrategy`: Extract last user message → return `[StrategyResponse(type="text", content=...)]`
 - `ResponseMirrorStrategy`: Extract last user input → return `[StrategyResponse(type="text", content=...)]`
 
-**Tool Call Strategies** (config-driven):
-- `ChatToolCallStrategy` (Chat Completions): Reads `tool-calls` from config. Goes through each tool in the request, looks up by name in config. Configured tools → `StrategyResponse(type="tool_call")`, unconfigured tools → ignored with warning message.
-- `ResponseToolCallStrategy` (Responses API): Same config-driven logic.
-- If no tools match config → returns a text warning message.
+**Tool Call Strategies** (trigger phrase–driven):
+- `ChatToolCallStrategy` (Chat Completions): Parses the **last user message** line-by-line for the pattern `call tool '<name>' with '<json>'`. Each matching line whose `<name>` appears in `request.tools` produces a `StrategyResponse(type="tool_call")` with the extracted JSON arguments. Multiple matching lines produce multiple responses. If no line matches, returns an empty list (falls through to the next strategy). No config keys required.
+- `ResponseToolCallStrategy` (Responses API): Same trigger-phrase logic but operates on `ResponseCreateRequest` inputs (string or structured message list).
 - Both support streaming and non-streaming modes.
 
 **Strategy Factory**:
@@ -86,12 +85,10 @@ ResponseStrategy {
 - Not registered in the factory — it **wraps** the factory internally.
 - Both routers (`/chat/completions` and `/responses`) instantiate the composition strategy directly.
 
-**Error Strategies** (config-driven):
-- `ChatErrorStrategy` / `ResponseErrorStrategy`: Read `error-messages` from config. If the last user message content matches a key in `error-messages`, return `StrategyResponse(type="error", ...)` with the configured status code, message, type, and code. Otherwise return empty list (no error).
-- Error check happens after model validation but before the main response strategy runs.
+**Error Strategies** (trigger phrase–driven):
+- `ChatErrorStrategy` / `ResponseErrorStrategy`: Scan the last user message line-by-line for `raise error <json>`. The JSON must contain `code` (int, used as the HTTP status code) and `message` (str); `type` and `error_code` are optional. First matching line wins. No config required.
 - Only the last user message is checked (system/assistant/tool messages are ignored).
-- Separate from the main strategy factory — created via `create_chat_error_strategy(config)` / `create_response_error_strategy(config)`.
-- Typically placed first in the `strategies` list so errors take priority.
+- Registered in the factory under `"ErrorStrategy"` — place first in the `strategies` list so errors take priority.
 
 **Future Strategies**: FixedResponse, Template, Random, AIProxy
 
@@ -115,33 +112,13 @@ models:
     created: 1721172741
     owned_by: openai
 
-# Config-driven error responses (triggered by message content)
-error-messages:
-  "trigger-401":
-    status-code: 401
-    message: "Invalid API key"
-    type: "authentication_error"
-    code: "invalid_api_key"
-  "trigger-429":
-    status-code: 429
-    message: "Rate limit exceeded"
-    type: "rate_limit_error"
-    code: "rate_limit_exceeded"
-  "trigger-500":
-    status-code: 500
-    message: "Internal server error"
-    type: "server_error"
-    code: "internal_error"
-
-# Config-driven tool call responses (used by ToolCallStrategy)
-tool-calls:
-  calculate: '{"expression": "2+2"}'
-  get_weather: '{"location": "San Francisco", "unit": "celsius"}'
 ```
 
 The `strategies` field is an ordered list of strategy names to try. The composition strategy runs them in order; the first one that returns a non-empty result wins. If omitted, `["MirrorStrategy"]` is the default.
 
-The `error-messages` section maps message content strings to error responses. When a request's last user message matches a key in `error-messages` exactly, the server returns the configured HTTP error instead of a normal response. Each entry requires `status-code`, `message`, `type`, and `code`.
+`ToolCallStrategy` fires when the last user message contains a line matching `call tool '<name>' with '<json>'` and `<name>` is present in `request.tools`.
+
+`ErrorStrategy` fires when the last user message contains a line matching `raise error <json>`, where `<json>` has at least `code` (int) and `message` (str).
 
 ## Endpoints
 
@@ -154,30 +131,12 @@ Returns configured model list. [OpenAI Spec](https://platform.openai.com/docs/ap
 Chat-style completions with streaming support. [OpenAI Spec](https://platform.openai.com/docs/api-reference/chat/create)
 - Supports `tools` for tool calling
 - Supports `stream_options.include_usage` for usage stats in streaming
-- Message content matching `error-messages` config returns HTTP errors (see Error Messages below)
+- A `raise error <json>` trigger phrase in the last user message returns the HTTP error described by `<json>`
 
 ### POST /responses
 OpenAI's newer Responses API with streaming support. [OpenAI Spec](https://platform.openai.com/docs/api-reference/responses)
 - Supports `tools` for tool calling (Responses API format: flat `{"type": "function", "name": ...}` tools)
-- Message content matching `error-messages` config returns HTTP errors (see Error Messages below)
-
-## Error Messages
-
-Error responses are fully config-driven via the `error-messages` section in `config.yaml`. When a request's last user message content matches a key in `error-messages` exactly, the server returns the configured HTTP error instead of a normal response.
-
-Default configuration:
-
-| Message Content   | HTTP Status | Error Type              | Message                  |
-|------------------|-------------|-------------------------|--------------------------|
-| `trigger-401`    | 401         | `authentication_error`  | Invalid API key          |
-| `trigger-429`    | 429         | `rate_limit_error`      | Rate limit exceeded      |
-| `trigger-500`    | 500         | `server_error`          | Internal server error    |
-
-Custom error triggers can be added by adding entries to the `error-messages` section with any message string and custom status code, message, type, and code.
-
-Only the last user message is checked. System/assistant/tool messages are ignored.
-Model validation happens first, so the model must be valid.
-Works on both `/chat/completions` and `/responses`.
+- A `raise error <json>` trigger phrase in the last user message returns the HTTP error described by `<json>`
 
 ## Streaming (SSE)
 

diff --git a/docs/llmock-skill/SKILL.md b/docs/llmock-skill/SKILL.md
@@ -98,11 +98,11 @@ strategies:
   - MirrorStrategy     # Fall back to echoing input
 ```
 
-| Strategy | Config Section Required | Behavior |
-|----------|----------------------|----------|
-| `MirrorStrategy` | None | Echoes the last user message |
-| `ToolCallStrategy` | `tool-calls` | Returns tool calls with configured arguments |
-| `ErrorStrategy` | `error-messages` | Returns HTTP errors when message content matches a trigger |
+| Strategy | Behavior |
+|----------|----------|
+| `MirrorStrategy` | Echoes the last user message |
+| `ToolCallStrategy` | Returns tool calls triggered by `call tool '<name>' with '<json>'` phrase in the last user message |
+| `ErrorStrategy` | Returns HTTP errors triggered by `raise error <json>` phrase in the last user message |
 
 If `strategies` is omitted, defaults to `["MirrorStrategy"]`. Unknown names are skipped with a warning.
 
@@ -164,31 +164,59 @@ strategies:
   - ErrorStrategy
   - ToolCallStrategy
   - MirrorStrategy
-
-error-messages:
-  "trigger-429":
-    status-code: 429
-    message: "Rate limit exceeded"
-    type: "rate_limit_error"
-    code: "rate_limit_exceeded"
-  "trigger-500":
-    status-code: 500
-    message: "Internal server error"
-    type: "server_error"
-    code: "internal_error"
-
-tool-calls:
-  calculate: '{"expression": "2+2"}'
-  get_weather: '{"location": "San Francisco", "unit": "celsius"}'
 ```
 
 ### Tool Calling
 
-When `ToolCallStrategy` is in the strategies list and a request includes a tool whose name matches a key in `tool-calls`, llmock responds with a tool call using the configured arguments. Tools not listed in config are ignored. If no tools match, the next strategy in the chain runs.
+When `ToolCallStrategy` is in the strategies list, llmock scans the last user message line-by-line for the pattern:
+
+```
+call tool '<name>' with '<json>'
+```
+
+- `<name>` must match one of the tools declared in the request's `tools` list.
+- `<json>` is the arguments string passed back as the tool call arguments (use `'{}'` for no arguments).
+- Multiple matching lines each produce a separate tool call response.
+- If no line matches, or the named tool is not in `request.tools`, the strategy returns an empty list and the next strategy runs.
 
 ### Error Simulation
 
-When `ErrorStrategy` is in the strategies list and the last user message matches a key in `error-messages` exactly (case-sensitive), the server returns the configured HTTP error. Only the last user message is checked. System/assistant/tool messages are ignored.
+When `ErrorStrategy` is in the strategies list, llmock scans the last user message line-by-line for the pattern:
+
+```
+raise error {"code": 429, "message": "Rate limit exceeded"}
+```
+
+| Field | Required | Maps to |
+|-------|----------|---------|
+| `code` | yes (int) | HTTP response status code (e.g. `429`) |
+| `message` | yes (string) | `error.message` in the JSON body |
+| `type` | no (string) | `error.type` in the JSON body — defaults to `"api_error"` |
+| `error_code` | no (string) | `error.code` in the JSON body — defaults to `"error"` |
+
+Example with all fields:
+
+```
+raise error {"code": 429, "message": "Rate limit exceeded", "type": "rate_limit_error", "error_code": "rate_limit_exceeded"}
+```
+
+Produces HTTP 429 with body:
+
+```json
+{
+  "error": {
+    "message": "Rate limit exceeded",
+    "type": "rate_limit_error",
+    "param": null,
+    "code": "rate_limit_exceeded"
+  }
+}
+```
+
+- The phrase can appear on any line of the message — matching is done line by line, not against the message as a whole.
+- First matching line wins; remaining lines are ignored.
+- If no line matches, the strategy returns an empty list and the next strategy runs.
+- Works on both `/chat/completions` and `/responses`.
 
 ## Default Models
 

diff --git a/docs/llmock-skill/references/config.yaml b/docs/llmock-skill/references/config.yaml
@@ -26,41 +26,10 @@ models:
 #
 # Available strategies:
 #   MirrorStrategy    - echoes back the last user message
-#   ToolCallStrategy  - returns config-driven tool calls
-#   ErrorStrategy     - returns config-driven error responses
+#   ToolCallStrategy  - triggered by `call tool '<name>' with '<json>'` phrase in last user message
+#   ErrorStrategy     - triggered by `raise error <json>` phrase in last user message
 #
 strategies:
   - ErrorStrategy
   - ToolCallStrategy
   - MirrorStrategy
-
-# Error messages (optional)
-# Maps message content to error responses. When a request's last user
-# message matches one of these keys exactly, the server returns the
-# configured error instead of a normal response. Works on both
-# /chat/completions and /responses.
-#
-# Each entry needs: status-code, message, type, code
-error-messages:
-  "trigger-429":
-    status-code: 429
-    message: "Rate limit exceeded"
-    type: "rate_limit_error"
-    code: "rate_limit_exceeded"
-  "trigger-500":
-    status-code: 500
-    message: "Internal server error"
-    type: "server_error"
-    code: "internal_error"
-
-# Tool call strategy configuration (optional)
-# Maps function names to their mock response arguments (JSON strings).
-# When response-strategy is set to ToolCallStrategy, the strategy looks up
-# each tool by name. If found here, it responds with a tool call using
-# these arguments. Tools not listed here are ignored.
-#
-# Example:
-#   tool-calls:
-#     calculate: '{"expression": "2+2"}'
-#     get_weather: '{"location": "San Francisco", "unit": "celsius"}'
-#     search: '{"query": "example search"}'