From 18ea6b194910319987da59daec03309a942ae9d7 Mon Sep 17 00:00:00 2001 From: guenhter Date: Tue, 3 Mar 2026 14:20:53 +0100 Subject: [PATCH 01/10] feat: add raw tool support --- .agents/skills/tool-microservice/SKILL.md | 158 ++++++++ backend/config.yaml | 17 + backend/docs/architecture/tools.md | 238 ++++++++++++ backend/pyproject.toml | 1 + backend/src/modai/default_config.yaml | 11 + backend/src/modai/modules/tools/__init__.py | 0 .../modai/modules/tools/__tests__/__init__.py | 0 .../tools/__tests__/test_tool_registry.py | 352 ++++++++++++++++++ .../tools/__tests__/test_tools_web_module.py | 269 +++++++++++++ backend/src/modai/modules/tools/module.py | 96 +++++ .../src/modai/modules/tools/tool_registry.py | 92 +++++ .../modai/modules/tools/tools_web_module.py | 86 +++++ 12 files changed, 1320 insertions(+) create mode 100644 .agents/skills/tool-microservice/SKILL.md create mode 100644 backend/docs/architecture/tools.md create mode 100644 backend/src/modai/modules/tools/__init__.py create mode 100644 backend/src/modai/modules/tools/__tests__/__init__.py create mode 100644 backend/src/modai/modules/tools/__tests__/test_tool_registry.py create mode 100644 backend/src/modai/modules/tools/__tests__/test_tools_web_module.py create mode 100644 backend/src/modai/modules/tools/module.py create mode 100644 backend/src/modai/modules/tools/tool_registry.py create mode 100644 backend/src/modai/modules/tools/tools_web_module.py diff --git a/.agents/skills/tool-microservice/SKILL.md b/.agents/skills/tool-microservice/SKILL.md new file mode 100644 index 0000000..6d23422 --- /dev/null +++ b/.agents/skills/tool-microservice/SKILL.md @@ -0,0 +1,158 @@ +--- +name: tool-microservice +description: How to create a new tool microservice for modAI-chat. Tools are independent HTTP microservices that expose an OpenAPI spec and a trigger endpoint. They are registered in modAI's tool registry via config.yaml. 
+--- + +# Creating a Tool Microservice + +## Overview + +In modAI-chat, tools are **independent microservices** — not modAI modules. Each tool is a standalone HTTP service that: + +1. Exposes a **trigger endpoint** (any HTTP method) that performs the tool's action +2. Serves an **OpenAPI spec** at `/openapi.json` describing the trigger endpoint +3. Has **no dependency on modAI** — it can be written in any language/framework + +The modAI Tool Registry discovers tools by fetching their OpenAPI spec and uses the `operationId` as the tool's function name. + +## Requirements + +A valid tool microservice MUST: + +- **Serve `/openapi.json`** at the service root (e.g. `http://my-tool:8000/openapi.json`) +- **Have exactly one trigger operation** with an `operationId` field — this becomes the tool's name in the LLM +- **Use `summary` or `description`** on the operation — this becomes the tool's description shown to the LLM +- **Define request body schema** under `requestBody.content.application/json.schema` — this becomes the tool's parameters +- **Return JSON responses** with appropriate status codes + +## Step-by-Step Guide + +### 1. Create the Microservice + +Use any HTTP framework. Example with Python FastAPI: + +```python +from fastapi import FastAPI + +app = FastAPI( + title="Calculator Tool", + version="1.0.0", + description="Evaluate mathematical expressions", +) + +@app.post("/calculate", operation_id="calculate", summary="Evaluate a math expression") +async def calculate(expression: str) -> dict: + """Evaluate the given math expression and return the result.""" + result = eval(expression) # Use a safe evaluator in production + return {"result": result} +``` + +FastAPI automatically generates the `/openapi.json` spec from the route definition. + +### 2. Verify the OpenAPI Spec + +Start the service and check that `/openapi.json` contains: + +- `operationId` — unique name for the tool (e.g. 
`"calculate"`) +- `summary` or `description` — what the tool does (shown to the LLM) +- `requestBody.content.application/json.schema` — input parameters + +```bash +curl http://localhost:8000/openapi.json | jq '.paths' +``` + +Expected structure: + +```json +{ + "/calculate": { + "post": { + "summary": "Evaluate a math expression", + "operationId": "calculate", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": "Math expression to evaluate" + } + }, + "required": ["expression"] + } + } + } + } + } + } +} +``` + +### 3. Register in modAI config.yaml + +Add the tool to the `tool_registry` module's `tools` list in `config.yaml` (and `default_config.yaml` if it should be a default): + +```yaml +modules: + tool_registry: + class: modai.modules.tools.tool_registry.HttpToolRegistryModule + config: + tools: + - url: http://calculator-service:8000/calculate + method: POST +``` + +Each entry has: +- **`url`**: The full trigger endpoint URL (not the base URL) +- **`method`**: The HTTP method to invoke the tool (POST, PUT, GET, etc.) + +The registry derives the base URL from `url` (strips the path) and appends `/openapi.json` to fetch the spec. + +### 4. Test the Integration + +1. Start the tool microservice +2. Start modAI backend +3. Call `GET /api/tools` and verify your tool appears in OpenAI function-calling format: + +```bash +curl http://localhost:8000/api/tools | jq '.tools[] | select(.function.name == "calculate")' +``` + +Expected: + +```json +{ + "type": "function", + "function": { + "name": "calculate", + "description": "Evaluate a math expression", + "parameters": { ... 
}, + "strict": true + } +} +``` + +## Key Conventions + +| Aspect | Convention | +|---|---| +| OpenAPI spec location | `/openapi.json` at service root | +| Tool name | `operationId` from the OpenAPI spec | +| Tool description | `summary` (preferred) or `description` from the operation | +| Parameters | `requestBody.content.application/json.schema` | +| HTTP method | Choose what's idiomatic (POST for actions, GET for queries, etc.) | +| Error handling | Return appropriate HTTP status codes; modAI logs warnings for unreachable tools | + +## Common Pitfalls + +- **Missing `operationId`**: The tool will be silently skipped. Always set `operationId` on your trigger operation. +- **Wrong URL in config**: The `url` must be the full trigger endpoint (e.g. `/calculate`), not just the base URL. The registry strips the path to derive the base for fetching `/openapi.json`. +- **Multiple operations**: The registry uses the **first** operation with an `operationId` it finds. Keep one trigger operation per tool service. +- **Non-JSON responses**: The LLM expects JSON results. Always return `application/json`. + +## Architecture Reference + +See `backend/docs/architecture/tools.md` for the full tools architecture including the registry module, web module, and chat agent integration. 
diff --git a/backend/config.yaml b/backend/config.yaml index 1d2087f..801948b 100644 --- a/backend/config.yaml +++ b/backend/config.yaml @@ -13,6 +13,7 @@ modules: class: modai.modules.chat.openai_agent_chat.StrandsAgentChatModule module_dependencies: llm_provider_module: openai_model_provider + tool_registry: tool_registry model_provider_store: class: modai.modules.model_provider_store.sql_model_provider_store.SQLAlchemyModelProviderStore config: @@ -60,3 +61,19 @@ modules: module_dependencies: session: "session" user_settings_store: "user_settings_store" + + tool_registry: + class: modai.modules.tools.tool_registry.HttpToolRegistryModule + config: + tools: [] + # Example: + # tools: + # - url: http://calculator-service:8000/calculate + # method: POST + # - url: http://web-search-service:8000/search + # method: PUT + + tools_web: + class: modai.modules.tools.tools_web_module.OpenAIToolsWebModule + module_dependencies: + tool_registry: tool_registry diff --git a/backend/docs/architecture/tools.md b/backend/docs/architecture/tools.md new file mode 100644 index 0000000..1439041 --- /dev/null +++ b/backend/docs/architecture/tools.md @@ -0,0 +1,238 @@ +# Tools Architecture + +## 1. 
Overview +- **Architecture Style**: Microservice-based tool system with a registry and a web layer that serves tools in OpenAI format +- **Design Principles**: + - Tools are independent microservices — each tool is a standalone service with its own OpenAPI spec + - OpenAPI as contract — the tool's definition (parameters, description, endpoints) is read from its OpenAPI spec + - Registry as aggregator — the Tool Registry fetches and holds OpenAPI specs + invocation metadata (url, method) without transformation + - Web layer transforms — the Tools Web Module converts OpenAPI specs into OpenAI function-calling format for the frontend + - Chat Agent resolves via registry — when the LLM emits a tool call, the Chat Agent looks up the tool's url and method from the registry to make the HTTP call +- **Quality Attributes**: Decoupled, language-agnostic, independently deployable, discoverable + +## 2. Tool Microservice Convention + +Each tool is a standalone microservice that follows these conventions: + +1. **HTTP endpoint**: The tool is triggered via an HTTP request. Each tool chooses the HTTP method (PUT, POST, GET, etc.) that is most idiomatic for its use case. The method is configured in the tool registry. +2. **OpenAPI spec**: The microservice exposes its OpenAPI specification (typically at `/openapi.json`). This spec documents all endpoints, including the trigger endpoint with its parameters, description, and response schema. +3. **Independence**: Tools have no dependency on modAI. They are plain HTTP microservices that can be developed, deployed, and tested independently in any language/framework. 
+ +### Example Tool Microservice (OpenAPI spec) +```json +{ + "openapi": "3.1.0", + "info": { + "title": "Calculator Tool", + "version": "1.0.0", + "description": "Evaluate mathematical expressions" + }, + "paths": { + "/calculate": { + "post": { + "summary": "Evaluate a math expression", + "operationId": "calculate", + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": "Math expression to evaluate" + } + }, + "required": ["expression"] + } + } + } + }, + "responses": { + "200": { + "description": "Calculation result", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "result": { "type": "number" } + } + } + } + } + } + } + } + } + } +} +``` + +## 3. System Context + +```mermaid +flowchart TD + FE[Frontend] -->|GET /api/tools| TW[Tools Web Module] + TW -->|get_tools| TR[Tool Registry Module] + TR -->|GET /openapi.json| TS1[Tool Service A] + TR -->|GET /openapi.json| TS2[Tool Service B] + FE -->|POST /api/responses with tool names| CR[Chat Router] + CR --> CA[Chat Agent Module] + CA -->|lookup tool by name| TR + CA -->|HTTP trigger| TS1 + CA -->|HTTP trigger| TS2 +``` + +**Flow**: +1. Frontend calls `GET /api/tools` to discover all available tools +2. Tools Web Module asks the Tool Registry for all tools (OpenAPI specs + url + method) +3. Tools Web Module transforms the OpenAPI specs into **OpenAI function-calling format** and returns them to the frontend +4. User selects which tools to enable for a chat session +5. Frontend sends `POST /api/responses` with tool names (as received from `GET /api/tools`) +6. When the LLM emits a `tool_call` with a function name, the Chat Agent **looks up** that name in the Tool Registry to get the tool's url and method +7. The Chat Agent sends an HTTP request to the tool's microservice endpoint and returns the result to the LLM + +## 4. 
Module Architecture + +### 4.1 Tool Registry Module (Plain Module) + +**Purpose**: Aggregates OpenAPI specs from all configured tool microservices and provides tool lookup for invocation. + +**Responsibilities**: +- Maintain a list of configured tool microservice URLs and their HTTP methods +- Fetch the OpenAPI spec from each tool microservice +- Return all tools with their OpenAPI specs, urls, and methods (unmodified) +- Provide lookup by tool name — given a function name (derived from `operationId`), return the tool's url, method, and parameters +- Handle unavailable tool services gracefully (skip with warning, don't fail the whole request) + +**No module dependencies**: The registry does not depend on other modAI modules. Tool microservices are external HTTP services configured via the module's config. + +### 4.2 Tools Web Module (Web Module) + +**Purpose**: Exposes `GET /api/tools` endpoint. Transforms tool definitions from OpenAPI format into OpenAI function-calling format so the frontend can use them directly. + +**Dependencies**: Tool Registry Module (injected via `module_dependencies`) + +**Responsibilities**: +- Expose `GET /api/tools` endpoint +- Call the Tool Registry to get all available tools with their OpenAPI specs +- Transform each tool's OpenAPI spec into OpenAI function-calling format (see section 5.1) +- Return the transformed tools to the frontend + +### 4.3 Chat Agent Module (existing, updated dependency) + +The Chat Agent Module receives a `tool_registry` dependency. When the LLM emits a `tool_call`: +1. Extract the function name from the tool call +2. Look up the function name in the Tool Registry to get url + method +3. Send the HTTP request with the tool call arguments to the tool's endpoint +4. Return the response to the LLM + +## 5. 
API Endpoints + +- `GET /api/tools` — List all available tools in OpenAI function-calling format + +### 5.1 List Available Tools + +**Endpoint**: `GET /api/tools` + +**Purpose**: Returns all available tools in OpenAI function-calling format. The frontend can pass these tool definitions directly when calling `/api/responses`. + +The Tools Web Module fetches tool data from the registry (OpenAPI specs + metadata) and transforms each tool into OpenAI format. + +**OpenAPI → OpenAI Transformation**: +- `operationId` → `function.name` +- `summary` (or `description`) → `function.description` +- Request body `schema` → `function.parameters` + +**Response Format (200 OK)**: +```json +{ + "tools": [ + { + "type": "function", + "function": { + "name": "calculate", + "description": "Evaluate a math expression", + "parameters": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": "Math expression to evaluate" + } + }, + "required": ["expression"] + } + } + }, + { + "type": "function", + "function": { + "name": "web_search", + "description": "Search the web for current information", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Search query" + } + }, + "required": ["query"] + } + } + } + ] +} +``` + +If a tool service is unreachable, it is omitted from the response and a warning is logged. The endpoint never fails due to a single unavailable tool. + +## 6. 
Configuration + +```yaml +modules: + tool_registry: + class: modai.modules.tools.tool_registry.HttpToolRegistryModule + config: + tools: + - url: http://calculator-service:8000/calculate + method: POST + - url: http://web-search-service:8000/search + method: PUT + + tools_web: + class: modai.modules.tools.tools_web_module.OpenAIToolsWebModule + module_dependencies: + tool_registry: tool_registry + + chat_openai: + class: modai.modules.chat.openai_agent_chat.StrandsAgentChatModule + module_dependencies: + llm_provider_module: openai_model_provider + tool_registry: tool_registry +``` + +Each entry in `tools` (on the registry) has: +- `url`: The full trigger endpoint URL of the tool microservice +- `method`: The HTTP method used to invoke the tool (e.g. PUT, POST, GET) + +The registry derives the base URL from `url` to fetch the OpenAPI spec (appending `/openapi.json` to the base). + +## 7. Design Decisions + +- **Decision 1**: Tools are independent microservices, not modAI modules. + - **Rationale**: Maximum decoupling — tools can be written in any language, deployed independently, and reused across systems. + - **Trade-off**: Network overhead for spec fetching and tool invocation vs. in-process calls. + +- **Decision 2**: OpenAPI spec is fetched at request time, not cached. + - **Rationale**: Simplicity — no cache invalidation needed. Tool services can update their specs and changes are immediately visible. + - **Trade-off**: Higher latency on `GET /api/tools`. Can be optimized with caching later if needed. + +- **Decision 3**: The Tool Registry stores OpenAPI specs unmodified. The Tools Web Module transforms them. + - **Rationale**: Separation of concerns — the registry is a pure aggregator, the web module handles format conversion. This keeps each module focused on one job. + +- **Decision 4**: Tool name for lookup is derived from `operationId` in the OpenAPI spec. 
+ - **Rationale**: `operationId` is a standard OpenAPI field designed to uniquely identify an operation, making it a natural tool name. diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 1626236..619dcf4 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -14,6 +14,7 @@ dependencies = [ "sqlalchemy", "strands-agents", "strands-agents-tools", + "httpx>=0.28.1", ] [dependency-groups] diff --git a/backend/src/modai/default_config.yaml b/backend/src/modai/default_config.yaml index 24aa6c7..b4e066a 100644 --- a/backend/src/modai/default_config.yaml +++ b/backend/src/modai/default_config.yaml @@ -12,6 +12,7 @@ modules: class: modai.modules.chat.openai_agent_chat.StrandsAgentChatModule module_dependencies: llm_provider_module: openai_model_provider + tool_registry: tool_registry model_provider_store: class: modai.modules.model_provider_store.sql_model_provider_store.SQLAlchemyModelProviderStore config: @@ -57,3 +58,13 @@ modules: module_dependencies: session: "session" user_settings_store: "user_settings_store" + + tool_registry: + class: modai.modules.tools.tool_registry.HttpToolRegistryModule + config: + tools: [] + + tools_web: + class: modai.modules.tools.tools_web_module.OpenAIToolsWebModule + module_dependencies: + tool_registry: tool_registry diff --git a/backend/src/modai/modules/tools/__init__.py b/backend/src/modai/modules/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/src/modai/modules/tools/__tests__/__init__.py b/backend/src/modai/modules/tools/__tests__/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/backend/src/modai/modules/tools/__tests__/test_tool_registry.py b/backend/src/modai/modules/tools/__tests__/test_tool_registry.py new file mode 100644 index 0000000..5348fe2 --- /dev/null +++ b/backend/src/modai/modules/tools/__tests__/test_tool_registry.py @@ -0,0 +1,352 @@ +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +import pytest + +from 
modai.module import ModuleDependencies +from modai.modules.tools.module import ToolDefinition +from modai.modules.tools.tool_registry import ( + HttpToolRegistryModule, + _derive_base_url, + _extract_operation_id, + _fetch_openapi_spec, +) + + +SAMPLE_OPENAPI_SPEC = { + "openapi": "3.1.0", + "info": {"title": "Calculator Tool", "version": "1.0.0"}, + "paths": { + "/calculate": { + "post": { + "summary": "Evaluate a math expression", + "operationId": "calculate", + "requestBody": { + "required": True, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {"expression": {"type": "string"}}, + "required": ["expression"], + } + } + }, + }, + } + } + }, +} + + +class TestHttpToolRegistryModule: + def _make_module(self, tools: list[dict]) -> HttpToolRegistryModule: + deps = ModuleDependencies() + config = {"tools": tools} + return HttpToolRegistryModule(deps, config) + + @pytest.mark.asyncio + async def test_get_tools_empty_config(self): + module = self._make_module([]) + result = await module.get_tools() + assert result == [] + + @pytest.mark.asyncio + async def test_get_tools_returns_specs_from_all_services(self): + module = self._make_module( + [ + {"url": "http://calc:8000/calculate", "method": "POST"}, + {"url": "http://search:8000/search", "method": "PUT"}, + ] + ) + + spec_a = {**SAMPLE_OPENAPI_SPEC, "info": {"title": "Calc", "version": "1.0.0"}} + spec_b = { + **SAMPLE_OPENAPI_SPEC, + "info": {"title": "Search", "version": "1.0.0"}, + } + + mock_response_a = MagicMock() + mock_response_a.status_code = 200 + mock_response_a.raise_for_status = lambda: None + mock_response_a.json.return_value = spec_a + + mock_response_b = MagicMock() + mock_response_b.status_code = 200 + mock_response_b.raise_for_status = lambda: None + mock_response_b.json.return_value = spec_b + + async def mock_get(url, **kwargs): + if "calc" in url: + return mock_response_a + return mock_response_b + + with patch( + 
"modai.modules.tools.tool_registry.httpx.AsyncClient" + ) as mock_client_cls: + mock_client = AsyncMock() + mock_client.get = mock_get + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=False) + mock_client_cls.return_value = mock_client + + result = await module.get_tools() + + assert len(result) == 2 + assert result[0].url == "http://calc:8000/calculate" + assert result[0].method == "POST" + assert result[0].openapi_spec["info"]["title"] == "Calc" + assert result[1].url == "http://search:8000/search" + assert result[1].method == "PUT" + assert result[1].openapi_spec["info"]["title"] == "Search" + + @pytest.mark.asyncio + async def test_get_tools_skips_unavailable_service(self): + module = self._make_module( + [ + {"url": "http://good:8000/run", "method": "POST"}, + {"url": "http://bad:8000/run", "method": "POST"}, + ] + ) + + mock_response_good = MagicMock() + mock_response_good.status_code = 200 + mock_response_good.raise_for_status = lambda: None + mock_response_good.json.return_value = SAMPLE_OPENAPI_SPEC + + async def mock_get(url, **kwargs): + if "bad" in url: + raise httpx.ConnectError("Connection refused") + return mock_response_good + + with patch( + "modai.modules.tools.tool_registry.httpx.AsyncClient" + ) as mock_client_cls: + mock_client = AsyncMock() + mock_client.get = mock_get + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=False) + mock_client_cls.return_value = mock_client + + result = await module.get_tools() + + assert len(result) == 1 + assert result[0].url == "http://good:8000/run" + + @pytest.mark.asyncio + async def test_specs_are_returned_unmodified(self): + module = self._make_module([{"url": "http://tool:8000/run", "method": "PUT"}]) + + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.raise_for_status = lambda: None + mock_response.json.return_value = SAMPLE_OPENAPI_SPEC + + async def 
mock_get(url, **kwargs): + return mock_response + + with patch( + "modai.modules.tools.tool_registry.httpx.AsyncClient" + ) as mock_client_cls: + mock_client = AsyncMock() + mock_client.get = mock_get + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=False) + mock_client_cls.return_value = mock_client + + result = await module.get_tools() + + assert result[0].openapi_spec == SAMPLE_OPENAPI_SPEC + + def test_has_no_router(self): + module = self._make_module([]) + assert not hasattr(module, "router") + + def test_stores_tool_services_from_config(self): + tools = [ + {"url": "http://a:8000/run", "method": "POST"}, + {"url": "http://b:9000/exec", "method": "PUT"}, + ] + module = self._make_module(tools) + assert module.tool_services == tools + + def test_defaults_to_empty_tools_list(self): + deps = ModuleDependencies() + module = HttpToolRegistryModule(deps, {}) + assert module.tool_services == [] + + +class TestFetchOpenapiSpec: + @pytest.mark.asyncio + async def test_success(self): + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.raise_for_status = lambda: None + mock_response.json.return_value = SAMPLE_OPENAPI_SPEC + + client = AsyncMock() + client.get = AsyncMock(return_value=mock_response) + + result = await _fetch_openapi_spec(client, "http://tool:8000") + assert result == SAMPLE_OPENAPI_SPEC + client.get.assert_called_once_with("http://tool:8000/openapi.json") + + @pytest.mark.asyncio + async def test_strips_trailing_slash(self): + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.raise_for_status = lambda: None + mock_response.json.return_value = SAMPLE_OPENAPI_SPEC + + client = AsyncMock() + client.get = AsyncMock(return_value=mock_response) + + await _fetch_openapi_spec(client, "http://tool:8000/") + client.get.assert_called_once_with("http://tool:8000/openapi.json") + + @pytest.mark.asyncio + async def test_http_error_returns_none(self): + 
mock_response = MagicMock() + mock_response.status_code = 500 + mock_response.raise_for_status.side_effect = httpx.HTTPStatusError( + "Server Error", + request=httpx.Request("GET", "http://tool:8000/openapi.json"), + response=mock_response, + ) + + client = AsyncMock() + client.get = AsyncMock(return_value=mock_response) + + result = await _fetch_openapi_spec(client, "http://tool:8000") + assert result is None + + @pytest.mark.asyncio + async def test_connection_error_returns_none(self): + client = AsyncMock() + client.get = AsyncMock(side_effect=httpx.ConnectError("Connection refused")) + + result = await _fetch_openapi_spec(client, "http://tool:8000") + assert result is None + + @pytest.mark.asyncio + async def test_unexpected_error_returns_none(self): + client = AsyncMock() + client.get = AsyncMock(side_effect=RuntimeError("something went wrong")) + + result = await _fetch_openapi_spec(client, "http://tool:8000") + assert result is None + + +class TestDeriveBaseUrl: + def test_strips_path(self): + assert _derive_base_url("http://calc:8000/calculate") == "http://calc:8000" + + def test_strips_nested_path(self): + assert _derive_base_url("http://host:9000/api/v1/run") == "http://host:9000" + + def test_no_path(self): + assert _derive_base_url("http://tool:8000") == "http://tool:8000" + + def test_trailing_slash(self): + assert _derive_base_url("http://tool:8000/") == "http://tool:8000" + + +class TestExtractOperationId: + def test_extracts_from_valid_spec(self): + assert _extract_operation_id(SAMPLE_OPENAPI_SPEC) == "calculate" + + def test_returns_none_for_empty_paths(self): + assert _extract_operation_id({"paths": {}}) is None + + def test_returns_none_for_missing_paths(self): + assert _extract_operation_id({}) is None + + def test_returns_none_for_no_operation_id(self): + spec = {"paths": {"/run": {"post": {"summary": "No operationId here"}}}} + assert _extract_operation_id(spec) is None + + def test_skips_non_dict_entries(self): + spec = { + "paths": { + 
"/run": { + "parameters": [{"name": "x"}], + "post": {"operationId": "run_it", "summary": "Run"}, + } + } + } + assert _extract_operation_id(spec) == "run_it" + + +class TestGetToolByName: + def _make_module(self, tools: list[dict]) -> HttpToolRegistryModule: + deps = ModuleDependencies() + config = {"tools": tools} + return HttpToolRegistryModule(deps, config) + + def _mock_httpx(self, spec_map: dict[str, dict]): + """Return a context manager that patches httpx.AsyncClient. + + spec_map: domain substring -> openapi spec to return + """ + mock_responses = {} + for key, spec in spec_map.items(): + resp = MagicMock() + resp.status_code = 200 + resp.raise_for_status = lambda: None + resp.json.return_value = spec + mock_responses[key] = resp + + async def mock_get(url, **kwargs): + for key, resp in mock_responses.items(): + if key in url: + return resp + raise httpx.ConnectError("No mock for " + url) + + mock_client_cls = patch("modai.modules.tools.tool_registry.httpx.AsyncClient") + return mock_client_cls, mock_get + + @pytest.mark.asyncio + async def test_finds_tool_by_operation_id(self): + module = self._make_module( + [{"url": "http://calc:8000/calculate", "method": "POST"}] + ) + + mock_client_cls, mock_get = self._mock_httpx({"calc": SAMPLE_OPENAPI_SPEC}) + with mock_client_cls as cls: + mock_client = AsyncMock() + mock_client.get = mock_get + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=False) + cls.return_value = mock_client + + result = await module.get_tool_by_name("calculate") + + assert result == ToolDefinition( + url="http://calc:8000/calculate", + method="POST", + openapi_spec=SAMPLE_OPENAPI_SPEC, + ) + + @pytest.mark.asyncio + async def test_returns_none_for_unknown_name(self): + module = self._make_module( + [{"url": "http://calc:8000/calculate", "method": "POST"}] + ) + + mock_client_cls, mock_get = self._mock_httpx({"calc": SAMPLE_OPENAPI_SPEC}) + with mock_client_cls as cls: + 
mock_client = AsyncMock() + mock_client.get = mock_get + mock_client.__aenter__ = AsyncMock(return_value=mock_client) + mock_client.__aexit__ = AsyncMock(return_value=False) + cls.return_value = mock_client + + result = await module.get_tool_by_name("nonexistent") + + assert result is None + + @pytest.mark.asyncio + async def test_returns_none_for_empty_registry(self): + module = self._make_module([]) + result = await module.get_tool_by_name("calculate") + assert result is None diff --git a/backend/src/modai/modules/tools/__tests__/test_tools_web_module.py b/backend/src/modai/modules/tools/__tests__/test_tools_web_module.py new file mode 100644 index 0000000..ced2dd5 --- /dev/null +++ b/backend/src/modai/modules/tools/__tests__/test_tools_web_module.py @@ -0,0 +1,269 @@ +from unittest.mock import AsyncMock + +import pytest + +from modai.module import ModuleDependencies +from modai.modules.tools.module import ToolDefinition +from modai.modules.tools.tools_web_module import ( + OpenAIToolsWebModule, + _extract_parameters, + _transform_openapi_to_openai, +) + + +SAMPLE_OPENAPI_SPEC = { + "openapi": "3.1.0", + "info": {"title": "Calculator Tool", "version": "1.0.0"}, + "paths": { + "/calculate": { + "post": { + "summary": "Evaluate a math expression", + "operationId": "calculate", + "requestBody": { + "required": True, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": "Math expression to evaluate", + } + }, + "required": ["expression"], + } + } + }, + }, + } + } + }, +} + + +class TestTransformOpenapiToOpenai: + def test_transforms_valid_spec(self): + result = _transform_openapi_to_openai(SAMPLE_OPENAPI_SPEC) + assert result == { + "type": "function", + "function": { + "name": "calculate", + "description": "Evaluate a math expression", + "parameters": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": "Math expression to evaluate", 
+ } + }, + "required": ["expression"], + }, + "strict": True, + }, + } + + def test_uses_description_when_no_summary(self): + spec = { + "paths": { + "/run": { + "post": { + "description": "Runs something", + "operationId": "run_task", + "requestBody": { + "content": { + "application/json": { + "schema": {"type": "object", "properties": {}} + } + } + }, + } + } + } + } + result = _transform_openapi_to_openai(spec) + assert result["function"]["description"] == "Runs something" + + def test_empty_description_when_none_provided(self): + spec = { + "paths": { + "/run": { + "post": { + "operationId": "run_task", + } + } + } + } + result = _transform_openapi_to_openai(spec) + assert result["function"]["description"] == "" + + def test_returns_none_for_no_operation_id(self): + spec = {"paths": {"/run": {"post": {"summary": "No operationId"}}}} + result = _transform_openapi_to_openai(spec) + assert result is None + + def test_returns_none_for_empty_paths(self): + spec = {"paths": {}} + result = _transform_openapi_to_openai(spec) + assert result is None + + def test_returns_none_for_missing_paths(self): + result = _transform_openapi_to_openai({}) + assert result is None + + def test_default_parameters_when_no_request_body(self): + spec = { + "paths": { + "/status": { + "get": { + "operationId": "get_status", + "summary": "Get status", + } + } + } + } + result = _transform_openapi_to_openai(spec) + assert result["function"]["parameters"] == { + "type": "object", + "properties": {}, + } + + +class TestExtractParameters: + def test_extracts_json_schema(self): + operation = { + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {"x": {"type": "integer"}}, + } + } + } + } + } + result = _extract_parameters(operation) + assert result == { + "type": "object", + "properties": {"x": {"type": "integer"}}, + } + + def test_returns_default_when_no_request_body(self): + result = _extract_parameters({}) + assert result == {"type": 
"object", "properties": {}} + + def test_returns_default_when_no_json_content(self): + operation = { + "requestBody": {"content": {"text/plain": {"schema": {"type": "string"}}}} + } + result = _extract_parameters(operation) + assert result == {"type": "object", "properties": {}} + + +class TestToolsWebModule: + def _make_module( + self, registry_tools: list[ToolDefinition] + ) -> OpenAIToolsWebModule: + mock_registry = AsyncMock() + mock_registry.get_tools = AsyncMock(return_value=registry_tools) + deps = ModuleDependencies(modules={"tool_registry": mock_registry}) + return OpenAIToolsWebModule(deps, {}) + + def test_has_router_with_tools_endpoint(self): + module = self._make_module([]) + assert hasattr(module, "router") + routes = [r.path for r in module.router.routes] + assert "/api/tools" in routes + + @pytest.mark.asyncio + async def test_returns_empty_tools_when_registry_empty(self): + module = self._make_module([]) + result = await module.get_tools() + assert result == {"tools": []} + + @pytest.mark.asyncio + async def test_transforms_registry_tools_to_openai_format(self): + registry_tools = [ + ToolDefinition( + url="http://calc:8000/calculate", + method="POST", + openapi_spec=SAMPLE_OPENAPI_SPEC, + ) + ] + module = self._make_module(registry_tools) + result = await module.get_tools() + + assert len(result["tools"]) == 1 + tool = result["tools"][0] + assert tool["type"] == "function" + assert tool["function"]["name"] == "calculate" + assert tool["function"]["description"] == "Evaluate a math expression" + assert "expression" in tool["function"]["parameters"]["properties"] + + @pytest.mark.asyncio + async def test_skips_tools_without_operation_id(self): + bad_spec = {"paths": {"/run": {"post": {"summary": "No operationId"}}}} + registry_tools = [ + ToolDefinition( + url="http://calc:8000/calculate", + method="POST", + openapi_spec=SAMPLE_OPENAPI_SPEC, + ), + ToolDefinition( + url="http://bad:8000/run", + method="POST", + openapi_spec=bad_spec, + ), + ] + 
module = self._make_module(registry_tools) + result = await module.get_tools() + + assert len(result["tools"]) == 1 + assert result["tools"][0]["function"]["name"] == "calculate" + + @pytest.mark.asyncio + async def test_multiple_tools_transformed(self): + search_spec = { + "openapi": "3.1.0", + "info": {"title": "Search", "version": "1.0.0"}, + "paths": { + "/search": { + "put": { + "summary": "Search the web", + "operationId": "web_search", + "requestBody": { + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {"query": {"type": "string"}}, + "required": ["query"], + } + } + } + }, + } + } + }, + } + registry_tools = [ + ToolDefinition( + url="http://calc:8000/calculate", + method="POST", + openapi_spec=SAMPLE_OPENAPI_SPEC, + ), + ToolDefinition( + url="http://search:8000/search", + method="PUT", + openapi_spec=search_spec, + ), + ] + module = self._make_module(registry_tools) + result = await module.get_tools() + + assert len(result["tools"]) == 2 + names = [t["function"]["name"] for t in result["tools"]] + assert "calculate" in names + assert "web_search" in names diff --git a/backend/src/modai/modules/tools/module.py b/backend/src/modai/modules/tools/module.py new file mode 100644 index 0000000..bfdfec5 --- /dev/null +++ b/backend/src/modai/modules/tools/module.py @@ -0,0 +1,96 @@ +from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Any + +from fastapi import APIRouter + +from modai.module import ModaiModule, ModuleDependencies + + +@dataclass(frozen=True) +class ToolDefinition: + """A tool's metadata as returned by the Tool Registry.""" + + url: str + method: str + openapi_spec: dict[str, Any] + + +class ToolRegistryModule(ModaiModule, ABC): + """ + Module Declaration for: Tool Registry (Plain Module) + + Aggregates OpenAPI specs from all configured tools. 
+ + Each tool is an independent microservice that: + - Exposes an HTTP endpoint to trigger the tool (method chosen by the tool) + - Provides an OpenAPI spec describing all its endpoints and parameters + + The registry fetches each tool's OpenAPI spec and returns them grouped + together (unmodified). + + Configuration: + tools: list of dicts, each with: + - "url": the full trigger endpoint URL of the tool microservice + - "method": the HTTP method to invoke the tool (e.g. PUT, POST, GET) + The registry derives the base URL from "url" and appends + "/openapi.json" to fetch the spec. + + Example config: + tools: + - url: http://calculator-service:8000/calculate + method: POST + - url: http://web-search-service:8000/search + method: PUT + """ + + def __init__(self, dependencies: ModuleDependencies, config: dict[str, Any]): + super().__init__(dependencies, config) + + @abstractmethod + async def get_tools(self) -> list[ToolDefinition]: + """ + Returns all configured tool definitions. + + Each ToolDefinition contains the tool's trigger url, HTTP method, + and its full OpenAPI spec (unmodified). + + Unavailable tool services are omitted from the result with a + warning logged. + """ + pass + + @abstractmethod + async def get_tool_by_name(self, name: str) -> ToolDefinition | None: + """ + Look up a tool by its function name (derived from operationId). + + Returns the matching ToolDefinition if found, + or None if the tool name is not found. + """ + pass + + +class ToolsWebModule(ModaiModule, ABC): + """ + Module Declaration for: Tools Web Module (Web Module) + + Exposes GET /api/tools. Retrieves tool definitions from the Tool Registry + and returns them in a format suitable for the consumer (e.g. frontend, chat agent). 
+ """ + + def __init__(self, dependencies: ModuleDependencies, config: dict[str, Any]): + super().__init__(dependencies, config) + self.router = APIRouter() + self.router.add_api_route("/api/tools", self.get_tools, methods=["GET"]) + + @abstractmethod + async def get_tools(self) -> dict[str, Any]: + """ + Returns all available tools in a consumer-specific format. + + The response must contain a "tools" key with a list of tool definitions. + The exact structure of each tool definition is determined by the + implementation. + """ + pass diff --git a/backend/src/modai/modules/tools/tool_registry.py b/backend/src/modai/modules/tools/tool_registry.py new file mode 100644 index 0000000..ae388c2 --- /dev/null +++ b/backend/src/modai/modules/tools/tool_registry.py @@ -0,0 +1,92 @@ +import logging +from typing import Any +from urllib.parse import urlparse + +import httpx + +from modai.module import ModuleDependencies +from modai.modules.tools.module import ( + ToolDefinition, + ToolRegistryModule, +) + +logger = logging.getLogger(__name__) + +HTTP_TIMEOUT_SECONDS = 10.0 + + +class HttpToolRegistryModule(ToolRegistryModule): + """ + Tool Registry implementation that fetches OpenAPI specs from + configured tool microservices over HTTP. 
+ """ + + def __init__(self, dependencies: ModuleDependencies, config: dict[str, Any]): + super().__init__(dependencies, config) + self.tool_services: list[dict[str, str]] = config.get("tools", []) + + async def get_tools(self) -> list[ToolDefinition]: + tools: list[ToolDefinition] = [] + + async with httpx.AsyncClient(timeout=HTTP_TIMEOUT_SECONDS) as client: + for service in self.tool_services: + url = service["url"] + method = service["method"] + base_url = _derive_base_url(url) + spec = await _fetch_openapi_spec(client, base_url) + if spec is not None: + tools.append( + ToolDefinition(url=url, method=method, openapi_spec=spec) + ) + + return tools + + async def get_tool_by_name(self, name: str) -> ToolDefinition | None: + tools = await self.get_tools() + for tool in tools: + operation_id = _extract_operation_id(tool.openapi_spec) + if operation_id == name: + return tool + return None + + +def _extract_operation_id(spec: dict[str, Any]) -> str | None: + """Extract the operationId from the first operation in an OpenAPI spec.""" + paths = spec.get("paths", {}) + for _path, methods in paths.items(): + for _method, operation in methods.items(): + if isinstance(operation, dict) and "operationId" in operation: + return operation["operationId"] + return None + + +def _derive_base_url(trigger_url: str) -> str: + """Derive the service base URL from a full trigger endpoint URL. + + E.g. 
'http://calc:8000/calculate' -> 'http://calc:8000' + """ + parsed = urlparse(trigger_url) + return f"{parsed.scheme}://{parsed.netloc}" + + +async def _fetch_openapi_spec( + client: httpx.AsyncClient, base_url: str +) -> dict[str, Any] | None: + openapi_url = f"{base_url.rstrip('/')}/openapi.json" + try: + response = await client.get(openapi_url) + response.raise_for_status() + return response.json() + except httpx.HTTPStatusError as e: + logger.warning( + "Tool service %s returned HTTP %s", base_url, e.response.status_code + ) + return None + except httpx.RequestError as e: + logger.warning("Failed to reach tool service %s: %s", base_url, e) + return None + except Exception: + logger.warning( + "Unexpected error fetching spec from %s", base_url, exc_info=True + ) + return None diff --git a/backend/src/modai/modules/tools/tools_web_module.py b/backend/src/modai/modules/tools/tools_web_module.py new file mode 100644 index 0000000..ec8e874 --- /dev/null +++ b/backend/src/modai/modules/tools/tools_web_module.py @@ -0,0 +1,86 @@ +import logging +from typing import Any + +from modai.module import ModuleDependencies +from modai.modules.tools.module import ToolRegistryModule, ToolsWebModule + +logger = logging.getLogger(__name__) + + +class OpenAIToolsWebModule(ToolsWebModule): + """ + ToolsWebModule implementation that returns tools in OpenAI + function-calling format. 
+ + Transforms each tool's OpenAPI spec into the format expected by + the OpenAI Chat Completions API: + { + "type": "function", + "function": { + "name": "", + "description": "", + "parameters": { }, + "strict": true + } + } + """ + + def __init__(self, dependencies: ModuleDependencies, config: dict[str, Any]): + super().__init__(dependencies, config) + + self.tool_registry: ToolRegistryModule = dependencies.get_module( + "tool_registry" + ) + + async def get_tools(self) -> dict[str, Any]: + tools = await self.tool_registry.get_tools() + openai_tools = [] + for tool in tools: + openai_tool = _transform_openapi_to_openai(tool.openapi_spec) + if openai_tool is not None: + openai_tools.append(openai_tool) + return {"tools": openai_tools} + + +def _transform_openapi_to_openai(spec: dict[str, Any]) -> dict[str, Any] | None: + """ + Transform an OpenAPI spec into OpenAI function-calling format. + + Transformation rules: + - operationId → function.name + - summary (or description) → function.description + - Request body schema → function.parameters + - strict = True → function.strict (enables structured outputs) + """ + paths = spec.get("paths", {}) + for _path, methods in paths.items(): + for _method, operation in methods.items(): + if not isinstance(operation, dict) or "operationId" not in operation: + continue + + name = operation["operationId"] + description = operation.get("summary") or operation.get("description", "") + parameters = _extract_parameters(operation) + + return { + "type": "function", + "function": { + "name": name, + "description": description, + "parameters": parameters, + "strict": True, + }, + } + + logger.warning( + "No operation with operationId found in spec: %s", spec.get("info", {}) + ) + return None + + +def _extract_parameters(operation: dict[str, Any]) -> dict[str, Any]: + """Extract parameter schema from an OpenAPI operation's request body.""" + request_body = operation.get("requestBody", {}) + content = request_body.get("content", {}) + 
json_content = content.get("application/json", {}) + return json_content.get("schema", {"type": "object", "properties": {}}) From 96c6dce9ff0293967d447a9b4cc6f80864bcb0a5 Mon Sep 17 00:00:00 2001 From: guenhter Date: Tue, 3 Mar 2026 15:54:14 +0100 Subject: [PATCH 02/10] feat: integrate tools into agent loop --- .../chat/__tests__/test_strands_agent_chat.py | 388 ++++++++++++++++++ .../modai/modules/chat/openai_agent_chat.py | 146 ++++++- 2 files changed, 530 insertions(+), 4 deletions(-) diff --git a/backend/src/modai/modules/chat/__tests__/test_strands_agent_chat.py b/backend/src/modai/modules/chat/__tests__/test_strands_agent_chat.py index fee2cc1..85fd9bd 100644 --- a/backend/src/modai/modules/chat/__tests__/test_strands_agent_chat.py +++ b/backend/src/modai/modules/chat/__tests__/test_strands_agent_chat.py @@ -19,11 +19,16 @@ _to_strands_message, _message_text, _build_openai_response, + _extract_tool_names, + _resolve_request_tools, + _create_http_tool, + _extract_operation, ) from modai.modules.model_provider.module import ( ModelProviderResponse, ModelProvidersListResponse, ) +from modai.modules.tools.module import ToolDefinition import openai working_dir = Path.cwd() @@ -212,6 +217,272 @@ def test_builds_valid_response(self): assert resp.usage.total_tokens == 15 +# --------------------------------------------------------------------------- +# _extract_tool_names +# --------------------------------------------------------------------------- + +SAMPLE_OPENAPI_SPEC = { + "openapi": "3.1.0", + "info": {"title": "Calculator Tool", "version": "1.0.0"}, + "paths": { + "/calculate": { + "post": { + "summary": "Evaluate a math expression", + "operationId": "calculate", + "requestBody": { + "required": True, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "expression": { + "type": "string", + "description": "Math expression to evaluate", + } + }, + "required": ["expression"], + } + } + }, + }, + "responses": { + "200": { + 
"description": "Calculation result", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {"result": {"type": "number"}}, + } + } + }, + } + }, + } + } + }, +} + + +class TestExtractToolNames: + def test_extracts_function_tool_names(self): + body = { + "tools": [ + { + "type": "function", + "function": {"name": "calculate", "description": "calc"}, + }, + { + "type": "function", + "function": {"name": "web_search", "description": "search"}, + }, + ] + } + assert _extract_tool_names(body) == ["calculate", "web_search"] + + def test_empty_tools(self): + assert _extract_tool_names({"tools": []}) == [] + + def test_no_tools_key(self): + assert _extract_tool_names({"model": "gpt-4o"}) == [] + + def test_skips_non_function_types(self): + body = { + "tools": [ + {"type": "code_interpreter"}, + { + "type": "function", + "function": {"name": "calculate"}, + }, + ] + } + assert _extract_tool_names(body) == ["calculate"] + + def test_skips_missing_name(self): + body = {"tools": [{"type": "function", "function": {"description": "no name"}}]} + assert _extract_tool_names(body) == [] + + +# --------------------------------------------------------------------------- +# _extract_operation +# --------------------------------------------------------------------------- + + +class TestExtractOperation: + def test_extracts_first_operation(self): + op = _extract_operation(SAMPLE_OPENAPI_SPEC) + assert op is not None + assert op["operationId"] == "calculate" + assert op["summary"] == "Evaluate a math expression" + + def test_returns_none_for_empty_spec(self): + assert _extract_operation({}) is None + assert _extract_operation({"paths": {}}) is None + + def test_skips_non_dict_operations(self): + spec = {"paths": {"/foo": {"post": "not a dict"}}} + assert _extract_operation(spec) is None + + def test_skips_operations_without_operation_id(self): + spec = {"paths": {"/foo": {"post": {"summary": "no id"}}}} + assert _extract_operation(spec) is None + + +# 
--------------------------------------------------------------------------- +# _create_http_tool +# --------------------------------------------------------------------------- + + +class TestCreateHttpTool: + def test_creates_tool_from_valid_definition(self): + tool_def = ToolDefinition( + url="http://calc:8000/calculate", + method="POST", + openapi_spec=SAMPLE_OPENAPI_SPEC, + ) + tool = _create_http_tool(tool_def) + assert tool is not None + assert tool.tool_name == "calculate" + assert tool.tool_spec["name"] == "calculate" + assert tool.tool_spec["description"] == "Evaluate a math expression" + schema = tool.tool_spec["inputSchema"]["json"] + assert "expression" in schema["properties"] + + def test_returns_none_for_empty_spec(self): + tool_def = ToolDefinition( + url="http://calc:8000/calculate", + method="POST", + openapi_spec={"paths": {}}, + ) + assert _create_http_tool(tool_def) is None + + def test_tool_handler_success(self): + tool_def = ToolDefinition( + url="http://calc:8000/calculate", + method="POST", + openapi_spec=SAMPLE_OPENAPI_SPEC, + ) + tool = _create_http_tool(tool_def) + assert tool is not None + + mock_response = Mock() + mock_response.raise_for_status = Mock() + mock_response.text = '{"result": 42}' + + with patch( + "modai.modules.chat.openai_agent_chat.httpx.Client" + ) as mock_client_cls: + mock_client = Mock() + mock_client.__enter__ = Mock(return_value=mock_client) + mock_client.__exit__ = Mock(return_value=False) + mock_client.request.return_value = mock_response + mock_client_cls.return_value = mock_client + + result = tool._tool_func( + { + "toolUseId": "tu_123", + "name": "calculate", + "input": {"expression": "6*7"}, + }, + ) + + assert result["status"] == "success" + assert result["toolUseId"] == "tu_123" + assert '{"result": 42}' in result["content"][0]["text"] + mock_client.request.assert_called_once_with( + method="POST", + url="http://calc:8000/calculate", + json={"expression": "6*7"}, + ) + + def 
test_tool_handler_http_error(self): + tool_def = ToolDefinition( + url="http://calc:8000/calculate", + method="POST", + openapi_spec=SAMPLE_OPENAPI_SPEC, + ) + tool = _create_http_tool(tool_def) + assert tool is not None + + with patch( + "modai.modules.chat.openai_agent_chat.httpx.Client" + ) as mock_client_cls: + mock_client = Mock() + mock_client.__enter__ = Mock(return_value=mock_client) + mock_client.__exit__ = Mock(return_value=False) + mock_client.request.side_effect = Exception("Connection refused") + mock_client_cls.return_value = mock_client + + result = tool._tool_func( + { + "toolUseId": "tu_456", + "name": "calculate", + "input": {"expression": "1/0"}, + }, + ) + + assert result["status"] == "error" + assert result["toolUseId"] == "tu_456" + assert "Connection refused" in result["content"][0]["text"] + + +# --------------------------------------------------------------------------- +# _resolve_request_tools +# --------------------------------------------------------------------------- + + +class TestResolveRequestTools: + @pytest.mark.asyncio + async def test_returns_empty_when_no_registry(self): + body = { + "tools": [ + {"type": "function", "function": {"name": "calculate"}}, + ] + } + result = await _resolve_request_tools(body, None) + assert result == [] + + @pytest.mark.asyncio + async def test_returns_empty_when_no_tools_in_request(self): + mock_registry = Mock() + result = await _resolve_request_tools({"model": "gpt-4o"}, mock_registry) + assert result == [] + + @pytest.mark.asyncio + async def test_resolves_tools_from_registry(self): + tool_def = ToolDefinition( + url="http://calc:8000/calculate", + method="POST", + openapi_spec=SAMPLE_OPENAPI_SPEC, + ) + mock_registry = Mock() + mock_registry.get_tool_by_name = AsyncMock(return_value=tool_def) + + body = { + "tools": [ + {"type": "function", "function": {"name": "calculate"}}, + ] + } + result = await _resolve_request_tools(body, mock_registry) + assert len(result) == 1 + assert 
result[0].tool_name == "calculate" + mock_registry.get_tool_by_name.assert_called_once_with("calculate") + + @pytest.mark.asyncio + async def test_skips_unknown_tools(self): + mock_registry = Mock() + mock_registry.get_tool_by_name = AsyncMock(return_value=None) + + body = { + "tools": [ + {"type": "function", "function": {"name": "unknown_tool"}}, + ] + } + result = await _resolve_request_tools(body, mock_registry) + assert result == [] + + # --------------------------------------------------------------------------- # StrandsAgentChatModule.__init__ # --------------------------------------------------------------------------- @@ -228,6 +499,20 @@ def test_creates_with_provider(self): module = StrandsAgentChatModule(dependencies=deps, config={}) assert module.provider_module is not None + def test_tool_registry_is_none_when_not_configured(self): + deps = _make_dependencies() + module = StrandsAgentChatModule(dependencies=deps, config={}) + assert module.tool_registry is None + + def test_tool_registry_set_when_configured(self): + mock_registry = Mock() + provider_module = _make_mock_provider_module() + deps = ModuleDependencies( + {"llm_provider_module": provider_module, "tool_registry": mock_registry} + ) + module = StrandsAgentChatModule(dependencies=deps, config={}) + assert module.tool_registry is mock_registry + # --------------------------------------------------------------------------- # StrandsAgentChatModule.generate_response (mocked) @@ -357,6 +642,109 @@ async def fake_stream_async(prompt): assert events[4].response.output[0].content[0].text == "Hello world" +@pytest.mark.asyncio +async def test_generate_response_with_tools(): + """Tools from the request body are resolved and passed to the agent.""" + tool_def = ToolDefinition( + url="http://calc:8000/calculate", + method="POST", + openapi_spec=SAMPLE_OPENAPI_SPEC, + ) + mock_registry = Mock() + mock_registry.get_tool_by_name = AsyncMock(return_value=tool_def) + + provider_module = 
_make_mock_provider_module() + deps = ModuleDependencies( + {"llm_provider_module": provider_module, "tool_registry": mock_registry} + ) + module = StrandsAgentChatModule(dependencies=deps, config={}) + request = Mock(spec=Request) + + fake_result = _FakeAgentResult() + + with ( + patch( + "modai.modules.chat.openai_agent_chat._create_agent" + ) as mock_create_agent, + patch("asyncio.to_thread", new_callable=AsyncMock, return_value=fake_result), + ): + mock_agent = Mock() + mock_create_agent.return_value = mock_agent + + body = { + "model": "myprovider/gpt-4o", + "input": [{"role": "user", "content": "Calculate 6*7"}], + "tools": [ + { + "type": "function", + "function": { + "name": "calculate", + "description": "Evaluate a math expression", + "parameters": { + "type": "object", + "properties": {"expression": {"type": "string"}}, + }, + }, + } + ], + } + + result = await module.generate_response(request, body) + + # Verify the tool registry was queried + mock_registry.get_tool_by_name.assert_called_once_with("calculate") + + # Verify _create_agent was called with tools + call_args = mock_create_agent.call_args + tools_arg = call_args[0][3] if len(call_args[0]) > 3 else call_args[1].get("tools") + assert tools_arg is not None + assert len(tools_arg) == 1 + assert tools_arg[0].tool_name == "calculate" + + assert isinstance(result, openai.types.responses.Response) + + +@pytest.mark.asyncio +async def test_generate_response_without_tool_registry(): + """Without tool_registry configured, tools in request are ignored.""" + deps = _make_dependencies() + module = StrandsAgentChatModule(dependencies=deps, config={}) + request = Mock(spec=Request) + + fake_result = _FakeAgentResult() + + with ( + patch( + "modai.modules.chat.openai_agent_chat._create_agent" + ) as mock_create_agent, + patch("asyncio.to_thread", new_callable=AsyncMock, return_value=fake_result), + ): + mock_agent = Mock() + mock_create_agent.return_value = mock_agent + + body = { + "model": 
"myprovider/gpt-4o", + "input": [{"role": "user", "content": "Hello"}], + "tools": [ + { + "type": "function", + "function": {"name": "calculate"}, + } + ], + } + + result = await module.generate_response(request, body) + + # _create_agent should be called with empty tools list + call_args = mock_create_agent.call_args + tools_arg = ( + call_args[0][3] if len(call_args[0]) > 3 else call_args[1].get("tools", []) + ) + assert tools_arg == [] + + assert isinstance(result, openai.types.responses.Response) + + # --------------------------------------------------------------------------- # Provider resolution # --------------------------------------------------------------------------- diff --git a/backend/src/modai/modules/chat/openai_agent_chat.py b/backend/src/modai/modules/chat/openai_agent_chat.py index 428f904..9a3645c 100644 --- a/backend/src/modai/modules/chat/openai_agent_chat.py +++ b/backend/src/modai/modules/chat/openai_agent_chat.py @@ -2,8 +2,9 @@ Strands Agent Chat Module: ChatLLMModule implementation using Strands Agents SDK. Routes OpenAI-compatible requests through the Strands Agent framework with -OpenAI model provider. Tool support is planned for later — this module currently -only serves model requests via the framework. +OpenAI model provider. Supports external tool microservices via the Tool +Registry — requested tools are resolved, wrapped as Strands agent tools, +and invoked over HTTP during the agent's reasoning loop. 
""" import asyncio @@ -12,6 +13,7 @@ from datetime import datetime, timezone from typing import Any, AsyncGenerator +import httpx from fastapi import Request from openai.types.responses import ( Response as OpenAIResponse, @@ -26,6 +28,8 @@ ) from strands import Agent from strands.models import OpenAIModel +from strands.tools.tools import PythonAgentTool +from strands.types.tools import ToolResult, ToolSpec, ToolUse from modai.module import ModuleDependencies from modai.modules.chat.module import ChatLLMModule @@ -33,17 +37,20 @@ ModelProviderModule, ModelProviderResponse, ) +from modai.modules.tools.module import ToolDefinition, ToolRegistryModule logger = logging.getLogger(__name__) DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant." +TOOL_HTTP_TIMEOUT_SECONDS = 30.0 class StrandsAgentChatModule(ChatLLMModule): """Strands Agent LLM Provider for Chat Responses. Implements the ChatLLMModule interface using the Strands Agents SDK - with OpenAI model provider. No tool support yet. + with OpenAI model provider. Supports external tool microservices + via an optional Tool Registry dependency. 
""" def __init__(self, dependencies: ModuleDependencies, config: dict[str, Any]): @@ -57,12 +64,17 @@ def __init__(self, dependencies: ModuleDependencies, config: dict[str, Any]): "StrandsAgentChatModule requires 'llm_provider_module' module dependency" ) + self.tool_registry: ToolRegistryModule | None = dependencies.get_module( + "tool_registry" + ) + async def generate_response( self, request: Request, body_json: OpenAICreateResponse ) -> OpenAIResponse | AsyncGenerator[OpenAIResponseStreamEvent, None]: provider_name, actual_model = _parse_model(body_json.get("model", "")) provider = await self._resolve_provider(request, provider_name) - agent = _create_agent(provider, actual_model, body_json) + tools = await _resolve_request_tools(body_json, self.tool_registry) + agent = _create_agent(provider, actual_model, body_json, tools) user_message = _extract_last_user_message(body_json) if body_json.get("stream", False): @@ -104,6 +116,7 @@ def _create_agent( provider: ModelProviderResponse, model_id: str, body_json: OpenAICreateResponse, + tools: list[PythonAgentTool] | None = None, ) -> Agent: """Build a fresh Strands ``Agent`` for this request.""" client_args: dict[str, Any] = {"api_key": provider.api_key} @@ -119,6 +132,7 @@ def _create_agent( model=model, system_prompt=system_prompt, messages=prior_messages or None, + tools=tools or [], callback_handler=None, # suppress default stdout printing ) @@ -178,6 +192,130 @@ def _message_text(msg: Any) -> str: return "" +# --------------------------------------------------------------------------- +# Tool resolution helpers +# --------------------------------------------------------------------------- + + +def _extract_tool_names(body_json: OpenAICreateResponse) -> list[str]: + """Extract tool function names from the OpenAI-format request body.""" + tools = body_json.get("tools", []) + names: list[str] = [] + for tool in tools: + if isinstance(tool, dict) and tool.get("type") == "function": + fn = tool.get("function", {}) + 
if isinstance(fn, dict): + name = fn.get("name") + if name: + names.append(name) + return names + + +async def _resolve_request_tools( + body_json: OpenAICreateResponse, + tool_registry: ToolRegistryModule | None, +) -> list[PythonAgentTool]: + """Resolve requested tools from the request body into Strands agent tools. + + For each tool name in the request, the corresponding ``ToolDefinition`` + is looked up in the registry and wrapped as a ``PythonAgentTool`` that + invokes the tool microservice over HTTP. + + Returns an empty list when no registry is configured or no tools are + requested. + """ + if not tool_registry: + return [] + + tool_names = _extract_tool_names(body_json) + if not tool_names: + return [] + + strands_tools: list[PythonAgentTool] = [] + for name in tool_names: + tool_def = await tool_registry.get_tool_by_name(name) + if tool_def is None: + logger.warning("Tool '%s' not found in registry, skipping", name) + continue + strands_tool = _create_http_tool(tool_def) + if strands_tool: + strands_tools.append(strands_tool) + + return strands_tools + + +def _create_http_tool(tool_def: ToolDefinition) -> PythonAgentTool | None: + """Create a Strands ``PythonAgentTool`` that invokes a tool via HTTP. + + The tool spec (name, description, input schema) is derived from the + tool's OpenAPI spec. The handler makes an HTTP request to the tool's + endpoint and returns the response body to the LLM. 
+ """ + operation = _extract_operation(tool_def.openapi_spec) + if not operation: + logger.warning( + "No operation found in OpenAPI spec for tool at %s", tool_def.url + ) + return None + + operation_id = operation.get("operationId", "") + description = operation.get("summary") or operation.get("description", "") + + request_body = operation.get("requestBody", {}) + content = request_body.get("content", {}) + json_content = content.get("application/json", {}) + parameters_schema = json_content.get("schema", {"type": "object", "properties": {}}) + + tool_spec: ToolSpec = { + "name": operation_id, + "description": description, + "inputSchema": {"json": parameters_schema}, + } + + url = tool_def.url + method = tool_def.method + + def _handler(tool_use: ToolUse, **kwargs: Any) -> ToolResult: # noqa: ARG001 + """Invoke the tool microservice over HTTP.""" + params = tool_use["input"] + try: + with httpx.Client(timeout=TOOL_HTTP_TIMEOUT_SECONDS) as client: + response = client.request( + method=method.upper(), + url=url, + json=params, + ) + response.raise_for_status() + return { + "toolUseId": tool_use["toolUseId"], + "status": "success", + "content": [{"text": response.text}], + } + except Exception as exc: + logger.error("Tool '%s' invocation failed: %s", operation_id, exc) + return { + "toolUseId": tool_use["toolUseId"], + "status": "error", + "content": [{"text": f"Tool invocation failed: {exc}"}], + } + + return PythonAgentTool( + tool_name=operation_id, + tool_spec=tool_spec, + tool_func=_handler, + ) + + +def _extract_operation(spec: dict[str, Any]) -> dict[str, Any] | None: + """Extract the first operation from an OpenAPI spec.""" + paths = spec.get("paths", {}) + for _path, methods in paths.items(): + for _method, operation in methods.items(): + if isinstance(operation, dict) and "operationId" in operation: + return operation + return None + + # --------------------------------------------------------------------------- # Response builders # 
--------------------------------------------------------------------------- From 8420ff67b8999a14d7bd73afcd9f5cbeb1738143 Mon Sep 17 00:00:00 2001 From: guenhter Date: Tue, 3 Mar 2026 16:03:59 +0100 Subject: [PATCH 03/10] chore: move backend --- .agents/skills/tool-microservice/SKILL.md | 2 +- .github/workflows/ci.yml | 10 +++---- .github/workflows/docker-build.yml | 2 +- AGENTS.md | 26 +++++++++---------- README.md | 4 +-- backend/{ => omni}/.env.sample | 0 backend/{ => omni}/.python-version | 0 backend/{ => omni}/.uvrc | 0 backend/{ => omni}/CHANGELOG.md | 0 backend/{ => omni}/Dockerfile | 0 backend/{ => omni}/README.md | 2 +- backend/{ => omni}/config.yaml | 0 backend/{ => omni}/docs/architecture/auth.md | 0 backend/{ => omni}/docs/architecture/chat.md | 0 backend/{ => omni}/docs/architecture/core.md | 0 .../docs/architecture/persistence.md | 0 backend/{ => omni}/docs/architecture/tools.md | 0 .../docs/learnings/INSTRUCTION_UPDATES.md | 0 backend/{ => omni}/pyproject.toml | 0 backend/{ => omni}/src/modai/__init__.py | 0 .../src/modai/__tests__/__init__.py | 0 .../src/modai/__tests__/test_module_loader.py | 0 .../{ => omni}/src/modai/default_config.yaml | 0 backend/{ => omni}/src/modai/main.py | 0 backend/{ => omni}/src/modai/module.py | 0 backend/{ => omni}/src/modai/module_loader.py | 0 .../{ => omni}/src/modai/modules/__init__.py | 0 .../modai/modules/authentication/__init__.py | 0 .../authentication/__tests__/__init__.py | 0 .../__tests__/test_authentication.py | 0 .../modai/modules/authentication/module.py | 0 .../password_authentication_module.py | 0 .../src/modai/modules/chat/__init__.py | 0 .../modai/modules/chat/__tests__/__init__.py | 0 .../chat/__tests__/test_openai_raw_chat.py | 0 .../chat/__tests__/test_strands_agent_chat.py | 0 .../src/modai/modules/chat/module.py | 0 .../modai/modules/chat/openai_agent_chat.py | 0 .../src/modai/modules/chat/openai_raw_chat.py | 0 .../src/modai/modules/chat/web_chat_router.py | 0 
.../src/modai/modules/health/__init__.py | 0 .../modules/health/__tests__/__init__.py | 0 .../modules/health/__tests__/test_health.py | 0 .../src/modai/modules/health/module.py | 0 .../modules/health/simple_health_module.py | 0 .../modai/modules/model_provider/__init__.py | 0 .../model_provider/__tests__/__init__.py | 0 .../test_central_model_provider_router.py | 0 .../__tests__/test_model_provider.py | 0 .../modules/model_provider/central_router.py | 0 .../modai/modules/model_provider/module.py | 0 .../modules/model_provider/openai_provider.py | 0 .../modules/model_provider_store/__init__.py | 0 .../__tests__/__init__.py | 0 .../abstract_model_provider_store_test.py | 0 .../test_sql_model_provider_store.py | 0 .../modules/model_provider_store/module.py | 0 .../sql_model_provider_store.py | 0 .../src/modai/modules/session/__init__.py | 0 .../modules/session/__tests__/__init__.py | 0 .../modules/session/__tests__/test_session.py | 0 .../modules/session/jwt_session_module.py | 0 .../src/modai/modules/session/module.py | 0 .../startup_config/__tests__/__init__.py | 0 .../__tests__/test_config_loader.py | 0 .../modai/modules/startup_config/module.py | 0 .../startup_config/yaml_config_module.py | 0 .../src/modai/modules/tools/__init__.py | 0 .../modai/modules/tools/__tests__/__init__.py | 0 .../tools/__tests__/test_tool_registry.py | 0 .../tools/__tests__/test_tools_web_module.py | 0 .../src/modai/modules/tools/module.py | 0 .../src/modai/modules/tools/tool_registry.py | 0 .../modai/modules/tools/tools_web_module.py | 0 .../src/modai/modules/user/__init__.py | 0 .../modai/modules/user/__tests__/__init__.py | 0 .../modai/modules/user/__tests__/test_user.py | 0 .../src/modai/modules/user/module.py | 0 .../modai/modules/user/simple_user_module.py | 0 .../src/modai/modules/user_settings/README.md | 0 .../modai/modules/user_settings/__init__.py | 0 .../user_settings/__tests__/__init__.py | 0 .../__tests__/test_user_settings.py | 0 
.../src/modai/modules/user_settings/module.py | 0 .../simple_user_settings_module.py | 0 .../modules/user_settings_store/__init__.py | 0 .../user_settings_store/__tests__/__init__.py | 0 .../__tests__/test_user_settings_store.py | 0 .../inmemory_user_settings_store.py | 0 .../modules/user_settings_store/module.py | 0 .../sqlalchemy_user_settings_store.py | 0 .../src/modai/modules/user_store/__init__.py | 0 .../modules/user_store/__tests__/__init__.py | 0 .../__tests__/abstract_user_store_test.py | 0 .../__tests__/test_inmemory_user_store.py | 0 .../__tests__/test_sql_model_user_store.py | 0 .../modules/user_store/inmemory_user_store.py | 0 .../src/modai/modules/user_store/module.py | 0 .../user_store/sql_model_user_store.py | 0 .../user_store/sqlalchemy_user_store.py | 0 .../tests_omni_full/playwright.config.ts | 2 +- .../compose-onmi-full-build.yaml | 2 +- 102 files changed, 25 insertions(+), 25 deletions(-) rename backend/{ => omni}/.env.sample (100%) rename backend/{ => omni}/.python-version (100%) rename backend/{ => omni}/.uvrc (100%) rename backend/{ => omni}/CHANGELOG.md (100%) rename backend/{ => omni}/Dockerfile (100%) rename backend/{ => omni}/README.md (95%) rename backend/{ => omni}/config.yaml (100%) rename backend/{ => omni}/docs/architecture/auth.md (100%) rename backend/{ => omni}/docs/architecture/chat.md (100%) rename backend/{ => omni}/docs/architecture/core.md (100%) rename backend/{ => omni}/docs/architecture/persistence.md (100%) rename backend/{ => omni}/docs/architecture/tools.md (100%) rename backend/{ => omni}/docs/learnings/INSTRUCTION_UPDATES.md (100%) rename backend/{ => omni}/pyproject.toml (100%) rename backend/{ => omni}/src/modai/__init__.py (100%) rename backend/{ => omni}/src/modai/__tests__/__init__.py (100%) rename backend/{ => omni}/src/modai/__tests__/test_module_loader.py (100%) rename backend/{ => omni}/src/modai/default_config.yaml (100%) rename backend/{ => omni}/src/modai/main.py (100%) rename backend/{ => 
omni}/src/modai/module.py (100%) rename backend/{ => omni}/src/modai/module_loader.py (100%) rename backend/{ => omni}/src/modai/modules/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/authentication/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/authentication/__tests__/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/authentication/__tests__/test_authentication.py (100%) rename backend/{ => omni}/src/modai/modules/authentication/module.py (100%) rename backend/{ => omni}/src/modai/modules/authentication/password_authentication_module.py (100%) rename backend/{ => omni}/src/modai/modules/chat/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/chat/__tests__/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/chat/__tests__/test_openai_raw_chat.py (100%) rename backend/{ => omni}/src/modai/modules/chat/__tests__/test_strands_agent_chat.py (100%) rename backend/{ => omni}/src/modai/modules/chat/module.py (100%) rename backend/{ => omni}/src/modai/modules/chat/openai_agent_chat.py (100%) rename backend/{ => omni}/src/modai/modules/chat/openai_raw_chat.py (100%) rename backend/{ => omni}/src/modai/modules/chat/web_chat_router.py (100%) rename backend/{ => omni}/src/modai/modules/health/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/health/__tests__/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/health/__tests__/test_health.py (100%) rename backend/{ => omni}/src/modai/modules/health/module.py (100%) rename backend/{ => omni}/src/modai/modules/health/simple_health_module.py (100%) rename backend/{ => omni}/src/modai/modules/model_provider/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/model_provider/__tests__/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/model_provider/__tests__/test_central_model_provider_router.py (100%) rename backend/{ => omni}/src/modai/modules/model_provider/__tests__/test_model_provider.py (100%) rename backend/{ 
=> omni}/src/modai/modules/model_provider/central_router.py (100%) rename backend/{ => omni}/src/modai/modules/model_provider/module.py (100%) rename backend/{ => omni}/src/modai/modules/model_provider/openai_provider.py (100%) rename backend/{ => omni}/src/modai/modules/model_provider_store/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/model_provider_store/__tests__/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/model_provider_store/__tests__/abstract_model_provider_store_test.py (100%) rename backend/{ => omni}/src/modai/modules/model_provider_store/__tests__/test_sql_model_provider_store.py (100%) rename backend/{ => omni}/src/modai/modules/model_provider_store/module.py (100%) rename backend/{ => omni}/src/modai/modules/model_provider_store/sql_model_provider_store.py (100%) rename backend/{ => omni}/src/modai/modules/session/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/session/__tests__/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/session/__tests__/test_session.py (100%) rename backend/{ => omni}/src/modai/modules/session/jwt_session_module.py (100%) rename backend/{ => omni}/src/modai/modules/session/module.py (100%) rename backend/{ => omni}/src/modai/modules/startup_config/__tests__/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/startup_config/__tests__/test_config_loader.py (100%) rename backend/{ => omni}/src/modai/modules/startup_config/module.py (100%) rename backend/{ => omni}/src/modai/modules/startup_config/yaml_config_module.py (100%) rename backend/{ => omni}/src/modai/modules/tools/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/tools/__tests__/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/tools/__tests__/test_tool_registry.py (100%) rename backend/{ => omni}/src/modai/modules/tools/__tests__/test_tools_web_module.py (100%) rename backend/{ => omni}/src/modai/modules/tools/module.py (100%) rename backend/{ => 
omni}/src/modai/modules/tools/tool_registry.py (100%) rename backend/{ => omni}/src/modai/modules/tools/tools_web_module.py (100%) rename backend/{ => omni}/src/modai/modules/user/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/user/__tests__/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/user/__tests__/test_user.py (100%) rename backend/{ => omni}/src/modai/modules/user/module.py (100%) rename backend/{ => omni}/src/modai/modules/user/simple_user_module.py (100%) rename backend/{ => omni}/src/modai/modules/user_settings/README.md (100%) rename backend/{ => omni}/src/modai/modules/user_settings/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/user_settings/__tests__/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/user_settings/__tests__/test_user_settings.py (100%) rename backend/{ => omni}/src/modai/modules/user_settings/module.py (100%) rename backend/{ => omni}/src/modai/modules/user_settings/simple_user_settings_module.py (100%) rename backend/{ => omni}/src/modai/modules/user_settings_store/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/user_settings_store/__tests__/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/user_settings_store/__tests__/test_user_settings_store.py (100%) rename backend/{ => omni}/src/modai/modules/user_settings_store/inmemory_user_settings_store.py (100%) rename backend/{ => omni}/src/modai/modules/user_settings_store/module.py (100%) rename backend/{ => omni}/src/modai/modules/user_settings_store/sqlalchemy_user_settings_store.py (100%) rename backend/{ => omni}/src/modai/modules/user_store/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/user_store/__tests__/__init__.py (100%) rename backend/{ => omni}/src/modai/modules/user_store/__tests__/abstract_user_store_test.py (100%) rename backend/{ => omni}/src/modai/modules/user_store/__tests__/test_inmemory_user_store.py (100%) rename backend/{ => 
omni}/src/modai/modules/user_store/__tests__/test_sql_model_user_store.py (100%) rename backend/{ => omni}/src/modai/modules/user_store/inmemory_user_store.py (100%) rename backend/{ => omni}/src/modai/modules/user_store/module.py (100%) rename backend/{ => omni}/src/modai/modules/user_store/sql_model_user_store.py (100%) rename backend/{ => omni}/src/modai/modules/user_store/sqlalchemy_user_store.py (100%) diff --git a/.agents/skills/tool-microservice/SKILL.md b/.agents/skills/tool-microservice/SKILL.md index 6d23422..ca1366e 100644 --- a/.agents/skills/tool-microservice/SKILL.md +++ b/.agents/skills/tool-microservice/SKILL.md @@ -155,4 +155,4 @@ Expected: ## Architecture Reference -See `backend/docs/architecture/tools.md` for the full tools architecture including the registry module, web module, and chat agent integration. +See `backend/omni/docs/architecture/tools.md` for the full tools architecture including the registry module, web module, and chat agent integration. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c8ebd49..89607fa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,15 +20,15 @@ jobs: run: uv python install 3.13 - name: Install dependencies - working-directory: backend + working-directory: backend/omni run: uv sync --all-extras - name: Check formatting - working-directory: backend + working-directory: backend/omni run: uv run ruff format --check . - name: Lint - working-directory: backend + working-directory: backend/omni run: uv run ruff check . 
frontend-format: @@ -70,11 +70,11 @@ jobs: run: uv python install 3.13 - name: Install dependencies - working-directory: backend + working-directory: backend/omni run: uv sync --all-extras - name: Run tests - working-directory: backend + working-directory: backend/omni run: uv run pytest -v frontend-unit: diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml index 30b7edb..6170778 100644 --- a/.github/workflows/docker-build.yml +++ b/.github/workflows/docker-build.yml @@ -12,7 +12,7 @@ jobs: matrix: include: - name: backend - context: backend + context: backend/omni package: modai-chat-backend - name: frontend context: frontend_omni diff --git a/AGENTS.md b/AGENTS.md index c8f1d3a..95d25ad 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -9,8 +9,8 @@ This project is designed for AI-first development. All agents MUST follow these ### 0. Context Loading (ALWAYS FIRST) - **RULE**: ALWAYS read project context before starting ANY task. - **PROCESS**: - 1. Read `{frontend|backend}/docs/architecture/*.md` to understand frontend system design. - 2. Read `{frontend|backend}/docs/learnings/*.md` to learn from past corrections. + 1. Read `{frontend_omni|backend/omni}/docs/architecture/*.md` to understand frontend system design. + 2. Read `{frontend_omni|backend/omni}/docs/learnings/*.md` to learn from past corrections. 3. Check `.agents/skills/` for relevant technology skills. - **WHY**: These files contain critical knowledge from past work. Skipping them leads to repeated mistakes and inconsistent code. @@ -25,14 +25,14 @@ This project is designed for AI-first development. All agents MUST follow these ### 2. Architecture First - **RULE**: Architecture MUST be adapted/reviewed before coding starts. - **PROCESS**: - 1. For any new feature/package, update `{frontend|backend}/docs/DECISIONS.md`. + 1. For any new feature/package, update `{frontend_omni|backend/omni}/docs/DECISIONS.md`. 2. Ensure the architecture aligns with the overall project goals. 
### 3. Learning from Corrections - **RULE**: If the user corrects a mistake, update the instructions immediately. - **PROCESS**: 1. Identify the root cause of the mistake. - 2. Update `{frontend|backend}/docs/learnings/INSTRUCTION_UPDATES.md` with a new rule to prevent recurrence. + 2. Update `{frontend_omni|backend/omni}/docs/learnings/INSTRUCTION_UPDATES.md` with a new rule to prevent recurrence. 3. Append relevant rules to `AGENTS.md` if they are project-wide. ### 4. Test-Driven Completion @@ -50,8 +50,8 @@ This project is designed for AI-first development. All agents MUST follow these ### 6. Documentation Updates for API Changes (MANDATORY) - **RULE**: When adding, modifying, or deleting API endpoints, ALWAYS update documentation. - **PROCESS**: - 1. Check `{frontend|backend}/README.md` for endpoint references and usage examples. - 2. Check `{frontend|backend}/docs/architecture/*.md` for endpoint documentation. + 1. Check `{frontend_omni|backend/omni}/README.md` for endpoint references and usage examples. + 2. Check `{frontend_omni|backend/omni}/docs/architecture/*.md` for endpoint documentation. 3. Check any other docs that reference API endpoints. 4. Update all affected documentation before marking task as complete. @@ -105,7 +105,7 @@ modAI-chat is a full-stack application with separate backend and frontend compon ### Environment Setup -- **Location**: Work in `backend/` directory. When executing commands, switch to the backend dir. +- **Location**: Work in `backend/omni/` directory. When executing commands, switch to the backend dir. 
- **Package Manager**: Use [UV](https://docs.astral.sh/uv/) for dependency management - **Start Server**: `uv run uvicorn modai.main:app` - **Install Dependencies**: `uv add ` @@ -114,17 +114,17 @@ modAI-chat is a full-stack application with separate backend and frontend compon Before any backend work, read relevant architecture documents: -- **Always read**: `backend/architecture/core.md` -- **Authentication work**: `backend/architecture/auth.md` -- **Chat/AI features**: `backend/architecture/chat.md` -- **Database work**: `backend/architecture/persistence.md` +- **Always read**: `backend/omni/docs/architecture/core.md` +- **Authentication work**: `backend/omni/docs/architecture/auth.md` +- **Chat/AI features**: `backend/omni/docs/architecture/chat.md` +- **Database work**: `backend/omni/docs/architecture/persistence.md` - **SQLModel work**: https://fastapi.tiangolo.com/tutorial/sql-databases/ ### Testing - **Framework**: pytest - **Command**: `uv run pytest` -- **Location**: Tests live alongside source code in `__tests__/` directories under `backend/src/modai/` and `backend/src/modai/modules/*/` +- **Location**: Tests live alongside source code in `__tests__/` directories under `backend/omni/src/modai/` and `backend/omni/src/modai/modules/*/` - **Test Coverage**: Always add unit tests for new features or bug fixes - **Test Isolation**: Use mocking for external dependencies - **Atomic Tests**: Each test function should test one specific behavior @@ -195,6 +195,6 @@ For comprehensive e2e testing best practices and patterns, refer to `e2e_tests/B - **Backend**: Never expose internal error details to users - **Frontend**: Validate all user inputs -- **Authentication**: Follow architecture guidelines in `backend/architecture/auth.md` +- **Authentication**: Follow architecture guidelines in `backend/omni/docs/architecture/auth.md` This guide provides AI agents with the essential context needed to work effectively on both backend and frontend components while 
maintaining project standards and architectural integrity. diff --git a/README.md b/README.md index d6bc075..8077b79 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ We try to keep the root of the repo slim and add things like `package.json` file **Folder hierarchy:** -* **./backend**: Contains the Python Backend code +* **./backend/omni**: Contains the Python Backend code * **./frontend_omni**: Universal full fledged chat frontend ## 🖥️ modAI Frontends @@ -40,7 +40,7 @@ Why more than one frontend: because there are different use cases. e.g. one full ### Backend Setup ```bash -cd backend +cd backend/omni uv sync uv run uvicorn modai.main:app --reload ``` diff --git a/backend/.env.sample b/backend/omni/.env.sample similarity index 100% rename from backend/.env.sample rename to backend/omni/.env.sample diff --git a/backend/.python-version b/backend/omni/.python-version similarity index 100% rename from backend/.python-version rename to backend/omni/.python-version diff --git a/backend/.uvrc b/backend/omni/.uvrc similarity index 100% rename from backend/.uvrc rename to backend/omni/.uvrc diff --git a/backend/CHANGELOG.md b/backend/omni/CHANGELOG.md similarity index 100% rename from backend/CHANGELOG.md rename to backend/omni/CHANGELOG.md diff --git a/backend/Dockerfile b/backend/omni/Dockerfile similarity index 100% rename from backend/Dockerfile rename to backend/omni/Dockerfile diff --git a/backend/README.md b/backend/omni/README.md similarity index 95% rename from backend/README.md rename to backend/omni/README.md index 6fa9ca7..ee69207 100644 --- a/backend/README.md +++ b/backend/omni/README.md @@ -45,7 +45,7 @@ properly in the `.vscode/settings.json`: "python.testing.pytestArgs": [], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, - "python.testing.cwd": "${workspaceFolder}/backend" + "python.testing.cwd": "${workspaceFolder}/backend/omni" } ``` diff --git a/backend/config.yaml b/backend/omni/config.yaml similarity index 
100% rename from backend/config.yaml rename to backend/omni/config.yaml diff --git a/backend/docs/architecture/auth.md b/backend/omni/docs/architecture/auth.md similarity index 100% rename from backend/docs/architecture/auth.md rename to backend/omni/docs/architecture/auth.md diff --git a/backend/docs/architecture/chat.md b/backend/omni/docs/architecture/chat.md similarity index 100% rename from backend/docs/architecture/chat.md rename to backend/omni/docs/architecture/chat.md diff --git a/backend/docs/architecture/core.md b/backend/omni/docs/architecture/core.md similarity index 100% rename from backend/docs/architecture/core.md rename to backend/omni/docs/architecture/core.md diff --git a/backend/docs/architecture/persistence.md b/backend/omni/docs/architecture/persistence.md similarity index 100% rename from backend/docs/architecture/persistence.md rename to backend/omni/docs/architecture/persistence.md diff --git a/backend/docs/architecture/tools.md b/backend/omni/docs/architecture/tools.md similarity index 100% rename from backend/docs/architecture/tools.md rename to backend/omni/docs/architecture/tools.md diff --git a/backend/docs/learnings/INSTRUCTION_UPDATES.md b/backend/omni/docs/learnings/INSTRUCTION_UPDATES.md similarity index 100% rename from backend/docs/learnings/INSTRUCTION_UPDATES.md rename to backend/omni/docs/learnings/INSTRUCTION_UPDATES.md diff --git a/backend/pyproject.toml b/backend/omni/pyproject.toml similarity index 100% rename from backend/pyproject.toml rename to backend/omni/pyproject.toml diff --git a/backend/src/modai/__init__.py b/backend/omni/src/modai/__init__.py similarity index 100% rename from backend/src/modai/__init__.py rename to backend/omni/src/modai/__init__.py diff --git a/backend/src/modai/__tests__/__init__.py b/backend/omni/src/modai/__tests__/__init__.py similarity index 100% rename from backend/src/modai/__tests__/__init__.py rename to backend/omni/src/modai/__tests__/__init__.py diff --git 
a/backend/src/modai/__tests__/test_module_loader.py b/backend/omni/src/modai/__tests__/test_module_loader.py similarity index 100% rename from backend/src/modai/__tests__/test_module_loader.py rename to backend/omni/src/modai/__tests__/test_module_loader.py diff --git a/backend/src/modai/default_config.yaml b/backend/omni/src/modai/default_config.yaml similarity index 100% rename from backend/src/modai/default_config.yaml rename to backend/omni/src/modai/default_config.yaml diff --git a/backend/src/modai/main.py b/backend/omni/src/modai/main.py similarity index 100% rename from backend/src/modai/main.py rename to backend/omni/src/modai/main.py diff --git a/backend/src/modai/module.py b/backend/omni/src/modai/module.py similarity index 100% rename from backend/src/modai/module.py rename to backend/omni/src/modai/module.py diff --git a/backend/src/modai/module_loader.py b/backend/omni/src/modai/module_loader.py similarity index 100% rename from backend/src/modai/module_loader.py rename to backend/omni/src/modai/module_loader.py diff --git a/backend/src/modai/modules/__init__.py b/backend/omni/src/modai/modules/__init__.py similarity index 100% rename from backend/src/modai/modules/__init__.py rename to backend/omni/src/modai/modules/__init__.py diff --git a/backend/src/modai/modules/authentication/__init__.py b/backend/omni/src/modai/modules/authentication/__init__.py similarity index 100% rename from backend/src/modai/modules/authentication/__init__.py rename to backend/omni/src/modai/modules/authentication/__init__.py diff --git a/backend/src/modai/modules/authentication/__tests__/__init__.py b/backend/omni/src/modai/modules/authentication/__tests__/__init__.py similarity index 100% rename from backend/src/modai/modules/authentication/__tests__/__init__.py rename to backend/omni/src/modai/modules/authentication/__tests__/__init__.py diff --git a/backend/src/modai/modules/authentication/__tests__/test_authentication.py 
b/backend/omni/src/modai/modules/authentication/__tests__/test_authentication.py similarity index 100% rename from backend/src/modai/modules/authentication/__tests__/test_authentication.py rename to backend/omni/src/modai/modules/authentication/__tests__/test_authentication.py diff --git a/backend/src/modai/modules/authentication/module.py b/backend/omni/src/modai/modules/authentication/module.py similarity index 100% rename from backend/src/modai/modules/authentication/module.py rename to backend/omni/src/modai/modules/authentication/module.py diff --git a/backend/src/modai/modules/authentication/password_authentication_module.py b/backend/omni/src/modai/modules/authentication/password_authentication_module.py similarity index 100% rename from backend/src/modai/modules/authentication/password_authentication_module.py rename to backend/omni/src/modai/modules/authentication/password_authentication_module.py diff --git a/backend/src/modai/modules/chat/__init__.py b/backend/omni/src/modai/modules/chat/__init__.py similarity index 100% rename from backend/src/modai/modules/chat/__init__.py rename to backend/omni/src/modai/modules/chat/__init__.py diff --git a/backend/src/modai/modules/chat/__tests__/__init__.py b/backend/omni/src/modai/modules/chat/__tests__/__init__.py similarity index 100% rename from backend/src/modai/modules/chat/__tests__/__init__.py rename to backend/omni/src/modai/modules/chat/__tests__/__init__.py diff --git a/backend/src/modai/modules/chat/__tests__/test_openai_raw_chat.py b/backend/omni/src/modai/modules/chat/__tests__/test_openai_raw_chat.py similarity index 100% rename from backend/src/modai/modules/chat/__tests__/test_openai_raw_chat.py rename to backend/omni/src/modai/modules/chat/__tests__/test_openai_raw_chat.py diff --git a/backend/src/modai/modules/chat/__tests__/test_strands_agent_chat.py b/backend/omni/src/modai/modules/chat/__tests__/test_strands_agent_chat.py similarity index 100% rename from 
backend/src/modai/modules/chat/__tests__/test_strands_agent_chat.py rename to backend/omni/src/modai/modules/chat/__tests__/test_strands_agent_chat.py diff --git a/backend/src/modai/modules/chat/module.py b/backend/omni/src/modai/modules/chat/module.py similarity index 100% rename from backend/src/modai/modules/chat/module.py rename to backend/omni/src/modai/modules/chat/module.py diff --git a/backend/src/modai/modules/chat/openai_agent_chat.py b/backend/omni/src/modai/modules/chat/openai_agent_chat.py similarity index 100% rename from backend/src/modai/modules/chat/openai_agent_chat.py rename to backend/omni/src/modai/modules/chat/openai_agent_chat.py diff --git a/backend/src/modai/modules/chat/openai_raw_chat.py b/backend/omni/src/modai/modules/chat/openai_raw_chat.py similarity index 100% rename from backend/src/modai/modules/chat/openai_raw_chat.py rename to backend/omni/src/modai/modules/chat/openai_raw_chat.py diff --git a/backend/src/modai/modules/chat/web_chat_router.py b/backend/omni/src/modai/modules/chat/web_chat_router.py similarity index 100% rename from backend/src/modai/modules/chat/web_chat_router.py rename to backend/omni/src/modai/modules/chat/web_chat_router.py diff --git a/backend/src/modai/modules/health/__init__.py b/backend/omni/src/modai/modules/health/__init__.py similarity index 100% rename from backend/src/modai/modules/health/__init__.py rename to backend/omni/src/modai/modules/health/__init__.py diff --git a/backend/src/modai/modules/health/__tests__/__init__.py b/backend/omni/src/modai/modules/health/__tests__/__init__.py similarity index 100% rename from backend/src/modai/modules/health/__tests__/__init__.py rename to backend/omni/src/modai/modules/health/__tests__/__init__.py diff --git a/backend/src/modai/modules/health/__tests__/test_health.py b/backend/omni/src/modai/modules/health/__tests__/test_health.py similarity index 100% rename from backend/src/modai/modules/health/__tests__/test_health.py rename to 
backend/omni/src/modai/modules/health/__tests__/test_health.py diff --git a/backend/src/modai/modules/health/module.py b/backend/omni/src/modai/modules/health/module.py similarity index 100% rename from backend/src/modai/modules/health/module.py rename to backend/omni/src/modai/modules/health/module.py diff --git a/backend/src/modai/modules/health/simple_health_module.py b/backend/omni/src/modai/modules/health/simple_health_module.py similarity index 100% rename from backend/src/modai/modules/health/simple_health_module.py rename to backend/omni/src/modai/modules/health/simple_health_module.py diff --git a/backend/src/modai/modules/model_provider/__init__.py b/backend/omni/src/modai/modules/model_provider/__init__.py similarity index 100% rename from backend/src/modai/modules/model_provider/__init__.py rename to backend/omni/src/modai/modules/model_provider/__init__.py diff --git a/backend/src/modai/modules/model_provider/__tests__/__init__.py b/backend/omni/src/modai/modules/model_provider/__tests__/__init__.py similarity index 100% rename from backend/src/modai/modules/model_provider/__tests__/__init__.py rename to backend/omni/src/modai/modules/model_provider/__tests__/__init__.py diff --git a/backend/src/modai/modules/model_provider/__tests__/test_central_model_provider_router.py b/backend/omni/src/modai/modules/model_provider/__tests__/test_central_model_provider_router.py similarity index 100% rename from backend/src/modai/modules/model_provider/__tests__/test_central_model_provider_router.py rename to backend/omni/src/modai/modules/model_provider/__tests__/test_central_model_provider_router.py diff --git a/backend/src/modai/modules/model_provider/__tests__/test_model_provider.py b/backend/omni/src/modai/modules/model_provider/__tests__/test_model_provider.py similarity index 100% rename from backend/src/modai/modules/model_provider/__tests__/test_model_provider.py rename to backend/omni/src/modai/modules/model_provider/__tests__/test_model_provider.py diff 
--git a/backend/src/modai/modules/model_provider/central_router.py b/backend/omni/src/modai/modules/model_provider/central_router.py similarity index 100% rename from backend/src/modai/modules/model_provider/central_router.py rename to backend/omni/src/modai/modules/model_provider/central_router.py diff --git a/backend/src/modai/modules/model_provider/module.py b/backend/omni/src/modai/modules/model_provider/module.py similarity index 100% rename from backend/src/modai/modules/model_provider/module.py rename to backend/omni/src/modai/modules/model_provider/module.py diff --git a/backend/src/modai/modules/model_provider/openai_provider.py b/backend/omni/src/modai/modules/model_provider/openai_provider.py similarity index 100% rename from backend/src/modai/modules/model_provider/openai_provider.py rename to backend/omni/src/modai/modules/model_provider/openai_provider.py diff --git a/backend/src/modai/modules/model_provider_store/__init__.py b/backend/omni/src/modai/modules/model_provider_store/__init__.py similarity index 100% rename from backend/src/modai/modules/model_provider_store/__init__.py rename to backend/omni/src/modai/modules/model_provider_store/__init__.py diff --git a/backend/src/modai/modules/model_provider_store/__tests__/__init__.py b/backend/omni/src/modai/modules/model_provider_store/__tests__/__init__.py similarity index 100% rename from backend/src/modai/modules/model_provider_store/__tests__/__init__.py rename to backend/omni/src/modai/modules/model_provider_store/__tests__/__init__.py diff --git a/backend/src/modai/modules/model_provider_store/__tests__/abstract_model_provider_store_test.py b/backend/omni/src/modai/modules/model_provider_store/__tests__/abstract_model_provider_store_test.py similarity index 100% rename from backend/src/modai/modules/model_provider_store/__tests__/abstract_model_provider_store_test.py rename to backend/omni/src/modai/modules/model_provider_store/__tests__/abstract_model_provider_store_test.py diff --git 
a/backend/src/modai/modules/model_provider_store/__tests__/test_sql_model_provider_store.py b/backend/omni/src/modai/modules/model_provider_store/__tests__/test_sql_model_provider_store.py similarity index 100% rename from backend/src/modai/modules/model_provider_store/__tests__/test_sql_model_provider_store.py rename to backend/omni/src/modai/modules/model_provider_store/__tests__/test_sql_model_provider_store.py diff --git a/backend/src/modai/modules/model_provider_store/module.py b/backend/omni/src/modai/modules/model_provider_store/module.py similarity index 100% rename from backend/src/modai/modules/model_provider_store/module.py rename to backend/omni/src/modai/modules/model_provider_store/module.py diff --git a/backend/src/modai/modules/model_provider_store/sql_model_provider_store.py b/backend/omni/src/modai/modules/model_provider_store/sql_model_provider_store.py similarity index 100% rename from backend/src/modai/modules/model_provider_store/sql_model_provider_store.py rename to backend/omni/src/modai/modules/model_provider_store/sql_model_provider_store.py diff --git a/backend/src/modai/modules/session/__init__.py b/backend/omni/src/modai/modules/session/__init__.py similarity index 100% rename from backend/src/modai/modules/session/__init__.py rename to backend/omni/src/modai/modules/session/__init__.py diff --git a/backend/src/modai/modules/session/__tests__/__init__.py b/backend/omni/src/modai/modules/session/__tests__/__init__.py similarity index 100% rename from backend/src/modai/modules/session/__tests__/__init__.py rename to backend/omni/src/modai/modules/session/__tests__/__init__.py diff --git a/backend/src/modai/modules/session/__tests__/test_session.py b/backend/omni/src/modai/modules/session/__tests__/test_session.py similarity index 100% rename from backend/src/modai/modules/session/__tests__/test_session.py rename to backend/omni/src/modai/modules/session/__tests__/test_session.py diff --git 
a/backend/src/modai/modules/session/jwt_session_module.py b/backend/omni/src/modai/modules/session/jwt_session_module.py similarity index 100% rename from backend/src/modai/modules/session/jwt_session_module.py rename to backend/omni/src/modai/modules/session/jwt_session_module.py diff --git a/backend/src/modai/modules/session/module.py b/backend/omni/src/modai/modules/session/module.py similarity index 100% rename from backend/src/modai/modules/session/module.py rename to backend/omni/src/modai/modules/session/module.py diff --git a/backend/src/modai/modules/startup_config/__tests__/__init__.py b/backend/omni/src/modai/modules/startup_config/__tests__/__init__.py similarity index 100% rename from backend/src/modai/modules/startup_config/__tests__/__init__.py rename to backend/omni/src/modai/modules/startup_config/__tests__/__init__.py diff --git a/backend/src/modai/modules/startup_config/__tests__/test_config_loader.py b/backend/omni/src/modai/modules/startup_config/__tests__/test_config_loader.py similarity index 100% rename from backend/src/modai/modules/startup_config/__tests__/test_config_loader.py rename to backend/omni/src/modai/modules/startup_config/__tests__/test_config_loader.py diff --git a/backend/src/modai/modules/startup_config/module.py b/backend/omni/src/modai/modules/startup_config/module.py similarity index 100% rename from backend/src/modai/modules/startup_config/module.py rename to backend/omni/src/modai/modules/startup_config/module.py diff --git a/backend/src/modai/modules/startup_config/yaml_config_module.py b/backend/omni/src/modai/modules/startup_config/yaml_config_module.py similarity index 100% rename from backend/src/modai/modules/startup_config/yaml_config_module.py rename to backend/omni/src/modai/modules/startup_config/yaml_config_module.py diff --git a/backend/src/modai/modules/tools/__init__.py b/backend/omni/src/modai/modules/tools/__init__.py similarity index 100% rename from backend/src/modai/modules/tools/__init__.py rename to 
backend/omni/src/modai/modules/tools/__init__.py diff --git a/backend/src/modai/modules/tools/__tests__/__init__.py b/backend/omni/src/modai/modules/tools/__tests__/__init__.py similarity index 100% rename from backend/src/modai/modules/tools/__tests__/__init__.py rename to backend/omni/src/modai/modules/tools/__tests__/__init__.py diff --git a/backend/src/modai/modules/tools/__tests__/test_tool_registry.py b/backend/omni/src/modai/modules/tools/__tests__/test_tool_registry.py similarity index 100% rename from backend/src/modai/modules/tools/__tests__/test_tool_registry.py rename to backend/omni/src/modai/modules/tools/__tests__/test_tool_registry.py diff --git a/backend/src/modai/modules/tools/__tests__/test_tools_web_module.py b/backend/omni/src/modai/modules/tools/__tests__/test_tools_web_module.py similarity index 100% rename from backend/src/modai/modules/tools/__tests__/test_tools_web_module.py rename to backend/omni/src/modai/modules/tools/__tests__/test_tools_web_module.py diff --git a/backend/src/modai/modules/tools/module.py b/backend/omni/src/modai/modules/tools/module.py similarity index 100% rename from backend/src/modai/modules/tools/module.py rename to backend/omni/src/modai/modules/tools/module.py diff --git a/backend/src/modai/modules/tools/tool_registry.py b/backend/omni/src/modai/modules/tools/tool_registry.py similarity index 100% rename from backend/src/modai/modules/tools/tool_registry.py rename to backend/omni/src/modai/modules/tools/tool_registry.py diff --git a/backend/src/modai/modules/tools/tools_web_module.py b/backend/omni/src/modai/modules/tools/tools_web_module.py similarity index 100% rename from backend/src/modai/modules/tools/tools_web_module.py rename to backend/omni/src/modai/modules/tools/tools_web_module.py diff --git a/backend/src/modai/modules/user/__init__.py b/backend/omni/src/modai/modules/user/__init__.py similarity index 100% rename from backend/src/modai/modules/user/__init__.py rename to 
backend/omni/src/modai/modules/user/__init__.py diff --git a/backend/src/modai/modules/user/__tests__/__init__.py b/backend/omni/src/modai/modules/user/__tests__/__init__.py similarity index 100% rename from backend/src/modai/modules/user/__tests__/__init__.py rename to backend/omni/src/modai/modules/user/__tests__/__init__.py diff --git a/backend/src/modai/modules/user/__tests__/test_user.py b/backend/omni/src/modai/modules/user/__tests__/test_user.py similarity index 100% rename from backend/src/modai/modules/user/__tests__/test_user.py rename to backend/omni/src/modai/modules/user/__tests__/test_user.py diff --git a/backend/src/modai/modules/user/module.py b/backend/omni/src/modai/modules/user/module.py similarity index 100% rename from backend/src/modai/modules/user/module.py rename to backend/omni/src/modai/modules/user/module.py diff --git a/backend/src/modai/modules/user/simple_user_module.py b/backend/omni/src/modai/modules/user/simple_user_module.py similarity index 100% rename from backend/src/modai/modules/user/simple_user_module.py rename to backend/omni/src/modai/modules/user/simple_user_module.py diff --git a/backend/src/modai/modules/user_settings/README.md b/backend/omni/src/modai/modules/user_settings/README.md similarity index 100% rename from backend/src/modai/modules/user_settings/README.md rename to backend/omni/src/modai/modules/user_settings/README.md diff --git a/backend/src/modai/modules/user_settings/__init__.py b/backend/omni/src/modai/modules/user_settings/__init__.py similarity index 100% rename from backend/src/modai/modules/user_settings/__init__.py rename to backend/omni/src/modai/modules/user_settings/__init__.py diff --git a/backend/src/modai/modules/user_settings/__tests__/__init__.py b/backend/omni/src/modai/modules/user_settings/__tests__/__init__.py similarity index 100% rename from backend/src/modai/modules/user_settings/__tests__/__init__.py rename to backend/omni/src/modai/modules/user_settings/__tests__/__init__.py diff 
--git a/backend/src/modai/modules/user_settings/__tests__/test_user_settings.py b/backend/omni/src/modai/modules/user_settings/__tests__/test_user_settings.py similarity index 100% rename from backend/src/modai/modules/user_settings/__tests__/test_user_settings.py rename to backend/omni/src/modai/modules/user_settings/__tests__/test_user_settings.py diff --git a/backend/src/modai/modules/user_settings/module.py b/backend/omni/src/modai/modules/user_settings/module.py similarity index 100% rename from backend/src/modai/modules/user_settings/module.py rename to backend/omni/src/modai/modules/user_settings/module.py diff --git a/backend/src/modai/modules/user_settings/simple_user_settings_module.py b/backend/omni/src/modai/modules/user_settings/simple_user_settings_module.py similarity index 100% rename from backend/src/modai/modules/user_settings/simple_user_settings_module.py rename to backend/omni/src/modai/modules/user_settings/simple_user_settings_module.py diff --git a/backend/src/modai/modules/user_settings_store/__init__.py b/backend/omni/src/modai/modules/user_settings_store/__init__.py similarity index 100% rename from backend/src/modai/modules/user_settings_store/__init__.py rename to backend/omni/src/modai/modules/user_settings_store/__init__.py diff --git a/backend/src/modai/modules/user_settings_store/__tests__/__init__.py b/backend/omni/src/modai/modules/user_settings_store/__tests__/__init__.py similarity index 100% rename from backend/src/modai/modules/user_settings_store/__tests__/__init__.py rename to backend/omni/src/modai/modules/user_settings_store/__tests__/__init__.py diff --git a/backend/src/modai/modules/user_settings_store/__tests__/test_user_settings_store.py b/backend/omni/src/modai/modules/user_settings_store/__tests__/test_user_settings_store.py similarity index 100% rename from backend/src/modai/modules/user_settings_store/__tests__/test_user_settings_store.py rename to 
backend/omni/src/modai/modules/user_settings_store/__tests__/test_user_settings_store.py diff --git a/backend/src/modai/modules/user_settings_store/inmemory_user_settings_store.py b/backend/omni/src/modai/modules/user_settings_store/inmemory_user_settings_store.py similarity index 100% rename from backend/src/modai/modules/user_settings_store/inmemory_user_settings_store.py rename to backend/omni/src/modai/modules/user_settings_store/inmemory_user_settings_store.py diff --git a/backend/src/modai/modules/user_settings_store/module.py b/backend/omni/src/modai/modules/user_settings_store/module.py similarity index 100% rename from backend/src/modai/modules/user_settings_store/module.py rename to backend/omni/src/modai/modules/user_settings_store/module.py diff --git a/backend/src/modai/modules/user_settings_store/sqlalchemy_user_settings_store.py b/backend/omni/src/modai/modules/user_settings_store/sqlalchemy_user_settings_store.py similarity index 100% rename from backend/src/modai/modules/user_settings_store/sqlalchemy_user_settings_store.py rename to backend/omni/src/modai/modules/user_settings_store/sqlalchemy_user_settings_store.py diff --git a/backend/src/modai/modules/user_store/__init__.py b/backend/omni/src/modai/modules/user_store/__init__.py similarity index 100% rename from backend/src/modai/modules/user_store/__init__.py rename to backend/omni/src/modai/modules/user_store/__init__.py diff --git a/backend/src/modai/modules/user_store/__tests__/__init__.py b/backend/omni/src/modai/modules/user_store/__tests__/__init__.py similarity index 100% rename from backend/src/modai/modules/user_store/__tests__/__init__.py rename to backend/omni/src/modai/modules/user_store/__tests__/__init__.py diff --git a/backend/src/modai/modules/user_store/__tests__/abstract_user_store_test.py b/backend/omni/src/modai/modules/user_store/__tests__/abstract_user_store_test.py similarity index 100% rename from 
backend/src/modai/modules/user_store/__tests__/abstract_user_store_test.py rename to backend/omni/src/modai/modules/user_store/__tests__/abstract_user_store_test.py diff --git a/backend/src/modai/modules/user_store/__tests__/test_inmemory_user_store.py b/backend/omni/src/modai/modules/user_store/__tests__/test_inmemory_user_store.py similarity index 100% rename from backend/src/modai/modules/user_store/__tests__/test_inmemory_user_store.py rename to backend/omni/src/modai/modules/user_store/__tests__/test_inmemory_user_store.py diff --git a/backend/src/modai/modules/user_store/__tests__/test_sql_model_user_store.py b/backend/omni/src/modai/modules/user_store/__tests__/test_sql_model_user_store.py similarity index 100% rename from backend/src/modai/modules/user_store/__tests__/test_sql_model_user_store.py rename to backend/omni/src/modai/modules/user_store/__tests__/test_sql_model_user_store.py diff --git a/backend/src/modai/modules/user_store/inmemory_user_store.py b/backend/omni/src/modai/modules/user_store/inmemory_user_store.py similarity index 100% rename from backend/src/modai/modules/user_store/inmemory_user_store.py rename to backend/omni/src/modai/modules/user_store/inmemory_user_store.py diff --git a/backend/src/modai/modules/user_store/module.py b/backend/omni/src/modai/modules/user_store/module.py similarity index 100% rename from backend/src/modai/modules/user_store/module.py rename to backend/omni/src/modai/modules/user_store/module.py diff --git a/backend/src/modai/modules/user_store/sql_model_user_store.py b/backend/omni/src/modai/modules/user_store/sql_model_user_store.py similarity index 100% rename from backend/src/modai/modules/user_store/sql_model_user_store.py rename to backend/omni/src/modai/modules/user_store/sql_model_user_store.py diff --git a/backend/src/modai/modules/user_store/sqlalchemy_user_store.py b/backend/omni/src/modai/modules/user_store/sqlalchemy_user_store.py similarity index 100% rename from 
backend/src/modai/modules/user_store/sqlalchemy_user_store.py rename to backend/omni/src/modai/modules/user_store/sqlalchemy_user_store.py diff --git a/e2e_tests/tests_omni_full/playwright.config.ts b/e2e_tests/tests_omni_full/playwright.config.ts index ead6215..d477e46 100644 --- a/e2e_tests/tests_omni_full/playwright.config.ts +++ b/e2e_tests/tests_omni_full/playwright.config.ts @@ -36,7 +36,7 @@ export default defineConfig({ reuseExistingServer: !process.env.CI, }, { - command: "cd ../../backend && rm -f *.db && uv run uvicorn modai.main:app", + command: "cd ../../backend/omni && rm -f *.db && uv run uvicorn modai.main:app", url: "http://localhost:8000/api/health", reuseExistingServer: !process.env.CI, }, diff --git a/resources/compose-files/compose-onmi-full-build.yaml b/resources/compose-files/compose-onmi-full-build.yaml index 0348d68..0f4f467 100644 --- a/resources/compose-files/compose-onmi-full-build.yaml +++ b/resources/compose-files/compose-onmi-full-build.yaml @@ -2,7 +2,7 @@ services: backend: build: - context: ../../backend + context: ../../backend/omni dockerfile: Dockerfile frontend: From 2fd239f5e24b1874b73ad89d0674b220fa43e9ea Mon Sep 17 00:00:00 2001 From: guenhter Date: Tue, 3 Mar 2026 16:22:00 +0100 Subject: [PATCH 04/10] feat: add tool dice roller --- .agents/skills/tool-microservice/SKILL.md | 65 ++++++++----------- .../tool-microservice/references/main.py | 26 ++++++++ .../references/pyproject.toml | 6 ++ backend/omni/config.yaml | 4 +- backend/tools/dice-roller/main.py | 27 ++++++++ backend/tools/dice-roller/pyproject.toml | 6 ++ 6 files changed, 96 insertions(+), 38 deletions(-) create mode 100644 .agents/skills/tool-microservice/references/main.py create mode 100644 .agents/skills/tool-microservice/references/pyproject.toml create mode 100644 backend/tools/dice-roller/main.py create mode 100644 backend/tools/dice-roller/pyproject.toml diff --git a/.agents/skills/tool-microservice/SKILL.md b/.agents/skills/tool-microservice/SKILL.md index 
ca1366e..bf729af 100644 --- a/.agents/skills/tool-microservice/SKILL.md +++ b/.agents/skills/tool-microservice/SKILL.md @@ -29,59 +29,50 @@ A valid tool microservice MUST: ### 1. Create the Microservice -Use any HTTP framework. Example with Python FastAPI: - -```python -from fastapi import FastAPI - -app = FastAPI( - title="Calculator Tool", - version="1.0.0", - description="Evaluate mathematical expressions", -) - -@app.post("/calculate", operation_id="calculate", summary="Evaluate a math expression") -async def calculate(expression: str) -> dict: - """Evaluate the given math expression and return the result.""" - result = eval(expression) # Use a safe evaluator in production - return {"result": result} -``` +Tools live under `backend/tools//`. Each tool needs a `pyproject.toml` and a `main.py`. + +Use the Python template files as a starting point: +- [references/pyproject.toml](references/pyproject.toml) — minimal UV project definition +- [references/main.py](references/main.py) — FastAPI app template with request/response models + +Copy them into your tool directory, rename classes/endpoints, and implement your logic. FastAPI automatically generates the `/openapi.json` spec from the route definition. + +A working example is available at `backend/tools/dice-roller/`. -FastAPI automatically generates the `/openapi.json` spec from the route definition. +**Run the tool:** + +```bash +cd backend/tools/ +uv sync +uv run uvicorn main:app --port +``` ### 2. Verify the OpenAPI Spec Start the service and check that `/openapi.json` contains: -- `operationId` — unique name for the tool (e.g. `"calculate"`) +- `operationId` — unique name for the tool (e.g. 
`"roll_dice"`) - `summary` or `description` — what the tool does (shown to the LLM) - `requestBody.content.application/json.schema` — input parameters ```bash -curl http://localhost:8000/openapi.json | jq '.paths' +curl http://localhost:8001/openapi.json | jq '.paths' ``` -Expected structure: +The dice roller produces this structure: ```json { - "/calculate": { + "/roll": { "post": { - "summary": "Evaluate a math expression", - "operationId": "calculate", + "summary": "Roll dice and return the results", + "operationId": "roll_dice", "requestBody": { "required": true, "content": { "application/json": { "schema": { - "type": "object", - "properties": { - "expression": { - "type": "string", - "description": "Math expression to evaluate" - } - }, - "required": ["expression"] + "$ref": "#/components/schemas/DiceRequest" } } } @@ -101,7 +92,7 @@ modules: class: modai.modules.tools.tool_registry.HttpToolRegistryModule config: tools: - - url: http://calculator-service:8000/calculate + - url: http://localhost:8001/roll method: POST ``` @@ -118,7 +109,7 @@ The registry derives the base URL from `url` (strips the path) and appends `/ope 3. Call `GET /api/tools` and verify your tool appears in OpenAI function-calling format: ```bash -curl http://localhost:8000/api/tools | jq '.tools[] | select(.function.name == "calculate")' +curl http://localhost:8000/api/tools | jq '.tools[] | select(.function.name == "roll_dice")' ``` Expected: @@ -127,8 +118,8 @@ Expected: { "type": "function", "function": { - "name": "calculate", - "description": "Evaluate a math expression", + "name": "roll_dice", + "description": "Roll dice and return the results", "parameters": { ... }, "strict": true } @@ -149,7 +140,7 @@ Expected: ## Common Pitfalls - **Missing `operationId`**: The tool will be silently skipped. Always set `operationId` on your trigger operation. -- **Wrong URL in config**: The `url` must be the full trigger endpoint (e.g. `/calculate`), not just the base URL. 
The registry strips the path to derive the base for fetching `/openapi.json`. +- **Wrong URL in config**: The `url` must be the full trigger endpoint (e.g. `http://localhost:8001/roll`), not just the base URL. The registry strips the path to derive the base for fetching `/openapi.json`. - **Multiple operations**: The registry uses the **first** operation with an `operationId` it finds. Keep one trigger operation per tool service. - **Non-JSON responses**: The LLM expects JSON results. Always return `application/json`. diff --git a/.agents/skills/tool-microservice/references/main.py b/.agents/skills/tool-microservice/references/main.py new file mode 100644 index 0000000..d443144 --- /dev/null +++ b/.agents/skills/tool-microservice/references/main.py @@ -0,0 +1,26 @@ +from fastapi import FastAPI +from pydantic import BaseModel, Field + +app = FastAPI( + title="My Tool", + version="1.0.0", + description="Short description of the tool", +) + + +class MyRequest(BaseModel): + """Define input parameters with Field descriptions — these become the tool's parameters for the LLM.""" + + my_param: str = Field(description="Describe what this parameter does") + + +class MyResponse(BaseModel): + """Define the response schema — the LLM sees this as the tool's output.""" + + result: str + + +@app.post("/my-endpoint", operation_id="my_tool_name", summary="One-line description shown to the LLM") +async def my_endpoint(request: MyRequest) -> MyResponse: + """Implement the tool logic here.""" + return MyResponse(result=f"You said: {request.my_param}") diff --git a/.agents/skills/tool-microservice/references/pyproject.toml b/.agents/skills/tool-microservice/references/pyproject.toml new file mode 100644 index 0000000..88b8bba --- /dev/null +++ b/.agents/skills/tool-microservice/references/pyproject.toml @@ -0,0 +1,6 @@ +[project] +name = "my-tool" +version = "1.0.0" +description = "Short description of the tool" +requires-python = ">=3.12" +dependencies = ["fastapi", "uvicorn"] diff 
--git a/backend/omni/config.yaml b/backend/omni/config.yaml index 801948b..f8173e1 100644 --- a/backend/omni/config.yaml +++ b/backend/omni/config.yaml @@ -65,7 +65,9 @@ modules: tool_registry: class: modai.modules.tools.tool_registry.HttpToolRegistryModule config: - tools: [] + tools: + - url: http://localhost:8001/roll + method: POST # Example: # tools: # - url: http://calculator-service:8000/calculate diff --git a/backend/tools/dice-roller/main.py b/backend/tools/dice-roller/main.py new file mode 100644 index 0000000..f082882 --- /dev/null +++ b/backend/tools/dice-roller/main.py @@ -0,0 +1,27 @@ +import random + +from fastapi import FastAPI +from pydantic import BaseModel, Field + +app = FastAPI( + title="Dice Roller Tool", + version="1.0.0", + description="Roll dice and get results", +) + + +class DiceRequest(BaseModel): + count: int = Field(default=1, description="Number of dice to roll", ge=1, le=100) + sides: int = Field(default=6, description="Number of sides per die", ge=2, le=100) + + +class DiceResponse(BaseModel): + rolls: list[int] + total: int + + +@app.post("/roll", operation_id="roll_dice", summary="Roll dice and return the results") +async def roll_dice(request: DiceRequest) -> DiceResponse: + """Roll the specified number of dice with the given number of sides.""" + rolls = [random.randint(1, request.sides) for _ in range(request.count)] + return DiceResponse(rolls=rolls, total=sum(rolls)) diff --git a/backend/tools/dice-roller/pyproject.toml b/backend/tools/dice-roller/pyproject.toml new file mode 100644 index 0000000..cf2ab3c --- /dev/null +++ b/backend/tools/dice-roller/pyproject.toml @@ -0,0 +1,6 @@ +[project] +name = "dice-roller" +version = "1.0.0" +description = "A showcase tool that rolls dice" +requires-python = ">=3.12" +dependencies = ["fastapi", "uvicorn"] From 3a48e48acdd989ce1c872df3f89357509fce1b44 Mon Sep 17 00:00:00 2001 From: guenhter Date: Tue, 3 Mar 2026 16:25:29 +0100 Subject: [PATCH 05/10] fix: handle proper in tool 
definitions --- .../tools/__tests__/test_tools_web_module.py | 148 +++++++++++++++++- .../modai/modules/tools/tools_web_module.py | 43 ++++- 2 files changed, 184 insertions(+), 7 deletions(-) diff --git a/backend/omni/src/modai/modules/tools/__tests__/test_tools_web_module.py b/backend/omni/src/modai/modules/tools/__tests__/test_tools_web_module.py index ced2dd5..803def3 100644 --- a/backend/omni/src/modai/modules/tools/__tests__/test_tools_web_module.py +++ b/backend/omni/src/modai/modules/tools/__tests__/test_tools_web_module.py @@ -7,6 +7,7 @@ from modai.modules.tools.tools_web_module import ( OpenAIToolsWebModule, _extract_parameters, + _resolve_refs, _transform_openapi_to_openai, ) @@ -144,23 +145,57 @@ def test_extracts_json_schema(self): } } } - result = _extract_parameters(operation) + result = _extract_parameters(operation, {}) assert result == { "type": "object", "properties": {"x": {"type": "integer"}}, } def test_returns_default_when_no_request_body(self): - result = _extract_parameters({}) + result = _extract_parameters({}, {}) assert result == {"type": "object", "properties": {}} def test_returns_default_when_no_json_content(self): operation = { "requestBody": {"content": {"text/plain": {"schema": {"type": "string"}}}} } - result = _extract_parameters(operation) + result = _extract_parameters(operation, {}) assert result == {"type": "object", "properties": {}} + def test_resolves_ref_in_schema(self): + spec = { + "components": { + "schemas": { + "DiceRequest": { + "type": "object", + "properties": { + "count": {"type": "integer", "description": "Number of dice"}, + "sides": {"type": "integer", "description": "Sides per die"}, + }, + "required": ["count", "sides"], + } + } + } + } + operation = { + "requestBody": { + "content": { + "application/json": { + "schema": {"$ref": "#/components/schemas/DiceRequest"} + } + } + } + } + result = _extract_parameters(operation, spec) + assert result == { + "type": "object", + "properties": { + "count": {"type": 
"integer", "description": "Number of dice"}, + "sides": {"type": "integer", "description": "Sides per die"}, + }, + "required": ["count", "sides"], + } + class TestToolsWebModule: def _make_module( @@ -267,3 +302,110 @@ async def test_multiple_tools_transformed(self): names = [t["function"]["name"] for t in result["tools"]] assert "calculate" in names assert "web_search" in names + + +class TestResolveRefs: + def test_returns_primitive_as_is(self): + assert _resolve_refs("hello", {}) == "hello" + assert _resolve_refs(42, {}) == 42 + assert _resolve_refs(None, {}) is None + + def test_returns_dict_without_refs_unchanged(self): + node = {"type": "string", "description": "test"} + assert _resolve_refs(node, {}) == node + + def test_resolves_top_level_ref(self): + spec = {"components": {"schemas": {"Foo": {"type": "object", "properties": {}}}}} + node = {"$ref": "#/components/schemas/Foo"} + assert _resolve_refs(node, spec) == {"type": "object", "properties": {}} + + def test_resolves_nested_ref(self): + spec = { + "components": { + "schemas": { + "Bar": {"type": "string", "description": "A bar"}, + } + } + } + node = { + "type": "object", + "properties": { + "bar": {"$ref": "#/components/schemas/Bar"}, + }, + } + result = _resolve_refs(node, spec) + assert result == { + "type": "object", + "properties": { + "bar": {"type": "string", "description": "A bar"}, + }, + } + + def test_resolves_refs_in_list(self): + spec = {"components": {"schemas": {"X": {"type": "integer"}}}} + node = [{"$ref": "#/components/schemas/X"}, {"type": "string"}] + result = _resolve_refs(node, spec) + assert result == [{"type": "integer"}, {"type": "string"}] + + def test_returns_empty_dict_for_unresolvable_ref(self): + result = _resolve_refs({"$ref": "#/components/schemas/Missing"}, {}) + assert result == {} + + def test_returns_empty_dict_for_non_local_ref(self): + result = _resolve_refs({"$ref": "https://example.com/schema.json"}, {}) + assert result == {} + + +class TestTransformWithRefs: + 
"""Integration test: full OpenAPI spec with $ref (like FastAPI generates).""" + + DICE_ROLLER_SPEC = { + "openapi": "3.1.0", + "info": {"title": "Dice Roller Tool", "version": "1.0.0"}, + "paths": { + "/roll": { + "post": { + "summary": "Roll dice and return the results", + "operationId": "roll_dice", + "requestBody": { + "required": True, + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DiceRequest" + } + } + }, + }, + } + } + }, + "components": { + "schemas": { + "DiceRequest": { + "type": "object", + "properties": { + "count": { + "type": "integer", + "default": 1, + "description": "Number of dice to roll", + }, + "sides": { + "type": "integer", + "default": 6, + "description": "Number of sides per die", + }, + }, + } + } + }, + } + + def test_transform_resolves_refs(self): + result = _transform_openapi_to_openai(self.DICE_ROLLER_SPEC) + assert result is not None + params = result["function"]["parameters"] + assert params["type"] == "object" + assert "count" in params["properties"] + assert "sides" in params["properties"] + assert "$ref" not in str(params) diff --git a/backend/omni/src/modai/modules/tools/tools_web_module.py b/backend/omni/src/modai/modules/tools/tools_web_module.py index ec8e874..ef42537 100644 --- a/backend/omni/src/modai/modules/tools/tools_web_module.py +++ b/backend/omni/src/modai/modules/tools/tools_web_module.py @@ -60,7 +60,7 @@ def _transform_openapi_to_openai(spec: dict[str, Any]) -> dict[str, Any] | None: name = operation["operationId"] description = operation.get("summary") or operation.get("description", "") - parameters = _extract_parameters(operation) + parameters = _extract_parameters(operation, spec) return { "type": "function", @@ -78,9 +78,44 @@ def _transform_openapi_to_openai(spec: dict[str, Any]) -> dict[str, Any] | None: return None -def _extract_parameters(operation: dict[str, Any]) -> dict[str, Any]: - """Extract parameter schema from an OpenAPI operation's request body.""" +def 
_extract_parameters( + operation: dict[str, Any], spec: dict[str, Any] +) -> dict[str, Any]: + """Extract parameter schema from an OpenAPI operation's request body. + + Resolves any $ref references against the full OpenAPI spec so the + returned schema is fully inlined (OpenAI does not support $ref). + """ request_body = operation.get("requestBody", {}) content = request_body.get("content", {}) json_content = content.get("application/json", {}) - return json_content.get("schema", {"type": "object", "properties": {}}) + schema = json_content.get("schema", {"type": "object", "properties": {}}) + return _resolve_refs(schema, spec) + + +def _resolve_refs(node: Any, spec: dict[str, Any]) -> Any: + """Recursively resolve all $ref pointers in a JSON Schema against the OpenAPI spec.""" + if isinstance(node, dict): + if "$ref" in node: + resolved = _follow_ref(node["$ref"], spec) + return _resolve_refs(resolved, spec) + return {key: _resolve_refs(value, spec) for key, value in node.items()} + if isinstance(node, list): + return [_resolve_refs(item, spec) for item in node] + return node + + +def _follow_ref(ref: str, spec: dict[str, Any]) -> dict[str, Any]: + """Follow a JSON Pointer reference like '#/components/schemas/Foo'.""" + if not ref.startswith("#/"): + logger.warning("Unsupported $ref format: %s", ref) + return {} + parts = ref.lstrip("#/").split("/") + current: Any = spec + for part in parts: + if isinstance(current, dict): + current = current.get(part) + else: + logger.warning("Could not resolve $ref path: %s", ref) + return {} + return current if isinstance(current, dict) else {} From 8891348da9372609cb471ab348724d26a548d541 Mon Sep 17 00:00:00 2001 From: guenhter Date: Tue, 3 Mar 2026 16:28:04 +0100 Subject: [PATCH 06/10] docs: add readme to dice roller --- .agents/skills/tool-microservice/references/README.md | 10 ++++++++++ backend/tools/dice-roller/README.md | 10 ++++++++++ 2 files changed, 20 insertions(+) create mode 100644 
.agents/skills/tool-microservice/references/README.md create mode 100644 backend/tools/dice-roller/README.md diff --git a/.agents/skills/tool-microservice/references/README.md b/.agents/skills/tool-microservice/references/README.md new file mode 100644 index 0000000..4b725c9 --- /dev/null +++ b/.agents/skills/tool-microservice/references/README.md @@ -0,0 +1,10 @@ +# Python Tool Template + +Template files for creating a tool microservice with Python/FastAPI. + +## Run + +```bash +uv sync +uv run uvicorn main:app --port +``` diff --git a/backend/tools/dice-roller/README.md b/backend/tools/dice-roller/README.md new file mode 100644 index 0000000..b6bd7c5 --- /dev/null +++ b/backend/tools/dice-roller/README.md @@ -0,0 +1,10 @@ +# Dice Roller Tool + +A showcase tool microservice that rolls dice. + +## Run + +```bash +uv sync +uv run uvicorn main:app --port 8001 +``` From 693ce62bd41182cd9c480933497258cf69013fbc Mon Sep 17 00:00:00 2001 From: guenhter Date: Wed, 4 Mar 2026 11:42:39 +0100 Subject: [PATCH 07/10] docs: add llmock skill --- .agents/skills/llmock/SKILL.md | 239 +++++++++++++++++++ .agents/skills/llmock/references/config.yaml | 35 +++ 2 files changed, 274 insertions(+) create mode 100644 .agents/skills/llmock/SKILL.md create mode 100644 .agents/skills/llmock/references/config.yaml diff --git a/.agents/skills/llmock/SKILL.md b/.agents/skills/llmock/SKILL.md new file mode 100644 index 0000000..76aee8b --- /dev/null +++ b/.agents/skills/llmock/SKILL.md @@ -0,0 +1,239 @@ +--- +name: llmock-skill +description: >- + Run and configure llmock via Docker, an OpenAI-compatible mock server for + testing LLM integrations. Use when you need a local mock for OpenAI endpoints + (/models, /chat/completions, /responses), when testing tool calling, + error handling, or streaming against a deterministic server, or when + configuring mock behaviors via config.yaml and Docker environment variables. 
+license: MIT +metadata: + author: modAI-systems + version: "0.0.1" +--- + +# llmock — OpenAI-Compatible Mock Server (Docker) + +llmock is a lightweight Docker-based mock server that implements OpenAI's API. It lets you test LLM integrations without hitting a real API. By default it echoes input back as output (mirror strategy), and supports config-driven tool calls, error simulation, and streaming. + +## When to Use This Skill + +- You need a local OpenAI-compatible server for integration tests +- You want deterministic, reproducible responses from an "LLM" +- You need to test tool calling, error handling, or streaming logic +- You want to avoid API costs and rate limits during development + +## Running with Docker + +### Basic Start + +```bash +docker container run -p 8000:8000 ghcr.io/modai-systems/llmock:latest +``` + +The server is available at `http://localhost:8000`. Health check: `GET /health` (no auth). + +### With Custom Configuration + +The container reads config from `/app/config.yaml`. Mount a local file to override: + +```bash +docker container run -p 8000:8000 \ + -v ./config.yaml:/app/config.yaml:ro \ + ghcr.io/modai-systems/llmock:latest +``` + +### With Environment Variable Overrides + +Override individual config values using `LLMOCK_`-prefixed environment variables: + +```bash +docker container run -p 8000:8000 \ + -e LLMOCK_API_KEY=my-custom-key \ + -e LLMOCK_CORS_ALLOW_ORIGINS="http://localhost:3000;http://localhost:5173" \ + ghcr.io/modai-systems/llmock:latest +``` + +Environment variable rules: +- Nested keys joined with underscores: `cors.allow-origins` → `LLMOCK_CORS_ALLOW_ORIGINS` +- Dashes converted to underscores: `api-key` → `LLMOCK_API_KEY` +- Lists parsed from semicolon-separated values +- Only keys present in `config.yaml` are overridden + +### Verify It's Running + +```bash +curl http://localhost:8000/health +``` + +## Connecting an OpenAI Client + +Point any OpenAI SDK client at the mock server. 
The default API key is `your-secret-api-key`. + +```python +from openai import OpenAI + +client = OpenAI( + base_url="http://localhost:8000/", + api_key="your-secret-api-key", +) +``` + +Any language's OpenAI SDK works — just change `base_url`. + +## General Behavior + +### Default: Mirror Strategy + +Without any special config, llmock echoes the last user message back as the response. Send `"Hello!"` and get `"Hello!"` back. + +### Strategy System + +Strategies are an ordered list in `config.yaml`. They run in sequence; the **first strategy that returns a non-empty result wins**. Remaining strategies are skipped. + +```yaml +strategies: + - ErrorStrategy # Check for error triggers first + - ToolCallStrategy # Then check for tool call matches + - MirrorStrategy # Fall back to echoing input +``` + +| Strategy | Behavior | +|----------|----------| +| `MirrorStrategy` | Echoes the last user message | +| `ToolCallStrategy` | Returns tool calls triggered by `call tool '' with ''` phrase in the last user message | +| `ErrorStrategy` | Returns HTTP errors triggered by `raise error ` phrase in the last user message | + +If `strategies` is omitted, defaults to `["MirrorStrategy"]`. Unknown names are skipped with a warning. + +### Model Validation + +Requests must specify a model that exists in the `models` config list. Invalid models return a `404` error. Model validation runs **before** any strategy. + +### Authentication + +If `api-key` is set in config, clients must send `Authorization: Bearer `. If `api-key` is not set, all requests are allowed. The `/health` endpoint never requires auth. 
+
+## Endpoints
+
+| Endpoint | Method | Description |
+|----------|--------|-------------|
+| `/models` | GET | List configured models |
+| `/models/{model_id}` | GET | Retrieve a single model |
+| `/chat/completions` | POST | Chat Completions API (streaming supported) |
+| `/responses` | POST | Responses API (streaming supported) |
+| `/health` | GET | Health check (no auth required) |
+
+Both `/chat/completions` and `/responses` support `stream=True` (SSE, word-level chunking) and `stream_options.include_usage` for usage stats.
+
+## Configuration
+
+The container reads `/app/config.yaml`. See [references/CONFIG.md](references/CONFIG.md) for the full field reference.
+
+### Minimal Custom Config
+
+```yaml
+api-key: "test-key"
+models:
+  - id: "gpt-4o"
+    created: 1715367049
+    owned_by: "openai"
+```
+
+### Full Config with All Features
+
+```yaml
+api-key: "your-secret-api-key"
+
+cors:
+  allow-origins:
+    - "http://localhost:8000"
+
+models:
+  - id: "gpt-4o"
+    created: 1715367049
+    owned_by: "openai"
+  - id: "gpt-4o-mini"
+    created: 1721172741
+    owned_by: "openai"
+  - id: "gpt-3.5-turbo"
+    created: 1677610602
+    owned_by: "openai"
+
+strategies:
+  - ErrorStrategy
+  - ToolCallStrategy
+  - MirrorStrategy
+```
+
+### Tool Calling
+
+When `ToolCallStrategy` is in the strategies list, llmock scans the last user message line-by-line for the pattern:
+
+```
+call tool '<tool_name>' with '<arguments>'
+```
+
+- `<tool_name>` must match one of the tools declared in the request's `tools` list.
+- `<arguments>` is the arguments string passed back as the tool call arguments (use `'{}'` for no arguments).
+- Multiple matching lines each produce a separate tool call response.
+- If no line matches, or the named tool is not in `request.tools`, the strategy returns an empty list and the next strategy runs.
+- **The strategy only fires when the last message in the conversation is a `user` message.** If the last message has any other role (`assistant`, `tool`, `system`), the strategy is skipped entirely. 
This prevents the infinite loop that would otherwise occur when the trigger phrase persists in the conversation history across multiple cycles. + +### Error Simulation + +When `ErrorStrategy` is in the strategies list, llmock scans the last user message line-by-line for the pattern: + +``` +raise error {"code": 429, "message": "Rate limit exceeded"} +``` + +| Field | Required | Maps to | +|-------|----------|---------| +| `code` | yes (int) | HTTP response status code (e.g. `429`) | +| `message` | yes (string) | `error.message` in the JSON body | +| `type` | no (string) | `error.type` in the JSON body — defaults to `"api_error"` | +| `error_code` | no (string) | `error.code` in the JSON body — defaults to `"error"` | + +Example with all fields: + +``` +raise error {"code": 429, "message": "Rate limit exceeded", "type": "rate_limit_error", "error_code": "rate_limit_exceeded"} +``` + +Produces HTTP 429 with body: + +```json +{ + "error": { + "message": "Rate limit exceeded", + "type": "rate_limit_error", + "param": null, + "code": "rate_limit_exceeded" + } +} +``` + +- The phrase can appear anywhere in the message — the line is scanned, not the whole message. +- First matching line wins; remaining lines are ignored. +- If no line matches, the strategy returns an empty list and the next strategy runs. +- Works on both `/chat/completions` and `/responses`. + +## Default Models + +Out of the box, the container serves: + +| Model ID | Created | Owner | +|----------|---------|-------| +| `gpt-4o` | 1715367049 | openai | +| `gpt-4o-mini` | 1721172741 | openai | +| `gpt-3.5-turbo` | 1677610602 | openai | + +## Key Rules + +1. **Mirror is the default** — without tool calls or error triggers, the server echoes the last user message. +2. **Strategy order matters** — first non-empty result wins; remaining strategies are skipped. +3. **Model must be valid** — model validation runs before strategies; unknown models → 404. +4. 
**Auth is optional** — no `api-key` in config = all requests allowed. +5. **Config path in Docker is `/app/config.yaml`** — mount with `-v ./config.yaml:/app/config.yaml:ro`. +6. **Use `docker container` syntax** — always `docker container run`, `docker container stop`, etc. diff --git a/.agents/skills/llmock/references/config.yaml b/.agents/skills/llmock/references/config.yaml new file mode 100644 index 0000000..37c1841 --- /dev/null +++ b/.agents/skills/llmock/references/config.yaml @@ -0,0 +1,35 @@ +# llmock Configuration +# Each section is consumed by its respective router/component + +# API key for authentication (optional - if not set, no auth required) +api-key: "your-secret-api-key" + +# CORS configuration +cors: + allow-origins: + - "http://localhost:8000" + +# Models configuration (used by models router) +models: + - id: "gpt-4o" + created: 1715367049 + owned_by: "openai" + - id: "gpt-4o-mini" + created: 1721172741 + owned_by: "openai" + - id: "gpt-3.5-turbo" + created: 1677610602 + owned_by: "openai" + +# Ordered list of strategies to try. The first strategy that returns a +# non-empty result wins. At least one must be set. 
+# +# Available strategies: +# MirrorStrategy - echoes back the last user message +# ToolCallStrategy - triggered by `call tool '' with ''` phrase in last user message +# ErrorStrategy - triggered by `raise error ` phrase in last user message +# +strategies: + - ErrorStrategy + - ToolCallStrategy + - MirrorStrategy From b0b4fead6aad4a126c34858e3b0402fb265a5338 Mon Sep 17 00:00:00 2001 From: guenhter Date: Wed, 4 Mar 2026 14:42:07 +0100 Subject: [PATCH 08/10] docs: update instructions on WHITEBOX testing --- AGENTS.md | 5 ++++ .../docs/learnings/INSTRUCTION_UPDATES.md | 25 +++++++++++++++++++ .../docs/learnings/INSTRUCTION_UPDATES.md | 10 ++++++++ 3 files changed, 40 insertions(+) diff --git a/AGENTS.md b/AGENTS.md index 95d25ad..f7429e4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -40,6 +40,7 @@ This project is designed for AI-first development. All agents MUST follow these - **PROCESS**: 1. Every work package MUST include tests. 2. Tests MUST pass before the task is marked `completed` in the todo list. + 3. **NO WHITEBOX TESTING**: Tests MUST only test the **public interface / observable behavior**. NEVER test private/internal functions directly. Private helpers are covered indirectly through the public API. This applies to every layer: backend modules, API endpoints, frontend components, and utility files. ### 5. Code Quality Gate (MANDATORY) - **RULE**: ALWAYS run linting and formatting before completing any code task. @@ -128,6 +129,8 @@ Before any backend work, read relevant architecture documents: - **Test Coverage**: Always add unit tests for new features or bug fixes - **Test Isolation**: Use mocking for external dependencies - **Atomic Tests**: Each test function should test one specific behavior +- **NO WHITEBOX TESTING — Behavior Testing Only**: Only test the public interface of a module/class/endpoint. NEVER import or directly call functions prefixed with `_`. NEVER assert on internal instance attributes or private state. 
Private/internal logic is tested indirectly through the public API. If you feel the need to test a private function directly, it is a signal that the public API test coverage is insufficient — fix the public tests instead. +- **Coverage targets**: Happy paths (e.g. streaming, non-streaming, tool calling) AND error paths (e.g. connection errors, timeouts, invalid input, unavailable dependencies) — all exercised through the public API only. ### Persistence @@ -175,6 +178,8 @@ pnpm test # Run javascript unit tests (vitest) pnpm check # Run linter ``` +- **NO WHITEBOX TESTING — Behavior Testing Only**: Only test public component behavior and exported functions. NEVER directly test internal/private helpers or assert on internal component state. Internal logic is tested indirectly through the public API. Tests should cover happy paths and error paths. If you feel the need to test an unexported helper, improve the public-API test coverage instead. + ### Frontend ## E2E Testing diff --git a/backend/omni/docs/learnings/INSTRUCTION_UPDATES.md b/backend/omni/docs/learnings/INSTRUCTION_UPDATES.md index a31f128..5ffb87a 100644 --- a/backend/omni/docs/learnings/INSTRUCTION_UPDATES.md +++ b/backend/omni/docs/learnings/INSTRUCTION_UPDATES.md @@ -9,3 +9,28 @@ This file tracks corrections provided by the user to improve future performance. - **New Rule**: How to prevent this in the future? (Update `AGENTS.md` if necessary) --- + +### 2026-03-04 - Test structure for StrandsAgentChatModule +- **Mistake**: Tests imported and directly tested private/internal functions (e.g. `_resolve_request_tools`, `_parse_model`, `_extract_last_user_message`, `_create_http_tool`). +- **Correction**: Tests should only exercise the public interface (`__init__`, `generate_response`). Private helpers are tested indirectly through those public methods. +- **New Rule**: Only test public functions/behavior. Never import or directly test functions prefixed with `_`. 
Cover happy paths (streaming, non-streaming, tool calling) and error paths (LLM unreachable, LLM errors, tool not available, tool errors) through the public API. Updated `AGENTS.md` backend testing section. + +### 2026-03-04 - No white-box assertions on internal state +- **Mistake**: Tests asserted on internal fields like `module.provider_module is not None` or `module.tool_registry is registry`. +- **Correction**: Tests should only verify observable behavior (e.g. construction succeeds/fails, method returns expected result), never assert on internal instance attributes. +- **New Rule**: Never assert on internal object fields/state in tests. Only verify behavior: does it raise? Does the return value match expectations? Does it produce the correct side-effects? + +### 2026-03-04 - No `patch` in tests — use testcontainers + pytest-httpserver +- **Mistake**: Used `unittest.mock.patch` to spy on `_create_agent` and mock `httpx.Client` to test tool handler behaviour. +- **Correction**: `patch` must not be used. The file under test should be modular enough that all external dependencies can be configured. For the LLM side use an llmock testcontainer; for tool HTTP endpoints use `pytest-httpserver`. +- **New Rule**: Never use `patch`. For tool invocation tests, configure llmock with `ToolCallStrategy` and point `ToolDefinition.url` at a `pytest-httpserver` instance. + +### 2026-03-04 - Explicit user directive: no whitebox testing anywhere +- **Mistake**: Writing tests that target individual private/internal functions to achieve coverage. +- **Correction**: All tests must exercise only the public interface. If internal logic needs coverage, improve public-API tests, not private-function tests. +- **New Rule**: NO WHITEBOX TESTING. Never test `_prefixed` functions or assert on private object state. A test that does so is incorrect by definition and must be rewritten to go through the public API. Updated `AGENTS.md`. 
+ +### 2026-03-04 - llmock v2: no `/v1` path prefix, use trailing slash in base_url +- **Mistake**: Passed `base_url = f"{root_url}/v1"` — the updated llmock no longer mounts routes under `/v1`. +- **Correction**: All llmock endpoints are now at the root (`/chat/completions`, `/models`, `/health`). Pass `base_url = f"{root_url}/"` (trailing slash) so the OpenAI SDK does not append `/v1`. +- **New Rule**: Always use `base_url = "http://:/"` (trailing slash) when connecting to llmock. The SKILL.md has been updated. diff --git a/frontend_omni/docs/learnings/INSTRUCTION_UPDATES.md b/frontend_omni/docs/learnings/INSTRUCTION_UPDATES.md index a31f128..c71a69c 100644 --- a/frontend_omni/docs/learnings/INSTRUCTION_UPDATES.md +++ b/frontend_omni/docs/learnings/INSTRUCTION_UPDATES.md @@ -9,3 +9,13 @@ This file tracks corrections provided by the user to improve future performance. - **New Rule**: How to prevent this in the future? (Update `AGENTS.md` if necessary) --- + +### 2026-03-04 - Testing philosophy: behavior over internals +- **Mistake**: Tests directly tested internal/private helper functions instead of public behavior. +- **Correction**: Tests must only exercise the public interface / exported API. Internal helpers are covered indirectly. +- **New Rule**: Only test public component behavior and exported functions. Never directly test internal helpers. Cover happy paths and error paths through the public API. Updated `AGENTS.md` frontend testing section. + +### 2026-03-04 - Explicit user directive: no whitebox testing anywhere +- **Mistake**: Writing tests that target unexported helpers or assert on internal component state. +- **Correction**: All tests must go through the public/exported API only. If internal logic feels undertested, the fix is better public-API test cases, not exporting or directly calling internal code. +- **New Rule**: NO WHITEBOX TESTING. If you need to test internal logic, improve the public-API test coverage instead. Updated `AGENTS.md`. 
From 79627dddf2ff1563fd48df2e71f549e59bf9d00e Mon Sep 17 00:00:00 2001 From: guenhter Date: Thu, 5 Mar 2026 07:06:12 +0100 Subject: [PATCH 09/10] chore: work over agent tests --- backend/omni/pyproject.toml | 2 + .../chat/__tests__/test_strands_agent_chat.py | 1351 ++++++++--------- .../modai/modules/chat/openai_agent_chat.py | 1 + .../tools/__tests__/test_tools_web_module.py | 18 +- 4 files changed, 686 insertions(+), 686 deletions(-) diff --git a/backend/omni/pyproject.toml b/backend/omni/pyproject.toml index 619dcf4..69edd18 100644 --- a/backend/omni/pyproject.toml +++ b/backend/omni/pyproject.toml @@ -22,7 +22,9 @@ dev = [ "datamodel-code-generator[ruff]", "pytest", "pytest-asyncio", + "pytest-httpserver", "ruff", + "testcontainers", ] [tool.pytest.ini_options] diff --git a/backend/omni/src/modai/modules/chat/__tests__/test_strands_agent_chat.py b/backend/omni/src/modai/modules/chat/__tests__/test_strands_agent_chat.py index 85fd9bd..41daa43 100644 --- a/backend/omni/src/modai/modules/chat/__tests__/test_strands_agent_chat.py +++ b/backend/omni/src/modai/modules/chat/__tests__/test_strands_agent_chat.py @@ -1,227 +1,60 @@ -"""Tests for the StrandsAgentChatModule.""" +"""Tests for StrandsAgentChatModule — public interface only. +Every test exercises only the two public entry-points: + * ``StrandsAgentChatModule.__init__`` + * ``StrandsAgentChatModule.generate_response`` + +Internal / private helpers are tested **indirectly** through these methods. + +A llmock testcontainer (``ghcr.io/modai-systems/llmock:latest``) is used as +a deterministic mock LLM server. By default it echoes the last user message +back (MirrorStrategy) and can return HTTP errors via ErrorStrategy triggers. 
+""" + +import json import os +import time from pathlib import Path +from typing import Any +from unittest.mock import AsyncMock, Mock +import httpx as httpx_lib +import openai import pytest +import yaml from dotenv import find_dotenv, load_dotenv -from unittest.mock import AsyncMock, Mock, patch -from dataclasses import dataclass, field - from fastapi import Request +from testcontainers.core.container import DockerContainer from modai.module import ModuleDependencies -from modai.modules.chat.openai_agent_chat import ( - StrandsAgentChatModule, - _parse_model, - _extract_last_user_message, - _build_conversation_history, - _to_strands_message, - _message_text, - _build_openai_response, - _extract_tool_names, - _resolve_request_tools, - _create_http_tool, - _extract_operation, -) +from modai.modules.chat.openai_agent_chat import StrandsAgentChatModule from modai.modules.model_provider.module import ( ModelProviderResponse, ModelProvidersListResponse, ) from modai.modules.tools.module import ToolDefinition -import openai working_dir = Path.cwd() load_dotenv(find_dotenv(str(working_dir / ".env"))) # --------------------------------------------------------------------------- -# Fixtures -# --------------------------------------------------------------------------- - - -def _make_provider(name: str = "myprovider") -> ModelProviderResponse: - return ModelProviderResponse( - id="provider_1", - type="openai", - name=name, - base_url="https://api.openai.com/v1", - api_key="sk-test-key", - properties={}, - created_at=None, - updated_at=None, - ) - - -def _make_mock_provider_module(providers: list[ModelProviderResponse] | None = None): - providers = providers or [_make_provider()] - mock = Mock() - mock.get_providers = AsyncMock( - return_value=ModelProvidersListResponse( - providers=providers, - total=len(providers), - limit=None, - offset=None, - ) - ) - return mock - - -def _make_dependencies(provider_module=None): - provider_module = provider_module or 
_make_mock_provider_module() - deps = ModuleDependencies({"llm_provider_module": provider_module}) - return deps - - -# --------------------------------------------------------------------------- -# _parse_model +# llmock container # --------------------------------------------------------------------------- +LLMOCK_IMAGE = "ghcr.io/modai-systems/llmock:latest" +LLMOCK_PORT = 8000 +LLMOCK_API_KEY = "test-key" -class TestParseModel: - def test_valid_model(self): - provider, model = _parse_model("myprovider/gpt-4o") - assert provider == "myprovider" - assert model == "gpt-4o" - - def test_valid_model_with_slash_in_name(self): - provider, model = _parse_model("myprovider/azure/gpt-5") - assert provider == "myprovider" - assert model == "azure/gpt-5" - - def test_invalid_no_slash(self): - with pytest.raises(ValueError, match="Invalid model format"): - _parse_model("gpt-4o") - - -# --------------------------------------------------------------------------- -# _extract_last_user_message -# --------------------------------------------------------------------------- - - -class TestExtractLastUserMessage: - def test_string_input(self): - body = {"input": "Hello there"} - assert _extract_last_user_message(body) == "Hello there" - - def test_list_simple_content(self): - body = {"input": [{"role": "user", "content": "Hi"}]} - assert _extract_last_user_message(body) == "Hi" - - def test_list_structured_content(self): - body = { - "input": [ - {"role": "user", "content": [{"type": "input_text", "text": "Hello"}]} - ] - } - assert _extract_last_user_message(body) == "Hello" - - def test_multiple_messages_returns_last(self): - body = { - "input": [ - {"role": "user", "content": "First"}, - {"role": "assistant", "content": "Response"}, - {"role": "user", "content": "Second"}, - ] - } - assert _extract_last_user_message(body) == "Second" - - def test_empty_input(self): - assert _extract_last_user_message({"input": ""}) == "" - assert _extract_last_user_message({"input": []}) == "" - 
assert _extract_last_user_message({}) == "" - - -# --------------------------------------------------------------------------- -# _build_conversation_history -# --------------------------------------------------------------------------- - - -class TestBuildConversationHistory: - def test_string_input_returns_empty(self): - assert _build_conversation_history({"input": "Hello"}) == [] - - def test_single_message_returns_empty(self): - body = {"input": [{"role": "user", "content": "Hi"}]} - assert _build_conversation_history(body) == [] - - def test_multi_turn_excludes_last(self): - body = { - "input": [ - {"role": "user", "content": "Hello"}, - {"role": "assistant", "content": "Hi"}, - {"role": "user", "content": "How are you?"}, - ] - } - history = _build_conversation_history(body) - assert len(history) == 2 - assert history[0]["role"] == "user" - assert history[0]["content"] == [{"text": "Hello"}] - assert history[1]["role"] == "assistant" - assert history[1]["content"] == [{"text": "Hi"}] - - -# --------------------------------------------------------------------------- -# _to_strands_message / _message_text -# --------------------------------------------------------------------------- - - -class TestMessageConversion: - def test_to_strands_message_simple(self): - msg = _to_strands_message({"role": "user", "content": "Hello"}) - assert msg == {"role": "user", "content": [{"text": "Hello"}]} - - def test_to_strands_message_structured(self): - msg = _to_strands_message( - {"role": "assistant", "content": [{"type": "output_text", "text": "Hey"}]} - ) - assert msg == {"role": "assistant", "content": [{"text": "Hey"}]} - - def test_message_text_string(self): - assert _message_text("Hello") == "Hello" - - def test_message_text_dict_string_content(self): - assert _message_text({"content": "Hello"}) == "Hello" - - def test_message_text_dict_list_content(self): - assert ( - _message_text({"content": [{"type": "input_text", "text": "Hi"}]}) == "Hi" - ) - - def 
test_message_text_none(self): - assert _message_text(None) == "" - - -# --------------------------------------------------------------------------- -# _build_openai_response -# --------------------------------------------------------------------------- - - -class TestBuildOpenAIResponse: - def test_builds_valid_response(self): - resp = _build_openai_response( - text="Hello!", - model="gpt-4o", - response_id="resp_test123", - msg_id="msg_test456", - input_tokens=10, - output_tokens=5, - ) - assert isinstance(resp, openai.types.responses.Response) - assert resp.id == "resp_test123" - assert resp.model == "gpt-4o" - assert resp.status == "completed" - assert resp.output[0].content[0].text == "Hello!" - assert resp.usage.input_tokens == 10 - assert resp.usage.output_tokens == 5 - assert resp.usage.total_tokens == 15 - - -# --------------------------------------------------------------------------- -# _extract_tool_names -# --------------------------------------------------------------------------- +LLMOCK_CONFIG: dict[str, Any] = { + "api-key": LLMOCK_API_KEY, + "models": [ + {"id": "gpt-4o", "created": 1715367049, "owned_by": "openai"}, + ], + "strategies": ["ErrorStrategy", "ToolCallStrategy", "MirrorStrategy"], +} -SAMPLE_OPENAPI_SPEC = { +SAMPLE_TOOL_OPENAPI_SPEC: dict[str, Any] = { "openapi": "3.1.0", "info": {"title": "Calculator Tool", "version": "1.0.0"}, "paths": { @@ -238,7 +71,7 @@ def test_builds_valid_response(self): "properties": { "expression": { "type": "string", - "description": "Math expression to evaluate", + "description": "Math expression", } }, "required": ["expression"], @@ -265,613 +98,771 @@ def test_builds_valid_response(self): } -class TestExtractToolNames: - def test_extracts_function_tool_names(self): +@pytest.fixture(scope="module") +def llmock_base_url( + request: pytest.FixtureRequest, tmp_path_factory: pytest.TempPathFactory +) -> str: + """llmock container with ErrorStrategy + ToolCallStrategy + MirrorStrategy (module-scoped).""" + 
config_file: Path = tmp_path_factory.mktemp("llmock") / "config.yaml" + config_file.write_text(yaml.dump(LLMOCK_CONFIG)) + os.chmod(config_file, 0o644) + + container = ( + DockerContainer(LLMOCK_IMAGE) + .with_exposed_ports(LLMOCK_PORT) + .with_volume_mapping(str(config_file), "/app/config.yaml", "ro") + ) + container.start() + + host = container.get_container_host_ip() + port = container.get_exposed_port(LLMOCK_PORT) + root_url = f"http://{host}:{port}" + _wait_for_health(root_url) + + request.addfinalizer(container.stop) + return f"{root_url}/" + + +def _wait_for_health(base_url: str, timeout: float = 30.0) -> None: + """Poll the llmock health endpoint until it responds.""" + deadline = time.time() + timeout + while time.time() < deadline: + try: + resp = httpx_lib.get(f"{base_url}/health", timeout=2.0) + if resp.status_code == 200: + return + except Exception: + pass + time.sleep(0.5) + raise TimeoutError( + f"llmock health check at {base_url}/health did not respond within {timeout}s" + ) + + +# =================================================================== +# 1) Construction tests (__init__) +# =================================================================== + + +class TestConstruction: + """Tests for module construction via __init__.""" + + def test_raises_without_provider_module(self): + deps = ModuleDependencies({}) + with pytest.raises(ValueError, match="llm_provider_module"): + StrandsAgentChatModule(dependencies=deps, config={}) + + def test_creates_successfully_with_valid_dependencies(self): + """Construction with a valid provider module must not raise.""" + StrandsAgentChatModule(dependencies=_make_dependencies(), config={}) + + +# =================================================================== +# 2) Happy-path: non-streaming generate_response +# =================================================================== + + +class TestNonStreamingHappyPath: + """generate_response returns an OpenAI Response when stream is False. 
+ + llmock MirrorStrategy echoes the last user message back, so the + response text matches the input. + """ + + @pytest.mark.asyncio + async def test_response_contains_mirrored_text(self, llmock_base_url): + """llmock MirrorStrategy echoes the last user message.""" + module = _llmock_module(llmock_base_url) body = { - "tools": [ - { - "type": "function", - "function": {"name": "calculate", "description": "calc"}, - }, - { - "type": "function", - "function": {"name": "web_search", "description": "search"}, - }, - ] + "model": "myprovider/gpt-4o", + "input": "Say hello", } - assert _extract_tool_names(body) == ["calculate", "web_search"] - def test_empty_tools(self): - assert _extract_tool_names({"tools": []}) == [] + result = await module.generate_response(_make_request(), body) - def test_no_tools_key(self): - assert _extract_tool_names({"model": "gpt-4o"}) == [] + assert result.status == "completed" + assert "Say hello" in result.output[0].content[0].text - def test_skips_non_function_types(self): + @pytest.mark.asyncio + async def test_response_reports_token_usage(self, llmock_base_url): + module = _llmock_module(llmock_base_url) body = { - "tools": [ - {"type": "code_interpreter"}, - { - "type": "function", - "function": {"name": "calculate"}, - }, - ] + "model": "myprovider/gpt-4o", + "input": "Hi", } - assert _extract_tool_names(body) == ["calculate"] - def test_skips_missing_name(self): - body = {"tools": [{"type": "function", "function": {"description": "no name"}}]} - assert _extract_tool_names(body) == [] + result = await module.generate_response(_make_request(), body) + assert result.usage.input_tokens > 0 + assert result.usage.output_tokens > 0 + assert result.usage.total_tokens > 0 -# --------------------------------------------------------------------------- -# _extract_operation -# --------------------------------------------------------------------------- + @pytest.mark.asyncio + async def test_multi_turn_conversation_succeeds(self, 
llmock_base_url): + """Multi-turn conversation with prior history produces a valid response.""" + module = _llmock_module(llmock_base_url) + body = { + "model": "myprovider/gpt-4o", + "input": [ + {"role": "user", "content": "Hi"}, + {"role": "assistant", "content": "Hello!"}, + {"role": "user", "content": "How are you?"}, + ], + } + result = await module.generate_response(_make_request(), body) -class TestExtractOperation: - def test_extracts_first_operation(self): - op = _extract_operation(SAMPLE_OPENAPI_SPEC) - assert op is not None - assert op["operationId"] == "calculate" - assert op["summary"] == "Evaluate a math expression" + assert isinstance(result, openai.types.responses.Response) + assert result.status == "completed" + assert len(result.output) > 0 - def test_returns_none_for_empty_spec(self): - assert _extract_operation({}) is None - assert _extract_operation({"paths": {}}) is None + @pytest.mark.asyncio + async def test_system_prompt_from_instructions_field(self, llmock_base_url): + """The 'instructions' field is accepted and the response succeeds.""" + module = _llmock_module(llmock_base_url) + body = { + "model": "myprovider/gpt-4o", + "input": "Hi", + "instructions": "You are a pirate.", + } - def test_skips_non_dict_operations(self): - spec = {"paths": {"/foo": {"post": "not a dict"}}} - assert _extract_operation(spec) is None + result = await module.generate_response(_make_request(), body) - def test_skips_operations_without_operation_id(self): - spec = {"paths": {"/foo": {"post": {"summary": "no id"}}}} - assert _extract_operation(spec) is None + assert isinstance(result, openai.types.responses.Response) + assert result.status == "completed" -# --------------------------------------------------------------------------- -# _create_http_tool -# --------------------------------------------------------------------------- +# =================================================================== +# 3) Happy-path: streaming generate_response +# 
=================================================================== -class TestCreateHttpTool: - def test_creates_tool_from_valid_definition(self): - tool_def = ToolDefinition( - url="http://calc:8000/calculate", - method="POST", - openapi_spec=SAMPLE_OPENAPI_SPEC, - ) - tool = _create_http_tool(tool_def) - assert tool is not None - assert tool.tool_name == "calculate" - assert tool.tool_spec["name"] == "calculate" - assert tool.tool_spec["description"] == "Evaluate a math expression" - schema = tool.tool_spec["inputSchema"]["json"] - assert "expression" in schema["properties"] - - def test_returns_none_for_empty_spec(self): - tool_def = ToolDefinition( - url="http://calc:8000/calculate", - method="POST", - openapi_spec={"paths": {}}, - ) - assert _create_http_tool(tool_def) is None +class TestStreamingHappyPath: + """generate_response returns an async generator when stream=True.""" - def test_tool_handler_success(self): - tool_def = ToolDefinition( - url="http://calc:8000/calculate", - method="POST", - openapi_spec=SAMPLE_OPENAPI_SPEC, + @pytest.mark.asyncio + async def test_stream_assembled_text_echoes_input(self, llmock_base_url): + """The assembled text from all deltas matches the mirrored input.""" + module = _llmock_module(llmock_base_url) + + gen = await module.generate_response( + _make_request(), + { + "model": "myprovider/gpt-4o", + "input": "Hola Mundo", + "stream": True, + }, ) - tool = _create_http_tool(tool_def) - assert tool is not None - - mock_response = Mock() - mock_response.raise_for_status = Mock() - mock_response.text = '{"result": 42}' - - with patch( - "modai.modules.chat.openai_agent_chat.httpx.Client" - ) as mock_client_cls: - mock_client = Mock() - mock_client.__enter__ = Mock(return_value=mock_client) - mock_client.__exit__ = Mock(return_value=False) - mock_client.request.return_value = mock_response - mock_client_cls.return_value = mock_client - - result = tool._tool_func( - { - "toolUseId": "tu_123", - "name": "calculate", - "input": 
{"expression": "6*7"}, - }, - ) - assert result["status"] == "success" - assert result["toolUseId"] == "tu_123" - assert '{"result": 42}' in result["content"][0]["text"] - mock_client.request.assert_called_once_with( - method="POST", - url="http://calc:8000/calculate", - json={"expression": "6*7"}, + events = [e async for e in gen] + + full_text = "".join( + e.delta + for e in events + if getattr(e, "type", None) == "response.output_text.delta" ) + assert "Hola Mundo" in full_text + assert len(events) > 2 # at least created + completed events - def test_tool_handler_http_error(self): - tool_def = ToolDefinition( - url="http://calc:8000/calculate", - method="POST", - openapi_spec=SAMPLE_OPENAPI_SPEC, + @pytest.mark.asyncio + async def test_stream_completed_response_is_valid(self, llmock_base_url): + """The final completed event carries a valid OpenAI Response.""" + module = _llmock_module(llmock_base_url) + + gen = await module.generate_response( + _make_request(), + { + "model": "myprovider/gpt-4o", + "input": "Test message", + "stream": True, + }, ) - tool = _create_http_tool(tool_def) - assert tool is not None - - with patch( - "modai.modules.chat.openai_agent_chat.httpx.Client" - ) as mock_client_cls: - mock_client = Mock() - mock_client.__enter__ = Mock(return_value=mock_client) - mock_client.__exit__ = Mock(return_value=False) - mock_client.request.side_effect = Exception("Connection refused") - mock_client_cls.return_value = mock_client - - result = tool._tool_func( - { - "toolUseId": "tu_456", - "name": "calculate", - "input": {"expression": "1/0"}, - }, - ) - assert result["status"] == "error" - assert result["toolUseId"] == "tu_456" - assert "Connection refused" in result["content"][0]["text"] + events = [e async for e in gen] + completed = events[-1] + assert isinstance(completed.response, openai.types.responses.Response) + assert completed.response.status == "completed" + assert len(completed.response.output) > 0 + assert 
completed.response.output[0].content[0].text != "" -# --------------------------------------------------------------------------- -# _resolve_request_tools -# --------------------------------------------------------------------------- +# =================================================================== +# 4) Happy-path: tool calling +# =================================================================== -class TestResolveRequestTools: - @pytest.mark.asyncio - async def test_returns_empty_when_no_registry(self): - body = { - "tools": [ - {"type": "function", "function": {"name": "calculate"}}, - ] - } - result = await _resolve_request_tools(body, None) - assert result == [] - @pytest.mark.asyncio - async def test_returns_empty_when_no_tools_in_request(self): - mock_registry = Mock() - result = await _resolve_request_tools({"model": "gpt-4o"}, mock_registry) - assert result == [] +class TestToolCallingHappyPath: + """Tools are resolved from the registry and forwarded to the agent.""" + + def _make_tool_registry(self, tool_def: ToolDefinition | None = None) -> Mock: + registry = Mock() + if tool_def: + registry.get_tool_by_name = AsyncMock(return_value=tool_def) + else: + registry.get_tool_by_name = AsyncMock(return_value=None) + return registry @pytest.mark.asyncio - async def test_resolves_tools_from_registry(self): + async def test_actual_http_call_reaches_tool_endpoint( + self, llmock_base_url, httpserver + ): + """The tool HTTP endpoint receives the POST request with the correct JSON body. + + A real local HTTP server (pytest-httpserver) acts as the tool endpoint. + ToolCallStrategy fires exactly once when the user message contains the + trigger phrase. The tool responds with a result; on the next turn + MirrorStrategy takes over and the agent completes successfully. + This is a full end-to-end exercise of the httpx.Client call in _create_http_tool. 
+ """ + from werkzeug.wrappers import Response as WerkzeugResponse + + captured_body: dict[str, Any] = {} + + def _capture(request): + nonlocal captured_body + captured_body = request.get_json() + return WerkzeugResponse( + json.dumps({"result": 42}), + status=200, + content_type="application/json", + ) + + httpserver.expect_oneshot_request( + "/calculate", method="POST" + ).respond_with_handler(_capture) + tool_def = ToolDefinition( - url="http://calc:8000/calculate", + url=httpserver.url_for("/calculate"), method="POST", - openapi_spec=SAMPLE_OPENAPI_SPEC, + openapi_spec=SAMPLE_TOOL_OPENAPI_SPEC, ) - mock_registry = Mock() - mock_registry.get_tool_by_name = AsyncMock(return_value=tool_def) + registry = self._make_tool_registry(tool_def) + module = _llmock_module(llmock_base_url, tool_registry=registry) body = { - "tools": [ - {"type": "function", "function": {"name": "calculate"}}, - ] + "model": "myprovider/gpt-4o", + "input": "call tool 'calculate' with '{\"expression\": \"6*7\"}'", + "tools": [{"type": "function", "function": {"name": "calculate"}}], } - result = await _resolve_request_tools(body, mock_registry) - assert len(result) == 1 - assert result[0].tool_name == "calculate" - mock_registry.get_tool_by_name.assert_called_once_with("calculate") + + result = await module.generate_response(_make_request(), body) + + assert result.status == "completed" + httpserver.check_assertions() + assert isinstance(captured_body, dict) + assert "expression" in captured_body + assert captured_body["expression"] == "6*7" @pytest.mark.asyncio - async def test_skips_unknown_tools(self): - mock_registry = Mock() - mock_registry.get_tool_by_name = AsyncMock(return_value=None) + async def test_actual_http_call_reaches_tool_endpoint_streaming( + self, llmock_base_url, httpserver + ): + """Same as above but for streaming: the tool endpoint receives the call + and the stream completes successfully. 
+ """ + from werkzeug.wrappers import Response as WerkzeugResponse + + captured_body: dict[str, Any] = {} + + def _capture(request): + nonlocal captured_body + captured_body = request.get_json() + return WerkzeugResponse( + json.dumps({"result": 42}), + status=200, + content_type="application/json", + ) + + httpserver.expect_oneshot_request( + "/calculate", method="POST" + ).respond_with_handler(_capture) + + tool_def = ToolDefinition( + url=httpserver.url_for("/calculate"), + method="POST", + openapi_spec=SAMPLE_TOOL_OPENAPI_SPEC, + ) + registry = self._make_tool_registry(tool_def) + module = _llmock_module(llmock_base_url, tool_registry=registry) body = { - "tools": [ - {"type": "function", "function": {"name": "unknown_tool"}}, - ] + "model": "myprovider/gpt-4o", + "input": "call tool 'calculate' with '{\"expression\": \"6*7\"}'", + "tools": [{"type": "function", "function": {"name": "calculate"}}], + "stream": True, } - result = await _resolve_request_tools(body, mock_registry) - assert result == [] + gen = await module.generate_response(_make_request(), body) + events = [e async for e in gen] -# --------------------------------------------------------------------------- -# StrandsAgentChatModule.__init__ -# --------------------------------------------------------------------------- + assert events[-1].type == "response.completed" + httpserver.check_assertions() + assert isinstance(captured_body, dict) + assert "expression" in captured_body -class TestStrandsAgentChatModuleInit: - def test_raises_without_provider(self): - deps = ModuleDependencies({}) - with pytest.raises(ValueError, match="llm_provider_module"): - StrandsAgentChatModule(dependencies=deps, config={}) +# =================================================================== +# 5) Error-path: invalid model / provider issues +# =================================================================== - def test_creates_with_provider(self): - deps = _make_dependencies() - module = 
StrandsAgentChatModule(dependencies=deps, config={}) - assert module.provider_module is not None - - def test_tool_registry_is_none_when_not_configured(self): - deps = _make_dependencies() - module = StrandsAgentChatModule(dependencies=deps, config={}) - assert module.tool_registry is None - - def test_tool_registry_set_when_configured(self): - mock_registry = Mock() - provider_module = _make_mock_provider_module() - deps = ModuleDependencies( - {"llm_provider_module": provider_module, "tool_registry": mock_registry} - ) - module = StrandsAgentChatModule(dependencies=deps, config={}) - assert module.tool_registry is mock_registry +class TestModelAndProviderErrors: + """Errors when the model string is malformed or provider is unknown.""" -# --------------------------------------------------------------------------- -# StrandsAgentChatModule.generate_response (mocked) -# --------------------------------------------------------------------------- + @pytest.mark.asyncio + async def test_invalid_model_format_no_slash(self): + module = StrandsAgentChatModule(dependencies=_make_dependencies(), config={}) + with pytest.raises(ValueError, match="Invalid model format"): + await module.generate_response( + _make_request(), + {"model": "gpt-4o", "input": "Hi"}, + ) + @pytest.mark.asyncio + async def test_invalid_model_format_empty_provider(self): + module = StrandsAgentChatModule(dependencies=_make_dependencies(), config={}) + with pytest.raises(ValueError, match="Invalid model format"): + await module.generate_response( + _make_request(), + {"model": "/gpt-4o", "input": "Hi"}, + ) -@dataclass -class _FakeUsage: - inputTokens: int = 10 - outputTokens: int = 20 - totalTokens: int = 30 + @pytest.mark.asyncio + async def test_invalid_model_format_empty_model(self): + module = StrandsAgentChatModule(dependencies=_make_dependencies(), config={}) + with pytest.raises(ValueError, match="Invalid model format"): + await module.generate_response( + _make_request(), + {"model": 
"provider/", "input": "Hi"}, + ) - def get(self, key, default=0): - return getattr(self, key, default) + @pytest.mark.asyncio + async def test_provider_not_found(self): + module = StrandsAgentChatModule(dependencies=_make_dependencies(), config={}) + with pytest.raises(ValueError, match="Provider 'unknown' not found"): + await module.generate_response( + _make_request(), + {"model": "unknown/gpt-4o", "input": "Hi"}, + ) + @pytest.mark.asyncio + async def test_provider_module_raises_propagates(self): + """If the provider module itself raises, the error propagates.""" + provider_module = Mock() + provider_module.get_providers = AsyncMock( + side_effect=RuntimeError("DB connection lost") + ) + module = StrandsAgentChatModule( + dependencies=_make_dependencies(provider_module=provider_module), config={} + ) -@dataclass -class _FakeMetrics: - accumulated_usage: dict = field( - default_factory=lambda: { - "inputTokens": 10, - "outputTokens": 20, - "totalTokens": 30, - } - ) + with pytest.raises(RuntimeError, match="DB connection lost"): + await module.generate_response( + _make_request(), + {"model": "myprovider/gpt-4o", "input": "Hi"}, + ) -@dataclass -class _FakeAgentResult: - text: str = "Mocked response" - metrics: _FakeMetrics = field(default_factory=_FakeMetrics) - stop_reason: str = "end_turn" - message: dict = field( - default_factory=lambda: { - "role": "assistant", - "content": [{"text": "Mocked response"}], - } - ) +# =================================================================== +# 6) Error-path: LLM unreachable / LLM errors +# =================================================================== - def __str__(self) -> str: - return self.text +class TestLLMErrors: + """Errors during the actual LLM call (non-streaming and streaming).""" -@pytest.mark.asyncio -async def test_generate_response_non_streaming(): - """Non-streaming generate_response returns an OpenAI Response.""" - deps = _make_dependencies() - module = StrandsAgentChatModule(dependencies=deps, 
config={}) - request = Mock(spec=Request) + @pytest.mark.asyncio + async def test_non_streaming_error_trigger_429(self, llmock_base_url): + """llmock ErrorStrategy returns 429 when message matches trigger.""" + module = _llmock_module(llmock_base_url) - fake_result = _FakeAgentResult() + with pytest.raises(Exception): + await module.generate_response( + _make_request(), + { + "model": "myprovider/gpt-4o", + "input": 'raise error {"code": 429, "message": "Rate limit exceeded"}', + }, + ) - with ( - patch( - "modai.modules.chat.openai_agent_chat._create_agent" - ) as mock_create_agent, - patch("asyncio.to_thread", new_callable=AsyncMock, return_value=fake_result), - ): - mock_agent = Mock() - mock_create_agent.return_value = mock_agent + @pytest.mark.asyncio + async def test_non_streaming_error_trigger_500(self, llmock_base_url): + """llmock ErrorStrategy returns 500 when message matches trigger.""" + module = _llmock_module(llmock_base_url) - body = { - "model": "myprovider/gpt-4o", - "input": [{"role": "user", "content": "Hello"}], - } + with pytest.raises(Exception): + await module.generate_response( + _make_request(), + { + "model": "myprovider/gpt-4o", + "input": 'raise error {"code": 500, "message": "Internal server error"}', + }, + ) - result = await module.generate_response(request, body) + @pytest.mark.asyncio + async def test_non_streaming_connection_error(self): + """Connection error when the LLM is unreachable.""" + provider = _make_provider(base_url="http://localhost:1/v1", api_key="unused") + module = StrandsAgentChatModule( + dependencies=_make_dependencies( + provider_module=_make_provider_module([provider]) + ), + config={}, + ) - assert isinstance(result, openai.types.responses.Response) - assert result.status == "completed" - assert result.output[0].content[0].text == "Mocked response" - assert result.usage.input_tokens == 10 - assert result.usage.output_tokens == 20 + with pytest.raises(Exception): + await module.generate_response( + 
_make_request(), + {"model": "myprovider/gpt-4o", "input": "Hi"}, + ) + @pytest.mark.asyncio + async def test_streaming_error_trigger(self, llmock_base_url): + """Error during streaming when llmock ErrorStrategy is triggered.""" + module = _llmock_module(llmock_base_url) + + gen = await module.generate_response( + _make_request(), + { + "model": "myprovider/gpt-4o", + "input": 'raise error {"code": 500, "message": "Internal server error"}', + "stream": True, + }, + ) -@pytest.mark.asyncio -async def test_generate_response_streaming(): - """Streaming generate_response returns an async generator of events.""" - deps = _make_dependencies() - module = StrandsAgentChatModule(dependencies=deps, config={}) - request = Mock(spec=Request) + with pytest.raises(Exception): + async for _ in gen: + pass - async def fake_stream_async(prompt): - yield {"data": "Hello"} - yield {"data": " world"} + @pytest.mark.asyncio + async def test_streaming_connection_error(self): + """Connection error during streaming when LLM is unreachable.""" + provider = _make_provider(base_url="http://localhost:1/v1", api_key="unused") + module = StrandsAgentChatModule( + dependencies=_make_dependencies( + provider_module=_make_provider_module([provider]) + ), + config={}, + ) - with patch( - "modai.modules.chat.openai_agent_chat._create_agent" - ) as mock_create_agent: - mock_agent = Mock() - mock_agent.stream_async = fake_stream_async - mock_create_agent.return_value = mock_agent + gen = await module.generate_response( + _make_request(), + { + "model": "myprovider/gpt-4o", + "input": "Hi", + "stream": True, + }, + ) - body = { - "model": "myprovider/gpt-4o", - "input": [{"role": "user", "content": "Hi"}], - "stream": True, - } + with pytest.raises(Exception): + async for _ in gen: + pass - result = await module.generate_response(request, body) - # Result should be an async generator - assert hasattr(result, "__aiter__") +# =================================================================== +# 7) 
Error-path: tool not available / tool errors +# =================================================================== - events = [] - async for event in result: - events.append(event) - # Expected: created, 2 text deltas, text done, completed - assert len(events) == 5 +class TestToolErrors: + """Errors during tool resolution and tool invocation.""" - # First event is response.created - assert events[0].type == "response.created" + @pytest.mark.asyncio + async def test_unknown_tool_is_silently_skipped(self, llmock_base_url): + """A tool name not found in the registry is skipped; response still succeeds.""" + registry = Mock() + registry.get_tool_by_name = AsyncMock(return_value=None) - # Delta events - assert events[1].type == "response.output_text.delta" - assert events[1].delta == "Hello" - assert events[2].type == "response.output_text.delta" - assert events[2].delta == " world" + module = _llmock_module(llmock_base_url, tool_registry=registry) + body = { + "model": "myprovider/gpt-4o", + "input": "Hi", + "tools": [ + {"type": "function", "function": {"name": "nonexistent_tool"}}, + ], + } - # Text done - assert events[3].type == "response.output_text.done" - assert events[3].text == "Hello world" + result = await module.generate_response(_make_request(), body) - # Completed - assert events[4].type == "response.completed" - assert events[4].response.output[0].content[0].text == "Hello world" + assert isinstance(result, openai.types.responses.Response) + assert result.status == "completed" + @pytest.mark.asyncio + async def test_tool_with_invalid_openapi_spec_is_skipped(self, llmock_base_url): + """A tool whose OpenAPI spec has no valid operation is skipped.""" + bad_tool_def = ToolDefinition( + url="http://broken:8000/noop", + method="POST", + openapi_spec={"paths": {}}, # no operations + ) + registry = Mock() + registry.get_tool_by_name = AsyncMock(return_value=bad_tool_def) -@pytest.mark.asyncio -async def test_generate_response_with_tools(): - """Tools from the 
request body are resolved and passed to the agent.""" - tool_def = ToolDefinition( - url="http://calc:8000/calculate", - method="POST", - openapi_spec=SAMPLE_OPENAPI_SPEC, - ) - mock_registry = Mock() - mock_registry.get_tool_by_name = AsyncMock(return_value=tool_def) + module = _llmock_module(llmock_base_url, tool_registry=registry) + body = { + "model": "myprovider/gpt-4o", + "input": "Hi", + "tools": [ + {"type": "function", "function": {"name": "broken_tool"}}, + ], + } - provider_module = _make_mock_provider_module() - deps = ModuleDependencies( - {"llm_provider_module": provider_module, "tool_registry": mock_registry} - ) - module = StrandsAgentChatModule(dependencies=deps, config={}) - request = Mock(spec=Request) + result = await module.generate_response(_make_request(), body) - fake_result = _FakeAgentResult() + assert isinstance(result, openai.types.responses.Response) + assert result.status == "completed" - with ( - patch( - "modai.modules.chat.openai_agent_chat._create_agent" - ) as mock_create_agent, - patch("asyncio.to_thread", new_callable=AsyncMock, return_value=fake_result), - ): - mock_agent = Mock() - mock_create_agent.return_value = mock_agent + @pytest.mark.asyncio + async def test_tool_registry_error_propagates(self): + """If the tool registry raises, the error propagates.""" + registry = Mock() + registry.get_tool_by_name = AsyncMock( + side_effect=RuntimeError("Registry unavailable") + ) + module = StrandsAgentChatModule( + dependencies=_make_dependencies(tool_registry=registry), config={} + ) body = { "model": "myprovider/gpt-4o", - "input": [{"role": "user", "content": "Calculate 6*7"}], + "input": "Hi", "tools": [ - { - "type": "function", - "function": { - "name": "calculate", - "description": "Evaluate a math expression", - "parameters": { - "type": "object", - "properties": {"expression": {"type": "string"}}, - }, - }, - } + {"type": "function", "function": {"name": "calculate"}}, ], } - result = await module.generate_response(request, 
body) + with pytest.raises(RuntimeError, match="Registry unavailable"): + await module.generate_response(_make_request(), body) - # Verify the tool registry was queried - mock_registry.get_tool_by_name.assert_called_once_with("calculate") + @pytest.mark.asyncio + async def test_tool_invocation_http_error_agent_handles_gracefully( + self, llmock_base_url + ): + """When a tool URL is unreachable the agent receives a tool error. - # Verify _create_agent was called with tools - call_args = mock_create_agent.call_args - tools_arg = call_args[0][3] if len(call_args[0]) > 3 else call_args[1].get("tools") - assert tools_arg is not None - assert len(tools_arg) == 1 - assert tools_arg[0].tool_name == "calculate" + ToolCallStrategy fires exactly once (only when the last conversation + message is a user message). On the next turn the last message is the + tool result, so MirrorStrategy takes over and the agent completes. + """ + tool_def = ToolDefinition( + url="http://localhost:1/calculate", # unreachable + method="POST", + openapi_spec=SAMPLE_TOOL_OPENAPI_SPEC, + ) + registry = Mock() + registry.get_tool_by_name = AsyncMock(return_value=tool_def) - assert isinstance(result, openai.types.responses.Response) + module = _llmock_module(llmock_base_url, tool_registry=registry) + body = { + "model": "myprovider/gpt-4o", + "input": "call tool 'calculate' with '{}'", + "tools": [{"type": "function", "function": {"name": "calculate"}}], + } + result = await module.generate_response(_make_request(), body) + assert result.status == "completed" -@pytest.mark.asyncio -async def test_generate_response_without_tool_registry(): - """Without tool_registry configured, tools in request are ignored.""" - deps = _make_dependencies() - module = StrandsAgentChatModule(dependencies=deps, config={}) - request = Mock(spec=Request) + @pytest.mark.asyncio + async def test_tool_invocation_success_request_sent_to_tool( + self, llmock_base_url, httpserver + ): + """The tool HTTP endpoint receives the call 
forwarded by the agent. - fake_result = _FakeAgentResult() + ``pytest-httpserver`` acts as the real tool endpoint — no patching. + ToolCallStrategy fires exactly once (user-message-only trigger). The + tool responds; on the next turn the last message is the tool result so + MirrorStrategy takes over and returns a completed response. + """ + httpserver.expect_oneshot_request("/calculate").respond_with_json({"result": 4}) - with ( - patch( - "modai.modules.chat.openai_agent_chat._create_agent" - ) as mock_create_agent, - patch("asyncio.to_thread", new_callable=AsyncMock, return_value=fake_result), - ): - mock_agent = Mock() - mock_create_agent.return_value = mock_agent + tool_def = ToolDefinition( + url=httpserver.url_for("/calculate"), + method="POST", + openapi_spec=SAMPLE_TOOL_OPENAPI_SPEC, + ) + registry = Mock() + registry.get_tool_by_name = AsyncMock(return_value=tool_def) + module = _llmock_module(llmock_base_url, tool_registry=registry) body = { "model": "myprovider/gpt-4o", - "input": [{"role": "user", "content": "Hello"}], - "tools": [ - { - "type": "function", - "function": {"name": "calculate"}, - } - ], + "input": "call tool 'calculate' with '{\"expression\": \"2+2\"}'", + "tools": [{"type": "function", "function": {"name": "calculate"}}], } - result = await module.generate_response(request, body) - - # _create_agent should be called with empty tools list - call_args = mock_create_agent.call_args - tools_arg = ( - call_args[0][3] if len(call_args[0]) > 3 else call_args[1].get("tools", []) - ) - assert tools_arg == [] + result = await module.generate_response(_make_request(), body) + assert result.status == "completed" + httpserver.check_assertions() - assert isinstance(result, openai.types.responses.Response) + @pytest.mark.asyncio + async def test_partial_tools_resolved_when_some_missing(self, llmock_base_url): + """When some tools are found and others not, only found tools are used.""" + calc_def = ToolDefinition( + url="http://calc:8000/calculate", + 
method="POST", + openapi_spec=SAMPLE_TOOL_OPENAPI_SPEC, + ) + registry = Mock() + registry.get_tool_by_name = AsyncMock( + side_effect=lambda name: calc_def if name == "calculate" else None + ) + module = _llmock_module(llmock_base_url, tool_registry=registry) + body = { + "model": "myprovider/gpt-4o", + "input": "Do stuff", + "tools": [ + {"type": "function", "function": {"name": "calculate"}}, + {"type": "function", "function": {"name": "missing_tool"}}, + ], + } -# --------------------------------------------------------------------------- -# Provider resolution -# --------------------------------------------------------------------------- + result = await module.generate_response(_make_request(), body) + assert registry.get_tool_by_name.call_count == 2 + assert isinstance(result, openai.types.responses.Response) -@pytest.mark.asyncio -async def test_invalid_model_format(): - """Raises ValueError for an invalid model string.""" - deps = _make_dependencies() - module = StrandsAgentChatModule(dependencies=deps, config={}) - request = Mock(spec=Request) - body = { - "model": "no_slash_model", - "input": "Hello", - } - with pytest.raises(ValueError, match="Invalid model format"): - await module.generate_response(request, body) +# =================================================================== +# 8) Integration tests (require OPENAI_API_KEY in .env) +# =================================================================== -@pytest.mark.asyncio -async def test_provider_not_found(): - """Raises ValueError when provider name is unknown.""" - deps = _make_dependencies() - module = StrandsAgentChatModule(dependencies=deps, config={}) - request = Mock(spec=Request) +@pytest.mark.skipif("OPENAI_API_KEY" not in os.environ, reason="OPENAI_API_KEY not set") +class TestRealProviderIntegration: + """End-to-end tests against a real LLM. 
Skipped if OPENAI_API_KEY is absent.""" + + @staticmethod + def _real_provider() -> ModelProviderResponse: + return ModelProviderResponse( + id="test_provider", + type="openai", + name="myopenai", + base_url=os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1"), + api_key=os.environ.get("OPENAI_API_KEY", ""), + properties={}, + created_at=None, + updated_at=None, + ) - body = { - "model": "unknown/gpt-4o", - "input": "Hello", - } - with pytest.raises(ValueError, match="Provider 'unknown' not found"): - await module.generate_response(request, body) + @staticmethod + def _real_model() -> str: + model = os.environ.get("OPENAI_MODEL", "gpt-4o") + return f"myopenai/{model}" + def _real_deps(self) -> ModuleDependencies: + provider = self._real_provider() + return _make_dependencies(provider_module=_make_provider_module([provider])) -# --------------------------------------------------------------------------- -# Integration tests (require OPENAI_API_KEY in .env) -# --------------------------------------------------------------------------- + @pytest.mark.asyncio + async def test_non_streaming_integration(self): + module = StrandsAgentChatModule(dependencies=self._real_deps(), config={}) + body = { + "model": self._real_model(), + "input": [{"role": "user", "content": "Just echo the word 'Hello'"}], + } + result = await module.generate_response(_make_request(), body) -def _make_real_provider() -> ModelProviderResponse: - """Create a provider backed by the env-var credentials.""" - return ModelProviderResponse( - id="test_provider", - type="openai", - name="myopenai", - base_url=os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1"), - api_key=os.environ.get("OPENAI_API_KEY", ""), - properties={}, - created_at=None, - updated_at=None, - ) + assert isinstance(result, openai.types.responses.Response) + assert result.status == "completed" + assert "Hello" in result.output[0].content[0].text + assert result.usage.input_tokens > 0 + assert 
result.usage.output_tokens > 0 + @pytest.mark.asyncio + async def test_streaming_integration(self): + module = StrandsAgentChatModule(dependencies=self._real_deps(), config={}) + body = { + "model": self._real_model(), + "input": [{"role": "user", "content": "Just echo the word 'Hello'"}], + "stream": True, + } -def _make_real_dependencies() -> ModuleDependencies: - """Dependencies wired to the real provider from env vars.""" - provider = _make_real_provider() - provider_module = _make_mock_provider_module([provider]) - return ModuleDependencies({"llm_provider_module": provider_module}) + gen = await module.generate_response(_make_request(), body) + assert hasattr(gen, "__aiter__") + events = [] + async for event in gen: + events.append(event) -def _real_model() -> str: - """Return 'myopenai/' using OPENAI_MODEL from env.""" - model = os.environ.get("OPENAI_MODEL", "gpt-4o") - return f"myopenai/{model}" + assert len(events) >= 3 + assert events[0].type == "response.created" + full_text = "".join( + e.delta + for e in events + if getattr(e, "type", None) == "response.output_text.delta" + ) + assert "Hello" in full_text -@pytest.mark.skipif("OPENAI_API_KEY" not in os.environ, reason="OPENAI_API_KEY not set") -@pytest.mark.asyncio -async def test_strands_generate_response_non_streaming_integration(): - """Integration: non-streaming response via Strands Agent + real LLM.""" - deps = _make_real_dependencies() - module = StrandsAgentChatModule(dependencies=deps, config={}) - request = Mock(spec=Request) - - body = { - "model": _real_model(), - "input": [{"role": "user", "content": "Just echo the word 'Hello'"}], - } + assert events[-1].type == "response.completed" + assert events[-1].response.output[0].content[0].text == full_text - result = await module.generate_response(request, body) - assert isinstance(result, openai.types.responses.Response) - assert result.status == "completed" - assert result.output - assert len(result.output) > 0 - text = 
result.output[0].content[0].text - assert "Hello" in text - assert result.usage.input_tokens > 0 - assert result.usage.output_tokens > 0 +# --------------------------------------------------------------------------- +# Shared helpers / fixtures +# --------------------------------------------------------------------------- -@pytest.mark.skipif("OPENAI_API_KEY" not in os.environ, reason="OPENAI_API_KEY not set") -@pytest.mark.asyncio -async def test_strands_generate_response_streaming_integration(): - """Integration: streaming response via Strands Agent + real LLM.""" - deps = _make_real_dependencies() - module = StrandsAgentChatModule(dependencies=deps, config={}) - request = Mock(spec=Request) - - body = { - "model": _real_model(), - "input": [{"role": "user", "content": "Just echo the word 'Hello'"}], - "stream": True, +def _make_dependencies( + provider_module=None, + tool_registry=None, +) -> ModuleDependencies: + modules: dict[str, Any] = { + "llm_provider_module": provider_module or _make_provider_module(), } + if tool_registry is not None: + modules["tool_registry"] = tool_registry + return ModuleDependencies(modules) + + +def _make_request() -> Request: + return Mock(spec=Request) + + +def _llmock_module( + base_url: str, + tool_registry=None, +) -> StrandsAgentChatModule: + """Create a ``StrandsAgentChatModule`` pointing at the llmock container.""" + provider = _make_provider(base_url=base_url, api_key=LLMOCK_API_KEY) + return StrandsAgentChatModule( + dependencies=_make_dependencies( + provider_module=_make_provider_module([provider]), + tool_registry=tool_registry, + ), + config={}, + ) - result = await module.generate_response(request, body) - assert hasattr(result, "__aiter__") - - events = [] - async for event in result: - events.append(event) - - # Must have at least created + text done + completed - assert len(events) >= 3 - - # First is response.created - assert events[0].type == "response.created" - # Collect text deltas - full_text = "" - for evt 
in events: - if hasattr(evt, "type") and evt.type == "response.output_text.delta": - full_text += evt.delta +def _make_provider_module(providers: list[ModelProviderResponse] | None = None): + providers = providers or [_make_provider()] + mock = Mock() + mock.get_providers = AsyncMock( + return_value=ModelProvidersListResponse( + providers=providers, + total=len(providers), + limit=None, + offset=None, + ) + ) + return mock - assert "Hello" in full_text - # Last is response.completed - assert events[-1].type == "response.completed" - assert events[-1].response.output[0].content[0].text == full_text +def _make_provider( + name: str = "myprovider", + base_url: str = "https://api.openai.com/v1", + api_key: str = "sk-test-key", +) -> ModelProviderResponse: + return ModelProviderResponse( + id="provider_1", + type="openai", + name=name, + base_url=base_url, + api_key=api_key, + properties={}, + created_at=None, + updated_at=None, + ) diff --git a/backend/omni/src/modai/modules/chat/openai_agent_chat.py b/backend/omni/src/modai/modules/chat/openai_agent_chat.py index 9a3645c..5653e5d 100644 --- a/backend/omni/src/modai/modules/chat/openai_agent_chat.py +++ b/backend/omni/src/modai/modules/chat/openai_agent_chat.py @@ -133,6 +133,7 @@ def _create_agent( system_prompt=system_prompt, messages=prior_messages or None, tools=tools or [], + retry_strategy=None, # For now, now retry handling. Could be added in the future if needed. 
callback_handler=None, # suppress default stdout printing ) diff --git a/backend/omni/src/modai/modules/tools/__tests__/test_tools_web_module.py b/backend/omni/src/modai/modules/tools/__tests__/test_tools_web_module.py index 803def3..9b58d3d 100644 --- a/backend/omni/src/modai/modules/tools/__tests__/test_tools_web_module.py +++ b/backend/omni/src/modai/modules/tools/__tests__/test_tools_web_module.py @@ -169,8 +169,14 @@ def test_resolves_ref_in_schema(self): "DiceRequest": { "type": "object", "properties": { - "count": {"type": "integer", "description": "Number of dice"}, - "sides": {"type": "integer", "description": "Sides per die"}, + "count": { + "type": "integer", + "description": "Number of dice", + }, + "sides": { + "type": "integer", + "description": "Sides per die", + }, }, "required": ["count", "sides"], } @@ -315,7 +321,9 @@ def test_returns_dict_without_refs_unchanged(self): assert _resolve_refs(node, {}) == node def test_resolves_top_level_ref(self): - spec = {"components": {"schemas": {"Foo": {"type": "object", "properties": {}}}}} + spec = { + "components": {"schemas": {"Foo": {"type": "object", "properties": {}}}} + } node = {"$ref": "#/components/schemas/Foo"} assert _resolve_refs(node, spec) == {"type": "object", "properties": {}} @@ -371,9 +379,7 @@ class TestTransformWithRefs: "required": True, "content": { "application/json": { - "schema": { - "$ref": "#/components/schemas/DiceRequest" - } + "schema": {"$ref": "#/components/schemas/DiceRequest"} } }, }, From 862340a8a194b8ee477832e673428b88d2e5e4fb Mon Sep 17 00:00:00 2001 From: guenhter Date: Thu, 5 Mar 2026 07:17:34 +0100 Subject: [PATCH 10/10] fix: fix test --- e2e_tests/tests_omni_full/src/chat.spec.ts | 2 +- e2e_tests/tests_omni_light/src/llm-configuration.spec.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/e2e_tests/tests_omni_full/src/chat.spec.ts b/e2e_tests/tests_omni_full/src/chat.spec.ts index eaa262e..461f44f 100644 --- 
a/e2e_tests/tests_omni_full/src/chat.spec.ts +++ b/e2e_tests/tests_omni_full/src/chat.spec.ts @@ -40,7 +40,7 @@ test.describe("Chat", () => { await llmProviderPage.navigateTo(); await llmProviderPage.addProvider( "Mock Provider", - "http://localhost:3001/v1", + "http://localhost:3001/", "your-secret-api-key", ); diff --git a/e2e_tests/tests_omni_light/src/llm-configuration.spec.ts b/e2e_tests/tests_omni_light/src/llm-configuration.spec.ts index 7c4c110..588096f 100644 --- a/e2e_tests/tests_omni_light/src/llm-configuration.spec.ts +++ b/e2e_tests/tests_omni_light/src/llm-configuration.spec.ts @@ -19,7 +19,7 @@ test.describe("LLM Picker", () => { await llmProviderPage.navigateTo(); await llmProviderPage.addProvider( "Test Provider", - "http://localhost:3001/v1", + "http://localhost:3001", "your-secret-api-key", ); @@ -38,7 +38,7 @@ test.describe("LLM Picker", () => { await llmProviderPage.navigateTo(); await llmProviderPage.addProvider( "Test Provider", - "http://localhost:3001/v1", + "http://localhost:3001", "your-secret-api-key", ); await llmProviderPage.addProvider(