3 changes: 3 additions & 0 deletions ag2/.dockerignore
@@ -0,0 +1,3 @@
*
!pyproject.toml
!agent.py
8 changes: 8 additions & 0 deletions ag2/.gitignore
@@ -0,0 +1,8 @@
/secret.*
/.vscode
/.venv
/.mypy_cache
/.ruff_cache
/.python-version
__pycache__
*.pyc
8 changes: 8 additions & 0 deletions ag2/.ruff.toml
@@ -0,0 +1,8 @@
target-version = "py313"
line-length = 100

[lint]
select = ["E4", "E7", "E9", "F", "I", "RUF022"]

[lint.isort]
force-sort-within-sections = true
37 changes: 37 additions & 0 deletions ag2/Dockerfile
@@ -0,0 +1,37 @@
FROM python:3.13-slim
ENV PYTHONUNBUFFERED=1

RUN pip install uv

WORKDIR /app
COPY pyproject.toml ./
RUN --mount=type=cache,target=/root/.cache/uv \
UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy \
uv pip install --system .
COPY agent.py .

COPY <<EOF /entrypoint.sh
#!/bin/sh
set -e
if test -f /run/secrets/openai-api-key; then
export OPENAI_API_KEY=$(cat /run/secrets/openai-api-key)
fi
if test -n "${OPENAI_API_KEY}"; then
echo "Using OpenAI with ${OPENAI_MODEL_NAME}"
export MODEL_BASE_URL=https://api.openai.com/v1
export MODEL_NAME=${OPENAI_MODEL_NAME}
else
echo "Using Docker Model Runner with ${MODEL_RUNNER_MODEL}"
export MODEL_BASE_URL=${MODEL_RUNNER_URL}
export MODEL_NAME=${MODEL_RUNNER_MODEL}
export OPENAI_API_KEY=cannot_be_empty
fi
exec python agent.py
EOF
RUN chmod +x /entrypoint.sh

RUN useradd --create-home --shell /bin/bash app \
&& chown -R app:app /app
USER app

ENTRYPOINT ["/entrypoint.sh"]
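The heredoc entrypoint above prefers a mounted Docker secret over an inherited `OPENAI_API_KEY`, and falls back to Docker Model Runner when neither yields a key. A standalone sketch of that selection logic, using a local `demo-secret` file in place of `/run/secrets/openai-api-key` (file name and `sk-demo` value are illustrative only):

```shell
# Sketch of the entrypoint's provider selection, runnable outside the container.
secret_file="./demo-secret"
printf 'sk-demo' > "$secret_file"

# Same shape as the entrypoint: secret file wins over the environment.
if test -f "$secret_file"; then
    OPENAI_API_KEY=$(cat "$secret_file")
fi

if test -n "${OPENAI_API_KEY}"; then
    MODEL_BASE_URL=https://api.openai.com/v1
else
    MODEL_BASE_URL=${MODEL_RUNNER_URL}
fi

echo "Selected base URL: $MODEL_BASE_URL"
rm -f "$secret_file"
```

Because the secret branch runs first, an OpenAI key mounted as a secret always takes precedence, even when `MODEL_RUNNER_URL` is also set.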
88 changes: 88 additions & 0 deletions ag2/README.md
@@ -0,0 +1,88 @@
# AG2 Multi-Agent Research Team

A three-agent web research system built with [AG2](https://github.com/ag2ai/ag2) (formerly AutoGen),
Docker Model Runner, and the DuckDuckGo MCP tool. A Gradio browser interface streams each agent's
output as the conversation progresses.

Three agents collaborate under `GroupChat` with LLM-driven speaker selection:

- **researcher** — searches the web for information via MCP DuckDuckGo
- **analyst** — critiques findings, identifies gaps, requests follow-up searches
- **writer** — synthesises the final Markdown report and signals termination

The GroupChatManager determines which agent speaks next based on conversation context — no
hardcoded routing logic or keyword matching.

## Requirements

- [Docker Desktop](https://docs.docker.com/desktop/) 4.43.0+ or [Docker Engine](https://docs.docker.com/engine/) with Docker Compose 2.38.1+
- A machine with a GPU to run open models locally, or [Docker Offload](https://docs.docker.com/offload/) for remote GPU inference

## Run locally (Docker Model Runner)

```sh
docker compose up --build
```

Open http://localhost:7860 in your browser.

## Run with Docker Offload

```sh
docker compose -f compose.yaml -f compose.offload.yaml up --build
```

This uses a 14B-parameter model with a larger context window.

## Run with OpenAI

Create a `secret.openai-api-key` file containing your API key:

```
sk-...
```

Then start with the OpenAI override:

```sh
docker compose -f compose.yaml -f compose.openai.yaml up --build
```

## How it works

```mermaid
graph LR
User -->|research topic| GroupChatManager
GroupChatManager -->|auto-selects speaker| researcher
GroupChatManager -->|auto-selects speaker| analyst
GroupChatManager -->|auto-selects speaker| writer
researcher -->|search| mcp-gateway
mcp-gateway -->|DuckDuckGo results| researcher
writer -->|TERMINATE| GroupChatManager
```

The Gradio interface streams each agent's messages as they are generated,
making the multi-agent collaboration visible in real time.

## Project structure

| File | Purpose |
|---|---|
| `compose.yaml` | Defines the agent service and mcp-gateway sidecar |
| `compose.openai.yaml` | Override to use OpenAI instead of Docker Model Runner |
| `compose.offload.yaml` | Override to use a larger model via Docker Offload |
| `Dockerfile` | Builds the agent container |
| `pyproject.toml` | Python dependencies |
| `agent.py` | Agent definitions, GroupChat setup, Gradio interface |
| `tests/test_agent.py` | Unit tests for agent instantiation (no LLM calls) |

## AG2 concepts demonstrated

- `GroupChat` with `speaker_selection_method="auto"` — the manager LLM selects which agent
speaks next based on conversation context, not hardcoded sequencing
- `create_toolkit` from `autogen.mcp` — wraps any MCP server's tools as AG2-native `Tool`
objects, compatible with `register_for_llm` / `register_for_execution`
- `a_initiate_chat` — async-native GroupChat execution, compatible with Gradio's async
streaming generator interface
- Gradio `ChatInterface` with an async generator — yields intermediate results as each
agent's reply is added to `groupchat.messages`
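The TERMINATE convention mentioned in the list above comes down to a plain substring predicate (the `is_termination_msg` lambda on the writer agent in `agent.py`); in isolation it behaves like this:

```python
# The writer agent's termination check: a message ends the GroupChat
# when its content contains the string "TERMINATE". The `or ""` guards
# against messages whose content is None.
def is_termination_msg(msg: dict) -> bool:
    return "TERMINATE" in (msg.get("content") or "")

print(is_termination_msg({"content": "## Conclusion\n...\nTERMINATE"}))  # True
print(is_termination_msg({"content": None}))                             # False
print(is_termination_msg({}))                                            # False
```

This is why the writer's system prompt ends with "End your final message with TERMINATE": the signal is literal, not semantic.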
132 changes: 132 additions & 0 deletions ag2/agent.py
@@ -0,0 +1,132 @@
"""
Multi-agent research team using AG2 (formerly AutoGen) with Gradio browser UI.

Open http://localhost:7860 after starting with docker compose up --build.

Three agents collaborate under GroupChat with LLM-driven speaker selection:
researcher -- searches the web via MCP DuckDuckGo
analyst -- critiques and validates research findings
writer -- synthesises the final Markdown report
"""
import asyncio
import os
from collections.abc import AsyncGenerator

import gradio as gr
from mcp import ClientSession
from mcp.client.sse import sse_client

from autogen import ConversableAgent, GroupChat, GroupChatManager, LLMConfig
from autogen.mcp import create_toolkit

MCP_SERVER_URL = os.environ["MCP_SERVER_URL"]
MODEL_BASE_URL = os.environ["MODEL_BASE_URL"]
MODEL_NAME = os.environ["MODEL_NAME"]
API_KEY = os.environ.get("OPENAI_API_KEY", "cannot_be_empty")

llm_config = LLMConfig(
{"model": MODEL_NAME, "base_url": MODEL_BASE_URL, "api_key": API_KEY},
temperature=0.3,
)


async def research_topic(message: str, history: list) -> AsyncGenerator[str, None]:
"""Run a GroupChat research session and stream each agent reply to Gradio."""
async with (
sse_client(MCP_SERVER_URL, timeout=60) as (read, write),
ClientSession(read, write) as session,
):
await session.initialize()
toolkit = await create_toolkit(session=session)

researcher = ConversableAgent(
name="researcher",
system_message=(
"You are a research specialist. Search the web for accurate, up-to-date "
"information on the assigned topic. Perform at least 3 focused searches. "
"Summarise findings clearly and cite sources."
),
llm_config=llm_config,
)
analyst = ConversableAgent(
name="analyst",
system_message=(
"You are a critical analyst. Review the researcher's findings. "
"Identify gaps, contradictions, and areas needing deeper investigation. "
"Ask the researcher for follow-up searches when needed."
),
llm_config=llm_config,
)
writer = ConversableAgent(
name="writer",
system_message=(
"You are a technical writer. Once research and analysis are complete, "
"produce a well-structured Markdown report with:\n"
"## Executive Summary\n## Key Findings\n## Conclusion\n"
"End your final message with TERMINATE."
),
llm_config=llm_config,
is_termination_msg=lambda msg: "TERMINATE" in (msg.get("content") or ""),
)

# Register all MCP tools on the researcher (schema for LLM + callable for execution)
toolkit.register_for_llm(researcher)
toolkit.register_for_execution(researcher)

groupchat = GroupChat(
agents=[researcher, analyst, writer],
messages=[],
max_round=12,
speaker_selection_method="auto",
)
manager = GroupChatManager(groupchat=groupchat, llm_config=llm_config)

async def run() -> None:
await researcher.a_initiate_chat(
manager,
message=f"Research the following topic thoroughly: {message}",
)

task = asyncio.create_task(run())

# Stream messages as they appear in groupchat.messages.
# a_initiate_chat yields control at every LLM await point, allowing
# this polling loop to observe intermediate state without threading.
accumulated: list[str] = []
last_count = 0

while not task.done() or last_count < len(groupchat.messages):
current = list(groupchat.messages)
if len(current) > last_count:
for msg in current[last_count:]:
speaker = msg.get("name") or msg.get("role", "agent")
content = (msg.get("content") or "").strip()
if content and speaker not in ("user", "chat_manager"):
accumulated.append(f"**{speaker}:** {content}")
yield "\n\n---\n\n".join(accumulated)
last_count = len(current)
await asyncio.sleep(0.3)

try:
await task
except Exception as exc:
accumulated.append(f"**error:** {exc}")
yield "\n\n---\n\n".join(accumulated)


demo = gr.ChatInterface(
fn=research_topic,
title="AG2 Multi-Agent Research Team",
description=(
"Enter a research topic. Three AG2 agents (researcher, analyst, writer) collaborate "
"using DuckDuckGo via MCP. Speaker order is determined dynamically by the LLM."
),
examples=[
"Latest developments in AI agent frameworks in 2025",
"How does quantum computing affect modern cryptography?",
"What are the environmental impacts of large language models?",
],
)

if __name__ == "__main__":
demo.launch(server_name="0.0.0.0", server_port=7860)
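The streaming loop in `research_topic` above follows a general pattern: run the chat as a background task and poll a shared list for new entries, yielding them as they arrive. A minimal stdlib-only sketch of that pattern, with a plain list and a toy `producer` coroutine standing in for `groupchat.messages` and `a_initiate_chat` (all names here are illustrative):

```python
import asyncio


async def producer(messages: list[str]) -> None:
    # Stands in for a_initiate_chat: appends messages with awaits in between,
    # giving the polling loop a chance to observe intermediate state.
    for name in ("researcher", "analyst", "writer"):
        await asyncio.sleep(0.01)
        messages.append(f"{name}: done")


async def stream(messages: list[str]):
    # Same shape as the loop in research_topic: keep polling until the
    # task is done AND every message has been yielded.
    task = asyncio.create_task(producer(messages))
    last = 0
    while not task.done() or last < len(messages):
        for msg in messages[last:]:
            yield msg
        last = len(messages)
        await asyncio.sleep(0.005)
    await task


async def main() -> list[str]:
    messages: list[str] = []
    return [m async for m in stream(messages)]


print(asyncio.run(main()))  # ['researcher: done', 'analyst: done', 'writer: done']
```

Because both coroutines run on the same event loop, no locking is needed: the list is only ever mutated between `await` points.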
4 changes: 4 additions & 0 deletions ag2/compose.offload.yaml
@@ -0,0 +1,4 @@
models:
qwen3:
model: ai/qwen3:14B-Q6_K
context_size: 32768
10 changes: 10 additions & 0 deletions ag2/compose.openai.yaml
@@ -0,0 +1,10 @@
services:
agent:
environment:
- OPENAI_MODEL_NAME=gpt-4.1-mini
secrets:
- openai-api-key

secrets:
openai-api-key:
file: secret.openai-api-key
30 changes: 30 additions & 0 deletions ag2/compose.yaml
@@ -0,0 +1,30 @@
services:
agent:
build:
context: .
ports:
# Gradio browser UI
- 7860:7860
environment:
- MCP_SERVER_URL=http://mcp-gateway:8811/sse
depends_on:
- mcp-gateway
models:
qwen3:
endpoint_var: MODEL_RUNNER_URL
model_var: MODEL_RUNNER_MODEL

mcp-gateway:
image: docker/mcp-gateway:latest
use_api_socket: true
command:
- --transport=sse
- --servers=duckduckgo
- --tools=search

models:
qwen3:
model: ai/qwen3:8B-Q4_0
context_size: 8192
runtime_flags:
- --no-prefill-assistant
15 changes: 15 additions & 0 deletions ag2/pyproject.toml
@@ -0,0 +1,15 @@
[project]
name = "ag2-demo"
version = "0.1.0"
description = "AG2 multi-agent research team with Gradio browser UI"
requires-python = ">=3.13"
dependencies = [
"ag2[openai,mcp]>=0.11.0",
"gradio>=4.0",
]

[dependency-groups]
dev = [
"pytest>=8.0",
"ruff>=0.8",
]
Empty file added ag2/tests/__init__.py
Empty file.