From b2cd9d32430c10b0b37e553f9b6db8e63b06c4a7 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 12:20:11 +0000 Subject: [PATCH 01/18] feat: add deepgram tts extension with voice-assistant integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit add Deepgram TTS extension using WebSocket streaming API with Aura-2 voices. Wire into voice-assistant example as voice_assistant_deepgram_tts graph variant. Include progressive disclosure AI documentation. addresses PR #2128 review feedback: - remove raw config logging that exposed API keys - extract _finalize_request() helper (consolidate 5 duplicate patterns) - await client.start() instead of fire-and-forget asyncio.create_task() - add _reconnect_client() for immediate reconnect after errors - consume EVENT_TTS_FLUSH internally (don't leak to caller) - add early text validation in get() for empty/whitespace text - reduce websocket recv timeout from 10s to 5s - drop audio chunks received after cancellation flag is set - reconnect websocket after cancel for clean state on next request - change manifest.json sample_rate type from int64 to int32 test results: - standalone: 13/13 passed - guarder: 14/16 passed - test_flush: PASS (was failing — fixed cancel race condition) - test_invalid_text_handling: PASS (was skipped — fixed with text validation + timeout reduction) - test_interleaved_requests: FAIL — websocket state from previous request causes timeout on request 8/8. needs duplex websocket architecture (separate send/receive tasks) to fully resolve. - test_subtitle_alignment: FAIL — feature gap, deepgram tts api does not provide word-level timing data. config file not present. 
--- AGENTS.md | 26 + CLAUDE.md | 1 + .../voice-assistant/tenapp/manifest.json | 3 + .../voice-assistant/tenapp/property.json | 184 +++++ .../extension/deepgram_tts/README.md | 97 +++ .../extension/deepgram_tts/__init__.py | 6 + .../extension/deepgram_tts/addon.py | 20 + .../extension/deepgram_tts/config.py | 75 ++ .../extension/deepgram_tts/deepgram_tts.py | 298 ++++++++ .../extension/deepgram_tts/extension.py | 489 +++++++++++++ .../extension/deepgram_tts/manifest.json | 65 ++ .../extension/deepgram_tts/property.json | 11 + .../extension/deepgram_tts/requirements.txt | 1 + .../extension/deepgram_tts/tests/__init__.py | 5 + .../extension/deepgram_tts/tests/bin/start | 21 + .../property_basic_audio_setting1.json | 10 + .../property_basic_audio_setting2.json | 10 + .../tests/configs/property_dump.json | 10 + .../tests/configs/property_invalid.json | 5 + .../tests/configs/property_miss_required.json | 5 + .../extension/deepgram_tts/tests/conftest.py | 99 +++ .../deepgram_tts/tests/test_basic.py | 325 +++++++++ .../deepgram_tts/tests/test_error_msg.py | 174 +++++ .../deepgram_tts/tests/test_metrics.py | 135 ++++ .../deepgram_tts/tests/test_params.py | 157 +++++ .../deepgram_tts/tests/test_robustness.py | 277 ++++++++ docs/ai/L0_repo_card.md | 31 + docs/ai/L1/01_setup.md | 118 ++++ docs/ai/L1/02_architecture.md | 142 ++++ docs/ai/L1/03_code_map.md | 117 ++++ docs/ai/L1/04_conventions.md | 138 ++++ docs/ai/L1/05_workflows.md | 181 +++++ docs/ai/L1/06_interfaces.md | 150 ++++ docs/ai/L1/07_gotchas.md | 235 +++++++ docs/ai/L1/08_security.md | 88 +++ docs/ai/L1/deep_dives/_index.md | 9 + docs/ai/L1/deep_dives/deployment.md | 206 ++++++ .../ai/L1/deep_dives/extension_development.md | 653 ++++++++++++++++++ docs/ai/L1/deep_dives/graph_configuration.md | 410 +++++++++++ docs/ai/L1/deep_dives/server_architecture.md | 211 ++++++ docs/ai/L1/deep_dives/testing.md | 295 ++++++++ 41 files changed, 5493 insertions(+) create mode 100644 AGENTS.md create mode 100644 CLAUDE.md create 
mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/README.md create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/__init__.py create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/addon.py create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/config.py create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/manifest.json create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/property.json create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/requirements.txt create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/tests/__init__.py create mode 100755 ai_agents/agents/ten_packages/extension/deepgram_tts/tests/bin/start create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_basic_audio_setting1.json create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_basic_audio_setting2.json create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_dump.json create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_invalid.json create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_miss_required.json create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/tests/conftest.py create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_basic.py create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_error_msg.py create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_metrics.py create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_params.py create mode 100644 
ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_robustness.py create mode 100644 docs/ai/L0_repo_card.md create mode 100644 docs/ai/L1/01_setup.md create mode 100644 docs/ai/L1/02_architecture.md create mode 100644 docs/ai/L1/03_code_map.md create mode 100644 docs/ai/L1/04_conventions.md create mode 100644 docs/ai/L1/05_workflows.md create mode 100644 docs/ai/L1/06_interfaces.md create mode 100644 docs/ai/L1/07_gotchas.md create mode 100644 docs/ai/L1/08_security.md create mode 100644 docs/ai/L1/deep_dives/_index.md create mode 100644 docs/ai/L1/deep_dives/deployment.md create mode 100644 docs/ai/L1/deep_dives/extension_development.md create mode 100644 docs/ai/L1/deep_dives/graph_configuration.md create mode 100644 docs/ai/L1/deep_dives/server_architecture.md create mode 100644 docs/ai/L1/deep_dives/testing.md diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000000..d23c0aa719 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,26 @@ +# AI Agent Instructions + +This repository uses progressive disclosure documentation to help AI coding +agents work efficiently. Documentation is structured in three levels under +`docs/ai/`. + +## How to Load + +1. Read [docs/ai/L0_repo_card.md](docs/ai/L0_repo_card.md) to identify the repo. +2. Load ALL 8 files in `docs/ai/L1/`. They are small — load all of them upfront. + This gives you setup, architecture, code map, conventions, workflows, + interfaces, gotchas, and security. +3. If a task needs more detail than L1 provides, follow links to L2 deep dives + in `docs/ai/L1/deep_dives/`. Load only the specific L2 file you need. + +## Levels + +- **L0 (Repo Card):** Identity and L1 index. Table of contents. +- **L1 (Summaries):** Eight structured summaries. Load all at session start. +- **L2 (Deep Dives):** Full specifications. Load only when L1 isn't detailed enough. 
+ +## Working Areas + +- **AI Agents development**: `ai_agents/` — see `ai_agents/AGENTS.md` for workspace-specific context +- **Core framework**: `core/`, `packages/`, `build/` +- **Operational reference**: `ai/AI_working_with_ten.md` (full), `ai/AI_working_with_ten_compact.md` (quick) diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000000..c2c4fb4158 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1 @@ +Read @AGENTS.md for AI agent instructions and progressive disclosure docs. diff --git a/ai_agents/agents/examples/voice-assistant/tenapp/manifest.json b/ai_agents/agents/examples/voice-assistant/tenapp/manifest.json index 020768c826..97f3d5c3a2 100644 --- a/ai_agents/agents/examples/voice-assistant/tenapp/manifest.json +++ b/ai_agents/agents/examples/voice-assistant/tenapp/manifest.json @@ -152,6 +152,9 @@ }, { "path": "../../../ten_packages/extension/oracle_tts_python" + }, + { + "path": "../../../ten_packages/extension/deepgram_tts" } ], "scripts": { diff --git a/ai_agents/agents/examples/voice-assistant/tenapp/property.json b/ai_agents/agents/examples/voice-assistant/tenapp/property.json index 270bfb77be..dcd0b8e214 100644 --- a/ai_agents/agents/examples/voice-assistant/tenapp/property.json +++ b/ai_agents/agents/examples/voice-assistant/tenapp/property.json @@ -185,6 +185,190 @@ ] } }, + { + "name": "voice_assistant_deepgram_tts", + "auto_start": false, + "graph": { + "nodes": [ + { + "type": "extension", + "name": "agora_rtc", + "addon": "agora_rtc", + "extension_group": "default", + "property": { + "app_id": "${env:AGORA_APP_ID}", + "app_certificate": "${env:AGORA_APP_CERTIFICATE|}", + "channel": "ten_agent_test", + "stream_id": 1234, + "remote_stream_id": 123, + "subscribe_audio": true, + "publish_audio": true, + "publish_data": true, + "enable_agora_asr": false + } + }, + { + "type": "extension", + "name": "stt", + "addon": "deepgram_asr_python", + "extension_group": "stt", + "property": { + "params": { + "api_key": "${env:DEEPGRAM_API_KEY}", + 
"language": "en-US", + "model": "nova-3" + } + } + }, + { + "type": "extension", + "name": "llm", + "addon": "openai_llm2_python", + "extension_group": "chatgpt", + "property": { + "base_url": "https://api.openai.com/v1", + "api_key": "${env:OPENAI_API_KEY}", + "frequency_penalty": 0.9, + "model": "${env:OPENAI_MODEL}", + "max_tokens": 512, + "prompt": "", + "proxy_url": "${env:OPENAI_PROXY_URL|}", + "greeting": "TEN Agent connected. How can I help you today?", + "max_memory_length": 10 + } + }, + { + "type": "extension", + "name": "tts", + "addon": "deepgram_tts", + "extension_group": "tts", + "property": { + "dump": false, + "dump_path": "/tmp", + "params": { + "api_key": "${env:DEEPGRAM_API_KEY}", + "model": "aura-2-thalia-en", + "encoding": "linear16", + "sample_rate": 24000 + } + } + }, + { + "type": "extension", + "name": "main_control", + "addon": "main_python", + "extension_group": "control", + "property": { + "greeting": "TEN Agent connected. How can I help you today?" + } + }, + { + "type": "extension", + "name": "message_collector", + "addon": "message_collector2", + "extension_group": "transcriber", + "property": {} + }, + { + "type": "extension", + "name": "weatherapi_tool_python", + "addon": "weatherapi_tool_python", + "extension_group": "default", + "property": { + "api_key": "${env:WEATHERAPI_API_KEY|}" + } + }, + { + "type": "extension", + "name": "streamid_adapter", + "addon": "streamid_adapter", + "property": {} + } + ], + "connections": [ + { + "extension": "main_control", + "cmd": [ + { + "names": [ + "on_user_joined", + "on_user_left" + ], + "source": [ + { + "extension": "agora_rtc" + } + ] + }, + { + "names": [ + "tool_register" + ], + "source": [ + { + "extension": "weatherapi_tool_python" + } + ] + } + ], + "data": [ + { + "name": "asr_result", + "source": [ + { + "extension": "stt" + } + ] + } + ] + }, + { + "extension": "agora_rtc", + "audio_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension": "streamid_adapter" + } + ] + 
}, + { + "name": "pcm_frame", + "source": [ + { + "extension": "tts" + } + ] + } + ], + "data": [ + { + "name": "data", + "source": [ + { + "extension": "message_collector" + } + ] + } + ] + }, + { + "extension": "streamid_adapter", + "audio_frame": [ + { + "name": "pcm_frame", + "dest": [ + { + "extension": "stt" + } + ] + } + ] + } + ] + } + }, { "name": "voice_assistant_oracle", "auto_start": false, diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/README.md b/ai_agents/agents/ten_packages/extension/deepgram_tts/README.md new file mode 100644 index 0000000000..c8be961b39 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/README.md @@ -0,0 +1,97 @@ +# Deepgram TTS Extension + +A TEN Framework extension that provides Text-to-Speech (TTS) capabilities using Deepgram's Aura streaming API. + +## Features + +- Real-time streaming TTS via WebSocket +- Multiple voice models (Aura-2 series) +- Configurable sample rates (8000, 16000, 24000, 48000 Hz) +- Linear16 PCM audio output +- TTFB (Time to First Byte) metrics reporting +- Audio dump capability for debugging + +## Configuration + +### Properties + +| Property | Type | Default | Description | +|----------|------|---------|-------------| +| `params.api_key` | string | Required | Deepgram API key | +| `params.model` | string | `aura-2-thalia-en` | Voice model to use | +| `params.encoding` | string | `linear16` | Audio encoding format | +| `params.sample_rate` | int | `24000` | Output sample rate in Hz | +| `params.base_url` | string | `wss://api.deepgram.com/v1/speak` | WebSocket endpoint | +| `dump` | bool | `false` | Enable audio dumping | +| `dump_path` | string | `/tmp` | Path for audio dump files | + +### Example Configuration + +```json +{ + "params": { + "api_key": "${env:DEEPGRAM_API_KEY}", + "model": "aura-2-thalia-en", + "encoding": "linear16", + "sample_rate": 24000 + }, + "dump": false, + "dump_path": "/tmp" +} +``` + +## Available Voice Models + +Deepgram Aura-2 voices: 
+- `aura-2-thalia-en` - Female, English (default) +- `aura-2-luna-en` - Female, English +- `aura-2-stella-en` - Female, English +- `aura-2-athena-en` - Female, English +- `aura-2-hera-en` - Female, English +- `aura-2-orion-en` - Male, English +- `aura-2-arcas-en` - Male, English +- `aura-2-perseus-en` - Male, English +- `aura-2-angus-en` - Male, English +- `aura-2-orpheus-en` - Male, English +- `aura-2-helios-en` - Male, English +- `aura-2-zeus-en` - Male, English + +## Supported Sample Rates + +- 8000 Hz +- 16000 Hz +- 24000 Hz (recommended) +- 48000 Hz + +## API Interface + +This extension implements the standard TEN TTS interface: + +### Input Data +- `tts_text_input` - Text to synthesize +- `tts_flush` - Flush pending audio + +### Output Data +- `tts_audio_start` - Audio generation started +- `tts_audio_end` - Audio generation completed +- `metrics` - Performance metrics (TTFB, duration) +- `error` - Error information + +### Output Audio +- `pcm_frame` - PCM audio data (16-bit, mono) + +## Running Tests + +```bash +cd deepgram_tts +tman -y install --standalone +./tests/bin/start +``` + +## Environment Variables + +- `DEEPGRAM_API_KEY` - Your Deepgram API key + +## License + +Apache License, Version 2.0 diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/__init__.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/__init__.py new file mode 100644 index 0000000000..72593ab225 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/__init__.py @@ -0,0 +1,6 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +from . 
import addon diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/addon.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/addon.py new file mode 100644 index 0000000000..477d15e16d --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/addon.py @@ -0,0 +1,20 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +from ten_runtime import ( + Addon, + register_addon_as_extension, + TenEnv, +) + + +@register_addon_as_extension("deepgram_tts") +class DeepgramTTSExtensionAddon(Addon): + + def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None: + from .extension import DeepgramTTSExtension + + ten_env.log_info("DeepgramTTSExtensionAddon on_create_instance") + ten_env.on_create_instance_done(DeepgramTTSExtension(name), context) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/config.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/config.py new file mode 100644 index 0000000000..901b2eb449 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/config.py @@ -0,0 +1,75 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. 
+# +from __future__ import annotations + +from typing import Any +import copy + +from ten_ai_base import utils + +from pydantic import BaseModel, Field + + +class DeepgramTTSConfig(BaseModel): + api_key: str = "" + base_url: str = "wss://api.deepgram.com/v1/speak" + + model: str = "aura-2-thalia-en" + encoding: str = "linear16" + sample_rate: int = 24000 + + dump: bool = False + dump_path: str = "/tmp" + params: dict[str, Any] = Field(default_factory=dict) + + def update_params(self) -> None: + params = self._ensure_dict(self.params) + self.params = params + + if "api_key" in params: + self.api_key = params["api_key"] + del params["api_key"] + + if "base_url" in params: + self.base_url = params["base_url"] + del params["base_url"] + + if "model" in params: + self.model = params["model"] + del params["model"] + + if "encoding" in params: + self.encoding = params["encoding"] + del params["encoding"] + + if "sample_rate" in params: + self.sample_rate = params["sample_rate"] + del params["sample_rate"] + + def to_str(self, sensitive_handling: bool = True) -> str: + """ + Convert the configuration to a string representation. 
+ """ + if not sensitive_handling: + return f"{self}" + + config = copy.deepcopy(self) + + # Encrypt sensitive fields + if config.api_key: + config.api_key = utils.encrypt(config.api_key) + if config.params and "api_key" in config.params: + config.params["api_key"] = utils.encrypt(config.params["api_key"]) + + return f"{config}" + + @staticmethod + def _ensure_dict(value: Any) -> dict[str, Any]: + if isinstance(value, dict): + return value + if value is None: + return {} + return dict(value) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py new file mode 100644 index 0000000000..1564afcc2d --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py @@ -0,0 +1,298 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +import asyncio +import json +from collections.abc import Callable +from datetime import datetime +from typing import AsyncIterator + +import websockets +from websockets.asyncio.client import ClientConnection + +from .config import DeepgramTTSConfig +from ten_runtime import AsyncTenEnv +from ten_ai_base.const import LOG_CATEGORY_VENDOR + +# Custom event types to communicate status back to the extension +EVENT_TTS_RESPONSE = 1 +EVENT_TTS_END = 2 +EVENT_TTS_ERROR = 3 +EVENT_TTS_FLUSH = 4 +EVENT_TTS_TTFB_METRIC = 5 + + +class DeepgramTTSConnectionException(Exception): + """Exception raised when Deepgram TTS connection fails""" + + def __init__(self, status_code: int, body: str): + self.status_code = status_code + self.body = body + super().__init__( + f"Deepgram TTS connection failed (code: {status_code}): {body}" + ) + + +class DeepgramTTSClient: + def __init__( + self, + config: DeepgramTTSConfig, + ten_env: AsyncTenEnv, + send_fatal_tts_error: Callable[[str], asyncio.Future] | None = None, + 
send_non_fatal_tts_error: Callable[[str], asyncio.Future] | None = None, + ): + self.config = config + self.ten_env: AsyncTenEnv = ten_env + self._is_cancelled = False + self.ws: ClientConnection | None = None + self.send_fatal_tts_error = send_fatal_tts_error + self.send_non_fatal_tts_error = send_non_fatal_tts_error + + self.sent_ts: datetime | None = None + self.ttfb_sent: bool = False + + # Build WebSocket URL with query parameters + self.ws_url = self._build_ws_url() + + def _build_ws_url(self) -> str: + """Build the WebSocket URL with query parameters""" + base = self.config.base_url + params = f"model={self.config.model}&encoding={self.config.encoding}&sample_rate={self.config.sample_rate}" + return f"{base}?{params}" + + async def start(self) -> None: + """Preheating: establish websocket connection during initialization""" + try: + await self._connect() + + except Exception as e: + self.ten_env.log_error(f"Deepgram TTS preheat failed: {e}") + + async def _connect(self) -> None: + """Connect to the websocket""" + try: + extra_headers = { + "Authorization": f"Token {self.config.api_key}", + } + self.ws = await websockets.connect( + self.ws_url, + additional_headers=extra_headers, + ) + self.ten_env.log_debug( + "vendor_status: connected to deepgram tts", + category=LOG_CATEGORY_VENDOR, + ) + + except Exception as e: + error_message = str(e) + if "401" in error_message or "Unauthorized" in error_message: + if self.send_fatal_tts_error: + await self.send_fatal_tts_error(error_message=error_message) + else: + raise DeepgramTTSConnectionException( + status_code=401, body=error_message + ) from e + else: + self.ten_env.log_error( + f"Deepgram TTS preheat failed, unexpected error: {e}" + ) + if self.send_non_fatal_tts_error: + await self.send_non_fatal_tts_error( + error_message=error_message + ) + raise + + async def stop(self): + # Set cancellation flag first to stop any pending operations + self._is_cancelled = True + + # Stop the websocket connection if it 
exists + if self.ws: + try: + # Send close message + await self.ws.send(json.dumps({"type": "Close"})) + except Exception: + pass + await self.ws.close() + self.ws = None + + async def cancel(self): + """ + Cancel the current TTS task. + """ + self.ten_env.log_debug("Cancelling current TTS task.") + self._is_cancelled = True + if self.ws: + self.reset_ttfb() + # Send flush to clear any pending audio + try: + await self.ws.send(json.dumps({"type": "Flush"})) + except Exception: + pass + + async def reconnect(self): + """Close and re-establish the websocket connection.""" + if self.ws: + try: + await self.ws.close() + except Exception: + pass + self.ws = None + await self._connect() + + def reset_ttfb(self): + self.sent_ts = None + self.ttfb_sent = False + + async def get( + self, text: str + ) -> AsyncIterator[tuple[bytes | int | None, int | None]]: + """Generate TTS audio for the given text""" + + if len(text.strip()) == 0: + self.ten_env.log_warn( + "DeepgramTTS: empty text provided, " "returning END event" + ) + yield None, EVENT_TTS_END + return + + self._is_cancelled = False + try: + await self._ensure_connection() + async for audio_chunk, event_status in self._process_single_tts( + text + ): + if event_status == EVENT_TTS_FLUSH: + # Cancelled: reconnect for clean state + await self.reconnect() + break + + yield audio_chunk, event_status + + except Exception as e: + self.ten_env.log_error( + f"vendor_error: {e}", + category=LOG_CATEGORY_VENDOR, + ) + raise + + async def _ensure_connection(self) -> None: + """Ensure websocket connection is established""" + if not self.ws: + await self._connect() + + async def _process_single_tts( + self, text: str + ) -> AsyncIterator[tuple[bytes | int | None, int | None]]: + """Process a single TTS request""" + if not self.ws: + self.ten_env.log_error("Deepgram websocket not connected") + return + + self.ten_env.log_debug(f"process_single_tts, text: {text}") + + if not self.ttfb_sent: + self.sent_ts = datetime.now() + + # Send 
the text to Deepgram + speak_msg = { + "type": "Speak", + "text": text, + } + await self.ws.send(json.dumps(speak_msg)) + + # Send flush to get audio immediately + await self.ws.send(json.dumps({"type": "Flush"})) + + try: + # Receive audio data + while True: + if self._is_cancelled: + self.ten_env.log_debug( + "Cancellation flag detected, stopping TTS stream." + ) + yield None, EVENT_TTS_FLUSH + break + + try: + message = await asyncio.wait_for( + self.ws.recv(), timeout=5.0 + ) + except asyncio.TimeoutError: + self.ten_env.log_error( + "Timeout waiting for Deepgram audio - yielding error" + ) + yield b"Timeout waiting for Deepgram audio", EVENT_TTS_ERROR + break + + # Binary message = audio data + if isinstance(message, bytes): + # Drop audio if cancelled during recv + if self._is_cancelled: + self.ten_env.log_debug( + "Cancellation detected after recv, " + "dropping audio chunk." + ) + yield None, EVENT_TTS_FLUSH + break + + # First audio chunk, calculate TTFB + if self.sent_ts and not self.ttfb_sent: + ttfb_ms = int( + (datetime.now() - self.sent_ts).total_seconds() + * 1000 + ) + yield ttfb_ms, EVENT_TTS_TTFB_METRIC + self.ttfb_sent = True + + self.ten_env.log_debug( + f"DeepgramTTS: sending EVENT_TTS_RESPONSE, " + f"length: {len(message)}" + ) + yield message, EVENT_TTS_RESPONSE + + # Text message = JSON metadata + else: + try: + data = json.loads(message) + msg_type = data.get("type", "") + + if msg_type == "Flushed": + # All audio for this text has been sent + self.ten_env.log_debug( + "DeepgramTTS: received Flushed, " + "sending EVENT_TTS_END" + ) + yield None, EVENT_TTS_END + break + + elif msg_type == "Warning": + self.ten_env.log_warn( + f"Deepgram warning: {data.get('warn_msg', '')}" + ) + + elif msg_type == "Error": + error_msg = data.get("err_msg", "Unknown error") + self.ten_env.log_error( + f"Deepgram error: {error_msg}" + ) + yield error_msg.encode("utf-8"), EVENT_TTS_ERROR + break + + except json.JSONDecodeError: + self.ten_env.log_warn( + 
f"Failed to parse Deepgram message: {message}" + ) + + if not self._is_cancelled: + self.ten_env.log_debug("DeepgramTTS: TTS complete") + + except Exception as e: + error_message = str(e) + self.ten_env.log_error( + f"vendor_error: {error_message}", + category=LOG_CATEGORY_VENDOR, + ) + yield error_message.encode("utf-8"), EVENT_TTS_ERROR diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py new file mode 100644 index 0000000000..749f70f4eb --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py @@ -0,0 +1,489 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +import asyncio +from datetime import datetime +import os +import traceback + +from ten_ai_base.helper import PCMWriter +from ten_ai_base.message import ( + ModuleError, + ModuleErrorCode, + ModuleType, + ModuleErrorVendorInfo, + TTSAudioEndReason, +) +from ten_ai_base.struct import TTSTextInput +from ten_ai_base.tts2 import AsyncTTS2BaseExtension +from ten_ai_base.const import LOG_CATEGORY_VENDOR, LOG_CATEGORY_KEY_POINT +from .config import DeepgramTTSConfig + +from .deepgram_tts import ( + EVENT_TTS_END, + EVENT_TTS_RESPONSE, + EVENT_TTS_TTFB_METRIC, + EVENT_TTS_ERROR, + DeepgramTTSClient, + DeepgramTTSConnectionException, +) +from ten_runtime import AsyncTenEnv + + +class DeepgramTTSExtension(AsyncTTS2BaseExtension): + def __init__(self, name: str) -> None: + super().__init__(name) + self.config: DeepgramTTSConfig | None = None + self.client: DeepgramTTSClient | None = None + self.current_request_id: str | None = None + self.current_turn_id: int = -1 + self.sent_ts: datetime | None = None + self.current_request_finished: bool = False + self.total_audio_bytes: int = 0 + self._is_stopped: bool = False + self.recorder_map: dict[str, PCMWriter] = {} + 
self._audio_start_sent: bool = False + + async def on_init(self, ten_env: AsyncTenEnv) -> None: + try: + await super().on_init(ten_env) + config_json_str, _ = await self.ten_env.get_property_to_json("") + + if not config_json_str or config_json_str.strip() == "{}": + raise ValueError( + "Configuration is empty. " + "Required parameter 'api_key' is missing." + ) + + self.config = DeepgramTTSConfig.model_validate_json(config_json_str) + self.config.update_params() + ten_env.log_info( + f"LOG_CATEGORY_KEY_POINT: " + f"{self.config.to_str(sensitive_handling=True)}", + category=LOG_CATEGORY_KEY_POINT, + ) + + if not self.config.api_key: + raise ValueError("API key is required") + + self.client = self._create_client(ten_env) + await self.client.start() + ten_env.log_debug("DeepgramTTS client initialized successfully") + except Exception as e: + ten_env.log_error(f"on_init failed: {traceback.format_exc()}") + await self.send_tts_error( + request_id="", + error=ModuleError( + message=f"Initialization failed: {e}", + module=ModuleType.TTS, + code=ModuleErrorCode.FATAL_ERROR, + vendor_info=ModuleErrorVendorInfo(vendor=self.vendor()), + ), + ) + + async def on_stop(self, ten_env: AsyncTenEnv) -> None: + self._is_stopped = True + ten_env.log_debug("Extension stopping, rejecting new requests") + + if self.client: + await self.client.stop() + self.client = None + + for request_id, recorder in list(self.recorder_map.items()): + try: + await recorder.flush() + ten_env.log_debug( + f"Flushed PCMWriter for request_id: " f"{request_id}" + ) + except Exception as e: + ten_env.log_error( + f"Error flushing PCMWriter for " + f"request_id {request_id}: {e}" + ) + + await super().on_stop(ten_env) + ten_env.log_debug("on_stop") + + async def on_deinit(self, ten_env: AsyncTenEnv) -> None: + await super().on_deinit(ten_env) + ten_env.log_debug("on_deinit") + + async def cancel_tts(self) -> None: + self.current_request_finished = True + if self.current_request_id: + self.ten_env.log_debug( + 
f"Current request {self.current_request_id} " + f"is being cancelled. Sending INTERRUPTED." + ) + if self.client: + await self.client.cancel() + if self.sent_ts: + await self._finalize_request(TTSAudioEndReason.INTERRUPTED) + else: + self.ten_env.log_warn( + "No current request found, " "skipping TTS cancellation." + ) + + def vendor(self) -> str: + return "deepgram" + + def synthesize_audio_sample_rate(self) -> int: + if self.config is None: + return 24000 + return self.config.sample_rate + + def _create_client(self, ten_env: AsyncTenEnv) -> DeepgramTTSClient: + return DeepgramTTSClient( + config=self.config, + ten_env=ten_env, + send_fatal_tts_error=self.send_fatal_tts_error, + send_non_fatal_tts_error=(self.send_non_fatal_tts_error), + ) + + async def _ensure_client(self) -> None: + """Ensure client is connected, reconnecting if needed.""" + if self.client is None: + self.ten_env.log_debug( + "TTS client is not initialized, reconnecting..." + ) + self.client = self._create_client(self.ten_env) + await self.client.start() + self.ten_env.log_debug("TTS client reconnected successfully.") + + async def _reconnect_client(self) -> None: + """Destroy current client and reconnect immediately.""" + if self.client: + await self.client.stop() + self.client = None + try: + self.client = self._create_client(self.ten_env) + await self.client.start() + self.ten_env.log_debug("Client reconnected after error.") + except Exception as e: + self.ten_env.log_error(f"Immediate reconnect failed: {e}") + self.client = None + + async def _finalize_request( + self, + reason: TTSAudioEndReason, + error: ModuleError | None = None, + ) -> None: + """Send audio end, flush recorder, finish request.""" + if not self._audio_start_sent: + await self.send_tts_audio_start( + request_id=self.current_request_id, + ) + self._audio_start_sent = True + + request_event_interval = self._current_request_interval_ms() + duration_ms = self._calculate_audio_duration_ms() + + await self.send_tts_audio_end( + 
request_id=self.current_request_id, + request_event_interval_ms=request_event_interval, + request_total_audio_duration_ms=duration_ms, + reason=reason, + ) + + if self.current_request_id in self.recorder_map: + await self.recorder_map[self.current_request_id].flush() + + await self.finish_request( + request_id=self.current_request_id, + reason=reason, + error=error, + ) + + self.sent_ts = None + self.ten_env.log_debug( + f"Finalized request, reason: {reason}, " + f"interval: {request_event_interval}ms, " + f"duration: {duration_ms}ms" + ) + + async def request_tts(self, t: TTSTextInput) -> None: + """Handle TTS requests.""" + try: + self.ten_env.log_info( + f"Requesting TTS for text: {t.text}, " + f"text_input_end: {t.text_input_end} " + f"request ID: {t.request_id}", + ) + + await self._ensure_client() + + if t.request_id != self.current_request_id: + self.ten_env.log_debug( + f"New TTS request with ID: {t.request_id}" + ) + if self.client: + self.client.reset_ttfb() + self.current_request_id = t.request_id + self.current_request_finished = False + self.total_audio_bytes = 0 + self.sent_ts = None + self._audio_start_sent = False + if t.metadata is not None: + self.session_id = t.metadata.get("session_id", "") + self.current_turn_id = t.metadata.get("turn_id", -1) + self._setup_recorder(t.request_id) + elif self.current_request_finished: + self.ten_env.log_error( + f"Received a message for a finished " + f"request_id '{t.request_id}' with " + f"text_input_end=False." 
+ ) + return + + if t.text_input_end: + self.ten_env.log_debug( + f"KEYPOINT finish session for " + f"request ID: {t.request_id}" + ) + self.current_request_finished = True + + prepared_text = t.text.strip() + + if self._is_stopped: + self.ten_env.log_debug( + f"TTS is stopped, skipping " f"request_id: {t.request_id}" + ) + return + + if prepared_text != "": + await self._process_tts_text(prepared_text, t) + elif t.text_input_end: + await self._finalize_request(TTSAudioEndReason.REQUEST_END) + + except DeepgramTTSConnectionException as e: + await self._handle_connection_error(e) + + except Exception as e: + self.ten_env.log_error( + f"Error in request_tts: " + f"{traceback.format_exc()}. text: {t.text}" + ) + error = ModuleError( + message=str(e), + module=ModuleType.TTS, + code=ModuleErrorCode.NON_FATAL_ERROR, + vendor_info=ModuleErrorVendorInfo(vendor=self.vendor()), + ) + await self.send_tts_error( + request_id=self.current_request_id, + error=error, + ) + await self.finish_request( + request_id=self.current_request_id, + reason=TTSAudioEndReason.ERROR, + error=error, + ) + if isinstance(e, ConnectionRefusedError): + await self._reconnect_client() + + async def _process_tts_text(self, text: str, t: TTSTextInput) -> None: + """Process non-empty text through the TTS pipeline.""" + self.ten_env.log_debug( + f"send_text_to_tts_server: {text} " + f"of request_id: {t.request_id}", + category=LOG_CATEGORY_VENDOR, + ) + data = self.client.get(text) + + chunk_count = 0 + if self.sent_ts is None: + self.sent_ts = datetime.now() + + async for data_msg, event_status in data: + self.ten_env.log_debug(f"Received event_status: {event_status}") + if event_status == EVENT_TTS_RESPONSE: + if ( + data_msg is not None + and isinstance(data_msg, bytes) + and len(data_msg) > 0 + ): + chunk_count += 1 + self.total_audio_bytes += len(data_msg) + self.ten_env.log_info( + f"Received audio chunk " + f"#{chunk_count}, " + f"size: {len(data_msg)} bytes" + ) + self._write_dump(data_msg) + 
await self.send_tts_audio_data(data_msg) + else: + self.ten_env.log_debug( + "Received empty payload for " "TTS response" + ) + if t.text_input_end: + await self._finalize_request( + TTSAudioEndReason.REQUEST_END + ) + + elif event_status == EVENT_TTS_TTFB_METRIC: + if data_msg is not None and isinstance(data_msg, int): + self.sent_ts = datetime.now() + ttfb = data_msg + await self.send_tts_audio_start( + request_id=self.current_request_id, + ) + self._audio_start_sent = True + await self.send_tts_ttfb_metrics( + request_id=self.current_request_id, + ttfb_ms=ttfb, + extra_metadata={ + "model": self.config.model, + }, + ) + self.ten_env.log_debug( + f"Sent TTS audio start and " f"TTFB metrics: {ttfb}ms" + ) + + elif event_status == EVENT_TTS_END: + self.ten_env.log_info( + "Received TTS_END event from " "Deepgram TTS" + ) + if t.text_input_end: + await self._finalize_request(TTSAudioEndReason.REQUEST_END) + break + + elif event_status == EVENT_TTS_ERROR: + self.ten_env.log_error( + "Received TTS_ERROR event from " "Deepgram TTS" + ) + error_msg = ( + data_msg.decode("utf-8") + if isinstance(data_msg, bytes) + else str(data_msg) + ) + if t.text_input_end: + await self._finalize_request( + TTSAudioEndReason.ERROR, + error=ModuleError( + message=error_msg, + module=ModuleType.TTS, + code=(ModuleErrorCode.NON_FATAL_ERROR), + vendor_info=ModuleErrorVendorInfo( + vendor=self.vendor() + ), + ), + ) + break + + self.ten_env.log_debug( + f"TTS processing completed, " f"total chunks: {chunk_count}" + ) + + async def _handle_connection_error( + self, e: DeepgramTTSConnectionException + ) -> None: + """Handle Deepgram connection errors.""" + self.ten_env.log_error( + f"DeepgramTTSConnectionException in request_tts: " f"{e.body}" + ) + if e.status_code == 401: + code = ModuleErrorCode.FATAL_ERROR + else: + code = ModuleErrorCode.NON_FATAL_ERROR + + error = ModuleError( + message=e.body, + module=ModuleType.TTS, + code=code, + vendor_info=ModuleErrorVendorInfo( + 
vendor=self.vendor(), + code=str(e.status_code), + message=e.body, + ), + ) + await self.send_tts_error( + request_id=self.current_request_id, + error=error, + ) + await self.finish_request( + request_id=self.current_request_id, + reason=TTSAudioEndReason.ERROR, + error=error, + ) + + def _setup_recorder(self, request_id: str) -> None: + """Set up PCMWriter for a new request.""" + if not (self.config and self.config.dump): + return + # Clean up old PCMWriters + for old_rid in [ + rid for rid in self.recorder_map.keys() if rid != request_id + ]: + try: + asyncio.create_task(self.recorder_map[old_rid].flush()) + del self.recorder_map[old_rid] + self.ten_env.log_debug( + f"Cleaned up old PCMWriter for " f"request_id: {old_rid}" + ) + except Exception as e: + self.ten_env.log_error( + f"Error cleaning up PCMWriter for " + f"request_id {old_rid}: {e}" + ) + + if request_id not in self.recorder_map: + dump_file_path = os.path.join( + self.config.dump_path, + f"deepgram_dump_{request_id}.pcm", + ) + self.recorder_map[request_id] = PCMWriter(dump_file_path) + self.ten_env.log_debug( + f"Created PCMWriter for request_id: " + f"{request_id}, file: {dump_file_path}" + ) + + def _write_dump(self, data: bytes) -> None: + """Write audio data to dump file if enabled.""" + if ( + self.config + and self.config.dump + and self.current_request_id + and self.current_request_id in self.recorder_map + ): + asyncio.create_task( + self.recorder_map[self.current_request_id].write(data) + ) + + async def send_fatal_tts_error(self, error_message: str) -> None: + await self.send_tts_error( + request_id=self.current_request_id or "", + error=ModuleError( + message=error_message, + module=ModuleType.TTS, + code=ModuleErrorCode.FATAL_ERROR, + vendor_info=ModuleErrorVendorInfo(vendor=self.vendor()), + ), + ) + + async def send_non_fatal_tts_error(self, error_message: str) -> None: + await self.send_tts_error( + request_id=self.current_request_id or "", + error=ModuleError( + 
message=error_message, + module=ModuleType.TTS, + code=ModuleErrorCode.NON_FATAL_ERROR, + vendor_info=ModuleErrorVendorInfo(vendor=self.vendor()), + ), + ) + + def _current_request_interval_ms(self) -> int: + if not self.sent_ts: + return 0 + return int((datetime.now() - self.sent_ts).total_seconds() * 1000) + + def _calculate_audio_duration_ms(self) -> int: + if self.config is None: + return 0 + bytes_per_sample = 2 # 16-bit PCM + channels = 1 # Mono + duration_sec = self.total_audio_bytes / ( + self.synthesize_audio_sample_rate() * bytes_per_sample * channels + ) + return int(duration_sec * 1000) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/manifest.json b/ai_agents/agents/ten_packages/extension/deepgram_tts/manifest.json new file mode 100644 index 0000000000..c2ef9bb7a0 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/manifest.json @@ -0,0 +1,65 @@ +{ + "type": "extension", + "name": "deepgram_tts", + "version": "0.1.0", + "dependencies": [ + { + "type": "system", + "name": "ten_runtime_python", + "version": "0.11" + }, + { + "type": "system", + "name": "ten_ai_base", + "version": "0.7" + } + ], + "package": { + "include": [ + "manifest.json", + "property.json", + "BUILD.gn", + "**.tent", + "**.py", + "README.md", + "requirements.txt" + ] + }, + "api": { + "interface": [ + { + "import_uri": "../../system/ten_ai_base/api/tts-interface.json" + } + ], + "property": { + "properties": { + "dump": { + "type": "bool" + }, + "dump_path": { + "type": "string" + }, + "params": { + "type": "object", + "properties": { + "api_key": { + "type": "string" + }, + "base_url": { + "type": "string" + }, + "model": { + "type": "string" + }, + "encoding": { + "type": "string" + }, + "sample_rate": { + "type": "int32" + } + } + } + } + } + } +} diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/property.json b/ai_agents/agents/ten_packages/extension/deepgram_tts/property.json new file mode 100644 index 0000000000..313cff84f4 
--- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/property.json @@ -0,0 +1,11 @@ +{ + "dump": false, + "dump_path": "/tmp", + "params": { + "api_key": "${env:DEEPGRAM_API_KEY}", + "base_url": "wss://api.deepgram.com/v1/speak", + "model": "aura-2-thalia-en", + "encoding": "linear16", + "sample_rate": 24000 + } +} diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/requirements.txt b/ai_agents/agents/ten_packages/extension/deepgram_tts/requirements.txt new file mode 100644 index 0000000000..31b5e2f348 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/requirements.txt @@ -0,0 +1 @@ +websockets>=12.0 diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/__init__.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/__init__.py new file mode 100644 index 0000000000..da402faf43 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/__init__.py @@ -0,0 +1,5 @@ +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/bin/start b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/bin/start new file mode 100755 index 0000000000..41da3fdb45 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/bin/start @@ -0,0 +1,21 @@ +#!/bin/bash + +set -e + +cd "$(dirname "${BASH_SOURCE[0]}")/../.." + +export PYTHONPATH=.ten/app:.ten/app/ten_packages/system/ten_runtime_python/lib:.ten/app/ten_packages/system/ten_runtime_python/interface:.ten/app/ten_packages/system/ten_ai_base/interface:$PYTHONPATH + +# If the Python app imports some modules that are compiled with a different +# version of libstdc++ (ex: PyTorch), the Python app may encounter confusing +# errors. To solve this problem, we can preload the correct version of +# libstdc++. 
+# +# export LD_PRELOAD=/lib/x86_64-linux-gnu/libstdc++.so.6 +# +# Another solution is to make sure the module 'ten_runtime_python' is imported +# _after_ the module that requires another version of libstdc++ is imported. +# +# Refer to https://github.com/pytorch/pytorch/issues/102360?from_wecom=1#issuecomment-1708989096 + +pytest tests/ "$@" diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_basic_audio_setting1.json b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_basic_audio_setting1.json new file mode 100644 index 0000000000..ff0a081e87 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_basic_audio_setting1.json @@ -0,0 +1,10 @@ +{ + "dump": true, + "dump_path": "./tests/keep_dump_output/", + "params": { + "api_key": "${env:DEEPGRAM_API_KEY}", + "model": "aura-2-thalia-en", + "encoding": "linear16", + "sample_rate": 24000 + } +} diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_basic_audio_setting2.json b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_basic_audio_setting2.json new file mode 100644 index 0000000000..c753384856 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_basic_audio_setting2.json @@ -0,0 +1,10 @@ +{ + "dump": true, + "dump_path": "./tests/keep_dump_output/", + "params": { + "api_key": "${env:DEEPGRAM_API_KEY}", + "model": "aura-2-luna-en", + "encoding": "linear16", + "sample_rate": 16000 + } +} diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_dump.json b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_dump.json new file mode 100644 index 0000000000..4690fecb76 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/configs/property_dump.json @@ -0,0 +1,10 @@ +{ + "dump": true, + "dump_path": "./dump/", + "params": { + "api_key": 
class FakeApp(App):
    """Minimal TEN app that signals readiness through a threading.Event."""

    def __init__(self):
        super().__init__()
        self.event: threading.Event | None = None

    # `on_init` (not `on_configure`) releases the waiting test fixture: in
    # the TEN runtime C core, the addon manager is bound to the (fake) app
    # only after `on_configure_done`, and `on_init()` is the earliest
    # user-layer hook that runs after that binding. So the fixture lock is
    # released here.
    @override
    def on_init(self, ten_env: TenEnv) -> None:
        assert self.event
        self.event.set()
        ten_env.on_init_done()

    @override
    def on_configure(self, ten_env: TenEnv) -> None:
        log_config = {
            "ten": {
                "log": {
                    "handlers": [
                        {
                            "matchers": [{"level": "debug"}],
                            "formatter": {
                                "type": "plain",
                                "colored": True,
                            },
                            "emitter": {
                                "type": "console",
                                "config": {"stream": "stdout"},
                            },
                        }
                    ]
                }
            }
        }
        ten_env.init_property_from_json(json.dumps(log_config))
        ten_env.on_configure_done()


class FakeAppCtx:
    """Carries the fake app instance and its readiness event across threads."""

    def __init__(self, event: threading.Event):
        self.fake_app: FakeApp | None = None
        self.event = event


def run_fake_app(fake_app_ctx: FakeAppCtx):
    """Thread target: construct the fake app and run it (blocking)."""
    app = FakeApp()
    app.event = fake_app_ctx.event
    fake_app_ctx.fake_app = app
    app.run(False)


@pytest.fixture(scope="session", autouse=True)
def global_setup_and_teardown():
    """Run a fake TEN app on a background thread for the whole test session."""
    event = threading.Event()
    ctx = FakeAppCtx(event)

    app_thread = threading.Thread(target=run_fake_app, args=(ctx,))
    app_thread.start()

    # Block until the app reaches on_init (see FakeApp.on_init).
    event.wait()
    assert ctx.fake_app is not None

    # Yield control to the tests; teardown runs afterwards.
    yield

    ctx.fake_app.close()
    app_thread.join()
import sys
from pathlib import Path

# Make the repo root importable when running pytest from this directory;
# the root sits six levels above this file's parent directory.
project_root = str(Path(__file__).resolve().parents[6])
if project_root not in sys.path:
    sys.path.insert(0, project_root)

#
# This file is part of TEN Framework, an open source project.
# Licensed under the Apache License, Version 2.0.
# See the LICENSE file for more information.
#
from pathlib import Path
import json
from unittest.mock import patch, AsyncMock
import os
import asyncio
import filecmp
import shutil

from ten_runtime import (
    ExtensionTester,
    TenEnvTester,
    Data,
)
from ten_ai_base.struct import TTSTextInput, TTSFlush
from deepgram_tts.deepgram_tts import (
    EVENT_TTS_RESPONSE,
    EVENT_TTS_END,
    EVENT_TTS_FLUSH,
    EVENT_TTS_TTFB_METRIC,
)


# ================ test dump file functionality ================
class ExtensionTesterDump(ExtensionTester):
    """Collects audio frames and compares them to the extension's dump file."""

    def __init__(self):
        super().__init__()
        self.dump_dir = "./dump/"
        self.test_dump_file_path = os.path.join(
            self.dump_dir, "test_manual_dump.pcm"
        )
        self.audio_end_received = False
        self.received_audio_chunks = []

    def on_start(self, ten_env_tester: TenEnvTester) -> None:
        ten_env_tester.log_info("Dump test started, sending TTS request.")
        payload = TTSTextInput(
            request_id="tts_request_1",
            text="hello word, hello agora",
            text_input_end=True,
        )
        msg = Data.create("tts_text_input")
        msg.set_property_from_json(None, payload.model_dump_json())
        ten_env_tester.send_data(msg)
        ten_env_tester.on_start_done()

    def on_data(self, ten_env: TenEnvTester, data) -> None:
        if data.get_name() == "tts_audio_end":
            ten_env.log_info("Received tts_audio_end, stopping test.")
            self.audio_end_received = True
            ten_env.stop_test()

    def on_audio_frame(self, ten_env: TenEnvTester, audio_frame):
        buf = audio_frame.lock_buf()
        try:
            self.received_audio_chunks.append(bytes(buf))
        finally:
            audio_frame.unlock_buf(buf)

    def write_test_dump_file(self):
        # Concatenate every received chunk into a reference PCM file.
        with open(self.test_dump_file_path, "wb") as f:
            f.write(b"".join(self.received_audio_chunks))

    def find_tts_dump_file(self) -> str | None:
        # First .pcm in the dump dir that is not our own reference file.
        if not os.path.exists(self.dump_dir):
            return None
        own_name = os.path.basename(self.test_dump_file_path)
        for filename in os.listdir(self.dump_dir):
            if filename.endswith(".pcm") and filename != own_name:
                return os.path.join(self.dump_dir, filename)
        return None


@patch("deepgram_tts.extension.DeepgramTTSClient")
def test_dump_functionality(MockDeepgramTTSClient):
    """Tests that the dump file from the TTS extension matches the audio received."""
    print("Starting test_dump_functionality with mock...")

    DUMP_PATH = "./dump/"
    if os.path.exists(DUMP_PATH):
        shutil.rmtree(DUMP_PATH)
    os.makedirs(DUMP_PATH)

    mock_instance = MockDeepgramTTSClient.return_value
    mock_instance.start = AsyncMock()
    mock_instance.stop = AsyncMock()
    mock_instance.cancel = AsyncMock()
    mock_instance.reset_ttfb = lambda: None

    fake_audio_chunk_1 = b"\x11\x22\x33\x44" * 20
    fake_audio_chunk_2 = b"\xaa\xbb\xcc\xdd" * 20

    async def mock_get_audio_stream(text: str):
        # TTFB metric, two audio chunks with small gaps, then end-of-stream.
        yield (255, EVENT_TTS_TTFB_METRIC)
        yield (fake_audio_chunk_1, EVENT_TTS_RESPONSE)
        await asyncio.sleep(0.01)
        yield (fake_audio_chunk_2, EVENT_TTS_RESPONSE)
        await asyncio.sleep(0.01)
        yield (None, EVENT_TTS_END)

    mock_instance.get.side_effect = mock_get_audio_stream

    tester = ExtensionTesterDump()
    dump_config = {
        "dump": True,
        "dump_path": DUMP_PATH,
        "params": {
            "api_key": "test_api_key",
            "model": "aura-2-thalia-en",
            "encoding": "linear16",
            "sample_rate": 24000,
        },
    }
    tester.set_test_mode_single("deepgram_tts", json.dumps(dump_config))

    print("Running dump test...")
    tester.run()
    print("Dump test completed.")

    assert tester.audio_end_received, "Expected to receive tts_audio_end"
    assert (
        len(tester.received_audio_chunks) > 0
    ), "Expected to receive audio chunks"

    tester.write_test_dump_file()
    tts_dump_file = tester.find_tts_dump_file()
    assert (
        tts_dump_file is not None
    ), f"Expected to find a TTS dump file in {DUMP_PATH}"
    assert os.path.exists(
        tts_dump_file
    ), f"TTS dump file should exist: {tts_dump_file}"

    print(
        f"Comparing test file {tester.test_dump_file_path} with TTS dump file {tts_dump_file}"
    )
    assert filecmp.cmp(
        tester.test_dump_file_path, tts_dump_file, shallow=False
    ), "Test dump file and TTS dump file should have the same content"

    print(
        f"Dump test passed: received {len(tester.received_audio_chunks)} audio chunks"
    )

    if os.path.exists(DUMP_PATH):
        shutil.rmtree(DUMP_PATH)


# ================ test basic audio output ================
class ExtensionTesterBasic(ExtensionTester):
    """Verifies start/end events and audio frames for a single request."""

    def __init__(self):
        super().__init__()
        self.audio_start_received = False
        self.audio_end_received = False
        self.audio_chunks_count = 0

    def on_start(self, ten_env_tester: TenEnvTester) -> None:
        ten_env_tester.log_info("Basic test started, sending TTS request.")
        payload = TTSTextInput(
            request_id="tts_request_basic",
            text="Hello, this is a test of the Deepgram TTS extension.",
            text_input_end=True,
        )
        msg = Data.create("tts_text_input")
        msg.set_property_from_json(None, payload.model_dump_json())
        ten_env_tester.send_data(msg)
        ten_env_tester.on_start_done()

    def on_data(self, ten_env: TenEnvTester, data) -> None:
        name = data.get_name()
        if name == "tts_audio_start":
            ten_env.log_info("Received tts_audio_start.")
            self.audio_start_received = True
        elif name == "tts_audio_end":
            ten_env.log_info("Received tts_audio_end, stopping test.")
            self.audio_end_received = True
            ten_env.stop_test()

    def on_audio_frame(self, ten_env: TenEnvTester, audio_frame):
        self.audio_chunks_count += 1
@patch("deepgram_tts.extension.DeepgramTTSClient")
def test_basic_audio(MockDeepgramTTSClient):
    """Test basic TTS audio generation."""
    mock_instance = MockDeepgramTTSClient.return_value
    mock_instance.start = AsyncMock()
    mock_instance.stop = AsyncMock()
    mock_instance.cancel = AsyncMock()
    mock_instance.reset_ttfb = lambda: None

    fake_audio_chunk = b"\x00\x01\x02\x03" * 100

    async def mock_get_audio_stream(text: str):
        # TTFB metric, one audio chunk, then end-of-stream.
        yield (150, EVENT_TTS_TTFB_METRIC)
        yield (fake_audio_chunk, EVENT_TTS_RESPONSE)
        yield (None, EVENT_TTS_END)

    mock_instance.get.side_effect = mock_get_audio_stream

    config = {
        "params": {
            "api_key": "test_api_key",
            "model": "aura-2-thalia-en",
            "encoding": "linear16",
            "sample_rate": 24000,
        },
    }
    tester = ExtensionTesterBasic()
    tester.set_test_mode_single("deepgram_tts", json.dumps(config))
    tester.run()

    assert tester.audio_start_received, "tts_audio_start was not received."
    assert tester.audio_end_received, "tts_audio_end was not received."
    assert tester.audio_chunks_count > 0, "No audio chunks received."


# ================ test flush functionality ================
class ExtensionTesterFlush(ExtensionTester):
    """Sends text, a flush, then final text; expects an audio end event."""

    def __init__(self):
        super().__init__()
        self.audio_end_received = False

    def on_start(self, ten_env_tester: TenEnvTester) -> None:
        ten_env_tester.log_info("Flush test started.")

        first = TTSTextInput(
            request_id="tts_request_flush",
            text="This is the first sentence.",
            text_input_end=False,
        )
        first_msg = Data.create("tts_text_input")
        first_msg.set_property_from_json(None, first.model_dump_json())
        ten_env_tester.send_data(first_msg)

        flush = TTSFlush(flush_id="flush_1")
        flush_msg = Data.create("tts_flush")
        flush_msg.set_property_from_json(None, flush.model_dump_json())
        ten_env_tester.send_data(flush_msg)

        final = TTSTextInput(
            request_id="tts_request_flush",
            text="This is the final sentence.",
            text_input_end=True,
        )
        final_msg = Data.create("tts_text_input")
        final_msg.set_property_from_json(None, final.model_dump_json())
        ten_env_tester.send_data(final_msg)

        ten_env_tester.on_start_done()

    def on_data(self, ten_env: TenEnvTester, data) -> None:
        if data.get_name() == "tts_audio_end":
            ten_env.log_info("Received tts_audio_end, stopping test.")
            self.audio_end_received = True
            ten_env.stop_test()


@patch("deepgram_tts.extension.DeepgramTTSClient")
def test_flush(MockDeepgramTTSClient):
    """Test TTS flush functionality."""
    mock_instance = MockDeepgramTTSClient.return_value
    mock_instance.start = AsyncMock()
    mock_instance.stop = AsyncMock()
    mock_instance.cancel = AsyncMock()
    mock_instance.reset_ttfb = lambda: None

    fake_audio_chunk = b"\x00\x01\x02\x03" * 50

    async def mock_get_audio_stream(text: str):
        yield (100, EVENT_TTS_TTFB_METRIC)
        yield (fake_audio_chunk, EVENT_TTS_RESPONSE)
        yield (None, EVENT_TTS_END)

    mock_instance.get.side_effect = mock_get_audio_stream

    tester = ExtensionTesterFlush()
    tester.set_test_mode_single(
        "deepgram_tts",
        json.dumps(
            {
                "params": {
                    "api_key": "test_api_key",
                    "model": "aura-2-thalia-en",
                    "encoding": "linear16",
                    "sample_rate": 24000,
                },
            }
        ),
    )
    tester.run()

    assert (
        tester.audio_end_received
    ), "tts_audio_end was not received after flush."
def test_empty_params_fatal_error():
    """Test that empty params raises FATAL ERROR with code -1000"""
    print("Starting test_empty_params_fatal_error...")

    # Empty params configuration
    empty_params_config = {"params": {"api_key": ""}}

    tester = ExtensionTesterEmptyParams()
    tester.set_test_mode_single("deepgram_tts", json.dumps(empty_params_config))

    print("Running test...")
    tester.run()
    print("Test completed.")

    # A fatal error must be reported before any audio is produced.
    assert tester.error_received, "Expected to receive error message"
    assert (
        tester.error_code == -1000
    ), f"Expected error code -1000 (FATAL_ERROR), got {tester.error_code}"
    assert tester.error_message is not None, "Error message should not be None"
    assert len(tester.error_message) > 0, "Error message should not be empty"

    print(f"Empty params test passed: code={tester.error_code}")


# ================ test invalid api key ================
class ExtensionTesterInvalidApiKey(ExtensionTester):
    """Sends one TTS request and records any error event the extension emits."""

    def __init__(self):
        super().__init__()
        self.error_received = False
        self.error_code = None
        self.error_message = None
        self.vendor_info = None

    def on_start(self, ten_env_tester: TenEnvTester) -> None:
        """Called when test starts, sends a TTS request to trigger the logic."""
        ten_env_tester.log_info(
            "Invalid API key test started, sending TTS request"
        )
        request = TTSTextInput(
            request_id="test-request-invalid-key",
            text="This text will trigger API key validation.",
            text_input_end=True,
        )
        msg = Data.create("tts_text_input")
        msg.set_property_from_json(None, request.model_dump_json())
        ten_env_tester.send_data(msg)
        ten_env_tester.on_start_done()

    def on_data(self, ten_env: TenEnvTester, data) -> None:
        name = data.get_name()
        ten_env.log_info(f"on_data name: {name}")

        if name == "error":
            self.error_received = True
            json_str, _ = data.get_property_to_json(None)
            error_data = json.loads(json_str)
            self.error_code = error_data.get("code")
            self.error_message = error_data.get("message", "")
            self.vendor_info = error_data.get("vendor_info", {})
            ten_env.log_info(
                f"Received error: code={self.error_code}, message={self.error_message}"
            )
            ten_env.stop_test()
        elif name == "tts_audio_end":
            # Audio end without an error also terminates the run.
            ten_env.stop_test()


@patch("deepgram_tts.deepgram_tts.websockets.connect")
def test_invalid_api_key_error(mock_websocket_connect):
    """Test that an invalid API key is handled correctly with a mock."""
    print("Starting test_invalid_api_key_error with mock...")

    # Simulate the server rejecting the key at connect time.
    mock_websocket_connect.side_effect = Exception(
        "401 Unauthorized - Invalid API key"
    )

    # Config with invalid API key
    invalid_key_config = {
        "params": {
            "api_key": "invalid_api_key_test",
            "model": "aura-2-thalia-en",
            "encoding": "linear16",
            "sample_rate": 24000,
        },
    }

    tester = ExtensionTesterInvalidApiKey()
    tester.set_test_mode_single("deepgram_tts", json.dumps(invalid_key_config))

    print("Running test with mock...")
    tester.run()
    print("Test with mock completed.")

    # Verify FATAL ERROR was received for incorrect API key
    assert tester.error_received, "Expected to receive error message"
    assert (
        tester.error_code == -1000
    ), f"Expected error code -1000 (FATAL_ERROR), got {tester.error_code}"

    # Verify vendor_info
    vendor_info = tester.vendor_info
    assert vendor_info is not None, "Expected vendor_info to be present"
    assert (
        vendor_info.get("vendor") == "deepgram"
    ), f"Expected vendor 'deepgram', got {vendor_info.get('vendor')}"

    print(f"Invalid API key test passed: code={tester.error_code}")
# ================ test metrics ================
class ExtensionTesterMetrics(ExtensionTester):
    """Checks that a TTFB metric arrives and precedes the audio end event."""

    def __init__(self):
        super().__init__()
        self.ttfb_received = False
        self.ttfb_value = -1
        self.audio_frame_received = False
        self.audio_end_received = False

    def on_start(self, ten_env_tester: TenEnvTester) -> None:
        """Called when test starts, sends a TTS request."""
        ten_env_tester.log_info("Metrics test started, sending TTS request.")
        request = TTSTextInput(
            request_id="tts_request_for_metrics",
            text="hello, this is a metrics test.",
            text_input_end=True,
        )
        msg = Data.create("tts_text_input")
        msg.set_property_from_json(None, request.model_dump_json())
        ten_env_tester.send_data(msg)
        ten_env_tester.on_start_done()

    def on_data(self, ten_env: TenEnvTester, data) -> None:
        name = data.get_name()
        ten_env.log_info(f"on_data name: {name}")
        if name == "metrics":
            json_str, _ = data.get_property_to_json(None)
            ten_env.log_info(f"Received metrics: {json_str}")
            metrics_data = json.loads(json_str)
            # 'ttfb' is nested inside a 'metrics' object.
            nested_metrics = metrics_data.get("metrics", {})
            if "ttfb" in nested_metrics:
                self.ttfb_received = True
                self.ttfb_value = nested_metrics.get("ttfb", -1)
                ten_env.log_info(
                    f"Received TTFB metric with value: {self.ttfb_value}"
                )
        elif name == "tts_audio_end":
            self.audio_end_received = True
            # Stop the test only after both TTFB and audio end are received
            if self.ttfb_received:
                ten_env.log_info("Received tts_audio_end, stopping test.")
                ten_env.stop_test()

    def on_audio_frame(self, ten_env: TenEnvTester, audio_frame):
        """Receives audio frames and confirms the stream is working."""
        if not self.audio_frame_received:
            self.audio_frame_received = True
            ten_env.log_info("First audio frame received.")
# --- Assertions --- + assert tester.audio_frame_received, "Did not receive any audio frame." + assert tester.audio_end_received, "Did not receive the tts_audio_end event." + assert tester.ttfb_received, "TTFB metric was not received." + + # Check if the TTFB value matches what we sent + assert ( + tester.ttfb_value == 255 + ), f"Expected TTFB to be 255ms, but got {tester.ttfb_value}ms." + + print(f"TTFB metric test passed. Received TTFB: {tester.ttfb_value}ms.") diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_params.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_params.py new file mode 100644 index 0000000000..d597cd6a52 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_params.py @@ -0,0 +1,157 @@ +import sys +from pathlib import Path + +# Add project root to sys.path +project_root = str(Path(__file__).resolve().parents[6]) +if project_root not in sys.path: + sys.path.insert(0, project_root) + +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. 
+# +import json +from unittest.mock import patch, AsyncMock + +from ten_runtime import ( + ExtensionTester, + TenEnvTester, + Data, +) +from ten_ai_base.struct import TTSTextInput +from deepgram_tts.deepgram_tts import ( + EVENT_TTS_RESPONSE, + EVENT_TTS_END, + EVENT_TTS_TTFB_METRIC, +) + + +# ================ test different sample rates ================ +class ExtensionTesterSampleRate(ExtensionTester): + def __init__(self, sample_rate: int): + super().__init__() + self.sample_rate = sample_rate + self.audio_end_received = False + self.audio_chunks_count = 0 + + def on_start(self, ten_env_tester: TenEnvTester) -> None: + ten_env_tester.log_info(f"Sample rate test: {self.sample_rate}Hz") + + tts_input = TTSTextInput( + request_id="tts_request_sr", + text="Testing different sample rates.", + text_input_end=True, + ) + data = Data.create("tts_text_input") + data.set_property_from_json(None, tts_input.model_dump_json()) + ten_env_tester.send_data(data) + ten_env_tester.on_start_done() + + def on_data(self, ten_env: TenEnvTester, data) -> None: + name = data.get_name() + if name == "tts_audio_end": + self.audio_end_received = True + ten_env.stop_test() + + def on_audio_frame(self, ten_env: TenEnvTester, audio_frame): + self.audio_chunks_count += 1 + + +def _create_mock_client(): + """Helper to create a mock client for tests.""" + from unittest.mock import MagicMock + + mock = MagicMock() + mock.start = AsyncMock() + mock.stop = AsyncMock() + mock.cancel = AsyncMock() + mock.reset_ttfb = lambda: None + + fake_audio_chunk = b"\x00\x01\x02\x03" * 100 + + async def mock_get_audio_stream(text: str): + yield (100, EVENT_TTS_TTFB_METRIC) + yield (fake_audio_chunk, EVENT_TTS_RESPONSE) + yield (None, EVENT_TTS_END) + + mock.get.side_effect = mock_get_audio_stream + return mock + + +@patch("deepgram_tts.extension.DeepgramTTSClient") +def test_sample_rate_16000(MockDeepgramTTSClient): + """Test with 16000 Hz sample rate.""" + MockDeepgramTTSClient.return_value = 
_create_mock_client() + + tester = ExtensionTesterSampleRate(16000) + tester.set_test_mode_single( + "deepgram_tts", + json.dumps( + { + "params": { + "api_key": "test_api_key", + "model": "aura-2-thalia-en", + "encoding": "linear16", + "sample_rate": 16000, + }, + } + ), + ) + + tester.run() + + assert tester.audio_end_received, "tts_audio_end was not received." + assert tester.audio_chunks_count > 0, "No audio chunks received." + + +@patch("deepgram_tts.extension.DeepgramTTSClient") +def test_sample_rate_24000(MockDeepgramTTSClient): + """Test with 24000 Hz sample rate.""" + MockDeepgramTTSClient.return_value = _create_mock_client() + + tester = ExtensionTesterSampleRate(24000) + tester.set_test_mode_single( + "deepgram_tts", + json.dumps( + { + "params": { + "api_key": "test_api_key", + "model": "aura-2-thalia-en", + "encoding": "linear16", + "sample_rate": 24000, + }, + } + ), + ) + + tester.run() + + assert tester.audio_end_received, "tts_audio_end was not received." + assert tester.audio_chunks_count > 0, "No audio chunks received." + + +@patch("deepgram_tts.extension.DeepgramTTSClient") +def test_sample_rate_48000(MockDeepgramTTSClient): + """Test with 48000 Hz sample rate.""" + MockDeepgramTTSClient.return_value = _create_mock_client() + + tester = ExtensionTesterSampleRate(48000) + tester.set_test_mode_single( + "deepgram_tts", + json.dumps( + { + "params": { + "api_key": "test_api_key", + "model": "aura-2-thalia-en", + "encoding": "linear16", + "sample_rate": 48000, + }, + } + ), + ) + + tester.run() + + assert tester.audio_end_received, "tts_audio_end was not received." + assert tester.audio_chunks_count > 0, "No audio chunks received." 
diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_robustness.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_robustness.py new file mode 100644 index 0000000000..b807fe5834 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_robustness.py @@ -0,0 +1,277 @@ +import sys +from pathlib import Path + +# Add project root to sys.path +project_root = str(Path(__file__).resolve().parents[6]) +if project_root not in sys.path: + sys.path.insert(0, project_root) + +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +import json +from unittest.mock import patch, AsyncMock + +from ten_runtime import ( + ExtensionTester, + TenEnvTester, + Data, +) +from ten_ai_base.struct import TTSTextInput +from deepgram_tts.deepgram_tts import ( + EVENT_TTS_RESPONSE, + EVENT_TTS_END, + EVENT_TTS_TTFB_METRIC, +) + + +def _create_mock_client(): + """Helper to create a mock client for tests.""" + from unittest.mock import MagicMock + + mock = MagicMock() + mock.start = AsyncMock() + mock.stop = AsyncMock() + mock.cancel = AsyncMock() + mock.reset_ttfb = lambda: None + + fake_audio_chunk = b"\x00\x01\x02\x03" * 100 + + async def mock_get_audio_stream(text: str): + yield (100, EVENT_TTS_TTFB_METRIC) + yield (fake_audio_chunk, EVENT_TTS_RESPONSE) + yield (None, EVENT_TTS_END) + + mock.get.side_effect = mock_get_audio_stream + return mock + + +# ================ test empty text ================ +class ExtensionTesterEmptyText(ExtensionTester): + def __init__(self): + super().__init__() + self.audio_end_received = False + + def on_start(self, ten_env_tester: TenEnvTester) -> None: + ten_env_tester.log_info("Empty text test started.") + + tts_input = TTSTextInput( + request_id="tts_request_empty", + text="", + text_input_end=True, + ) + data = Data.create("tts_text_input") + data.set_property_from_json(None, 
tts_input.model_dump_json()) + ten_env_tester.send_data(data) + ten_env_tester.on_start_done() + + def on_data(self, ten_env: TenEnvTester, data) -> None: + name = data.get_name() + if name == "tts_audio_end": + ten_env.log_info("Received tts_audio_end for empty text.") + self.audio_end_received = True + ten_env.stop_test() + + +@patch("deepgram_tts.extension.DeepgramTTSClient") +def test_empty_text(MockDeepgramTTSClient): + """Test that empty text is handled gracefully.""" + MockDeepgramTTSClient.return_value = _create_mock_client() + + tester = ExtensionTesterEmptyText() + tester.set_test_mode_single( + "deepgram_tts", + json.dumps( + { + "params": { + "api_key": "test_api_key", + "model": "aura-2-thalia-en", + "encoding": "linear16", + "sample_rate": 24000, + }, + } + ), + ) + + tester.run() + + assert ( + tester.audio_end_received + ), "tts_audio_end should be sent for empty text." + + +# ================ test whitespace only text ================ +class ExtensionTesterWhitespaceText(ExtensionTester): + def __init__(self): + super().__init__() + self.audio_end_received = False + + def on_start(self, ten_env_tester: TenEnvTester) -> None: + ten_env_tester.log_info("Whitespace text test started.") + + tts_input = TTSTextInput( + request_id="tts_request_whitespace", + text=" \n\t ", + text_input_end=True, + ) + data = Data.create("tts_text_input") + data.set_property_from_json(None, tts_input.model_dump_json()) + ten_env_tester.send_data(data) + ten_env_tester.on_start_done() + + def on_data(self, ten_env: TenEnvTester, data) -> None: + name = data.get_name() + if name == "tts_audio_end": + ten_env.log_info("Received tts_audio_end for whitespace text.") + self.audio_end_received = True + ten_env.stop_test() + + +@patch("deepgram_tts.extension.DeepgramTTSClient") +def test_whitespace_text(MockDeepgramTTSClient): + """Test that whitespace-only text is handled gracefully.""" + MockDeepgramTTSClient.return_value = _create_mock_client() + + tester = 
ExtensionTesterWhitespaceText() + tester.set_test_mode_single( + "deepgram_tts", + json.dumps( + { + "params": { + "api_key": "test_api_key", + "model": "aura-2-thalia-en", + "encoding": "linear16", + "sample_rate": 24000, + }, + } + ), + ) + + tester.run() + + assert ( + tester.audio_end_received + ), "tts_audio_end should be sent for whitespace text." + + +# ================ test long text ================ +class ExtensionTesterLongText(ExtensionTester): + def __init__(self): + super().__init__() + self.audio_end_received = False + self.audio_chunks_count = 0 + + def on_start(self, ten_env_tester: TenEnvTester) -> None: + ten_env_tester.log_info("Long text test started.") + + long_text = "This is a longer piece of text. " * 20 + + tts_input = TTSTextInput( + request_id="tts_request_long", + text=long_text, + text_input_end=True, + ) + data = Data.create("tts_text_input") + data.set_property_from_json(None, tts_input.model_dump_json()) + ten_env_tester.send_data(data) + ten_env_tester.on_start_done() + + def on_data(self, ten_env: TenEnvTester, data) -> None: + name = data.get_name() + if name == "tts_audio_end": + ten_env.log_info("Received tts_audio_end for long text.") + self.audio_end_received = True + ten_env.stop_test() + + def on_audio_frame(self, ten_env: TenEnvTester, audio_frame): + self.audio_chunks_count += 1 + + +@patch("deepgram_tts.extension.DeepgramTTSClient") +def test_long_text(MockDeepgramTTSClient): + """Test that long text is handled correctly.""" + MockDeepgramTTSClient.return_value = _create_mock_client() + + tester = ExtensionTesterLongText() + tester.set_test_mode_single( + "deepgram_tts", + json.dumps( + { + "params": { + "api_key": "test_api_key", + "model": "aura-2-thalia-en", + "encoding": "linear16", + "sample_rate": 24000, + }, + } + ), + ) + + tester.run() + + assert ( + tester.audio_end_received + ), "tts_audio_end was not received for long text." 
+ assert ( + tester.audio_chunks_count > 0 + ), "No audio chunks received for long text." + + +# ================ test special characters ================ +class ExtensionTesterSpecialChars(ExtensionTester): + def __init__(self): + super().__init__() + self.audio_end_received = False + self.error_received = False + + def on_start(self, ten_env_tester: TenEnvTester) -> None: + ten_env_tester.log_info("Special characters test started.") + + tts_input = TTSTextInput( + request_id="tts_request_special", + text="Hello! How are you? I'm fine, thanks. $100 is 100%.", + text_input_end=True, + ) + data = Data.create("tts_text_input") + data.set_property_from_json(None, tts_input.model_dump_json()) + ten_env_tester.send_data(data) + ten_env_tester.on_start_done() + + def on_data(self, ten_env: TenEnvTester, data) -> None: + name = data.get_name() + if name == "tts_audio_end": + self.audio_end_received = True + ten_env.stop_test() + elif name == "error": + self.error_received = True + ten_env.stop_test() + + +@patch("deepgram_tts.extension.DeepgramTTSClient") +def test_special_characters(MockDeepgramTTSClient): + """Test that special characters are handled correctly.""" + MockDeepgramTTSClient.return_value = _create_mock_client() + + tester = ExtensionTesterSpecialChars() + tester.set_test_mode_single( + "deepgram_tts", + json.dumps( + { + "params": { + "api_key": "test_api_key", + "model": "aura-2-thalia-en", + "encoding": "linear16", + "sample_rate": 24000, + }, + } + ), + ) + + tester.run() + + assert tester.audio_end_received, "tts_audio_end was not received." + assert ( + not tester.error_received + ), "Error should not be received for special chars." diff --git a/docs/ai/L0_repo_card.md b/docs/ai/L0_repo_card.md new file mode 100644 index 0000000000..53c8a56953 --- /dev/null +++ b/docs/ai/L0_repo_card.md @@ -0,0 +1,31 @@ +# TEN Framework — Repo Card + +> Open-source platform for building real-time multimodal AI agents with voice, video, and tool capabilities. 
+ +## Identity + +| Field | Value | +| ------------- | -------------------------------------------------------------------- | +| Repo | `TEN-framework/TEN-Agent` | +| Type | `framework` (SDK-library + API-service + frontend) | +| Language | Python (extensions), Go (API server), TypeScript/React (playground) | +| Deploy Target | Docker container (`ten_agent_dev`), Taskfile-based build | +| Owner | TEN Framework team | +| Last Reviewed | 2026-04-02 | + +## L1 — Summaries + +| File | Purpose | +| ---------------------------------------- | -------------------------------------------------------- | +| [01_setup](L1/01_setup.md) | Docker, .env, ports, health checks, restart procedures | +| [02_architecture](L1/02_architecture.md) | Extensions, graphs, connections, RTC-first design | +| [03_code_map](L1/03_code_map.md) | Directory tree, key files, base classes, 93+ extensions | +| [04_conventions](L1/04_conventions.md) | Naming, Pydantic configs, params pattern, formatting | +| [05_workflows](L1/05_workflows.md) | Create extension, modify graph, test, restart, deploy | +| [06_interfaces](L1/06_interfaces.md) | REST API, connection schemas, base class abstract methods| +| [07_gotchas](L1/07_gotchas.md) | Property tuples, signal handlers, zombies, .env timing | +| [08_security](L1/08_security.md) | API keys, .env, sensitive logging, git hooks | + +## L2 — Deep Dives + +See [L1/deep_dives/_index.md](L1/deep_dives/_index.md) for extended guides referenced by L1 files. diff --git a/docs/ai/L1/01_setup.md b/docs/ai/L1/01_setup.md new file mode 100644 index 0000000000..c6003da202 --- /dev/null +++ b/docs/ai/L1/01_setup.md @@ -0,0 +1,118 @@ +# 01 Setup + +> Environment setup, local development, and quick commands for TEN Framework AI Agents. 
+ +## Prerequisites + +| Requirement | Version / Notes | +| ----------------- | ------------------------------------------------------------ | +| Docker + Compose | Required for container-based development | +| Node.js | LTS v18+ on host; container has Node 22 | +| API Keys | Agora App ID, OpenAI, Deepgram ASR, ElevenLabs TTS (minimum)| +| Hardware | 2+ CPU cores, 4 GB RAM minimum | + +## Docker Container + +```bash +cd /home/ubuntu/ten-framework/ai_agents +docker compose up -d +docker ps | grep ten_agent_dev # Verify running +``` + +Container image: `ghcr.io/ten-framework/ten_agent_build:0.7.14` + +## Environment Variables + +**Single .env file**: `ai_agents/.env` — the ONLY source of environment config. + +| Variable | Purpose | Required | +| ---------------------------- | ---------------------------- | -------- | +| `AGORA_APP_ID` | Agora RTC app identifier | Yes | +| `AGORA_APP_CERTIFICATE` | Agora RTC certificate | No | +| `OPENAI_API_KEY` | LLM provider | Yes | +| `OPENAI_MODEL` | Model name (e.g., `gpt-4o`) | Yes | +| `DEEPGRAM_API_KEY` | ASR provider | Yes | +| `ELEVENLABS_TTS_KEY` | TTS provider | Yes | +| `LOG_STDOUT` | Worker log visibility | Yes (`true`) | +| `SERVER_PORT` | API server port | Yes (`8080`) | +| `WORKERS_MAX` | Max concurrent sessions | Yes (`100`) | +| `WORKER_QUIT_TIMEOUT_SECONDS`| Worker idle timeout | Yes (`60`) | + +See `.env.example` for the complete list. Extensions may require additional keys +(Azure, AWS, Rime, etc.) — check extension README files. + +## Install and Run + +```bash +# 1. Install Python dependencies (NOT persisted across container restarts) +docker exec ten_agent_dev bash -c \ + "cd /app/agents/examples/voice-assistant-advanced/tenapp && \ + bash scripts/install_python_deps.sh" + +# 2. Build and install (5-8 minutes first time) +docker exec ten_agent_dev bash -c \ + "cd /app/agents/examples/voice-assistant-advanced && task install" + +# 3. 
Start everything (API server + playground + TMAN Designer) +docker exec -d ten_agent_dev bash -c \ + "cd /app/agents/examples/voice-assistant-advanced && \ + task run > /tmp/task_run.log 2>&1" +``` + +**CRITICAL**: Always use `task run` to start — never run `./bin/api` directly. + +## Ports + +| Port | Service | +| ----- | ---------------- | +| 8080 | Go API server | +| 3000 | Playground (Next.js) | +| 49483 | TMAN Designer | + +## Health Checks + +```bash +curl -s http://localhost:8080/health +# {"code":"0","data":null,"msg":"ok"} + +curl -s http://localhost:8080/graphs | jq -r '.data[].name' +# voice_assistant, voice_assistant_heygen, etc. +``` + +## Restart Procedures + +| What Changed | Container? | Server? | Frontend? | +| ------------------------------- | ---------- | ----------------- | ----------------- | +| `property.json` (graphs added) | No | Nuclear restart | Nuclear restart | +| `property.json` (config only) | No | No | No | +| `.env` file | Yes | Yes | No | +| Python extension code | No | Yes | No | +| Go server code | No | Yes + `task install` | No | + +**Nuclear restart** (safest after graph changes): + +```bash +sudo docker exec ten_agent_dev bash -c "pkill -9 -f 'bin/api'; pkill -9 node; pkill -9 bun" +sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock" +sleep 2 +sudo docker exec -d ten_agent_dev bash -c \ + "cd /app/agents/examples/voice-assistant-advanced && task run > /tmp/task_run.log 2>&1" +``` + +**After container restart**: always reinstall Python deps, then `task run`. + +**After .env changes**: `docker compose down && docker compose up -d`, reinstall deps, `task run`. 
+ +## Logs + +```bash +# All logs (inside container) +docker exec ten_agent_dev tail -f /tmp/task_run.log + +# Filter by extension or channel +docker exec ten_agent_dev tail -f /tmp/task_run.log | grep --line-buffered "deepgram" +``` + +## Related Deep Dives + +- [Deployment](deep_dives/deployment.md) — Docker Compose, Cloudflare tunnel, Nginx, Grafana monitoring diff --git a/docs/ai/L1/02_architecture.md b/docs/ai/L1/02_architecture.md new file mode 100644 index 0000000000..c35f537d7e --- /dev/null +++ b/docs/ai/L1/02_architecture.md @@ -0,0 +1,142 @@ +# 02 Architecture + +> System design overview: extensions, graphs, connections, and the server-worker model. + +## TEN Ecosystem + +| Component | Purpose | +| --------------------- | ------------------------------------------------------ | +| TEN Framework | Core runtime (C/C++, Go, Python, Node.js bindings) | +| TEN Agent Examples | Pre-built agent configurations (this repo's `ai_agents/`) | +| TEN VAD | Low-latency voice activity detection | +| TEN Turn Detection | Full-duplex dialogue management | +| TEN Portal | Documentation and blog site | + +## Extension System + +Extensions are modular components that process data — ASR, TTS, LLM, tools, RTC, avatars, etc. 
+Each extension has a lifecycle: + +``` +on_init() → on_start() → [process messages] → on_stop() → on_deinit() +``` + +Every extension contains: + +| File | Purpose | +| ----------------- | ------------------------------------------ | +| `addon.py` | Registration via `@register_addon_as_extension` | +| `extension.py` | Main logic, inherits from a base class | +| `manifest.json` | Metadata, dependencies, API interface | +| `property.json` | Default configuration values | + +**Base classes** (in `ten_ai_base/interface/ten_ai_base/`): + +| Base Class | Use For | +| ----------------------------- | ----------------- | +| `AsyncASRBaseExtension` | Speech-to-text | +| `AsyncTTS2BaseExtension` | Text-to-speech | +| `AsyncLLMBaseExtension` | Chat completion | +| `AsyncLLMToolBaseExtension` | LLM function tools| +| `AsyncExtension` | Generic / custom | + +## Graph-Based Configuration + +Agents are assembled by defining **graphs** in `property.json`. A graph specifies +which extensions run (nodes) and how data flows between them (connections). + +```json +{ + "predefined_graphs": [{ + "name": "voice_assistant", + "auto_start": true, + "graph": { + "nodes": [ + {"type": "extension", "name": "stt", "addon": "deepgram_asr_python", "property": {}}, + {"type": "extension", "name": "llm", "addon": "openai_llm2_python", "property": {}}, + {"type": "extension", "name": "tts", "addon": "elevenlabs_tts2_python", "property": {}} + ], + "connections": [...] 
+ } + }] +} +``` + +## Connection Types + +| Type | Payload | Example | +| ------------- | -------------------- | -------------------------------------------- | +| `cmd` | Named commands | `tool_register`, `on_user_joined`, `flush` | +| `data` | Named data messages | `asr_result`, `text_data`, `tts_text_input` | +| `audio_frame` | PCM audio streams | `pcm_frame` (16-bit, mono, 16/24/48 kHz) | +| `video_frame` | Video streams | Raw video frames for vision/avatar | + +## RTC-First Design + +TEN uses Agora RTC (Real-Time Communication) as the default transport, not WebSockets. + +| Aspect | RTC (default) | WebSocket | +| ---------------- | -------------------------------- | -------------------------- | +| Latency | 50-150ms (UDP-based) | Higher (TCP-based) | +| Codec support | Opus, VP8, VP9, AV1 | Raw PCM only | +| Bandwidth adapt | Built-in adaptation + FEC | Manual implementation | +| Use case | Real-time voice/video | Signaling, configuration | + +WebSockets are used for signaling and configuration; RTC handles the media path. 
+ +## Server-Worker Model + +``` +┌─────────────────┐ ┌──────────────────┐ +│ Go HTTP Server │────▶│ Worker Process │ (one per session) +│ (port 8080) │ │ (tman run start) │ +│ │ │ │ +│ /start → spawn │ │ Loads graph from │ +│ /stop → kill │ │ property.json │ +│ /ping → keep │ │ Runs extensions │ +└─────────────────┘ └──────────────────┘ +``` + +- **POST /start** spawns a worker process for a channel/session +- **POST /stop** terminates the worker +- **POST /ping** keeps the session alive (if timeout != -1) + +## Property Injection + +When `/start` is called, the server auto-injects dynamic values into the graph: + +- `channel_name` → injected into every node that has a `"channel"` property +- `remote_stream_id`, `bot_stream_id`, `token` → injected via `startPropMap` +- `req.Properties[extensionName]` → merged into specific node properties + +This is future-proof: any new extension with a "channel" property automatically +receives the dynamic channel value without code changes. + +## Component Diagram + +``` + Client (Browser/Mobile) + │ + ▼ + ┌──────────────┐ + │ Playground │ Next.js frontend (port 3000) + │ (UI) │ + └──────┬───────┘ + │ REST API + ▼ + ┌──────────────┐ ┌──────────────────────────────────┐ + │ Go Server │──spawn─▶│ Worker Process │ + │ (port 8080) │ │ ┌─────┐ ┌─────┐ ┌─────┐ │ + │ │ │ │ ASR │─▶│ LLM │─▶│ TTS │ │ + │ │ │ └──┬──┘ └─────┘ └──┬──┘ │ + └──────────────┘ │ │ │ │ + │ ┌──┴───────────────────┴──┐ │ + │ │ Agora RTC │ │ + │ └─────────────────────────┘ │ + └──────────────────────────────────┘ +``` + +## Related Deep Dives + +- [Server Architecture](deep_dives/server_architecture.md) — Go server internals, property injection pipeline +- [Graph Configuration](deep_dives/graph_configuration.md) — Node schema, connection wiring, parallel routing diff --git a/docs/ai/L1/03_code_map.md b/docs/ai/L1/03_code_map.md new file mode 100644 index 0000000000..dd60723726 --- /dev/null +++ b/docs/ai/L1/03_code_map.md @@ -0,0 +1,117 @@ +# 03 Code Map + +> 
Directory tree, module responsibilities, and key file locations. + +## Top-Level Structure + +All AI agent development happens inside `ai_agents/`: + +``` +ai_agents/ +├── agents/ +│ ├── ten_packages/ +│ │ ├── extension/ # 93+ extensions (ASR, TTS, LLM, tools, avatar) +│ │ └── system/ # Core runtime packages +│ │ ├── ten_ai_base/ # Base classes and API interface definitions +│ │ ├── ten_runtime_python/ +│ │ └── ten_runtime_go/ +│ ├── examples/ # 24+ example agent configurations +│ │ ├── voice-assistant/ +│ │ ├── voice-assistant-advanced/ +│ │ ├── voice-assistant-realtime/ +│ │ ├── voice-assistant-video/ +│ │ ├── doodler/ +│ │ └── ... +│ ├── integration_tests/ # Test frameworks +│ │ ├── asr_guarder/ # ASR integration tests +│ │ └── tts_guarder/ # TTS integration tests +│ └── scripts/ # Build and packaging scripts +├── server/ # Go API server +│ ├── main.go +│ └── internal/ +│ ├── http_server.go # REST endpoints, property injection +│ └── config.go # Parameter mapping (startPropMap) +├── playground/ # Next.js frontend UI (port 3000) +│ └── src/ # React components +├── esp32-client/ # ESP32 hardware client +├── Taskfile.yml # Root-level build/test tasks +├── docker-compose.yml # Container config +├── .env # Environment variables (single source) +└── .env.example # Template with all variables +``` + +Other repo-root directories: `core/` (C runtime), `packages/` (example/core extensions), +`docs/` (framework docs), `tools/` (Grafana monitoring, profilers). 
+ +## Extension Categories + +| Category | Count | Examples | +| --------- | ----- | ----------------------------------------------------------- | +| ASR | 10+ | `deepgram_asr_python`, `azure_asr_python`, `aws_asr_python` | +| TTS | 15+ | `deepgram_tts`, `elevenlabs_tts2_python`, `cartesia_tts` | +| LLM | 8+ | `openai_llm2_python`, `gemini_llm2_python`, `bedrock_llm_python` | +| Avatar | 5+ | `heygen_avatar_python`, `anam_avatar_python` | +| Tools | 8+ | `bingsearch_tool_python`, `vision_tool_python` | +| Transport | 3+ | `agora_rtc`, `websocket_server`, `http_server_python` | +| Other | 10+ | `message_collector2`, `ten_vad_python`, `mcp_client_python` | + +## Extension File Structure + +Every extension follows this layout: + +| File | Purpose | +| ------------------ | ---------------------------------------------- | +| `__init__.py` | Package marker | +| `addon.py` | `@register_addon_as_extension` registration | +| `extension.py` | Main logic, inherits from base class | +| `config.py` | Pydantic config model (optional but common) | +| `manifest.json` | Metadata, dependencies, API interface imports | +| `property.json` | Default config values with `${env:VAR}` syntax | +| `requirements.txt` | Python dependencies | +| `README.md` | Usage documentation (often multilingual) | +| `tests/` | Standalone tests with `bin/start` entry point | + +## Base Classes + +Located in example tenapp directories under `ten_packages/system/ten_ai_base/interface/ten_ai_base/`: + +| File | Class | Purpose | +| ---------- | ---------------------------- | -------------------------- | +| `asr.py` | `AsyncASRBaseExtension` | Speech recognition | +| `tts.py` | `AsyncTTSBaseExtension` | Text-to-speech (basic) | +| `tts2.py` | `AsyncTTS2BaseExtension` | Text-to-speech (advanced) | +| `llm.py` | `AsyncLLMBaseExtension` | Language model completion | +| `llm2.py` | `AsyncLLM2BaseExtension` | Language model v2 | +| `llm_tool.py` | `AsyncLLMToolBaseExtension` | LLM function calling tools | +| 
`mllm.py` | `AsyncMLLMBaseExtension` | Multimodal LLM | + +## API Interface Definitions + +Standard interfaces in `ten_ai_base/api/`: + +| File | Defines | +| ----------------------- | --------------------------------- | +| `asr-interface.json` | ASR data/cmd/audio_frame schemas | +| `tts-interface.json` | TTS data/cmd/audio_frame schemas | +| `llm-interface.json` | LLM data/cmd schemas | +| `mllm-interface.json` | Multimodal LLM schemas | + +Extensions reference these via `manifest.json`: +```json +{"api": {"interface": [{"import_uri": "../../system/ten_ai_base/api/tts-interface.json"}]}} +``` + +## Key Files Quick Reference + +| When working on... | Look at | +| -------------------------- | -------------------------------------------------- | +| New extension | Similar extension in `agents/ten_packages/extension/` | +| API interface changes | `ten_ai_base/api/*.json` | +| Graph configuration | `agents/examples/*/tenapp/property.json` | +| Server endpoints | `server/internal/http_server.go` | +| Build/test tasks | `Taskfile.yml` (root) and per-example | +| Test setup | `agents/ten_packages/extension/*/tests/bin/start` | + +## Related Deep Dives + +- [Extension Development](deep_dives/extension_development.md) — Full creation guide with base class details diff --git a/docs/ai/L1/04_conventions.md b/docs/ai/L1/04_conventions.md new file mode 100644 index 0000000000..41f11901f8 --- /dev/null +++ b/docs/ai/L1/04_conventions.md @@ -0,0 +1,138 @@ +# 04 Conventions + +> Coding patterns, naming, configuration, and formatting standards. 
+ +## Naming Conventions + +| Item | Pattern | Example | +| --------------- | -------------------------------------- | -------------------------- | +| Extension dir | `<vendor>_<type>_python` | `deepgram_asr_python` | +| Addon name | Same as directory name | `deepgram_asr_python` | +| Example dir | `voice-assistant-<variant>` | `voice-assistant-realtime` | +| Config class | `<Vendor>Config(BaseModel)` | `DeepgramTTSConfig` | +| Client class | `<Vendor>Client` | `DeepgramTTSClient` | + +## Addon Registration + +Every extension must register via decorator in `addon.py`: + +```python +from ten_runtime import Addon, register_addon_as_extension, TenEnv + +@register_addon_as_extension("deepgram_asr_python") +class DeepgramASRExtensionAddon(Addon): + def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None: + ten.on_create_instance_done(DeepgramASRExtension(addon_name), context) +``` + +The decorator name **must match** the `addon` field in `property.json` graph nodes. + +## Base Class Selection + +| Need | Base Class | Key Abstract Methods | +| ----------------------- | ----------------------------- | ------------------------------------- | +| Speech-to-text | `AsyncASRBaseExtension` | `vendor()`, `start_connection()`, `send_audio()`, `finalize()` | +| Text-to-speech (HTTP) | `AsyncTTS2HttpExtension` | `vendor()`, `request_tts()`, `synthesize_audio_sample_rate()` | +| Text-to-speech (WS) | `AsyncTTS2BaseExtension` | `vendor()`, `request_tts()`, `cancel_tts()` | +| Chat completion | `AsyncLLMBaseExtension` | `on_call_chat_completion()`, `on_data_chat_completion()` | +| LLM function tool | `AsyncLLMToolBaseExtension` | `get_tool_metadata()`, `run_tool()` | +| Generic / custom | `AsyncExtension` | `on_cmd()`, `on_data()`, etc. 
| + +## Pydantic Configuration + +Extensions use Pydantic models for config validation: + +```python +from pydantic import BaseModel, Field + +class DeepgramTTSConfig(BaseModel): + api_key: str = "" + model: str = "aura-2-theia-en" + sample_rate: int = 24000 + params: dict[str, Any] = Field(default_factory=dict) +``` + +Config is loaded from property.json in `on_init()`: +```python +config_json, _ = await ten_env.get_property_to_json("") +self.config = DeepgramTTSConfig(**json.loads(config_json)) +``` + +## Environment Variable Syntax + +In `property.json`, reference env vars: + +| Syntax | Behavior | +| --------------------- | --------------------------------------- | +| `${env:VAR_NAME}` | Required — error if missing | +| `${env:VAR_NAME\|}` | Optional — empty string if missing | +| `${env:VAR_NAME\|default}` | Optional — uses default if missing | + +```json +{"api_key": "${env:DEEPGRAM_API_KEY}", "region": "${env:AZURE_REGION|}"} +``` + +## Params Dict Pattern + +Extensions using HTTP/WebSocket services store all config in a `params` dictionary: + +1. **Store** `api_key` inside `params` dict in property.json and config +2. **Extract** for authentication headers in the client constructor +3. 
**Strip** from params **only when creating the HTTP request payload** + +```python +# In client constructor — extract for auth +self.api_key = config.params.get("api_key", "") +self.headers = {"Authorization": f"Bearer {self.api_key}"} + +# In request method — strip before sending +payload = {**self.config.params} +payload.pop("api_key", None) +``` + +## Sensitive Data Logging + +Implement `to_str()` to encrypt sensitive fields before logging: + +```python +def to_str(self, sensitive_handling: bool = True) -> str: + if not sensitive_handling: + return f"{self}" + config = copy.deepcopy(self) + if config.params and "api_key" in config.params: + config.params["api_key"] = utils.encrypt(config.params["api_key"]) + return f"{config}" +``` + +## Logging + +- Use `ten_env.log_info()`, `ten_env.log_warn()`, `ten_env.log_error()`, `ten_env.log_debug()` +- Categories: `LOG_CATEGORY_KEY_POINT` (lifecycle events), `LOG_CATEGORY_VENDOR` (vendor status) +- All output goes to `/tmp/task_run.log` inside the container + +## Import Convention + +```python +# Correct (v0.11+) +from ten_runtime import Addon, register_addon_as_extension, TenEnv + +# Wrong (old v0.8.x — will not work) +from ten import Addon +``` + +## Formatting + +- **Black** formatter with `--line-length 80` +- Run: `task format` (from `ai_agents/`) +- Check: `task check` +- Excludes: `third_party/`, `http_server_python/`, `ten_packages/system` + +## Design Principles + +- **YAGNI**: Only implement what is needed now, not what might be needed later +- **KISS**: Prefer simple solutions; three similar lines > premature abstraction +- **No git-ignored files**: Never modify auto-generated files (manifest-lock.json, out/, .ten/, bin/) + +## Related Deep Dives + +- [Extension Development](deep_dives/extension_development.md) — Full creation guide with implementation walkthroughs diff --git a/docs/ai/L1/05_workflows.md b/docs/ai/L1/05_workflows.md new file mode 100644 index 0000000000..b8ad2b729b --- /dev/null +++ 
b/docs/ai/L1/05_workflows.md @@ -0,0 +1,181 @@ +# 05 Workflows + +> Step-by-step guides for common development tasks. + +## Create a New TTS / ASR / LLM Extension + +**Fastest path**: Copy a similar extension and adapt it. + +| Type | Copy From | Base Class | +| ----------- | -------------------------- | --------------------------- | +| TTS (HTTP) | `rime_http_tts` | `AsyncTTS2HttpExtension` | +| TTS (WS) | `deepgram_tts` | `AsyncTTS2BaseExtension` | +| ASR | `deepgram_asr_python` | `AsyncASRBaseExtension` | +| LLM | `openai_llm2_python` | `AsyncLLMBaseExtension` | + +```bash +cp -r agents/ten_packages/extension/deepgram_tts agents/ten_packages/extension/my_vendor_tts +``` + +Then: +1. Rename addon decorator, class names, `manifest.json` `name` field +2. Implement the abstract methods for your vendor API +3. Create `tests/configs/` with required config files (see below) +4. Run guarder tests: `task tts-guarder-test EXTENSION=my_vendor_tts` +5. Run formatter: `task format` + +**Required test config files** for TTS: `property.json`, `property_basic_audio_setting1.json`, +`property_basic_audio_setting2.json`, `property_dump.json`, `property_miss_required.json`, +`property_invalid.json` + +**Required test config files** for ASR: `property_en.json`, `property_zh.json`, +`property_invalid.json`, `property_dump.json` + +For full walkthrough with code and all 15/10 test details, see +[Extension Development](deep_dives/extension_development.md) and [Testing](deep_dives/testing.md). + +## Add Extension to a Graph + +1. **Add node** to `predefined_graphs[].graph.nodes[]` in the example's `tenapp/property.json`: + ```json + {"type": "extension", "name": "my_tts", "addon": "my_tts_python", + "extension_group": "tts_group", + "property": {"api_key": "${env:MY_API_KEY}"}} + ``` + +2. 
**Add connections** — wire data flow between extensions:
+   ```json
+   {"extension": "my_tts",
+    "data": [{"name": "tts_text_input", "source": [{"extension": "main"}]}],
+    "audio_frame": [{"name": "pcm_frame", "dest": [{"extension": "agora_rtc"}]}]}
+   ```
+
+3. **Add dependency** to example `tenapp/manifest.json`:
+   ```json
+   {"type": "extension", "name": "my_tts_python", "version": "0.1.0"}
+   ```
+
+4. **Install** (use `task install`, not just `tman install` — the latter can wipe `bin/main`):
+   ```bash
+   docker exec ten_agent_dev bash -c "cd /app/agents/examples/<example> && task install"
+   ```
+
+5. **Nuclear restart** (required when graphs are added/removed):
+   ```bash
+   sudo docker exec ten_agent_dev bash -c \
+     "pkill -9 -f 'bin/api'; pkill -9 -f bun; pkill -9 -f node; pkill -9 -f next-server; pkill -9 -f tman"
+   sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock"
+   sleep 30 # wait for port 3000 TIME_WAIT to clear
+   sudo docker exec -d ten_agent_dev bash -c \
+     "cd /app/agents/examples/<example> && task run > /tmp/task_run.log 2>&1"
+   ```
+
+See [Graph Configuration](deep_dives/graph_configuration.md) for connection types and routing patterns.
+
+**For complex multi-graph setups** (A/B testing vendors, avatar variants), use
+`rebuild_property.py` instead of hand-editing. See
+[Generating property.json](deep_dives/graph_configuration.md#generating-propertyjson-with-rebuild_propertypy).
+
+## Customize the Main Extension
+
+The "main" extension orchestrates agent behavior (greetings, tool routing, interruption). 
+Three implementation variants exist: + +| Variant | File | Use Case | +| -------------------- | --------------------- | ------------------------------- | +| Python Cascade | `main_python_cascade` | ASR → LLM → TTS pipeline | +| Python Realtime V2V | `main_python_realtime`| OpenAI Realtime API (voice-to-voice) | +| Node.js Cascade | `main_nodejs_cascade` | TypeScript implementation | + +Modify `on_data()` to change event routing, `on_cmd()` for tool handling. + +## Run Tests + +```bash +# All tests +docker exec ten_agent_dev bash -c "cd /app && task test" + +# Single extension (with dependency install) +docker exec ten_agent_dev bash -c \ + "cd /app && task test-extension EXTENSION=agents/ten_packages/extension/deepgram_tts" + +# Single extension (skip install — faster) +docker exec ten_agent_dev bash -c \ + "cd /app && task test-extension-no-install EXTENSION=agents/ten_packages/extension/deepgram_tts" + +# ASR guarder integration tests +docker exec ten_agent_dev bash -c \ + "cd /app && task asr-guarder-test EXTENSION=azure_asr_python" + +# TTS guarder integration tests +docker exec ten_agent_dev bash -c \ + "cd /app && task tts-guarder-test EXTENSION=deepgram_tts" +``` + +See [Testing](deep_dives/testing.md) for test structure and debugging. 
+
+## Restart After Changes
+
+| What Changed | Action |
+| ------------------------------- | ---------------------------------------------------- |
+| `property.json` (graphs added) | Nuclear restart (kill all, remove lock, task run) |
+| `property.json` (config only) | No restart needed (loaded per session) |
+| `.env` | `docker compose down && docker compose up -d` + deps |
+| Python code | Restart server only |
+| Go code | `task install` then restart server |
+| Container restart | Reinstall Python deps, then `task run` |
+
+## Build and Install
+
+```bash
+# Full install (first time or after adding extensions) — ALWAYS prefer this
+docker exec ten_agent_dev bash -c \
+  "cd /app/agents/examples/<example> && task install"
+
+# Install Python deps only
+docker exec ten_agent_dev bash -c \
+  "cd /app/agents/examples/<example>/tenapp && bash scripts/install_python_deps.sh"
+
+# Install extension dependencies only (creates symlinks) — WARNING: can wipe bin/main
+docker exec ten_agent_dev bash -c \
+  "cd /app/agents/examples/<example>/tenapp && tman install"
+```
+
+## Update Extension Code in Running Container
+
+When iterating on extension code locally:
+
+```bash
+# Copy updated files into the container (use /. to avoid nested dirs)
+sudo docker cp ./agents/ten_packages/extension/my_ext/. 
\
+  ten_agent_dev:/app/agents/ten_packages/extension/my_ext/
+
+# Verify symlink exists in the example's tenapp
+sudo docker exec ten_agent_dev bash -c \
+  "ls -la /app/agents/examples/<example>/tenapp/ten_packages/extension/my_ext"
+
+# If missing, create it manually
+sudo docker exec ten_agent_dev bash -c \
+  "ln -sf /app/agents/ten_packages/extension/my_ext \
+   /app/agents/examples/<example>/tenapp/ten_packages/extension/my_ext"
+
+# Then nuclear restart
+```
+
+## Pre-Commit Checks
+
+```bash
+# Format Python code (Black, line-length 80)
+docker exec ten_agent_dev bash -c "cd /app && task format"
+
+# Check formatting without modifying
+docker exec ten_agent_dev bash -c "cd /app && task check"
+```
+
+Pre-commit hooks validate: API key patterns, Black formatting, conventional commit messages.
+
+## Related Deep Dives
+
+- [Extension Development](deep_dives/extension_development.md) — Full extension creation with code examples
+- [Graph Configuration](deep_dives/graph_configuration.md) — Connection wiring and routing patterns
+- [Testing](deep_dives/testing.md) — Test infrastructure, guarder tests, debugging
diff --git a/docs/ai/L1/06_interfaces.md b/docs/ai/L1/06_interfaces.md
new file mode 100644
index 0000000000..d87b91514f
--- /dev/null
+++ b/docs/ai/L1/06_interfaces.md
@@ -0,0 +1,150 @@
+# 06 Interfaces
+
+> REST API contracts, graph connection schemas, and base class abstract methods. 
+ +## REST API Endpoints + +The Go server (`server/internal/http_server.go`) exposes: + +| Endpoint | Method | Purpose | Key Fields | +| -------------------- | ------ | ------------------------------------ | --------------------------------- | +| `/health` | GET | Health check | Returns `{"code":"0"}` | +| `/graphs` | GET | List available graphs | Returns `data[].name` | +| `/start` | POST | Start agent session | `graph_name`, `channel_name` | +| `/stop` | POST | Stop agent session | `channel_name` | +| `/ping` | POST | Keep session alive | `channel_name` | +| `/list` | GET | List active sessions | Returns worker list | +| `/token/generate` | POST | Generate Agora RTC token | `channel_name`, `uid` | + +### POST /start Request Body + +```json +{ + "request_id": "uuid", + "channel_name": "test_channel", + "user_uid": 176573, + "graph_name": "voice_assistant", + "properties": { + "openai_llm2_python": {"model": "gpt-4o-mini"} + }, + "timeout": 60 +} +``` + +- `properties` — per-extension overrides merged into graph node properties +- `timeout` — seconds of inactivity before auto-stop (-1 = never) + +## Graph Connection Types + +Connections in `property.json` define data flow between extensions: + +### Command Connections (`cmd`) + +```json +{"extension": "main", "cmd": [ + {"name": "tool_register", "dest": [{"extension": "llm"}]}, + {"name": "on_user_joined", "source": [{"extension": "agora_rtc"}]} +]} +``` + +Common commands: `tool_register`, `on_user_joined`, `flush`, `chat_completion_call`, +`update_configs` + +### Data Connections (`data`) + +```json +{"extension": "llm", "data": [ + {"name": "text_data", "source": [{"extension": "main"}]}, + {"name": "text_data", "dest": [{"extension": "tts"}]} +]} +``` + +Common data: `asr_result`, `text_data`, `tts_text_input`, `tts_audio_start`, +`tts_audio_end`, `error` + +### Audio Frame Connections (`audio_frame`) + +```json +{"extension": "agora_rtc", "audio_frame": [ + {"name": "pcm_frame", "dest": [{"extension": "stt"}]} 
+]} +``` + +### Video Frame Connections (`video_frame`) + +```json +{"extension": "agora_rtc", "video_frame": [ + {"name": "video_frame", "dest": [{"extension": "vision"}]} +]} +``` + +## Base Class Abstract Methods + +### ASR (`AsyncASRBaseExtension`) + +| Method | Returns | Purpose | +| --------------------------- | --------- | -------------------------------- | +| `vendor()` | `str` | Vendor name (e.g., "deepgram") | +| `start_connection()` | `None` | Connect to ASR service | +| `stop_connection()` | `None` | Disconnect | +| `send_audio(frame)` | `bool` | Send audio frame to service | +| `finalize()` | `None` | Drain pending audio | +| `is_connected()` | `bool` | Connection status check | +| `input_audio_sample_rate()` | `int` | Expected sample rate (e.g., 16000)| + +**Output helpers**: `send_asr_result()`, `send_asr_error()`, `send_asr_finalize_end()`, +`send_connect_delay_metrics()`, `send_vendor_metrics()` + +### TTS (`AsyncTTS2BaseExtension`) + +| Method | Returns | Purpose | +| ------------------------------- | -------- | ------------------------------------ | +| `vendor()` | `str` | Vendor name (e.g., "elevenlabs") | +| `request_tts(tts_text_input)` | `AsyncIterator` | Generate audio from text | +| `cancel_tts()` | `None` | Handle flush/cancellation | +| `synthesize_audio_sample_rate()`| `int` | Output sample rate (e.g., 24000) | +| `synthesize_audio_channels()` | `int` | Channel count (default: 1) | +| `synthesize_audio_sample_width()`| `int` | Bytes per sample (default: 2) | + +**Output helpers**: `send_tts_audio_data()`, `send_tts_audio_start()`, `send_tts_audio_end()`, +`send_tts_error()`, `send_tts_ttfb_metrics()`, `send_tts_text_result()` + +**State machine**: QUEUED → PROCESSING → FINALIZING → COMPLETED (per request) + +### LLM (`AsyncLLMBaseExtension`) + +| Method | Returns | Purpose | +| ------------------------------- | ------- | -------------------------------- | +| `on_call_chat_completion()` | varies | Handle sync command requests | +| 
`on_data_chat_completion()` | varies | Handle stream-based data input | +| `on_tools_update(tool_metadata)`| `None` | Handle new tool registration | + +**Tool flow**: Extensions register tools via `CMD_TOOL_REGISTER` → LLM stores in +`available_tools` → LLM calls tools during completion → results returned. + +## Manifest API Interface + +Extensions declare their API interface in `manifest.json`: + +```json +{ + "api": { + "interface": [ + {"import_uri": "../../system/ten_ai_base/api/tts-interface.json"} + ], + "property": { + "api_key": {"type": "string"}, + "model": {"type": "string"}, + "sample_rate": {"type": "int32"} + } + } +} +``` + +Interface JSON files define the standard cmd/data/audio_frame schemas for each extension type. + +## Related Deep Dives + +- [Extension Development](deep_dives/extension_development.md) — Implementing abstract methods +- [Server Architecture](deep_dives/server_architecture.md) — Endpoint handlers and property injection +- [Graph Configuration](deep_dives/graph_configuration.md) — Full connection wiring examples diff --git a/docs/ai/L1/07_gotchas.md b/docs/ai/L1/07_gotchas.md new file mode 100644 index 0000000000..e61011e489 --- /dev/null +++ b/docs/ai/L1/07_gotchas.md @@ -0,0 +1,235 @@ +# 07 Gotchas + +> Critical pitfalls, tribal knowledge, and troubleshooting. + +## CRITICAL: Property Getters Return Tuples + +All `get_property_*()` methods return `(value, error_or_none)`, not the raw value. + +```python +# WRONG — causes TypeError +threshold = await ten_env.get_property_float("threshold") +if threshold > 0.5: # TypeError: '>' not supported between 'float' and 'tuple' + +# CORRECT — extract from tuple +threshold_result = await ten_env.get_property_float("threshold") +threshold = threshold_result[0] if isinstance(threshold_result, tuple) else threshold_result +``` + +This applies to `get_property_string()`, `get_property_int()`, `get_property_float()`, +`get_property_bool()`. Always extract `[0]`. 
+ +## CRITICAL: Signal Handlers Forbidden + +Extensions run in worker threads. Signal handlers only work in the main thread. + +```python +# WRONG — raises ValueError: signal only works in main thread +signal.signal(signal.SIGTERM, handler) +atexit.register(cleanup) + +# CORRECT — use extension lifecycle +async def on_stop(self, ten_env): + await self.cleanup() +``` + +## CRITICAL: Always Use `task run` + +Never start the server with `./bin/api` or `./bin/main` directly. +`task run` sets the correct PYTHONPATH and starts all services together +(API server + playground + TMAN Designer). + +## Zombie Worker Processes + +Worker processes (`bin/main`) run on the **host machine**, not inside Docker. +They survive container restarts and server restarts. + +```bash +# Check for zombies +ps -elf | grep 'bin/main' | grep -v grep + +# Kill them +ps -elf | grep 'bin/main' | grep -v grep | awk '{print $4}' | xargs -r sudo kill -9 +``` + +Always kill zombies before restarting the server. + +## .env Loaded at Container Startup Only + +Editing `.env` while the container is running has **no effect**. You must: + +```bash +cd /home/ubuntu/ten-framework/ai_agents +docker compose down && docker compose up -d +# Then reinstall Python deps and task run +``` + +## Node.js Version for Playground + +Playground requires Node.js >= 20.9.0. The host machine may have an older version. +Always run playground from **inside the container** (has Node 22): + +```bash +# WRONG: running from host with Node 18 +cd playground && npm run dev # Fails + +# CORRECT: task run starts playground inside container automatically +``` + +## Next.js Lock File + +After crashes, `.next/dev/lock` becomes stale, preventing restart: + +```bash +sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock" +``` + +Always use nuclear restart after playground crashes. + +## Python Deps Not Persisted + +Python dependencies are installed into the container's filesystem and are lost +on container restart. 
Always reinstall after `docker compose down && up`:
+
+```bash
+docker exec ten_agent_dev bash -c \
+  "cd /app/agents/examples/voice-assistant-advanced/tenapp && bash scripts/install_python_deps.sh"
+```
+
+## tman Install Creates Symlinks
+
+Never manually create symlinks with `ln -s` for extensions.
+Always use `tman install` which resolves dependencies and creates correct links:
+
+```bash
+docker exec ten_agent_dev bash -c "cd /app/agents/examples/<example>/tenapp && tman install"
+```
+
+**Important:** If `tman install` doesn't create a symlink for a new extension (e.g., after
+adding it to `manifest.json`), create it manually as a fallback:
+
+```bash
+sudo docker exec ten_agent_dev bash -c \
+  "ln -sf /app/agents/ten_packages/extension/my_ext \
+   /app/agents/examples/<example>/tenapp/ten_packages/extension/my_ext"
+```
+
+## docker cp Creates Nested Directories
+
+When using `docker cp` to update extension code in the container, beware of
+trailing slashes creating nested directories:
+
+```bash
+# WRONG — creates /app/.../deepgram_tts/deepgram_tts/ (nested)
+sudo docker cp ./deepgram_tts/ container:/app/.../deepgram_tts/
+
+# CORRECT — copy contents into existing directory
+sudo docker cp ./deepgram_tts/. container:/app/.../deepgram_tts/
+```
+
+If you see `ModuleNotFoundError: No module named 'ten_packages.extension.X'`
+after a `docker cp`, check for nested directories inside the extension folder.
+
+## tman install Can Wipe bin/main
+
+Running `tman install` when system dependencies have newer versions will replace
+the runtime packages, which **deletes `bin/main`**. 
You must run the full +`task install` (not just `tman install`) to rebuild it: + +```bash +# This alone can break things if runtime versions changed: +docker exec ten_agent_dev bash -c "cd /app/.../tenapp && tman install" + +# This is safe — rebuilds bin/main after tman install: +docker exec ten_agent_dev bash -c "cd /app/agents/examples/ && task install" +``` + +Signs: Worker fails with `bin/main: No such file or directory` in logs. + +## Audio Routing: Split at Source Only + +When routing audio to multiple destinations, the split must happen at the +source node (e.g., `agora_rtc`), not at intermediate nodes. Splitting from +intermediate nodes can cause crashes. + +```json +// CORRECT: agora_rtc sends pcm_frame to both stt AND vad +{"extension": "agora_rtc", "audio_frame": [ + {"name": "pcm_frame", "dest": [{"extension": "stt"}, {"extension": "vad"}]} +]} +``` + +## Frontend Caches Graph List + +The playground caches the `/graphs` API response. When adding or removing graphs +from `property.json`, a nuclear restart is required — simple server restart +is not enough. + +## Manifest Module Name Must Match + +The `name` field in extension `manifest.json` must exactly match the `addon` +field used in graph nodes in `property.json`. Mismatches cause silent failures. + +## Apple Silicon Docker + +Docker containers may need Rosetta for x86 images on Apple Silicon Macs. +Enable in Docker Desktop: Settings → General → Use Rosetta for x86_64/amd64 emulation. + +## Windows Line Endings + +Before cloning on Windows, configure git to preserve Unix line endings: + +```bash +git config --global core.autocrlf false +``` + +## Nuclear Restart Recipe + +When in doubt, use the nuclear option. **Must kill `next-server` too** — it +holds port 3000 even after its parent `node` process is killed: + +```bash +# 1. 
Kill EVERYTHING (including next-server which holds port 3000) +sudo docker exec ten_agent_dev bash -c \ + "pkill -9 -f 'bin/api'; pkill -9 -f bun; pkill -9 -f node; pkill -9 -f next-server; pkill -9 -f tman" + +# 2. Clean up stale files +sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock" + +# 3. Wait for port 3000 TIME_WAIT to clear (critical!) +# If Next.js can't bind port 3000, it silently starts on 3001/3002 which +# isn't exposed by Docker — the frontend appears down. +sleep 30 # or check: docker exec ten_agent_dev bash -c "cat /proc/net/tcp6 | grep ':0BB8' | wc -l" + +# 4. Start +sudo docker exec -d ten_agent_dev bash -c \ + "cd /app/agents/examples/voice-assistant && task run > /tmp/task_run.log 2>&1" + +# 5. Verify (wait ~12s for startup) +sleep 12 +sudo docker exec ten_agent_dev bash -c \ + "curl -s http://localhost:8080/health && curl -s -o /dev/null -w ' Frontend:%{http_code}' http://localhost:3000/" +``` + +**Verify the logs** — check Next.js started on port 3000 (not 3001+): +```bash +sudo docker exec ten_agent_dev bash -c "strings /tmp/task_run.log | grep -E 'Local:|Port|Ready|Error'" +``` + +If you see `Port 3000 is in use`, find and kill the process holding it: +```bash +sudo docker exec ten_agent_dev bash -c \ + "for pid in /proc/[0-9]*/fd/*; do \ + link=\$(readlink \$pid 2>/dev/null); \ + echo \"\$link\" | grep -q socket: && \ + inode=\$(echo \$link | grep -oP '\\d+') && \ + grep -q \$inode /proc/net/tcp6 2>/dev/null && \ + grep \$inode /proc/net/tcp6 | grep -q ':0BB8' && \ + echo PID=\$(echo \$pid | cut -d/ -f3) && break; \ + done" +``` + +## Related Deep Dives + +- [Deployment](deep_dives/deployment.md) — Production setup, persistent startup +- [Server Architecture](deep_dives/server_architecture.md) — Worker lifecycle, session management diff --git a/docs/ai/L1/08_security.md b/docs/ai/L1/08_security.md new file mode 100644 index 0000000000..5efef433c8 --- /dev/null +++ b/docs/ai/L1/08_security.md @@ -0,0 +1,88 @@ +# 
08 Security + +> Secret management, input validation, and repository hygiene. + +## API Key Management + +- **Single source**: All API keys live in `ai_agents/.env` (git-ignored) +- **Never hardcode** keys in `property.json` — use `${env:VAR_NAME}` substitution +- **Persistent storage**: Keep a copy of keys outside the repo (e.g., `~/api_keys.txt`) + so branch switches don't lose them +- See `.env.example` for the complete variable catalog + +## Environment Variable Substitution + +In `property.json`, reference secrets via: + +```json +{ + "api_key": "${env:DEEPGRAM_API_KEY}", + "region": "${env:AZURE_REGION|eastus}" +} +``` + +| Syntax | Behavior | +| ------------------------- | ---------------------------- | +| `${env:VAR}` | Required — error if missing | +| `${env:VAR\|}` | Optional — empty if missing | +| `${env:VAR\|default}` | Optional — default if missing| + +## Sensitive Data in Logs + +Extensions must encrypt sensitive fields before logging: + +```python +def to_str(self, sensitive_handling: bool = True) -> str: + config = copy.deepcopy(self) + if config.params and "api_key" in config.params: + config.params["api_key"] = utils.encrypt(config.params["api_key"]) + return f"{config}" +``` + +Never log raw API keys, tokens, or credentials. 
+ +## Server-Side Protections + +The Go server (`http_server.go`) implements: + +- **Path traversal prevention**: Ignores client-requested `tenapp_dir`, always uses + the launch-configured directory +- **Channel name sanitization**: Validated before use in file operations +- **Safe type conversion**: Property values are type-checked during merge +- **Recursive property merge**: Prevents injection via nested config overrides + +## Pre-Commit Hooks + +| Hook | What It Checks | +| ------------- | ----------------------------------------------------------- | +| `pre-commit` | Scans staged files for API key patterns (`API_KEY.*=[A-Za-z0-9]{20,}`) | +| `pre-commit` | Black formatting compliance (line-length 80) | +| `commit-msg` | Conventional commit format, blocks AI tool name references | + +## Git-Ignored Files + +These are auto-generated — never modify or commit them: + +| Pattern | Source | +| ---------------------- | ------------------------- | +| `manifest-lock.json` | `tman` dependency resolve | +| `compile_commands.json`| Build system | +| `BUILD.gn`, `.gn` | Build configuration | +| `out/`, `build/` | Build output | +| `.ten/` | TEN runtime files | +| `bin/main`, `bin/worker`| Compiled binaries | +| `.release/` | Release packaging | +| `node_modules/` | JS dependencies | +| `.env` | Environment secrets | + +## Files That Should Never Be Committed + +- `.env` (API keys and secrets) +- `*.pem` (certificates) +- `*.pcm` (audio dumps) +- Credential files, tokens, session data + +## Related Deep Dives + +- [Deployment](deep_dives/deployment.md) — Production security considerations +- [Server Architecture](deep_dives/server_architecture.md) — Server-side validation details diff --git a/docs/ai/L1/deep_dives/_index.md b/docs/ai/L1/deep_dives/_index.md new file mode 100644 index 0000000000..34502c601f --- /dev/null +++ b/docs/ai/L1/deep_dives/_index.md @@ -0,0 +1,9 @@ +# Deep Dives Index + +| Document | Summary | Load When | +| 
-------------------------------------------------------- | ------------------------------------------------ | ------------------------------------------------ | +| [extension_development.md](extension_development.md) | Full extension creation guide, base classes, test configs, pre-submission checklist | Creating a new TTS/ASR/LLM extension | +| [graph_configuration.md](graph_configuration.md) | Graph nodes, connections, routing, property.json | Modifying graphs or wiring extensions together | +| [testing.md](testing.md) | All 15 TTS + 10 ASR guarder tests, pass criteria, config files, debugging | Running or debugging tests for an extension | +| [deployment.md](deployment.md) | Docker, Cloudflare, Nginx, Grafana monitoring | Deploying to production or setting up monitoring | +| [server_architecture.md](server_architecture.md) | Go server, property injection, worker lifecycle | Understanding server internals or debugging | diff --git a/docs/ai/L1/deep_dives/deployment.md b/docs/ai/L1/deep_dives/deployment.md new file mode 100644 index 0000000000..3ffae48c77 --- /dev/null +++ b/docs/ai/L1/deep_dives/deployment.md @@ -0,0 +1,206 @@ +# Deployment + +> **When to Read This:** Load this document when you are deploying to production, +> setting up HTTPS access, configuring monitoring, or ensuring services persist +> across session closures. + +## Docker Compose Setup + +The development container is defined in `ai_agents/docker-compose.yml`: + +```yaml +services: + ten_agent_dev: + image: ghcr.io/ten-framework/ten_agent_build:0.7.14 + container_name: ten_agent_dev + ports: + - "49483:49483" # TMAN Designer + - "3000:3000" # Playground + - "8000-9001:8000-9001" # API + worker range + volumes: + - .:/app + environment: + - LOG_PATH=${LOG_PATH} +``` + +Start: `cd ai_agents && docker compose up -d` + +## Persistent Startup (Survives Session Closure) + +Use `-d` flag with `docker exec` to keep services running after terminal disconnect: + +```bash +# 1. 
Clean up existing processes +sudo docker exec ten_agent_dev bash -c "pkill -9 -f 'bin/api'; pkill -9 node; pkill -9 bun" +ps -elf | grep 'bin/main' | grep -v grep | awk '{print $4}' | xargs -r sudo kill -9 2>/dev/null + +# 2. Remove stale lock files +sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock" + +# 3. Install Python dependencies +sudo docker exec ten_agent_dev bash -c \ + "cd /app/agents/examples/voice-assistant-advanced/tenapp && bash scripts/install_python_deps.sh" + +# 4. Start everything in detached mode +sudo docker exec -d ten_agent_dev bash -c \ + "cd /app/agents/examples/voice-assistant-advanced && task run > /tmp/task_run.log 2>&1" + +# 5. Wait and verify +sleep 15 +curl -s http://localhost:8080/health && echo " API ready" +curl -s http://localhost:8080/graphs | jq -r '.data | length' | xargs echo "Graphs:" +curl -s http://localhost:3000 -o /dev/null -w '%{http_code}' | xargs echo "Playground:" +``` + +Key: `-d` flag keeps processes running. `task run` starts API + playground + TMAN Designer. 
+ +## Cloudflare Tunnel (Free HTTPS) + +Quick HTTPS access without domain or SSL setup: + +```bash +# Start tunnel +pkill cloudflared +nohup cloudflared tunnel --url http://localhost:3000 > /tmp/cloudflare_tunnel.log 2>&1 & +sleep 5 + +# Get the random URL +grep -o 'https://[^[:space:]]*\.trycloudflare\.com' /tmp/cloudflare_tunnel.log | head -1 +# Example: https://films-colon-msgid-incentives.trycloudflare.com +``` + +- Free tunnels get **random URLs** that change on restart +- No DNS configuration needed +- Good for development and demos + +## Nginx Reverse Proxy (Production HTTPS) + +For production with custom domain and SSL certificates: + +```nginx +server { + listen [::]:453 ssl ipv6only=on; + listen 453 ssl; + ssl_certificate /etc/letsencrypt/live/oai.agora.io/fullchain.pem; + ssl_certificate_key /etc/letsencrypt/live/oai.agora.io/privkey.pem; + include /etc/letsencrypt/options-ssl-nginx.conf; + ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; + + # API endpoints + location ~ ^/(health|ping|token|start|stop|graphs|list)(/|$) { + proxy_pass http://localhost:8080; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + + # Playground (with WebSocket upgrade) + location / { + proxy_pass http://localhost:3000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + } +} +``` + +Apply: `sudo nginx -t && sudo systemctl reload nginx` + +## Production Build + +```bash +# Build optimized frontend +docker exec ten_agent_dev bash -c "cd /app/playground && npm run build" + +# Start production server +docker exec -d ten_agent_dev bash -c \ + "cd /app/playground && npm start > /tmp/playground_prod.log 2>&1" 
+``` + +## Grafana Monitoring + +Located in `tools/grafana-monitoring/`. Three deployment modes: + +### Pull Mode (Development) + +Prometheus scrapes a metrics endpoint exposed by the TEN runtime: + +```json +// In property.json +{ + "ten": { + "exporter": { + "enabled": true, + "type": "prometheus", + "prometheus": { + "listen_address": "0.0.0.0", + "listen_port": 49484 + } + } + } +} +``` + +Setup: `cd tools/grafana-monitoring && docker compose -f docker-compose.pull.yml up -d` + +### Push Mode (Production) + +Uses OTEL Collector to push metrics to Prometheus and logs to Loki: + +```json +// In property.json +{ + "ten": { + "exporter": { + "enabled": true, + "type": "otlp", + "otlp": { + "endpoint": "http://otel-collector:4317" + } + } + } +} +``` + +Setup: `cd tools/grafana-monitoring && docker compose -f docker-compose.push.yml up -d` + +### Hybrid Mode + +Both Pull and Push simultaneously — useful for A/B testing or migration. + +### Monitored Metrics + +| Metric | Good Threshold | What It Measures | +| --------------------------------- | -------------- | ----------------------------------- | +| Extension Lifecycle Duration | < 1 second | on_configure, on_init, on_start, on_stop, on_deinit | +| Extension CMD Processing Duration | < 100ms | P50/P95 command handling time | +| Thread Message Queue Wait Time | < 50ms | Time messages wait before processing| + +### Log Aggregation (Push Mode Only) + +Push mode sends logs to Loki for centralized querying: + +``` +# LogQL query examples +{service_name="ten_agent"} |= "error" +{service_name="ten_agent"} | json | level="error" +{service_name="ten_agent"} |= "deepgram" | json +``` + +## After Container Restart Checklist + +1. Reinstall Python dependencies (not persisted) +2. Start server with `task run` +3. Restart Cloudflare tunnel (if using) +4. Kill any zombie worker processes on host +5. 
Verify with `/health` and `/graphs` endpoints + +## See Also + +- [Back to Setup](../01_setup.md) +- [Server Architecture](server_architecture.md) — Worker lifecycle, session management diff --git a/docs/ai/L1/deep_dives/extension_development.md b/docs/ai/L1/deep_dives/extension_development.md new file mode 100644 index 0000000000..ba8588742d --- /dev/null +++ b/docs/ai/L1/deep_dives/extension_development.md @@ -0,0 +1,653 @@ +# Extension Development + +> **When to Read This:** Load this document when you are creating a new TTS, ASR, or LLM +> extension. It covers the exact files to create, base classes to inherit, abstract methods +> to implement, test configs to provide, and guarder tests your extension must pass. + +## Quick Start: Copy an Existing Extension + +The fastest way to create a new extension is to copy a similar one: + +| Extension Type | Good Template to Copy | Base Class | +| -------------- | ------------------------------- | --------------------------- | +| TTS (HTTP) | `rime_http_tts` | `AsyncTTS2HttpExtension` | +| TTS (WebSocket)| `deepgram_tts` | `AsyncTTS2BaseExtension` | +| ASR (WebSocket)| `deepgram_asr_python` | `AsyncASRBaseExtension` | +| LLM | `openai_llm2_python` | `AsyncLLMBaseExtension` | +| LLM Tool | `bingsearch_tool_python` | `AsyncLLMToolBaseExtension` | + +```bash +cp -r agents/ten_packages/extension/deepgram_tts agents/ten_packages/extension/my_vendor_tts +# Then rename: addon decorator, class names, manifest.json name field +``` + +## Directory Structure + +``` +my_vendor_tts_python/ +├── __init__.py # Can be empty +├── addon.py # Registration (MUST match manifest.json name) +├── extension.py # Main logic OR orchestration +├── my_vendor_tts.py # Vendor client (websocket/http logic) +├── config.py # Pydantic config model +├── manifest.json # Metadata + API interface + property schema +├── property.json # Defaults with ${env:VAR} syntax +├── requirements.txt # Python deps +├── README.md # Usage docs +└── tests/ + ├── bin/ + │ └── 
start # Test entry script (sets PYTHONPATH, runs pytest) + └── configs/ + ├── property.json # Default test config + ├── property_basic_audio_setting1.json # Sample rate test 1 (e.g. 16000) + ├── property_basic_audio_setting2.json # Sample rate test 2 (e.g. 24000) + ├── property_dump.json # Audio dump test config + ├── property_miss_required.json # Missing API key test + └── property_invalid.json # Invalid API key test +``` + +## Step 1: addon.py + +```python +from ten_runtime import Addon, register_addon_as_extension, TenEnv + +@register_addon_as_extension("my_vendor_tts_python") +class MyVendorTTSAddon(Addon): + def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None: + from .extension import MyVendorTTSExtension + ten.on_create_instance_done(MyVendorTTSExtension(addon_name), context) +``` + +The decorator name **must exactly match** `manifest.json` `name` field AND the `addon` +field in graph nodes. + +## Step 2: config.py + +```python +from pydantic import BaseModel, Field +from typing import Any +import copy +from ten_ai_base import utils + +class MyVendorTTSConfig(BaseModel): + api_key: str = "" + model: str = "default-model" + sample_rate: int = 24000 + dump: bool = False + dump_path: str = "" + params: dict[str, Any] = Field(default_factory=dict) + + def validate(self) -> None: + key = self.api_key or self.params.get("api_key", "") + if not key: + raise ValueError("API key is required") + + def to_str(self, sensitive_handling: bool = True) -> str: + if not sensitive_handling: + return f"{self}" + config = copy.deepcopy(self) + if config.params and "api_key" in config.params: + config.params["api_key"] = utils.encrypt(config.params["api_key"]) + return f"{config}" +``` + +## Step 3: manifest.json + +```json +{ + "type": "extension", + "name": "my_vendor_tts_python", + "version": "0.1.0", + "dependencies": [ + {"type": "system", "name": "ten_runtime_python", "version": "0.8"} + ], + "api": { + "interface": [ + {"import_uri": 
"../../system/ten_ai_base/api/tts-interface.json"} + ], + "property": { + "api_key": {"type": "string"}, + "model": {"type": "string"}, + "sample_rate": {"type": "int32"}, + "dump": {"type": "bool"}, + "dump_path": {"type": "string"}, + "params": {"type": "object"} + } + } +} +``` + +Use `tts-interface.json` for TTS, `asr-interface.json` for ASR, `llm-interface.json` for LLM. + +## Step 4: property.json + +```json +{ + "api_key": "${env:MY_VENDOR_API_KEY}", + "model": "default-model", + "sample_rate": 24000 +} +``` + +## Step 5: extension.py — Implementing the Base Class + +### TTS Extension (WebSocket Mode) + +```python +from ten_ai_base.tts2 import AsyncTTS2BaseExtension + +class MyVendorTTSExtension(AsyncTTS2BaseExtension): + def vendor(self) -> str: + return "my_vendor" + + async def on_init(self, ten_env) -> None: + await super().on_init(ten_env) + config_json, _ = await ten_env.get_property_to_json("") + self.config = MyVendorTTSConfig(**json.loads(config_json)) + self.config.validate() + + async def on_start(self, ten_env) -> None: + await super().on_start(ten_env) + self.client = MyVendorTTSClient(self.config, ten_env) + await self.client.connect() + + async def on_stop(self, ten_env) -> None: + await super().on_stop(ten_env) + await self.client.close() + + async def request_tts(self, tts_text_input) -> AsyncIterator[tuple[bytes, int | None]]: + text = tts_text_input.get_text() + request_id = tts_text_input.get_request_id() + async for audio_chunk in self.client.synthesize(text, request_id): + yield audio_chunk, None # (bytes, event_status) + + async def cancel_tts(self) -> None: + await self.client.cancel() + + def synthesize_audio_sample_rate(self) -> int: + return self.config.sample_rate + + def synthesize_audio_channels(self) -> int: + return 1 # mono + + def synthesize_audio_sample_width(self) -> int: + return 2 # 16-bit +``` + +**TTS2 state machine**: The base class manages request states automatically: +QUEUED -> PROCESSING -> FINALIZING -> 
COMPLETED. Your `request_tts()` just yields audio bytes. + +**Output events sent automatically** by the base class: +- `tts_audio_start` — when first audio chunk is ready +- `pcm_frame` — for each audio chunk +- `tts_audio_end` — when request completes +- `tts_error` — on failure + +### TTS Extension (HTTP Mode) + +Simpler — for non-streaming HTTP APIs: + +```python +from ten_ai_base.tts2_http import AsyncTTS2HttpExtension + +class MyVendorTTSExtension(AsyncTTS2HttpExtension): + def vendor(self) -> str: + return "my_vendor" + + async def request_tts(self, text: str, request_id: str) -> AsyncIterator[bytes]: + async with httpx.AsyncClient() as client: + async with client.stream("POST", self.url, json={"text": text}) as resp: + async for chunk in resp.aiter_bytes(): + yield chunk + + def synthesize_audio_sample_rate(self) -> int: + return self.config.sample_rate +``` + +### ASR Extension + +```python +from ten_ai_base.asr import AsyncASRBaseExtension + +class MyVendorASRExtension(AsyncASRBaseExtension): + def vendor(self) -> str: + return "my_vendor" + + async def start_connection(self) -> None: + self.ws = await websockets.connect(self.url, headers=self.auth_headers) + # Start a listener task for results + asyncio.create_task(self._listen_for_results()) + + async def stop_connection(self) -> None: + if self.ws: + await self.ws.close() + self.ws = None + + async def send_audio(self, frame) -> bool: + buf = frame.lock_buf() + data = bytes(buf) + frame.unlock_buf(buf) + await self.ws.send(data) + return True + + async def finalize(self) -> None: + await self.ws.send(json.dumps({"type": "CloseStream"})) + # Wait for final results before returning + + def is_connected(self) -> bool: + return self.ws is not None and self.ws.open + + def input_audio_sample_rate(self) -> int: + return 16000 + + async def _listen_for_results(self): + async for msg in self.ws: + result = json.loads(msg) + if result.get("is_final"): + asr_result = ASRResult(text=result["transcript"], 
language="en-US", ...) + await self.send_asr_result(asr_result) +``` + +**ASR output methods** you must call: +- `await self.send_asr_result(asr_result)` — for each transcription +- `await self.send_asr_error(error, vendor_info)` — on vendor errors +- `await self.send_asr_finalize_end()` — when finalize completes + +**Buffer strategy**: Override `buffer_strategy()` to return `ASRBufferConfigModeKeep` +if you want audio buffered during disconnects (default discards). + +### LLM Extension + +```python +from ten_ai_base.llm import AsyncLLMBaseExtension + +class MyLLMExtension(AsyncLLMBaseExtension): + async def on_call_chat_completion(self, ten_env, **kwargs): + # Handle command-based chat requests + pass + + async def on_data_chat_completion(self, ten_env, **kwargs): + # Handle stream-based data input + pass + + async def on_tools_update(self, ten_env, tool_metadata): + async with self._available_tools_lock: + self.available_tools = tool_metadata +``` + +--- + +## TTS Audio Pipeline: Data Types and Flow + +Understanding the data types is critical for implementing TTS extensions correctly. 
+ +### Data Flow Through the Pipeline + +``` +User speaks → Agora RTC → pcm_frame → ASR → asr_result → main_control + → text_data → LLM → text_data → main_control → tts_text_input → TTS + → pcm_frame → Agora RTC → User hears +``` + +### tts_text_input (incoming to your extension) + +```python +class TTSTextInput: + request_id: str # Unique request identifier + text: str # Text chunk to synthesize + text_input_end: bool # True = last chunk for this request_id + metadata: dict # Context: {session_id, turn_id, ...} +``` + +- Multiple `tts_text_input` messages can share one `request_id` (the "append" pattern) +- `text_input_end=True` signals no more text is coming for this request +- The base class handles queuing and buffering — your `request_tts()` receives complete inputs + +### tts_audio_start / tts_audio_end (outgoing from your extension) + +These are sent automatically by the base class. You don't need to send them manually. + +```json +// tts_audio_start +{"request_id": "req1", "metadata": {"session_id": "sess1", "turn_id": 1}} + +// tts_audio_end +{ + "request_id": "req1", + "request_event_interval_ms": 1500, + "request_total_audio_duration_ms": 3200, + "reason": 1, + "metadata": {"session_id": "sess1", "turn_id": 1} +} +``` + +**Reason values**: `REQUEST_END` (1) = normal completion, `INTERRUPTED` (2) = flush/cancel, +`ERROR` (3) = failure. + +### tts_flush / tts_flush_end + +Flush is triggered when the user interrupts (speaks while TTS is playing). + +```json +// tts_flush (incoming signal) +{"flush_id": "flush_abc123", "metadata": {"session_id": "sess1"}} + +// tts_flush_end (your extension's response — sent automatically by base class) +{"flush_id": "flush_abc123", "metadata": {"session_id": "sess1"}} +``` + +**Critical**: `flush_id` and `metadata` must be echoed back exactly. + +## Flush Handling in TTS Extensions + +The base class (`AsyncTTS2BaseExtension`) handles most flush logic automatically. 
+Your extension only needs to implement `cancel_tts()`: + +```python +async def cancel_tts(self) -> None: + """Called when a flush signal arrives. Stop any in-progress synthesis.""" + if self.client: + await self.client.cancel() +``` + +### What the Base Class Does on Flush + +1. Acquires `_put_lock` to block new `tts_text_input` arrivals +2. Clears `_flush_complete_event` to prevent race conditions +3. Flushes the internal queue (discards all pending items) +4. Calls `cancel_tts()` on your extension (you stop the vendor API) +5. Sends `tts_audio_end` with `reason=INTERRUPTED` for the current request +6. Sends `tts_flush_end` with the echoed `flush_id` and `metadata` +7. Resets all request state (ready for next request) +8. Sets `_flush_complete_event` to re-enable queue processing + +### Request Interleaving (How Buffering Works) + +When multiple requests arrive with different `request_id`s: + +1. First request is processed immediately (`_processing_request_id = "req1"`) +2. Messages for other request_ids are **buffered** in `_pending_messages` +3. When req1 completes, the next buffered request is released (FIFO order) +4. Each request maintains strict event ordering: `audio_start → frames → audio_end` + +Your `request_tts()` doesn't need to handle interleaving — the base class does it. + +## The Three property.json Files + +There are three distinct `property.json` files with different roles: + +### 1. Extension Defaults (`agents/ten_packages/extension//property.json`) + +Default config for the extension. Loaded when no overrides are specified: + +```json +{ + "api_key": "${env:MY_VENDOR_API_KEY}", + "model": "default-model", + "sample_rate": 24000 +} +``` + +### 2. 
App Graph Definition (`agents/examples/<example_name>/tenapp/property.json`)
+
+Defines the complete agent — nodes, connections, per-instance overrides:
+
+```json
+{
+  "ten": {
+    "predefined_graphs": [{
+      "name": "voice_assistant",
+      "graph": {
+        "nodes": [
+          {"name": "tts", "addon": "my_vendor_tts_python",
+           "property": {"model": "high-quality", "sample_rate": 24000}}
+        ],
+        "connections": [...]
+      }
+    }]
+  }
+}
+```
+
+Properties here **override** extension defaults for this specific graph instance.
+
+### 3. Test Configs (`agents/ten_packages/extension/<extension_name>/tests/configs/*.json`)
+
+Used by guarder tests. Each test loads a specific config file:
+
+```json
+{
+  "dump": true,
+  "dump_path": "./tests/dump_output/",
+  "params": {"key": "${env:MY_VENDOR_API_KEY}", "sample_rate": 16000}
+}
+```
+
+**Loading order**: Extension defaults → App graph overrides → Test config overrides.
+
+---
+
+## Step 6: Test Configuration Files
+
+Your extension's `tests/configs/` directory needs these config files for the guarder tests to work:
+
+### For TTS Extensions
+
+| Config File                           | Purpose                                | Content                                 |
+| ------------------------------------- | -------------------------------------- | --------------------------------------- |
+| `property.json`                       | Default test config                    | Valid API key, default model/settings   |
+| `property_basic_audio_setting1.json`  | Sample rate test 1                     | `sample_rate: 16000` + valid key        |
+| `property_basic_audio_setting2.json`  | Sample rate test 2                     | `sample_rate: 24000` + valid key        |
+| `property_dump.json`                  | Audio dump test                        | `dump: true, dump_path: "./tests/dump_output/"` |
+| `property_miss_required.json`         | Missing params error test              | Empty API key                           |
+| `property_invalid.json`               | Invalid params error test              | Empty or invalid API key                |
+
+**Example `property.json`** (for elevenlabs):
+```json
+{
+  "params": {
+    "key": "${env:ELEVENLABS_TTS_KEY}",
+    "model_id": "eleven_turbo_v2_5"
+  }
+}
+```
+
+**Example `property_basic_audio_setting1.json`**:
+```json
+{
+  "dump": true,
+  
"dump_path": "./tests/keep_dump_output/", + "params": { + "sample_rate": 16000, + "key": "${env:ELEVENLABS_TTS_KEY}" + } +} +``` + +**Example `property_basic_audio_setting2.json`**: +```json +{ + "dump": true, + "dump_path": "./tests/keep_dump_output/", + "params": { + "sample_rate": 24000, + "key": "${env:ELEVENLABS_TTS_KEY}" + } +} +``` + +**Example `property_miss_required.json`**: +```json +{ + "params": {"key": ""} +} +``` + +### For ASR Extensions + +| Config File | Purpose | Content | +| ------------------------ | -------------------------- | ------------------------------------ | +| `property_en.json` | English transcription test | Valid key + `language: "en-US"` | +| `property_zh.json` | Chinese transcription test | Valid key + `language: "zh-CN"` | +| `property_invalid.json` | Error handling test | `key: "invalid", region: "invalid"` | +| `property_dump.json` | Audio dump test | Valid key + `dump: true` | + +--- + +## Step 7: TTS Guarder Tests Your Extension Must Pass + +Run with: `task tts-guarder-test EXTENSION=my_vendor_tts_python` + +There are **15 tests**. 
Here's what each validates: + +### Must-Pass Tests + +| Test | What It Validates | +| --------------------------------------- | -------------------------------------------------------- | +| `test_append_input` | Multiple text inputs appended with same request_id | +| `test_append_input_stress` | High volume of append operations | +| `test_append_input_without_text_input_end` | Missing text_input_end flags handled gracefully | +| `test_append_interrupt` | New requests interrupting in-progress ones | +| `test_basic_audio_setting` | Different sample rates produce different audio | +| `test_corner_input` | Special chars, emojis, very short/long text | +| `test_dump` | Audio dump files created with valid PCM data | +| `test_dump_each_request_id` | Each request_id produces separate dump file | +| `test_empty_text_request` | Empty/whitespace text: audio_end within 500ms, no crash | +| `test_flush` | Flush signal: receives flush_end, no data after 5s | +| `test_interleaved_requests` | 8 concurrent requests maintain separate audio streams | +| `test_invalid_required_params` | Invalid API key returns FATAL ERROR, no crash | +| `test_invalid_text_handling` | Malformed text handled without crash | +| `test_metrics` | TTFB metrics generated with valid timestamps | +| `test_miss_required_params` | Missing API key returns appropriate error | + +### Critical Pass Criteria + +- **Event ordering**: `tts_audio_start` -> `pcm_frame`(s) -> `tts_audio_end` per request +- **Request isolation**: Interleaved requests must not mix audio streams +- **Error handling**: Invalid/missing configs must produce errors, never crashes +- **Empty text**: Must complete quickly (audio_end within 500ms), no audio generated +- **Flush**: After flush_end, no more data for 5 seconds +- **Dump files**: Valid PCM data, one file per request_id when enabled + +## Step 8: ASR Guarder Tests Your Extension Must Pass + +Run with: `task asr-guarder-test EXTENSION=my_vendor_asr_python` + +There are **10 tests** (1 
skipped by default): + +| Test | What It Validates | +| --------------------------- | ------------------------------------------------------------ | +| `test_connection_timing` | Connects and transcribes English audio correctly | +| `test_asr_result` | Result structure: id, text, language, session_id fields | +| `test_asr_finalize` | Finalize signal produces final=True result + finalize_end | +| `test_reconnection` | Recovers gracefully after connection failure | +| `test_vendor_error` | Invalid creds produce proper error with vendor info | +| `test_multi_language` | English (en-US) and Chinese (zh-CN) both transcribe correctly| +| `test_dump` | Audio dump files created correctly | +| `test_metrics` | TTFW and TTLW metrics: positive, TTLW > TTFW | +| `test_audio_timestamp` | start_ms and duration_ms accuracy | +| `test_long_duration_stream` | **Skipped by default** — 5+ min stream without timeout | + +### Critical Pass Criteria + +- **Result fields**: Every ASR result must have `id`, `text`, `language`, `session_id` +- **Finalize**: Must produce `final=True` result and `asr_finalize_end` response +- **Error format**: Errors must have `id`, `module`, `code`, `message` + vendor info +- **Metrics**: TTFW > 0, TTLW > TTFW, both in milliseconds +- **Audio format**: Accepts 16-bit PCM, 16kHz, mono, 320 bytes per frame + +--- + +## AudioFrame Creation Pattern + +```python +from ten_runtime import AudioFrame, AudioFrameDataFmt + +frame = AudioFrame.create("pcm_frame") +frame.set_sample_rate(16000) +frame.set_bytes_per_sample(2) # 16-bit +frame.set_number_of_channels(1) # Mono +frame.set_data_fmt(AudioFrameDataFmt.INTERLEAVE) +frame.set_samples_per_channel(len(pcm_data) // 2) +frame.alloc_buf(len(pcm_data)) +buf = frame.lock_buf() +buf[:] = pcm_data +frame.unlock_buf(buf) +await ten_env.send_audio_frame(frame) +``` + +Set all properties **before** `alloc_buf()`. + +## Params Dict Pattern + +For HTTP/WebSocket vendor APIs: + +1. 
Store all config including `api_key` in `params` dict +2. Extract `api_key` for auth headers in client constructor +3. Strip `api_key` from params **only when building the HTTP request payload** +4. In `update_params()`: add vendor-required params, normalize keys + +```python +# Client constructor +self.api_key = config.params.get("api_key", "") +self.headers = {"Authorization": f"Bearer {self.api_key}"} + +# Request method +payload = {**self.config.params} +payload.pop("api_key", None) +``` + +## Bidirectional Extension Pattern + +For extensions that both receive from and send to the graph: + +```python +class MyBridge(AsyncExtension): + async def on_init(self, ten_env): + self.ten_env = ten_env # Store for callbacks + + async def on_audio_frame(self, ten_env, audio_frame): + buf = audio_frame.lock_buf() + self.external_system.send(bytes(buf)) + audio_frame.unlock_buf(buf) + + async def _external_callback(self, data): + frame = AudioFrame.create("pcm_frame") + # ... fill frame ... + await self.ten_env.send_audio_frame(frame) +``` + +## Pre-Submission Checklist + +- [ ] `addon.py` decorator name matches `manifest.json` `name` field +- [ ] All abstract methods implemented (vendor, request_tts/send_audio, etc.) 
+- [ ] Config validation raises ValueError for missing required params +- [ ] `to_str()` encrypts sensitive fields before logging +- [ ] `tests/configs/` has all required config files (see Step 6) +- [ ] `task tts-guarder-test` or `task asr-guarder-test` passes +- [ ] `task format` passes (Black, line-length 80) +- [ ] `task lint-extension EXTENSION=my_vendor_tts_python` passes +- [ ] `requirements.txt` lists all Python dependencies +- [ ] `README.md` documents config properties and env vars +- [ ] No hardcoded API keys anywhere + +## Language-Specific Notes + +| Language | Create Command | +| ---------- | -------------------------------------------------------------------- | +| Python | `tman create extension name --template default_async_extension_python` | +| Go | `tman create extension name --template default_extension_go` | +| C++ | `tman create extension name --template default_extension_cpp` | +| Node.js | `tman create extension name --template default_extension_nodejs` | + +## Portal References (Full Guides) + +- [Create a TTS Extension (89K)](https://github.com/TEN-framework/portal/blob/main/content/docs/ten_agent_examples/extension_dev/create_tts_extension.mdx) [EXTERNAL] +- [Create an ASR Extension (39K)](https://github.com/TEN-framework/portal/blob/main/content/docs/ten_agent_examples/extension_dev/create_asr_extension.mdx) [EXTERNAL] +- [Create a Hello World Extension](https://github.com/TEN-framework/portal/blob/main/content/docs/ten_agent_examples/extension_dev/create_hello_world_extension.mdx) [EXTERNAL] + +## See Also + +- [Back to Conventions](../04_conventions.md) +- [Back to Workflows](../05_workflows.md) +- [Testing](testing.md) — Full guarder test details and debugging diff --git a/docs/ai/L1/deep_dives/graph_configuration.md b/docs/ai/L1/deep_dives/graph_configuration.md new file mode 100644 index 0000000000..bfabb78ac8 --- /dev/null +++ b/docs/ai/L1/deep_dives/graph_configuration.md @@ -0,0 +1,410 @@ +# Graph Configuration + +> **When to 
Read This:** Load this document when you are modifying graph definitions +> in property.json, adding extensions to agent pipelines, or debugging data flow issues. + +## Overview + +Graphs define which extensions run and how they communicate. They are declared in +`property.json` under the `predefined_graphs` array within the `ten` namespace. + +## Property.json Structure + +```json +{ + "ten": { + "log": { + "handlers": [...] + }, + "predefined_graphs": [ + { + "name": "voice_assistant", + "auto_start": true, + "graph": { + "nodes": [...], + "connections": [...] + } + } + ] + } +} +``` + +- `name` — graph identifier, used in `/start` request's `graph_name` field +- `auto_start` — set to `true` by the server for the selected graph at runtime +- `graph.nodes` — extension instances +- `graph.connections` — data flow wiring + +## Node Schema + +```json +{ + "type": "extension", + "name": "stt", + "addon": "deepgram_asr_python", + "extension_group": "transcription_group", + "property": { + "api_key": "${env:DEEPGRAM_API_KEY}", + "model": "nova-2", + "language": "en-US", + "sample_rate": 16000 + } +} +``` + +| Field | Required | Purpose | +| ----------------- | -------- | ------------------------------------------------- | +| `type` | Yes | Always `"extension"` | +| `name` | Yes | Instance name (used in connections) | +| `addon` | Yes | Extension package name (must match manifest.json) | +| `extension_group` | No | Thread grouping for extensions | +| `property` | No | Config overrides merged with extension defaults | + +## Connection Schema + +Connections define how messages flow between extensions: + +```json +{ + "extension": "main", + "cmd": [ + {"name": "flush", "dest": [{"extension": "llm"}, {"extension": "tts"}]}, + {"name": "on_user_joined", "source": [{"extension": "agora_rtc"}]} + ], + "data": [ + {"name": "text_data", "source": [{"extension": "llm"}]}, + {"name": "text_data", "dest": [{"extension": "tts"}]} + ] +} +``` + +Each connection block is **from the 
perspective of the named extension**:
+- `source` — "this extension receives X from these sources"
+- `dest` — "this extension sends X to these destinations"
+
+## Full Graph Example
+
+A basic voice assistant pipeline (ASR → LLM → TTS). Note that the `main`
+orchestrator extension referenced in the connections must also be declared
+as a node:
+
+```json
+{
+  "name": "voice_assistant",
+  "auto_start": false,
+  "graph": {
+    "nodes": [
+      {
+        "type": "extension", "name": "agora_rtc", "addon": "agora_rtc",
+        "extension_group": "rtc_group",
+        "property": {"app_id": "${env:AGORA_APP_ID}", "channel": "default"}
+      },
+      {
+        "type": "extension", "name": "main", "addon": "main_python",
+        "extension_group": "control_group"
+      },
+      {
+        "type": "extension", "name": "stt", "addon": "deepgram_asr_python",
+        "extension_group": "stt_group",
+        "property": {"api_key": "${env:DEEPGRAM_API_KEY}", "model": "nova-2"}
+      },
+      {
+        "type": "extension", "name": "llm", "addon": "openai_llm2_python",
+        "extension_group": "llm_group",
+        "property": {"api_key": "${env:OPENAI_API_KEY}", "model": "${env:OPENAI_MODEL}"}
+      },
+      {
+        "type": "extension", "name": "tts", "addon": "elevenlabs_tts2_python",
+        "extension_group": "tts_group",
+        "property": {"api_key": "${env:ELEVENLABS_TTS_KEY}"}
+      }
+    ],
+    "connections": [
+      {
+        "extension": "agora_rtc",
+        "audio_frame": [
+          {"name": "pcm_frame", "dest": [{"extension": "stt"}]}
+        ]
+      },
+      {
+        "extension": "stt",
+        "data": [
+          {"name": "asr_result", "dest": [{"extension": "main"}]}
+        ]
+      },
+      {
+        "extension": "main",
+        "cmd": [
+          {"name": "flush", "dest": [{"extension": "llm"}, {"extension": "tts"}]},
+          {"name": "on_user_joined", "source": [{"extension": "agora_rtc"}]}
+        ],
+        "data": [
+          {"name": "text_data", "source": [{"extension": "llm"}]},
+          {"name": "text_data", "dest": [{"extension": "tts"}]}
+        ]
+      },
+      {
+        "extension": "tts",
+        "data": [
+          {"name": "tts_text_input", "source": [{"extension": "main"}]}
+        ],
+        "audio_frame": [
+          {"name": "pcm_frame", "dest": [{"extension": "agora_rtc"}]}
+        ]
+      }
+    ]
+  }
+}
+```
+
+## Connection Types Reference
+
+| Type          | Direction | Payload            | Example Names                        |
+| ------------- | --------- | ------------------ | 
----------------------------------- | +| `cmd` | Both | Named commands | `flush`, `tool_register`, `on_user_joined`, `chat_completion_call`, `update_configs` | +| `data` | Both | Named data msgs | `asr_result`, `text_data`, `tts_text_input`, `tts_audio_start`, `tts_audio_end`, `error` | +| `audio_frame` | Both | PCM audio streams | `pcm_frame` | +| `video_frame` | Both | Video streams | `video_frame` | + +## Parallel Audio Routing + +When sending audio to multiple destinations, split at the **source node**: + +```json +// CORRECT — split at agora_rtc (source) +{ + "extension": "agora_rtc", + "audio_frame": [ + {"name": "pcm_frame", "dest": [ + {"extension": "stt"}, + {"extension": "vad"} + ]} + ] +} +``` + +Do NOT split from intermediate nodes — this causes runtime crashes. + +## Property Injection + +When the server processes a `/start` request, it dynamically modifies the graph: + +1. **Graph selection**: Filters `predefined_graphs` to match `graph_name`, sets `auto_start: true` +2. **Channel injection**: Scans all nodes — any node with a `"channel"` property gets `channel_name` injected +3. **Start params**: Injects `remote_stream_id`, `bot_stream_id`, `token` via `startPropMap` +4. **Extension overrides**: Merges `req.Properties[extensionName]` into matching node properties +5. **Env var validation**: Resolves all `${env:VAR}` references + +This is why `agora_rtc` and any custom extension with a `"channel"` property automatically +receive the dynamic channel name without code changes. + +## Adding a New Graph + +1. Add a new entry to `predefined_graphs[]` in the example's `tenapp/property.json` +2. Ensure all referenced extensions are listed in `tenapp/manifest.json` +3. Run `tman install` to create symlinks for new dependencies +4. **Nuclear restart** required (frontend caches the graph list) + +## Generating property.json with rebuild_property.py + +For complex deployments with many graph variants, hand-editing property.json is +error-prone. 
The `voice-assistant-advanced` example uses a Python script to generate +it programmatically: + +**Location**: `agents/examples/voice-assistant-advanced/tenapp/rebuild_property.py` + +**Usage**: +```bash +docker exec ten_agent_dev bash -c \ + "cd /app/agents/examples/voice-assistant-advanced/tenapp && python3 rebuild_property.py" +``` + +### How It Works + +The script defines reusable **node configs** as Python dicts, then assembles them +into graphs with helper functions: + +```python +# 1. Define reusable node configs +nova3_stt_100ms = { + "type": "extension", "name": "stt", "addon": "deepgram_ws_asr_python", + "extension_group": "stt", + "property": { + "params": { + "api_key": "${env:DEEPGRAM_API_KEY}", + "model": "nova-3", "language": "en-US", + "interim_results": True, "endpointing": 100, + } + }, +} + +cartesia_tts_sonic3 = { + "type": "extension", "name": "tts", "addon": "cartesia_tts", + "extension_group": "tts", + "property": { + "dump": False, "dump_path": "./", + "params": { + "api_key": "${env:CARTESIA_TTS_KEY}", + "model_id": "sonic-3", + "output_format": {"container": "raw", "sample_rate": 44100}, + }, + }, +} + +gpt51_llm = { + "type": "extension", "name": "llm", "addon": "openai_llm2_python", + "extension_group": "chatgpt", + "property": { + "base_url": "https://api.openai.com/v1", + "api_key": "${env:OPENAI_API_KEY}", + "model": "gpt-5.1", "max_tokens": 1000, + "prompt": "...", "greeting": "...", + }, +} + +# 2. Define reusable connection templates +basic_connections = [ + {"extension": "main_control", "cmd": [...], "data": [...]}, + {"extension": "agora_rtc", "audio_frame": [...], "data": [...]}, + {"extension": "streamid_adapter", "audio_frame": [...]}, + {"extension": "tts", "data": [...], "audio_frame": [...]}, + # ... +] + +# 3. 
Assemble graphs with helper functions +def create_basic_voice_assistant(name, has_avatar=False, avatar_type=None, + tts_config=None, stt_config=None, llm_config=None): + nodes = [agora_rtc_base, stt_config or nova3_stt_100ms, llm_config or ..., ...] + connections = copy.deepcopy(basic_connections) + if has_avatar: + # Modify connections: route TTS audio through avatar instead of direct to RTC + ... + return {"name": name, "auto_start": False, "graph": {"nodes": nodes, "connections": connections}} + +# 4. Build graph list and write property.json +new_graphs = [ + create_basic_voice_assistant("voice_assistant"), + create_basic_voice_assistant("voice_assistant_heygen", has_avatar=True, avatar_type="heygen"), + create_apollo_graph("flux_apollo_gpt_5_1_cartesia", gpt51_llm, flux_stt), + # ... +] + +new_data = {"ten": {"log": log_config, "predefined_graphs": new_graphs}} +with open("property.json", "w") as f: + json.dump(new_data, f, indent=2) +``` + +### Key Patterns in rebuild_property.py + +| Pattern | Purpose | +| ---------------------------- | ---------------------------------------------------- | +| `copy.deepcopy(config)` | Prevent mutation when reusing node configs | +| Parametric helper functions | `create_basic_voice_assistant(name, tts_config=...)` | +| Connection rewiring for avatars | Route TTS audio through avatar instead of direct to RTC | +| Preserve existing log config | `log_config = data["ten"]["log"]` before overwriting | +| Commented-out graph groups | Keep old graph definitions for reference/reactivation| + +### When to Use rebuild_property.py + +- **Multiple graph variants** (A/B testing vendors: Deepgram vs Cartesia TTS) +- **Avatar variants** (same pipeline with/without HeyGen/Anam) +- **LLM model testing** (GPT-4o vs GPT-5.1 vs Groq) +- **Complex connection rewiring** (avatar graphs need different audio routing) + +For simple single-graph setups, editing property.json directly is fine. 
+ +## Manifest.json Dependencies + +When adding an extension to a graph, ensure its dependency is in `manifest.json`: + +```json +{ + "dependencies": [ + {"type": "extension", "name": "my_vendor_tts_python", "version": "0.1.0"} + ] +} +``` + +Then run: +```bash +docker exec ten_agent_dev bash -c "cd /app/agents/examples//tenapp && tman install" +``` + +## Main Extension Customization + +The "main" extension controls agent orchestration. Three variants exist: + +| Variant | Language | Pattern | Use Case | +| -------------------- | ---------- | ---------------------------- | -------------------------- | +| Python Cascade | Python | ASR → LLM → TTS pipeline | Standard voice assistant | +| Python Realtime V2V | Python | OpenAI Realtime API | Voice-to-voice (no ASR/TTS)| +| Node.js Cascade | TypeScript | ASR → LLM → TTS pipeline | TypeScript preference | + +Key customization points: +- `on_data()` — event routing (match/case dispatcher) +- `on_cmd()` — tool registration and handling +- Greeting logic in `on_start()` or `on_user_joined` handler + +## Example Apps + +Available in `agents/examples/`. 
Key examples: + +| Example | Description | +| --------------------------------- | ---------------------------------------------------- | +| `voice-assistant` | Basic: Deepgram ASR + OpenAI LLM + ElevenLabs TTS | +| `voice-assistant-advanced` | Multiple graph variants, vendor A/B testing | +| `voice-assistant-realtime` | OpenAI Realtime API (voice-to-voice, no ASR/TTS) | +| `voice-assistant-video` | Vision capability added | +| `voice-assistant-live2d` | Live2D avatar integration | +| `voice-assistant-sip-twilio` | SIP phone integration (Twilio) | +| `voice-assistant-sip-telnyx` | SIP phone integration (Telnyx) | +| `voice-assistant-sip-plivo` | SIP phone integration (Plivo) | +| `voice-assistant-with-ten-vad` | Custom VAD (Voice Activity Detection) | +| `voice-assistant-with-turn-detection` | Transformer-based turn detection | +| `voice-assistant-nodejs` | Node.js implementation | +| `doodler` | Spoken prompts → hand-drawn sketches | +| `speaker-diarization` | Real-time multi-speaker identification | +| `transcription` | Audio transcription tool | +| `websocket-example` | WebSocket transport (no Agora RTC) | +| `http-control` | HTTP-based control interface | + +### voice-assistant vs voice-assistant-advanced + +| Aspect | voice-assistant | voice-assistant-advanced | +| --------------------- | --------------------------- | --------------------------------- | +| Graphs | 1 (`voice_assistant`) | 4+ variants (Flux/Apollo/Cartesia)| +| Vendor switching | Fixed components | Multiple vendor combinations | +| LLM prompts | Simple greeting | Multi-step research workflows | +| Use case | Getting started | Production A/B testing | + +Both follow the same core pipeline: +``` +Agora RTC → streamid_adapter → ASR → main_control → LLM → TTS → Agora RTC +``` + +### Real Graph: voice-assistant/tenapp/property.json + +This is a complete, working graph. 
Key nodes: + +| Node | Addon | Role | +| ------------------ | ------------------------ | ---------------------------------- | +| `agora_rtc` | `agora_rtc` | Audio/video transport | +| `streamid_adapter` | `streamid_adapter` | Stream ID routing | +| `stt` | `deepgram_asr_python` | Speech-to-text | +| `llm` | `openai_llm2_python` | Language model | +| `tts` | `elevenlabs_tts2_python` | Text-to-speech | +| `main_control` | `main_python` | Orchestration (greetings, routing) | +| `message_collector` | `message_collector2` | Transcript collection | + +Connection wiring: +``` +agora_rtc --pcm_frame--> streamid_adapter --pcm_frame--> stt +stt --asr_result--> main_control +main_control --text_data--> llm --text_data--> main_control --tts_text_input--> tts +tts --pcm_frame--> agora_rtc +``` + +## Portal References + +- [Understanding property.json](https://github.com/TEN-framework/portal/blob/main/content/docs/ten_agent_examples/project_structure/property_json.md) [EXTERNAL] +- [Customize Agent via Code](https://github.com/TEN-framework/portal/blob/main/content/docs/ten_agent_examples/customize_agent/modify-main/index.mdx) [EXTERNAL] + +## See Also + +- [Back to Architecture](../02_architecture.md) +- [Back to Workflows](../05_workflows.md) +- [Back to Interfaces](../06_interfaces.md) diff --git a/docs/ai/L1/deep_dives/server_architecture.md b/docs/ai/L1/deep_dives/server_architecture.md new file mode 100644 index 0000000000..18bebf0a15 --- /dev/null +++ b/docs/ai/L1/deep_dives/server_architecture.md @@ -0,0 +1,211 @@ +# Server Architecture + +> **When to Read This:** Load this document when you need to understand how the Go API +> server works, how property injection transforms graph configurations at runtime, or +> how worker processes are managed. + +## Overview + +The TEN Agent server is a Go HTTP server built with the Gin framework. It manages +agent session lifecycles — starting worker processes, injecting configuration, and +handling session keepalive/teardown. 
+ +## Server Structure + +``` +server/ +├── main.go # Entry point, parses flags, starts HTTP server +└── internal/ + ├── http_server.go # All endpoint handlers + property injection + └── config.go # startPropMap configuration for parameter injection +``` + +Key launch flag: `-tenapp_dir=` — points to the example's `tenapp/` directory +containing `property.json` and `manifest.json`. + +## Endpoint Handlers + +| Handler | Route | Purpose | +| -------------------------------- | ------------------ | ----------------------------------- | +| `handlerHealth()` | `GET /health` | Returns `{"code":"0"}` if running | +| `handleGraphs()` | `GET /graphs` | Reads predefined_graphs from property.json | +| `handlerStart()` | `POST /start` | Spawns worker process for a session | +| `handlerStop()` | `POST /stop` | Terminates worker process | +| `handlerPing()` | `POST /ping` | Resets session timeout timer | +| `handlerList()` | `GET /list` | Lists active workers/channels | +| `handlerGenerateToken()` | `POST /token/generate` | Generates Agora RTC tokens | +| `handleAddonDefaultProperties()` | `GET /addon/default-properties` | Extension property.json files | +| `handlerVectorDocumentUpdate()` | `POST /vector/document/update` | Vector DB updates | +| `handlerVectorDocumentUpload()` | `POST /vector/document/upload` | File uploads for vector DB | + +## Property Injection Pipeline + +When `/start` is called, the server transforms the static `property.json` into a +session-specific configuration. 
This is the core of the `processProperty` function: + +### Step 1: Read Base Configuration + +```go +// Read property.json from the configured tenapp_dir +propertyJsonFile := filepath.Join(s.config.TenappDir, "property.json") +content, _ := os.ReadFile(propertyJsonFile) +``` + +### Step 2: Filter Graphs + +Only the requested graph is kept; its `auto_start` is set to `true`: + +```go +// Find matching graph by name +for _, graph := range predefinedGraphs { + if graph.Name == req.GraphName { + graph.AutoStart = true + filteredGraphs = append(filteredGraphs, graph) + } +} +``` + +### Step 3: Merge Dynamic Properties + +Per-extension property overrides from the request are merged: + +```go +// req.Properties = {"openai_llm2_python": {"model": "gpt-4o-mini"}} +for _, node := range graph.Nodes { + if props, ok := req.Properties[node.Name]; ok { + mergeProperties(node.Property, props) + } +} +``` + +### Step 4: Inject Start Parameters + +The `startPropMap` (defined in `config.go`) maps request fields to node properties: + +```go +var startPropMap = map[string]string{ + "RemoteStreamId": "remote_stream_id", + "BotStreamId": "agora_uid", + "Token": "token", + "WorkerHttpServerPort": "server_port", +} +``` + +These values are injected into every node that has the corresponding property defined. + +### Step 5: Channel Auto-Injection + +Any node with a `"channel"` property automatically receives the request's `channel_name`: + +```go +// Scan all nodes — if node has "channel" property, inject channel_name +for _, node := range graph.Nodes { + if _, hasChannel := node.Property["channel"]; hasChannel { + node.Property["channel"] = req.ChannelName + } +} +``` + +This is future-proof: adding a new extension with a `"channel"` property requires +zero server code changes. + +### Step 6: Environment Variable Resolution + +All `${env:VAR}` and `${env:VAR|default}` references in the property JSON are +resolved against the container's environment. 
+ +### Step 7: Write Temp File and Spawn Worker + +The modified property JSON is written to a temporary file, and a worker process +is spawned: + +```go +// Write modified config +tmpFile := filepath.Join(tmpDir, "property.json") +os.WriteFile(tmpFile, modifiedJSON, 0644) + +// Spawn worker +cmd := exec.Command("tman", "run", "start", "--property", tmpFile) +``` + +## Worker Process Lifecycle + +``` +/start request + │ + ▼ +Server: processProperty() → temp property.json + │ + ▼ +Server: exec("tman run start --property ") + │ + ▼ +Worker process starts → loads graph → initializes extensions + │ + ├── Extensions call on_init() → on_start() + ├── Extensions process messages (cmd, data, audio_frame, video_frame) + │ + ├── /ping requests reset the timeout timer + │ + ▼ +/stop request OR timeout + │ + ▼ +Worker: extensions call on_stop() → on_deinit() + │ + ▼ +Worker process terminates +``` + +**Important**: Worker processes run on the **host machine**, not inside Docker. +They can outlive the server process and even container restarts. Always check for +zombie workers with `ps -elf | grep 'bin/main'`. + +## Session Management + +| Action | Server Behavior | +| -------------- | -------------------------------------------------- | +| `/start` | Spawns worker, stores in active workers map | +| `/stop` | Sends SIGTERM to worker, removes from map | +| `/ping` | Resets timeout timer for the channel | +| Timeout | Auto-sends SIGTERM after `timeout` seconds idle | +| `/list` | Returns all active channel → worker mappings | + +Timeout of `-1` means the session never auto-stops (requires explicit `/stop`). + +## LOG_STDOUT for Worker Output + +Worker processes write to stdout. To see their output in `/tmp/task_run.log`, +the `.env` must have: + +```bash +LOG_STDOUT=true +``` + +Without this, extension logs (Python `print()`, `ten_env.log_*()`) are invisible. 
+ +## Security Measures + +- **Path traversal prevention**: The server ignores any client-provided `tenapp_dir` + and always uses the launch-configured path +- **Channel name sanitization**: Channel names are validated before use in file paths +- **Safe property merge**: `mergeProperties()` handles nested configs safely with + type checking + +## Configuration (config.go) + +The `startPropMap` in `config.go` controls which request fields map to which +node properties: + +| Request Field | Node Property | Purpose | +| ---------------------- | -------------------- | ------------------------------ | +| `RemoteStreamId` | `remote_stream_id` | Remote user's stream ID | +| `BotStreamId` | `agora_uid` | Bot's Agora UID | +| `Token` | `token` | Agora RTC token | +| `WorkerHttpServerPort` | `server_port` | Worker's HTTP server port | + +## See Also + +- [Back to Architecture](../02_architecture.md) +- [Graph Configuration](graph_configuration.md) — Property.json structure and connections +- [Back to Interfaces](../06_interfaces.md) diff --git a/docs/ai/L1/deep_dives/testing.md b/docs/ai/L1/deep_dives/testing.md new file mode 100644 index 0000000000..1ad2d72ecd --- /dev/null +++ b/docs/ai/L1/deep_dives/testing.md @@ -0,0 +1,295 @@ +# Testing + +> **When to Read This:** Load this document when you need to run tests for an extension, +> understand what the guarder tests validate, or debug test failures. + +## Overview + +Three levels of testing: +1. **Extension standalone tests** — per-extension unit/integration tests in `tests/` +2. **Guarder integration tests** — framework-level ASR/TTS validation suites +3. 
**Root-level tasks** — orchestrated via `Taskfile.yml` + +## Running Tests + +```bash +# All tests +docker exec ten_agent_dev bash -c "cd /app && task test" + +# Single extension with dependency install +docker exec ten_agent_dev bash -c \ + "cd /app && task test-extension EXTENSION=agents/ten_packages/extension/deepgram_tts" + +# Single extension, skip install (faster iteration) +docker exec ten_agent_dev bash -c \ + "cd /app && task test-extension-no-install EXTENSION=agents/ten_packages/extension/deepgram_tts" + +# TTS guarder (all 15 tests) +docker exec ten_agent_dev bash -c "cd /app && task tts-guarder-test EXTENSION=deepgram_tts" + +# ASR guarder (all 10 tests) +docker exec ten_agent_dev bash -c "cd /app && task asr-guarder-test EXTENSION=azure_asr_python" + +# Specific test only +docker exec ten_agent_dev bash -c "cd /app && task tts-guarder-test EXTENSION=deepgram_tts -- -k test_flush" +``` + +## Extension Standalone Tests + +Each extension can have `tests/` with a `bin/start` entry point: + +``` +my_extension/tests/ +├── bin/start # Sets PYTHONPATH, runs pytest +├── configs/ # Test config JSON files +│ ├── property.json +│ ├── property_basic_audio_setting1.json +│ ├── property_basic_audio_setting2.json +│ ├── property_dump.json +│ ├── property_miss_required.json +│ └── property_invalid.json +├── conftest.py # Fixtures +└── test_*.py # Test files +``` + +### PYTHONPATH + +Tests need this to import TEN runtime: + +```bash +export PYTHONPATH=".:ten_packages/system/ten_runtime_python/lib:\ +ten_packages/system/ten_runtime_python/interface:\ +ten_packages/system/ten_ai_base/interface:\ +ten_packages/extension/${EXT_NAME}:$PYTHONPATH" +``` + +--- + +## TTS Guarder Tests (15 Tests) + +**Location**: `agents/integration_tests/tts_guarder/` + +These tests run against any TTS extension. The manifest template (`manifest-tmpl.json`) +substitutes `{{extension_name}}` with your extension name at runtime. 
+ +### Test Inventory + +| # | Test | What It Validates | Pass Criteria | +|---|------|-------------------|---------------| +| 1 | `test_append_input` | Multiple texts appended with same request_id | audio_start -> frames -> audio_end per group, correct request_id | +| 2 | `test_append_input_stress` | High volume append operations | All appends processed without errors | +| 3 | `test_append_input_without_text_input_end` | Missing text_input_end flag | Processes correctly despite missing flags | +| 4 | `test_append_interrupt` | New requests interrupting in-progress ones | Interrupts handled without crash or malformed audio | +| 5 | `test_basic_audio_setting` | Different sample rates produce different audio | Two configs with different sample_rate yield different output rates | +| 6 | `test_corner_input` | Special chars, emojis, punctuation-only, very short/long | All processed without errors | +| 7 | `test_dump` | Audio dump file creation | Dump file exists, contains valid PCM, size matches duration | +| 8 | `test_dump_each_request_id` | Separate dump files per request_id | Each request_id has own dump file | +| 9 | `test_empty_text_request` | Empty/whitespace text | audio_end within 500ms, no audio data, no crash | +| 10 | `test_flush` | Flush signal handling | Receives flush_end with matching flush_id, no data for 5s after | +| 11 | `test_interleaved_requests` | 8 concurrent requests with different request_ids | Each maintains separate audio stream, correct ordering per request | +| 12 | `test_invalid_required_params` | Invalid API key | Returns FATAL ERROR with message, no crash | +| 13 | `test_invalid_text_handling` | Malformed text, null chars, very long strings | Handled gracefully without crash | +| 14 | `test_metrics` | TTFB metric generation | Metrics data present with valid timestamps | +| 15 | `test_miss_required_params` | Missing API key | Appropriate error returned | + +### Critical TTS Invariants + +1. 
**Event ordering must be**: `tts_audio_start` -> `pcm_frame`(s) -> `tts_audio_end` per request +2. **Request isolation**: Interleaved requests must never mix audio streams +3. **Error handling**: Invalid/missing configs produce errors, never crashes +4. **Empty text**: Must complete fast (audio_end within 500ms), generate no audio +5. **Flush**: After flush_end, zero data output for 5 seconds + +### Required TTS Config Files + +Your `tests/configs/` must provide: + +``` +property.json # Valid API key + default settings +property_basic_audio_setting1.json # sample_rate: 16000 + valid key + dump:true +property_basic_audio_setting2.json # sample_rate: 24000 + valid key + dump:true +property_dump.json # dump:true + dump_path + valid key +property_miss_required.json # Empty/missing API key +property_invalid.json # Empty/invalid API key +``` + +**Template** (`property_basic_audio_setting1.json`): +```json +{ + "dump": true, + "dump_path": "./tests/keep_dump_output/", + "params": { + "sample_rate": 16000, + "key": "${env:MY_VENDOR_API_KEY}" + } +} +``` + +### Sample Rate Test Notes + +Some extensions don't support multiple sample rates. To skip the sample rate +comparison (test still runs, just doesn't assert rates differ), the test runner +checks `ENABLE_SAMPLE_RATE` env var. Extensions like `openai_tts_python` and +`humeai_tts_python` set this to `False`. 
+ +--- + +## ASR Guarder Tests (10 Tests, 1 Skipped) + +**Location**: `agents/integration_tests/asr_guarder/` + +### Test Audio Format + +- 16-bit PCM, 16kHz sample rate, mono +- Test files: `test_data/16k_en_us.pcm` (English), `test_data/16k_zh_cn.pcm` (Chinese) +- Chunk size: 320 bytes per frame +- Send interval: 10ms between frames + +### Test Inventory + +| # | Test | What It Validates | Pass Criteria | +|---|------|-------------------|---------------| +| 1 | `test_connection_timing` | Connect + transcribe English audio | Results received, language="en-US" | +| 2 | `test_asr_result` | Result structure and data integrity | Fields: id, text, language, session_id all present | +| 3 | `test_asr_finalize` | Finalize signal → final result + finalize_end | final=True in result, finalize_end received | +| 4 | `test_reconnection` | Recovery after connection failure | Error detected, no crash, can reconnect | +| 5 | `test_vendor_error` | Invalid creds → proper error format | Error has id, module, code, message + vendor info | +| 6 | `test_multi_language` | English + Chinese transcription | en-US and zh-CN both detected correctly | +| 7 | `test_dump` | Audio dump functionality | Dump files created with correct data | +| 8 | `test_metrics` | TTFW and TTLW metrics | TTFW > 0, TTLW > TTFW, both in milliseconds | +| 9 | `test_audio_timestamp` | start_ms and duration_ms accuracy | Timestamps accurate within tolerance | +| 10 | `test_long_duration_stream` | **SKIPPED** — 5+ min stream | No timeout or connection drop | + +### Critical ASR Invariants + +1. **Result fields**: Every result must have `id`, `text`, `language`, `session_id` +2. **Finalize flow**: `asr_finalize` cmd -> `final=True` result -> `asr_finalize_end` response +3. **Error format**: `{id, module, code, message, vendor_info: {vendor, code, message}}` +4. 
**Metrics**: TTFW (Time To First Word) > 0, TTLW (Time To Last Word) > TTFW + +### Required ASR Config Files + +``` +property_en.json # Valid key + language: "en-US" +property_zh.json # Valid key + language: "zh-CN" +property_invalid.json # key: "invalid" (triggers vendor error test) +property_dump.json # Valid key + dump: true +``` + +**Template** (`property_en.json` for Deepgram): +```json +{ + "params": { + "key": "${env:DEEPGRAM_API_KEY}", + "model": "nova-2", + "sample_rate": 16000, + "encoding": "linear16", + "language": "en-US" + } +} +``` + +--- + +## Guarder Test Framework Internals + +### Manifest Template System + +Both guarders use template manifests with `{{extension_name}}` placeholders: + +```json +{ + "type": "app", + "name": "tts_guarder", + "version": "0.1.0", + "dependencies": [ + {"path": "../../ten_packages/extension/{{extension_name}}"} + ] +} +``` + +The Taskfile substitutes this at runtime with `sed`. + +### conftest.py Pattern + +Both guarders use a session-scoped FakeApp: + +```python +@pytest.fixture(scope="session", autouse=True) +def global_setup_and_teardown(): + event = threading.Event() + fake_app_ctx = FakeAppCtx(event) + fake_app_thread = threading.Thread(target=run_fake_app, args=(fake_app_ctx,)) + fake_app_thread.start() + event.wait() + yield + fake_app_ctx.fake_app.close() + fake_app_thread.join() +``` + +Each test creates its own `ExtensionTester` within this shared app context. +Tests share the session-scoped app but get fresh extension instances. 
+ +### Pytest Options + +- `--extension_name` — extension to test (required) +- `--config_dir` — path to configs directory (required) +- `--enable_sample_rate` — "True"/"False" for sample rate comparison (TTS only) + +--- + +## Common Test Failures and Fixes + +### "Timeout waiting for audio" +- **Cause**: External API not responding within timeout +- **Fix**: Check API key is valid, check network, increase timeout if needed +- **Note**: Some flakiness is expected with external APIs — run individually to confirm + +### "Received error data" / FATAL ERROR +- **Cause**: Extension detected invalid config and raised error (this is correct behavior for error tests) +- **Fix**: If this happens on non-error tests, check your config files have valid API keys + +### "Found N dump files, expected M" +- **Cause**: Some requests timed out and didn't produce dump files +- **Fix**: Usually API timeout flakiness — rerun the test + +### "Received additional data after flush_end" +- **Cause**: Extension sent audio data after it should have stopped +- **Fix**: Ensure your cancel_tts/flush handling stops all pending output immediately + +### "Test failed: sample rates are the same" +- **Cause**: Your extension ignores the sample_rate config +- **Fix**: Implement sample_rate support, or set ENABLE_SAMPLE_RATE=False if your API doesn't support it + +### Import errors +- **Cause**: PYTHONPATH doesn't include ten_runtime_python and ten_ai_base +- **Fix**: Check `tests/bin/start` script sets PYTHONPATH correctly + +### "ModuleNotFoundError: No module named 'ten_packages.extension.xxx'" +- **Cause**: Extension not installed in test environment +- **Fix**: Run `tman install --standalone` in extension directory, or use `task test-extension` (does it automatically) + +--- + +## CI/CD Pipeline + +### Manual Guarder Tests (GitHub Actions) + +ASR and TTS guarder tests can be triggered manually: + +- Workflow: `.github/workflows/manual_test_asr_guarder.yml` +- Inputs: `extension` name, 
`config_dir`, `branch`, `env_vars` (semicolon-separated secret names) +- API keys loaded from GitHub Secrets at runtime + +### Extension Publishing + +- Workflow: `.github/workflows/manual_publish_extension.yml` +- Steps: `tman install --standalone` -> `tman run build` -> `tman publish` +- Requires `TEN_CLOUD_STORE` secret for publishing + +--- + +## See Also + +- [Extension Development](extension_development.md) — Config files and pre-submission checklist +- [Back to Workflows](../05_workflows.md) From a92239ccf4ac0890e283d551a4a8d4a38cb6962d Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 12:39:50 +0000 Subject: [PATCH 02/18] refactor: rewrite deepgram tts client with duplex websocket pattern MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit rewrite DeepgramTTSClient with separate send and receive async tasks on a single websocket, matching the cartesia_tts architecture. this replaces the serial send-then-receive pattern that caused state leaks between interleaved requests. key changes: - _send_loop(): reads from _text_queue, sends Speak+Flush to WS - _receive_loop(): reads from WS, puts events into _output_queue - _connection_loop(): auto-reconnect with exponential backoff - cancel drops audio in receive loop, Flushed always signals END - update docs/ai gotchas with deployment lessons learned test results unchanged at 14/16 guarder passed: - test_interleaved_requests: still fails — request 8/8 gets timeout because output queue has stale END from cancelled request. needs per-request-id event routing (next iteration). 
- test_subtitle_alignment: feature gap (no word-level timing) --- .../extension/deepgram_tts/deepgram_tts.py | 512 +++++++++++------- 1 file changed, 323 insertions(+), 189 deletions(-) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py index 1564afcc2d..855f33148a 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py @@ -16,13 +16,18 @@ from ten_runtime import AsyncTenEnv from ten_ai_base.const import LOG_CATEGORY_VENDOR -# Custom event types to communicate status back to the extension +# Event types for the output queue EVENT_TTS_RESPONSE = 1 EVENT_TTS_END = 2 EVENT_TTS_ERROR = 3 EVENT_TTS_FLUSH = 4 EVENT_TTS_TTFB_METRIC = 5 +MAX_RETRY_TIMES = 5 + +# Sentinel to signal the send loop to stop +_SEND_STOP = None + class DeepgramTTSConnectionException(Exception): """Exception raised when Deepgram TTS connection fails""" @@ -31,11 +36,18 @@ def __init__(self, status_code: int, body: str): self.status_code = status_code self.body = body super().__init__( - f"Deepgram TTS connection failed (code: {status_code}): {body}" + f"Deepgram TTS connection failed " f"(code: {status_code}): {body}" ) class DeepgramTTSClient: + """Duplex WebSocket client for Deepgram TTS. + + Uses separate send and receive tasks on a single WebSocket + connection. Text goes into _text_queue via send_text(), + audio/events come out of _output_queue via get(). 
+ """ + def __init__( self, config: DeepgramTTSConfig, @@ -44,47 +56,247 @@ def __init__( send_non_fatal_tts_error: Callable[[str], asyncio.Future] | None = None, ): self.config = config - self.ten_env: AsyncTenEnv = ten_env - self._is_cancelled = False - self.ws: ClientConnection | None = None + self.ten_env = ten_env self.send_fatal_tts_error = send_fatal_tts_error self.send_non_fatal_tts_error = send_non_fatal_tts_error - self.sent_ts: datetime | None = None - self.ttfb_sent: bool = False + self._ws: ClientConnection | None = None + self._closing = False + self._is_cancelled = False + + # Duplex queues + self._text_queue: asyncio.Queue[str | None] = asyncio.Queue() + self._output_queue: asyncio.Queue[tuple[bytes | int | None, int]] = ( + asyncio.Queue() + ) + + # Background tasks + self._connection_task: asyncio.Task | None = None + self._channel_tasks: list[asyncio.Task] = [] + self._connect_failures = 0 + + # TTFB tracking + self._sent_ts: datetime | None = None + self._ttfb_sent: bool = False - # Build WebSocket URL with query parameters - self.ws_url = self._build_ws_url() + self._ws_url = self._build_ws_url() def _build_ws_url(self) -> str: - """Build the WebSocket URL with query parameters""" base = self.config.base_url - params = f"model={self.config.model}&encoding={self.config.encoding}&sample_rate={self.config.sample_rate}" + params = ( + f"model={self.config.model}" + f"&encoding={self.config.encoding}" + f"&sample_rate={self.config.sample_rate}" + ) return f"{base}?{params}" + # ── Lifecycle ──────────────────────────────────────────────── + async def start(self) -> None: - """Preheating: establish websocket connection during initialization""" - try: - await self._connect() + """Start client: connect and launch send/receive loops.""" + self._closing = False + self._connection_task = asyncio.create_task(self._connection_loop()) + # Wait briefly for connection to establish + await asyncio.sleep(0.1) + + async def stop(self) -> None: + """Stop client: 
close connection and cancel tasks.""" + self._closing = True + self._is_cancelled = True - except Exception as e: - self.ten_env.log_error(f"Deepgram TTS preheat failed: {e}") + # Signal send loop to exit + await self._text_queue.put(_SEND_STOP) + + # Cancel channel tasks + for task in self._channel_tasks: + task.cancel() + self._channel_tasks.clear() + + if self._connection_task: + self._connection_task.cancel() + try: + await self._connection_task + except asyncio.CancelledError: + pass + self._connection_task = None + + # Signal any consumer waiting on output_queue + await self._output_queue.put((None, EVENT_TTS_END)) + + if self._ws: + try: + await self._ws.send(json.dumps({"type": "Close"})) + except Exception: + pass + try: + await self._ws.close() + except Exception: + pass + self._ws = None + + async def cancel(self) -> None: + """Cancel current TTS request.""" + self.ten_env.log_debug("Cancelling current TTS task.") + self._is_cancelled = True + self.reset_ttfb() + # Send Flush to Deepgram to stop audio generation + if self._ws: + try: + await self._ws.send(json.dumps({"type": "Flush"})) + except Exception: + pass + + def reset_ttfb(self) -> None: + self._sent_ts = None + self._ttfb_sent = False + + # ── Public interface for extension ─────────────────────────── + + async def send_text(self, text: str) -> None: + """Queue text for sending to Deepgram.""" + await self._text_queue.put(text) + + async def get( + self, text: str + ) -> AsyncIterator[tuple[bytes | int | None, int]]: + """Send text and yield audio events. + + For empty text, immediately yields EVENT_TTS_END. + Otherwise sends text to the send loop and reads + events from the output queue until END or ERROR. 
+ """ + if len(text.strip()) == 0: + self.ten_env.log_warn("DeepgramTTS: empty text, returning END") + yield None, EVENT_TTS_END + return + + self._is_cancelled = False + + # Track TTFB from when we send + if not self._ttfb_sent: + self._sent_ts = datetime.now() + + # Put text into send queue + await self._text_queue.put(text) + + # Read events from output queue + while True: + try: + data_msg, event = await asyncio.wait_for( + self._output_queue.get(), timeout=5.0 + ) + except asyncio.TimeoutError: + self.ten_env.log_error("Timeout waiting for Deepgram audio") + yield ( + b"Timeout waiting for Deepgram audio", + EVENT_TTS_ERROR, + ) + break + + if event == EVENT_TTS_END: + yield None, EVENT_TTS_END + break + elif event == EVENT_TTS_ERROR: + yield data_msg, EVENT_TTS_ERROR + break + else: + yield data_msg, event + + # ── Connection loop with auto-reconnect ───────────────────── + + async def _connection_loop(self) -> None: + min_delay = 0.1 + max_delay = 3.0 + + while not self._closing: + try: + await self._connect() + self._connect_failures = 0 + + if self._closing: + return + + # Launch duplex tasks + self._channel_tasks = [ + asyncio.create_task(self._send_loop()), + asyncio.create_task(self._receive_loop()), + ] + + # Wait for either to finish + done, pending = await asyncio.wait( + self._channel_tasks, + return_when=asyncio.FIRST_COMPLETED, + ) + + for task in pending: + task.cancel() + self._channel_tasks.clear() + + for task in done: + exc = task.exception() + if exc and not isinstance(exc, asyncio.CancelledError): + self.ten_env.log_warn( + f"Channel task exception: {exc}", + category=LOG_CATEGORY_VENDOR, + ) + + except DeepgramTTSConnectionException: + raise + + except asyncio.CancelledError: + return + + except Exception as e: + self.ten_env.log_warn( + f"vendor_status: connection error: {e}", + category=LOG_CATEGORY_VENDOR, + ) + + finally: + if self._ws: + try: + await self._ws.close() + except Exception: + pass + self._ws = None + + if self._closing: + 
return + + self._connect_failures += 1 + if self._connect_failures > MAX_RETRY_TIMES: + self.ten_env.log_error( + f"Max retries ({MAX_RETRY_TIMES}) " f"exceeded", + category=LOG_CATEGORY_VENDOR, + ) + return + + delay = min( + min_delay * (2 ** (self._connect_failures - 1)), + max_delay, + ) + self.ten_env.log_debug( + f"vendor_status: reconnecting in " + f"{delay:.1f}s " + f"(attempt {self._connect_failures}" + f"/{MAX_RETRY_TIMES})", + category=LOG_CATEGORY_VENDOR, + ) + await asyncio.sleep(delay) async def _connect(self) -> None: - """Connect to the websocket""" try: extra_headers = { "Authorization": f"Token {self.config.api_key}", } - self.ws = await websockets.connect( - self.ws_url, + self._ws = await websockets.connect( + self._ws_url, additional_headers=extra_headers, ) self.ten_env.log_debug( "vendor_status: connected to deepgram tts", category=LOG_CATEGORY_VENDOR, ) - except Exception as e: error_message = str(e) if "401" in error_message or "Unauthorized" in error_message: @@ -95,204 +307,126 @@ async def _connect(self) -> None: status_code=401, body=error_message ) from e else: - self.ten_env.log_error( - f"Deepgram TTS preheat failed, unexpected error: {e}" - ) + self.ten_env.log_error(f"Deepgram TTS connection failed: {e}") if self.send_non_fatal_tts_error: await self.send_non_fatal_tts_error( error_message=error_message ) raise - async def stop(self): - # Set cancellation flag first to stop any pending operations - self._is_cancelled = True - - # Stop the websocket connection if it exists - if self.ws: - try: - # Send close message - await self.ws.send(json.dumps({"type": "Close"})) - except Exception: - pass - await self.ws.close() - self.ws = None + # ── Send loop ─────────────────────────────────────────────── - async def cancel(self): - """ - Cancel the current TTS task. 
- """ - self.ten_env.log_debug("Cancelling current TTS task.") - self._is_cancelled = True - if self.ws: - self.reset_ttfb() - # Send flush to clear any pending audio - try: - await self.ws.send(json.dumps({"type": "Flush"})) - except Exception: - pass + async def _send_loop(self) -> None: + """Read text from queue and send Speak+Flush to WS.""" + try: + while not self._closing: + text = await self._text_queue.get() + if text is _SEND_STOP: + return + + if not self._ws: + self.ten_env.log_error("WS not connected in send loop") + return + + self.ten_env.log_debug( + f"send_text: {text[:80]}", + category=LOG_CATEGORY_VENDOR, + ) - async def reconnect(self): - """Close and re-establish the websocket connection.""" - if self.ws: - try: - await self.ws.close() - except Exception: - pass - self.ws = None - await self._connect() + speak_msg = {"type": "Speak", "text": text} + await self._ws.send(json.dumps(speak_msg)) + await self._ws.send(json.dumps({"type": "Flush"})) - def reset_ttfb(self): - self.sent_ts = None - self.ttfb_sent = False + except asyncio.CancelledError: + return + except Exception as e: + self.ten_env.log_error( + f"vendor_error: send_loop error: {e}", + category=LOG_CATEGORY_VENDOR, + ) + raise - async def get( - self, text: str - ) -> AsyncIterator[tuple[bytes | int | None, int | None]]: - """Generate TTS audio for the given text""" + # ── Receive loop ──────────────────────────────────────────── - if len(text.strip()) == 0: - self.ten_env.log_warn( - "DeepgramTTS: empty text provided, " "returning END event" - ) - yield None, EVENT_TTS_END + async def _receive_loop(self) -> None: + """Read from WS and dispatch to output queue.""" + if not self._ws: return - self._is_cancelled = False try: - await self._ensure_connection() - async for audio_chunk, event_status in self._process_single_tts( - text - ): - if event_status == EVENT_TTS_FLUSH: - # Cancelled: reconnect for clean state - await self.reconnect() - break + async for message in self._ws: + if 
self._closing: + return - yield audio_chunk, event_status + if isinstance(message, bytes): + await self._handle_audio(message) + else: + await self._handle_text_message(message) + except asyncio.CancelledError: + return + except websockets.exceptions.ConnectionClosed: + self.ten_env.log_warn( + "vendor_status: WS closed by server", + category=LOG_CATEGORY_VENDOR, + ) except Exception as e: self.ten_env.log_error( - f"vendor_error: {e}", + f"vendor_error: receive_loop: {e}", category=LOG_CATEGORY_VENDOR, ) raise - async def _ensure_connection(self) -> None: - """Ensure websocket connection is established""" - if not self.ws: - await self._connect() - - async def _process_single_tts( - self, text: str - ) -> AsyncIterator[tuple[bytes | int | None, int | None]]: - """Process a single TTS request""" - if not self.ws: - self.ten_env.log_error("Deepgram websocket not connected") + async def _handle_audio(self, data: bytes) -> None: + """Handle binary audio message from WS.""" + if self._is_cancelled: + self.ten_env.log_debug("Dropping audio chunk (cancelled)") return - self.ten_env.log_debug(f"process_single_tts, text: {text}") - - if not self.ttfb_sent: - self.sent_ts = datetime.now() - - # Send the text to Deepgram - speak_msg = { - "type": "Speak", - "text": text, - } - await self.ws.send(json.dumps(speak_msg)) + # TTFB on first audio chunk + if self._sent_ts and not self._ttfb_sent: + ttfb_ms = int( + (datetime.now() - self._sent_ts).total_seconds() * 1000 + ) + await self._output_queue.put((ttfb_ms, EVENT_TTS_TTFB_METRIC)) + self._ttfb_sent = True - # Send flush to get audio immediately - await self.ws.send(json.dumps({"type": "Flush"})) + self.ten_env.log_debug( + f"DeepgramTTS: audio chunk, " f"length: {len(data)}" + ) + await self._output_queue.put((data, EVENT_TTS_RESPONSE)) + async def _handle_text_message(self, raw: str) -> None: + """Handle JSON text message from WS.""" try: - # Receive audio data - while True: - if self._is_cancelled: - 
self.ten_env.log_debug( - "Cancellation flag detected, stopping TTS stream." - ) - yield None, EVENT_TTS_FLUSH - break - - try: - message = await asyncio.wait_for( - self.ws.recv(), timeout=5.0 - ) - except asyncio.TimeoutError: - self.ten_env.log_error( - "Timeout waiting for Deepgram audio - yielding error" - ) - yield b"Timeout waiting for Deepgram audio", EVENT_TTS_ERROR - break + data = json.loads(raw) + except json.JSONDecodeError: + self.ten_env.log_warn(f"Failed to parse message: {raw}") + return - # Binary message = audio data - if isinstance(message, bytes): - # Drop audio if cancelled during recv - if self._is_cancelled: - self.ten_env.log_debug( - "Cancellation detected after recv, " - "dropping audio chunk." - ) - yield None, EVENT_TTS_FLUSH - break - - # First audio chunk, calculate TTFB - if self.sent_ts and not self.ttfb_sent: - ttfb_ms = int( - (datetime.now() - self.sent_ts).total_seconds() - * 1000 - ) - yield ttfb_ms, EVENT_TTS_TTFB_METRIC - self.ttfb_sent = True + msg_type = data.get("type", "") - self.ten_env.log_debug( - f"DeepgramTTS: sending EVENT_TTS_RESPONSE, " - f"length: {len(message)}" - ) - yield message, EVENT_TTS_RESPONSE - - # Text message = JSON metadata - else: - try: - data = json.loads(message) - msg_type = data.get("type", "") - - if msg_type == "Flushed": - # All audio for this text has been sent - self.ten_env.log_debug( - "DeepgramTTS: received Flushed, " - "sending EVENT_TTS_END" - ) - yield None, EVENT_TTS_END - break - - elif msg_type == "Warning": - self.ten_env.log_warn( - f"Deepgram warning: {data.get('warn_msg', '')}" - ) - - elif msg_type == "Error": - error_msg = data.get("err_msg", "Unknown error") - self.ten_env.log_error( - f"Deepgram error: {error_msg}" - ) - yield error_msg.encode("utf-8"), EVENT_TTS_ERROR - break - - except json.JSONDecodeError: - self.ten_env.log_warn( - f"Failed to parse Deepgram message: {message}" - ) + if msg_type == "Flushed": + self.ten_env.log_debug("DeepgramTTS: Flushed received") + 
# Always signal END so get() returns promptly + # (even after cancel — the extension checks + # cancel state separately) + await self._output_queue.put((None, EVENT_TTS_END)) - if not self._is_cancelled: - self.ten_env.log_debug("DeepgramTTS: TTS complete") + elif msg_type == "Warning": + self.ten_env.log_warn( + f"Deepgram warning: " f"{data.get('warn_msg', '')}" + ) - except Exception as e: - error_message = str(e) - self.ten_env.log_error( - f"vendor_error: {error_message}", - category=LOG_CATEGORY_VENDOR, + elif msg_type == "Error": + error_msg = data.get("err_msg", "Unknown error") + self.ten_env.log_error(f"Deepgram error: {error_msg}") + await self._output_queue.put( + ( + error_msg.encode("utf-8"), + EVENT_TTS_ERROR, + ) ) - yield error_message.encode("utf-8"), EVENT_TTS_ERROR + + else: + self.ten_env.log_debug(f"Unknown message type: {msg_type}") From 27c71be46798940e615d379077ca94388e195548 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 13:00:52 +0000 Subject: [PATCH 03/18] fix: reconnect websocket per request_id to fix interleaved requests revert from duplex pattern to clean serial model with key improvement: reconnect websocket when request_id changes. this prevents deepgram's connection from going stale after many rapid Speak+Flush cycles. cancel() now drains until Flushed before returning so the connection is clean for subsequent requests. mark_needs_reconnect() called by extension on request_id change triggers fresh connection. test_interleaved_requests now passes (was timing out on request 8/8 because deepgram stopped responding on a long-lived connection). 
--- .../extension/deepgram_tts/deepgram_tts.py | 417 ++++++------------ .../extension/deepgram_tts/extension.py | 3 + .../deepgram_tts/tests/test_basic.py | 1 - 3 files changed, 139 insertions(+), 282 deletions(-) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py index 855f33148a..9f532bedef 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py @@ -16,18 +16,12 @@ from ten_runtime import AsyncTenEnv from ten_ai_base.const import LOG_CATEGORY_VENDOR -# Event types for the output queue +# Event types communicated back to the extension EVENT_TTS_RESPONSE = 1 EVENT_TTS_END = 2 EVENT_TTS_ERROR = 3 -EVENT_TTS_FLUSH = 4 EVENT_TTS_TTFB_METRIC = 5 -MAX_RETRY_TIMES = 5 - -# Sentinel to signal the send loop to stop -_SEND_STOP = None - class DeepgramTTSConnectionException(Exception): """Exception raised when Deepgram TTS connection fails""" @@ -41,11 +35,11 @@ def __init__(self, status_code: int, body: str): class DeepgramTTSClient: - """Duplex WebSocket client for Deepgram TTS. + """WebSocket client for Deepgram TTS. - Uses separate send and receive tasks on a single WebSocket - connection. Text goes into _text_queue via send_text(), - audio/events come out of _output_queue via get(). + Each get() call sends Speak+Flush and streams audio + until Flushed. Connection is reused across calls but + reconnected when needed (cancel, error, new request). 
""" def __init__( @@ -61,19 +55,8 @@ def __init__( self.send_non_fatal_tts_error = send_non_fatal_tts_error self._ws: ClientConnection | None = None - self._closing = False self._is_cancelled = False - - # Duplex queues - self._text_queue: asyncio.Queue[str | None] = asyncio.Queue() - self._output_queue: asyncio.Queue[tuple[bytes | int | None, int]] = ( - asyncio.Queue() - ) - - # Background tasks - self._connection_task: asyncio.Task | None = None - self._channel_tasks: list[asyncio.Task] = [] - self._connect_failures = 0 + self._needs_reconnect = False # TTFB tracking self._sent_ts: datetime | None = None @@ -90,39 +73,15 @@ def _build_ws_url(self) -> str: ) return f"{base}?{params}" - # ── Lifecycle ──────────────────────────────────────────────── - async def start(self) -> None: - """Start client: connect and launch send/receive loops.""" - self._closing = False - self._connection_task = asyncio.create_task(self._connection_loop()) - # Wait briefly for connection to establish - await asyncio.sleep(0.1) + """Preheat: establish initial connection.""" + try: + await self._connect() + except Exception as e: + self.ten_env.log_error(f"Deepgram TTS preheat failed: {e}") async def stop(self) -> None: - """Stop client: close connection and cancel tasks.""" - self._closing = True self._is_cancelled = True - - # Signal send loop to exit - await self._text_queue.put(_SEND_STOP) - - # Cancel channel tasks - for task in self._channel_tasks: - task.cancel() - self._channel_tasks.clear() - - if self._connection_task: - self._connection_task.cancel() - try: - await self._connection_task - except asyncio.CancelledError: - pass - self._connection_task = None - - # Signal any consumer waiting on output_queue - await self._output_queue.put((None, EVENT_TTS_END)) - if self._ws: try: await self._ws.send(json.dumps({"type": "Close"})) @@ -135,36 +94,50 @@ async def stop(self) -> None: self._ws = None async def cancel(self) -> None: - """Cancel current TTS request.""" + """Cancel 
current TTS. + + Sends Flush and drains until Flushed so the + connection is clean for the next request. + """ self.ten_env.log_debug("Cancelling current TTS task.") self._is_cancelled = True self.reset_ttfb() - # Send Flush to Deepgram to stop audio generation if self._ws: try: await self._ws.send(json.dumps({"type": "Flush"})) - except Exception: - pass + # Drain until Flushed to leave connection clean + await asyncio.wait_for(self._drain_until_flushed(), timeout=3.0) + except (asyncio.TimeoutError, Exception) as e: + self.ten_env.log_warn( + f"Cancel drain failed: {e}, " + "will reconnect on next request" + ) + self._needs_reconnect = True + + async def _drain_until_flushed(self) -> None: + """Read and discard WS messages until Flushed.""" + while self._ws: + msg = await self._ws.recv() + if isinstance(msg, str): + try: + data = json.loads(msg) + if data.get("type") == "Flushed": + return + except json.JSONDecodeError: + pass def reset_ttfb(self) -> None: self._sent_ts = None self._ttfb_sent = False - # ── Public interface for extension ─────────────────────────── - - async def send_text(self, text: str) -> None: - """Queue text for sending to Deepgram.""" - await self._text_queue.put(text) + def mark_needs_reconnect(self) -> None: + """Called by extension when request_id changes.""" + self._needs_reconnect = True async def get( self, text: str ) -> AsyncIterator[tuple[bytes | int | None, int]]: - """Send text and yield audio events. - - For empty text, immediately yields EVENT_TTS_END. - Otherwise sends text to the send loop and reads - events from the output queue until END or ERROR. 
- """ + """Send text and yield audio events.""" if len(text.strip()) == 0: self.ten_env.log_warn("DeepgramTTS: empty text, returning END") yield None, EVENT_TTS_END @@ -172,117 +145,102 @@ async def get( self._is_cancelled = False - # Track TTFB from when we send - if not self._ttfb_sent: - self._sent_ts = datetime.now() - - # Put text into send queue - await self._text_queue.put(text) + # Reconnect if needed (new request_id or after error) + if self._needs_reconnect: + await self._reconnect() + self._needs_reconnect = False - # Read events from output queue - while True: - try: - data_msg, event = await asyncio.wait_for( - self._output_queue.get(), timeout=5.0 - ) - except asyncio.TimeoutError: - self.ten_env.log_error("Timeout waiting for Deepgram audio") - yield ( - b"Timeout waiting for Deepgram audio", - EVENT_TTS_ERROR, - ) - break - - if event == EVENT_TTS_END: - yield None, EVENT_TTS_END - break - elif event == EVENT_TTS_ERROR: - yield data_msg, EVENT_TTS_ERROR - break - else: - yield data_msg, event - - # ── Connection loop with auto-reconnect ───────────────────── + await self._ensure_connection() - async def _connection_loop(self) -> None: - min_delay = 0.1 - max_delay = 3.0 + if not self._ttfb_sent: + self._sent_ts = datetime.now() - while not self._closing: - try: - await self._connect() - self._connect_failures = 0 - - if self._closing: - return - - # Launch duplex tasks - self._channel_tasks = [ - asyncio.create_task(self._send_loop()), - asyncio.create_task(self._receive_loop()), - ] - - # Wait for either to finish - done, pending = await asyncio.wait( - self._channel_tasks, - return_when=asyncio.FIRST_COMPLETED, - ) + # Send Speak + Flush + speak_msg = {"type": "Speak", "text": text} + await self._ws.send(json.dumps(speak_msg)) + await self._ws.send(json.dumps({"type": "Flush"})) - for task in pending: - task.cancel() - self._channel_tasks.clear() + # Receive audio until Flushed + try: + while True: + if self._is_cancelled: + 
self.ten_env.log_debug("Cancelled, stopping stream.") + break + + try: + message = await asyncio.wait_for( + self._ws.recv(), timeout=8.0 + ) + except asyncio.TimeoutError: + self.ten_env.log_error("Timeout waiting for Deepgram audio") + self._needs_reconnect = True + yield ( + b"Timeout waiting for Deepgram audio", + EVENT_TTS_ERROR, + ) + break - for task in done: - exc = task.exception() - if exc and not isinstance(exc, asyncio.CancelledError): - self.ten_env.log_warn( - f"Channel task exception: {exc}", - category=LOG_CATEGORY_VENDOR, + if isinstance(message, bytes): + if self._is_cancelled: + self.ten_env.log_debug("Dropping audio (cancelled)") + break + + # TTFB on first audio chunk + if self._sent_ts and not self._ttfb_sent: + ttfb_ms = int( + (datetime.now() - self._sent_ts).total_seconds() + * 1000 ) + yield ttfb_ms, EVENT_TTS_TTFB_METRIC + self._ttfb_sent = True - except DeepgramTTSConnectionException: - raise - - except asyncio.CancelledError: - return - - except Exception as e: - self.ten_env.log_warn( - f"vendor_status: connection error: {e}", - category=LOG_CATEGORY_VENDOR, - ) - - finally: - if self._ws: + self.ten_env.log_debug( + f"DeepgramTTS: audio chunk, " f"length: {len(message)}" + ) + yield message, EVENT_TTS_RESPONSE + else: try: - await self._ws.close() - except Exception: - pass - self._ws = None - - if self._closing: - return - - self._connect_failures += 1 - if self._connect_failures > MAX_RETRY_TIMES: - self.ten_env.log_error( - f"Max retries ({MAX_RETRY_TIMES}) " f"exceeded", - category=LOG_CATEGORY_VENDOR, - ) - return + data = json.loads(message) + msg_type = data.get("type", "") + + if msg_type == "Flushed": + self.ten_env.log_debug("DeepgramTTS: Flushed") + yield None, EVENT_TTS_END + break + + elif msg_type == "Warning": + self.ten_env.log_warn( + f"Deepgram warning: " + f"{data.get('warn_msg', '')}" + ) + + elif msg_type == "Error": + error_msg = data.get("err_msg", "Unknown error") + self.ten_env.log_error( + f"Deepgram error: 
{error_msg}" + ) + yield ( + error_msg.encode("utf-8"), + EVENT_TTS_ERROR, + ) + break + + except json.JSONDecodeError: + self.ten_env.log_warn(f"Failed to parse: {message}") + + if not self._is_cancelled: + self.ten_env.log_debug("DeepgramTTS: complete") - delay = min( - min_delay * (2 ** (self._connect_failures - 1)), - max_delay, - ) - self.ten_env.log_debug( - f"vendor_status: reconnecting in " - f"{delay:.1f}s " - f"(attempt {self._connect_failures}" - f"/{MAX_RETRY_TIMES})", + except Exception as e: + self.ten_env.log_error( + f"vendor_error: {e}", category=LOG_CATEGORY_VENDOR, ) - await asyncio.sleep(delay) + self._needs_reconnect = True + yield ( + str(e).encode("utf-8"), + EVENT_TTS_ERROR, + ) async def _connect(self) -> None: try: @@ -314,119 +272,16 @@ async def _connect(self) -> None: ) raise - # ── Send loop ─────────────────────────────────────────────── - - async def _send_loop(self) -> None: - """Read text from queue and send Speak+Flush to WS.""" - try: - while not self._closing: - text = await self._text_queue.get() - if text is _SEND_STOP: - return - - if not self._ws: - self.ten_env.log_error("WS not connected in send loop") - return - - self.ten_env.log_debug( - f"send_text: {text[:80]}", - category=LOG_CATEGORY_VENDOR, - ) - - speak_msg = {"type": "Speak", "text": text} - await self._ws.send(json.dumps(speak_msg)) - await self._ws.send(json.dumps({"type": "Flush"})) - - except asyncio.CancelledError: - return - except Exception as e: - self.ten_env.log_error( - f"vendor_error: send_loop error: {e}", - category=LOG_CATEGORY_VENDOR, - ) - raise - - # ── Receive loop ──────────────────────────────────────────── - - async def _receive_loop(self) -> None: - """Read from WS and dispatch to output queue.""" + async def _ensure_connection(self) -> None: if not self._ws: - return + await self._connect() - try: - async for message in self._ws: - if self._closing: - return - - if isinstance(message, bytes): - await self._handle_audio(message) - else: - 
await self._handle_text_message(message) - - except asyncio.CancelledError: - return - except websockets.exceptions.ConnectionClosed: - self.ten_env.log_warn( - "vendor_status: WS closed by server", - category=LOG_CATEGORY_VENDOR, - ) - except Exception as e: - self.ten_env.log_error( - f"vendor_error: receive_loop: {e}", - category=LOG_CATEGORY_VENDOR, - ) - raise - - async def _handle_audio(self, data: bytes) -> None: - """Handle binary audio message from WS.""" - if self._is_cancelled: - self.ten_env.log_debug("Dropping audio chunk (cancelled)") - return - - # TTFB on first audio chunk - if self._sent_ts and not self._ttfb_sent: - ttfb_ms = int( - (datetime.now() - self._sent_ts).total_seconds() * 1000 - ) - await self._output_queue.put((ttfb_ms, EVENT_TTS_TTFB_METRIC)) - self._ttfb_sent = True - - self.ten_env.log_debug( - f"DeepgramTTS: audio chunk, " f"length: {len(data)}" - ) - await self._output_queue.put((data, EVENT_TTS_RESPONSE)) - - async def _handle_text_message(self, raw: str) -> None: - """Handle JSON text message from WS.""" - try: - data = json.loads(raw) - except json.JSONDecodeError: - self.ten_env.log_warn(f"Failed to parse message: {raw}") - return - - msg_type = data.get("type", "") - - if msg_type == "Flushed": - self.ten_env.log_debug("DeepgramTTS: Flushed received") - # Always signal END so get() returns promptly - # (even after cancel — the extension checks - # cancel state separately) - await self._output_queue.put((None, EVENT_TTS_END)) - - elif msg_type == "Warning": - self.ten_env.log_warn( - f"Deepgram warning: " f"{data.get('warn_msg', '')}" - ) - - elif msg_type == "Error": - error_msg = data.get("err_msg", "Unknown error") - self.ten_env.log_error(f"Deepgram error: {error_msg}") - await self._output_queue.put( - ( - error_msg.encode("utf-8"), - EVENT_TTS_ERROR, - ) - ) - - else: - self.ten_env.log_debug(f"Unknown message type: {msg_type}") + async def _reconnect(self) -> None: + """Close and re-establish the connection.""" + if 
self._ws: + try: + await self._ws.close() + except Exception: + pass + self._ws = None + await self._connect() diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py index 749f70f4eb..eee5286fdc 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py @@ -220,6 +220,9 @@ async def request_tts(self, t: TTSTextInput) -> None: ) if self.client: self.client.reset_ttfb() + if self.current_request_id is not None: + # Fresh connection for new request_id + self.client.mark_needs_reconnect() self.current_request_id = t.request_id self.current_request_finished = False self.total_audio_bytes = 0 diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_basic.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_basic.py index d47e898a16..e612e4f7e8 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_basic.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_basic.py @@ -29,7 +29,6 @@ from deepgram_tts.deepgram_tts import ( EVENT_TTS_RESPONSE, EVENT_TTS_END, - EVENT_TTS_FLUSH, EVENT_TTS_TTFB_METRIC, ) From cefa2f306d72d270d2936a4f268fad0a8ecc76ad Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 13:14:41 +0000 Subject: [PATCH 04/18] fix: remove reconnect-per-request, rely on cancel drain instead reconnecting on every request_id change caused test_append_input_stress to timeout (100 requests = 100 reconnections). the cancel() drain is sufficient: it waits for Flushed before returning, keeping the connection clean for the next request. reconnect only on error/timeout. both test_interleaved_requests and test_append_input_stress now pass. 
--- .../agents/ten_packages/extension/deepgram_tts/extension.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py index eee5286fdc..749f70f4eb 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py @@ -220,9 +220,6 @@ async def request_tts(self, t: TTSTextInput) -> None: ) if self.client: self.client.reset_ttfb() - if self.current_request_id is not None: - # Fresh connection for new request_id - self.client.mark_needs_reconnect() self.current_request_id = t.request_id self.current_request_finished = False self.total_audio_bytes = 0 From 261949a188590a64958826313d22cda9bde47b84 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 13:30:50 +0000 Subject: [PATCH 05/18] refactor: align progressive disclosure docs with PD standard L0 repo card: - remove descriptive blockquote - remove L2 section (L2 is reached via L1 links) - rename Type to Repo Type, use enum value distributed-system - add Description row to identity block - rename L1 Summaries to L1 Index with Audience column - update Last Reviewed to 2026-04-07 07_gotchas.md: - cut from 236 to 117 lines (under 200-line ceiling) - remove full restart recipe and operational runbook material - keep actual gotchas: property tuples, signal handlers, task run, zombies, .env, next.js lock, tman wipe, graph cache, port 3000 - add pointer to new L2 deep dive new L2 deep dive operations_restarts.md: - full restart procedure with next-server kill - zombie worker cleanup - stale lock and cache cleanup - port 3000 conflict debugging with /proc forensics - .env and container restart recovery - docker cp extension code workflow - after-container-restart checklist cross-links: - add operations_restarts to deep_dives/_index.md - add to related deep dives in 07_gotchas.md and 05_workflows.md - trim 
05_workflows.md docker cp section to pointer --- docs/ai/L0_repo_card.md | 33 ++-- docs/ai/L1/05_workflows.md | 21 +-- docs/ai/L1/07_gotchas.md | 182 ++++--------------- docs/ai/L1/deep_dives/_index.md | 1 + docs/ai/L1/deep_dives/operations_restarts.md | 171 +++++++++++++++++ 5 files changed, 221 insertions(+), 187 deletions(-) create mode 100644 docs/ai/L1/deep_dives/operations_restarts.md diff --git a/docs/ai/L0_repo_card.md b/docs/ai/L0_repo_card.md index 53c8a56953..288312322b 100644 --- a/docs/ai/L0_repo_card.md +++ b/docs/ai/L0_repo_card.md @@ -1,31 +1,26 @@ # TEN Framework — Repo Card -> Open-source platform for building real-time multimodal AI agents with voice, video, and tool capabilities. - ## Identity | Field | Value | | ------------- | -------------------------------------------------------------------- | | Repo | `TEN-framework/TEN-Agent` | -| Type | `framework` (SDK-library + API-service + frontend) | +| Description | Open-source platform for building real-time multimodal AI agents | +| Repo Type | `distributed-system` | | Language | Python (extensions), Go (API server), TypeScript/React (playground) | | Deploy Target | Docker container (`ten_agent_dev`), Taskfile-based build | | Owner | TEN Framework team | -| Last Reviewed | 2026-04-02 | - -## L1 — Summaries - -| File | Purpose | -| ---------------------------------------- | -------------------------------------------------------- | -| [01_setup](L1/01_setup.md) | Docker, .env, ports, health checks, restart procedures | -| [02_architecture](L1/02_architecture.md) | Extensions, graphs, connections, RTC-first design | -| [03_code_map](L1/03_code_map.md) | Directory tree, key files, base classes, 93+ extensions | -| [04_conventions](L1/04_conventions.md) | Naming, Pydantic configs, params pattern, formatting | -| [05_workflows](L1/05_workflows.md) | Create extension, modify graph, test, restart, deploy | -| [06_interfaces](L1/06_interfaces.md) | REST API, connection schemas, base class abstract 
methods| -| [07_gotchas](L1/07_gotchas.md) | Property tuples, signal handlers, zombies, .env timing | -| [08_security](L1/08_security.md) | API keys, .env, sensitive logging, git hooks | +| Last Reviewed | 2026-04-07 | -## L2 — Deep Dives +## L1 Index -See [L1/deep_dives/_index.md](L1/deep_dives/_index.md) for extended guides referenced by L1 files. +| File | Purpose | Audience | +| ---------------------------------------- | -------------------------------------------------------- | -------- | +| [01_setup](L1/01_setup.md) | Docker, .env, ports, health checks, restart procedures | both | +| [02_architecture](L1/02_architecture.md) | Extensions, graphs, connections, RTC-first design | both | +| [03_code_map](L1/03_code_map.md) | Directory tree, key files, base classes, 93+ extensions | both | +| [04_conventions](L1/04_conventions.md) | Naming, Pydantic configs, params pattern, formatting | both | +| [05_workflows](L1/05_workflows.md) | Create extension, modify graph, test, restart, deploy | both | +| [06_interfaces](L1/06_interfaces.md) | REST API, connection schemas, base class abstract methods| both | +| [07_gotchas](L1/07_gotchas.md) | Property tuples, signal handlers, zombies, .env timing | both | +| [08_security](L1/08_security.md) | API keys, .env, sensitive logging, git hooks | both | diff --git a/docs/ai/L1/05_workflows.md b/docs/ai/L1/05_workflows.md index b8ad2b729b..acdc4b0b76 100644 --- a/docs/ai/L1/05_workflows.md +++ b/docs/ai/L1/05_workflows.md @@ -143,24 +143,8 @@ docker exec ten_agent_dev bash -c \ ## Update Extension Code in Running Container -When iterating on extension code locally: - -```bash -# Copy updated files into the container (use /. to avoid nested dirs) -sudo docker cp ./agents/ten_packages/extension/my_ext/. 
\ - ten_agent_dev:/app/agents/ten_packages/extension/my_ext/ - -# Verify symlink exists in the example's tenapp -sudo docker exec ten_agent_dev bash -c \ - "ls -la /app/agents/examples//tenapp/ten_packages/extension/my_ext" - -# If missing, create it manually -sudo docker exec ten_agent_dev bash -c \ - "ln -sf /app/agents/ten_packages/extension/my_ext \ - /app/agents/examples//tenapp/ten_packages/extension/my_ext" - -# Then nuclear restart -``` +See [Operations and Restarts](deep_dives/operations_restarts.md) for the full procedure +including `docker cp` syntax, symlink verification, and restart steps. ## Pre-Commit Checks @@ -179,3 +163,4 @@ Pre-commit hooks validate: API key patterns, Black formatting, conventional comm - [Extension Development](deep_dives/extension_development.md) — Full extension creation with code examples - [Graph Configuration](deep_dives/graph_configuration.md) — Connection wiring and routing patterns - [Testing](deep_dives/testing.md) — Test infrastructure, guarder tests, debugging +- [Operations and Restarts](deep_dives/operations_restarts.md) — Full restart procedures, recovery diff --git a/docs/ai/L1/07_gotchas.md b/docs/ai/L1/07_gotchas.md index e61011e489..15251d5f57 100644 --- a/docs/ai/L1/07_gotchas.md +++ b/docs/ai/L1/07_gotchas.md @@ -41,110 +41,42 @@ Never start the server with `./bin/api` or `./bin/main` directly. ## Zombie Worker Processes -Worker processes (`bin/main`) run on the **host machine**, not inside Docker. -They survive container restarts and server restarts. - -```bash -# Check for zombies -ps -elf | grep 'bin/main' | grep -v grep - -# Kill them -ps -elf | grep 'bin/main' | grep -v grep | awk '{print $4}' | xargs -r sudo kill -9 -``` - -Always kill zombies before restarting the server. +Worker processes (`bin/main`) can survive container and server restarts. +Always check for and kill zombies before restarting. ## .env Loaded at Container Startup Only -Editing `.env` while the container is running has **no effect**. 
You must: - -```bash -cd /home/ubuntu/ten-framework/ai_agents -docker compose down && docker compose up -d -# Then reinstall Python deps and task run -``` - -## Node.js Version for Playground - -Playground requires Node.js >= 20.9.0. The host machine may have an older version. -Always run playground from **inside the container** (has Node 22): - -```bash -# WRONG: running from host with Node 18 -cd playground && npm run dev # Fails - -# CORRECT: task run starts playground inside container automatically -``` +Editing `.env` while the container is running has **no effect**. You must +`docker compose down && docker compose up -d`, then reinstall Python deps. ## Next.js Lock File -After crashes, `.next/dev/lock` becomes stale, preventing restart: - -```bash -sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock" -``` - -Always use nuclear restart after playground crashes. +After crashes, `.next/dev/lock` becomes stale, preventing restart. Delete it +and do a full restart. See [Operations and Restarts](deep_dives/operations_restarts.md). ## Python Deps Not Persisted -Python dependencies are installed into the container's filesystem and are lost -on container restart. Always reinstall after `docker compose down && up`: - -```bash -docker exec ten_agent_dev bash -c \ - "cd /app/agents/examples/voice-assistant-advanced/tenapp && bash scripts/install_python_deps.sh" -``` +Python dependencies are lost on container restart. Always reinstall after +`docker compose down && up`. -## tman Install Creates Symlinks - -Never manually create symlinks with `ln -s` for extensions. -Always use `tman install` which resolves dependencies and creates correct links: +## tman install Can Wipe bin/main -```bash -docker exec ten_agent_dev bash -c "cd /app/agents/examples//tenapp && tman install" -``` +Running `tman install` when system dependencies have newer versions replaces +the runtime packages and **deletes `bin/main`**. 
Use `task install` (full +rebuild) instead of bare `tman install`. Signs: Worker fails with +`bin/main: No such file or directory` in logs. -**Important:** If `tman install` doesn't create a symlink for a new extension (e.g., after -adding it to `manifest.json`), create it manually as a fallback: +## tman Install Creates Symlinks -```bash -sudo docker exec ten_agent_dev bash -c \ - "ln -sf /app/agents/ten_packages/extension/my_ext \ - /app/agents/examples//tenapp/ten_packages/extension/my_ext" -``` +Never manually `ln -s` for extensions. Use `tman install` which resolves +dependencies and creates correct links. If a symlink is missing after +`tman install`, create it manually as a fallback. ## docker cp Creates Nested Directories -When using `docker cp` to update extension code in the container, beware of -trailing slashes creating nested directories: - -```bash -# WRONG — creates /app/.../deepgram_tts/deepgram_tts/ (nested) -sudo docker cp ./deepgram_tts/ container:/app/.../deepgram_tts/ - -# CORRECT — copy contents into existing directory -sudo docker cp ./deepgram_tts/. container:/app/.../deepgram_tts/ -``` - -If you see `ModuleNotFoundError: No module named 'ten_packages.extension.X'` -after a `docker cp`, check for nested directories inside the extension folder. - -## tman install Can Wipe bin/main - -Running `tman install` when system dependencies have newer versions will replace -the runtime packages, which **deletes `bin/main`**. You must run the full -`task install` (not just `tman install`) to rebuild it: - -```bash -# This alone can break things if runtime versions changed: -docker exec ten_agent_dev bash -c "cd /app/.../tenapp && tman install" - -# This is safe — rebuilds bin/main after tman install: -docker exec ten_agent_dev bash -c "cd /app/agents/examples/ && task install" -``` - -Signs: Worker fails with `bin/main: No such file or directory` in logs. +When using `docker cp` to update extension code, trailing slashes create +nested directories. 
Use `docker cp ./ext/. container:/path/ext/` syntax. +Signs: `ModuleNotFoundError: No module named 'ten_packages.extension.X'`. ## Audio Routing: Split at Source Only @@ -152,84 +84,34 @@ When routing audio to multiple destinations, the split must happen at the source node (e.g., `agora_rtc`), not at intermediate nodes. Splitting from intermediate nodes can cause crashes. -```json -// CORRECT: agora_rtc sends pcm_frame to both stt AND vad -{"extension": "agora_rtc", "audio_frame": [ - {"name": "pcm_frame", "dest": [{"extension": "stt"}, {"extension": "vad"}]} -]} -``` - ## Frontend Caches Graph List -The playground caches the `/graphs` API response. When adding or removing graphs -from `property.json`, a nuclear restart is required — simple server restart -is not enough. +The playground caches the `/graphs` API response. When adding or removing +graphs from `property.json`, a full restart is required — simple server +restart is not enough. ## Manifest Module Name Must Match The `name` field in extension `manifest.json` must exactly match the `addon` field used in graph nodes in `property.json`. Mismatches cause silent failures. +## next-server Holds Port 3000 + +Killing `node` and `bun` is not enough — `next-server` is a separate process +that holds port 3000. If port 3000 is occupied, Next.js silently starts on +3001+ which isn't Docker-exposed, making the frontend appear down. + ## Apple Silicon Docker Docker containers may need Rosetta for x86 images on Apple Silicon Macs. -Enable in Docker Desktop: Settings → General → Use Rosetta for x86_64/amd64 emulation. +Enable in Docker Desktop: Settings > General > Use Rosetta. ## Windows Line Endings -Before cloning on Windows, configure git to preserve Unix line endings: - -```bash -git config --global core.autocrlf false -``` - -## Nuclear Restart Recipe - -When in doubt, use the nuclear option. **Must kill `next-server` too** — it -holds port 3000 even after its parent `node` process is killed: - -```bash -# 1. 
Kill EVERYTHING (including next-server which holds port 3000) -sudo docker exec ten_agent_dev bash -c \ - "pkill -9 -f 'bin/api'; pkill -9 -f bun; pkill -9 -f node; pkill -9 -f next-server; pkill -9 -f tman" - -# 2. Clean up stale files -sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock" - -# 3. Wait for port 3000 TIME_WAIT to clear (critical!) -# If Next.js can't bind port 3000, it silently starts on 3001/3002 which -# isn't exposed by Docker — the frontend appears down. -sleep 30 # or check: docker exec ten_agent_dev bash -c "cat /proc/net/tcp6 | grep ':0BB8' | wc -l" - -# 4. Start -sudo docker exec -d ten_agent_dev bash -c \ - "cd /app/agents/examples/voice-assistant && task run > /tmp/task_run.log 2>&1" - -# 5. Verify (wait ~12s for startup) -sleep 12 -sudo docker exec ten_agent_dev bash -c \ - "curl -s http://localhost:8080/health && curl -s -o /dev/null -w ' Frontend:%{http_code}' http://localhost:3000/" -``` - -**Verify the logs** — check Next.js started on port 3000 (not 3001+): -```bash -sudo docker exec ten_agent_dev bash -c "strings /tmp/task_run.log | grep -E 'Local:|Port|Ready|Error'" -``` - -If you see `Port 3000 is in use`, find and kill the process holding it: -```bash -sudo docker exec ten_agent_dev bash -c \ - "for pid in /proc/[0-9]*/fd/*; do \ - link=\$(readlink \$pid 2>/dev/null); \ - echo \"\$link\" | grep -q socket: && \ - inode=\$(echo \$link | grep -oP '\\d+') && \ - grep -q \$inode /proc/net/tcp6 2>/dev/null && \ - grep \$inode /proc/net/tcp6 | grep -q ':0BB8' && \ - echo PID=\$(echo \$pid | cut -d/ -f3) && break; \ - done" -``` +Before cloning on Windows: `git config --global core.autocrlf false` ## Related Deep Dives +- [Operations and Restarts](deep_dives/operations_restarts.md) — Full restart procedures, port debugging, recovery - [Deployment](deep_dives/deployment.md) — Production setup, persistent startup - [Server Architecture](deep_dives/server_architecture.md) — Worker lifecycle, session management diff 
--git a/docs/ai/L1/deep_dives/_index.md b/docs/ai/L1/deep_dives/_index.md index 34502c601f..07b5b13b20 100644 --- a/docs/ai/L1/deep_dives/_index.md +++ b/docs/ai/L1/deep_dives/_index.md @@ -7,3 +7,4 @@ | [testing.md](testing.md) | All 15 TTS + 10 ASR guarder tests, pass criteria, config files, debugging | Running or debugging tests for an extension | | [deployment.md](deployment.md) | Docker, Cloudflare, Nginx, Grafana monitoring | Deploying to production or setting up monitoring | | [server_architecture.md](server_architecture.md) | Go server, property injection, worker lifecycle | Understanding server internals or debugging | +| [operations_restarts.md](operations_restarts.md) | Full restart procedures, port debugging, recovery| Restarting services, crash recovery, port conflicts| diff --git a/docs/ai/L1/deep_dives/operations_restarts.md b/docs/ai/L1/deep_dives/operations_restarts.md new file mode 100644 index 0000000000..cf824560ed --- /dev/null +++ b/docs/ai/L1/deep_dives/operations_restarts.md @@ -0,0 +1,171 @@ +# Operations and Restarts + +> **When to Read This:** Load this document when you need to restart services, +> debug port conflicts, recover from crashes, or clean up zombie processes. + +## When to Do a Full Restart + +| What Changed | Action | +| ------------------------------- | ---------------------------------------------------- | +| `property.json` (graphs added) | Full restart (frontend caches graph list) | +| `property.json` (config only) | No restart needed (loaded per session) | +| `.env` | `docker compose down && docker compose up -d` + deps | +| Python code | Restart server only | +| Go code | `task install` then restart server | +| Container restart | Reinstall Python deps, then `task run` | + +## Full Restart Procedure + +Must kill `next-server` too — it holds port 3000 even after `node`/`bun` die: + +```bash +# 1. 
Kill EVERYTHING
+sudo docker exec ten_agent_dev bash -c \
+  "pkill -9 -f 'bin/api'; pkill -9 -f bun; pkill -9 -f node; \
+   pkill -9 -f next-server; pkill -9 -f tman"
+
+# 2. Clean up stale files
+sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock"
+
+# 3. Wait for port 3000 TIME_WAIT to clear
+# If Next.js can't bind port 3000, it silently starts on 3001/3002
+# which isn't exposed by Docker — the frontend appears down.
+sleep 30
+
+# 4. Start
+sudo docker exec -d ten_agent_dev bash -c \
+  "cd /app/agents/examples/<example> && task run > /tmp/task_run.log 2>&1"
+
+# 5. Verify (wait ~12s for startup)
+sleep 12
+sudo docker exec ten_agent_dev bash -c \
+  "curl -s http://localhost:8080/health && \
+   curl -s -o /dev/null -w ' Frontend:%{http_code}' http://localhost:3000/"
+```
+
+## Verification
+
+Check Next.js started on port 3000 (not 3001+):
+
+```bash
+sudo docker exec ten_agent_dev bash -c \
+  "strings /tmp/task_run.log | grep -E 'Local:|Port|Ready|Error'"
+```
+
+Expected output:
+```
+ - Local: http://localhost:3000
+ Ready in 2.1s
+```
+
+If you see `Port 3000 is in use`, the frontend is on the wrong port.
+
+## Zombie Worker Cleanup
+
+Worker processes (`bin/main`) can survive container and server restarts:
+
+```bash
+# Check for zombies
+sudo docker exec ten_agent_dev bash -c \
+  "ps aux | grep 'bin/main' | grep -v grep"
+
+# Kill them
+sudo docker exec ten_agent_dev bash -c \
+  "pkill -9 -f 'bin/main'"
+```
+
+Always kill zombies before restarting the server.
+
+## Stale Lock Cleanup
+
+After crashes, `.next/dev/lock` becomes stale:
+
+```bash
+sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock"
+```
+
+Also clear the Next.js cache if React version errors appear:
+
+```bash
+sudo docker exec ten_agent_dev bash -c "rm -rf /app/playground/.next"
+```
+
+## Port 3000 Conflict Debugging
+
+If Next.js reports "Port 3000 is in use", find the process holding it:
+
+```bash
+sudo docker exec ten_agent_dev bash -c \
+  "for pid in /proc/[0-9]*/fd/*; do \
+     link=\$(readlink \$pid 2>/dev/null); \
+     echo \"\$link\" | grep -q socket: && \
+     inode=\$(echo \$link | grep -oP '\\d+') && \
+     grep -q \$inode /proc/net/tcp6 2>/dev/null && \
+     grep \$inode /proc/net/tcp6 | grep -q ':0BB8' && \
+     echo PID=\$(echo \$pid | cut -d/ -f3) && break; \
+   done"
+```
+
+Kill the PID, wait for TIME_WAIT to clear (~30s), then restart.
+
+If no PID is found but port is still busy, it's in TIME_WAIT state. Check:
+
+```bash
+sudo docker exec ten_agent_dev bash -c \
+  "cat /proc/net/tcp6 | grep ':0BB8'"
+```
+
+State `06` = TIME_WAIT. Wait 30-60 seconds for it to clear.
+
+## .env and Container Restart Recovery
+
+`.env` is loaded at container startup only. After editing:
+
+```bash
+cd /home/ubuntu/ten-framework/ai_agents
+docker compose down && docker compose up -d
+```
+
+Then reinstall everything (Python deps are not persisted):
+
+```bash
+sudo docker exec ten_agent_dev bash -c \
+  "cd /app/agents/examples/<example> && task install"
+```
+
+## Copying Extension Code to Running Container
+
+When iterating on extension code locally:
+
+```bash
+# Copy updated files (use /. to avoid nested dirs)
+sudo docker cp ./agents/ten_packages/extension/my_ext/.
\
+  ten_agent_dev:/app/agents/ten_packages/extension/my_ext/
+
+# Verify symlink exists in the example's tenapp
+sudo docker exec ten_agent_dev bash -c \
+  "ls -la /app/agents/examples/<example>/tenapp/ten_packages/extension/my_ext"
+
+# If missing, create it manually
+sudo docker exec ten_agent_dev bash -c \
+  "ln -sf /app/agents/ten_packages/extension/my_ext \
+   /app/agents/examples/<example>/tenapp/ten_packages/extension/my_ext"
+```
+
+Then do a full restart.
+
+## After Container Restart Checklist
+
+1. Reinstall Python dependencies
+2. Rebuild Go binary (`task install`)
+3. Kill any zombie workers
+4. Remove stale `.next/dev/lock`
+5. Start with `task run`
+6. Verify health endpoint and frontend status code
+
+## See Also
+
+- [Back to Gotchas](../07_gotchas.md)
+- [Back to Workflows](../05_workflows.md)
+- [Deployment](deployment.md) — Production setup, Cloudflare, Nginx
+- [Server Architecture](server_architecture.md) — Worker lifecycle

From 2fd1ace327e044d4b990231808a3b326acfa1725 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 7 Apr 2026 13:45:34 +0000
Subject: [PATCH 06/18] =?UTF-8?q?fix:=20address=20codex=20review=20?=
 =?UTF-8?q?=E2=80=94=20connect=20fail-fast=20and=20error=20handling?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

issue 1 (high): _connect() now always raises after calling the error
callback on 401. previously it returned control to the caller with
self._ws == None, causing a secondary AttributeError that masked the
real auth failure.

issue 2 (high): EVENT_TTS_ERROR on non-final chunks is logged as a
warning but not sent as a data event. sending error data for transient
partial-stream failures confuses the test harness and the base class
state machine. errors are only surfaced via _finalize_request() on the
final chunk (text_input_end=True), which is the correct contract.

open question: request state fields (current_request_id, sent_ts,
_audio_start_sent) are shared mutable state.
however, the base class AsyncTTS2BaseExtension serializes request_tts() calls — it does not overlap them. this is confirmed by the interleaved_requests test passing, which exercises rapid request_id switching. --- .../extension/deepgram_tts/deepgram_tts.py | 9 +++--- .../extension/deepgram_tts/extension.py | 31 ++++++++++++------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py index 9f532bedef..ab774ae3b1 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py @@ -260,10 +260,11 @@ async def _connect(self) -> None: if "401" in error_message or "Unauthorized" in error_message: if self.send_fatal_tts_error: await self.send_fatal_tts_error(error_message=error_message) - else: - raise DeepgramTTSConnectionException( - status_code=401, body=error_message - ) from e + # Always raise so callers don't proceed + # with self._ws == None + raise DeepgramTTSConnectionException( + status_code=401, body=error_message + ) from e else: self.ten_env.log_error(f"Deepgram TTS connection failed: {e}") if self.send_non_fatal_tts_error: diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py index 749f70f4eb..7f18db2b72 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py @@ -350,25 +350,34 @@ async def _process_tts_text(self, text: str, t: TTSTextInput) -> None: break elif event_status == EVENT_TTS_ERROR: - self.ten_env.log_error( - "Received TTS_ERROR event from " "Deepgram TTS" - ) error_msg = ( data_msg.decode("utf-8") if isinstance(data_msg, bytes) else str(data_msg) ) + self.ten_env.log_error(f"TTS_ERROR from Deepgram: {error_msg}") + error = 
ModuleError( + message=error_msg, + module=ModuleType.TTS, + code=ModuleErrorCode.NON_FATAL_ERROR, + vendor_info=ModuleErrorVendorInfo(vendor=self.vendor()), + ) if t.text_input_end: + # Final chunk: surface error and + # finalize the request await self._finalize_request( TTSAudioEndReason.ERROR, - error=ModuleError( - message=error_msg, - module=ModuleType.TTS, - code=(ModuleErrorCode.NON_FATAL_ERROR), - vendor_info=ModuleErrorVendorInfo( - vendor=self.vendor() - ), - ), + error=error, + ) + else: + # Non-final chunk: log only. The base + # class will send subsequent chunks for + # this request_id; errors on partial + # streaming are transient. + self.ten_env.log_warn( + f"Transient TTS error on non-final " + f"chunk for {t.request_id}: " + f"{error_msg}" ) break From ace659fe5bd604ee6766839e21fdf0cdb6a2847b Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 13:50:53 +0000 Subject: [PATCH 07/18] test: add state machine, recovery, and redaction tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit address codex review gaps vs cartesia_tts test coverage: - test_sequential_requests: 3 requests with different IDs, validates request_id in audio_start and audio_end events - test_reconnect_after_error: first request errors mid-stream, second request completes successfully (recovery) - test_config_redacts_api_key: to_str(sensitive_handling=True) does not leak the API key - test_client_empty_text_yields_end: unit test on client.get() for empty text — yields END immediately, no WS connection - test_client_whitespace_text_yields_end: same for whitespace standalone tests: 18/18 passed (was 13) --- .../deepgram_tts/tests/test_state_machine.py | 289 ++++++++++++++++++ 1 file changed, 289 insertions(+) create mode 100644 ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py 
b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py new file mode 100644 index 0000000000..96cdb72ad3 --- /dev/null +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py @@ -0,0 +1,289 @@ +import sys +from pathlib import Path + +# Add project root to sys.path +project_root = str(Path(__file__).resolve().parents[6]) +if project_root not in sys.path: + sys.path.insert(0, project_root) + +# +# This file is part of TEN Framework, an open source project. +# Licensed under the Apache License, Version 2.0. +# See the LICENSE file for more information. +# +import asyncio +import copy +import json +from unittest.mock import patch, AsyncMock, MagicMock + +from ten_runtime import ( + ExtensionTester, + TenEnvTester, + Data, +) +from ten_ai_base.struct import TTSTextInput +from deepgram_tts.deepgram_tts import ( + EVENT_TTS_RESPONSE, + EVENT_TTS_END, + EVENT_TTS_TTFB_METRIC, + EVENT_TTS_ERROR, + DeepgramTTSClient, +) +from deepgram_tts.config import DeepgramTTSConfig + +MOCK_CONFIG = { + "params": { + "api_key": "test_api_key", + "model": "aura-2-thalia-en", + "encoding": "linear16", + "sample_rate": 24000, + }, +} + + +def _create_mock_client(): + mock = MagicMock() + mock.start = AsyncMock() + mock.stop = AsyncMock() + mock.cancel = AsyncMock() + mock.reset_ttfb = lambda: None + mock.mark_needs_reconnect = lambda: None + + fake_audio = b"\x00\x01" * 200 + + async def mock_get(text): + yield (100, EVENT_TTS_TTFB_METRIC) + yield (fake_audio, EVENT_TTS_RESPONSE) + yield (None, EVENT_TTS_END) + + mock.get.side_effect = mock_get + return mock + + +# ================ test sequential requests ================ +class SequentialRequestsTester(ExtensionTester): + """Send 3 requests with different IDs sequentially. + + Each request should produce tts_audio_start, audio + frames, and tts_audio_end with the correct request_id. 
+ """ + + def __init__(self): + super().__init__() + self.completed_request_ids = [] + self.audio_start_ids = [] + self.expected_ids = [ + "seq_req_1", + "seq_req_2", + "seq_req_3", + ] + self.send_index = 0 + + def on_start(self, ten_env_tester: TenEnvTester) -> None: + ten_env_tester.log_info("Sequential requests test started.") + self._send_next(ten_env_tester) + ten_env_tester.on_start_done() + + def _send_next(self, ten_env_tester: TenEnvTester) -> None: + if self.send_index >= len(self.expected_ids): + return + req_id = self.expected_ids[self.send_index] + tts_input = TTSTextInput( + request_id=req_id, + text=f"Hello from request {self.send_index + 1}.", + text_input_end=True, + ) + data = Data.create("tts_text_input") + data.set_property_from_json(None, tts_input.model_dump_json()) + ten_env_tester.send_data(data) + self.send_index += 1 + + def on_data(self, ten_env: TenEnvTester, data) -> None: + name = data.get_name() + if name == "tts_audio_start": + json_str, _ = data.get_property_to_json("") + d = json.loads(json_str) if json_str else {} + rid = d.get("request_id", "") + self.audio_start_ids.append(rid) + elif name == "tts_audio_end": + json_str, _ = data.get_property_to_json("") + d = json.loads(json_str) if json_str else {} + rid = d.get("request_id", "") + self.completed_request_ids.append(rid) + ten_env.log_info(f"Completed request: {rid}") + if len(self.completed_request_ids) < len(self.expected_ids): + self._send_next(ten_env) + else: + ten_env.stop_test() + + +@patch("deepgram_tts.extension.DeepgramTTSClient") +def test_sequential_requests(MockClient): + """Each sequential request should complete with its own + request_id in audio_start and audio_end.""" + MockClient.return_value = _create_mock_client() + + tester = SequentialRequestsTester() + tester.set_test_mode_single("deepgram_tts", json.dumps(MOCK_CONFIG)) + tester.run() + + assert tester.completed_request_ids == [ + "seq_req_1", + "seq_req_2", + "seq_req_3", + ], ( + f"Expected 3 
sequential completions, got " + f"{tester.completed_request_ids}" + ) + assert tester.audio_start_ids == [ + "seq_req_1", + "seq_req_2", + "seq_req_3", + ], f"audio_start ids mismatch: {tester.audio_start_ids}" + + +# ================ test reconnect after error ================ +class ReconnectAfterErrorTester(ExtensionTester): + """First request errors, second request should succeed. + + Validates that the client recovers after a mid-stream + failure. + """ + + def __init__(self): + super().__init__() + self.error_received = False + self.second_audio_end = False + + def on_start(self, ten_env_tester: TenEnvTester) -> None: + # First request will trigger an error + tts_input = TTSTextInput( + request_id="err_req_1", + text="This will error.", + text_input_end=True, + ) + data = Data.create("tts_text_input") + data.set_property_from_json(None, tts_input.model_dump_json()) + ten_env_tester.send_data(data) + ten_env_tester.on_start_done() + + def on_data(self, ten_env: TenEnvTester, data) -> None: + name = data.get_name() + if name == "tts_audio_end": + if not self.error_received: + # First request ended (with error) — send + # second request + self.error_received = True + tts_input = TTSTextInput( + request_id="ok_req_2", + text="This should work.", + text_input_end=True, + ) + data2 = Data.create("tts_text_input") + data2.set_property_from_json(None, tts_input.model_dump_json()) + ten_env.send_data(data2) + else: + self.second_audio_end = True + ten_env.stop_test() + + +@patch("deepgram_tts.extension.DeepgramTTSClient") +def test_reconnect_after_error(MockClient): + """After an error, subsequent requests should succeed.""" + call_count = 0 + + def create_mock(): + mock = MagicMock() + mock.start = AsyncMock() + mock.stop = AsyncMock() + mock.cancel = AsyncMock() + mock.reset_ttfb = lambda: None + mock.mark_needs_reconnect = lambda: None + + fake_audio = b"\x00\x01" * 200 + + async def mock_get(text): + nonlocal call_count + call_count += 1 + if call_count == 1: + # 
First call: error + yield ( + b"Simulated error", + EVENT_TTS_ERROR, + ) + else: + # Subsequent calls: success + yield (100, EVENT_TTS_TTFB_METRIC) + yield (fake_audio, EVENT_TTS_RESPONSE) + yield (None, EVENT_TTS_END) + + mock.get.side_effect = mock_get + return mock + + MockClient.return_value = create_mock() + + tester = ReconnectAfterErrorTester() + tester.set_test_mode_single("deepgram_tts", json.dumps(MOCK_CONFIG)) + tester.run() + + assert ( + tester.second_audio_end + ), "Second request should complete after first errored." + + +# ================ test config redaction ================ +def test_config_redacts_api_key(): + """to_str(sensitive_handling=True) must not leak the + API key.""" + config = DeepgramTTSConfig( + params={ + "api_key": "super-secret-key-12345", + "model": "aura-2-thalia-en", + } + ) + config.update_params() + + safe_str = config.to_str(sensitive_handling=True) + + assert "super-secret-key-12345" not in safe_str + assert "aura-2-thalia-en" in safe_str + + +# ================ test empty text yields END ================ +def test_client_empty_text_yields_end(): + """get() with empty text should yield EVENT_TTS_END + immediately without connecting.""" + + async def _run(): + ten_env = MagicMock() + ten_env.log_warn = MagicMock() + config = DeepgramTTSConfig(api_key="test") + client = DeepgramTTSClient(config=config, ten_env=ten_env) + + events = [] + async for data, event in client.get(""): + events.append(event) + + assert events == [EVENT_TTS_END] + assert client._ws is None # no connection made + + asyncio.run(_run()) + + +def test_client_whitespace_text_yields_end(): + """get() with whitespace-only text should yield + EVENT_TTS_END.""" + + async def _run(): + ten_env = MagicMock() + ten_env.log_warn = MagicMock() + config = DeepgramTTSConfig(api_key="test") + client = DeepgramTTSClient(config=config, ten_env=ten_env) + + events = [] + async for data, event in client.get(" \n\t "): + events.append(event) + + assert events == 
[EVENT_TTS_END] + + asyncio.run(_run()) From 2a65917383b37ff3626c61de4354572bedd00056 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 14:05:56 +0000 Subject: [PATCH 08/18] fix: eliminate double error emission on auth failure, add targeted tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit remove error callbacks from DeepgramTTSClient._connect() — error reporting is now solely the caller's responsibility. this eliminates the double-report where _connect() called send_fatal_tts_error and then raised, causing _handle_connection_error to send a second error. consolidate error handlers to use _finalize_request() which emits exactly one error via finish_request(error=...). new tests: - test_auth_error_single_emission: 401 produces exactly 1 error event - test_nonfinal_error_not_surfaced: error on non-final chunk is logged but not sent as public data event (documented contract) standalone tests: 20/20 passed --- .../extension/deepgram_tts/deepgram_tts.py | 13 -- .../extension/deepgram_tts/extension.py | 31 +--- .../deepgram_tts/tests/test_state_machine.py | 150 ++++++++++++++++++ 3 files changed, 157 insertions(+), 37 deletions(-) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py index ab774ae3b1..4896b997c9 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py @@ -5,7 +5,6 @@ # import asyncio import json -from collections.abc import Callable from datetime import datetime from typing import AsyncIterator @@ -46,13 +45,9 @@ def __init__( self, config: DeepgramTTSConfig, ten_env: AsyncTenEnv, - send_fatal_tts_error: Callable[[str], asyncio.Future] | None = None, - send_non_fatal_tts_error: Callable[[str], asyncio.Future] | None = None, ): self.config = config self.ten_env = ten_env - self.send_fatal_tts_error = 
send_fatal_tts_error - self.send_non_fatal_tts_error = send_non_fatal_tts_error self._ws: ClientConnection | None = None self._is_cancelled = False @@ -258,19 +253,11 @@ async def _connect(self) -> None: except Exception as e: error_message = str(e) if "401" in error_message or "Unauthorized" in error_message: - if self.send_fatal_tts_error: - await self.send_fatal_tts_error(error_message=error_message) - # Always raise so callers don't proceed - # with self._ws == None raise DeepgramTTSConnectionException( status_code=401, body=error_message ) from e else: self.ten_env.log_error(f"Deepgram TTS connection failed: {e}") - if self.send_non_fatal_tts_error: - await self.send_non_fatal_tts_error( - error_message=error_message - ) raise async def _ensure_connection(self) -> None: diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py index 7f18db2b72..b56e8c1230 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py @@ -138,8 +138,6 @@ def _create_client(self, ten_env: AsyncTenEnv) -> DeepgramTTSClient: return DeepgramTTSClient( config=self.config, ten_env=ten_env, - send_fatal_tts_error=self.send_fatal_tts_error, - send_non_fatal_tts_error=(self.send_non_fatal_tts_error), ) async def _ensure_client(self) -> None: @@ -271,15 +269,7 @@ async def request_tts(self, t: TTSTextInput) -> None: code=ModuleErrorCode.NON_FATAL_ERROR, vendor_info=ModuleErrorVendorInfo(vendor=self.vendor()), ) - await self.send_tts_error( - request_id=self.current_request_id, - error=error, - ) - await self.finish_request( - request_id=self.current_request_id, - reason=TTSAudioEndReason.ERROR, - error=error, - ) + await self._finalize_request(TTSAudioEndReason.ERROR, error=error) if isinstance(e, ConnectionRefusedError): await self._reconnect_client() @@ -388,10 +378,11 @@ async def _process_tts_text(self, text: 
str, t: TTSTextInput) -> None: async def _handle_connection_error( self, e: DeepgramTTSConnectionException ) -> None: - """Handle Deepgram connection errors.""" - self.ten_env.log_error( - f"DeepgramTTSConnectionException in request_tts: " f"{e.body}" - ) + """Handle Deepgram connection errors. + + Sends exactly one error event via _finalize_request. + """ + self.ten_env.log_error(f"DeepgramTTSConnectionException: {e.body}") if e.status_code == 401: code = ModuleErrorCode.FATAL_ERROR else: @@ -407,15 +398,7 @@ async def _handle_connection_error( message=e.body, ), ) - await self.send_tts_error( - request_id=self.current_request_id, - error=error, - ) - await self.finish_request( - request_id=self.current_request_id, - reason=TTSAudioEndReason.ERROR, - error=error, - ) + await self._finalize_request(TTSAudioEndReason.ERROR, error=error) def _setup_recorder(self, request_id: str) -> None: """Set up PCMWriter for a new request.""" diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py index 96cdb72ad3..a6307bc1e6 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py @@ -287,3 +287,153 @@ async def _run(): assert events == [EVENT_TTS_END] asyncio.run(_run()) + + +# ================ test 401 emits exactly one error ================ +class AuthErrorTester(ExtensionTester): + """Validates that a 401 auth failure emits exactly one + error event and one terminal audio_end.""" + + def __init__(self): + super().__init__() + self.error_count = 0 + self.audio_end_count = 0 + + def on_start(self, ten_env_tester: TenEnvTester) -> None: + tts_input = TTSTextInput( + request_id="auth_err_req", + text="This should fail with 401.", + text_input_end=True, + ) + data = Data.create("tts_text_input") + data.set_property_from_json(None, 
tts_input.model_dump_json()) + ten_env_tester.send_data(data) + ten_env_tester.on_start_done() + + def on_data(self, ten_env: TenEnvTester, data) -> None: + name = data.get_name() + if name == "error": + self.error_count += 1 + elif name == "tts_audio_end": + self.audio_end_count += 1 + ten_env.stop_test() + + +@patch("deepgram_tts.extension.DeepgramTTSClient") +def test_auth_error_single_emission(MockClient): + """401 should produce exactly 1 error event, not + duplicates.""" + from deepgram_tts.deepgram_tts import ( + DeepgramTTSConnectionException, + ) + + mock = MagicMock() + mock.start = AsyncMock() + mock.stop = AsyncMock() + mock.cancel = AsyncMock() + mock.reset_ttfb = lambda: None + mock.mark_needs_reconnect = lambda: None + + async def mock_get_auth_fail(text): + raise DeepgramTTSConnectionException( + status_code=401, body="Unauthorized" + ) + yield # make it a generator # pragma: no cover + + mock.get.side_effect = mock_get_auth_fail + MockClient.return_value = mock + + tester = AuthErrorTester() + tester.set_test_mode_single("deepgram_tts", json.dumps(MOCK_CONFIG)) + tester.run() + + assert tester.error_count == 1, ( + f"Expected exactly 1 error event, got " f"{tester.error_count}" + ) + + +# ================ test non-final error contract ================ +class NonFinalErrorTester(ExtensionTester): + """Validates that an error on a non-final chunk does NOT + produce a public error event. 
Partial stream errors are + transient — only logged, not surfaced to callers.""" + + def __init__(self): + super().__init__() + self.error_count = 0 + self.audio_end_received = False + + def on_start(self, ten_env_tester: TenEnvTester) -> None: + # First chunk: non-final, will error + tts_input = TTSTextInput( + request_id="nonfinal_req", + text="First chunk errors.", + text_input_end=False, + ) + data = Data.create("tts_text_input") + data.set_property_from_json(None, tts_input.model_dump_json()) + ten_env_tester.send_data(data) + + # Second chunk: final, succeeds + tts_input2 = TTSTextInput( + request_id="nonfinal_req", + text="Second chunk works.", + text_input_end=True, + ) + data2 = Data.create("tts_text_input") + data2.set_property_from_json(None, tts_input2.model_dump_json()) + ten_env_tester.send_data(data2) + ten_env_tester.on_start_done() + + def on_data(self, ten_env: TenEnvTester, data) -> None: + name = data.get_name() + if name == "error": + self.error_count += 1 + elif name == "tts_audio_end": + self.audio_end_received = True + ten_env.stop_test() + + +@patch("deepgram_tts.extension.DeepgramTTSClient") +def test_nonfinal_error_not_surfaced(MockClient): + """Error on non-final chunk should not emit public + error event. 
This is the intended contract: partial + stream errors are transient.""" + call_count = 0 + + def create_mock(): + mock = MagicMock() + mock.start = AsyncMock() + mock.stop = AsyncMock() + mock.cancel = AsyncMock() + mock.reset_ttfb = lambda: None + mock.mark_needs_reconnect = lambda: None + + fake_audio = b"\x00\x01" * 200 + + async def mock_get(text): + nonlocal call_count + call_count += 1 + if call_count == 1: + yield (b"Transient error", EVENT_TTS_ERROR) + else: + yield (100, EVENT_TTS_TTFB_METRIC) + yield (fake_audio, EVENT_TTS_RESPONSE) + yield (None, EVENT_TTS_END) + + mock.get.side_effect = mock_get + return mock + + MockClient.return_value = create_mock() + + tester = NonFinalErrorTester() + tester.set_test_mode_single("deepgram_tts", json.dumps(MOCK_CONFIG)) + tester.run() + + assert tester.error_count == 0, ( + f"Non-final error should not produce public error " + f"event, got {tester.error_count}" + ) + assert ( + tester.audio_end_received + ), "Request should still complete after non-final error" From f197de3d6eb3b0ee7e6cf2294dd7b8cca6f98e66 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 14:36:04 +0000 Subject: [PATCH 09/18] docs: add tar sync method, cache cleanup, fix guarder test count - operations_restarts.md: add tar-based container sync that excludes __pycache__ and .pytest_cache (recommended over docker cp). add cleanup command for stale cache artifacts in container. - testing.md: fix TTS guarder count from 15 to 16. add container sync guidance before running tests. 
--- docs/ai/L1/deep_dives/operations_restarts.md | 22 +++++++++++++++++++- docs/ai/L1/deep_dives/testing.md | 16 +++++++++++--- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/docs/ai/L1/deep_dives/operations_restarts.md b/docs/ai/L1/deep_dives/operations_restarts.md index cf824560ed..c4bff1c10f 100644 --- a/docs/ai/L1/deep_dives/operations_restarts.md +++ b/docs/ai/L1/deep_dives/operations_restarts.md @@ -138,10 +138,17 @@ sudo docker exec ten_agent_dev bash -c \ When iterating on extension code locally: ```bash -# Copy updated files (use /. to avoid nested dirs) +# Option 1: docker cp with /. suffix (avoids nested dirs) sudo docker cp ./agents/ten_packages/extension/my_ext/. \ ten_agent_dev:/app/agents/ten_packages/extension/my_ext/ +# Option 2: tar with cache exclusion (recommended — avoids +# __pycache__ and .pytest_cache causing import errors) +tar --exclude='__pycache__' --exclude='.pytest_cache' \ + -C ai_agents/agents/ten_packages/extension/my_ext -cf - . | \ + sudo docker exec -i ten_agent_dev tar \ + -C /app/agents/ten_packages/extension/my_ext -xf - + # Verify symlink exists in the example's tenapp sudo docker exec ten_agent_dev bash -c \ "ls -la /app/agents/examples//tenapp/ten_packages/extension/my_ext" @@ -154,6 +161,19 @@ sudo docker exec ten_agent_dev bash -c \ Then do a full restart. +**Common pitfall:** If `docker cp` copies `__pycache__` or `.pytest_cache` +from your local machine into the container, it can cause `ImportError` or +stale bytecode during test collection. Use the tar method above or clean +the container directory before copying: + +```bash +sudo docker exec ten_agent_dev bash -c \ + "find /app/agents/ten_packages/extension/my_ext \ + -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null; \ + find /app/agents/ten_packages/extension/my_ext \ + -type d -name .pytest_cache -exec rm -rf {} + 2>/dev/null" +``` + ## After Container Restart Checklist 1. 
Reinstall Python dependencies diff --git a/docs/ai/L1/deep_dives/testing.md b/docs/ai/L1/deep_dives/testing.md index 1ad2d72ecd..7c77e5ce0d 100644 --- a/docs/ai/L1/deep_dives/testing.md +++ b/docs/ai/L1/deep_dives/testing.md @@ -24,16 +24,26 @@ docker exec ten_agent_dev bash -c \ docker exec ten_agent_dev bash -c \ "cd /app && task test-extension-no-install EXTENSION=agents/ten_packages/extension/deepgram_tts" -# TTS guarder (all 15 tests) +# TTS guarder (16 tests) docker exec ten_agent_dev bash -c "cd /app && task tts-guarder-test EXTENSION=deepgram_tts" -# ASR guarder (all 10 tests) +# ASR guarder (10 tests) docker exec ten_agent_dev bash -c "cd /app && task asr-guarder-test EXTENSION=azure_asr_python" -# Specific test only +# Specific test only (faster iteration on failures) docker exec ten_agent_dev bash -c "cd /app && task tts-guarder-test EXTENSION=deepgram_tts -- -k test_flush" ``` +**Before running tests**, sync your local code into the container. Use tar +to exclude cache artifacts that cause import errors: + +```bash +tar --exclude='__pycache__' --exclude='.pytest_cache' \ + -C ai_agents/agents/ten_packages/extension/my_ext -cf - . 
| \ + sudo docker exec -i ten_agent_dev tar \ + -C /app/agents/ten_packages/extension/my_ext -xf - +``` + ## Extension Standalone Tests Each extension can have `tests/` with a `bin/start` entry point: From 8a3282352db41ffd9c60c3903598d8faa71b3d2c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 15:15:01 +0000 Subject: [PATCH 10/18] =?UTF-8?q?fix:=20address=20code=20review=20?= =?UTF-8?q?=E2=80=94=20401=20detection,=20dead=20code,=20dump=20writes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - use websockets.exceptions.InvalidStatus for typed 401 detection with string-match fallback for non-websockets exceptions - remove dead send_fatal/non_fatal_tts_error methods (unused after client callback removal) - remove redundant "LOG_CATEGORY_KEY_POINT: " log prefix - await _write_dump() and _setup_recorder() directly instead of fire-and-forget asyncio.create_task (errors were silently lost) - remove unused asyncio import - remove duplicate pathlib import in test_basic.py graph connections verified: voice_assistant_deepgram_tts has the same 3 connection blocks as the working voice_assistant graph. the main_python extension handles LLM/TTS routing internally. 
--- .../extension/deepgram_tts/deepgram_tts.py | 13 ++++-- .../extension/deepgram_tts/extension.py | 43 +++++-------------- .../deepgram_tts/tests/test_basic.py | 1 - 3 files changed, 20 insertions(+), 37 deletions(-) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py index 4896b997c9..b78c317ab5 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py @@ -10,6 +10,7 @@ import websockets from websockets.asyncio.client import ClientConnection +from websockets.exceptions import InvalidStatus from .config import DeepgramTTSConfig from ten_runtime import AsyncTenEnv @@ -250,15 +251,21 @@ async def _connect(self) -> None: "vendor_status: connected to deepgram tts", category=LOG_CATEGORY_VENDOR, ) + except InvalidStatus as e: + raise DeepgramTTSConnectionException( + status_code=e.response.status_code, + body=str(e), + ) from e except Exception as e: error_message = str(e) + # Fallback string match for non-websockets + # exceptions (e.g., mocked tests) if "401" in error_message or "Unauthorized" in error_message: raise DeepgramTTSConnectionException( status_code=401, body=error_message ) from e - else: - self.ten_env.log_error(f"Deepgram TTS connection failed: {e}") - raise + self.ten_env.log_error(f"Deepgram TTS connection failed: {e}") + raise async def _ensure_connection(self) -> None: if not self._ws: diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py index b56e8c1230..b6ff1006dd 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py @@ -3,7 +3,6 @@ # Licensed under the Apache License, Version 2.0. # See the LICENSE file for more information. 
# -import asyncio from datetime import datetime import os import traceback @@ -60,8 +59,7 @@ async def on_init(self, ten_env: AsyncTenEnv) -> None: self.config = DeepgramTTSConfig.model_validate_json(config_json_str) self.config.update_params() ten_env.log_info( - f"LOG_CATEGORY_KEY_POINT: " - f"{self.config.to_str(sensitive_handling=True)}", + self.config.to_str(sensitive_handling=True), category=LOG_CATEGORY_KEY_POINT, ) @@ -226,7 +224,7 @@ async def request_tts(self, t: TTSTextInput) -> None: if t.metadata is not None: self.session_id = t.metadata.get("session_id", "") self.current_turn_id = t.metadata.get("turn_id", -1) - self._setup_recorder(t.request_id) + await self._setup_recorder(t.request_id) elif self.current_request_finished: self.ten_env.log_error( f"Received a message for a finished " @@ -301,7 +299,7 @@ async def _process_tts_text(self, text: str, t: TTSTextInput) -> None: f"#{chunk_count}, " f"size: {len(data_msg)} bytes" ) - self._write_dump(data_msg) + await self._write_dump(data_msg) await self.send_tts_audio_data(data_msg) else: self.ten_env.log_debug( @@ -400,7 +398,7 @@ async def _handle_connection_error( ) await self._finalize_request(TTSAudioEndReason.ERROR, error=error) - def _setup_recorder(self, request_id: str) -> None: + async def _setup_recorder(self, request_id: str) -> None: """Set up PCMWriter for a new request.""" if not (self.config and self.config.dump): return @@ -409,7 +407,7 @@ def _setup_recorder(self, request_id: str) -> None: rid for rid in self.recorder_map.keys() if rid != request_id ]: try: - asyncio.create_task(self.recorder_map[old_rid].flush()) + await self.recorder_map[old_rid].flush() del self.recorder_map[old_rid] self.ten_env.log_debug( f"Cleaned up old PCMWriter for " f"request_id: {old_rid}" @@ -431,7 +429,7 @@ def _setup_recorder(self, request_id: str) -> None: f"{request_id}, file: {dump_file_path}" ) - def _write_dump(self, data: bytes) -> None: + async def _write_dump(self, data: bytes) -> None: """Write 
audio data to dump file if enabled.""" if ( self.config @@ -439,31 +437,10 @@ def _write_dump(self, data: bytes) -> None: and self.current_request_id and self.current_request_id in self.recorder_map ): - asyncio.create_task( - self.recorder_map[self.current_request_id].write(data) - ) - - async def send_fatal_tts_error(self, error_message: str) -> None: - await self.send_tts_error( - request_id=self.current_request_id or "", - error=ModuleError( - message=error_message, - module=ModuleType.TTS, - code=ModuleErrorCode.FATAL_ERROR, - vendor_info=ModuleErrorVendorInfo(vendor=self.vendor()), - ), - ) - - async def send_non_fatal_tts_error(self, error_message: str) -> None: - await self.send_tts_error( - request_id=self.current_request_id or "", - error=ModuleError( - message=error_message, - module=ModuleType.TTS, - code=ModuleErrorCode.NON_FATAL_ERROR, - vendor_info=ModuleErrorVendorInfo(vendor=self.vendor()), - ), - ) + try: + await self.recorder_map[self.current_request_id].write(data) + except Exception as e: + self.ten_env.log_error(f"Dump write failed: {e}") def _current_request_interval_ms(self) -> int: if not self.sent_ts: diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_basic.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_basic.py index e612e4f7e8..230f65f9ce 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_basic.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_basic.py @@ -12,7 +12,6 @@ # Licensed under the Apache License, Version 2.0. # See the LICENSE file for more information. 
# -from pathlib import Path import json from unittest.mock import patch, AsyncMock import os From 319d50422e8a1041d7dd63dc85af33b7fdf31deb Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 15:39:08 +0000 Subject: [PATCH 11/18] fix: resolve pylint W1404 implicit string concatenation warnings --- .../ten_packages/extension/deepgram_tts/extension.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py index b6ff1006dd..ad527ac716 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py @@ -120,9 +120,7 @@ async def cancel_tts(self) -> None: if self.sent_ts: await self._finalize_request(TTSAudioEndReason.INTERRUPTED) else: - self.ten_env.log_warn( - "No current request found, " "skipping TTS cancellation." - ) + self.ten_env.log_warn("No current request, skipping cancel.") def vendor(self) -> str: return "deepgram" @@ -303,7 +301,7 @@ async def _process_tts_text(self, text: str, t: TTSTextInput) -> None: await self.send_tts_audio_data(data_msg) else: self.ten_env.log_debug( - "Received empty payload for " "TTS response" + "Received empty payload for TTS response" ) if t.text_input_end: await self._finalize_request( @@ -331,7 +329,7 @@ async def _process_tts_text(self, text: str, t: TTSTextInput) -> None: elif event_status == EVENT_TTS_END: self.ten_env.log_info( - "Received TTS_END event from " "Deepgram TTS" + "Received TTS_END event from Deepgram TTS" ) if t.text_input_end: await self._finalize_request(TTSAudioEndReason.REQUEST_END) From ede8cff978b0a7e2ec14c072ab776663ba8cc274 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 15:52:31 +0000 Subject: [PATCH 12/18] fix: reconnect on server errors, break after finalize, cleanup - set _needs_reconnect on Deepgram server-side Error messages, not just Python 
exceptions. a protocol-level error leaves the websocket in an unknown state. - add break after _finalize_request() in empty-payload branch to stop processing after request is finalized. - remove dead mark_needs_reconnect() method and test mock refs. - replace inline 8.0 timeout with WS_RECV_TIMEOUT constant. --- .../extension/deepgram_tts/deepgram_tts.py | 10 +++++----- .../ten_packages/extension/deepgram_tts/extension.py | 1 + .../extension/deepgram_tts/tests/test_state_machine.py | 4 ---- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py index b78c317ab5..4dd87b5362 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py @@ -22,6 +22,9 @@ EVENT_TTS_ERROR = 3 EVENT_TTS_TTFB_METRIC = 5 +# Seconds to wait for a WebSocket response before timeout +WS_RECV_TIMEOUT = 8.0 + class DeepgramTTSConnectionException(Exception): """Exception raised when Deepgram TTS connection fails""" @@ -126,10 +129,6 @@ def reset_ttfb(self) -> None: self._sent_ts = None self._ttfb_sent = False - def mark_needs_reconnect(self) -> None: - """Called by extension when request_id changes.""" - self._needs_reconnect = True - async def get( self, text: str ) -> AsyncIterator[tuple[bytes | int | None, int]]: @@ -165,7 +164,7 @@ async def get( try: message = await asyncio.wait_for( - self._ws.recv(), timeout=8.0 + self._ws.recv(), timeout=WS_RECV_TIMEOUT ) except asyncio.TimeoutError: self.ten_env.log_error("Timeout waiting for Deepgram audio") @@ -215,6 +214,7 @@ async def get( self.ten_env.log_error( f"Deepgram error: {error_msg}" ) + self._needs_reconnect = True yield ( error_msg.encode("utf-8"), EVENT_TTS_ERROR, diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py 
b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py index ad527ac716..1c7ff2417a 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py @@ -307,6 +307,7 @@ async def _process_tts_text(self, text: str, t: TTSTextInput) -> None: await self._finalize_request( TTSAudioEndReason.REQUEST_END ) + break elif event_status == EVENT_TTS_TTFB_METRIC: if data_msg is not None and isinstance(data_msg, int): diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py index a6307bc1e6..b78b3b43aa 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py @@ -47,7 +47,6 @@ def _create_mock_client(): mock.stop = AsyncMock() mock.cancel = AsyncMock() mock.reset_ttfb = lambda: None - mock.mark_needs_reconnect = lambda: None fake_audio = b"\x00\x01" * 200 @@ -198,7 +197,6 @@ def create_mock(): mock.stop = AsyncMock() mock.cancel = AsyncMock() mock.reset_ttfb = lambda: None - mock.mark_needs_reconnect = lambda: None fake_audio = b"\x00\x01" * 200 @@ -332,7 +330,6 @@ def test_auth_error_single_emission(MockClient): mock.stop = AsyncMock() mock.cancel = AsyncMock() mock.reset_ttfb = lambda: None - mock.mark_needs_reconnect = lambda: None async def mock_get_auth_fail(text): raise DeepgramTTSConnectionException( @@ -407,7 +404,6 @@ def create_mock(): mock.stop = AsyncMock() mock.cancel = AsyncMock() mock.reset_ttfb = lambda: None - mock.mark_needs_reconnect = lambda: None fake_audio = b"\x00\x01" * 200 From 35e9b110b6767ab152a3d1c1423ad4ce57fea1ff Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 16:07:42 +0000 Subject: [PATCH 13/18] fix: cancel finalization, exception cleanup, test bootstrap - cancel_tts() now always calls 
_finalize_request() when current_request_id is set, regardless of sent_ts. prevents downstream consumers hanging when cancel arrives before first text is processed. - simplify redundant except (asyncio.TimeoutError, Exception) to except Exception. - move sys.path bootstrap to conftest.py, remove from all 6 test files. license headers now appear first as per repo style. - remove unused import (copy) from test_state_machine.py. --- .../extension/deepgram_tts/deepgram_tts.py | 2 +- .../extension/deepgram_tts/extension.py | 6 +-- .../extension/deepgram_tts/tests/conftest.py | 8 +++ .../deepgram_tts/tests/test_basic.py | 9 ---- .../deepgram_tts/tests/test_error_msg.py | 8 --- .../deepgram_tts/tests/test_metrics.py | 8 --- .../deepgram_tts/tests/test_params.py | 54 ++++++++----------- .../deepgram_tts/tests/test_robustness.py | 32 ++++------- .../deepgram_tts/tests/test_state_machine.py | 17 ++---- 9 files changed, 48 insertions(+), 96 deletions(-) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py index 4dd87b5362..060e726b3c 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py @@ -106,7 +106,7 @@ async def cancel(self) -> None: await self._ws.send(json.dumps({"type": "Flush"})) # Drain until Flushed to leave connection clean await asyncio.wait_for(self._drain_until_flushed(), timeout=3.0) - except (asyncio.TimeoutError, Exception) as e: + except Exception as e: self.ten_env.log_warn( f"Cancel drain failed: {e}, " "will reconnect on next request" diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py index 1c7ff2417a..20129d6f9e 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py 
@@ -112,13 +112,11 @@ async def cancel_tts(self) -> None: self.current_request_finished = True if self.current_request_id: self.ten_env.log_debug( - f"Current request {self.current_request_id} " - f"is being cancelled. Sending INTERRUPTED." + f"Cancelling request {self.current_request_id}" ) if self.client: await self.client.cancel() - if self.sent_ts: - await self._finalize_request(TTSAudioEndReason.INTERRUPTED) + await self._finalize_request(TTSAudioEndReason.INTERRUPTED) else: self.ten_env.log_warn("No current request, skipping cancel.") diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/conftest.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/conftest.py index 001977148c..958647c64d 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/conftest.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/conftest.py @@ -1,3 +1,11 @@ +import sys +from pathlib import Path + +# Add project root to sys.path for test imports +project_root = str(Path(__file__).resolve().parents[6]) +if project_root not in sys.path: + sys.path.insert(0, project_root) + # # This file is part of TEN Framework, an open source project. # Licensed under the Apache License, Version 2.0. diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_basic.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_basic.py index 230f65f9ce..2f001d17f3 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_basic.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_basic.py @@ -1,12 +1,3 @@ -import sys -from pathlib import Path - -# Add project root to sys.path to allow running tests from this directory -# The project root is 6 levels up from the parent directory of this file. -project_root = str(Path(__file__).resolve().parents[6]) -if project_root not in sys.path: - sys.path.insert(0, project_root) - # # This file is part of TEN Framework, an open source project. 
# Licensed under the Apache License, Version 2.0. diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_error_msg.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_error_msg.py index 26e5cccf70..f194ca34cc 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_error_msg.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_error_msg.py @@ -1,11 +1,3 @@ -import sys -from pathlib import Path - -# Add project root to sys.path -project_root = str(Path(__file__).resolve().parents[6]) -if project_root not in sys.path: - sys.path.insert(0, project_root) - # # This file is part of TEN Framework, an open source project. # Licensed under the Apache License, Version 2.0. diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_metrics.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_metrics.py index 3705c130f4..60d7cdfe20 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_metrics.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_metrics.py @@ -1,11 +1,3 @@ -import sys -from pathlib import Path - -# Add project root to sys.path -project_root = str(Path(__file__).resolve().parents[6]) -if project_root not in sys.path: - sys.path.insert(0, project_root) - # # This file is part of TEN Framework, an open source project. # Licensed under the Apache License, Version 2.0. 
diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_params.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_params.py index d597cd6a52..aded961fde 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_params.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_params.py @@ -1,11 +1,3 @@ -import sys -from pathlib import Path - -# Add project root to sys.path -project_root = str(Path(__file__).resolve().parents[6]) -if project_root not in sys.path: - sys.path.insert(0, project_root) - # # This file is part of TEN Framework, an open source project. # Licensed under the Apache License, Version 2.0. @@ -14,6 +6,7 @@ import json from unittest.mock import patch, AsyncMock + from ten_runtime import ( ExtensionTester, TenEnvTester, @@ -25,6 +18,24 @@ EVENT_TTS_END, EVENT_TTS_TTFB_METRIC, ) +from unittest.mock import MagicMock + + +def create_mock_client(): + mock = MagicMock() + mock.start = AsyncMock() + mock.stop = AsyncMock() + mock.cancel = AsyncMock() + mock.reset_ttfb = lambda: None + fake_audio = b"\x00\x01\x02\x03" * 100 + + async def mock_get(text): + yield (100, EVENT_TTS_TTFB_METRIC) + yield (fake_audio, EVENT_TTS_RESPONSE) + yield (None, EVENT_TTS_END) + + mock.get.side_effect = mock_get + return mock # ================ test different sample rates ================ @@ -58,31 +69,10 @@ def on_audio_frame(self, ten_env: TenEnvTester, audio_frame): self.audio_chunks_count += 1 -def _create_mock_client(): - """Helper to create a mock client for tests.""" - from unittest.mock import MagicMock - - mock = MagicMock() - mock.start = AsyncMock() - mock.stop = AsyncMock() - mock.cancel = AsyncMock() - mock.reset_ttfb = lambda: None - - fake_audio_chunk = b"\x00\x01\x02\x03" * 100 - - async def mock_get_audio_stream(text: str): - yield (100, EVENT_TTS_TTFB_METRIC) - yield (fake_audio_chunk, EVENT_TTS_RESPONSE) - yield (None, EVENT_TTS_END) - - mock.get.side_effect = mock_get_audio_stream 
- return mock - - @patch("deepgram_tts.extension.DeepgramTTSClient") def test_sample_rate_16000(MockDeepgramTTSClient): """Test with 16000 Hz sample rate.""" - MockDeepgramTTSClient.return_value = _create_mock_client() + MockDeepgramTTSClient.return_value = create_mock_client() tester = ExtensionTesterSampleRate(16000) tester.set_test_mode_single( @@ -108,7 +98,7 @@ def test_sample_rate_16000(MockDeepgramTTSClient): @patch("deepgram_tts.extension.DeepgramTTSClient") def test_sample_rate_24000(MockDeepgramTTSClient): """Test with 24000 Hz sample rate.""" - MockDeepgramTTSClient.return_value = _create_mock_client() + MockDeepgramTTSClient.return_value = create_mock_client() tester = ExtensionTesterSampleRate(24000) tester.set_test_mode_single( @@ -134,7 +124,7 @@ def test_sample_rate_24000(MockDeepgramTTSClient): @patch("deepgram_tts.extension.DeepgramTTSClient") def test_sample_rate_48000(MockDeepgramTTSClient): """Test with 48000 Hz sample rate.""" - MockDeepgramTTSClient.return_value = _create_mock_client() + MockDeepgramTTSClient.return_value = create_mock_client() tester = ExtensionTesterSampleRate(48000) tester.set_test_mode_single( diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_robustness.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_robustness.py index b807fe5834..6191c8f14a 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_robustness.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_robustness.py @@ -1,11 +1,3 @@ -import sys -from pathlib import Path - -# Add project root to sys.path -project_root = str(Path(__file__).resolve().parents[6]) -if project_root not in sys.path: - sys.path.insert(0, project_root) - # # This file is part of TEN Framework, an open source project. # Licensed under the Apache License, Version 2.0. 
@@ -14,6 +6,7 @@ import json from unittest.mock import patch, AsyncMock + from ten_runtime import ( ExtensionTester, TenEnvTester, @@ -25,26 +18,23 @@ EVENT_TTS_END, EVENT_TTS_TTFB_METRIC, ) +from unittest.mock import MagicMock -def _create_mock_client(): - """Helper to create a mock client for tests.""" - from unittest.mock import MagicMock - +def create_mock_client(): mock = MagicMock() mock.start = AsyncMock() mock.stop = AsyncMock() mock.cancel = AsyncMock() mock.reset_ttfb = lambda: None + fake_audio = b"\x00\x01\x02\x03" * 100 - fake_audio_chunk = b"\x00\x01\x02\x03" * 100 - - async def mock_get_audio_stream(text: str): + async def mock_get(text): yield (100, EVENT_TTS_TTFB_METRIC) - yield (fake_audio_chunk, EVENT_TTS_RESPONSE) + yield (fake_audio, EVENT_TTS_RESPONSE) yield (None, EVENT_TTS_END) - mock.get.side_effect = mock_get_audio_stream + mock.get.side_effect = mock_get return mock @@ -78,7 +68,7 @@ def on_data(self, ten_env: TenEnvTester, data) -> None: @patch("deepgram_tts.extension.DeepgramTTSClient") def test_empty_text(MockDeepgramTTSClient): """Test that empty text is handled gracefully.""" - MockDeepgramTTSClient.return_value = _create_mock_client() + MockDeepgramTTSClient.return_value = create_mock_client() tester = ExtensionTesterEmptyText() tester.set_test_mode_single( @@ -132,7 +122,7 @@ def on_data(self, ten_env: TenEnvTester, data) -> None: @patch("deepgram_tts.extension.DeepgramTTSClient") def test_whitespace_text(MockDeepgramTTSClient): """Test that whitespace-only text is handled gracefully.""" - MockDeepgramTTSClient.return_value = _create_mock_client() + MockDeepgramTTSClient.return_value = create_mock_client() tester = ExtensionTesterWhitespaceText() tester.set_test_mode_single( @@ -192,7 +182,7 @@ def on_audio_frame(self, ten_env: TenEnvTester, audio_frame): @patch("deepgram_tts.extension.DeepgramTTSClient") def test_long_text(MockDeepgramTTSClient): """Test that long text is handled correctly.""" - MockDeepgramTTSClient.return_value 
= _create_mock_client() + MockDeepgramTTSClient.return_value = create_mock_client() tester = ExtensionTesterLongText() tester.set_test_mode_single( @@ -252,7 +242,7 @@ def on_data(self, ten_env: TenEnvTester, data) -> None: @patch("deepgram_tts.extension.DeepgramTTSClient") def test_special_characters(MockDeepgramTTSClient): """Test that special characters are handled correctly.""" - MockDeepgramTTSClient.return_value = _create_mock_client() + MockDeepgramTTSClient.return_value = create_mock_client() tester = ExtensionTesterSpecialChars() tester.set_test_mode_single( diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py index b78b3b43aa..12650f9d2c 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_state_machine.py @@ -1,21 +1,13 @@ -import sys -from pathlib import Path - -# Add project root to sys.path -project_root = str(Path(__file__).resolve().parents[6]) -if project_root not in sys.path: - sys.path.insert(0, project_root) - # # This file is part of TEN Framework, an open source project. # Licensed under the Apache License, Version 2.0. # See the LICENSE file for more information. 
# import asyncio -import copy import json from unittest.mock import patch, AsyncMock, MagicMock + from ten_runtime import ( ExtensionTester, TenEnvTester, @@ -41,14 +33,13 @@ } -def _create_mock_client(): +def create_mock_client(): mock = MagicMock() mock.start = AsyncMock() mock.stop = AsyncMock() mock.cancel = AsyncMock() mock.reset_ttfb = lambda: None - - fake_audio = b"\x00\x01" * 200 + fake_audio = b"\x00\x01\x02\x03" * 100 async def mock_get(text): yield (100, EVENT_TTS_TTFB_METRIC) @@ -120,7 +111,7 @@ def on_data(self, ten_env: TenEnvTester, data) -> None: def test_sequential_requests(MockClient): """Each sequential request should complete with its own request_id in audio_start and audio_end.""" - MockClient.return_value = _create_mock_client() + MockClient.return_value = create_mock_client() tester = SequentialRequestsTester() tester.set_test_mode_single("deepgram_tts", json.dumps(MOCK_CONFIG)) From cc9fbad07b0fa9418e62f2df080496b335334e2b Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 16:20:28 +0000 Subject: [PATCH 14/18] chore: remove progressive disclosure docs from deepgram tts PR scope moved to separate PR #2132 (docs/progressive-disclosure branch). these repo-wide AI documentation files are a cross-cutting concern independent of the deepgram tts extension. 
--- AGENTS.md | 26 - CLAUDE.md | 1 - docs/ai/L0_repo_card.md | 26 - docs/ai/L1/01_setup.md | 118 ---- docs/ai/L1/02_architecture.md | 142 ---- docs/ai/L1/03_code_map.md | 117 ---- docs/ai/L1/04_conventions.md | 138 ---- docs/ai/L1/05_workflows.md | 166 ----- docs/ai/L1/06_interfaces.md | 150 ---- docs/ai/L1/07_gotchas.md | 117 ---- docs/ai/L1/08_security.md | 88 --- docs/ai/L1/deep_dives/_index.md | 10 - docs/ai/L1/deep_dives/deployment.md | 206 ------ .../ai/L1/deep_dives/extension_development.md | 653 ------------------ docs/ai/L1/deep_dives/graph_configuration.md | 410 ----------- docs/ai/L1/deep_dives/operations_restarts.md | 191 ----- docs/ai/L1/deep_dives/server_architecture.md | 211 ------ docs/ai/L1/deep_dives/testing.md | 305 -------- 18 files changed, 3075 deletions(-) delete mode 100644 AGENTS.md delete mode 100644 CLAUDE.md delete mode 100644 docs/ai/L0_repo_card.md delete mode 100644 docs/ai/L1/01_setup.md delete mode 100644 docs/ai/L1/02_architecture.md delete mode 100644 docs/ai/L1/03_code_map.md delete mode 100644 docs/ai/L1/04_conventions.md delete mode 100644 docs/ai/L1/05_workflows.md delete mode 100644 docs/ai/L1/06_interfaces.md delete mode 100644 docs/ai/L1/07_gotchas.md delete mode 100644 docs/ai/L1/08_security.md delete mode 100644 docs/ai/L1/deep_dives/_index.md delete mode 100644 docs/ai/L1/deep_dives/deployment.md delete mode 100644 docs/ai/L1/deep_dives/extension_development.md delete mode 100644 docs/ai/L1/deep_dives/graph_configuration.md delete mode 100644 docs/ai/L1/deep_dives/operations_restarts.md delete mode 100644 docs/ai/L1/deep_dives/server_architecture.md delete mode 100644 docs/ai/L1/deep_dives/testing.md diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index d23c0aa719..0000000000 --- a/AGENTS.md +++ /dev/null @@ -1,26 +0,0 @@ -# AI Agent Instructions - -This repository uses progressive disclosure documentation to help AI coding -agents work efficiently. 
Documentation is structured in three levels under -`docs/ai/`. - -## How to Load - -1. Read [docs/ai/L0_repo_card.md](docs/ai/L0_repo_card.md) to identify the repo. -2. Load ALL 8 files in `docs/ai/L1/`. They are small — load all of them upfront. - This gives you setup, architecture, code map, conventions, workflows, - interfaces, gotchas, and security. -3. If a task needs more detail than L1 provides, follow links to L2 deep dives - in `docs/ai/L1/deep_dives/`. Load only the specific L2 file you need. - -## Levels - -- **L0 (Repo Card):** Identity and L1 index. Table of contents. -- **L1 (Summaries):** Eight structured summaries. Load all at session start. -- **L2 (Deep Dives):** Full specifications. Load only when L1 isn't detailed enough. - -## Working Areas - -- **AI Agents development**: `ai_agents/` — see `ai_agents/AGENTS.md` for workspace-specific context -- **Core framework**: `core/`, `packages/`, `build/` -- **Operational reference**: `ai/AI_working_with_ten.md` (full), `ai/AI_working_with_ten_compact.md` (quick) diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 100644 index c2c4fb4158..0000000000 --- a/CLAUDE.md +++ /dev/null @@ -1 +0,0 @@ -Read @AGENTS.md for AI agent instructions and progressive disclosure docs. 
diff --git a/docs/ai/L0_repo_card.md b/docs/ai/L0_repo_card.md deleted file mode 100644 index 288312322b..0000000000 --- a/docs/ai/L0_repo_card.md +++ /dev/null @@ -1,26 +0,0 @@ -# TEN Framework — Repo Card - -## Identity - -| Field | Value | -| ------------- | -------------------------------------------------------------------- | -| Repo | `TEN-framework/TEN-Agent` | -| Description | Open-source platform for building real-time multimodal AI agents | -| Repo Type | `distributed-system` | -| Language | Python (extensions), Go (API server), TypeScript/React (playground) | -| Deploy Target | Docker container (`ten_agent_dev`), Taskfile-based build | -| Owner | TEN Framework team | -| Last Reviewed | 2026-04-07 | - -## L1 Index - -| File | Purpose | Audience | -| ---------------------------------------- | -------------------------------------------------------- | -------- | -| [01_setup](L1/01_setup.md) | Docker, .env, ports, health checks, restart procedures | both | -| [02_architecture](L1/02_architecture.md) | Extensions, graphs, connections, RTC-first design | both | -| [03_code_map](L1/03_code_map.md) | Directory tree, key files, base classes, 93+ extensions | both | -| [04_conventions](L1/04_conventions.md) | Naming, Pydantic configs, params pattern, formatting | both | -| [05_workflows](L1/05_workflows.md) | Create extension, modify graph, test, restart, deploy | both | -| [06_interfaces](L1/06_interfaces.md) | REST API, connection schemas, base class abstract methods| both | -| [07_gotchas](L1/07_gotchas.md) | Property tuples, signal handlers, zombies, .env timing | both | -| [08_security](L1/08_security.md) | API keys, .env, sensitive logging, git hooks | both | diff --git a/docs/ai/L1/01_setup.md b/docs/ai/L1/01_setup.md deleted file mode 100644 index c6003da202..0000000000 --- a/docs/ai/L1/01_setup.md +++ /dev/null @@ -1,118 +0,0 @@ -# 01 Setup - -> Environment setup, local development, and quick commands for TEN Framework AI Agents. 
- -## Prerequisites - -| Requirement | Version / Notes | -| ----------------- | ------------------------------------------------------------ | -| Docker + Compose | Required for container-based development | -| Node.js | LTS v18+ on host; container has Node 22 | -| API Keys | Agora App ID, OpenAI, Deepgram ASR, ElevenLabs TTS (minimum)| -| Hardware | 2+ CPU cores, 4 GB RAM minimum | - -## Docker Container - -```bash -cd /home/ubuntu/ten-framework/ai_agents -docker compose up -d -docker ps | grep ten_agent_dev # Verify running -``` - -Container image: `ghcr.io/ten-framework/ten_agent_build:0.7.14` - -## Environment Variables - -**Single .env file**: `ai_agents/.env` — the ONLY source of environment config. - -| Variable | Purpose | Required | -| ---------------------------- | ---------------------------- | -------- | -| `AGORA_APP_ID` | Agora RTC app identifier | Yes | -| `AGORA_APP_CERTIFICATE` | Agora RTC certificate | No | -| `OPENAI_API_KEY` | LLM provider | Yes | -| `OPENAI_MODEL` | Model name (e.g., `gpt-4o`) | Yes | -| `DEEPGRAM_API_KEY` | ASR provider | Yes | -| `ELEVENLABS_TTS_KEY` | TTS provider | Yes | -| `LOG_STDOUT` | Worker log visibility | Yes (`true`) | -| `SERVER_PORT` | API server port | Yes (`8080`) | -| `WORKERS_MAX` | Max concurrent sessions | Yes (`100`) | -| `WORKER_QUIT_TIMEOUT_SECONDS`| Worker idle timeout | Yes (`60`) | - -See `.env.example` for the complete list. Extensions may require additional keys -(Azure, AWS, Rime, etc.) — check extension README files. - -## Install and Run - -```bash -# 1. Install Python dependencies (NOT persisted across container restarts) -docker exec ten_agent_dev bash -c \ - "cd /app/agents/examples/voice-assistant-advanced/tenapp && \ - bash scripts/install_python_deps.sh" - -# 2. Build and install (5-8 minutes first time) -docker exec ten_agent_dev bash -c \ - "cd /app/agents/examples/voice-assistant-advanced && task install" - -# 3. 
Start everything (API server + playground + TMAN Designer) -docker exec -d ten_agent_dev bash -c \ - "cd /app/agents/examples/voice-assistant-advanced && \ - task run > /tmp/task_run.log 2>&1" -``` - -**CRITICAL**: Always use `task run` to start — never run `./bin/api` directly. - -## Ports - -| Port | Service | -| ----- | ---------------- | -| 8080 | Go API server | -| 3000 | Playground (Next.js) | -| 49483 | TMAN Designer | - -## Health Checks - -```bash -curl -s http://localhost:8080/health -# {"code":"0","data":null,"msg":"ok"} - -curl -s http://localhost:8080/graphs | jq -r '.data[].name' -# voice_assistant, voice_assistant_heygen, etc. -``` - -## Restart Procedures - -| What Changed | Container? | Server? | Frontend? | -| ------------------------------- | ---------- | ----------------- | ----------------- | -| `property.json` (graphs added) | No | Nuclear restart | Nuclear restart | -| `property.json` (config only) | No | No | No | -| `.env` file | Yes | Yes | No | -| Python extension code | No | Yes | No | -| Go server code | No | Yes + `task install` | No | - -**Nuclear restart** (safest after graph changes): - -```bash -sudo docker exec ten_agent_dev bash -c "pkill -9 -f 'bin/api'; pkill -9 node; pkill -9 bun" -sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock" -sleep 2 -sudo docker exec -d ten_agent_dev bash -c \ - "cd /app/agents/examples/voice-assistant-advanced && task run > /tmp/task_run.log 2>&1" -``` - -**After container restart**: always reinstall Python deps, then `task run`. - -**After .env changes**: `docker compose down && docker compose up -d`, reinstall deps, `task run`. 
- -## Logs - -```bash -# All logs (inside container) -docker exec ten_agent_dev tail -f /tmp/task_run.log - -# Filter by extension or channel -docker exec ten_agent_dev tail -f /tmp/task_run.log | grep --line-buffered "deepgram" -``` - -## Related Deep Dives - -- [Deployment](deep_dives/deployment.md) — Docker Compose, Cloudflare tunnel, Nginx, Grafana monitoring diff --git a/docs/ai/L1/02_architecture.md b/docs/ai/L1/02_architecture.md deleted file mode 100644 index c35f537d7e..0000000000 --- a/docs/ai/L1/02_architecture.md +++ /dev/null @@ -1,142 +0,0 @@ -# 02 Architecture - -> System design overview: extensions, graphs, connections, and the server-worker model. - -## TEN Ecosystem - -| Component | Purpose | -| --------------------- | ------------------------------------------------------ | -| TEN Framework | Core runtime (C/C++, Go, Python, Node.js bindings) | -| TEN Agent Examples | Pre-built agent configurations (this repo's `ai_agents/`) | -| TEN VAD | Low-latency voice activity detection | -| TEN Turn Detection | Full-duplex dialogue management | -| TEN Portal | Documentation and blog site | - -## Extension System - -Extensions are modular components that process data — ASR, TTS, LLM, tools, RTC, avatars, etc. 
-Each extension has a lifecycle: - -``` -on_init() → on_start() → [process messages] → on_stop() → on_deinit() -``` - -Every extension contains: - -| File | Purpose | -| ----------------- | ------------------------------------------ | -| `addon.py` | Registration via `@register_addon_as_extension` | -| `extension.py` | Main logic, inherits from a base class | -| `manifest.json` | Metadata, dependencies, API interface | -| `property.json` | Default configuration values | - -**Base classes** (in `ten_ai_base/interface/ten_ai_base/`): - -| Base Class | Use For | -| ----------------------------- | ----------------- | -| `AsyncASRBaseExtension` | Speech-to-text | -| `AsyncTTS2BaseExtension` | Text-to-speech | -| `AsyncLLMBaseExtension` | Chat completion | -| `AsyncLLMToolBaseExtension` | LLM function tools| -| `AsyncExtension` | Generic / custom | - -## Graph-Based Configuration - -Agents are assembled by defining **graphs** in `property.json`. A graph specifies -which extensions run (nodes) and how data flows between them (connections). - -```json -{ - "predefined_graphs": [{ - "name": "voice_assistant", - "auto_start": true, - "graph": { - "nodes": [ - {"type": "extension", "name": "stt", "addon": "deepgram_asr_python", "property": {}}, - {"type": "extension", "name": "llm", "addon": "openai_llm2_python", "property": {}}, - {"type": "extension", "name": "tts", "addon": "elevenlabs_tts2_python", "property": {}} - ], - "connections": [...] 
- } - }] -} -``` - -## Connection Types - -| Type | Payload | Example | -| ------------- | -------------------- | -------------------------------------------- | -| `cmd` | Named commands | `tool_register`, `on_user_joined`, `flush` | -| `data` | Named data messages | `asr_result`, `text_data`, `tts_text_input` | -| `audio_frame` | PCM audio streams | `pcm_frame` (16-bit, mono, 16/24/48 kHz) | -| `video_frame` | Video streams | Raw video frames for vision/avatar | - -## RTC-First Design - -TEN uses Agora RTC (Real-Time Communication) as the default transport, not WebSockets. - -| Aspect | RTC (default) | WebSocket | -| ---------------- | -------------------------------- | -------------------------- | -| Latency | 50-150ms (UDP-based) | Higher (TCP-based) | -| Codec support | Opus, VP8, VP9, AV1 | Raw PCM only | -| Bandwidth adapt | Built-in adaptation + FEC | Manual implementation | -| Use case | Real-time voice/video | Signaling, configuration | - -WebSockets are used for signaling and configuration; RTC handles the media path. 
- -## Server-Worker Model - -``` -┌─────────────────┐ ┌──────────────────┐ -│ Go HTTP Server │────▶│ Worker Process │ (one per session) -│ (port 8080) │ │ (tman run start) │ -│ │ │ │ -│ /start → spawn │ │ Loads graph from │ -│ /stop → kill │ │ property.json │ -│ /ping → keep │ │ Runs extensions │ -└─────────────────┘ └──────────────────┘ -``` - -- **POST /start** spawns a worker process for a channel/session -- **POST /stop** terminates the worker -- **POST /ping** keeps the session alive (if timeout != -1) - -## Property Injection - -When `/start` is called, the server auto-injects dynamic values into the graph: - -- `channel_name` → injected into every node that has a `"channel"` property -- `remote_stream_id`, `bot_stream_id`, `token` → injected via `startPropMap` -- `req.Properties[extensionName]` → merged into specific node properties - -This is future-proof: any new extension with a "channel" property automatically -receives the dynamic channel value without code changes. - -## Component Diagram - -``` - Client (Browser/Mobile) - │ - ▼ - ┌──────────────┐ - │ Playground │ Next.js frontend (port 3000) - │ (UI) │ - └──────┬───────┘ - │ REST API - ▼ - ┌──────────────┐ ┌──────────────────────────────────┐ - │ Go Server │──spawn─▶│ Worker Process │ - │ (port 8080) │ │ ┌─────┐ ┌─────┐ ┌─────┐ │ - │ │ │ │ ASR │─▶│ LLM │─▶│ TTS │ │ - │ │ │ └──┬──┘ └─────┘ └──┬──┘ │ - └──────────────┘ │ │ │ │ - │ ┌──┴───────────────────┴──┐ │ - │ │ Agora RTC │ │ - │ └─────────────────────────┘ │ - └──────────────────────────────────┘ -``` - -## Related Deep Dives - -- [Server Architecture](deep_dives/server_architecture.md) — Go server internals, property injection pipeline -- [Graph Configuration](deep_dives/graph_configuration.md) — Node schema, connection wiring, parallel routing diff --git a/docs/ai/L1/03_code_map.md b/docs/ai/L1/03_code_map.md deleted file mode 100644 index dd60723726..0000000000 --- a/docs/ai/L1/03_code_map.md +++ /dev/null @@ -1,117 +0,0 @@ -# 03 Code Map - -> 
Directory tree, module responsibilities, and key file locations. - -## Top-Level Structure - -All AI agent development happens inside `ai_agents/`: - -``` -ai_agents/ -├── agents/ -│ ├── ten_packages/ -│ │ ├── extension/ # 93+ extensions (ASR, TTS, LLM, tools, avatar) -│ │ └── system/ # Core runtime packages -│ │ ├── ten_ai_base/ # Base classes and API interface definitions -│ │ ├── ten_runtime_python/ -│ │ └── ten_runtime_go/ -│ ├── examples/ # 24+ example agent configurations -│ │ ├── voice-assistant/ -│ │ ├── voice-assistant-advanced/ -│ │ ├── voice-assistant-realtime/ -│ │ ├── voice-assistant-video/ -│ │ ├── doodler/ -│ │ └── ... -│ ├── integration_tests/ # Test frameworks -│ │ ├── asr_guarder/ # ASR integration tests -│ │ └── tts_guarder/ # TTS integration tests -│ └── scripts/ # Build and packaging scripts -├── server/ # Go API server -│ ├── main.go -│ └── internal/ -│ ├── http_server.go # REST endpoints, property injection -│ └── config.go # Parameter mapping (startPropMap) -├── playground/ # Next.js frontend UI (port 3000) -│ └── src/ # React components -├── esp32-client/ # ESP32 hardware client -├── Taskfile.yml # Root-level build/test tasks -├── docker-compose.yml # Container config -├── .env # Environment variables (single source) -└── .env.example # Template with all variables -``` - -Other repo-root directories: `core/` (C runtime), `packages/` (example/core extensions), -`docs/` (framework docs), `tools/` (Grafana monitoring, profilers). 
- -## Extension Categories - -| Category | Count | Examples | -| --------- | ----- | ----------------------------------------------------------- | -| ASR | 10+ | `deepgram_asr_python`, `azure_asr_python`, `aws_asr_python` | -| TTS | 15+ | `deepgram_tts`, `elevenlabs_tts2_python`, `cartesia_tts` | -| LLM | 8+ | `openai_llm2_python`, `gemini_llm2_python`, `bedrock_llm_python` | -| Avatar | 5+ | `heygen_avatar_python`, `anam_avatar_python` | -| Tools | 8+ | `bingsearch_tool_python`, `vision_tool_python` | -| Transport | 3+ | `agora_rtc`, `websocket_server`, `http_server_python` | -| Other | 10+ | `message_collector2`, `ten_vad_python`, `mcp_client_python` | - -## Extension File Structure - -Every extension follows this layout: - -| File | Purpose | -| ------------------ | ---------------------------------------------- | -| `__init__.py` | Package marker | -| `addon.py` | `@register_addon_as_extension` registration | -| `extension.py` | Main logic, inherits from base class | -| `config.py` | Pydantic config model (optional but common) | -| `manifest.json` | Metadata, dependencies, API interface imports | -| `property.json` | Default config values with `${env:VAR}` syntax | -| `requirements.txt` | Python dependencies | -| `README.md` | Usage documentation (often multilingual) | -| `tests/` | Standalone tests with `bin/start` entry point | - -## Base Classes - -Located in example tenapp directories under `ten_packages/system/ten_ai_base/interface/ten_ai_base/`: - -| File | Class | Purpose | -| ---------- | ---------------------------- | -------------------------- | -| `asr.py` | `AsyncASRBaseExtension` | Speech recognition | -| `tts.py` | `AsyncTTSBaseExtension` | Text-to-speech (basic) | -| `tts2.py` | `AsyncTTS2BaseExtension` | Text-to-speech (advanced) | -| `llm.py` | `AsyncLLMBaseExtension` | Language model completion | -| `llm2.py` | `AsyncLLM2BaseExtension` | Language model v2 | -| `llm_tool.py` | `AsyncLLMToolBaseExtension` | LLM function calling tools | -| 
`mllm.py` | `AsyncMLLMBaseExtension` | Multimodal LLM | - -## API Interface Definitions - -Standard interfaces in `ten_ai_base/api/`: - -| File | Defines | -| ----------------------- | --------------------------------- | -| `asr-interface.json` | ASR data/cmd/audio_frame schemas | -| `tts-interface.json` | TTS data/cmd/audio_frame schemas | -| `llm-interface.json` | LLM data/cmd schemas | -| `mllm-interface.json` | Multimodal LLM schemas | - -Extensions reference these via `manifest.json`: -```json -{"api": {"interface": [{"import_uri": "../../system/ten_ai_base/api/tts-interface.json"}]}} -``` - -## Key Files Quick Reference - -| When working on... | Look at | -| -------------------------- | -------------------------------------------------- | -| New extension | Similar extension in `agents/ten_packages/extension/` | -| API interface changes | `ten_ai_base/api/*.json` | -| Graph configuration | `agents/examples/*/tenapp/property.json` | -| Server endpoints | `server/internal/http_server.go` | -| Build/test tasks | `Taskfile.yml` (root) and per-example | -| Test setup | `agents/ten_packages/extension/*/tests/bin/start` | - -## Related Deep Dives - -- [Extension Development](deep_dives/extension_development.md) — Full creation guide with base class details diff --git a/docs/ai/L1/04_conventions.md b/docs/ai/L1/04_conventions.md deleted file mode 100644 index 41f11901f8..0000000000 --- a/docs/ai/L1/04_conventions.md +++ /dev/null @@ -1,138 +0,0 @@ -# 04 Conventions - -> Coding patterns, naming, configuration, and formatting standards. 
-
-## Naming Conventions
-
-| Item | Pattern | Example |
-| --------------- | -------------------------------------- | -------------------------- |
-| Extension dir | `<vendor>_<function>_python` | `deepgram_asr_python` |
-| Addon name | Same as directory name | `deepgram_asr_python` |
-| Example dir | `voice-assistant-<variant>` | `voice-assistant-realtime` |
-| Config class | `<Vendor><Type>Config(BaseModel)` | `DeepgramTTSConfig` |
-| Client class | `<Vendor><Type>Client` | `DeepgramTTSClient` |
-
-## Addon Registration
-
-Every extension must register via decorator in `addon.py`:
-
-```python
-from ten_runtime import Addon, register_addon_as_extension, TenEnv
-
-@register_addon_as_extension("deepgram_asr_python")
-class DeepgramASRExtensionAddon(Addon):
-    def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None:
-        ten.on_create_instance_done(DeepgramASRExtension(addon_name), context)
-```
-
-The decorator name **must match** the `addon` field in `property.json` graph nodes.
-
-## Base Class Selection
-
-| Need | Base Class | Key Abstract Methods |
-| ----------------------- | ----------------------------- | ------------------------------------- |
-| Speech-to-text | `AsyncASRBaseExtension` | `vendor()`, `start_connection()`, `send_audio()`, `finalize()` |
-| Text-to-speech (HTTP) | `AsyncTTS2HttpExtension` | `vendor()`, `request_tts()`, `synthesize_audio_sample_rate()` |
-| Text-to-speech (WS) | `AsyncTTS2BaseExtension` | `vendor()`, `request_tts()`, `cancel_tts()` |
-| Chat completion | `AsyncLLMBaseExtension` | `on_call_chat_completion()`, `on_data_chat_completion()` |
-| LLM function tool | `AsyncLLMToolBaseExtension` | `get_tool_metadata()`, `run_tool()` |
-| Generic / custom | `AsyncExtension` | `on_cmd()`, `on_data()`, etc.
| - -## Pydantic Configuration - -Extensions use Pydantic models for config validation: - -```python -from pydantic import BaseModel, Field - -class DeepgramTTSConfig(BaseModel): - api_key: str = "" - model: str = "aura-2-theia-en" - sample_rate: int = 24000 - params: dict[str, Any] = Field(default_factory=dict) -``` - -Config is loaded from property.json in `on_init()`: -```python -config_json, _ = await ten_env.get_property_to_json("") -self.config = DeepgramTTSConfig(**json.loads(config_json)) -``` - -## Environment Variable Syntax - -In `property.json`, reference env vars: - -| Syntax | Behavior | -| --------------------- | --------------------------------------- | -| `${env:VAR_NAME}` | Required — error if missing | -| `${env:VAR_NAME\|}` | Optional — empty string if missing | -| `${env:VAR_NAME\|default}` | Optional — uses default if missing | - -```json -{"api_key": "${env:DEEPGRAM_API_KEY}", "region": "${env:AZURE_REGION|}"} -``` - -## Params Dict Pattern - -Extensions using HTTP/WebSocket services store all config in a `params` dictionary: - -1. **Store** `api_key` inside `params` dict in property.json and config -2. **Extract** for authentication headers in the client constructor -3. 
**Strip** from params **only when creating the HTTP request payload** - -```python -# In client constructor — extract for auth -self.api_key = config.params.get("api_key", "") -self.headers = {"Authorization": f"Bearer {self.api_key}"} - -# In request method — strip before sending -payload = {**self.config.params} -payload.pop("api_key", None) -``` - -## Sensitive Data Logging - -Implement `to_str()` to encrypt sensitive fields before logging: - -```python -def to_str(self, sensitive_handling: bool = True) -> str: - if not sensitive_handling: - return f"{self}" - config = copy.deepcopy(self) - if config.params and "api_key" in config.params: - config.params["api_key"] = utils.encrypt(config.params["api_key"]) - return f"{config}" -``` - -## Logging - -- Use `ten_env.log_info()`, `ten_env.log_warn()`, `ten_env.log_error()`, `ten_env.log_debug()` -- Categories: `LOG_CATEGORY_KEY_POINT` (lifecycle events), `LOG_CATEGORY_VENDOR` (vendor status) -- All output goes to `/tmp/task_run.log` inside the container - -## Import Convention - -```python -# Correct (v0.11+) -from ten_runtime import Addon, register_addon_as_extension, TenEnv - -# Wrong (old v0.8.x — will not work) -from ten import Addon -``` - -## Formatting - -- **Black** formatter with `--line-length 80` -- Run: `task format` (from `ai_agents/`) -- Check: `task check` -- Excludes: `third_party/`, `http_server_python/`, `ten_packages/system` - -## Design Principles - -- **YAGNI**: Only implement what is needed now, not what might be needed later -- **KISS**: Prefer simple solutions; three similar lines > premature abstraction -- **No git-ignored files**: Never modify auto-generated files (manifest-lock.json, out/, .ten/, bin/) - -## Related Deep Dives - -- [Extension Development](deep_dives/extension_development.md) — Full creation guide with implementation walkthroughs diff --git a/docs/ai/L1/05_workflows.md b/docs/ai/L1/05_workflows.md deleted file mode 100644 index acdc4b0b76..0000000000 --- 
a/docs/ai/L1/05_workflows.md +++ /dev/null @@ -1,166 +0,0 @@ -# 05 Workflows - -> Step-by-step guides for common development tasks. - -## Create a New TTS / ASR / LLM Extension - -**Fastest path**: Copy a similar extension and adapt it. - -| Type | Copy From | Base Class | -| ----------- | -------------------------- | --------------------------- | -| TTS (HTTP) | `rime_http_tts` | `AsyncTTS2HttpExtension` | -| TTS (WS) | `deepgram_tts` | `AsyncTTS2BaseExtension` | -| ASR | `deepgram_asr_python` | `AsyncASRBaseExtension` | -| LLM | `openai_llm2_python` | `AsyncLLMBaseExtension` | - -```bash -cp -r agents/ten_packages/extension/deepgram_tts agents/ten_packages/extension/my_vendor_tts -``` - -Then: -1. Rename addon decorator, class names, `manifest.json` `name` field -2. Implement the abstract methods for your vendor API -3. Create `tests/configs/` with required config files (see below) -4. Run guarder tests: `task tts-guarder-test EXTENSION=my_vendor_tts` -5. Run formatter: `task format` - -**Required test config files** for TTS: `property.json`, `property_basic_audio_setting1.json`, -`property_basic_audio_setting2.json`, `property_dump.json`, `property_miss_required.json`, -`property_invalid.json` - -**Required test config files** for ASR: `property_en.json`, `property_zh.json`, -`property_invalid.json`, `property_dump.json` - -For full walkthrough with code and all 15/10 test details, see -[Extension Development](deep_dives/extension_development.md) and [Testing](deep_dives/testing.md). - -## Add Extension to a Graph - -1. **Add node** to `predefined_graphs[].graph.nodes[]` in the example's `tenapp/property.json`: - ```json - {"type": "extension", "name": "my_tts", "addon": "my_tts_python", - "extension_group": "tts_group", - "property": {"api_key": "${env:MY_API_KEY}"}} - ``` - -2. 
**Add connections** — wire data flow between extensions:
-   ```json
-   {"extension": "my_tts",
-    "data": [{"name": "tts_text_input", "source": [{"extension": "main"}]}],
-    "audio_frame": [{"name": "pcm_frame", "dest": [{"extension": "agora_rtc"}]}]}
-   ```
-
-3. **Add dependency** to example `tenapp/manifest.json`:
-   ```json
-   {"type": "extension", "name": "my_tts_python", "version": "0.1.0"}
-   ```
-
-4. **Install** (use `task install`, not just `tman install` — the latter can wipe `bin/main`):
-   ```bash
-   docker exec ten_agent_dev bash -c "cd /app/agents/examples/<example> && task install"
-   ```
-
-5. **Nuclear restart** (required when graphs are added/removed):
-   ```bash
-   sudo docker exec ten_agent_dev bash -c \
-     "pkill -9 -f 'bin/api'; pkill -9 -f bun; pkill -9 -f node; pkill -9 -f next-server; pkill -9 -f tman"
-   sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock"
-   sleep 30  # wait for port 3000 TIME_WAIT to clear
-   sudo docker exec -d ten_agent_dev bash -c \
-     "cd /app/agents/examples/<example> && task run > /tmp/task_run.log 2>&1"
-   ```
-
-See [Graph Configuration](deep_dives/graph_configuration.md) for connection types and routing patterns.
-
-**For complex multi-graph setups** (A/B testing vendors, avatar variants), use
-`rebuild_property.py` instead of hand-editing. See
-[Generating property.json](deep_dives/graph_configuration.md#generating-propertyjson-with-rebuild_propertypy).
-
-## Customize the Main Extension
-
-The "main" extension orchestrates agent behavior (greetings, tool routing, interruption).
-Three implementation variants exist: - -| Variant | File | Use Case | -| -------------------- | --------------------- | ------------------------------- | -| Python Cascade | `main_python_cascade` | ASR → LLM → TTS pipeline | -| Python Realtime V2V | `main_python_realtime`| OpenAI Realtime API (voice-to-voice) | -| Node.js Cascade | `main_nodejs_cascade` | TypeScript implementation | - -Modify `on_data()` to change event routing, `on_cmd()` for tool handling. - -## Run Tests - -```bash -# All tests -docker exec ten_agent_dev bash -c "cd /app && task test" - -# Single extension (with dependency install) -docker exec ten_agent_dev bash -c \ - "cd /app && task test-extension EXTENSION=agents/ten_packages/extension/deepgram_tts" - -# Single extension (skip install — faster) -docker exec ten_agent_dev bash -c \ - "cd /app && task test-extension-no-install EXTENSION=agents/ten_packages/extension/deepgram_tts" - -# ASR guarder integration tests -docker exec ten_agent_dev bash -c \ - "cd /app && task asr-guarder-test EXTENSION=azure_asr_python" - -# TTS guarder integration tests -docker exec ten_agent_dev bash -c \ - "cd /app && task tts-guarder-test EXTENSION=deepgram_tts" -``` - -See [Testing](deep_dives/testing.md) for test structure and debugging. 
- -## Restart After Changes - -| What Changed | Action | -| ------------------------------- | ---------------------------------------------------- | -| `property.json` (graphs added) | Nuclear restart (kill all, remove lock, task run) | -| `property.json` (config only) | No restart needed (loaded per session) | -| `.env` | `docker compose down && docker compose up -d` + deps | -| Python code | Restart server only | -| Go code | `task install` then restart server | -| Container restart | Reinstall Python deps, then `task run` | - -## Build and Install - -```bash -# Full install (first time or after adding extensions) — ALWAYS prefer this -docker exec ten_agent_dev bash -c \ - "cd /app/agents/examples/ && task install" - -# Install Python deps only -docker exec ten_agent_dev bash -c \ - "cd /app/agents/examples//tenapp && bash scripts/install_python_deps.sh" - -# Install extension dependencies only (creates symlinks) — WARNING: can wipe bin/main -docker exec ten_agent_dev bash -c \ - "cd /app/agents/examples//tenapp && tman install" -``` - -## Update Extension Code in Running Container - -See [Operations and Restarts](deep_dives/operations_restarts.md) for the full procedure -including `docker cp` syntax, symlink verification, and restart steps. - -## Pre-Commit Checks - -```bash -# Format Python code (Black, line-length 80) -docker exec ten_agent_dev bash -c "cd /app && task format" - -# Check formatting without modifying -docker exec ten_agent_dev bash -c "cd /app && task check" -``` - -Pre-commit hooks validate: API key patterns, Black formatting, conventional commit messages. 
- -## Related Deep Dives - -- [Extension Development](deep_dives/extension_development.md) — Full extension creation with code examples -- [Graph Configuration](deep_dives/graph_configuration.md) — Connection wiring and routing patterns -- [Testing](deep_dives/testing.md) — Test infrastructure, guarder tests, debugging -- [Operations and Restarts](deep_dives/operations_restarts.md) — Full restart procedures, recovery diff --git a/docs/ai/L1/06_interfaces.md b/docs/ai/L1/06_interfaces.md deleted file mode 100644 index d87b91514f..0000000000 --- a/docs/ai/L1/06_interfaces.md +++ /dev/null @@ -1,150 +0,0 @@ -# 06 Interfaces - -> REST API contracts, graph connection schemas, and base class abstract methods. - -## REST API Endpoints - -The Go server (`server/internal/http_server.go`) exposes: - -| Endpoint | Method | Purpose | Key Fields | -| -------------------- | ------ | ------------------------------------ | --------------------------------- | -| `/health` | GET | Health check | Returns `{"code":"0"}` | -| `/graphs` | GET | List available graphs | Returns `data[].name` | -| `/start` | POST | Start agent session | `graph_name`, `channel_name` | -| `/stop` | POST | Stop agent session | `channel_name` | -| `/ping` | POST | Keep session alive | `channel_name` | -| `/list` | GET | List active sessions | Returns worker list | -| `/token/generate` | POST | Generate Agora RTC token | `channel_name`, `uid` | - -### POST /start Request Body - -```json -{ - "request_id": "uuid", - "channel_name": "test_channel", - "user_uid": 176573, - "graph_name": "voice_assistant", - "properties": { - "openai_llm2_python": {"model": "gpt-4o-mini"} - }, - "timeout": 60 -} -``` - -- `properties` — per-extension overrides merged into graph node properties -- `timeout` — seconds of inactivity before auto-stop (-1 = never) - -## Graph Connection Types - -Connections in `property.json` define data flow between extensions: - -### Command Connections (`cmd`) - -```json -{"extension": "main", "cmd": 
[ - {"name": "tool_register", "dest": [{"extension": "llm"}]}, - {"name": "on_user_joined", "source": [{"extension": "agora_rtc"}]} -]} -``` - -Common commands: `tool_register`, `on_user_joined`, `flush`, `chat_completion_call`, -`update_configs` - -### Data Connections (`data`) - -```json -{"extension": "llm", "data": [ - {"name": "text_data", "source": [{"extension": "main"}]}, - {"name": "text_data", "dest": [{"extension": "tts"}]} -]} -``` - -Common data: `asr_result`, `text_data`, `tts_text_input`, `tts_audio_start`, -`tts_audio_end`, `error` - -### Audio Frame Connections (`audio_frame`) - -```json -{"extension": "agora_rtc", "audio_frame": [ - {"name": "pcm_frame", "dest": [{"extension": "stt"}]} -]} -``` - -### Video Frame Connections (`video_frame`) - -```json -{"extension": "agora_rtc", "video_frame": [ - {"name": "video_frame", "dest": [{"extension": "vision"}]} -]} -``` - -## Base Class Abstract Methods - -### ASR (`AsyncASRBaseExtension`) - -| Method | Returns | Purpose | -| --------------------------- | --------- | -------------------------------- | -| `vendor()` | `str` | Vendor name (e.g., "deepgram") | -| `start_connection()` | `None` | Connect to ASR service | -| `stop_connection()` | `None` | Disconnect | -| `send_audio(frame)` | `bool` | Send audio frame to service | -| `finalize()` | `None` | Drain pending audio | -| `is_connected()` | `bool` | Connection status check | -| `input_audio_sample_rate()` | `int` | Expected sample rate (e.g., 16000)| - -**Output helpers**: `send_asr_result()`, `send_asr_error()`, `send_asr_finalize_end()`, -`send_connect_delay_metrics()`, `send_vendor_metrics()` - -### TTS (`AsyncTTS2BaseExtension`) - -| Method | Returns | Purpose | -| ------------------------------- | -------- | ------------------------------------ | -| `vendor()` | `str` | Vendor name (e.g., "elevenlabs") | -| `request_tts(tts_text_input)` | `AsyncIterator` | Generate audio from text | -| `cancel_tts()` | `None` | Handle flush/cancellation | -| 
`synthesize_audio_sample_rate()`| `int` | Output sample rate (e.g., 24000) | -| `synthesize_audio_channels()` | `int` | Channel count (default: 1) | -| `synthesize_audio_sample_width()`| `int` | Bytes per sample (default: 2) | - -**Output helpers**: `send_tts_audio_data()`, `send_tts_audio_start()`, `send_tts_audio_end()`, -`send_tts_error()`, `send_tts_ttfb_metrics()`, `send_tts_text_result()` - -**State machine**: QUEUED → PROCESSING → FINALIZING → COMPLETED (per request) - -### LLM (`AsyncLLMBaseExtension`) - -| Method | Returns | Purpose | -| ------------------------------- | ------- | -------------------------------- | -| `on_call_chat_completion()` | varies | Handle sync command requests | -| `on_data_chat_completion()` | varies | Handle stream-based data input | -| `on_tools_update(tool_metadata)`| `None` | Handle new tool registration | - -**Tool flow**: Extensions register tools via `CMD_TOOL_REGISTER` → LLM stores in -`available_tools` → LLM calls tools during completion → results returned. - -## Manifest API Interface - -Extensions declare their API interface in `manifest.json`: - -```json -{ - "api": { - "interface": [ - {"import_uri": "../../system/ten_ai_base/api/tts-interface.json"} - ], - "property": { - "api_key": {"type": "string"}, - "model": {"type": "string"}, - "sample_rate": {"type": "int32"} - } - } -} -``` - -Interface JSON files define the standard cmd/data/audio_frame schemas for each extension type. 
- -## Related Deep Dives - -- [Extension Development](deep_dives/extension_development.md) — Implementing abstract methods -- [Server Architecture](deep_dives/server_architecture.md) — Endpoint handlers and property injection -- [Graph Configuration](deep_dives/graph_configuration.md) — Full connection wiring examples diff --git a/docs/ai/L1/07_gotchas.md b/docs/ai/L1/07_gotchas.md deleted file mode 100644 index 15251d5f57..0000000000 --- a/docs/ai/L1/07_gotchas.md +++ /dev/null @@ -1,117 +0,0 @@ -# 07 Gotchas - -> Critical pitfalls, tribal knowledge, and troubleshooting. - -## CRITICAL: Property Getters Return Tuples - -All `get_property_*()` methods return `(value, error_or_none)`, not the raw value. - -```python -# WRONG — causes TypeError -threshold = await ten_env.get_property_float("threshold") -if threshold > 0.5: # TypeError: '>' not supported between 'float' and 'tuple' - -# CORRECT — extract from tuple -threshold_result = await ten_env.get_property_float("threshold") -threshold = threshold_result[0] if isinstance(threshold_result, tuple) else threshold_result -``` - -This applies to `get_property_string()`, `get_property_int()`, `get_property_float()`, -`get_property_bool()`. Always extract `[0]`. - -## CRITICAL: Signal Handlers Forbidden - -Extensions run in worker threads. Signal handlers only work in the main thread. - -```python -# WRONG — raises ValueError: signal only works in main thread -signal.signal(signal.SIGTERM, handler) -atexit.register(cleanup) - -# CORRECT — use extension lifecycle -async def on_stop(self, ten_env): - await self.cleanup() -``` - -## CRITICAL: Always Use `task run` - -Never start the server with `./bin/api` or `./bin/main` directly. -`task run` sets the correct PYTHONPATH and starts all services together -(API server + playground + TMAN Designer). - -## Zombie Worker Processes - -Worker processes (`bin/main`) can survive container and server restarts. -Always check for and kill zombies before restarting. 
- -## .env Loaded at Container Startup Only - -Editing `.env` while the container is running has **no effect**. You must -`docker compose down && docker compose up -d`, then reinstall Python deps. - -## Next.js Lock File - -After crashes, `.next/dev/lock` becomes stale, preventing restart. Delete it -and do a full restart. See [Operations and Restarts](deep_dives/operations_restarts.md). - -## Python Deps Not Persisted - -Python dependencies are lost on container restart. Always reinstall after -`docker compose down && up`. - -## tman install Can Wipe bin/main - -Running `tman install` when system dependencies have newer versions replaces -the runtime packages and **deletes `bin/main`**. Use `task install` (full -rebuild) instead of bare `tman install`. Signs: Worker fails with -`bin/main: No such file or directory` in logs. - -## tman Install Creates Symlinks - -Never manually `ln -s` for extensions. Use `tman install` which resolves -dependencies and creates correct links. If a symlink is missing after -`tman install`, create it manually as a fallback. - -## docker cp Creates Nested Directories - -When using `docker cp` to update extension code, trailing slashes create -nested directories. Use `docker cp ./ext/. container:/path/ext/` syntax. -Signs: `ModuleNotFoundError: No module named 'ten_packages.extension.X'`. - -## Audio Routing: Split at Source Only - -When routing audio to multiple destinations, the split must happen at the -source node (e.g., `agora_rtc`), not at intermediate nodes. Splitting from -intermediate nodes can cause crashes. - -## Frontend Caches Graph List - -The playground caches the `/graphs` API response. When adding or removing -graphs from `property.json`, a full restart is required — simple server -restart is not enough. - -## Manifest Module Name Must Match - -The `name` field in extension `manifest.json` must exactly match the `addon` -field used in graph nodes in `property.json`. Mismatches cause silent failures. 
- -## next-server Holds Port 3000 - -Killing `node` and `bun` is not enough — `next-server` is a separate process -that holds port 3000. If port 3000 is occupied, Next.js silently starts on -3001+ which isn't Docker-exposed, making the frontend appear down. - -## Apple Silicon Docker - -Docker containers may need Rosetta for x86 images on Apple Silicon Macs. -Enable in Docker Desktop: Settings > General > Use Rosetta. - -## Windows Line Endings - -Before cloning on Windows: `git config --global core.autocrlf false` - -## Related Deep Dives - -- [Operations and Restarts](deep_dives/operations_restarts.md) — Full restart procedures, port debugging, recovery -- [Deployment](deep_dives/deployment.md) — Production setup, persistent startup -- [Server Architecture](deep_dives/server_architecture.md) — Worker lifecycle, session management diff --git a/docs/ai/L1/08_security.md b/docs/ai/L1/08_security.md deleted file mode 100644 index 5efef433c8..0000000000 --- a/docs/ai/L1/08_security.md +++ /dev/null @@ -1,88 +0,0 @@ -# 08 Security - -> Secret management, input validation, and repository hygiene. 
- -## API Key Management - -- **Single source**: All API keys live in `ai_agents/.env` (git-ignored) -- **Never hardcode** keys in `property.json` — use `${env:VAR_NAME}` substitution -- **Persistent storage**: Keep a copy of keys outside the repo (e.g., `~/api_keys.txt`) - so branch switches don't lose them -- See `.env.example` for the complete variable catalog - -## Environment Variable Substitution - -In `property.json`, reference secrets via: - -```json -{ - "api_key": "${env:DEEPGRAM_API_KEY}", - "region": "${env:AZURE_REGION|eastus}" -} -``` - -| Syntax | Behavior | -| ------------------------- | ---------------------------- | -| `${env:VAR}` | Required — error if missing | -| `${env:VAR\|}` | Optional — empty if missing | -| `${env:VAR\|default}` | Optional — default if missing| - -## Sensitive Data in Logs - -Extensions must encrypt sensitive fields before logging: - -```python -def to_str(self, sensitive_handling: bool = True) -> str: - config = copy.deepcopy(self) - if config.params and "api_key" in config.params: - config.params["api_key"] = utils.encrypt(config.params["api_key"]) - return f"{config}" -``` - -Never log raw API keys, tokens, or credentials. 
- -## Server-Side Protections - -The Go server (`http_server.go`) implements: - -- **Path traversal prevention**: Ignores client-requested `tenapp_dir`, always uses - the launch-configured directory -- **Channel name sanitization**: Validated before use in file operations -- **Safe type conversion**: Property values are type-checked during merge -- **Recursive property merge**: Prevents injection via nested config overrides - -## Pre-Commit Hooks - -| Hook | What It Checks | -| ------------- | ----------------------------------------------------------- | -| `pre-commit` | Scans staged files for API key patterns (`API_KEY.*=[A-Za-z0-9]{20,}`) | -| `pre-commit` | Black formatting compliance (line-length 80) | -| `commit-msg` | Conventional commit format, blocks AI tool name references | - -## Git-Ignored Files - -These are auto-generated — never modify or commit them: - -| Pattern | Source | -| ---------------------- | ------------------------- | -| `manifest-lock.json` | `tman` dependency resolve | -| `compile_commands.json`| Build system | -| `BUILD.gn`, `.gn` | Build configuration | -| `out/`, `build/` | Build output | -| `.ten/` | TEN runtime files | -| `bin/main`, `bin/worker`| Compiled binaries | -| `.release/` | Release packaging | -| `node_modules/` | JS dependencies | -| `.env` | Environment secrets | - -## Files That Should Never Be Committed - -- `.env` (API keys and secrets) -- `*.pem` (certificates) -- `*.pcm` (audio dumps) -- Credential files, tokens, session data - -## Related Deep Dives - -- [Deployment](deep_dives/deployment.md) — Production security considerations -- [Server Architecture](deep_dives/server_architecture.md) — Server-side validation details diff --git a/docs/ai/L1/deep_dives/_index.md b/docs/ai/L1/deep_dives/_index.md deleted file mode 100644 index 07b5b13b20..0000000000 --- a/docs/ai/L1/deep_dives/_index.md +++ /dev/null @@ -1,10 +0,0 @@ -# Deep Dives Index - -| Document | Summary | Load When | -| 
-------------------------------------------------------- | ------------------------------------------------ | ------------------------------------------------ | -| [extension_development.md](extension_development.md) | Full extension creation guide, base classes, test configs, pre-submission checklist | Creating a new TTS/ASR/LLM extension | -| [graph_configuration.md](graph_configuration.md) | Graph nodes, connections, routing, property.json | Modifying graphs or wiring extensions together | -| [testing.md](testing.md) | All 15 TTS + 10 ASR guarder tests, pass criteria, config files, debugging | Running or debugging tests for an extension | -| [deployment.md](deployment.md) | Docker, Cloudflare, Nginx, Grafana monitoring | Deploying to production or setting up monitoring | -| [server_architecture.md](server_architecture.md) | Go server, property injection, worker lifecycle | Understanding server internals or debugging | -| [operations_restarts.md](operations_restarts.md) | Full restart procedures, port debugging, recovery| Restarting services, crash recovery, port conflicts| diff --git a/docs/ai/L1/deep_dives/deployment.md b/docs/ai/L1/deep_dives/deployment.md deleted file mode 100644 index 3ffae48c77..0000000000 --- a/docs/ai/L1/deep_dives/deployment.md +++ /dev/null @@ -1,206 +0,0 @@ -# Deployment - -> **When to Read This:** Load this document when you are deploying to production, -> setting up HTTPS access, configuring monitoring, or ensuring services persist -> across session closures. 
- -## Docker Compose Setup - -The development container is defined in `ai_agents/docker-compose.yml`: - -```yaml -services: - ten_agent_dev: - image: ghcr.io/ten-framework/ten_agent_build:0.7.14 - container_name: ten_agent_dev - ports: - - "49483:49483" # TMAN Designer - - "3000:3000" # Playground - - "8000-9001:8000-9001" # API + worker range - volumes: - - .:/app - environment: - - LOG_PATH=${LOG_PATH} -``` - -Start: `cd ai_agents && docker compose up -d` - -## Persistent Startup (Survives Session Closure) - -Use `-d` flag with `docker exec` to keep services running after terminal disconnect: - -```bash -# 1. Clean up existing processes -sudo docker exec ten_agent_dev bash -c "pkill -9 -f 'bin/api'; pkill -9 node; pkill -9 bun" -ps -elf | grep 'bin/main' | grep -v grep | awk '{print $4}' | xargs -r sudo kill -9 2>/dev/null - -# 2. Remove stale lock files -sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock" - -# 3. Install Python dependencies -sudo docker exec ten_agent_dev bash -c \ - "cd /app/agents/examples/voice-assistant-advanced/tenapp && bash scripts/install_python_deps.sh" - -# 4. Start everything in detached mode -sudo docker exec -d ten_agent_dev bash -c \ - "cd /app/agents/examples/voice-assistant-advanced && task run > /tmp/task_run.log 2>&1" - -# 5. Wait and verify -sleep 15 -curl -s http://localhost:8080/health && echo " API ready" -curl -s http://localhost:8080/graphs | jq -r '.data | length' | xargs echo "Graphs:" -curl -s http://localhost:3000 -o /dev/null -w '%{http_code}' | xargs echo "Playground:" -``` - -Key: `-d` flag keeps processes running. `task run` starts API + playground + TMAN Designer. 
- -## Cloudflare Tunnel (Free HTTPS) - -Quick HTTPS access without domain or SSL setup: - -```bash -# Start tunnel -pkill cloudflared -nohup cloudflared tunnel --url http://localhost:3000 > /tmp/cloudflare_tunnel.log 2>&1 & -sleep 5 - -# Get the random URL -grep -o 'https://[^[:space:]]*\.trycloudflare\.com' /tmp/cloudflare_tunnel.log | head -1 -# Example: https://films-colon-msgid-incentives.trycloudflare.com -``` - -- Free tunnels get **random URLs** that change on restart -- No DNS configuration needed -- Good for development and demos - -## Nginx Reverse Proxy (Production HTTPS) - -For production with custom domain and SSL certificates: - -```nginx -server { - listen [::]:453 ssl ipv6only=on; - listen 453 ssl; - ssl_certificate /etc/letsencrypt/live/oai.agora.io/fullchain.pem; - ssl_certificate_key /etc/letsencrypt/live/oai.agora.io/privkey.pem; - include /etc/letsencrypt/options-ssl-nginx.conf; - ssl_dhparam /etc/letsencrypt/ssl-dhparams.pem; - - # API endpoints - location ~ ^/(health|ping|token|start|stop|graphs|list)(/|$) { - proxy_pass http://localhost:8080; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } - - # Playground (with WebSocket upgrade) - location / { - proxy_pass http://localhost:3000; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - proxy_http_version 1.1; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; - } -} -``` - -Apply: `sudo nginx -t && sudo systemctl reload nginx` - -## Production Build - -```bash -# Build optimized frontend -docker exec ten_agent_dev bash -c "cd /app/playground && npm run build" - -# Start production server -docker exec -d ten_agent_dev bash -c \ - "cd /app/playground && npm start > /tmp/playground_prod.log 2>&1" 
-``` - -## Grafana Monitoring - -Located in `tools/grafana-monitoring/`. Three deployment modes: - -### Pull Mode (Development) - -Prometheus scrapes a metrics endpoint exposed by the TEN runtime: - -```json -// In property.json -{ - "ten": { - "exporter": { - "enabled": true, - "type": "prometheus", - "prometheus": { - "listen_address": "0.0.0.0", - "listen_port": 49484 - } - } - } -} -``` - -Setup: `cd tools/grafana-monitoring && docker compose -f docker-compose.pull.yml up -d` - -### Push Mode (Production) - -Uses OTEL Collector to push metrics to Prometheus and logs to Loki: - -```json -// In property.json -{ - "ten": { - "exporter": { - "enabled": true, - "type": "otlp", - "otlp": { - "endpoint": "http://otel-collector:4317" - } - } - } -} -``` - -Setup: `cd tools/grafana-monitoring && docker compose -f docker-compose.push.yml up -d` - -### Hybrid Mode - -Both Pull and Push simultaneously — useful for A/B testing or migration. - -### Monitored Metrics - -| Metric | Good Threshold | What It Measures | -| --------------------------------- | -------------- | ----------------------------------- | -| Extension Lifecycle Duration | < 1 second | on_configure, on_init, on_start, on_stop, on_deinit | -| Extension CMD Processing Duration | < 100ms | P50/P95 command handling time | -| Thread Message Queue Wait Time | < 50ms | Time messages wait before processing| - -### Log Aggregation (Push Mode Only) - -Push mode sends logs to Loki for centralized querying: - -``` -# LogQL query examples -{service_name="ten_agent"} |= "error" -{service_name="ten_agent"} | json | level="error" -{service_name="ten_agent"} |= "deepgram" | json -``` - -## After Container Restart Checklist - -1. Reinstall Python dependencies (not persisted) -2. Start server with `task run` -3. Restart Cloudflare tunnel (if using) -4. Kill any zombie worker processes on host -5. 
Verify with `/health` and `/graphs` endpoints - -## See Also - -- [Back to Setup](../01_setup.md) -- [Server Architecture](server_architecture.md) — Worker lifecycle, session management diff --git a/docs/ai/L1/deep_dives/extension_development.md b/docs/ai/L1/deep_dives/extension_development.md deleted file mode 100644 index ba8588742d..0000000000 --- a/docs/ai/L1/deep_dives/extension_development.md +++ /dev/null @@ -1,653 +0,0 @@ -# Extension Development - -> **When to Read This:** Load this document when you are creating a new TTS, ASR, or LLM -> extension. It covers the exact files to create, base classes to inherit, abstract methods -> to implement, test configs to provide, and guarder tests your extension must pass. - -## Quick Start: Copy an Existing Extension - -The fastest way to create a new extension is to copy a similar one: - -| Extension Type | Good Template to Copy | Base Class | -| -------------- | ------------------------------- | --------------------------- | -| TTS (HTTP) | `rime_http_tts` | `AsyncTTS2HttpExtension` | -| TTS (WebSocket)| `deepgram_tts` | `AsyncTTS2BaseExtension` | -| ASR (WebSocket)| `deepgram_asr_python` | `AsyncASRBaseExtension` | -| LLM | `openai_llm2_python` | `AsyncLLMBaseExtension` | -| LLM Tool | `bingsearch_tool_python` | `AsyncLLMToolBaseExtension` | - -```bash -cp -r agents/ten_packages/extension/deepgram_tts agents/ten_packages/extension/my_vendor_tts -# Then rename: addon decorator, class names, manifest.json name field -``` - -## Directory Structure - -``` -my_vendor_tts_python/ -├── __init__.py # Can be empty -├── addon.py # Registration (MUST match manifest.json name) -├── extension.py # Main logic OR orchestration -├── my_vendor_tts.py # Vendor client (websocket/http logic) -├── config.py # Pydantic config model -├── manifest.json # Metadata + API interface + property schema -├── property.json # Defaults with ${env:VAR} syntax -├── requirements.txt # Python deps -├── README.md # Usage docs -└── tests/ - ├── bin/ - │ 
└── start # Test entry script (sets PYTHONPATH, runs pytest) - └── configs/ - ├── property.json # Default test config - ├── property_basic_audio_setting1.json # Sample rate test 1 (e.g. 16000) - ├── property_basic_audio_setting2.json # Sample rate test 2 (e.g. 24000) - ├── property_dump.json # Audio dump test config - ├── property_miss_required.json # Missing API key test - └── property_invalid.json # Invalid API key test -``` - -## Step 1: addon.py - -```python -from ten_runtime import Addon, register_addon_as_extension, TenEnv - -@register_addon_as_extension("my_vendor_tts_python") -class MyVendorTTSAddon(Addon): - def on_create_instance(self, ten: TenEnv, addon_name: str, context) -> None: - from .extension import MyVendorTTSExtension - ten.on_create_instance_done(MyVendorTTSExtension(addon_name), context) -``` - -The decorator name **must exactly match** `manifest.json` `name` field AND the `addon` -field in graph nodes. - -## Step 2: config.py - -```python -from pydantic import BaseModel, Field -from typing import Any -import copy -from ten_ai_base import utils - -class MyVendorTTSConfig(BaseModel): - api_key: str = "" - model: str = "default-model" - sample_rate: int = 24000 - dump: bool = False - dump_path: str = "" - params: dict[str, Any] = Field(default_factory=dict) - - def validate(self) -> None: - key = self.api_key or self.params.get("api_key", "") - if not key: - raise ValueError("API key is required") - - def to_str(self, sensitive_handling: bool = True) -> str: - if not sensitive_handling: - return f"{self}" - config = copy.deepcopy(self) - if config.params and "api_key" in config.params: - config.params["api_key"] = utils.encrypt(config.params["api_key"]) - return f"{config}" -``` - -## Step 3: manifest.json - -```json -{ - "type": "extension", - "name": "my_vendor_tts_python", - "version": "0.1.0", - "dependencies": [ - {"type": "system", "name": "ten_runtime_python", "version": "0.8"} - ], - "api": { - "interface": [ - {"import_uri": 
"../../system/ten_ai_base/api/tts-interface.json"} - ], - "property": { - "api_key": {"type": "string"}, - "model": {"type": "string"}, - "sample_rate": {"type": "int32"}, - "dump": {"type": "bool"}, - "dump_path": {"type": "string"}, - "params": {"type": "object"} - } - } -} -``` - -Use `tts-interface.json` for TTS, `asr-interface.json` for ASR, `llm-interface.json` for LLM. - -## Step 4: property.json - -```json -{ - "api_key": "${env:MY_VENDOR_API_KEY}", - "model": "default-model", - "sample_rate": 24000 -} -``` - -## Step 5: extension.py — Implementing the Base Class - -### TTS Extension (WebSocket Mode) - -```python -from ten_ai_base.tts2 import AsyncTTS2BaseExtension - -class MyVendorTTSExtension(AsyncTTS2BaseExtension): - def vendor(self) -> str: - return "my_vendor" - - async def on_init(self, ten_env) -> None: - await super().on_init(ten_env) - config_json, _ = await ten_env.get_property_to_json("") - self.config = MyVendorTTSConfig(**json.loads(config_json)) - self.config.validate() - - async def on_start(self, ten_env) -> None: - await super().on_start(ten_env) - self.client = MyVendorTTSClient(self.config, ten_env) - await self.client.connect() - - async def on_stop(self, ten_env) -> None: - await super().on_stop(ten_env) - await self.client.close() - - async def request_tts(self, tts_text_input) -> AsyncIterator[tuple[bytes, int | None]]: - text = tts_text_input.get_text() - request_id = tts_text_input.get_request_id() - async for audio_chunk in self.client.synthesize(text, request_id): - yield audio_chunk, None # (bytes, event_status) - - async def cancel_tts(self) -> None: - await self.client.cancel() - - def synthesize_audio_sample_rate(self) -> int: - return self.config.sample_rate - - def synthesize_audio_channels(self) -> int: - return 1 # mono - - def synthesize_audio_sample_width(self) -> int: - return 2 # 16-bit -``` - -**TTS2 state machine**: The base class manages request states automatically: -QUEUED -> PROCESSING -> FINALIZING -> 
COMPLETED. Your `request_tts()` just yields audio bytes. - -**Output events sent automatically** by the base class: -- `tts_audio_start` — when first audio chunk is ready -- `pcm_frame` — for each audio chunk -- `tts_audio_end` — when request completes -- `tts_error` — on failure - -### TTS Extension (HTTP Mode) - -Simpler — for non-streaming HTTP APIs: - -```python -from ten_ai_base.tts2_http import AsyncTTS2HttpExtension - -class MyVendorTTSExtension(AsyncTTS2HttpExtension): - def vendor(self) -> str: - return "my_vendor" - - async def request_tts(self, text: str, request_id: str) -> AsyncIterator[bytes]: - async with httpx.AsyncClient() as client: - async with client.stream("POST", self.url, json={"text": text}) as resp: - async for chunk in resp.aiter_bytes(): - yield chunk - - def synthesize_audio_sample_rate(self) -> int: - return self.config.sample_rate -``` - -### ASR Extension - -```python -from ten_ai_base.asr import AsyncASRBaseExtension - -class MyVendorASRExtension(AsyncASRBaseExtension): - def vendor(self) -> str: - return "my_vendor" - - async def start_connection(self) -> None: - self.ws = await websockets.connect(self.url, headers=self.auth_headers) - # Start a listener task for results - asyncio.create_task(self._listen_for_results()) - - async def stop_connection(self) -> None: - if self.ws: - await self.ws.close() - self.ws = None - - async def send_audio(self, frame) -> bool: - buf = frame.lock_buf() - data = bytes(buf) - frame.unlock_buf(buf) - await self.ws.send(data) - return True - - async def finalize(self) -> None: - await self.ws.send(json.dumps({"type": "CloseStream"})) - # Wait for final results before returning - - def is_connected(self) -> bool: - return self.ws is not None and self.ws.open - - def input_audio_sample_rate(self) -> int: - return 16000 - - async def _listen_for_results(self): - async for msg in self.ws: - result = json.loads(msg) - if result.get("is_final"): - asr_result = ASRResult(text=result["transcript"], 
language="en-US", ...) - await self.send_asr_result(asr_result) -``` - -**ASR output methods** you must call: -- `await self.send_asr_result(asr_result)` — for each transcription -- `await self.send_asr_error(error, vendor_info)` — on vendor errors -- `await self.send_asr_finalize_end()` — when finalize completes - -**Buffer strategy**: Override `buffer_strategy()` to return `ASRBufferConfigModeKeep` -if you want audio buffered during disconnects (default discards). - -### LLM Extension - -```python -from ten_ai_base.llm import AsyncLLMBaseExtension - -class MyLLMExtension(AsyncLLMBaseExtension): - async def on_call_chat_completion(self, ten_env, **kwargs): - # Handle command-based chat requests - pass - - async def on_data_chat_completion(self, ten_env, **kwargs): - # Handle stream-based data input - pass - - async def on_tools_update(self, ten_env, tool_metadata): - async with self._available_tools_lock: - self.available_tools = tool_metadata -``` - ---- - -## TTS Audio Pipeline: Data Types and Flow - -Understanding the data types is critical for implementing TTS extensions correctly. 
- -### Data Flow Through the Pipeline - -``` -User speaks → Agora RTC → pcm_frame → ASR → asr_result → main_control - → text_data → LLM → text_data → main_control → tts_text_input → TTS - → pcm_frame → Agora RTC → User hears -``` - -### tts_text_input (incoming to your extension) - -```python -class TTSTextInput: - request_id: str # Unique request identifier - text: str # Text chunk to synthesize - text_input_end: bool # True = last chunk for this request_id - metadata: dict # Context: {session_id, turn_id, ...} -``` - -- Multiple `tts_text_input` messages can share one `request_id` (the "append" pattern) -- `text_input_end=True` signals no more text is coming for this request -- The base class handles queuing and buffering — your `request_tts()` receives complete inputs - -### tts_audio_start / tts_audio_end (outgoing from your extension) - -These are sent automatically by the base class. You don't need to send them manually. - -```json -// tts_audio_start -{"request_id": "req1", "metadata": {"session_id": "sess1", "turn_id": 1}} - -// tts_audio_end -{ - "request_id": "req1", - "request_event_interval_ms": 1500, - "request_total_audio_duration_ms": 3200, - "reason": 1, - "metadata": {"session_id": "sess1", "turn_id": 1} -} -``` - -**Reason values**: `REQUEST_END` (1) = normal completion, `INTERRUPTED` (2) = flush/cancel, -`ERROR` (3) = failure. - -### tts_flush / tts_flush_end - -Flush is triggered when the user interrupts (speaks while TTS is playing). - -```json -// tts_flush (incoming signal) -{"flush_id": "flush_abc123", "metadata": {"session_id": "sess1"}} - -// tts_flush_end (your extension's response — sent automatically by base class) -{"flush_id": "flush_abc123", "metadata": {"session_id": "sess1"}} -``` - -**Critical**: `flush_id` and `metadata` must be echoed back exactly. - -## Flush Handling in TTS Extensions - -The base class (`AsyncTTS2BaseExtension`) handles most flush logic automatically. 
-Your extension only needs to implement `cancel_tts()`: - -```python -async def cancel_tts(self) -> None: - """Called when a flush signal arrives. Stop any in-progress synthesis.""" - if self.client: - await self.client.cancel() -``` - -### What the Base Class Does on Flush - -1. Acquires `_put_lock` to block new `tts_text_input` arrivals -2. Clears `_flush_complete_event` to prevent race conditions -3. Flushes the internal queue (discards all pending items) -4. Calls `cancel_tts()` on your extension (you stop the vendor API) -5. Sends `tts_audio_end` with `reason=INTERRUPTED` for the current request -6. Sends `tts_flush_end` with the echoed `flush_id` and `metadata` -7. Resets all request state (ready for next request) -8. Sets `_flush_complete_event` to re-enable queue processing - -### Request Interleaving (How Buffering Works) - -When multiple requests arrive with different `request_id`s: - -1. First request is processed immediately (`_processing_request_id = "req1"`) -2. Messages for other request_ids are **buffered** in `_pending_messages` -3. When req1 completes, the next buffered request is released (FIFO order) -4. Each request maintains strict event ordering: `audio_start → frames → audio_end` - -Your `request_tts()` doesn't need to handle interleaving — the base class does it. - -## The Three property.json Files - -There are three distinct `property.json` files with different roles: - -### 1. Extension Defaults (`agents/ten_packages/extension//property.json`) - -Default config for the extension. Loaded when no overrides are specified: - -```json -{ - "api_key": "${env:MY_VENDOR_API_KEY}", - "model": "default-model", - "sample_rate": 24000 -} -``` - -### 2. 
App Graph Definition (`agents/examples//tenapp/property.json`) - -Defines the complete agent — nodes, connections, per-instance overrides: - -```json -{ - "ten": { - "predefined_graphs": [{ - "name": "voice_assistant", - "graph": { - "nodes": [ - {"name": "tts", "addon": "my_vendor_tts_python", - "property": {"model": "high-quality", "sample_rate": 24000}} - ], - "connections": [...] - } - }] - } -} -``` - -Properties here **override** extension defaults for this specific graph instance. - -### 3. Test Configs (`agents/ten_packages/extension//tests/configs/*.json`) - -Used by guarder tests. Each test loads a specific config file: - -```json -{ - "dump": true, - "dump_path": "./tests/dump_output/", - "params": {"key": "${env:MY_VENDOR_API_KEY}", "sample_rate": 16000} -} -``` - -**Loading order**: Extension defaults → App graph overrides → Test config overrides. - ---- - -## Step 6: Test Configuration Files - -Your extension's `tests/configs/` directory needs these config files for the guarder tests to work: - -### For TTS Extensions - -| Config File | Purpose | Content | -| ------------------------------------ | -------------------------------------- | -------------------------------------- | -| `property.json` | Default test config | Valid API key, default model/settings | -| `property_basic_audio_setting1.json` | Sample rate test 1 | `sample_rate: 16000` + valid key | -| `property_basic_audio_setting2.json` | Sample rate test 2 | `sample_rate: 24000` + valid key | -| `property_dump.json` | Audio dump test | `dump: true, dump_path: "./tests/dump_output/"` | -| `property_miss_required.json` | Missing params error test | Empty API key | -| `property_invalid.json` | Invalid params error test | Empty or invalid API key | - -**Example `property.json`** (for elevenlabs): -```json -{ - "params": { - "key": "${env:ELEVENLABS_TTS_KEY}", - "model_id": "eleven_turbo_v2_5" - } -} -``` - -**Example `property_basic_audio_setting1.json`**: -```json -{ - "dump": true, - 
"dump_path": "./tests/keep_dump_output/", - "params": { - "sample_rate": 16000, - "key": "${env:ELEVENLABS_TTS_KEY}" - } -} -``` - -**Example `property_basic_audio_setting2.json`**: -```json -{ - "dump": true, - "dump_path": "./tests/keep_dump_output/", - "params": { - "sample_rate": 24000, - "key": "${env:ELEVENLABS_TTS_KEY}" - } -} -``` - -**Example `property_miss_required.json`**: -```json -{ - "params": {"key": ""} -} -``` - -### For ASR Extensions - -| Config File | Purpose | Content | -| ------------------------ | -------------------------- | ------------------------------------ | -| `property_en.json` | English transcription test | Valid key + `language: "en-US"` | -| `property_zh.json` | Chinese transcription test | Valid key + `language: "zh-CN"` | -| `property_invalid.json` | Error handling test | `key: "invalid", region: "invalid"` | -| `property_dump.json` | Audio dump test | Valid key + `dump: true` | - ---- - -## Step 7: TTS Guarder Tests Your Extension Must Pass - -Run with: `task tts-guarder-test EXTENSION=my_vendor_tts_python` - -There are **15 tests**. 
Here's what each validates: - -### Must-Pass Tests - -| Test | What It Validates | -| --------------------------------------- | -------------------------------------------------------- | -| `test_append_input` | Multiple text inputs appended with same request_id | -| `test_append_input_stress` | High volume of append operations | -| `test_append_input_without_text_input_end` | Missing text_input_end flags handled gracefully | -| `test_append_interrupt` | New requests interrupting in-progress ones | -| `test_basic_audio_setting` | Different sample rates produce different audio | -| `test_corner_input` | Special chars, emojis, very short/long text | -| `test_dump` | Audio dump files created with valid PCM data | -| `test_dump_each_request_id` | Each request_id produces separate dump file | -| `test_empty_text_request` | Empty/whitespace text: audio_end within 500ms, no crash | -| `test_flush` | Flush signal: receives flush_end, no data after 5s | -| `test_interleaved_requests` | 8 concurrent requests maintain separate audio streams | -| `test_invalid_required_params` | Invalid API key returns FATAL ERROR, no crash | -| `test_invalid_text_handling` | Malformed text handled without crash | -| `test_metrics` | TTFB metrics generated with valid timestamps | -| `test_miss_required_params` | Missing API key returns appropriate error | - -### Critical Pass Criteria - -- **Event ordering**: `tts_audio_start` -> `pcm_frame`(s) -> `tts_audio_end` per request -- **Request isolation**: Interleaved requests must not mix audio streams -- **Error handling**: Invalid/missing configs must produce errors, never crashes -- **Empty text**: Must complete quickly (audio_end within 500ms), no audio generated -- **Flush**: After flush_end, no more data for 5 seconds -- **Dump files**: Valid PCM data, one file per request_id when enabled - -## Step 8: ASR Guarder Tests Your Extension Must Pass - -Run with: `task asr-guarder-test EXTENSION=my_vendor_asr_python` - -There are **10 tests** (1 
skipped by default): - -| Test | What It Validates | -| --------------------------- | ------------------------------------------------------------ | -| `test_connection_timing` | Connects and transcribes English audio correctly | -| `test_asr_result` | Result structure: id, text, language, session_id fields | -| `test_asr_finalize` | Finalize signal produces final=True result + finalize_end | -| `test_reconnection` | Recovers gracefully after connection failure | -| `test_vendor_error` | Invalid creds produce proper error with vendor info | -| `test_multi_language` | English (en-US) and Chinese (zh-CN) both transcribe correctly| -| `test_dump` | Audio dump files created correctly | -| `test_metrics` | TTFW and TTLW metrics: positive, TTLW > TTFW | -| `test_audio_timestamp` | start_ms and duration_ms accuracy | -| `test_long_duration_stream` | **Skipped by default** — 5+ min stream without timeout | - -### Critical Pass Criteria - -- **Result fields**: Every ASR result must have `id`, `text`, `language`, `session_id` -- **Finalize**: Must produce `final=True` result and `asr_finalize_end` response -- **Error format**: Errors must have `id`, `module`, `code`, `message` + vendor info -- **Metrics**: TTFW > 0, TTLW > TTFW, both in milliseconds -- **Audio format**: Accepts 16-bit PCM, 16kHz, mono, 320 bytes per frame - ---- - -## AudioFrame Creation Pattern - -```python -from ten_runtime import AudioFrame, AudioFrameDataFmt - -frame = AudioFrame.create("pcm_frame") -frame.set_sample_rate(16000) -frame.set_bytes_per_sample(2) # 16-bit -frame.set_number_of_channels(1) # Mono -frame.set_data_fmt(AudioFrameDataFmt.INTERLEAVE) -frame.set_samples_per_channel(len(pcm_data) // 2) -frame.alloc_buf(len(pcm_data)) -buf = frame.lock_buf() -buf[:] = pcm_data -frame.unlock_buf(buf) -await ten_env.send_audio_frame(frame) -``` - -Set all properties **before** `alloc_buf()`. - -## Params Dict Pattern - -For HTTP/WebSocket vendor APIs: - -1. 
Store all config including `api_key` in `params` dict -2. Extract `api_key` for auth headers in client constructor -3. Strip `api_key` from params **only when building the HTTP request payload** -4. In `update_params()`: add vendor-required params, normalize keys - -```python -# Client constructor -self.api_key = config.params.get("api_key", "") -self.headers = {"Authorization": f"Bearer {self.api_key}"} - -# Request method -payload = {**self.config.params} -payload.pop("api_key", None) -``` - -## Bidirectional Extension Pattern - -For extensions that both receive from and send to the graph: - -```python -class MyBridge(AsyncExtension): - async def on_init(self, ten_env): - self.ten_env = ten_env # Store for callbacks - - async def on_audio_frame(self, ten_env, audio_frame): - buf = audio_frame.lock_buf() - self.external_system.send(bytes(buf)) - audio_frame.unlock_buf(buf) - - async def _external_callback(self, data): - frame = AudioFrame.create("pcm_frame") - # ... fill frame ... - await self.ten_env.send_audio_frame(frame) -``` - -## Pre-Submission Checklist - -- [ ] `addon.py` decorator name matches `manifest.json` `name` field -- [ ] All abstract methods implemented (vendor, request_tts/send_audio, etc.) 
-- [ ] Config validation raises ValueError for missing required params -- [ ] `to_str()` encrypts sensitive fields before logging -- [ ] `tests/configs/` has all required config files (see Step 6) -- [ ] `task tts-guarder-test` or `task asr-guarder-test` passes -- [ ] `task format` passes (Black, line-length 80) -- [ ] `task lint-extension EXTENSION=my_vendor_tts_python` passes -- [ ] `requirements.txt` lists all Python dependencies -- [ ] `README.md` documents config properties and env vars -- [ ] No hardcoded API keys anywhere - -## Language-Specific Notes - -| Language | Create Command | -| ---------- | -------------------------------------------------------------------- | -| Python | `tman create extension name --template default_async_extension_python` | -| Go | `tman create extension name --template default_extension_go` | -| C++ | `tman create extension name --template default_extension_cpp` | -| Node.js | `tman create extension name --template default_extension_nodejs` | - -## Portal References (Full Guides) - -- [Create a TTS Extension (89K)](https://github.com/TEN-framework/portal/blob/main/content/docs/ten_agent_examples/extension_dev/create_tts_extension.mdx) [EXTERNAL] -- [Create an ASR Extension (39K)](https://github.com/TEN-framework/portal/blob/main/content/docs/ten_agent_examples/extension_dev/create_asr_extension.mdx) [EXTERNAL] -- [Create a Hello World Extension](https://github.com/TEN-framework/portal/blob/main/content/docs/ten_agent_examples/extension_dev/create_hello_world_extension.mdx) [EXTERNAL] - -## See Also - -- [Back to Conventions](../04_conventions.md) -- [Back to Workflows](../05_workflows.md) -- [Testing](testing.md) — Full guarder test details and debugging diff --git a/docs/ai/L1/deep_dives/graph_configuration.md b/docs/ai/L1/deep_dives/graph_configuration.md deleted file mode 100644 index bfabb78ac8..0000000000 --- a/docs/ai/L1/deep_dives/graph_configuration.md +++ /dev/null @@ -1,410 +0,0 @@ -# Graph Configuration - -> **When to 
Read This:** Load this document when you are modifying graph definitions -> in property.json, adding extensions to agent pipelines, or debugging data flow issues. - -## Overview - -Graphs define which extensions run and how they communicate. They are declared in -`property.json` under the `predefined_graphs` array within the `ten` namespace. - -## Property.json Structure - -```json -{ - "ten": { - "log": { - "handlers": [...] - }, - "predefined_graphs": [ - { - "name": "voice_assistant", - "auto_start": true, - "graph": { - "nodes": [...], - "connections": [...] - } - } - ] - } -} -``` - -- `name` — graph identifier, used in `/start` request's `graph_name` field -- `auto_start` — set to `true` by the server for the selected graph at runtime -- `graph.nodes` — extension instances -- `graph.connections` — data flow wiring - -## Node Schema - -```json -{ - "type": "extension", - "name": "stt", - "addon": "deepgram_asr_python", - "extension_group": "transcription_group", - "property": { - "api_key": "${env:DEEPGRAM_API_KEY}", - "model": "nova-2", - "language": "en-US", - "sample_rate": 16000 - } -} -``` - -| Field | Required | Purpose | -| ----------------- | -------- | ------------------------------------------------- | -| `type` | Yes | Always `"extension"` | -| `name` | Yes | Instance name (used in connections) | -| `addon` | Yes | Extension package name (must match manifest.json) | -| `extension_group` | No | Thread grouping for extensions | -| `property` | No | Config overrides merged with extension defaults | - -## Connection Schema - -Connections define how messages flow between extensions: - -```json -{ - "extension": "main", - "cmd": [ - {"name": "flush", "dest": [{"extension": "llm"}, {"extension": "tts"}]}, - {"name": "on_user_joined", "source": [{"extension": "agora_rtc"}]} - ], - "data": [ - {"name": "text_data", "source": [{"extension": "llm"}]}, - {"name": "text_data", "dest": [{"extension": "tts"}]} - ] -} -``` - -Each connection block is **from the 
perspective of the named extension**: -- `source` — "this extension receives X from these sources" -- `dest` — "this extension sends X to these destinations" - -## Full Graph Example - -A basic voice assistant pipeline (ASR → LLM → TTS): - -```json -{ - "name": "voice_assistant", - "auto_start": false, - "graph": { - "nodes": [ - { - "type": "extension", "name": "agora_rtc", "addon": "agora_rtc", - "extension_group": "rtc_group", - "property": {"app_id": "${env:AGORA_APP_ID}", "channel": "default"} - }, - { - "type": "extension", "name": "stt", "addon": "deepgram_asr_python", - "extension_group": "stt_group", - "property": {"api_key": "${env:DEEPGRAM_API_KEY}", "model": "nova-2"} - }, - { - "type": "extension", "name": "llm", "addon": "openai_llm2_python", - "extension_group": "llm_group", - "property": {"api_key": "${env:OPENAI_API_KEY}", "model": "${env:OPENAI_MODEL}"} - }, - { - "type": "extension", "name": "tts", "addon": "elevenlabs_tts2_python", - "extension_group": "tts_group", - "property": {"api_key": "${env:ELEVENLABS_TTS_KEY}"} - } - ], - "connections": [ - { - "extension": "agora_rtc", - "audio_frame": [ - {"name": "pcm_frame", "dest": [{"extension": "stt"}]} - ] - }, - { - "extension": "stt", - "data": [ - {"name": "asr_result", "dest": [{"extension": "main"}]} - ] - }, - { - "extension": "main", - "cmd": [ - {"name": "flush", "dest": [{"extension": "llm"}, {"extension": "tts"}]}, - {"name": "on_user_joined", "source": [{"extension": "agora_rtc"}]} - ], - "data": [ - {"name": "text_data", "source": [{"extension": "llm"}]}, - {"name": "text_data", "dest": [{"extension": "tts"}]} - ] - }, - { - "extension": "tts", - "data": [ - {"name": "tts_text_input", "source": [{"extension": "main"}]} - ], - "audio_frame": [ - {"name": "pcm_frame", "dest": [{"extension": "agora_rtc"}]} - ] - } - ] - } -} -``` - -## Connection Types Reference - -| Type | Direction | Payload | Example Names | -| ------------- | --------- | ------------------ | 
----------------------------------- | -| `cmd` | Both | Named commands | `flush`, `tool_register`, `on_user_joined`, `chat_completion_call`, `update_configs` | -| `data` | Both | Named data msgs | `asr_result`, `text_data`, `tts_text_input`, `tts_audio_start`, `tts_audio_end`, `error` | -| `audio_frame` | Both | PCM audio streams | `pcm_frame` | -| `video_frame` | Both | Video streams | `video_frame` | - -## Parallel Audio Routing - -When sending audio to multiple destinations, split at the **source node**: - -```json -// CORRECT — split at agora_rtc (source) -{ - "extension": "agora_rtc", - "audio_frame": [ - {"name": "pcm_frame", "dest": [ - {"extension": "stt"}, - {"extension": "vad"} - ]} - ] -} -``` - -Do NOT split from intermediate nodes — this causes runtime crashes. - -## Property Injection - -When the server processes a `/start` request, it dynamically modifies the graph: - -1. **Graph selection**: Filters `predefined_graphs` to match `graph_name`, sets `auto_start: true` -2. **Channel injection**: Scans all nodes — any node with a `"channel"` property gets `channel_name` injected -3. **Start params**: Injects `remote_stream_id`, `bot_stream_id`, `token` via `startPropMap` -4. **Extension overrides**: Merges `req.Properties[extensionName]` into matching node properties -5. **Env var validation**: Resolves all `${env:VAR}` references - -This is why `agora_rtc` and any custom extension with a `"channel"` property automatically -receive the dynamic channel name without code changes. - -## Adding a New Graph - -1. Add a new entry to `predefined_graphs[]` in the example's `tenapp/property.json` -2. Ensure all referenced extensions are listed in `tenapp/manifest.json` -3. Run `tman install` to create symlinks for new dependencies -4. **Nuclear restart** required (frontend caches the graph list) - -## Generating property.json with rebuild_property.py - -For complex deployments with many graph variants, hand-editing property.json is -error-prone. 
The `voice-assistant-advanced` example uses a Python script to generate -it programmatically: - -**Location**: `agents/examples/voice-assistant-advanced/tenapp/rebuild_property.py` - -**Usage**: -```bash -docker exec ten_agent_dev bash -c \ - "cd /app/agents/examples/voice-assistant-advanced/tenapp && python3 rebuild_property.py" -``` - -### How It Works - -The script defines reusable **node configs** as Python dicts, then assembles them -into graphs with helper functions: - -```python -# 1. Define reusable node configs -nova3_stt_100ms = { - "type": "extension", "name": "stt", "addon": "deepgram_ws_asr_python", - "extension_group": "stt", - "property": { - "params": { - "api_key": "${env:DEEPGRAM_API_KEY}", - "model": "nova-3", "language": "en-US", - "interim_results": True, "endpointing": 100, - } - }, -} - -cartesia_tts_sonic3 = { - "type": "extension", "name": "tts", "addon": "cartesia_tts", - "extension_group": "tts", - "property": { - "dump": False, "dump_path": "./", - "params": { - "api_key": "${env:CARTESIA_TTS_KEY}", - "model_id": "sonic-3", - "output_format": {"container": "raw", "sample_rate": 44100}, - }, - }, -} - -gpt51_llm = { - "type": "extension", "name": "llm", "addon": "openai_llm2_python", - "extension_group": "chatgpt", - "property": { - "base_url": "https://api.openai.com/v1", - "api_key": "${env:OPENAI_API_KEY}", - "model": "gpt-5.1", "max_tokens": 1000, - "prompt": "...", "greeting": "...", - }, -} - -# 2. Define reusable connection templates -basic_connections = [ - {"extension": "main_control", "cmd": [...], "data": [...]}, - {"extension": "agora_rtc", "audio_frame": [...], "data": [...]}, - {"extension": "streamid_adapter", "audio_frame": [...]}, - {"extension": "tts", "data": [...], "audio_frame": [...]}, - # ... -] - -# 3. 
Assemble graphs with helper functions -def create_basic_voice_assistant(name, has_avatar=False, avatar_type=None, - tts_config=None, stt_config=None, llm_config=None): - nodes = [agora_rtc_base, stt_config or nova3_stt_100ms, llm_config or ..., ...] - connections = copy.deepcopy(basic_connections) - if has_avatar: - # Modify connections: route TTS audio through avatar instead of direct to RTC - ... - return {"name": name, "auto_start": False, "graph": {"nodes": nodes, "connections": connections}} - -# 4. Build graph list and write property.json -new_graphs = [ - create_basic_voice_assistant("voice_assistant"), - create_basic_voice_assistant("voice_assistant_heygen", has_avatar=True, avatar_type="heygen"), - create_apollo_graph("flux_apollo_gpt_5_1_cartesia", gpt51_llm, flux_stt), - # ... -] - -new_data = {"ten": {"log": log_config, "predefined_graphs": new_graphs}} -with open("property.json", "w") as f: - json.dump(new_data, f, indent=2) -``` - -### Key Patterns in rebuild_property.py - -| Pattern | Purpose | -| ---------------------------- | ---------------------------------------------------- | -| `copy.deepcopy(config)` | Prevent mutation when reusing node configs | -| Parametric helper functions | `create_basic_voice_assistant(name, tts_config=...)` | -| Connection rewiring for avatars | Route TTS audio through avatar instead of direct to RTC | -| Preserve existing log config | `log_config = data["ten"]["log"]` before overwriting | -| Commented-out graph groups | Keep old graph definitions for reference/reactivation| - -### When to Use rebuild_property.py - -- **Multiple graph variants** (A/B testing vendors: Deepgram vs Cartesia TTS) -- **Avatar variants** (same pipeline with/without HeyGen/Anam) -- **LLM model testing** (GPT-4o vs GPT-5.1 vs Groq) -- **Complex connection rewiring** (avatar graphs need different audio routing) - -For simple single-graph setups, editing property.json directly is fine. 
- -## Manifest.json Dependencies - -When adding an extension to a graph, ensure its dependency is in `manifest.json`: - -```json -{ - "dependencies": [ - {"type": "extension", "name": "my_vendor_tts_python", "version": "0.1.0"} - ] -} -``` - -Then run: -```bash -docker exec ten_agent_dev bash -c "cd /app/agents/examples//tenapp && tman install" -``` - -## Main Extension Customization - -The "main" extension controls agent orchestration. Three variants exist: - -| Variant | Language | Pattern | Use Case | -| -------------------- | ---------- | ---------------------------- | -------------------------- | -| Python Cascade | Python | ASR → LLM → TTS pipeline | Standard voice assistant | -| Python Realtime V2V | Python | OpenAI Realtime API | Voice-to-voice (no ASR/TTS)| -| Node.js Cascade | TypeScript | ASR → LLM → TTS pipeline | TypeScript preference | - -Key customization points: -- `on_data()` — event routing (match/case dispatcher) -- `on_cmd()` — tool registration and handling -- Greeting logic in `on_start()` or `on_user_joined` handler - -## Example Apps - -Available in `agents/examples/`. 
Key examples: - -| Example | Description | -| --------------------------------- | ---------------------------------------------------- | -| `voice-assistant` | Basic: Deepgram ASR + OpenAI LLM + ElevenLabs TTS | -| `voice-assistant-advanced` | Multiple graph variants, vendor A/B testing | -| `voice-assistant-realtime` | OpenAI Realtime API (voice-to-voice, no ASR/TTS) | -| `voice-assistant-video` | Vision capability added | -| `voice-assistant-live2d` | Live2D avatar integration | -| `voice-assistant-sip-twilio` | SIP phone integration (Twilio) | -| `voice-assistant-sip-telnyx` | SIP phone integration (Telnyx) | -| `voice-assistant-sip-plivo` | SIP phone integration (Plivo) | -| `voice-assistant-with-ten-vad` | Custom VAD (Voice Activity Detection) | -| `voice-assistant-with-turn-detection` | Transformer-based turn detection | -| `voice-assistant-nodejs` | Node.js implementation | -| `doodler` | Spoken prompts → hand-drawn sketches | -| `speaker-diarization` | Real-time multi-speaker identification | -| `transcription` | Audio transcription tool | -| `websocket-example` | WebSocket transport (no Agora RTC) | -| `http-control` | HTTP-based control interface | - -### voice-assistant vs voice-assistant-advanced - -| Aspect | voice-assistant | voice-assistant-advanced | -| --------------------- | --------------------------- | --------------------------------- | -| Graphs | 1 (`voice_assistant`) | 4+ variants (Flux/Apollo/Cartesia)| -| Vendor switching | Fixed components | Multiple vendor combinations | -| LLM prompts | Simple greeting | Multi-step research workflows | -| Use case | Getting started | Production A/B testing | - -Both follow the same core pipeline: -``` -Agora RTC → streamid_adapter → ASR → main_control → LLM → TTS → Agora RTC -``` - -### Real Graph: voice-assistant/tenapp/property.json - -This is a complete, working graph. 
Key nodes: - -| Node | Addon | Role | -| ------------------ | ------------------------ | ---------------------------------- | -| `agora_rtc` | `agora_rtc` | Audio/video transport | -| `streamid_adapter` | `streamid_adapter` | Stream ID routing | -| `stt` | `deepgram_asr_python` | Speech-to-text | -| `llm` | `openai_llm2_python` | Language model | -| `tts` | `elevenlabs_tts2_python` | Text-to-speech | -| `main_control` | `main_python` | Orchestration (greetings, routing) | -| `message_collector` | `message_collector2` | Transcript collection | - -Connection wiring: -``` -agora_rtc --pcm_frame--> streamid_adapter --pcm_frame--> stt -stt --asr_result--> main_control -main_control --text_data--> llm --text_data--> main_control --tts_text_input--> tts -tts --pcm_frame--> agora_rtc -``` - -## Portal References - -- [Understanding property.json](https://github.com/TEN-framework/portal/blob/main/content/docs/ten_agent_examples/project_structure/property_json.md) [EXTERNAL] -- [Customize Agent via Code](https://github.com/TEN-framework/portal/blob/main/content/docs/ten_agent_examples/customize_agent/modify-main/index.mdx) [EXTERNAL] - -## See Also - -- [Back to Architecture](../02_architecture.md) -- [Back to Workflows](../05_workflows.md) -- [Back to Interfaces](../06_interfaces.md) diff --git a/docs/ai/L1/deep_dives/operations_restarts.md b/docs/ai/L1/deep_dives/operations_restarts.md deleted file mode 100644 index c4bff1c10f..0000000000 --- a/docs/ai/L1/deep_dives/operations_restarts.md +++ /dev/null @@ -1,191 +0,0 @@ -# Operations and Restarts - -> **When to Read This:** Load this document when you need to restart services, -> debug port conflicts, recover from crashes, or clean up zombie processes. 
- -## When to Do a Full Restart - -| What Changed | Action | -| ------------------------------- | ---------------------------------------------------- | -| `property.json` (graphs added) | Full restart (frontend caches graph list) | -| `property.json` (config only) | No restart needed (loaded per session) | -| `.env` | `docker compose down && docker compose up -d` + deps | -| Python code | Restart server only | -| Go code | `task install` then restart server | -| Container restart | Reinstall Python deps, then `task run` | - -## Full Restart Procedure - -Must kill `next-server` too — it holds port 3000 even after `node`/`bun` die: - -```bash -# 1. Kill EVERYTHING -sudo docker exec ten_agent_dev bash -c \ - "pkill -9 -f 'bin/api'; pkill -9 -f bun; pkill -9 -f node; \ - pkill -9 -f next-server; pkill -9 -f tman" - -# 2. Clean up stale files -sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock" - -# 3. Wait for port 3000 TIME_WAIT to clear -# If Next.js can't bind port 3000, it silently starts on 3001/3002 -# which isn't exposed by Docker — the frontend appears down. -sleep 30 - -# 4. Start -sudo docker exec -d ten_agent_dev bash -c \ - "cd /app/agents/examples/ && task run > /tmp/task_run.log 2>&1" - -# 5. Verify (wait ~12s for startup) -sleep 12 -sudo docker exec ten_agent_dev bash -c \ - "curl -s http://localhost:8080/health && \ - curl -s -o /dev/null -w ' Frontend:%{http_code}' http://localhost:3000/" -``` - -## Verification - -Check Next.js started on port 3000 (not 3001+): - -```bash -sudo docker exec ten_agent_dev bash -c \ - "strings /tmp/task_run.log | grep -E 'Local:|Port|Ready|Error'" -``` - -Expected output: -``` - - Local: http://localhost:3000 - Ready in 2.1s -``` - -If you see `Port 3000 is in use`, the frontend is on the wrong port. 
- -## Zombie Worker Cleanup - -Worker processes (`bin/main`) run inside Docker but can survive server restarts: - -```bash -# Check for zombies -sudo docker exec ten_agent_dev bash -c \ - "ps aux | grep 'bin/main' | grep -v grep" - -# Kill them -sudo docker exec ten_agent_dev bash -c \ - "pkill -9 -f 'bin/main'" -``` - -Always kill zombies before restarting the server. - -## Stale Lock Cleanup - -After crashes, `.next/dev/lock` becomes stale: - -```bash -sudo docker exec ten_agent_dev bash -c "rm -f /app/playground/.next/dev/lock" -``` - -Also clear the Next.js cache if React version errors appear: - -```bash -sudo docker exec ten_agent_dev bash -c "rm -rf /app/playground/.next" -``` - -## Port 3000 Conflict Debugging - -If Next.js reports "Port 3000 is in use", find the process holding it: - -```bash -sudo docker exec ten_agent_dev bash -c \ - "for pid in /proc/[0-9]*/fd/*; do \ - link=\$(readlink \$pid 2>/dev/null); \ - echo \"\$link\" | grep -q socket: && \ - inode=\$(echo \$link | grep -oP '\\d+') && \ - grep -q \$inode /proc/net/tcp6 2>/dev/null && \ - grep \$inode /proc/net/tcp6 | grep -q ':0BB8' && \ - echo PID=\$(echo \$pid | cut -d/ -f3) && break; \ - done" -``` - -Kill the PID, wait for TIME_WAIT to clear (~30s), then restart. - -If no PID is found but port is still busy, it's in TIME_WAIT state. Check: - -```bash -sudo docker exec ten_agent_dev bash -c \ - "cat /proc/net/tcp6 | grep ':0BB8'" -``` - -State `06` = TIME_WAIT. Wait 30-60 seconds for it to clear. - -## .env and Container Restart Recovery - -`.env` is loaded at container startup only. 
After editing: - -```bash -cd /home/ubuntu/ten-framework/ai_agents -docker compose down && docker compose up -d -``` - -Then reinstall everything (Python deps are not persisted): - -```bash -sudo docker exec ten_agent_dev bash -c \ - "cd /app/agents/examples/ && task install" -``` - -## Copying Extension Code to Running Container - -When iterating on extension code locally: - -```bash -# Option 1: docker cp with /. suffix (avoids nested dirs) -sudo docker cp ./agents/ten_packages/extension/my_ext/. \ - ten_agent_dev:/app/agents/ten_packages/extension/my_ext/ - -# Option 2: tar with cache exclusion (recommended — avoids -# __pycache__ and .pytest_cache causing import errors) -tar --exclude='__pycache__' --exclude='.pytest_cache' \ - -C ai_agents/agents/ten_packages/extension/my_ext -cf - . | \ - sudo docker exec -i ten_agent_dev tar \ - -C /app/agents/ten_packages/extension/my_ext -xf - - -# Verify symlink exists in the example's tenapp -sudo docker exec ten_agent_dev bash -c \ - "ls -la /app/agents/examples//tenapp/ten_packages/extension/my_ext" - -# If missing, create it manually -sudo docker exec ten_agent_dev bash -c \ - "ln -sf /app/agents/ten_packages/extension/my_ext \ - /app/agents/examples//tenapp/ten_packages/extension/my_ext" -``` - -Then do a full restart. - -**Common pitfall:** If `docker cp` copies `__pycache__` or `.pytest_cache` -from your local machine into the container, it can cause `ImportError` or -stale bytecode during test collection. Use the tar method above or clean -the container directory before copying: - -```bash -sudo docker exec ten_agent_dev bash -c \ - "find /app/agents/ten_packages/extension/my_ext \ - -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null; \ - find /app/agents/ten_packages/extension/my_ext \ - -type d -name .pytest_cache -exec rm -rf {} + 2>/dev/null" -``` - -## After Container Restart Checklist - -1. Reinstall Python dependencies -2. Rebuild Go binary (`task install`) -3. Kill any zombie workers -4. 
Remove stale `.next/dev/lock` -5. Start with `task run` -6. Verify health endpoint and frontend status code - -## See Also - -- [Back to Gotchas](../07_gotchas.md) -- [Back to Workflows](../05_workflows.md) -- [Deployment](deployment.md) — Production setup, Cloudflare, Nginx -- [Server Architecture](server_architecture.md) — Worker lifecycle diff --git a/docs/ai/L1/deep_dives/server_architecture.md b/docs/ai/L1/deep_dives/server_architecture.md deleted file mode 100644 index 18bebf0a15..0000000000 --- a/docs/ai/L1/deep_dives/server_architecture.md +++ /dev/null @@ -1,211 +0,0 @@ -# Server Architecture - -> **When to Read This:** Load this document when you need to understand how the Go API -> server works, how property injection transforms graph configurations at runtime, or -> how worker processes are managed. - -## Overview - -The TEN Agent server is a Go HTTP server built with the Gin framework. It manages -agent session lifecycles — starting worker processes, injecting configuration, and -handling session keepalive/teardown. - -## Server Structure - -``` -server/ -├── main.go # Entry point, parses flags, starts HTTP server -└── internal/ - ├── http_server.go # All endpoint handlers + property injection - └── config.go # startPropMap configuration for parameter injection -``` - -Key launch flag: `-tenapp_dir=` — points to the example's `tenapp/` directory -containing `property.json` and `manifest.json`. 
- -## Endpoint Handlers - -| Handler | Route | Purpose | -| -------------------------------- | ------------------ | ----------------------------------- | -| `handlerHealth()` | `GET /health` | Returns `{"code":"0"}` if running | -| `handleGraphs()` | `GET /graphs` | Reads predefined_graphs from property.json | -| `handlerStart()` | `POST /start` | Spawns worker process for a session | -| `handlerStop()` | `POST /stop` | Terminates worker process | -| `handlerPing()` | `POST /ping` | Resets session timeout timer | -| `handlerList()` | `GET /list` | Lists active workers/channels | -| `handlerGenerateToken()` | `POST /token/generate` | Generates Agora RTC tokens | -| `handleAddonDefaultProperties()` | `GET /addon/default-properties` | Extension property.json files | -| `handlerVectorDocumentUpdate()` | `POST /vector/document/update` | Vector DB updates | -| `handlerVectorDocumentUpload()` | `POST /vector/document/upload` | File uploads for vector DB | - -## Property Injection Pipeline - -When `/start` is called, the server transforms the static `property.json` into a -session-specific configuration. 
This is the core of the `processProperty` function: - -### Step 1: Read Base Configuration - -```go -// Read property.json from the configured tenapp_dir -propertyJsonFile := filepath.Join(s.config.TenappDir, "property.json") -content, _ := os.ReadFile(propertyJsonFile) -``` - -### Step 2: Filter Graphs - -Only the requested graph is kept; its `auto_start` is set to `true`: - -```go -// Find matching graph by name -for _, graph := range predefinedGraphs { - if graph.Name == req.GraphName { - graph.AutoStart = true - filteredGraphs = append(filteredGraphs, graph) - } -} -``` - -### Step 3: Merge Dynamic Properties - -Per-extension property overrides from the request are merged: - -```go -// req.Properties = {"openai_llm2_python": {"model": "gpt-4o-mini"}} -for _, node := range graph.Nodes { - if props, ok := req.Properties[node.Name]; ok { - mergeProperties(node.Property, props) - } -} -``` - -### Step 4: Inject Start Parameters - -The `startPropMap` (defined in `config.go`) maps request fields to node properties: - -```go -var startPropMap = map[string]string{ - "RemoteStreamId": "remote_stream_id", - "BotStreamId": "agora_uid", - "Token": "token", - "WorkerHttpServerPort": "server_port", -} -``` - -These values are injected into every node that has the corresponding property defined. - -### Step 5: Channel Auto-Injection - -Any node with a `"channel"` property automatically receives the request's `channel_name`: - -```go -// Scan all nodes — if node has "channel" property, inject channel_name -for _, node := range graph.Nodes { - if _, hasChannel := node.Property["channel"]; hasChannel { - node.Property["channel"] = req.ChannelName - } -} -``` - -This is future-proof: adding a new extension with a `"channel"` property requires -zero server code changes. - -### Step 6: Environment Variable Resolution - -All `${env:VAR}` and `${env:VAR|default}` references in the property JSON are -resolved against the container's environment. 
- -### Step 7: Write Temp File and Spawn Worker - -The modified property JSON is written to a temporary file, and a worker process -is spawned: - -```go -// Write modified config -tmpFile := filepath.Join(tmpDir, "property.json") -os.WriteFile(tmpFile, modifiedJSON, 0644) - -// Spawn worker -cmd := exec.Command("tman", "run", "start", "--property", tmpFile) -``` - -## Worker Process Lifecycle - -``` -/start request - │ - ▼ -Server: processProperty() → temp property.json - │ - ▼ -Server: exec("tman run start --property ") - │ - ▼ -Worker process starts → loads graph → initializes extensions - │ - ├── Extensions call on_init() → on_start() - ├── Extensions process messages (cmd, data, audio_frame, video_frame) - │ - ├── /ping requests reset the timeout timer - │ - ▼ -/stop request OR timeout - │ - ▼ -Worker: extensions call on_stop() → on_deinit() - │ - ▼ -Worker process terminates -``` - -**Important**: Worker processes run on the **host machine**, not inside Docker. -They can outlive the server process and even container restarts. Always check for -zombie workers with `ps -elf | grep 'bin/main'`. - -## Session Management - -| Action | Server Behavior | -| -------------- | -------------------------------------------------- | -| `/start` | Spawns worker, stores in active workers map | -| `/stop` | Sends SIGTERM to worker, removes from map | -| `/ping` | Resets timeout timer for the channel | -| Timeout | Auto-sends SIGTERM after `timeout` seconds idle | -| `/list` | Returns all active channel → worker mappings | - -Timeout of `-1` means the session never auto-stops (requires explicit `/stop`). - -## LOG_STDOUT for Worker Output - -Worker processes write to stdout. To see their output in `/tmp/task_run.log`, -the `.env` must have: - -```bash -LOG_STDOUT=true -``` - -Without this, extension logs (Python `print()`, `ten_env.log_*()`) are invisible. 
- -## Security Measures - -- **Path traversal prevention**: The server ignores any client-provided `tenapp_dir` - and always uses the launch-configured path -- **Channel name sanitization**: Channel names are validated before use in file paths -- **Safe property merge**: `mergeProperties()` handles nested configs safely with - type checking - -## Configuration (config.go) - -The `startPropMap` in `config.go` controls which request fields map to which -node properties: - -| Request Field | Node Property | Purpose | -| ---------------------- | -------------------- | ------------------------------ | -| `RemoteStreamId` | `remote_stream_id` | Remote user's stream ID | -| `BotStreamId` | `agora_uid` | Bot's Agora UID | -| `Token` | `token` | Agora RTC token | -| `WorkerHttpServerPort` | `server_port` | Worker's HTTP server port | - -## See Also - -- [Back to Architecture](../02_architecture.md) -- [Graph Configuration](graph_configuration.md) — Property.json structure and connections -- [Back to Interfaces](../06_interfaces.md) diff --git a/docs/ai/L1/deep_dives/testing.md b/docs/ai/L1/deep_dives/testing.md deleted file mode 100644 index 7c77e5ce0d..0000000000 --- a/docs/ai/L1/deep_dives/testing.md +++ /dev/null @@ -1,305 +0,0 @@ -# Testing - -> **When to Read This:** Load this document when you need to run tests for an extension, -> understand what the guarder tests validate, or debug test failures. - -## Overview - -Three levels of testing: -1. **Extension standalone tests** — per-extension unit/integration tests in `tests/` -2. **Guarder integration tests** — framework-level ASR/TTS validation suites -3. 
**Root-level tasks** — orchestrated via `Taskfile.yml` - -## Running Tests - -```bash -# All tests -docker exec ten_agent_dev bash -c "cd /app && task test" - -# Single extension with dependency install -docker exec ten_agent_dev bash -c \ - "cd /app && task test-extension EXTENSION=agents/ten_packages/extension/deepgram_tts" - -# Single extension, skip install (faster iteration) -docker exec ten_agent_dev bash -c \ - "cd /app && task test-extension-no-install EXTENSION=agents/ten_packages/extension/deepgram_tts" - -# TTS guarder (16 tests) -docker exec ten_agent_dev bash -c "cd /app && task tts-guarder-test EXTENSION=deepgram_tts" - -# ASR guarder (10 tests) -docker exec ten_agent_dev bash -c "cd /app && task asr-guarder-test EXTENSION=azure_asr_python" - -# Specific test only (faster iteration on failures) -docker exec ten_agent_dev bash -c "cd /app && task tts-guarder-test EXTENSION=deepgram_tts -- -k test_flush" -``` - -**Before running tests**, sync your local code into the container. Use tar -to exclude cache artifacts that cause import errors: - -```bash -tar --exclude='__pycache__' --exclude='.pytest_cache' \ - -C ai_agents/agents/ten_packages/extension/my_ext -cf - . 
| \ - sudo docker exec -i ten_agent_dev tar \ - -C /app/agents/ten_packages/extension/my_ext -xf - -``` - -## Extension Standalone Tests - -Each extension can have `tests/` with a `bin/start` entry point: - -``` -my_extension/tests/ -├── bin/start # Sets PYTHONPATH, runs pytest -├── configs/ # Test config JSON files -│ ├── property.json -│ ├── property_basic_audio_setting1.json -│ ├── property_basic_audio_setting2.json -│ ├── property_dump.json -│ ├── property_miss_required.json -│ └── property_invalid.json -├── conftest.py # Fixtures -└── test_*.py # Test files -``` - -### PYTHONPATH - -Tests need this to import TEN runtime: - -```bash -export PYTHONPATH=".:ten_packages/system/ten_runtime_python/lib:\ -ten_packages/system/ten_runtime_python/interface:\ -ten_packages/system/ten_ai_base/interface:\ -ten_packages/extension/${EXT_NAME}:$PYTHONPATH" -``` - ---- - -## TTS Guarder Tests (15 Tests) - -**Location**: `agents/integration_tests/tts_guarder/` - -These tests run against any TTS extension. The manifest template (`manifest-tmpl.json`) -substitutes `{{extension_name}}` with your extension name at runtime. 
- -### Test Inventory - -| # | Test | What It Validates | Pass Criteria | -|---|------|-------------------|---------------| -| 1 | `test_append_input` | Multiple texts appended with same request_id | audio_start -> frames -> audio_end per group, correct request_id | -| 2 | `test_append_input_stress` | High volume append operations | All appends processed without errors | -| 3 | `test_append_input_without_text_input_end` | Missing text_input_end flag | Processes correctly despite missing flags | -| 4 | `test_append_interrupt` | New requests interrupting in-progress ones | Interrupts handled without crash or malformed audio | -| 5 | `test_basic_audio_setting` | Different sample rates produce different audio | Two configs with different sample_rate yield different output rates | -| 6 | `test_corner_input` | Special chars, emojis, punctuation-only, very short/long | All processed without errors | -| 7 | `test_dump` | Audio dump file creation | Dump file exists, contains valid PCM, size matches duration | -| 8 | `test_dump_each_request_id` | Separate dump files per request_id | Each request_id has own dump file | -| 9 | `test_empty_text_request` | Empty/whitespace text | audio_end within 500ms, no audio data, no crash | -| 10 | `test_flush` | Flush signal handling | Receives flush_end with matching flush_id, no data for 5s after | -| 11 | `test_interleaved_requests` | 8 concurrent requests with different request_ids | Each maintains separate audio stream, correct ordering per request | -| 12 | `test_invalid_required_params` | Invalid API key | Returns FATAL ERROR with message, no crash | -| 13 | `test_invalid_text_handling` | Malformed text, null chars, very long strings | Handled gracefully without crash | -| 14 | `test_metrics` | TTFB metric generation | Metrics data present with valid timestamps | -| 15 | `test_miss_required_params` | Missing API key | Appropriate error returned | - -### Critical TTS Invariants - -1. 
**Event ordering must be**: `tts_audio_start` -> `pcm_frame`(s) -> `tts_audio_end` per request -2. **Request isolation**: Interleaved requests must never mix audio streams -3. **Error handling**: Invalid/missing configs produce errors, never crashes -4. **Empty text**: Must complete fast (audio_end within 500ms), generate no audio -5. **Flush**: After flush_end, zero data output for 5 seconds - -### Required TTS Config Files - -Your `tests/configs/` must provide: - -``` -property.json # Valid API key + default settings -property_basic_audio_setting1.json # sample_rate: 16000 + valid key + dump:true -property_basic_audio_setting2.json # sample_rate: 24000 + valid key + dump:true -property_dump.json # dump:true + dump_path + valid key -property_miss_required.json # Empty/missing API key -property_invalid.json # Empty/invalid API key -``` - -**Template** (`property_basic_audio_setting1.json`): -```json -{ - "dump": true, - "dump_path": "./tests/keep_dump_output/", - "params": { - "sample_rate": 16000, - "key": "${env:MY_VENDOR_API_KEY}" - } -} -``` - -### Sample Rate Test Notes - -Some extensions don't support multiple sample rates. To skip the sample rate -comparison (test still runs, just doesn't assert rates differ), the test runner -checks `ENABLE_SAMPLE_RATE` env var. Extensions like `openai_tts_python` and -`humeai_tts_python` set this to `False`. 
- ---- - -## ASR Guarder Tests (10 Tests, 1 Skipped) - -**Location**: `agents/integration_tests/asr_guarder/` - -### Test Audio Format - -- 16-bit PCM, 16kHz sample rate, mono -- Test files: `test_data/16k_en_us.pcm` (English), `test_data/16k_zh_cn.pcm` (Chinese) -- Chunk size: 320 bytes per frame -- Send interval: 10ms between frames - -### Test Inventory - -| # | Test | What It Validates | Pass Criteria | -|---|------|-------------------|---------------| -| 1 | `test_connection_timing` | Connect + transcribe English audio | Results received, language="en-US" | -| 2 | `test_asr_result` | Result structure and data integrity | Fields: id, text, language, session_id all present | -| 3 | `test_asr_finalize` | Finalize signal → final result + finalize_end | final=True in result, finalize_end received | -| 4 | `test_reconnection` | Recovery after connection failure | Error detected, no crash, can reconnect | -| 5 | `test_vendor_error` | Invalid creds → proper error format | Error has id, module, code, message + vendor info | -| 6 | `test_multi_language` | English + Chinese transcription | en-US and zh-CN both detected correctly | -| 7 | `test_dump` | Audio dump functionality | Dump files created with correct data | -| 8 | `test_metrics` | TTFW and TTLW metrics | TTFW > 0, TTLW > TTFW, both in milliseconds | -| 9 | `test_audio_timestamp` | start_ms and duration_ms accuracy | Timestamps accurate within tolerance | -| 10 | `test_long_duration_stream` | **SKIPPED** — 5+ min stream | No timeout or connection drop | - -### Critical ASR Invariants - -1. **Result fields**: Every result must have `id`, `text`, `language`, `session_id` -2. **Finalize flow**: `asr_finalize` cmd -> `final=True` result -> `asr_finalize_end` response -3. **Error format**: `{id, module, code, message, vendor_info: {vendor, code, message}}` -4. 
**Metrics**: TTFW (Time To First Word) > 0, TTLW (Time To Last Word) > TTFW - -### Required ASR Config Files - -``` -property_en.json # Valid key + language: "en-US" -property_zh.json # Valid key + language: "zh-CN" -property_invalid.json # key: "invalid" (triggers vendor error test) -property_dump.json # Valid key + dump: true -``` - -**Template** (`property_en.json` for Deepgram): -```json -{ - "params": { - "key": "${env:DEEPGRAM_API_KEY}", - "model": "nova-2", - "sample_rate": 16000, - "encoding": "linear16", - "language": "en-US" - } -} -``` - ---- - -## Guarder Test Framework Internals - -### Manifest Template System - -Both guarders use template manifests with `{{extension_name}}` placeholders: - -```json -{ - "type": "app", - "name": "tts_guarder", - "version": "0.1.0", - "dependencies": [ - {"path": "../../ten_packages/extension/{{extension_name}}"} - ] -} -``` - -The Taskfile substitutes this at runtime with `sed`. - -### conftest.py Pattern - -Both guarders use a session-scoped FakeApp: - -```python -@pytest.fixture(scope="session", autouse=True) -def global_setup_and_teardown(): - event = threading.Event() - fake_app_ctx = FakeAppCtx(event) - fake_app_thread = threading.Thread(target=run_fake_app, args=(fake_app_ctx,)) - fake_app_thread.start() - event.wait() - yield - fake_app_ctx.fake_app.close() - fake_app_thread.join() -``` - -Each test creates its own `ExtensionTester` within this shared app context. -Tests share the session-scoped app but get fresh extension instances. 
- -### Pytest Options - -- `--extension_name` — extension to test (required) -- `--config_dir` — path to configs directory (required) -- `--enable_sample_rate` — "True"/"False" for sample rate comparison (TTS only) - ---- - -## Common Test Failures and Fixes - -### "Timeout waiting for audio" -- **Cause**: External API not responding within timeout -- **Fix**: Check API key is valid, check network, increase timeout if needed -- **Note**: Some flakiness is expected with external APIs — run individually to confirm - -### "Received error data" / FATAL ERROR -- **Cause**: Extension detected invalid config and raised error (this is correct behavior for error tests) -- **Fix**: If this happens on non-error tests, check your config files have valid API keys - -### "Found N dump files, expected M" -- **Cause**: Some requests timed out and didn't produce dump files -- **Fix**: Usually API timeout flakiness — rerun the test - -### "Received additional data after flush_end" -- **Cause**: Extension sent audio data after it should have stopped -- **Fix**: Ensure your cancel_tts/flush handling stops all pending output immediately - -### "Test failed: sample rates are the same" -- **Cause**: Your extension ignores the sample_rate config -- **Fix**: Implement sample_rate support, or set ENABLE_SAMPLE_RATE=False if your API doesn't support it - -### Import errors -- **Cause**: PYTHONPATH doesn't include ten_runtime_python and ten_ai_base -- **Fix**: Check `tests/bin/start` script sets PYTHONPATH correctly - -### "ModuleNotFoundError: No module named 'ten_packages.extension.xxx'" -- **Cause**: Extension not installed in test environment -- **Fix**: Run `tman install --standalone` in extension directory, or use `task test-extension` (does it automatically) - ---- - -## CI/CD Pipeline - -### Manual Guarder Tests (GitHub Actions) - -ASR and TTS guarder tests can be triggered manually: - -- Workflow: `.github/workflows/manual_test_asr_guarder.yml` -- Inputs: `extension` name, 
`config_dir`, `branch`, `env_vars` (semicolon-separated secret names) -- API keys loaded from GitHub Secrets at runtime - -### Extension Publishing - -- Workflow: `.github/workflows/manual_publish_extension.yml` -- Steps: `tman install --standalone` -> `tman run build` -> `tman publish` -- Requires `TEN_CLOUD_STORE` secret for publishing - ---- - -## See Also - -- [Extension Development](extension_development.md) — Config files and pre-submission checklist -- [Back to Workflows](../05_workflows.md) From d66c783e1669e084a4952599ca9fe0d4307b5c44 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 16:23:56 +0000 Subject: [PATCH 15/18] fix: move cancel flag reset to just before ws.send clear _is_cancelled just before sending Speak+Flush, not at method entry. prevents a concurrent cancel() from being lost if it races with get() starting up. --- .../ten_packages/extension/deepgram_tts/deepgram_tts.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py index 060e726b3c..cf82a000f0 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py @@ -138,9 +138,7 @@ async def get( yield None, EVENT_TTS_END return - self._is_cancelled = False - - # Reconnect if needed (new request_id or after error) + # Reconnect if needed (after error or cancel) if self._needs_reconnect: await self._reconnect() self._needs_reconnect = False @@ -150,6 +148,10 @@ async def get( if not self._ttfb_sent: self._sent_ts = datetime.now() + # Clear cancel flag just before sending, not at + # method entry — avoids race with concurrent cancel() + self._is_cancelled = False + # Send Speak + Flush speak_msg = {"type": "Speak", "text": text} await self._ws.send(json.dumps(speak_msg)) From 76c781d129768b7a5627b870032a0eb539065694 Mon Sep 17 
00:00:00 2001 From: Ubuntu Date: Tue, 7 Apr 2026 16:29:34 +0000 Subject: [PATCH 16/18] fix: remove dual finalization path, dead config code, simplify - remove duplicate _finalize_request on empty EVENT_TTS_RESPONSE. rely solely on EVENT_TTS_END to close requests, avoiding risk of double-finalization. - remove dead to_str() branch checking params['api_key'] after update_params() already deletes it. - simplify _ensure_dict to only handle dict and fallback to empty. --- .../agents/ten_packages/extension/deepgram_tts/config.py | 7 +------ .../ten_packages/extension/deepgram_tts/extension.py | 9 +-------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/config.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/config.py index 901b2eb449..cff5587242 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/config.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/config.py @@ -58,11 +58,8 @@ def to_str(self, sensitive_handling: bool = True) -> str: config = copy.deepcopy(self) - # Encrypt sensitive fields if config.api_key: config.api_key = utils.encrypt(config.api_key) - if config.params and "api_key" in config.params: - config.params["api_key"] = utils.encrypt(config.params["api_key"]) return f"{config}" @@ -70,6 +67,4 @@ def to_str(self, sensitive_handling: bool = True) -> str: def _ensure_dict(value: Any) -> dict[str, Any]: if isinstance(value, dict): return value - if value is None: - return {} - return dict(value) + return {} diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py index 20129d6f9e..85190a3de7 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py @@ -298,14 +298,7 @@ async def _process_tts_text(self, text: str, t: TTSTextInput) -> None: await self._write_dump(data_msg) 
await self.send_tts_audio_data(data_msg) else: - self.ten_env.log_debug( - "Received empty payload for TTS response" - ) - if t.text_input_end: - await self._finalize_request( - TTSAudioEndReason.REQUEST_END - ) - break + self.ten_env.log_debug("Empty payload, ignoring") elif event_status == EVENT_TTS_TTFB_METRIC: if data_msg is not None and isinstance(data_msg, int): From 09265e3b50fd14cb44a1fc0c09f8992df2feb997 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 8 Apr 2026 10:56:31 +0000 Subject: [PATCH 17/18] feat: add vendor params passthrough to deepgram websocket URL forward additional deepgram query parameters from config.params through to the websocket connection string. known keys (api_key, base_url, model, encoding, sample_rate) are normalized onto the config object; remaining scalar keys are appended to the query string via urlencode. - replace f-string URL building with urlencode for correctness - improve TTS_END logging to distinguish final vs intermediate events - add test_params_passthrough unit test for URL construction - bump version to 0.1.1 --- .../extension/deepgram_tts/README.md | 8 +++- .../extension/deepgram_tts/deepgram_tts.py | 21 ++++++++--- .../extension/deepgram_tts/extension.py | 12 ++++-- .../extension/deepgram_tts/manifest.json | 2 +- .../deepgram_tts/tests/test_params.py | 37 ++++++++++++++++++- 5 files changed, 67 insertions(+), 13 deletions(-) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/README.md b/ai_agents/agents/ten_packages/extension/deepgram_tts/README.md index c8be961b39..ab18a5b30b 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/README.md +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/README.md @@ -22,6 +22,7 @@ A TEN Framework extension that provides Text-to-Speech (TTS) capabilities using | `params.encoding` | string | `linear16` | Audio encoding format | | `params.sample_rate` | int | `24000` | Output sample rate in Hz | | `params.base_url` | string | 
`wss://api.deepgram.com/v1/speak` | WebSocket endpoint | +| `params.` | scalar | Optional | Additional Deepgram websocket query parameters passed through to the vendor | | `dump` | bool | `false` | Enable audio dumping | | `dump_path` | string | `/tmp` | Path for audio dump files | @@ -33,13 +34,18 @@ A TEN Framework extension that provides Text-to-Speech (TTS) capabilities using "api_key": "${env:DEEPGRAM_API_KEY}", "model": "aura-2-thalia-en", "encoding": "linear16", - "sample_rate": 24000 + "sample_rate": 24000, + "container": "none" }, "dump": false, "dump_path": "/tmp" } ``` +Known extension-owned keys such as `api_key`, `base_url`, `model`, `encoding`, +and `sample_rate` are normalized onto the config object. Any remaining scalar +keys under `params` are appended to the Deepgram websocket query string. + ## Available Voice Models Deepgram Aura-2 voices: diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py index cf82a000f0..929e60fa1a 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py @@ -7,6 +7,7 @@ import json from datetime import datetime from typing import AsyncIterator +from urllib.parse import urlencode import websockets from websockets.asyncio.client import ClientConnection @@ -65,12 +66,20 @@ def __init__( def _build_ws_url(self) -> str: base = self.config.base_url - params = ( - f"model={self.config.model}" - f"&encoding={self.config.encoding}" - f"&sample_rate={self.config.sample_rate}" - ) - return f"{base}?{params}" + query_params: dict[str, str | int | float | bool] = { + "model": self.config.model, + "encoding": self.config.encoding, + "sample_rate": self.config.sample_rate, + } + + # Forward any additional Deepgram vendor params through the websocket + # query string while keeping auth and endpoint configuration out of it. 
+ for key, value in self.config.params.items(): + if key in {"api_key", "base_url"} or value is None: + continue + query_params[key] = value + + return f"{base}?{urlencode(query_params, doseq=True)}" async def start(self) -> None: """Preheat: establish initial connection.""" diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py index 85190a3de7..0f37ebad70 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py @@ -320,11 +320,17 @@ async def _process_tts_text(self, text: str, t: TTSTextInput) -> None: ) elif event_status == EVENT_TTS_END: - self.ten_env.log_info( - "Received TTS_END event from Deepgram TTS" - ) if t.text_input_end: + self.ten_env.log_info( + f"Received final TTS_END event from Deepgram TTS " + f"for request_id: {t.request_id}" + ) await self._finalize_request(TTSAudioEndReason.REQUEST_END) + else: + self.ten_env.log_debug( + f"Received intermediate TTS_END event from " + f"Deepgram TTS for request_id: {t.request_id}" + ) break elif event_status == EVENT_TTS_ERROR: diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/manifest.json b/ai_agents/agents/ten_packages/extension/deepgram_tts/manifest.json index c2ef9bb7a0..ffaceacaa3 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/manifest.json +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/manifest.json @@ -1,7 +1,7 @@ { "type": "extension", "name": "deepgram_tts", - "version": "0.1.0", + "version": "0.1.1", "dependencies": [ { "type": "system", diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_params.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_params.py index aded961fde..48ed8fe1b6 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_params.py +++ 
b/ai_agents/agents/ten_packages/extension/deepgram_tts/tests/test_params.py @@ -4,7 +4,8 @@ # See the LICENSE file for more information. # import json -from unittest.mock import patch, AsyncMock +from urllib.parse import parse_qs, urlparse +from unittest.mock import patch, AsyncMock, MagicMock from ten_runtime import ( @@ -18,7 +19,8 @@ EVENT_TTS_END, EVENT_TTS_TTFB_METRIC, ) -from unittest.mock import MagicMock +from deepgram_tts.config import DeepgramTTSConfig +from deepgram_tts.deepgram_tts import DeepgramTTSClient def create_mock_client(): @@ -38,6 +40,37 @@ async def mock_get(text): return mock +def test_params_passthrough(): + """Additional Deepgram params should be appended to the websocket URL.""" + config = DeepgramTTSConfig( + params={ + "api_key": "test_api_key", + "base_url": "wss://api.deepgram.com/v1/speak", + "model": "aura-2-thalia-en", + "encoding": "linear16", + "sample_rate": 24000, + "bit_rate": 64000, + "container": "none", + } + ) + config.update_params() + + client = DeepgramTTSClient(config=config, ten_env=MagicMock()) + parsed = urlparse(client._ws_url) + query = parse_qs(parsed.query) + + assert parsed.scheme == "wss" + assert parsed.netloc == "api.deepgram.com" + assert parsed.path == "/v1/speak" + assert query["model"] == ["aura-2-thalia-en"] + assert query["encoding"] == ["linear16"] + assert query["sample_rate"] == ["24000"] + assert query["bit_rate"] == ["64000"] + assert query["container"] == ["none"] + assert "api_key" not in query + assert "base_url" not in query + + # ================ test different sample rates ================ class ExtensionTesterSampleRate(ExtensionTester): def __init__(self, sample_rate: int): From 13b76a1ef68a65a87a0c0e3b8f06c8f54c59b450 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 8 Apr 2026 12:16:40 +0000 Subject: [PATCH 18/18] fix: add clarifying comments for event constant gap and sent_ts overwrite --- .../ten_packages/extension/deepgram_tts/deepgram_tts.py | 3 ++- 
.../agents/ten_packages/extension/deepgram_tts/extension.py | 4 ++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py index 929e60fa1a..57c69132f3 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/deepgram_tts.py @@ -17,7 +17,8 @@ from ten_runtime import AsyncTenEnv from ten_ai_base.const import LOG_CATEGORY_VENDOR -# Event types communicated back to the extension +# Event types communicated back to the extension. +# 4 is reserved (used by other TTS extensions for flush events). EVENT_TTS_RESPONSE = 1 EVENT_TTS_END = 2 EVENT_TTS_ERROR = 3 diff --git a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py index 0f37ebad70..aee0b9f1e8 100644 --- a/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py +++ b/ai_agents/agents/ten_packages/extension/deepgram_tts/extension.py @@ -302,6 +302,10 @@ async def _process_tts_text(self, text: str, t: TTSTextInput) -> None: elif event_status == EVENT_TTS_TTFB_METRIC: if data_msg is not None and isinstance(data_msg, int): + # Overwrite sent_ts to audio-start time so that + # _current_request_interval_ms() measures streaming + # duration (first audio → last audio), not total + # request time. This matches the HTTP base class. self.sent_ts = datetime.now() ttfb = data_msg await self.send_tts_audio_start(