From ee2027884f29f352c70a1eb22d5da15b5572b23e Mon Sep 17 00:00:00 2001
From: Varun Nuthalapati
Date: Sun, 26 Apr 2026 17:07:16 -0700
Subject: [PATCH] docs: add AsyncLLMBaseExtension and AsyncLLMToolBaseExtension guide

Add an LLM Extension Development section to how_to_develop_with_ext.md
with annotated code examples for both base classes. Covers the three
required method overrides, send_text_output usage, tool metadata
registration, run_tool dispatch, LLMToolResult types, and graph wiring.
Includes a reference table of real in-repo implementations.

Addresses #514.
---
 docs/development/how_to_develop_with_ext.md | 224 ++++++++++++++++++++
 1 file changed, 224 insertions(+)

diff --git a/docs/development/how_to_develop_with_ext.md b/docs/development/how_to_develop_with_ext.md
index 9f52c57709..cd858dfb9a 100644
--- a/docs/development/how_to_develop_with_ext.md
+++ b/docs/development/how_to_develop_with_ext.md
@@ -1040,6 +1040,230 @@ async onCmd(tenEnv: TenEnv, cmd: Cmd): Promise {

---

## LLM Extension Development

TEN Framework provides two base classes for building LLM extensions that plug into the voice-assistant pipeline. Both live in the `ten_ai_base` system package.

| Base class | Protocol | Best for |
|------------|----------|----------|
| `AsyncLLMBaseExtension` | Streaming text-in / text-out, tool dispatch | Chat-completion providers (OpenAI, xAI Grok, Bedrock, …) |
| `AsyncLLMToolBaseExtension` | Tool registration + result return | Custom tools called by an LLM extension |

### Building an LLM extension with `AsyncLLMBaseExtension`

`AsyncLLMBaseExtension` handles the conversation loop, memory management, user-join/leave commands, and flush. You implement only three methods.

```python
# my_llm_python/extension.py
from ten_ai_base.llm import AsyncLLMBaseExtension
from ten_ai_base.types import (
    LLMCallCompletionArgs,
    LLMDataCompletionArgs,
    LLMToolMetadata,
)
from ten_runtime.async_ten_env import AsyncTenEnv

from .client import MyLLMClient, MyLLMConfig


class MyLLMExtension(AsyncLLMBaseExtension):
    def __init__(self, name: str) -> None:
        super().__init__(name)
        self.config: MyLLMConfig | None = None
        self.client: MyLLMClient | None = None

    async def on_start(self, ten_env: AsyncTenEnv) -> None:
        ten_env.log_info("on_start")
        await super().on_start(ten_env)

        self.config = await MyLLMConfig.create_async(ten_env=ten_env)
        if not self.config.api_key:
            ten_env.log_warn("api_key not set, LLM will not respond")
            return

        self.client = MyLLMClient(ten_env, self.config)

    async def on_stop(self, ten_env: AsyncTenEnv) -> None:
        ten_env.log_info("on_stop")
        await super().on_stop(ten_env)

    # ------------------------------------------------------------------ #
    # Called for each user message that enters the conversation queue.    #
    # Stream text tokens back via self.send_text_output().                #
    # ------------------------------------------------------------------ #
    async def on_data_chat_completion(
        self, ten_env: AsyncTenEnv, **kargs: LLMDataCompletionArgs
    ) -> None:
        messages = kargs.get("messages", [])
        if not messages:
            ten_env.log_error("no messages in request")
            return
        if not self.client:
            raise RuntimeError("client not initialised")

        async for token in self.client.stream_chat(messages):
            self.send_text_output(ten_env, token, end_of_segment=False)

        # Signal end of this turn.
        self.send_text_output(ten_env, "", end_of_segment=True)

    # ------------------------------------------------------------------ #
    # Called when another extension (e.g. main_python) issues a           #
    # synchronous call_chat_completion command.                           #
    # ------------------------------------------------------------------ #
    async def on_call_chat_completion(
        self, ten_env: AsyncTenEnv, **kargs: LLMCallCompletionArgs
    ) -> str:
        messages = kargs.get("messages", [])
        if not self.client:
            raise RuntimeError("client not initialised")
        result = await self.client.chat(messages)
        return result.to_json()

    # ------------------------------------------------------------------ #
    # Called when a tool extension registers its metadata.                #
    # Delegate to the base class to add the tool to the available set.    #
    # ------------------------------------------------------------------ #
    async def on_tools_update(
        self, ten_env: AsyncTenEnv, tool: LLMToolMetadata
    ) -> None:
        return await super().on_tools_update(ten_env, tool)
```

Key points:

- `send_text_output(ten_env, text, end_of_segment)` — emit a text token to the downstream TTS extension. Call it with `end_of_segment=True` and empty text to close the turn.
- `self.available_tools` — the list of `LLMToolMetadata` registered by connected tool extensions. Pass this to your API client when building the request; see the sketch after this list.
- `on_data_chat_completion` runs in a cancellable async task managed by the base class. If a flush arrives mid-stream, the task is cancelled automatically.
- Call `await super().on_start(ten_env)` and `await super().on_stop(ten_env)` — the base class wires up the data queue and command handlers.
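Provider SDKs generally expect tool definitions in their own schema, so `available_tools` needs a small mapping step before it goes into the request. Below is a minimal sketch, not part of `ten_ai_base`: it assumes an OpenAI-style `tools` array as the target shape and the `LLMToolMetadata` / `LLMToolParameter` fields shown in this guide; check your provider's API reference for the exact format.

```python
# Hypothetical helper (not part of ten_ai_base): map registered
# LLMToolMetadata entries to an OpenAI-style "tools" array. The target
# JSON shape is an assumption; adapt it to your provider.
from ten_ai_base.types import LLMToolMetadata


def to_provider_tools(tools: list[LLMToolMetadata]) -> list[dict]:
    provider_tools = []
    for tool in tools:
        # Collect each parameter as a JSON-schema property.
        properties = {
            param.name: {"type": param.type, "description": param.description}
            for param in tool.parameters
        }
        required = [param.name for param in tool.parameters if param.required]
        provider_tools.append(
            {
                "type": "function",
                "function": {
                    "name": tool.name,
                    "description": tool.description,
                    "parameters": {
                        "type": "object",
                        "properties": properties,
                        "required": required,
                    },
                },
            }
        )
    return provider_tools
```

Inside `on_data_chat_completion`, you would then pass `to_provider_tools(self.available_tools)` to your client alongside the messages.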
**Extension addon registration (`addon.py`):**

```python
from ten_runtime import Addon, register_addon_as_extension, TenEnv

from .extension import MyLLMExtension


@register_addon_as_extension("my_llm_python")
class MyLLMExtensionAddon(Addon):
    def on_create_instance(self, ten_env: TenEnv, name: str, context) -> None:
        ten_env.log_info("on_create_instance")
        ten_env.on_create_instance_done(MyLLMExtension(name), context)
```

---

### Building a tool extension with `AsyncLLMToolBaseExtension`

Tool extensions register their schema with connected LLM extensions and handle tool-call invocations.

```python
# my_tool_python/extension.py
from ten_ai_base.llm_tool import AsyncLLMToolBaseExtension
from ten_ai_base.types import LLMToolMetadata, LLMToolParameter, LLMToolResult
from ten_runtime.async_ten_env import AsyncTenEnv


class WebSearchToolExtension(AsyncLLMToolBaseExtension):
    def get_tool_metadata(self, ten_env: AsyncTenEnv) -> list[LLMToolMetadata]:
        return [
            LLMToolMetadata(
                name="web_search",
                description="Search the web and return a summary of the top results.",
                parameters=[
                    LLMToolParameter(
                        name="query",
                        type="string",
                        description="The search query.",
                        required=True,
                    ),
                    LLMToolParameter(
                        name="max_results",
                        type="integer",
                        description="Maximum number of results to return (default 3).",
                        required=False,
                    ),
                ],
            )
        ]

    async def run_tool(
        self, ten_env: AsyncTenEnv, name: str, args: dict
    ) -> LLMToolResult | None:
        if name != "web_search":
            return None

        query = args.get("query", "")
        max_results = int(args.get("max_results", 3))

        ten_env.log_info(f"web_search: query={query!r} max_results={max_results}")

        # Replace with a real search client call.
        results = await self._do_search(query, max_results)
        summary = "\n".join(f"- {r['title']}: {r['snippet']}" for r in results)

        return LLMToolResult(
            type="llmresult",
            content=summary,
        )

    async def _do_search(self, query: str, max_results: int) -> list[dict]:
        # Stub — replace with actual search API call.
        return [{"title": "Example", "snippet": f"Result for {query!r}"}]
```

Key points:

- `get_tool_metadata` is called once at startup. Return one `LLMToolMetadata` per tool your extension provides.
- `run_tool` is called by the base class when the connected LLM requests a tool call. Return `LLMToolResult(type="llmresult", content=<result>)` to feed the result back to the LLM, or `LLMToolResult(type="requery", content=<context>)` to append extra context and re-run the LLM call; a sketch of the `requery` pattern follows this list.
- Return `None` for unrecognised tool names.
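For contrast with the `llmresult` return above, here is a minimal, hypothetical sketch of the `requery` pattern: the tool fetches context rather than a final answer, and that context is appended before the LLM call is re-run. The tool name, the `UserProfileToolExtension` class, and the `_fetch_user_profile` helper are illustrative only, and `get_tool_metadata` is omitted for brevity; `bingsearch_tool_python` in the reference table below is an in-repo extension that uses this result type.

```python
import json

from ten_ai_base.llm_tool import AsyncLLMToolBaseExtension
from ten_ai_base.types import LLMToolResult
from ten_runtime.async_ten_env import AsyncTenEnv


class UserProfileToolExtension(AsyncLLMToolBaseExtension):  # hypothetical
    async def run_tool(
        self, ten_env: AsyncTenEnv, name: str, args: dict
    ) -> LLMToolResult | None:
        if name != "get_user_profile":
            return None

        # Hypothetical helper: look up profile data in your own backend.
        profile = await self._fetch_user_profile(args.get("user_id", ""))

        # "requery" hands the content back as extra context and re-runs
        # the completion, instead of treating it as a final tool answer
        # to be spoken to the user.
        return LLMToolResult(
            type="requery",
            content=json.dumps(profile),
        )

    async def _fetch_user_profile(self, user_id: str) -> dict:
        # Stub: replace with a real lookup.
        return {"user_id": user_id, "preferred_language": "en"}
```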
**Graph wiring in `property.json`:**

Connect the tool extension to the LLM extension in the graph so that tool registration and invocations are routed correctly:

```json
{
  "ten": {
    "nodes": [
      { "type": "extension", "name": "my_llm_python", "addon": "my_llm_python" },
      { "type": "extension", "name": "my_tool_python", "addon": "my_tool_python" }
    ],
    "connections": [
      {
        "extension": "my_tool_python",
        "cmd": [
          { "name": "tool_register", "dest": [{ "extension": "my_llm_python" }] }
        ]
      },
      {
        "extension": "my_llm_python",
        "cmd": [
          { "name": "tool_call", "dest": [{ "extension": "my_tool_python" }] }
        ]
      }
    ]
  }
}
```

---

### Real-world references

The following extensions in this repository are good starting points for understanding the full implementation:

| Extension | Base class | Notes |
|-----------|------------|-------|
| `ai_agents/agents/ten_packages/extension/grok_python` | `AsyncLLMBaseExtension` | xAI Grok streaming, tool dispatch, reasoning token handling |
| `ai_agents/agents/ten_packages/extension/openai_llm2_python` | `AsyncLLM2BaseExtension` | OpenAI LLM2 protocol, think-block parsing |
| `ai_agents/agents/ten_packages/extension/bingsearch_tool_python` | `AsyncLLMToolBaseExtension` | Bing web-search tool, `requery` result type |

---

## Development Summary

By following the complete development process provided in this guide, you can efficiently develop, test, and debug TEN extensions. Whether you choose C++, Go, Python, or Node.js, TEN Framework provides you with a complete toolchain and best practices to help you fully leverage the powerful features of TEN Framework and build high-performance, high-reliability extension applications.