From cf3a204b89930e5148fe9e332d7e4ea5ba667b2d Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Wed, 29 Apr 2026 21:50:20 +0200 Subject: [PATCH 01/46] feat(appkit): agents() plugin, createAgent(def), and markdown-driven agents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The main product layer. Turns an AppKit app into an AI-agent host with markdown-driven agent discovery, code-defined agents, sub-agents, and a standalone run-without-HTTP executor. Agent runtime files land in core/agent/ from day one: core/agent/create-agent.ts — createAgent() definition factory core/agent/run-agent.ts — standalone adapter loop (no HTTP) core/agent/load-agents.ts — markdown agent discovery core/agent/system-prompt.ts — base system prompt + composition core/agent/types.ts — updated with AgentDefinition, AgentsPluginConfig, RegisteredAgent, etc. HTTP-facing concerns stay in plugins/agents/: agents.ts, thread-store.ts, tool-approval-gate.ts, event-channel.ts, event-translator.ts, schemas.ts, defaults.ts, manifest.json --- packages/appkit/src/beta.ts | 30 +- .../appkit/src/core/agent/create-agent.ts | 53 + packages/appkit/src/core/agent/load-agents.ts | 370 +++++ packages/appkit/src/core/agent/run-agent.ts | 226 +++ .../appkit/src/core/agent/system-prompt.ts | 40 + .../src/core/agent/tests/create-agent.test.ts | 75 + .../src/core/agent/tests/load-agents.test.ts | 302 ++++ .../src/core/agent/tests/run-agent.test.ts | 120 ++ .../core/agent/tests/system-prompt.test.ts | 45 + packages/appkit/src/core/agent/types.ts | 177 ++- packages/appkit/src/plugins/agents/agents.ts | 1268 +++++++++++++++++ .../appkit/src/plugins/agents/defaults.ts | 12 + .../src/plugins/agents/event-channel.ts | 70 + .../src/plugins/agents/event-translator.ts | 291 ++++ packages/appkit/src/plugins/agents/index.ts | 22 + .../appkit/src/plugins/agents/manifest.json | 10 + packages/appkit/src/plugins/agents/schemas.ts | 69 + .../agents/tests/agents-plugin.test.ts | 373 +++++ .../agents/tests/approval-route.test.ts | 292 ++++ .../plugins/agents/tests/dos-limits.test.ts | 299 ++++ .../agents/tests/event-channel.test.ts | 78 + .../agents/tests/event-translator.test.ts | 332 +++++ .../plugins/agents/tests/thread-store.test.ts | 138 ++ .../agents/tests/tool-approval-gate.test.ts | 156 ++ .../appkit/src/plugins/agents/thread-store.ts | 66 + .../src/plugins/agents/tool-approval-gate.ts | 122 ++ packages/shared/src/agent.ts | 36 +- 27 files changed, 5063 insertions(+), 9 deletions(-) create mode 100644 packages/appkit/src/core/agent/create-agent.ts create mode 100644 packages/appkit/src/core/agent/load-agents.ts create mode 100644 packages/appkit/src/core/agent/run-agent.ts create mode 100644 packages/appkit/src/core/agent/system-prompt.ts create mode 100644 packages/appkit/src/core/agent/tests/create-agent.test.ts create mode 100644 packages/appkit/src/core/agent/tests/load-agents.test.ts create mode 100644 packages/appkit/src/core/agent/tests/run-agent.test.ts create mode 100644 packages/appkit/src/core/agent/tests/system-prompt.test.ts create mode 100644 packages/appkit/src/plugins/agents/agents.ts create mode 100644 packages/appkit/src/plugins/agents/defaults.ts create mode 100644 packages/appkit/src/plugins/agents/event-channel.ts create mode 100644 packages/appkit/src/plugins/agents/event-translator.ts create mode 100644 packages/appkit/src/plugins/agents/index.ts create mode 100644 packages/appkit/src/plugins/agents/manifest.json create mode 100644 packages/appkit/src/plugins/agents/schemas.ts create mode 100644 packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts create mode 100644 packages/appkit/src/plugins/agents/tests/approval-route.test.ts create mode 100644 packages/appkit/src/plugins/agents/tests/dos-limits.test.ts create mode 100644 packages/appkit/src/plugins/agents/tests/event-channel.test.ts create mode 100644 packages/appkit/src/plugins/agents/tests/event-translator.test.ts create mode 100644 packages/appkit/src/plugins/agents/tests/thread-store.test.ts create mode 100644 packages/appkit/src/plugins/agents/tests/tool-approval-gate.test.ts create mode 100644 packages/appkit/src/plugins/agents/thread-store.ts create mode 100644 packages/appkit/src/plugins/agents/tool-approval-gate.ts diff --git a/packages/appkit/src/beta.ts b/packages/appkit/src/beta.ts index 801c33b1a..55c836c97 100644 --- a/packages/appkit/src/beta.ts +++ b/packages/appkit/src/beta.ts @@ -19,6 +19,14 @@ export type { } from "shared"; export { DatabricksAdapter, parseTextToolCalls } from "./agents/databricks"; +// Agent runtime +export { createAgent } from "./core/agent/create-agent"; +export { + type RunAgentInput, + type RunAgentResult, + runAgent, +} from "./core/agent/run-agent"; + // Tool authoring primitives export { AppKitMcpClient, @@ -37,11 +45,25 @@ export { tool, toolsFromRegistry, } from "./core/agent/tools"; + +// Agent types +export type { + AgentDefinition, + AgentsPluginConfig, + AgentTool, + AutoInheritToolsConfig, + BaseSystemPromptOption, + PromptContext, + RegisteredAgent, + ResolvedToolEntry, + ToolkitEntry, + ToolkitOptions, +} from "./plugins/agents"; export { - type AgentTool, + agentIdFromMarkdownPath, isToolkitEntry, - type ToolkitEntry, - type ToolkitOptions, -} from "./core/agent/types"; + loadAgentFromFile, + loadAgentsFromDir, +} from "./plugins/agents"; export * from "./plugins/beta-exports.generated"; diff --git a/packages/appkit/src/core/agent/create-agent.ts b/packages/appkit/src/core/agent/create-agent.ts new file mode 100644 index 000000000..b4b119010 --- /dev/null +++ b/packages/appkit/src/core/agent/create-agent.ts @@ -0,0 +1,53 @@ +import { ConfigurationError } from "../../errors"; +import type { AgentDefinition } from "./types"; + +/** + * Pure factory for agent definitions. Returns the passed-in definition after + * cycle-detecting the sub-agent graph. Accepts the full `AgentDefinition` shape + * and is safe to call at module top-level. + * + * The returned value is a plain `AgentDefinition` — no adapter construction, + * no side effects. Register it with `agents({ agents: { name: def } })` or run + * it standalone via `runAgent(def, input)`. + * + * @example + * ```ts + * const support = createAgent({ + * instructions: "You help customers.", + * model: "databricks-claude-sonnet-4-5", + * tools: { + * get_weather: tool({ ... }), + * }, + * }); + * ``` + */ +export function createAgent(def: AgentDefinition): AgentDefinition { + detectCycles(def); + return def; +} + +/** + * Walks the `agents: { ... }` sub-agent tree via DFS and throws if a cycle is + * found. Cycles would cause infinite recursion at tool-invocation time. + */ +function detectCycles(def: AgentDefinition): void { + const visiting = new Set(); + const visited = new Set(); + + const walk = (current: AgentDefinition, path: string[]): void => { + if (visited.has(current)) return; + if (visiting.has(current)) { + throw new ConfigurationError( + `Agent sub-agent cycle detected: ${path.join(" -> ")}`, + ); + } + visiting.add(current); + for (const [childKey, child] of Object.entries(current.agents ?? {})) { + walk(child, [...path, childKey]); + } + visiting.delete(current); + visited.add(current); + }; + + walk(def, [def.name ?? "(root)"]); +} diff --git a/packages/appkit/src/core/agent/load-agents.ts b/packages/appkit/src/core/agent/load-agents.ts new file mode 100644 index 000000000..8f9fd1be7 --- /dev/null +++ b/packages/appkit/src/core/agent/load-agents.ts @@ -0,0 +1,370 @@ +import fs from "node:fs"; +import path from "node:path"; +import yaml from "js-yaml"; +import type { AgentAdapter } from "shared"; +import { createLogger } from "../../logging/logger"; +import type { + AgentDefinition, + AgentTool, + BaseSystemPromptOption, + ToolkitEntry, + ToolkitOptions, +} from "../../core/agent/types"; +import { isToolkitEntry } from "../../core/agent/types"; + +const logger = createLogger("agents:loader"); + +interface ToolkitProvider { + toolkit: (opts?: ToolkitOptions) => Record; +} + +export interface LoadContext { + /** Default model when frontmatter has no `endpoint` and the def has no `model`. */ + defaultModel?: AgentAdapter | Promise | string; + /** Ambient tool library referenced by frontmatter `tools: [key1, key2]`. */ + availableTools?: Record; + /** Registered plugin toolkits referenced by frontmatter `toolkits: [...]`. */ + plugins?: Map; + /** + * Code-defined agents contributed by `agents({ agents: { ... } })`. The + * directory loader resolves `agents:` frontmatter references against + * these alongside sibling markdown files, so a markdown parent can + * delegate to a code-defined child. Code-defined names win on collision + * with markdown names, matching the plugin's top-level merge precedence. + */ + codeAgents?: Record; +} + +export interface LoadResult { + /** Agent definitions keyed by file-stem name. */ + defs: Record; + /** First file with `default: true` frontmatter, or `null`. */ + defaultAgent: string | null; +} + +interface Frontmatter { + endpoint?: string; + model?: string; + toolkits?: ToolkitSpec[]; + tools?: string[]; + /** + * Sibling file-stems to expose as sub-agents. Each becomes an + * `agent-` tool on this agent at runtime. Resolution happens at + * directory-load time in {@link loadAgentsFromDir}; the single-file + * {@link loadAgentFromFile} path rejects non-empty values since there + * are no siblings to resolve against. + */ + agents?: string[]; + maxSteps?: number; + maxTokens?: number; + default?: boolean; + baseSystemPrompt?: false | string; + ephemeral?: boolean; +} + +type ToolkitSpec = string | { [pluginName: string]: ToolkitOptions | string[] }; + +const ALLOWED_KEYS = new Set([ + "endpoint", + "model", + "toolkits", + "tools", + "agents", + "maxSteps", + "maxTokens", + "default", + "baseSystemPrompt", + "ephemeral", +]); + +/** + * Loads a single markdown agent file and resolves its frontmatter against + * registered plugin toolkits + ambient tool library. + * + * Rejects non-empty `agents:` frontmatter because single-file loads have + * no siblings to resolve sub-agent references against — callers must use + * {@link loadAgentsFromDir} when markdown agents delegate to one another. + */ +export async function loadAgentFromFile( + filePath: string, + ctx: LoadContext, +): Promise { + const raw = fs.readFileSync(filePath, "utf-8"); + const name = path.basename(filePath, ".md"); + const { data } = parseFrontmatter(raw, filePath); + if (Array.isArray(data?.agents) && data.agents.length > 0) { + throw new Error( + `Agent '${name}' (${filePath}) declares 'agents:' in frontmatter, ` + + `which requires loadAgentsFromDir to resolve sibling references. ` + + `Use loadAgentsFromDir, or wire sub-agents in code via createAgent({ agents: { ... } }).`, + ); + } + return buildDefinition(name, raw, filePath, ctx); +} + +/** + * Scans a directory for `*.md` files and produces an `AgentDefinition` record + * keyed by file-stem. Throws on frontmatter errors or unresolved references. + * Returns an empty map if the directory does not exist. + * + * Runs in two passes so sub-agent references in frontmatter (`agents: [...]`) + * can be resolved regardless of file-system iteration order: + * + * 1. Build every agent's definition from its own file. + * 2. Walk `agents:` references and wire `def.agents = { sibling: siblingDef }` + * by looking them up in the complete map. Dangling names and + * self-references fail loudly; mutual delegation is allowed and bounded + * at runtime by `limits.maxSubAgentDepth`. + */ +export async function loadAgentsFromDir( + dir: string, + ctx: LoadContext, +): Promise { + if (!fs.existsSync(dir)) { + return { defs: {}, defaultAgent: null }; + } + // Sort so `default: true` resolution is deterministic across platforms — + // `readdirSync` order is filesystem-dependent (macOS alphabetical, ext4 + // inode order, etc.). + const files = fs + .readdirSync(dir) + .filter((f) => f.endsWith(".md")) + .sort(); + const defs: Record = {}; + const subAgentRefs: Record = {}; + let defaultAgent: string | null = null; + + // Pass 1: build every agent's definition; collect unresolved sibling refs. + for (const file of files) { + const fullPath = path.join(dir, file); + const raw = fs.readFileSync(fullPath, "utf-8"); + const name = path.basename(file, ".md"); + defs[name] = buildDefinition(name, raw, fullPath, ctx); + const { data } = parseFrontmatter(raw, fullPath); + if (data?.agents !== undefined) { + subAgentRefs[name] = normalizeAgentsFrontmatter( + data.agents, + name, + fullPath, + ); + } + if (data?.default === true && !defaultAgent) { + defaultAgent = name; + } + } + + // Pass 2: resolve sibling references against the complete defs map. + // Code-defined agents (ctx.codeAgents) take precedence over markdown ones + // with the same name, matching the plugin's top-level merge behaviour. + for (const [name, refs] of Object.entries(subAgentRefs)) { + if (refs.length === 0) continue; + const children: Record = {}; + const missing: string[] = []; + for (const ref of refs) { + if (ref === name) { + throw new Error( + `Agent '${name}' (${path.join(dir, `${name}.md`)}) cannot reference itself in 'agents:'.`, + ); + } + const sibling = ctx.codeAgents?.[ref] ?? defs[ref]; + if (!sibling) { + missing.push(ref); + continue; + } + children[ref] = sibling; + } + if (missing.length > 0) { + const available = + [...Object.keys(ctx.codeAgents ?? {}), ...Object.keys(defs)] + .sort() + .join(", ") || ""; + throw new Error( + `Agent '${name}' references sub-agent(s) '${missing.join(", ")}' in 'agents:', ` + + `but no markdown or code agent(s) with those names exist. ` + + `Available: ${available}.`, + ); + } + defs[name].agents = children; + } + + return { defs, defaultAgent }; +} + +/** + * Validates that `agents:` frontmatter is an array of non-empty strings and + * returns it with duplicates removed. Throws with a clear per-file message + * on malformed input rather than silently ignoring. + */ +function normalizeAgentsFrontmatter( + value: unknown, + agentName: string, + filePath: string, +): string[] { + if (!Array.isArray(value)) { + throw new Error( + `Agent '${agentName}' (${filePath}) has invalid 'agents:' frontmatter: ` + + `expected an array of sibling file-stems, got ${typeof value}.`, + ); + } + const out: string[] = []; + const seen = new Set(); + for (const item of value) { + if (typeof item !== "string" || item.trim() === "") { + throw new Error( + `Agent '${agentName}' (${filePath}) has invalid 'agents:' entry: ` + + `expected non-empty string, got ${JSON.stringify(item)}.`, + ); + } + if (seen.has(item)) continue; + seen.add(item); + out.push(item); + } + return out; +} + +/** Exposed for tests. Parses `--- yaml ---\nbody` and validates frontmatter keys. */ +export function parseFrontmatter( + raw: string, + sourcePath?: string, +): { data: Frontmatter | null; content: string } { + const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---\r?\n?([\s\S]*)$/); + if (!match) { + return { data: null, content: raw.trim() }; + } + let parsed: unknown; + try { + parsed = yaml.load(match[1]); + } catch (err) { + const src = sourcePath ? ` (${sourcePath})` : ""; + throw new Error( + `Invalid YAML frontmatter${src}: ${err instanceof Error ? err.message : String(err)}`, + ); + } + if (parsed === null || parsed === undefined) { + return { data: {}, content: match[2].trim() }; + } + if (typeof parsed !== "object" || Array.isArray(parsed)) { + const src = sourcePath ? ` (${sourcePath})` : ""; + throw new Error(`Frontmatter must be a YAML object${src}`); + } + const data = parsed as Record; + for (const key of Object.keys(data)) { + if (!ALLOWED_KEYS.has(key)) { + logger.warn( + "Ignoring unknown frontmatter key '%s' in %s", + key, + sourcePath ?? "", + ); + } + } + return { data: data as Frontmatter, content: match[2].trim() }; +} + +function buildDefinition( + name: string, + raw: string, + filePath: string, + ctx: LoadContext, +): AgentDefinition { + const { data, content } = parseFrontmatter(raw, filePath); + const fm: Frontmatter = data ?? {}; + + const tools = resolveFrontmatterTools(name, fm, filePath, ctx); + const model = fm.model ?? fm.endpoint ?? ctx.defaultModel; + + let baseSystemPrompt: BaseSystemPromptOption | undefined; + if (fm.baseSystemPrompt === false) baseSystemPrompt = false; + else if (typeof fm.baseSystemPrompt === "string") + baseSystemPrompt = fm.baseSystemPrompt; + + return { + name, + instructions: content, + model, + tools: Object.keys(tools).length > 0 ? tools : undefined, + maxSteps: typeof fm.maxSteps === "number" ? fm.maxSteps : undefined, + maxTokens: typeof fm.maxTokens === "number" ? fm.maxTokens : undefined, + baseSystemPrompt, + ephemeral: typeof fm.ephemeral === "boolean" ? fm.ephemeral : undefined, + }; +} + +function resolveFrontmatterTools( + agentName: string, + fm: Frontmatter, + filePath: string, + ctx: LoadContext, +): Record { + const out: Record = {}; + const pluginIdx = ctx.plugins ?? new Map(); + + for (const spec of fm.toolkits ?? []) { + const [pluginName, opts] = parseToolkitSpec(spec, filePath, agentName); + const provider = pluginIdx.get(pluginName); + if (!provider) { + throw new Error( + `Agent '${agentName}' (${filePath}) references toolkit '${pluginName}', but plugin '${pluginName}' is not registered. Available: ${ + pluginIdx.size > 0 + ? Array.from(pluginIdx.keys()).join(", ") + : "" + }`, + ); + } + const entries = provider.toolkit(opts) as Record; + for (const [key, entry] of Object.entries(entries)) { + if (!isToolkitEntry(entry)) { + throw new Error( + `Plugin '${pluginName}'.toolkit() returned a value at key '${key}' that is not a ToolkitEntry`, + ); + } + out[key] = entry as ToolkitEntry; + } + } + + for (const key of fm.tools ?? []) { + const tool = ctx.availableTools?.[key]; + if (!tool) { + const available = ctx.availableTools + ? Object.keys(ctx.availableTools).join(", ") + : ""; + throw new Error( + `Agent '${agentName}' (${filePath}) references tool '${key}', which is not in the agents() plugin's tools field. Available: ${available}`, + ); + } + out[key] = tool; + } + + return out; +} + +function parseToolkitSpec( + spec: ToolkitSpec, + filePath: string, + agentName: string, +): [string, ToolkitOptions | undefined] { + if (typeof spec === "string") { + return [spec, undefined]; + } + if (typeof spec !== "object" || spec === null) { + throw new Error( + `Agent '${agentName}' (${filePath}) has invalid toolkit entry: ${JSON.stringify(spec)}`, + ); + } + const keys = Object.keys(spec); + if (keys.length !== 1) { + throw new Error( + `Agent '${agentName}' (${filePath}) toolkit entry must have exactly one key, got: ${keys.join(", ")}`, + ); + } + const pluginName = keys[0]; + const value = spec[pluginName]; + if (Array.isArray(value)) { + return [pluginName, { only: value }]; + } + if (typeof value === "object" && value !== null) { + return [pluginName, value as ToolkitOptions]; + } + throw new Error( + `Agent '${agentName}' (${filePath}) toolkit '${pluginName}' options must be an array of tool names or an options object`, + ); +} diff --git a/packages/appkit/src/core/agent/run-agent.ts b/packages/appkit/src/core/agent/run-agent.ts new file mode 100644 index 000000000..5ef4109cf --- /dev/null +++ b/packages/appkit/src/core/agent/run-agent.ts @@ -0,0 +1,226 @@ +import { randomUUID } from "node:crypto"; +import type { + AgentAdapter, + AgentEvent, + AgentToolDefinition, + Message, +} from "shared"; +import { + type FunctionTool, + functionToolToDefinition, + isFunctionTool, +} from "./tools/function-tool"; +import { isHostedTool } from "./tools/hosted-tools"; +import type { + AgentDefinition, + AgentTool, + ToolkitEntry, +} from "./types"; +import { isToolkitEntry } from "./types"; + +export interface RunAgentInput { + /** Seed messages for the run. Either a single user string or a full message list. */ + messages: string | Message[]; + /** Abort signal for cancellation. */ + signal?: AbortSignal; +} + +export interface RunAgentResult { + /** Aggregated text output from all `message_delta` events. */ + text: string; + /** Every event the adapter yielded, in order. Useful for inspection/tests. */ + events: AgentEvent[]; +} + +/** + * Standalone agent execution without `createApp`. Resolves the adapter, binds + * inline tools, and drives the adapter's `run()` loop to completion. + * + * Limitations vs. running through the agents() plugin: + * - No OBO: there is no HTTP request, so plugin tools run as the service + * principal (when they work at all). + * - Plugin tools (`ToolkitEntry`) are not supported — they require a live + * `PluginContext` that only exists when registered in a `createApp` + * instance. This function throws a clear error if encountered. + * - Sub-agents (`agents: { ... }` on the def) are executed as nested + * `runAgent` calls with no shared thread state. + */ +export async function runAgent( + def: AgentDefinition, + input: RunAgentInput, +): Promise { + const adapter = await resolveAdapter(def); + const messages = normalizeMessages(input.messages, def.instructions); + const toolIndex = buildStandaloneToolIndex(def); + const tools = Array.from(toolIndex.values()).map((e) => e.def); + + const signal = input.signal; + + const executeTool = async (name: string, args: unknown): Promise => { + const entry = toolIndex.get(name); + if (!entry) throw new Error(`Unknown tool: ${name}`); + if (entry.kind === "function") { + return entry.tool.execute(args as Record); + } + if (entry.kind === "subagent") { + const subInput: RunAgentInput = { + messages: + typeof args === "object" && + args !== null && + typeof (args as { input?: unknown }).input === "string" + ? (args as { input: string }).input + : JSON.stringify(args), + signal, + }; + const res = await runAgent(entry.agentDef, subInput); + return res.text; + } + throw new Error( + `runAgent: tool "${name}" is a ${entry.kind} tool. ` + + "Plugin toolkits and MCP tools are only usable via createApp({ plugins: [..., agents(...)] }).", + ); + }; + + const events: AgentEvent[] = []; + let text = ""; + + const stream = adapter.run( + { + messages, + tools, + threadId: randomUUID(), + signal, + }, + { executeTool, signal }, + ); + + for await (const event of stream) { + if (signal?.aborted) break; + events.push(event); + if (event.type === "message_delta") { + text += event.content; + } else if (event.type === "message") { + text = event.content; + } + } + + return { text, events }; +} + +async function resolveAdapter(def: AgentDefinition): Promise { + const { model } = def; + if (!model) { + const { DatabricksAdapter } = await import("../../agents/databricks"); + return DatabricksAdapter.fromModelServing(); + } + if (typeof model === "string") { + const { DatabricksAdapter } = await import("../../agents/databricks"); + return DatabricksAdapter.fromModelServing(model); + } + return await model; +} + +function normalizeMessages( + input: string | Message[], + instructions: string, +): Message[] { + const systemMessage: Message = { + id: "system", + role: "system", + content: instructions, + createdAt: new Date(), + }; + if (typeof input === "string") { + return [ + systemMessage, + { + id: randomUUID(), + role: "user", + content: input, + createdAt: new Date(), + }, + ]; + } + return [systemMessage, ...input]; +} + +type StandaloneEntry = + | { + kind: "function"; + def: AgentToolDefinition; + tool: FunctionTool; + } + | { + kind: "subagent"; + def: AgentToolDefinition; + agentDef: AgentDefinition; + } + | { + kind: "toolkit"; + def: AgentToolDefinition; + entry: ToolkitEntry; + } + | { + kind: "hosted"; + def: AgentToolDefinition; + }; + +function buildStandaloneToolIndex( + def: AgentDefinition, +): Map { + const index = new Map(); + + for (const [key, tool] of Object.entries(def.tools ?? {})) { + index.set(key, classifyTool(key, tool)); + } + + for (const [childKey, child] of Object.entries(def.agents ?? {})) { + const toolName = `agent-${childKey}`; + index.set(toolName, { + kind: "subagent", + agentDef: { ...child, name: child.name ?? childKey }, + def: { + name: toolName, + description: + child.instructions.slice(0, 120) || + `Delegate to the ${childKey} sub-agent`, + parameters: { + type: "object", + properties: { + input: { + type: "string", + description: "Message to send to the sub-agent.", + }, + }, + required: ["input"], + }, + }, + }); + } + + return index; +} + +function classifyTool(key: string, tool: AgentTool): StandaloneEntry { + if (isToolkitEntry(tool)) { + return { kind: "toolkit", def: { ...tool.def, name: key }, entry: tool }; + } + if (isFunctionTool(tool)) { + return { + kind: "function", + tool, + def: { ...functionToolToDefinition(tool), name: key }, + }; + } + if (isHostedTool(tool)) { + return { + kind: "hosted", + def: { + name: key, + description: `Hosted tool: ${tool.type}`, + parameters: { type: "object", properties: {} }, + }, + }; + } + throw new Error(`runAgent: unrecognized tool shape at key "${key}"`); +} diff --git a/packages/appkit/src/core/agent/system-prompt.ts b/packages/appkit/src/core/agent/system-prompt.ts new file mode 100644 index 000000000..634f49c5f --- /dev/null +++ b/packages/appkit/src/core/agent/system-prompt.ts @@ -0,0 +1,40 @@ +/** + * Builds the AppKit base system prompt from active plugin names. + * + * The base prompt provides guidelines and app context. It does NOT + * include individual tool descriptions — those are sent via the + * structured `tools` API parameter to the LLM. + */ +export function buildBaseSystemPrompt(pluginNames: string[]): string { + const lines: string[] = [ + "You are an AI assistant running on Databricks AppKit.", + ]; + + if (pluginNames.length > 0) { + lines.push(""); + lines.push(`Active plugins: ${pluginNames.join(", ")}`); + } + + lines.push(""); + lines.push("Guidelines:"); + lines.push("- Use Databricks SQL syntax when writing queries"); + lines.push( + "- When results are large, summarize key findings rather than dumping raw data", + ); + lines.push("- If a tool call fails, explain the error clearly to the user"); + lines.push("- When browsing files, verify the path exists before reading"); + + return lines.join("\n"); +} + +/** + * Compose the full system prompt from the base prompt and an optional + * per-agent user prompt. + */ +export function composeSystemPrompt( + basePrompt: string, + agentPrompt?: string, +): string { + if (!agentPrompt) return basePrompt; + return `${basePrompt}\n\n${agentPrompt}`; +} diff --git a/packages/appkit/src/core/agent/tests/create-agent.test.ts b/packages/appkit/src/core/agent/tests/create-agent.test.ts new file mode 100644 index 000000000..30bcc98e9 --- /dev/null +++ b/packages/appkit/src/core/agent/tests/create-agent.test.ts @@ -0,0 +1,75 @@ +import { describe, expect, test } from "vitest"; +import { z } from "zod"; +import { createAgent } from "../create-agent"; +import { tool } from "../../../core/agent/tools/tool"; +import type { AgentDefinition } from "../../../core/agent/types"; + +describe("createAgent", () => { + test("returns the definition unchanged for a simple agent", () => { + const def: AgentDefinition = { + name: "support", + instructions: "You help customers.", + model: "endpoint-x", + }; + const result = createAgent(def); + expect(result).toBe(def); + }); + + test("accepts tools as a keyed record", () => { + const get_weather = tool({ + name: "get_weather", + description: "Get the weather", + schema: z.object({ city: z.string() }), + execute: async ({ city }) => `Sunny in ${city}`, + }); + + const def = createAgent({ + instructions: "...", + tools: { get_weather }, + }); + + expect(def.tools?.get_weather).toBe(get_weather); + }); + + test("accepts sub-agents in a keyed record", () => { + const researcher = createAgent({ instructions: "Research." }); + const supervisor = createAgent({ + instructions: "Supervise.", + agents: { researcher }, + }); + expect(supervisor.agents?.researcher).toBe(researcher); + }); + + test("throws on a direct self-cycle", () => { + const a: AgentDefinition = { instructions: "a" }; + // biome-ignore lint/suspicious/noExplicitAny: intentional cycle setup for test + (a as any).agents = { self: a }; + expect(() => createAgent(a)).toThrow(/cycle/i); + }); + + test("throws on an indirect cycle", () => { + const a: AgentDefinition = { instructions: "a" }; + const b: AgentDefinition = { instructions: "b" }; + a.agents = { b }; + b.agents = { a }; + expect(() => createAgent(a)).toThrow(/cycle/i); + }); + + test("accepts a DAG of sub-agents without throwing", () => { + const leaf: AgentDefinition = { instructions: "leaf" }; + const branchA: AgentDefinition = { + instructions: "a", + agents: { leaf }, + }; + const branchB: AgentDefinition = { + instructions: "b", + agents: { leaf }, + }; + const root = createAgent({ + instructions: "root", + agents: { branchA, branchB }, + }); + expect(root.agents?.branchA.agents?.leaf).toBe(leaf); + expect(root.agents?.branchB.agents?.leaf).toBe(leaf); + }); +}); diff --git a/packages/appkit/src/core/agent/tests/load-agents.test.ts b/packages/appkit/src/core/agent/tests/load-agents.test.ts new file mode 100644 index 000000000..23799148e --- /dev/null +++ b/packages/appkit/src/core/agent/tests/load-agents.test.ts @@ -0,0 +1,302 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, test } from "vitest"; +import { z } from "zod"; +import { buildToolkitEntries } from "../../../core/agent/build-toolkit"; +import { + loadAgentFromFile, + loadAgentsFromDir, + parseFrontmatter, +} from "../load-agents"; +import { defineTool, type ToolRegistry } from "../../../core/agent/tools/define-tool"; +import { tool } from "../../../core/agent/tools/tool"; +import type { AgentDefinition } from "../../../core/agent/types"; + +let workDir: string; + +beforeEach(() => { + workDir = fs.mkdtempSync(path.join(os.tmpdir(), "agents-test-")); +}); + +afterEach(() => { + fs.rmSync(workDir, { recursive: true, force: true }); +}); + +function write(name: string, content: string) { + fs.writeFileSync(path.join(workDir, name), content, "utf-8"); + return path.join(workDir, name); +} + +describe("parseFrontmatter", () => { + test("parses a simple object", () => { + const { data, content } = parseFrontmatter( + "---\nendpoint: foo\ndefault: true\n---\nHello body", + ); + expect(data).toEqual({ endpoint: "foo", default: true }); + expect(content).toBe("Hello body"); + }); + + test("parses nested arrays", () => { + const { data } = parseFrontmatter( + "---\ntoolkits:\n - analytics\n - files: [uploads.list]\n---\nbody", + ); + expect(data?.toolkits).toEqual(["analytics", { files: ["uploads.list"] }]); + }); + + test("returns null data when no frontmatter", () => { + const { data, content } = parseFrontmatter("No frontmatter here"); + expect(data).toBeNull(); + expect(content).toBe("No frontmatter here"); + }); + + test("throws on invalid YAML", () => { + expect(() => parseFrontmatter("---\nkey: : : bad\n---\n")).toThrow(/YAML/); + }); +}); + +describe("loadAgentFromFile", () => { + test("returns AgentDefinition with body as instructions", async () => { + const p = write( + "assistant.md", + "---\nendpoint: e-1\n---\nYou are helpful.", + ); + const def = await loadAgentFromFile(p, {}); + expect(def.name).toBe("assistant"); + expect(def.instructions).toBe("You are helpful."); + expect(def.model).toBe("e-1"); + }); +}); + +describe("loadAgentsFromDir", () => { + test("returns empty map when dir doesn't exist", async () => { + const res = await loadAgentsFromDir("/nonexistent-for-tests", {}); + expect(res.defs).toEqual({}); + expect(res.defaultAgent).toBeNull(); + }); + + test("loads all .md files keyed by file-stem", async () => { + write("support.md", "---\nendpoint: e-1\n---\nSupport prompt."); + write("sales.md", "---\nendpoint: e-2\n---\nSales prompt."); + const res = await loadAgentsFromDir(workDir, {}); + expect(Object.keys(res.defs).sort()).toEqual(["sales", "support"]); + }); + + test("picks up default: true from frontmatter", async () => { + write("one.md", "---\nendpoint: a\n---\nOne."); + write("two.md", "---\nendpoint: b\ndefault: true\n---\nTwo."); + const res = await loadAgentsFromDir(workDir, {}); + expect(res.defaultAgent).toBe("two"); + }); + + test("throws when frontmatter references an unregistered plugin", async () => { + write( + "broken.md", + "---\nendpoint: e\ntoolkits: [missing]\n---\nBroken agent.", + ); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /references toolkit 'missing'/, + ); + }); + + test("throws when frontmatter references an unknown ambient tool", async () => { + write("broken.md", "---\nendpoint: e\ntools: [unknown_tool]\n---\nBroken."); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /references tool 'unknown_tool'/, + ); + }); + + test("resolves toolkits + ambient tools when provided", async () => { + const registry: ToolRegistry = { + query: defineTool({ + description: "q", + schema: z.object({ sql: z.string() }), + handler: () => "ok", + }), + }; + const plugins = new Map< + string, + { toolkit: (opts?: unknown) => Record } + >([ + [ + "analytics", + { + toolkit: (opts) => + buildToolkitEntries("analytics", registry, opts as never), + }, + ], + ]); + + const weather = tool({ + name: "get_weather", + description: "Weather", + schema: z.object({ city: z.string() }), + execute: async () => "sunny", + }); + + write( + "analyst.md", + "---\nendpoint: e\ntoolkits:\n - analytics\ntools:\n - get_weather\n---\nBody.", + ); + const res = await loadAgentsFromDir(workDir, { + plugins, + availableTools: { get_weather: weather }, + }); + expect(res.defs.analyst.tools).toBeDefined(); + expect(Object.keys(res.defs.analyst.tools ?? {}).sort()).toEqual([ + "analytics.query", + "get_weather", + ]); + }); + + describe("agents: sibling sub-agent references", () => { + test("resolves sibling references into def.agents regardless of file order", async () => { + // Names chosen so alphabetical iteration puts `dispatcher` *before* + // its siblings — pass-1 populates defs in any order, pass-2 resolves. + write( + "dispatcher.md", + "---\nendpoint: e\nagents:\n - analyst\n - writer\n---\nRoute work.", + ); + write("analyst.md", "---\nendpoint: e\n---\nAnalyst."); + write("writer.md", "---\nendpoint: e\n---\nWriter."); + + const res = await loadAgentsFromDir(workDir, {}); + expect(Object.keys(res.defs.dispatcher.agents ?? {}).sort()).toEqual([ + "analyst", + "writer", + ]); + expect(res.defs.dispatcher.agents?.analyst).toBe(res.defs.analyst); + expect(res.defs.dispatcher.agents?.writer).toBe(res.defs.writer); + // Leaves with no `agents:` retain undefined — only declared keys wire. + expect(res.defs.analyst.agents).toBeUndefined(); + expect(res.defs.writer.agents).toBeUndefined(); + }); + + test("mutual delegation is allowed (runtime depth cap handles cycles)", async () => { + write("a.md", "---\nendpoint: e\nagents:\n - b\n---\nA."); + write("b.md", "---\nendpoint: e\nagents:\n - a\n---\nB."); + + const res = await loadAgentsFromDir(workDir, {}); + expect(res.defs.a.agents?.b).toBe(res.defs.b); + expect(res.defs.b.agents?.a).toBe(res.defs.a); + }); + + test("throws with available list when a sibling is missing", async () => { + write("dispatcher.md", "---\nendpoint: e\nagents:\n - ghost\n---\nD."); + write("analyst.md", "---\nendpoint: e\n---\nAnalyst."); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /references sub-agent\(s\) 'ghost'.*Available: analyst, dispatcher/s, + ); + }); + + test("reports every missing sibling in one error, not just the first", async () => { + write( + "dispatcher.md", + "---\nendpoint: e\nagents:\n - ghost1\n - ghost2\n---\nD.", + ); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /ghost1, ghost2/, + ); + }); + + test("throws on self-reference", async () => { + write("solo.md", "---\nendpoint: e\nagents:\n - solo\n---\nSolo."); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /'solo'.*cannot reference itself/s, + ); + }); + + test("throws on non-array 'agents:' value", async () => { + write("bad.md", "---\nendpoint: e\nagents: analyst\n---\nBad."); + write("analyst.md", "---\nendpoint: e\n---\nAnalyst."); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /invalid 'agents:' frontmatter/, + ); + }); + + test("throws on non-string entries in 'agents:'", async () => { + write("bad.md", "---\nendpoint: e\nagents:\n - 42\n---\nBad."); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /invalid 'agents:' entry/, + ); + }); + + test("deduplicates repeated entries silently", async () => { + write( + "dispatcher.md", + "---\nendpoint: e\nagents:\n - analyst\n - analyst\n---\nD.", + ); + write("analyst.md", "---\nendpoint: e\n---\nAnalyst."); + const res = await loadAgentsFromDir(workDir, {}); + expect(Object.keys(res.defs.dispatcher.agents ?? {})).toEqual([ + "analyst", + ]); + }); + + test("empty array yields no sub-agents (no-op)", async () => { + write("dispatcher.md", "---\nendpoint: e\nagents: []\n---\nD."); + const res = await loadAgentsFromDir(workDir, {}); + expect(res.defs.dispatcher.agents).toBeUndefined(); + }); + + test("resolves 'agents:' references against codeAgents when provided", async () => { + write("dispatcher.md", "---\nendpoint: e\nagents:\n - support\n---\nD."); + const support: AgentDefinition = { + name: "support", + instructions: "Code-defined support.", + }; + const res = await loadAgentsFromDir(workDir, { + codeAgents: { support }, + }); + expect(res.defs.dispatcher.agents?.support).toBe(support); + }); + + test("codeAgents takes precedence over markdown sibling with the same name", async () => { + write("dispatcher.md", "---\nendpoint: e\nagents:\n - support\n---\nD."); + write("support.md", "---\nendpoint: e\n---\nMarkdown support."); + const codeSupport: AgentDefinition = { + name: "support", + instructions: "Code support.", + }; + const res = await loadAgentsFromDir(workDir, { + codeAgents: { support: codeSupport }, + }); + // Reference binds to code version, matching the plugin's top-level + // `code wins` merge behaviour. + expect(res.defs.dispatcher.agents?.support).toBe(codeSupport); + expect(res.defs.dispatcher.agents?.support.instructions).toBe( + "Code support.", + ); + }); + + test("missing-sibling error lists both markdown and code agent names", async () => { + write("dispatcher.md", "---\nendpoint: e\nagents:\n - ghost\n---\nD."); + write("analyst.md", "---\nendpoint: e\n---\nAnalyst."); + const codeAgent: AgentDefinition = { + name: "writer", + instructions: "Writer.", + }; + await expect( + loadAgentsFromDir(workDir, { codeAgents: { writer: codeAgent } }), + ).rejects.toThrow(/Available: analyst, dispatcher, writer/); + }); + }); +}); + +describe("loadAgentFromFile — sub-agent refs rejected", () => { + test("throws when 'agents:' is non-empty in a single-file load", async () => { + const p = write( + "lonely.md", + "---\nendpoint: e\nagents:\n - ghost\n---\nLonely.", + ); + await expect(loadAgentFromFile(p, {})).rejects.toThrow( + /requires loadAgentsFromDir/, + ); + }); + + test("ignores empty 'agents:' array (treated as absent)", async () => { + const p = write("lonely.md", "---\nendpoint: e\nagents: []\n---\nLonely."); + const def = await loadAgentFromFile(p, {}); + expect(def.agents).toBeUndefined(); + }); +}); diff --git a/packages/appkit/src/core/agent/tests/run-agent.test.ts b/packages/appkit/src/core/agent/tests/run-agent.test.ts new file mode 100644 index 000000000..55b3e7b39 --- /dev/null +++ b/packages/appkit/src/core/agent/tests/run-agent.test.ts @@ -0,0 +1,120 @@ +import type { + AgentAdapter, + AgentEvent, + AgentInput, + AgentRunContext, +} from "shared"; +import { describe, expect, test, vi } from "vitest"; +import { z } from "zod"; +import { createAgent } from "../create-agent"; +import { runAgent } from "../run-agent"; +import { tool } from "../../../core/agent/tools/tool"; +import type { ToolkitEntry } from "../../../core/agent/types"; + +function scriptedAdapter(events: AgentEvent[]): AgentAdapter { + return { + async *run(_input: AgentInput, _context: AgentRunContext) { + for (const event of events) { + yield event; + } + }, + }; +} + +describe("runAgent", () => { + test("drives the adapter and returns aggregated text", async () => { + const events: AgentEvent[] = [ + { type: "message_delta", content: "Hello " }, + { type: "message_delta", content: "world" }, + { type: "status", status: "complete" }, + ]; + const def = createAgent({ + instructions: "Say hello", + model: scriptedAdapter(events), + }); + + const result = await runAgent(def, { messages: "hi" }); + expect(result.text).toBe("Hello world"); + expect(result.events).toHaveLength(3); + }); + + test("prefers terminal 'message' event over deltas when present", async () => { + const events: AgentEvent[] = [ + { type: "message_delta", content: "partial" }, + { type: "message", content: "final answer" }, + ]; + const def = createAgent({ + instructions: "x", + model: scriptedAdapter(events), + }); + const result = await runAgent(def, { messages: "hi" }); + expect(result.text).toBe("final answer"); + }); + + test("invokes inline tools via executeTool callback", async () => { + const weatherFn = vi.fn(async () => "Sunny in NYC"); + const weather = tool({ + name: "get_weather", + description: "Weather", + schema: z.object({ city: z.string() }), + execute: weatherFn, + }); + + let capturedCtx: AgentRunContext | null = null; + const adapter: AgentAdapter = { + async *run(_input, context) { + capturedCtx = context; + yield { type: "message_delta", content: "" }; + }, + }; + + const def = createAgent({ + instructions: "x", + model: adapter, + tools: { get_weather: weather }, + }); + + await runAgent(def, { messages: "hi" }); + expect(capturedCtx).not.toBeNull(); + // biome-ignore lint/style/noNonNullAssertion: asserted above + const result = await capturedCtx!.executeTool("get_weather", { + city: "NYC", + }); + expect(result).toBe("Sunny in NYC"); + expect(weatherFn).toHaveBeenCalledWith({ city: "NYC" }); + }); + + test("throws a clear error when a ToolkitEntry is invoked", async () => { + const toolkitEntry: ToolkitEntry = { + __toolkitRef: true, + pluginName: "analytics", + localName: "query", + def: { + name: "analytics.query", + description: "SQL", + parameters: { type: "object", properties: {} }, + }, + }; + + let capturedCtx: AgentRunContext | null = null; + const adapter: AgentAdapter = { + async *run(_input, context) { + capturedCtx = context; + yield { type: "message_delta", content: "" }; + }, + }; + + const def = createAgent({ + instructions: "x", + model: adapter, + tools: { "analytics.query": toolkitEntry }, + }); + + await runAgent(def, { messages: "hi" }); + expect(capturedCtx).not.toBeNull(); + await expect( + // biome-ignore lint/style/noNonNullAssertion: asserted above + capturedCtx!.executeTool("analytics.query", {}), + ).rejects.toThrow(/only usable via createApp/); + }); +}); diff --git a/packages/appkit/src/core/agent/tests/system-prompt.test.ts b/packages/appkit/src/core/agent/tests/system-prompt.test.ts new file mode 100644 index 000000000..83bf8e192 --- /dev/null +++ b/packages/appkit/src/core/agent/tests/system-prompt.test.ts @@ -0,0 +1,45 @@ +import { describe, expect, test } from "vitest"; +import { buildBaseSystemPrompt, composeSystemPrompt } from "../system-prompt"; + +describe("buildBaseSystemPrompt", () => { + test("includes plugin names", () => { + const prompt = buildBaseSystemPrompt(["analytics", "files", "genie"]); + expect(prompt).toContain("Active plugins: analytics, files, genie"); + }); + + test("includes guidelines", () => { + const prompt = buildBaseSystemPrompt([]); + expect(prompt).toContain("Guidelines:"); + expect(prompt).toContain("Databricks SQL"); + expect(prompt).toContain("summarize key findings"); + }); + + test("works with no plugins", () => { + const prompt = buildBaseSystemPrompt([]); + expect(prompt).toContain("AI assistant running on Databricks AppKit"); + expect(prompt).not.toContain("Active plugins:"); + }); + + test("does NOT include individual tool names", () => { + const prompt = buildBaseSystemPrompt(["analytics"]); + expect(prompt).not.toContain("analytics.query"); + expect(prompt).not.toContain("Available tools:"); + }); +}); + +describe("composeSystemPrompt", () => { + test("concatenates base + agent prompt with double newline", () => { + const composed = composeSystemPrompt("Base prompt.", "Agent prompt."); + expect(composed).toBe("Base prompt.\n\nAgent prompt."); + }); + + test("returns base prompt alone when no agent prompt", () => { + const composed = composeSystemPrompt("Base prompt."); + expect(composed).toBe("Base prompt."); + }); + + test("returns base prompt when agent prompt is empty string", () => { + const composed = composeSystemPrompt("Base prompt.", ""); + expect(composed).toBe("Base prompt."); + }); +}); diff --git a/packages/appkit/src/core/agent/types.ts b/packages/appkit/src/core/agent/types.ts index 086a0426f..d752fa019 100644 --- a/packages/appkit/src/core/agent/types.ts +++ b/packages/appkit/src/core/agent/types.ts @@ -1,6 +1,13 @@ -import type { AgentToolDefinition, ToolAnnotations } from "shared"; +import type { + AgentAdapter, + AgentToolDefinition, + BasePluginConfig, + ThreadStore, + ToolAnnotations, +} from "shared"; import type { FunctionTool } from "./tools/function-tool"; import type { HostedTool } from "./tools/hosted-tools"; +import type { McpHostPolicyConfig } from "../../plugins/agents/tools/mcp-host-policy"; /** * A tool reference produced by a plugin's `.toolkit()` call. The agents plugin @@ -42,8 +49,172 @@ export interface ToolkitOptions { } /** - * Type guard for `ToolkitEntry` — used to differentiate toolkit references - * from inline tools in a mixed `tools` record. + * Context passed to `baseSystemPrompt` callbacks. + */ +export interface PromptContext { + agentName: string; + pluginNames: string[]; + toolNames: string[]; +} + +export type BaseSystemPromptOption = + | false + | string + | ((ctx: PromptContext) => string); + +export interface AgentDefinition { + /** Filled in from the enclosing key when used in `agents: { foo: def }`. */ + name?: string; + /** System prompt body. For markdown-loaded agents this is the file body. */ + instructions: string; + /** + * Model adapter (or endpoint-name string sugar for + * `DatabricksAdapter.fromServingEndpoint({ endpointName })`). Optional — + * falls back to the plugin's `defaultModel`. + */ + model?: AgentAdapter | Promise | string; + /** Per-agent tool record. Key is the LLM-visible tool-call name. */ + tools?: Record; + /** Sub-agents, exposed as `agent-` tools on this agent. */ + agents?: Record; + /** Override the plugin's baseSystemPrompt for this agent only. */ + baseSystemPrompt?: BaseSystemPromptOption; + maxSteps?: number; + maxTokens?: number; + /** + * When true, the thread used for a chat request against this agent is + * deleted from `ThreadStore` after the stream completes (success or + * failure). Use for stateless one-shot agents — e.g. autocomplete, where + * each request is independent and retaining history would both poison + * future calls and accumulate unbounded state in the default + * `InMemoryThreadStore`. Defaults to `false`. + */ + ephemeral?: boolean; +} + +/** + * Auto-inherit configuration. When enabled for a given agent origin, agents + * with no explicit `tools:` declaration receive every registered ToolProvider + * plugin tool whose author marked `autoInheritable: true`. Tools without that + * flag — destructive, state-mutating, or privilege-sensitive — never spread + * automatically and must be wired via `tools:`, `toolkits:`, or `fromPlugin`. + * + * Defaults are `false` for both origins (safe-by-default): developers must + * consciously opt an origin in to any auto-inherit behaviour. + */ +export interface AutoInheritToolsConfig { + /** Default for agents loaded from markdown files. Default: `false`. */ + file?: boolean; + /** Default for code-defined agents (via `agents: { foo: createAgent(...) }`). Default: `false`. */ + code?: boolean; +} + +export interface AgentsPluginConfig extends BasePluginConfig { + /** Directory to scan for markdown agent files. Default `./config/agents`. Set to `false` to disable. */ + dir?: string | false; + /** Code-defined agents, merged with file-loaded ones (code wins on key collision). */ + agents?: Record; + /** Agent used when clients don't specify one. Defaults to the first-registered agent or the file with `default: true` frontmatter. */ + defaultAgent?: string; + /** Default model for agents that don't specify their own (in code or frontmatter). */ + defaultModel?: AgentAdapter | Promise | string; + /** Ambient tool library. Keys may be referenced by markdown frontmatter via `tools: [key1, key2]`. */ + tools?: Record; + /** Whether to auto-inherit every ToolProvider plugin's toolkit. Accepts a boolean shorthand. */ + autoInheritTools?: boolean | AutoInheritToolsConfig; + /** Persistent thread store. Default: in-memory. */ + threadStore?: ThreadStore; + /** Customize or disable the AppKit base system prompt. */ + baseSystemPrompt?: BaseSystemPromptOption; + /** + * MCP server host policy. By default only same-origin Databricks workspace + * URLs may be used as MCP endpoints; custom hosts must be explicitly + * allowlisted here. Workspace credentials (SP / OBO) are never forwarded + * to non-workspace hosts. + */ + mcp?: McpHostPolicyConfig; + /** + * Human-in-the-loop approval gate for destructive tool calls. When enabled + * (the default), the agents plugin emits an `appkit.approval_pending` SSE + * event before executing any tool annotated `destructive: true` and waits + * for a `POST /chat/approve` decision from the same user who initiated the + * stream. A missing decision after `timeoutMs` auto-denies the call. + */ + approval?: { + /** Require human approval for tools annotated `destructive: true`. Default: `true`. */ + requireForDestructive?: boolean; + /** Milliseconds to wait before auto-denying. Default: 60_000. */ + timeoutMs?: number; + }; + /** + * Runtime resource limits applied during agent execution. Defaults are + * tuned to protect a single-instance deployment from a misbehaving user or + * a runaway prompt injection; tighten or relax as appropriate for the + * deployment's scale and trust model. Request-body caps (chat message + * size, invocations input size / length) are enforced statically by the + * Zod schemas and are not configurable here. + */ + limits?: { + /** + * Max concurrent chat streams a single user may have open. Subsequent + * `POST /chat` requests from that user while at-limit are rejected with + * HTTP 429. Default: `5`. + */ + maxConcurrentStreamsPerUser?: number; + /** + * Max tool invocations per agent run (across the full tool-call graph, + * including sub-agent invocations). A run that exceeds the budget is + * aborted with a terminal error event. Default: `50`. + */ + maxToolCalls?: number; + /** + * Max sub-agent recursion depth. Protects against a prompt-injected + * agent that delegates to a sub-agent which in turn delegates back to + * itself (directly or transitively). Default: `3`. + */ + maxSubAgentDepth?: number; + }; +} + +/** Internal tool-index entry after a tool record has been resolved to a dispatchable form. */ +export type ResolvedToolEntry = + | { + source: "toolkit"; + pluginName: string; + localName: string; + def: AgentToolDefinition; + } + | { + source: "function"; + functionTool: FunctionTool; + def: AgentToolDefinition; + } + | { + source: "mcp"; + mcpToolName: string; + def: AgentToolDefinition; + } + | { + source: "subagent"; + agentName: string; + def: AgentToolDefinition; + }; + +export interface RegisteredAgent { + name: string; + instructions: string; + adapter: AgentAdapter; + toolIndex: Map; + baseSystemPrompt?: BaseSystemPromptOption; + maxSteps?: number; + maxTokens?: number; + /** Mirrors `AgentDefinition.ephemeral` — skip thread persistence. */ + ephemeral?: boolean; +} + +/** + * Type guard for `ToolkitEntry` — used by the agents plugin to differentiate + * toolkit references from inline tools in a mixed `tools` record. */ export function isToolkitEntry(value: unknown): value is ToolkitEntry { return ( diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts new file mode 100644 index 000000000..9c3076e6e --- /dev/null +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -0,0 +1,1268 @@ +import { randomUUID } from "node:crypto"; +import path from "node:path"; +import type express from "express"; +import pc from "picocolors"; +import type { + AgentAdapter, + AgentEvent, + AgentRunContext, + AgentToolDefinition, + IAppRouter, + Message, + PluginPhase, + ResponseStreamEvent, + Thread, + ToolProvider, +} from "shared"; +import { createLogger } from "../../logging/logger"; +import { Plugin, toPlugin } from "../../plugin"; +import type { PluginManifest } from "../../registry"; +import { agentStreamDefaults } from "./defaults"; +import { EventChannel } from "./event-channel"; +import { AgentEventTranslator } from "./event-translator"; +import { loadAgentsFromDir } from "../../core/agent/load-agents"; +import manifest from "./manifest.json"; +import { + approvalRequestSchema, + chatRequestSchema, + invocationsRequestSchema, +} from "./schemas"; +import { buildBaseSystemPrompt, composeSystemPrompt } from "../../core/agent/system-prompt"; +import { InMemoryThreadStore } from "./thread-store"; +import { ToolApprovalGate } from "./tool-approval-gate"; +import { + functionToolToDefinition, + isFunctionTool, + isHostedTool, + resolveHostedTools, +} from "../../core/agent/tools"; +import { buildMcpHostPolicy } from "./tools/mcp-host-policy"; +import type { + AgentDefinition, + AgentsPluginConfig, + BaseSystemPromptOption, + PromptContext, + RegisteredAgent, + ResolvedToolEntry, +} from "../../core/agent/types"; +import { isToolkitEntry } from "../../core/agent/types"; + +const logger = createLogger("agents"); + +const DEFAULT_AGENTS_DIR = "./config/agents"; + +/** + * Context flag recorded on the in-memory AgentDefinition to indicate whether + * it came from markdown (file) or from user code. Drives the asymmetric + * `autoInheritTools` default. + */ +interface AgentSource { + origin: "file" | "code"; +} + +export class AgentsPlugin extends Plugin implements ToolProvider { + static manifest = manifest as PluginManifest; + static phase: PluginPhase = "deferred"; + + protected declare config: AgentsPluginConfig; + + private agents = new Map(); + private defaultAgentName: string | null = null; + private activeStreams = new Map< + string, + { controller: AbortController; userId: string } + >(); + private mcpClient: AppKitMcpClient | null = null; + private threadStore; + private approvalGate = new ToolApprovalGate(); + + constructor(config: AgentsPluginConfig) { + super(config); + this.config = config; + if (config.threadStore) { + this.threadStore = config.threadStore; + } else { + this.threadStore = new InMemoryThreadStore(); + if (process.env.NODE_ENV === "production") { + logger.warn( + "InMemoryThreadStore is in use in a production build (NODE_ENV=production). " + + "Thread history is unbounded and lost on restart. " + + "Pass agents({ threadStore: }) for real deployments.", + ); + } else { + logger.info( + "Using default InMemoryThreadStore (dev-only — threads are lost on restart and grow without bound).", + ); + } + } + } + + /** Effective approval policy with defaults applied. */ + private get resolvedApprovalPolicy(): { + requireForDestructive: boolean; + timeoutMs: number; + } { + const cfg = this.config.approval ?? {}; + return { + requireForDestructive: cfg.requireForDestructive ?? true, + timeoutMs: cfg.timeoutMs ?? 60_000, + }; + } + + /** Effective DoS limits with defaults applied. */ + private get resolvedLimits(): { + maxConcurrentStreamsPerUser: number; + maxToolCalls: number; + maxSubAgentDepth: number; + } { + const cfg = this.config.limits ?? {}; + return { + maxConcurrentStreamsPerUser: cfg.maxConcurrentStreamsPerUser ?? 5, + maxToolCalls: cfg.maxToolCalls ?? 50, + maxSubAgentDepth: cfg.maxSubAgentDepth ?? 3, + }; + } + + /** Count active streams owned by a given user. */ + private countUserStreams(userId: string): number { + let n = 0; + for (const entry of this.activeStreams.values()) { + if (entry.userId === userId) n++; + } + return n; + } + + async setup() { + await this.loadAgents(); + this.mountInvocationsRoute(); + this.printRegistry(); + } + + /** + * Reload agents from the configured directory, preserving code-defined + * agents. Swaps the registry atomically at the end. + */ + async reload(): Promise { + this.agents.clear(); + this.defaultAgentName = null; + if (this.mcpClient) { + await this.mcpClient.close(); + this.mcpClient = null; + } + await this.loadAgents(); + } + + private async loadAgents() { + const { defs: fileDefs, defaultAgent: fileDefault } = + await this.loadFileDefinitions(); + + const codeDefs = this.config.agents ?? {}; + + for (const name of Object.keys(fileDefs)) { + if (codeDefs[name]) { + logger.warn( + "Agent '%s' defined in both code and a markdown file. Code definition takes precedence.", + name, + ); + } + } + + const merged: Record = + {}; + for (const [name, def] of Object.entries(fileDefs)) { + merged[name] = { def, src: { origin: "file" } }; + } + for (const [name, def] of Object.entries(codeDefs)) { + merged[name] = { def, src: { origin: "code" } }; + } + + if (Object.keys(merged).length === 0) { + logger.info( + "No agents registered (no files in %s, no code-defined agents)", + this.resolvedAgentsDir() ?? "", + ); + return; + } + + for (const [name, { def, src }] of Object.entries(merged)) { + try { + const registered = await this.buildRegisteredAgent(name, def, src); + this.agents.set(name, registered); + if (!this.defaultAgentName) this.defaultAgentName = name; + } catch (err) { + throw new Error( + `Failed to register agent '${name}' (${src.origin}): ${ + err instanceof Error ? err.message : String(err) + }`, + { cause: err instanceof Error ? err : undefined }, + ); + } + } + + if (this.config.defaultAgent) { + if (!this.agents.has(this.config.defaultAgent)) { + throw new Error( + `defaultAgent '${this.config.defaultAgent}' is not registered. Available: ${Array.from(this.agents.keys()).join(", ")}`, + ); + } + this.defaultAgentName = this.config.defaultAgent; + } else if (fileDefault && this.agents.has(fileDefault)) { + this.defaultAgentName = fileDefault; + } + } + + private resolvedAgentsDir(): string | null { + if (this.config.dir === false) return null; + const dir = this.config.dir ?? DEFAULT_AGENTS_DIR; + return path.isAbsolute(dir) ? dir : path.resolve(process.cwd(), dir); + } + + private async loadFileDefinitions(): Promise<{ + defs: Record; + defaultAgent: string | null; + }> { + const dir = this.resolvedAgentsDir(); + if (!dir) return { defs: {}, defaultAgent: null }; + + const pluginToolProviders = this.pluginProviderIndex(); + const ambient = this.config.tools ?? {}; + + const result = await loadAgentsFromDir(dir, { + defaultModel: this.config.defaultModel, + availableTools: ambient, + plugins: pluginToolProviders, + codeAgents: this.config.agents, + }); + + return result; + } + + /** + * Builds the map of plugin-name → toolkit that the markdown loader consults + * when resolving `toolkits:` frontmatter entries. + */ + private pluginProviderIndex(): Map< + string, + { toolkit: (opts?: unknown) => Record } + > { + const out = new Map(); + if (!this.context) return out; + for (const { name, provider } of this.context.getToolProviders()) { + const withToolkit = provider as ToolProvider & { + toolkit?: (opts?: unknown) => Record; + }; + if (typeof withToolkit.toolkit === "function") { + out.set(name, { + toolkit: withToolkit.toolkit.bind(withToolkit), + }); + } + } + return out; + } + + private async buildRegisteredAgent( + name: string, + def: AgentDefinition, + src: AgentSource, + ): Promise { + const adapter = await this.resolveAdapter(def, name); + const toolIndex = await this.buildToolIndex(name, def, src); + + return { + name, + instructions: def.instructions, + adapter, + toolIndex, + baseSystemPrompt: def.baseSystemPrompt, + maxSteps: def.maxSteps, + maxTokens: def.maxTokens, + ephemeral: def.ephemeral, + }; + } + + private async resolveAdapter( + def: AgentDefinition, + name: string, + ): Promise { + const source = def.model ?? this.config.defaultModel; + // Per-agent adapter knobs from `AgentDefinition` / markdown frontmatter. + // Only applied when AppKit builds the adapter itself (string or omitted + // model). Users who pass a pre-built `AgentAdapter` own these settings. + const adapterOptions: { maxSteps?: number; maxTokens?: number } = {}; + if (def.maxSteps !== undefined) adapterOptions.maxSteps = def.maxSteps; + if (def.maxTokens !== undefined) adapterOptions.maxTokens = def.maxTokens; + + if (!source) { + const { DatabricksAdapter } = await import("../../agents/databricks"); + try { + return await DatabricksAdapter.fromModelServing( + undefined, + adapterOptions, + ); + } catch (err) { + throw new Error( + `Agent '${name}' has no model configured and no DATABRICKS_AGENT_ENDPOINT default available`, + { cause: err instanceof Error ? err : undefined }, + ); + } + } + if (typeof source === "string") { + const { DatabricksAdapter } = await import("../../agents/databricks"); + return DatabricksAdapter.fromModelServing(source, adapterOptions); + } + return await source; + } + + /** + * Resolves an agent's tool record into a per-agent dispatch index. Connects + * hosted tools via MCP client. Applies `autoInheritTools` defaults when the + * definition has no declared tools/agents. + */ + private async buildToolIndex( + agentName: string, + def: AgentDefinition, + src: AgentSource, + ): Promise> { + const index = new Map(); + const hasExplicitTools = def.tools && Object.keys(def.tools).length > 0; + const hasExplicitSubAgents = + def.agents && Object.keys(def.agents).length > 0; + + const inheritDefaults = normalizeAutoInherit(this.config.autoInheritTools); + const shouldInherit = + !hasExplicitTools && + !hasExplicitSubAgents && + (src.origin === "file" ? inheritDefaults.file : inheritDefaults.code); + + if (shouldInherit) { + await this.applyAutoInherit(agentName, index); + } + + // 1. Sub-agents → agent- + for (const [childKey, childDef] of Object.entries(def.agents ?? {})) { + const toolName = `agent-${childKey}`; + index.set(toolName, { + source: "subagent", + agentName: childDef.name ?? childKey, + def: { + name: toolName, + description: + childDef.instructions.slice(0, 120) || + `Delegate to the ${childKey} sub-agent`, + parameters: { + type: "object", + properties: { + input: { + type: "string", + description: "Message to send to the sub-agent.", + }, + }, + required: ["input"], + }, + }, + }); + } + + // 2. Explicit tools (toolkit entries, function tools, hosted tools) + const hostedToCollect: import("../../core/agent/tools/hosted-tools").HostedTool[] = []; + for (const [key, tool] of Object.entries(def.tools ?? {})) { + if (isToolkitEntry(tool)) { + index.set(key, { + source: "toolkit", + pluginName: tool.pluginName, + localName: tool.localName, + def: { ...tool.def, name: key }, + }); + continue; + } + if (isFunctionTool(tool)) { + index.set(key, { + source: "function", + functionTool: tool, + def: { ...functionToolToDefinition(tool), name: key }, + }); + continue; + } + if (isHostedTool(tool)) { + hostedToCollect.push(tool); + continue; + } + throw new Error( + `Agent '${agentName}' tool '${key}' has an unrecognized shape`, + ); + } + + if (hostedToCollect.length > 0) { + await this.connectHostedTools(hostedToCollect, index); + } + + return index; + } + + private async applyAutoInherit( + agentName: string, + index: Map, + ): Promise { + if (!this.context) return; + const inherited: string[] = []; + const skippedByPlugin = new Map(); + const recordSkip = (pluginName: string, localName: string) => { + const list = skippedByPlugin.get(pluginName) ?? []; + list.push(localName); + skippedByPlugin.set(pluginName, list); + }; + + for (const { + name: pluginName, + provider, + } of this.context.getToolProviders()) { + if (pluginName === this.name) continue; + const withToolkit = provider as ToolProvider & { + toolkit?: (opts?: unknown) => Record; + }; + if (typeof withToolkit.toolkit === "function") { + const entries = withToolkit.toolkit() as Record; + for (const [key, maybeEntry] of Object.entries(entries)) { + if (!isToolkitEntry(maybeEntry)) continue; + if (maybeEntry.autoInheritable !== true) { + recordSkip(maybeEntry.pluginName, maybeEntry.localName); + continue; + } + index.set(key, { + source: "toolkit", + pluginName: maybeEntry.pluginName, + localName: maybeEntry.localName, + def: { ...maybeEntry.def, name: key }, + }); + inherited.push(key); + } + continue; + } + // Fallback: providers without a toolkit() still expose getAgentTools(). + // These cannot be selectively opted in per tool, so we conservatively + // skip them during auto-inherit and require explicit `tools:` wiring. + for (const tool of provider.getAgentTools()) { + recordSkip(pluginName, tool.name); + } + } + + if (inherited.length > 0) { + logger.info( + "[agent %s] auto-inherited %d tool(s): %s", + agentName, + inherited.length, + inherited.join(", "), + ); + } + if (skippedByPlugin.size > 0) { + const summary = Array.from(skippedByPlugin.entries()) + .map(([p, tools]) => `${p}(${tools.length})`) + .join(", "); + logger.info( + "[agent %s] auto-inherit skipped %d tool(s) not marked autoInheritable: %s. Wire them explicitly via `tools:` if needed.", + agentName, + Array.from(skippedByPlugin.values()).reduce( + (n, list) => n + list.length, + 0, + ), + summary, + ); + } + } + + private async connectHostedTools( + hostedTools: import("../../core/agent/tools/hosted-tools").HostedTool[], + index: Map, + ): Promise { + let host: string | undefined; + let authenticate: () => Promise>; + + try { + const { getWorkspaceClient } = await import("../../context"); + const wsClient = getWorkspaceClient(); + await wsClient.config.ensureResolved(); + host = wsClient.config.host; + authenticate = async () => { + const headers = new Headers(); + await wsClient.config.authenticate(headers); + return Object.fromEntries(headers.entries()); + }; + } catch { + host = process.env.DATABRICKS_HOST; + authenticate = async (): Promise> => { + const token = process.env.DATABRICKS_TOKEN; + return token ? { Authorization: `Bearer ${token}` } : {}; + }; + } + + if (!host) { + logger.warn( + "No Databricks host available — skipping %d hosted tool(s)", + hostedTools.length, + ); + return; + } + + if (!this.mcpClient) { + const policy = buildMcpHostPolicy(this.config.mcp, host); + this.mcpClient = new AppKitMcpClient(host, authenticate, policy); + } + + const endpoints = resolveHostedTools(hostedTools); + await this.mcpClient.connectAll(endpoints); + + for (const def of this.mcpClient.getAllToolDefinitions()) { + index.set(def.name, { + source: "mcp", + mcpToolName: def.name, + def, + }); + } + } + + // ----------------- ToolProvider (no tools of our own) -------------------- + + getAgentTools(): AgentToolDefinition[] { + return []; + } + + async executeAgentTool(): Promise { + throw new Error("AgentsPlugin does not expose executeAgentTool directly"); + } + + // ----------------- Route mounting and handlers --------------------------- + + private mountInvocationsRoute() { + if (!this.context) return; + this.context.addRoute( + "post", + "/invocations", + (req: express.Request, res: express.Response) => { + this._handleInvocations(req, res); + }, + ); + } + + injectRoutes(router: IAppRouter) { + this.route(router, { + name: "chat", + method: "post", + path: "/chat", + handler: async (req, res) => this._handleChat(req, res), + }); + this.route(router, { + name: "cancel", + method: "post", + path: "/cancel", + handler: async (req, res) => this._handleCancel(req, res), + }); + this.route(router, { + name: "approve", + method: "post", + path: "/approve", + handler: async (req, res) => this._handleApprove(req, res), + }); + this.route(router, { + name: "threads", + method: "get", + path: "/threads", + handler: async (req, res) => this._handleListThreads(req, res), + }); + this.route(router, { + name: "thread", + method: "get", + path: "/threads/:threadId", + handler: async (req, res) => this._handleGetThread(req, res), + }); + this.route(router, { + name: "deleteThread", + method: "delete", + path: "/threads/:threadId", + handler: async (req, res) => this._handleDeleteThread(req, res), + }); + this.route(router, { + name: "info", + method: "get", + path: "/info", + handler: async (_req, res) => { + res.json({ + agents: Array.from(this.agents.keys()), + defaultAgent: this.defaultAgentName, + }); + }, + }); + } + + clientConfig(): Record { + return { + agents: Array.from(this.agents.keys()), + defaultAgent: this.defaultAgentName, + }; + } + + private async _handleChat(req: express.Request, res: express.Response) { + const parsed = chatRequestSchema.safeParse(req.body); + if (!parsed.success) { + res.status(400).json({ + error: "Invalid request", + details: parsed.error.flatten().fieldErrors, + }); + return; + } + const { message, threadId, agent: agentName } = parsed.data; + + const registered = this.resolveAgent(agentName); + if (!registered) { + res.status(400).json({ + error: agentName + ? `Agent "${agentName}" not found` + : "No agent registered", + }); + return; + } + + const userId = this.resolveUserId(req); + + // Reject early (before allocating a thread) when the user is already at + // their concurrent-stream limit. Prevents a misbehaving client from + // churning thread rows while being denied elsewhere. + const limits = this.resolvedLimits; + if (this.countUserStreams(userId) >= limits.maxConcurrentStreamsPerUser) { + res.setHeader("Retry-After", "5"); + res.status(429).json({ + error: `Too many concurrent streams for this user (limit ${limits.maxConcurrentStreamsPerUser}). Wait for an existing stream to complete before starting another.`, + }); + return; + } + + let thread = threadId ? await this.threadStore.get(threadId, userId) : null; + if (threadId && !thread) { + res.status(404).json({ error: `Thread ${threadId} not found` }); + return; + } + if (!thread) { + thread = await this.threadStore.create(userId); + } + + const userMessage: Message = { + id: randomUUID(), + role: "user", + content: message, + createdAt: new Date(), + }; + await this.threadStore.addMessage(thread.id, userId, userMessage); + return this._streamAgent(req, res, registered, thread, userId); + } + + private async _handleInvocations( + req: express.Request, + res: express.Response, + ) { + const parsed = invocationsRequestSchema.safeParse(req.body); + if (!parsed.success) { + res.status(400).json({ + error: "Invalid request", + details: parsed.error.flatten().fieldErrors, + }); + return; + } + const { input } = parsed.data; + const registered = this.resolveAgent(); + if (!registered) { + res.status(400).json({ error: "No agent registered" }); + return; + } + const userId = this.resolveUserId(req); + const thread = await this.threadStore.create(userId); + + if (typeof input === "string") { + await this.threadStore.addMessage(thread.id, userId, { + id: randomUUID(), + role: "user", + content: input, + createdAt: new Date(), + }); + } else { + for (const item of input) { + const role = (item.role ?? "user") as Message["role"]; + const content = + typeof item.content === "string" + ? item.content + : JSON.stringify(item.content ?? ""); + if (!content) continue; + await this.threadStore.addMessage(thread.id, userId, { + id: randomUUID(), + role, + content, + createdAt: new Date(), + }); + } + } + + return this._streamAgent(req, res, registered, thread, userId); + } + + private async _streamAgent( + req: express.Request, + res: express.Response, + registered: RegisteredAgent, + thread: Thread, + userId: string, + ): Promise { + const abortController = new AbortController(); + const signal = abortController.signal; + const requestId = randomUUID(); + this.activeStreams.set(requestId, { controller: abortController, userId }); + + const tools = Array.from(registered.toolIndex.values()).map((e) => e.def); + const approvalPolicy = this.resolvedApprovalPolicy; + const limits = this.resolvedLimits; + const outboundEvents = new EventChannel(); + const translator = new AgentEventTranslator(); + // Per-run tool-call budget (shared across the top-level adapter and any + // sub-agents it delegates to). Counted pre-dispatch so a prompt-injected + // agent cannot drain the budget silently via denied calls. + let toolCallsUsed = 0; + + const executeTool = async ( + name: string, + args: unknown, + ): Promise => { + if (toolCallsUsed >= limits.maxToolCalls) { + abortController.abort( + new Error( + `Tool-call budget exhausted (limit ${limits.maxToolCalls}).`, + ), + ); + throw new Error( + `Tool-call budget exhausted (limit ${limits.maxToolCalls}). Raise agents({ limits: { maxToolCalls } }) or review the agent's tool-selection logic.`, + ); + } + toolCallsUsed++; + + const entry = registered.toolIndex.get(name); + if (!entry) throw new Error(`Unknown tool: ${name}`); + + if ( + approvalPolicy.requireForDestructive && + entry.def.annotations?.destructive === true + ) { + const approvalId = randomUUID(); + for (const ev of translator.translate({ + type: "approval_pending", + approvalId, + streamId: requestId, + toolName: name, + args, + annotations: entry.def.annotations, + })) { + outboundEvents.push(ev); + } + const decision = await this.approvalGate.wait({ + approvalId, + streamId: requestId, + userId, + timeoutMs: approvalPolicy.timeoutMs, + }); + if (decision === "deny") { + return `Tool execution denied by user approval gate (tool: ${name}).`; + } + } + + let result: unknown; + if (entry.source === "toolkit") { + if (!this.context) { + throw new Error( + "Plugin tool execution requires PluginContext; this should never happen through createApp", + ); + } + result = await this.context.executeTool( + req, + entry.pluginName, + entry.localName, + args, + signal, + ); + } else if (entry.source === "function") { + result = await entry.functionTool.execute( + args as Record, + ); + } else if (entry.source === "mcp") { + if (!this.mcpClient) throw new Error("MCP client not connected"); + const oboToken = req.headers["x-forwarded-access-token"]; + const mcpAuth = + typeof oboToken === "string" + ? { Authorization: `Bearer ${oboToken}` } + : undefined; + result = await this.mcpClient.callTool( + entry.mcpToolName, + args, + mcpAuth, + ); + } else if (entry.source === "subagent") { + const childAgent = this.agents.get(entry.agentName); + if (!childAgent) + throw new Error(`Sub-agent not found: ${entry.agentName}`); + result = await this.runSubAgent(req, childAgent, args, signal, 1); + } + + // A `void` / `undefined` return is a legitimate tool outcome (e.g., a + // "send notification" side-effecting tool). Return an empty string so + // the LLM sees a successful-but-empty result rather than a bogus + // "execution failed" error. + if (result === undefined) { + return ""; + } + const MAX = 50_000; + const serialized = + typeof result === "string" ? result : JSON.stringify(result); + if (serialized.length > MAX) { + return `${serialized.slice(0, MAX)}\n\n[Result truncated: ${serialized.length} chars exceeds ${MAX} limit]`; + } + return result; + }; + + // Drive the adapter and the approval-event side-channel concurrently. + // Outbound events from both sources flow through `outboundEvents`; the + // generator below drains the channel in order. executeTool pushes + // approval-pending events into the same channel before awaiting the gate. + const driver = (async () => { + try { + for (const evt of translator.translate({ + type: "metadata", + data: { threadId: thread.id }, + })) { + outboundEvents.push(evt); + } + + const pluginNames = this.context + ? this.context + .getPluginNames() + .filter((n) => n !== this.name && n !== "server") + : []; + const fullPrompt = composePromptForAgent( + registered, + this.config.baseSystemPrompt, + { + agentName: registered.name, + pluginNames, + toolNames: tools.map((t) => t.name), + }, + ); + + const messagesWithSystem: Message[] = [ + { + id: "system", + role: "system", + content: fullPrompt, + createdAt: new Date(), + }, + ...thread.messages, + ]; + + const stream = registered.adapter.run( + { + messages: messagesWithSystem, + tools, + threadId: thread.id, + signal, + }, + { executeTool, signal }, + ); + + // Accumulate assistant output from BOTH streaming and non-streaming + // adapters. Delta-based adapters (Databricks, Vercel AI) emit + // `message_delta` chunks that we concatenate; adapters that yield a + // single final assistant message (e.g. LangChain's `on_chain_end` + // path) emit a `message` event whose content replaces whatever + // deltas already arrived. Without the `message` branch, multi-turn + // LangChain conversations silently dropped the assistant turn from + // thread history. + let fullContent = ""; + for await (const event of stream) { + if (signal.aborted) break; + if (event.type === "message_delta") { + fullContent += event.content; + } else if (event.type === "message") { + fullContent = event.content; + } + for (const translated of translator.translate(event)) { + outboundEvents.push(translated); + } + } + + if (fullContent) { + await this.threadStore.addMessage(thread.id, userId, { + id: randomUUID(), + role: "assistant", + content: fullContent, + createdAt: new Date(), + }); + } + + for (const evt of translator.finalize()) outboundEvents.push(evt); + } catch (error) { + if (signal.aborted) { + outboundEvents.close(); + return; + } + logger.error("Agent chat error: %O", error); + outboundEvents.close(error); + return; + } finally { + // Any pending approval gates for this stream are auto-denied so the + // adapter can unwind if it was still waiting. + this.approvalGate.abortStream(requestId); + this.activeStreams.delete(requestId); + // Stateless agents (e.g. autocomplete) don't persist history; drop + // the thread so `InMemoryThreadStore` doesn't accumulate one record + // per request. Swallow delete errors — the stream has already + // finished and the client has the response. + if (registered.ephemeral) { + try { + await this.threadStore.delete(thread.id, userId); + } catch (err) { + logger.warn( + "Failed to delete ephemeral thread %s: %O", + thread.id, + err, + ); + } + } + } + outboundEvents.close(); + })(); + + await this.executeStream( + res, + async function* () { + try { + for await (const ev of outboundEvents) { + yield ev; + } + } finally { + await driver.catch(() => undefined); + } + }, + { + ...agentStreamDefaults, + stream: { ...agentStreamDefaults.stream, streamId: requestId }, + }, + ); + } + + /** + * Runs a sub-agent in response to an `agent-` tool call. Returns the + * concatenated text output to hand back to the parent adapter as the tool + * result. + * + * `depth` starts at 1 for a top-level sub-agent invocation (i.e. the + * outer `_streamAgent` calls `runSubAgent(..., 1)`) and increments on + * each nested `runSubAgent` call. Depths exceeding + * `limits.maxSubAgentDepth` are rejected before any adapter work. + */ + private async runSubAgent( + req: express.Request, + child: RegisteredAgent, + args: unknown, + signal: AbortSignal, + depth: number, + ): Promise { + const limits = this.resolvedLimits; + if (depth > limits.maxSubAgentDepth) { + throw new Error( + `Sub-agent depth exceeded (limit ${limits.maxSubAgentDepth}). ` + + `Raise agents({ limits: { maxSubAgentDepth } }) or break the delegation cycle.`, + ); + } + + const input = + typeof args === "object" && + args !== null && + typeof (args as { input?: unknown }).input === "string" + ? (args as { input: string }).input + : JSON.stringify(args); + const childTools = Array.from(child.toolIndex.values()).map((e) => e.def); + + const childExecute = async ( + name: string, + childArgs: unknown, + ): Promise => { + const entry = child.toolIndex.get(name); + if (!entry) throw new Error(`Unknown tool in sub-agent: ${name}`); + if (entry.source === "toolkit" && this.context) { + return this.context.executeTool( + req, + entry.pluginName, + entry.localName, + childArgs, + signal, + ); + } + if (entry.source === "function") { + return entry.functionTool.execute(childArgs as Record); + } + if (entry.source === "subagent") { + const grandchild = this.agents.get(entry.agentName); + if (!grandchild) + throw new Error(`Sub-agent not found: ${entry.agentName}`); + return this.runSubAgent(req, grandchild, childArgs, signal, depth + 1); + } + if (entry.source === "mcp" && this.mcpClient) { + const oboToken = req.headers["x-forwarded-access-token"]; + const mcpAuth = + typeof oboToken === "string" + ? { Authorization: `Bearer ${oboToken}` } + : undefined; + return this.mcpClient.callTool(entry.mcpToolName, childArgs, mcpAuth); + } + throw new Error(`Unsupported sub-agent tool source: ${entry.source}`); + }; + + const runContext: AgentRunContext = { executeTool: childExecute, signal }; + + const pluginNames = this.context + ? this.context + .getPluginNames() + .filter((n) => n !== this.name && n !== "server") + : []; + const systemPrompt = composePromptForAgent( + child, + this.config.baseSystemPrompt, + { + agentName: child.name, + pluginNames, + toolNames: childTools.map((t) => t.name), + }, + ); + + const messages: Message[] = [ + { + id: "system", + role: "system", + content: systemPrompt, + createdAt: new Date(), + }, + { + id: randomUUID(), + role: "user", + content: input, + createdAt: new Date(), + }, + ]; + + let output = ""; + const events: AgentEvent[] = []; + for await (const event of child.adapter.run( + { messages, tools: childTools, threadId: randomUUID(), signal }, + runContext, + )) { + events.push(event); + if (event.type === "message_delta") output += event.content; + else if (event.type === "message") output = event.content; + } + return output; + } + + private async _handleCancel(req: express.Request, res: express.Response) { + const { streamId } = req.body as { streamId?: string }; + if (!streamId) { + res.status(400).json({ error: "streamId is required" }); + return; + } + const entry = this.activeStreams.get(streamId); + if (!entry) { + // Stream is unknown or already completed — idempotent no-op. + res.json({ cancelled: true }); + return; + } + const userId = this.resolveUserId(req); + if (entry.userId !== userId) { + res.status(403).json({ error: "Forbidden" }); + return; + } + entry.controller.abort("Cancelled by user"); + this.activeStreams.delete(streamId); + this.approvalGate.abortStream(streamId); + res.json({ cancelled: true }); + } + + private async _handleApprove(req: express.Request, res: express.Response) { + const parsed = approvalRequestSchema.safeParse(req.body); + if (!parsed.success) { + res.status(400).json({ + error: "Invalid request", + details: parsed.error.flatten().fieldErrors, + }); + return; + } + const { streamId, approvalId, decision } = parsed.data; + + const streamEntry = this.activeStreams.get(streamId); + if (!streamEntry) { + // Stream has already completed or never existed. Return 404 so the UI + // knows the approval token is no longer valid (the waiter, if any, has + // already been timed out or aborted). + res.status(404).json({ error: "Stream not found or already completed" }); + return; + } + + const userId = this.resolveUserId(req); + if (streamEntry.userId !== userId) { + res.status(403).json({ error: "Forbidden" }); + return; + } + + const result = this.approvalGate.submit({ approvalId, userId, decision }); + if (!result.ok) { + if (result.reason === "forbidden") { + res.status(403).json({ error: "Forbidden" }); + return; + } + res.status(404).json({ error: "Approval not found or already settled" }); + return; + } + + res.json({ decision }); + } + + private async _handleListThreads( + req: express.Request, + res: express.Response, + ) { + const userId = this.resolveUserId(req); + const threads = await this.threadStore.list(userId); + res.json({ threads }); + } + + private async _handleGetThread(req: express.Request, res: express.Response) { + const userId = this.resolveUserId(req); + const thread = await this.threadStore.get(req.params.threadId, userId); + if (!thread) { + res.status(404).json({ error: "Thread not found" }); + return; + } + res.json(thread); + } + + private async _handleDeleteThread( + req: express.Request, + res: express.Response, + ) { + const userId = this.resolveUserId(req); + const deleted = await this.threadStore.delete(req.params.threadId, userId); + if (!deleted) { + res.status(404).json({ error: "Thread not found" }); + return; + } + res.json({ deleted: true }); + } + + private resolveAgent(name?: string): RegisteredAgent | null { + if (name) return this.agents.get(name) ?? null; + if (this.defaultAgentName) { + return this.agents.get(this.defaultAgentName) ?? null; + } + const first = this.agents.values().next(); + return first.done ? null : first.value; + } + + private printRegistry(): void { + if (this.agents.size === 0) return; + console.log(""); + console.log(` ${pc.bold("Agents")} ${pc.dim(`(${this.agents.size})`)}`); + console.log(` ${pc.dim("─".repeat(60))}`); + for (const [name, reg] of this.agents) { + const tools = reg.toolIndex.size; + const marker = name === this.defaultAgentName ? pc.green("●") : " "; + console.log( + ` ${marker} ${pc.bold(name.padEnd(24))} ${pc.dim(`${tools} tools`)}`, + ); + } + console.log(` ${pc.dim("─".repeat(60))}`); + console.log(""); + } + + async shutdown(): Promise { + this.approvalGate.abortAll(); + if (this.mcpClient) { + await this.mcpClient.close(); + this.mcpClient = null; + } + } + + exports() { + return { + register: (name: string, def: AgentDefinition) => + this.registerCodeAgent(name, def), + list: () => Array.from(this.agents.keys()), + get: (name: string) => this.agents.get(name) ?? null, + reload: () => this.reload(), + getDefault: () => this.defaultAgentName, + getThreads: (userId: string) => this.threadStore.list(userId), + }; + } + + private async registerCodeAgent( + name: string, + def: AgentDefinition, + ): Promise { + const registered = await this.buildRegisteredAgent(name, def, { + origin: "code", + }); + this.agents.set(name, registered); + if (!this.defaultAgentName) this.defaultAgentName = name; + } +} + +function normalizeAutoInherit(value: AgentsPluginConfig["autoInheritTools"]): { + file: boolean; + code: boolean; +} { + // Default is opt-out for both origins. A markdown agent or code-defined + // agent with no declared `tools:` gets an empty tool index unless the + // developer explicitly flips `autoInheritTools` on. Even then, only tools + // whose plugin author marked `autoInheritable: true` are spread — see + // `applyAutoInherit` for the filter. + if (value === undefined) return { file: false, code: false }; + if (typeof value === "boolean") return { file: value, code: value }; + return { file: value.file ?? false, code: value.code ?? false }; +} + +function composePromptForAgent( + registered: RegisteredAgent, + pluginLevel: BaseSystemPromptOption | undefined, + ctx: PromptContext, +): string { + const perAgent = registered.baseSystemPrompt; + const resolved = perAgent !== undefined ? perAgent : pluginLevel; + + let base = ""; + if (resolved === false) { + base = ""; + } else if (typeof resolved === "string") { + base = resolved; + } else if (typeof resolved === "function") { + base = resolved(ctx); + } else { + base = buildBaseSystemPrompt(ctx.pluginNames); + } + + return composeSystemPrompt(base, registered.instructions); +} + +/** + * Plugin factory for the agents plugin. Reads `config/agents/*.md` by default, + * resolves toolkits/tools from registered plugins, exposes `appkit.agents.*` + * runtime API and mounts `/invocations`. + * + * @example + * ```ts + * import { agents, analytics, createApp, server } from "@databricks/appkit"; + * + * await createApp({ + * plugins: [server(), analytics(), agents()], + * }); + * ``` + */ +export const agents = toPlugin(AgentsPlugin); diff --git a/packages/appkit/src/plugins/agents/defaults.ts b/packages/appkit/src/plugins/agents/defaults.ts new file mode 100644 index 000000000..4da11bef4 --- /dev/null +++ b/packages/appkit/src/plugins/agents/defaults.ts @@ -0,0 +1,12 @@ +import type { StreamExecutionSettings } from "shared"; + +export const agentStreamDefaults: StreamExecutionSettings = { + default: { + cache: { enabled: false }, + retry: { enabled: false }, + timeout: 300_000, + }, + stream: { + bufferSize: 200, + }, +}; diff --git a/packages/appkit/src/plugins/agents/event-channel.ts b/packages/appkit/src/plugins/agents/event-channel.ts new file mode 100644 index 000000000..c5b60463a --- /dev/null +++ b/packages/appkit/src/plugins/agents/event-channel.ts @@ -0,0 +1,70 @@ +/** + * Single-producer/single-consumer async queue used by the agents plugin to + * merge streams of SSE events from two concurrent sources: the adapter's + * `run()` generator, and out-of-band events emitted by `executeTool` (e.g. + * human-approval requests). + * + * The consumer drains the channel as an async iterable; the producer pushes + * events synchronously and closes the channel when the source has completed + * or errored. + */ +interface Waiter { + resolve: (value: IteratorResult) => void; + reject: (error: unknown) => void; +} + +export class EventChannel { + private queue: T[] = []; + private waiters: Array> = []; + private closed = false; + private error: unknown = undefined; + + /** Synchronously enqueue an event. Safe to call from non-async contexts. */ + push(value: T): void { + if (this.closed) return; + const waiter = this.waiters.shift(); + if (waiter) { + waiter.resolve({ value, done: false }); + } else { + this.queue.push(value); + } + } + + /** + * Close the channel. Any pending `next()` calls resolve with `done: true`. + * If `error` is supplied, pending `next()` calls reject with it and future + * calls do the same. + */ + close(error?: unknown): void { + if (this.closed) return; + this.closed = true; + this.error = error; + while (this.waiters.length > 0) { + const waiter = this.waiters.shift(); + if (!waiter) break; + if (error) { + waiter.reject(error); + } else { + waiter.resolve({ value: undefined as never, done: true }); + } + } + } + + [Symbol.asyncIterator](): AsyncIterator { + return { + next: (): Promise> => { + if (this.queue.length > 0) { + const value = this.queue.shift() as T; + return Promise.resolve({ value, done: false }); + } + if (this.closed) { + if (this.error) return Promise.reject(this.error); + return Promise.resolve({ value: undefined as never, done: true }); + } + return new Promise((resolve, reject) => { + this.waiters.push({ resolve, reject }); + }); + }, + }; + } +} diff --git a/packages/appkit/src/plugins/agents/event-translator.ts b/packages/appkit/src/plugins/agents/event-translator.ts new file mode 100644 index 000000000..54d749fb0 --- /dev/null +++ b/packages/appkit/src/plugins/agents/event-translator.ts @@ -0,0 +1,291 @@ +import { randomUUID } from "node:crypto"; +import type { + AgentEvent, + ResponseFunctionCallOutput, + ResponseFunctionToolCall, + ResponseOutputMessage, + ResponseStreamEvent, +} from "shared"; + +/** + * Translates internal `AgentEvent` stream into Responses API SSE events. + * + * Stateful: one instance per streaming request. Tracks sequence numbers and + * allocates `output_index` strictly monotonically — each emitted output + * item (message, function call, function call output) claims the next + * available index and, once claimed, never reuses an earlier one. This is a + * Responses-API contract that OpenAI's own SDK parsers enforce. + * + * A message is opened lazily on the first `message_delta` or `message` + * event. If a `tool_call` or `tool_result` arrives while a message is open + * (common ReAct flow: partial text → tool call → more text), the open + * message is closed (`response.output_item.done`) BEFORE the tool item is + * added, so subsequent text resumes as a new message item at a strictly + * later index. + */ +export class AgentEventTranslator { + private seqNum = 0; + private nextOutputIndex = 0; + private currentMessage: { + id: string; + text: string; + outputIndex: number; + } | null = null; + private finalized = false; + + translate(event: AgentEvent): ResponseStreamEvent[] { + switch (event.type) { + case "message_delta": + return this.handleMessageDelta(event.content); + case "message": + return this.handleFullMessage(event.content); + case "tool_call": + return this.handleToolCall(event.callId, event.name, event.args); + case "tool_result": + return this.handleToolResult(event.callId, event.result, event.error); + case "thinking": + return [ + { + type: "appkit.thinking", + content: event.content, + sequence_number: this.seqNum++, + }, + ]; + case "metadata": + return [ + { + type: "appkit.metadata", + data: event.data, + sequence_number: this.seqNum++, + }, + ]; + case "approval_pending": + return [ + { + type: "appkit.approval_pending", + approval_id: event.approvalId, + stream_id: event.streamId, + tool_name: event.toolName, + args: event.args, + annotations: event.annotations, + sequence_number: this.seqNum++, + }, + ]; + case "status": + return this.handleStatus(event.status, event.error); + } + } + + finalize(): ResponseStreamEvent[] { + if (this.finalized) return []; + this.finalized = true; + + const events: ResponseStreamEvent[] = []; + const closeEvent = this.closeCurrentMessage(); + if (closeEvent) events.push(closeEvent); + + events.push({ + type: "response.completed", + sequence_number: this.seqNum++, + response: {}, + }); + + return events; + } + + private handleMessageDelta(content: string): ResponseStreamEvent[] { + const events: ResponseStreamEvent[] = []; + + if (!this.currentMessage) { + const id = `msg_${randomUUID()}`; + const outputIndex = this.nextOutputIndex++; + this.currentMessage = { id, text: content, outputIndex }; + const item: ResponseOutputMessage = { + type: "message", + id, + status: "in_progress", + role: "assistant", + content: [], + }; + events.push({ + type: "response.output_item.added", + output_index: outputIndex, + item, + sequence_number: this.seqNum++, + }); + } else { + this.currentMessage.text += content; + } + + events.push({ + type: "response.output_text.delta", + item_id: this.currentMessage.id, + output_index: this.currentMessage.outputIndex, + content_index: 0, + delta: content, + sequence_number: this.seqNum++, + }); + + return events; + } + + private handleFullMessage(content: string): ResponseStreamEvent[] { + const events: ResponseStreamEvent[] = []; + + if (!this.currentMessage) { + // No prior deltas — open and immediately close. + const id = `msg_${randomUUID()}`; + const outputIndex = this.nextOutputIndex++; + this.currentMessage = { id, text: content, outputIndex }; + const addedItem: ResponseOutputMessage = { + type: "message", + id, + status: "in_progress", + role: "assistant", + content: [], + }; + events.push({ + type: "response.output_item.added", + output_index: outputIndex, + item: addedItem, + sequence_number: this.seqNum++, + }); + } else { + // Deltas already opened the item; `message` overrides the accumulated + // text (per adapter contract) and closes it. + this.currentMessage.text = content; + } + + const closeEvent = this.closeCurrentMessage(); + if (closeEvent) events.push(closeEvent); + return events; + } + + private handleToolCall( + callId: string, + name: string, + args: unknown, + ): ResponseStreamEvent[] { + const events: ResponseStreamEvent[] = []; + const closeEvent = this.closeCurrentMessage(); + if (closeEvent) events.push(closeEvent); + + const outputIndex = this.nextOutputIndex++; + const item: ResponseFunctionToolCall = { + type: "function_call", + id: `fc_${randomUUID()}`, + call_id: callId, + name, + arguments: typeof args === "string" ? args : JSON.stringify(args), + }; + + events.push( + { + type: "response.output_item.added", + output_index: outputIndex, + item, + sequence_number: this.seqNum++, + }, + { + type: "response.output_item.done", + output_index: outputIndex, + item, + sequence_number: this.seqNum++, + }, + ); + return events; + } + + private handleToolResult( + callId: string, + result: unknown, + error?: string, + ): ResponseStreamEvent[] { + const events: ResponseStreamEvent[] = []; + const closeEvent = this.closeCurrentMessage(); + if (closeEvent) events.push(closeEvent); + + const outputIndex = this.nextOutputIndex++; + // Coalesce `undefined` → "" so the wire shape is always a string (the + // Responses API contract). Non-string results are JSON-serialised. + let output: string; + if (error !== undefined) { + output = error; + } else if (typeof result === "string") { + output = result; + } else if (result === undefined) { + output = ""; + } else { + output = JSON.stringify(result); + } + const item: ResponseFunctionCallOutput = { + type: "function_call_output", + id: `fc_output_${randomUUID()}`, + call_id: callId, + output, + }; + + events.push( + { + type: "response.output_item.added", + output_index: outputIndex, + item, + sequence_number: this.seqNum++, + }, + { + type: "response.output_item.done", + output_index: outputIndex, + item, + sequence_number: this.seqNum++, + }, + ); + return events; + } + + /** + * Emit an `response.output_item.done` for the currently-open message, if + * any, and clear the state. Returns the event to the caller so it can be + * pushed at the right moment in the sequence. Returns `null` when there + * is no open message. + */ + private closeCurrentMessage(): ResponseStreamEvent | null { + if (!this.currentMessage) return null; + const { id, text, outputIndex } = this.currentMessage; + this.currentMessage = null; + const doneItem: ResponseOutputMessage = { + type: "message", + id, + status: "completed", + role: "assistant", + content: [{ type: "output_text", text }], + }; + return { + type: "response.output_item.done", + output_index: outputIndex, + item: doneItem, + sequence_number: this.seqNum++, + }; + } + + private handleStatus(status: string, error?: string): ResponseStreamEvent[] { + if (status === "error") { + return [ + { + type: "error", + error: error ?? "Unknown error", + sequence_number: this.seqNum++, + }, + { + type: "response.failed", + sequence_number: this.seqNum++, + }, + ]; + } + + if (status === "complete") { + return this.finalize(); + } + + return []; + } +} diff --git a/packages/appkit/src/plugins/agents/index.ts b/packages/appkit/src/plugins/agents/index.ts new file mode 100644 index 000000000..8220a55e7 --- /dev/null +++ b/packages/appkit/src/plugins/agents/index.ts @@ -0,0 +1,22 @@ +export { AgentsPlugin, agents } from "./agents"; +export { buildToolkitEntries } from "../../core/agent/build-toolkit"; +export { + type LoadContext, + type LoadResult, + loadAgentFromFile, + loadAgentsFromDir, + parseFrontmatter, +} from "../../core/agent/load-agents"; +export { + type AgentDefinition, + type AgentsPluginConfig, + type AgentTool, + type AutoInheritToolsConfig, + type BaseSystemPromptOption, + isToolkitEntry, + type PromptContext, + type RegisteredAgent, + type ResolvedToolEntry, + type ToolkitEntry, + type ToolkitOptions, +} from "../../core/agent/types"; diff --git a/packages/appkit/src/plugins/agents/manifest.json b/packages/appkit/src/plugins/agents/manifest.json new file mode 100644 index 000000000..cb7a43f8b --- /dev/null +++ b/packages/appkit/src/plugins/agents/manifest.json @@ -0,0 +1,10 @@ +{ + "$schema": "https://databricks.github.io/appkit/schemas/plugin-manifest.schema.json", + "name": "agents", + "displayName": "Agents Plugin", + "description": "AI agents driven by markdown configs or code, with auto-tool-discovery from registered plugins", + "resources": { + "required": [], + "optional": [] + } +} diff --git a/packages/appkit/src/plugins/agents/schemas.ts b/packages/appkit/src/plugins/agents/schemas.ts new file mode 100644 index 000000000..cea6c6d64 --- /dev/null +++ b/packages/appkit/src/plugins/agents/schemas.ts @@ -0,0 +1,69 @@ +import { z } from "zod"; + +/** + * Static body cap for the `message` field on `POST /chat`. 64 000 characters + * is well above any legitimate chat turn (~16k tokens at 4 chars/token) and + * bounds the per-request cost of appending to `InMemoryThreadStore` without + * requiring per-deployment configuration. + */ +const MAX_MESSAGE_CHARS = 64_000; + +/** Cap applied to `/invocations` when `input` is a raw string. */ +const MAX_INVOCATIONS_INPUT_CHARS = 64_000; + +/** + * Cap on the number of items accepted in an `/invocations` `input` array + * (one element per seeded message). Protects against a single request + * seeding hundreds of messages into the thread store. + */ +const MAX_INVOCATIONS_INPUT_ITEMS = 100; + +/** Per-message `content` size cap (string form). */ +const MAX_INVOCATIONS_ITEM_CHARS = 64_000; + +/** Per-message `content` size cap (array form). */ +const MAX_INVOCATIONS_ITEM_ARRAY_ITEMS = 100; + +export const chatRequestSchema = z.object({ + message: z + .string() + .min(1, "message must not be empty") + .max( + MAX_MESSAGE_CHARS, + `message exceeds the ${MAX_MESSAGE_CHARS}-character limit`, + ), + threadId: z.string().optional(), + agent: z.string().optional(), +}); + +const messageItemSchema = z.object({ + role: z.enum(["user", "assistant", "system"]).optional(), + content: z + .union([ + z.string().max(MAX_INVOCATIONS_ITEM_CHARS), + z.array(z.any()).max(MAX_INVOCATIONS_ITEM_ARRAY_ITEMS), + ]) + .optional(), + type: z.string().optional(), +}); + +export const invocationsRequestSchema = z.object({ + input: z.union([ + z.string().min(1).max(MAX_INVOCATIONS_INPUT_CHARS), + z + .array(messageItemSchema) + .min(1) + .max( + MAX_INVOCATIONS_INPUT_ITEMS, + `input array exceeds the ${MAX_INVOCATIONS_INPUT_ITEMS}-item limit`, + ), + ]), + stream: z.boolean().optional().default(true), + model: z.string().optional(), +}); + +export const approvalRequestSchema = z.object({ + streamId: z.string().min(1, "streamId is required"), + approvalId: z.string().min(1, "approvalId is required"), + decision: z.enum(["approve", "deny"]), +}); diff --git a/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts new file mode 100644 index 000000000..43ad8c104 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts @@ -0,0 +1,373 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import type { + AgentAdapter, + AgentInput, + AgentRunContext, + AgentToolDefinition, + ToolProvider, +} from "shared"; +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import { z } from "zod"; +import { CacheManager } from "../../../cache"; +// Import the class directly so we can construct it without a createApp +import { AgentsPlugin } from "../agents"; +import { buildToolkitEntries } from "../../../core/agent/build-toolkit"; +import { defineTool, type ToolRegistry } from "../../../core/agent/tools/define-tool"; +import type { AgentsPluginConfig, ToolkitEntry } from "../../../core/agent/types"; +import { isToolkitEntry } from "../../../core/agent/types"; + +interface FakeContext { + providers: Array<{ name: string; provider: ToolProvider }>; + getToolProviders(): Array<{ name: string; provider: ToolProvider }>; + getPluginNames(): string[]; + addRoute(): void; + executeTool: ( + req: unknown, + pluginName: string, + localName: string, + args: unknown, + ) => Promise; +} + +function fakeContext( + providers: Array<{ name: string; provider: ToolProvider }>, +): FakeContext { + return { + providers, + getToolProviders: () => providers, + getPluginNames: () => providers.map((p) => p.name), + addRoute: vi.fn(), + executeTool: vi.fn(async (_req, p, n, args) => ({ + plugin: p, + tool: n, + args, + })), + }; +} + +function stubAdapter(): AgentAdapter { + return { + async *run(_input: AgentInput, _ctx: AgentRunContext) { + yield { type: "message_delta", content: "" }; + }, + }; +} + +function makeToolProvider( + pluginName: string, + registry: ToolRegistry, +): ToolProvider & { + toolkit: (opts?: unknown) => Record; +} { + return { + getAgentTools(): AgentToolDefinition[] { + return Object.entries(registry).map(([name, entry]) => ({ + name, + description: entry.description, + parameters: { type: "object", properties: {} }, + })); + }, + async executeAgentTool(name, args) { + return { callFrom: pluginName, name, args }; + }, + toolkit: (opts) => buildToolkitEntries(pluginName, registry, opts as never), + }; +} + +let tmpDir: string; + +beforeEach(async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "agents-plugin-")); + const storage = { + get: vi.fn(), + set: vi.fn(), + delete: vi.fn(), + keys: vi.fn(), + healthCheck: vi.fn(async () => true), + close: vi.fn(async () => {}), + }; + // biome-ignore lint/suspicious/noExplicitAny: test-only CacheManager wiring + await CacheManager.getInstance({ storage: storage as any }); +}); + +afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +function instantiate(config: AgentsPluginConfig, ctx?: FakeContext) { + const plugin = new AgentsPlugin({ ...config, name: "agent" }); + plugin.attachContext({ context: ctx as unknown as object }); + return plugin; +} + +describe("AgentsPlugin", () => { + test("registers code-defined agents and exposes them via exports", async () => { + const plugin = instantiate({ + dir: false, + agents: { + support: { + instructions: "You help customers.", + model: stubAdapter(), + }, + }, + }); + await plugin.setup(); + + const api = plugin.exports() as { + list: () => string[]; + getDefault: () => string | null; + }; + expect(api.list()).toEqual(["support"]); + expect(api.getDefault()).toBe("support"); + }); + + test("loads markdown agents from a directory", async () => { + fs.writeFileSync( + path.join(tmpDir, "assistant.md"), + "---\ndefault: true\n---\nYou are helpful.", + "utf-8", + ); + const plugin = instantiate({ + dir: tmpDir, + defaultModel: stubAdapter(), + }); + await plugin.setup(); + + const api = plugin.exports() as { + list: () => string[]; + getDefault: () => string | null; + }; + expect(api.list()).toEqual(["assistant"]); + expect(api.getDefault()).toBe("assistant"); + }); + + test("code definitions override markdown on key collision", async () => { + fs.writeFileSync( + path.join(tmpDir, "support.md"), + "---\n---\nFrom markdown.", + "utf-8", + ); + const plugin = instantiate({ + dir: tmpDir, + defaultModel: stubAdapter(), + agents: { + support: { + instructions: "From code", + model: stubAdapter(), + }, + }, + }); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { instructions: string } | null; + }; + expect(api.get("support")?.instructions).toBe("From code"); + }); + + test("auto-inherit default is safe (both file and code get nothing without an explicit opt-in)", async () => { + const registry: ToolRegistry = { + query: defineTool({ + description: "q", + schema: z.object({ sql: z.string() }), + autoInheritable: true, // even with autoInheritable, no spread without opt-in + handler: () => "ok", + }), + }; + const provider = makeToolProvider("analytics", registry); + const ctx = fakeContext([{ name: "analytics", provider }]); + + fs.writeFileSync( + path.join(tmpDir, "assistant.md"), + "---\n---\nYou are helpful.", + "utf-8", + ); + + const plugin = instantiate( + { + dir: tmpDir, + defaultModel: stubAdapter(), + agents: { + manual: { + instructions: "Manual agent", + model: stubAdapter(), + }, + }, + }, + ctx, + ); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const fileAgent = api.get("assistant"); + const codeAgent = api.get("manual"); + + expect(fileAgent?.toolIndex.size).toBe(0); + expect(codeAgent?.toolIndex.size).toBe(0); + }); + + test("opting in with autoInheritTools: { file: true } spreads only autoInheritable tools", async () => { + const registry: ToolRegistry = { + query: defineTool({ + description: "read-only query", + schema: z.object({ sql: z.string() }), + autoInheritable: true, + handler: () => "ok", + }), + destructive: defineTool({ + description: "mutation", + schema: z.object({}), + // autoInheritable left unset → skipped even when opted in + handler: () => "ok", + }), + }; + const provider = makeToolProvider("analytics", registry); + const ctx = fakeContext([{ name: "analytics", provider }]); + + fs.writeFileSync( + path.join(tmpDir, "assistant.md"), + "---\n---\nYou are helpful.", + "utf-8", + ); + + const plugin = instantiate( + { + dir: tmpDir, + defaultModel: stubAdapter(), + autoInheritTools: { file: true }, + }, + ctx, + ); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const fileAgent = api.get("assistant"); + const keys = Array.from(fileAgent?.toolIndex.keys() ?? []); + expect(keys).toEqual(["analytics.query"]); + }); + + test("autoInheritTools: true enables both origins but still filters by autoInheritable", async () => { + const registry: ToolRegistry = { + safe: defineTool({ + description: "safe", + schema: z.object({}), + autoInheritable: true, + handler: () => "ok", + }), + unsafe: defineTool({ + description: "unsafe", + schema: z.object({}), + handler: () => "ok", + }), + }; + const provider = makeToolProvider("p", registry); + const ctx = fakeContext([{ name: "p", provider }]); + + const plugin = instantiate( + { + dir: false, + defaultModel: stubAdapter(), + autoInheritTools: true, + agents: { + code1: { + instructions: "code agent", + model: stubAdapter(), + }, + }, + }, + ctx, + ); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const codeAgent = api.get("code1"); + const keys = Array.from(codeAgent?.toolIndex.keys() ?? []); + expect(keys).toEqual(["p.safe"]); + }); + + test("file-loaded agent respects explicit toolkits (skips auto-inherit)", async () => { + const registry: ToolRegistry = { + query: defineTool({ + description: "q", + schema: z.object({ sql: z.string() }), + handler: () => "ok", + }), + }; + const registry2: ToolRegistry = { + list: defineTool({ + description: "l", + schema: z.object({}), + handler: () => [], + }), + }; + const ctx = fakeContext([ + { name: "analytics", provider: makeToolProvider("analytics", registry) }, + { name: "files", provider: makeToolProvider("files", registry2) }, + ]); + + fs.writeFileSync( + path.join(tmpDir, "analyst.md"), + "---\ntoolkits: [analytics]\n---\nAnalyst.", + "utf-8", + ); + + const plugin = instantiate( + { dir: tmpDir, defaultModel: stubAdapter() }, + ctx, + ); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const agent = api.get("analyst"); + const toolNames = Array.from(agent?.toolIndex.keys() ?? []); + expect(toolNames.some((n) => n.startsWith("analytics."))).toBe(true); + expect(toolNames.some((n) => n.startsWith("files."))).toBe(false); + }); + + test("registers sub-agents as agent- tools", async () => { + const plugin = instantiate({ + dir: false, + agents: { + supervisor: { + instructions: "Supervise", + model: stubAdapter(), + agents: { + worker: { + instructions: "Work", + model: stubAdapter(), + }, + }, + }, + }, + }); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const sup = api.get("supervisor"); + expect(sup?.toolIndex.has("agent-worker")).toBe(true); + }); + + test("isToolkitEntry type guard recognizes toolkit entries", () => { + const entry: ToolkitEntry = { + __toolkitRef: true, + pluginName: "x", + localName: "y", + def: { name: "x.y", description: "", parameters: { type: "object" } }, + }; + expect(isToolkitEntry(entry)).toBe(true); + expect(isToolkitEntry({ foo: 1 })).toBe(false); + expect(isToolkitEntry(null)).toBe(false); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/approval-route.test.ts b/packages/appkit/src/plugins/agents/tests/approval-route.test.ts new file mode 100644 index 000000000..6e090bd2f --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/approval-route.test.ts @@ -0,0 +1,292 @@ +import type express from "express"; +import { beforeEach, describe, expect, test, vi } from "vitest"; +import { CacheManager } from "../../../cache"; +import { AgentsPlugin } from "../agents"; + +/** + * Focused tests for the `POST /approve` route and the associated + * ownership / error paths on `_handleApprove`. Covers: + * + * - Schema validation of the request body. + * - Ownership check: the user submitting the decision must be the same + * user who initiated the underlying chat stream. + * - 404 for unknown stream (already completed or never existed). + * - 404 for unknown approvalId even when the stream is active. + * - Happy-path resolution of a pending gate with `approve` and `deny`. + * - Cancel of an active stream denies every pending gate on that stream. + */ + +function mockReq(body: unknown, userId?: string): express.Request { + const headers: Record = {}; + if (userId) { + headers["x-forwarded-user"] = userId; + headers["x-forwarded-access-token"] = "fake-token"; + } + return { + body, + headers, + header: (name: string) => headers[name.toLowerCase()], + } as unknown as express.Request; +} + +function mockRes() { + const json = vi.fn(); + const end = vi.fn(); + let statusCode = 200; + const status = vi.fn((code: number) => { + statusCode = code; + return { json, end }; + }); + return { + res: { status, json, end } as unknown as express.Response, + get statusCode() { + return statusCode; + }, + json, + }; +} + +beforeEach(() => { + CacheManager.getInstanceSync = vi.fn(() => ({ + get: vi.fn(), + set: vi.fn(), + delete: vi.fn(), + getOrExecute: vi.fn(async (_k: unknown[], fn: () => Promise) => + fn(), + ), + generateKey: vi.fn(() => "test-key"), + })) as any; + process.env.NODE_ENV = "development"; +}); + +describe("POST /approve route handler", () => { + test("rejects invalid body shape with 400", async () => { + const plugin = new AgentsPlugin({ dir: false }); + const { res, json } = mockRes(); + await (plugin as any)._handleApprove(mockReq({}, "alice"), res); + expect(res.status).toHaveBeenCalledWith(400); + expect(json).toHaveBeenCalledWith( + expect.objectContaining({ error: "Invalid request" }), + ); + }); + + test("returns 404 when the streamId is unknown", async () => { + const plugin = new AgentsPlugin({ dir: false }); + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleApprove: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleApprove( + mockReq( + { streamId: "ghost", approvalId: "a1", decision: "approve" }, + "alice", + ), + res, + ); + expect(res.status).toHaveBeenCalledWith(404); + expect(json).toHaveBeenCalledWith( + expect.objectContaining({ error: expect.stringMatching(/not found/i) }), + ); + }); + + test("returns 403 when submitter is different from stream owner", async () => { + const plugin = new AgentsPlugin({ dir: false }); + (plugin as any).activeStreams.set("stream-x", { + controller: new AbortController(), + userId: "alice", + }); + const gate = (plugin as any).approvalGate; + const waiter = gate.wait({ + approvalId: "a1", + streamId: "stream-x", + userId: "alice", + timeoutMs: 60_000, + }); + + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleApprove: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleApprove( + mockReq( + { streamId: "stream-x", approvalId: "a1", decision: "approve" }, + "bob", + ), + res, + ); + expect(res.status).toHaveBeenCalledWith(403); + expect(json).toHaveBeenCalledWith( + expect.objectContaining({ error: "Forbidden" }), + ); + + // Settle the waiter to clean up. + gate.submit({ approvalId: "a1", userId: "alice", decision: "deny" }); + await expect(waiter).resolves.toBe("deny"); + }); + + test("returns 404 when approvalId is unknown on an active stream", async () => { + const plugin = new AgentsPlugin({ dir: false }); + (plugin as any).activeStreams.set("stream-y", { + controller: new AbortController(), + userId: "alice", + }); + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleApprove: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleApprove( + mockReq( + { streamId: "stream-y", approvalId: "unknown-a", decision: "approve" }, + "alice", + ), + res, + ); + expect(res.status).toHaveBeenCalledWith(404); + expect(json).toHaveBeenCalledWith( + expect.objectContaining({ + error: expect.stringMatching(/not found|already settled/i), + }), + ); + }); + + test("happy path: approve resolves pending gate with 'approve'", async () => { + const plugin = new AgentsPlugin({ dir: false }); + (plugin as any).activeStreams.set("stream-z", { + controller: new AbortController(), + userId: "alice", + }); + const gate = (plugin as any).approvalGate; + const waiter = gate.wait({ + approvalId: "a42", + streamId: "stream-z", + userId: "alice", + timeoutMs: 60_000, + }); + + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleApprove: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleApprove( + mockReq( + { streamId: "stream-z", approvalId: "a42", decision: "approve" }, + "alice", + ), + res, + ); + expect(res.status).not.toHaveBeenCalled(); + expect(json).toHaveBeenCalledWith({ decision: "approve" }); + await expect(waiter).resolves.toBe("approve"); + }); + + test("happy path: deny resolves pending gate with 'deny'", async () => { + const plugin = new AgentsPlugin({ dir: false }); + (plugin as any).activeStreams.set("stream-z", { + controller: new AbortController(), + userId: "alice", + }); + const gate = (plugin as any).approvalGate; + const waiter = gate.wait({ + approvalId: "a43", + streamId: "stream-z", + userId: "alice", + timeoutMs: 60_000, + }); + + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleApprove: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleApprove( + mockReq( + { streamId: "stream-z", approvalId: "a43", decision: "deny" }, + "alice", + ), + res, + ); + expect(json).toHaveBeenCalledWith({ decision: "deny" }); + await expect(waiter).resolves.toBe("deny"); + }); +}); + +describe("POST /cancel ownership + gate cleanup", () => { + test("cancelling a stream denies every pending approval on that stream", async () => { + const plugin = new AgentsPlugin({ dir: false }); + const controller = new AbortController(); + (plugin as any).activeStreams.set("stream-c", { + controller, + userId: "alice", + }); + const gate = (plugin as any).approvalGate; + const a = gate.wait({ + approvalId: "ca1", + streamId: "stream-c", + userId: "alice", + timeoutMs: 60_000, + }); + const b = gate.wait({ + approvalId: "ca2", + streamId: "stream-c", + userId: "alice", + timeoutMs: 60_000, + }); + + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleCancel: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleCancel(mockReq({ streamId: "stream-c" }, "alice"), res); + + expect(controller.signal.aborted).toBe(true); + expect(json).toHaveBeenCalledWith({ cancelled: true }); + await expect(a).resolves.toBe("deny"); + await expect(b).resolves.toBe("deny"); + }); + + test("cancel from a different user is refused with 403", async () => { + const plugin = new AgentsPlugin({ dir: false }); + const controller = new AbortController(); + (plugin as any).activeStreams.set("stream-d", { + controller, + userId: "alice", + }); + const { res, json } = mockRes(); + await ( + plugin as unknown as { + _handleCancel: ( + r: express.Request, + w: express.Response, + ) => Promise; + } + )._handleCancel(mockReq({ streamId: "stream-d" }, "bob"), res); + expect(res.status).toHaveBeenCalledWith(403); + expect(controller.signal.aborted).toBe(false); + expect(json).toHaveBeenCalledWith( + expect.objectContaining({ error: "Forbidden" }), + ); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/dos-limits.test.ts b/packages/appkit/src/plugins/agents/tests/dos-limits.test.ts new file mode 100644 index 000000000..935fa2403 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/dos-limits.test.ts @@ -0,0 +1,299 @@ +import type express from "express"; +import { beforeEach, describe, expect, test, vi } from "vitest"; +import { CacheManager } from "../../../cache"; +import { AgentsPlugin } from "../agents"; +import { chatRequestSchema, invocationsRequestSchema } from "../schemas"; + +/** + * Exercises the four DoS caps landed for MVP: + * + * - `chatRequestSchema.message.max(64_000)` — body cap on `POST /chat`. + * - Per-user `maxConcurrentStreamsPerUser` — 429 with Retry-After. + * - Per-run `maxToolCalls` — aborts stream and throws in `executeTool`. + * - Per-delegation `maxSubAgentDepth` — rejects in `runSubAgent`. + * + * Route-level tests exercise the schemas + `_handleChat` directly via the + * mocked req/res pattern already used by approval-route.test.ts. + */ + +function mockReq(body: unknown, userId?: string): express.Request { + const headers: Record = {}; + if (userId) { + headers["x-forwarded-user"] = userId; + headers["x-forwarded-access-token"] = "fake-token"; + } + return { + body, + headers, + header: (name: string) => headers[name.toLowerCase()], + } as unknown as express.Request; +} + +function mockRes() { + const json = vi.fn(); + const setHeader = vi.fn(); + let statusCode = 200; + const status = vi.fn((code: number) => { + statusCode = code; + return { json }; + }); + return { + res: { status, json, setHeader } as unknown as express.Response, + get statusCode() { + return statusCode; + }, + json, + setHeader, + }; +} + +beforeEach(() => { + CacheManager.getInstanceSync = vi.fn(() => ({ + get: vi.fn(), + set: vi.fn(), + delete: vi.fn(), + getOrExecute: vi.fn(async (_k: unknown[], fn: () => Promise) => + fn(), + ), + generateKey: vi.fn(() => "test-key"), + // biome-ignore lint/suspicious/noExplicitAny: test mock + })) as any; + process.env.NODE_ENV = "development"; +}); + +describe("chatRequestSchema — body cap", () => { + test("accepts messages up to 64_000 characters", () => { + const result = chatRequestSchema.safeParse({ + message: "a".repeat(64_000), + }); + expect(result.success).toBe(true); + }); + + test("rejects messages over 64_000 characters", () => { + const result = chatRequestSchema.safeParse({ + message: "a".repeat(64_001), + }); + expect(result.success).toBe(false); + if (!result.success) { + expect(JSON.stringify(result.error.flatten())).toMatch(/64000/); + } + }); + + test("rejects empty message (existing contract)", () => { + expect(chatRequestSchema.safeParse({ message: "" }).success).toBe(false); + }); +}); + +describe("invocationsRequestSchema — input caps", () => { + test("accepts string input up to 64_000 characters", () => { + const result = invocationsRequestSchema.safeParse({ + input: "a".repeat(64_000), + }); + expect(result.success).toBe(true); + }); + + test("rejects string input over 64_000 characters", () => { + const result = invocationsRequestSchema.safeParse({ + input: "a".repeat(64_001), + }); + expect(result.success).toBe(false); + }); + + test("accepts array input up to 100 items", () => { + const items = Array.from({ length: 100 }, (_, i) => ({ + role: "user" as const, + content: `m${i}`, + })); + expect(invocationsRequestSchema.safeParse({ input: items }).success).toBe( + true, + ); + }); + + test("rejects array input over 100 items", () => { + const items = Array.from({ length: 101 }, (_, i) => ({ + role: "user" as const, + content: `m${i}`, + })); + const result = invocationsRequestSchema.safeParse({ input: items }); + expect(result.success).toBe(false); + }); + + test("rejects per-item content over 64_000 characters", () => { + const result = invocationsRequestSchema.safeParse({ + input: [{ role: "user", content: "a".repeat(64_001) }], + }); + expect(result.success).toBe(false); + }); +}); + +describe("POST /chat — per-user concurrent-stream limit", () => { + function seedPlugin( + overrides: ConstructorParameters[0] = { dir: false }, + ): AgentsPlugin { + const plugin = new AgentsPlugin(overrides); + // Seed the agents map directly so _handleChat can resolve "hello" + // without running setup() (which would require a live model). + // biome-ignore lint/suspicious/noExplicitAny: seeding private state + (plugin as any).agents.set("hello", { + name: "hello", + instructions: "hi", + adapter: { async *run() {} }, + toolIndex: new Map(), + }); + // biome-ignore lint/suspicious/noExplicitAny: seeding private state + (plugin as any).defaultAgentName = "hello"; + return plugin; + } + + test("rejects with 429 + Retry-After when user is at-limit (default 5)", async () => { + const plugin = seedPlugin(); + for (let i = 0; i < 5; i++) { + // biome-ignore lint/suspicious/noExplicitAny: seeding + (plugin as any).activeStreams.set(`s${i}`, { + controller: new AbortController(), + userId: "alice", + }); + } + + const { res, setHeader, json } = mockRes(); + await ( + plugin as unknown as { + _handleChat: (r: express.Request, w: express.Response) => Promise; + } + )._handleChat(mockReq({ message: "hi" }, "alice"), res); + + expect(res.status).toHaveBeenCalledWith(429); + expect(setHeader).toHaveBeenCalledWith("Retry-After", "5"); + expect(json).toHaveBeenCalledWith( + expect.objectContaining({ + error: expect.stringMatching(/Too many concurrent streams/), + }), + ); + }); + + test("does not reject when another user is at-limit (per-user, not global)", async () => { + const plugin = seedPlugin(); + for (let i = 0; i < 5; i++) { + // biome-ignore lint/suspicious/noExplicitAny: seeding + (plugin as any).activeStreams.set(`s${i}`, { + controller: new AbortController(), + userId: "alice", + }); + } + + // Carol's request must not see a 429 even though alice is at-limit. + // Don't bother running the full stream — we assert only that 429 is + // not the response status. + const { res } = mockRes(); + // biome-ignore lint/suspicious/noExplicitAny: stub _streamAgent to avoid needing a real adapter + (plugin as any)._streamAgent = vi.fn(async () => undefined); + + await ( + plugin as unknown as { + _handleChat: (r: express.Request, w: express.Response) => Promise; + } + )._handleChat(mockReq({ message: "hi" }, "carol"), res); + + expect(res.status).not.toHaveBeenCalledWith(429); + }); + + test("honours agents({ limits: { maxConcurrentStreamsPerUser } })", async () => { + const plugin = seedPlugin({ + dir: false, + limits: { maxConcurrentStreamsPerUser: 2 }, + }); + for (let i = 0; i < 2; i++) { + // biome-ignore lint/suspicious/noExplicitAny: seeding + (plugin as any).activeStreams.set(`s${i}`, { + controller: new AbortController(), + userId: "alice", + }); + } + + const { res } = mockRes(); + await ( + plugin as unknown as { + _handleChat: (r: express.Request, w: express.Response) => Promise; + } + )._handleChat(mockReq({ message: "hi" }, "alice"), res); + + expect(res.status).toHaveBeenCalledWith(429); + }); +}); + +describe("resolvedLimits — default values", () => { + test("exposes the documented MVP defaults when unconfigured", () => { + const plugin = new AgentsPlugin({ dir: false }); + // biome-ignore lint/suspicious/noExplicitAny: read private getter + const limits = (plugin as any).resolvedLimits; + expect(limits).toEqual({ + maxConcurrentStreamsPerUser: 5, + maxToolCalls: 50, + maxSubAgentDepth: 3, + }); + }); + + test("lets callers override any subset", () => { + const plugin = new AgentsPlugin({ + dir: false, + limits: { maxToolCalls: 100 }, + }); + // biome-ignore lint/suspicious/noExplicitAny: read private + const limits = (plugin as any).resolvedLimits; + expect(limits.maxToolCalls).toBe(100); + expect(limits.maxConcurrentStreamsPerUser).toBe(5); + expect(limits.maxSubAgentDepth).toBe(3); + }); +}); + +describe("runSubAgent — depth guard", () => { + test("rejects when depth exceeds the configured maximum", async () => { + const plugin = new AgentsPlugin({ + dir: false, + limits: { maxSubAgentDepth: 2 }, + }); + // biome-ignore lint/suspicious/noExplicitAny: call private method directly + await expect( + (plugin as any).runSubAgent( + mockReq({}, "alice"), + { name: "child", toolIndex: new Map() }, + {}, + new AbortController().signal, + 3, // exceeds limit 2 + ), + ).rejects.toThrow(/Sub-agent depth exceeded \(limit 2\)/); + }); + + test("accepts at the boundary (depth === limit)", async () => { + // Use a stub adapter so we don't need a real model. + const plugin = new AgentsPlugin({ + dir: false, + limits: { maxSubAgentDepth: 3 }, + agents: {}, + }); + + const stubAdapter = { + // biome-ignore lint/suspicious/noExplicitAny: adapter shape not under test + async *run(): any { + yield { type: "message", content: "hello from depth-3" }; + }, + }; + const child = { + name: "child", + instructions: "test", + // biome-ignore lint/suspicious/noExplicitAny: stub shape + adapter: stubAdapter as any, + toolIndex: new Map(), + }; + + // biome-ignore lint/suspicious/noExplicitAny: call private + const result = await (plugin as any).runSubAgent( + mockReq({}, "alice"), + child, + { input: "test" }, + new AbortController().signal, + 3, // at the limit, not over + ); + expect(result).toBe("hello from depth-3"); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/event-channel.test.ts b/packages/appkit/src/plugins/agents/tests/event-channel.test.ts new file mode 100644 index 000000000..d80d788de --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/event-channel.test.ts @@ -0,0 +1,78 @@ +import { describe, expect, test } from "vitest"; +import { EventChannel } from "../event-channel"; + +async function collect(ch: EventChannel): Promise { + const out: T[] = []; + for await (const v of ch) out.push(v); + return out; +} + +describe("EventChannel", () => { + test("yields pushed values in order", async () => { + const ch = new EventChannel(); + const p = collect(ch); + ch.push(1); + ch.push(2); + ch.push(3); + ch.close(); + await expect(p).resolves.toEqual([1, 2, 3]); + }); + + test("pushes before iteration start are buffered", async () => { + const ch = new EventChannel(); + ch.push("a"); + ch.push("b"); + ch.close(); + await expect(collect(ch)).resolves.toEqual(["a", "b"]); + }); + + test("waiting iterator is unblocked by subsequent push", async () => { + const ch = new EventChannel(); + const promise = collect(ch); + await new Promise((r) => setTimeout(r, 5)); + ch.push(42); + ch.close(); + await expect(promise).resolves.toEqual([42]); + }); + + test("close with no pending values terminates iteration", async () => { + const ch = new EventChannel(); + const p = collect(ch); + ch.close(); + await expect(p).resolves.toEqual([]); + }); + + test("push after close is a no-op (channel is closed)", async () => { + const ch = new EventChannel(); + ch.close(); + ch.push(1); + await expect(collect(ch)).resolves.toEqual([]); + }); + + test("close with error rejects the waiting iterator", async () => { + const ch = new EventChannel(); + const promise = collect(ch); + await new Promise((r) => setTimeout(r, 5)); + ch.close(new Error("boom")); + await expect(promise).rejects.toThrow(/boom/); + }); + + test("interleaved pushes and reads stream through", async () => { + const ch = new EventChannel(); + const received: number[] = []; + const reader = (async () => { + for await (const v of ch) { + received.push(v); + if (received.length === 3) break; + } + })(); + ch.push(1); + await new Promise((r) => setTimeout(r, 0)); + ch.push(2); + await new Promise((r) => setTimeout(r, 0)); + ch.push(3); + await reader; + expect(received).toEqual([1, 2, 3]); + ch.close(); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/event-translator.test.ts b/packages/appkit/src/plugins/agents/tests/event-translator.test.ts new file mode 100644 index 000000000..050af001a --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/event-translator.test.ts @@ -0,0 +1,332 @@ +import type { ResponseStreamEvent } from "shared"; +import { describe, expect, test } from "vitest"; +import { AgentEventTranslator } from "../event-translator"; + +describe("AgentEventTranslator", () => { + test("translates message_delta to output_item.added + output_text.delta on first delta", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "message_delta", + content: "Hello", + }); + + expect(events).toHaveLength(2); + expect(events[0].type).toBe("response.output_item.added"); + expect(events[1].type).toBe("response.output_text.delta"); + + if (events[1].type === "response.output_text.delta") { + expect(events[1].delta).toBe("Hello"); + } + }); + + test("subsequent message_delta only produces output_text.delta", () => { + const translator = new AgentEventTranslator(); + translator.translate({ type: "message_delta", content: "Hello" }); + const events = translator.translate({ + type: "message_delta", + content: " world", + }); + + expect(events).toHaveLength(1); + expect(events[0].type).toBe("response.output_text.delta"); + }); + + test("sequence_number is monotonically increasing", () => { + const translator = new AgentEventTranslator(); + const e1 = translator.translate({ type: "message_delta", content: "a" }); + const e2 = translator.translate({ type: "message_delta", content: "b" }); + const e3 = translator.finalize(); + + const allSeqs = [...e1, ...e2, ...e3].map((e) => + "sequence_number" in e ? e.sequence_number : -1, + ); + + for (let i = 1; i < allSeqs.length; i++) { + expect(allSeqs[i]).toBeGreaterThan(allSeqs[i - 1]); + } + }); + + test("translates tool_call to paired output_item.added + output_item.done", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "tool_call", + callId: "call_1", + name: "analytics.query", + args: { sql: "SELECT 1" }, + }); + + expect(events).toHaveLength(2); + expect(events[0].type).toBe("response.output_item.added"); + expect(events[1].type).toBe("response.output_item.done"); + + if (events[0].type === "response.output_item.added") { + expect(events[0].item.type).toBe("function_call"); + if (events[0].item.type === "function_call") { + expect(events[0].item.name).toBe("analytics.query"); + expect(events[0].item.call_id).toBe("call_1"); + } + } + }); + + test("translates tool_result to paired output_item events", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "tool_result", + callId: "call_1", + result: { rows: 42 }, + }); + + expect(events).toHaveLength(2); + expect(events[0].type).toBe("response.output_item.added"); + + if (events[0].type === "response.output_item.added") { + expect(events[0].item.type).toBe("function_call_output"); + } + }); + + test("translates tool_result error", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "tool_result", + callId: "call_1", + result: null, + error: "Query failed", + }); + + if ( + events[0].type === "response.output_item.added" && + events[0].item.type === "function_call_output" + ) { + expect(events[0].item.output).toBe("Query failed"); + } + }); + + test("translates thinking to appkit.thinking extension event", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "thinking", + content: "Let me think about this...", + }); + + expect(events).toHaveLength(1); + expect(events[0].type).toBe("appkit.thinking"); + if (events[0].type === "appkit.thinking") { + expect(events[0].content).toBe("Let me think about this..."); + } + }); + + test("translates metadata to appkit.metadata extension event", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "metadata", + data: { threadId: "t-123" }, + }); + + expect(events).toHaveLength(1); + expect(events[0].type).toBe("appkit.metadata"); + if (events[0].type === "appkit.metadata") { + expect(events[0].data.threadId).toBe("t-123"); + } + }); + + test("status:complete triggers finalize with response.completed", () => { + const translator = new AgentEventTranslator(); + translator.translate({ type: "message_delta", content: "Hi" }); + const events = translator.translate({ type: "status", status: "complete" }); + + const types = events.map((e) => e.type); + expect(types).toContain("response.output_item.done"); + expect(types).toContain("response.completed"); + }); + + test("status:error emits error + response.failed", () => { + const translator = new AgentEventTranslator(); + const events = translator.translate({ + type: "status", + status: "error", + error: "Something broke", + }); + + expect(events).toHaveLength(2); + expect(events[0].type).toBe("error"); + expect(events[1].type).toBe("response.failed"); + + if (events[0].type === "error") { + expect(events[0].error).toBe("Something broke"); + } + }); + + test("finalize produces response.completed", () => { + const translator = new AgentEventTranslator(); + const events = translator.finalize(); + + expect(events.some((e) => e.type === "response.completed")).toBe(true); + }); + + test("finalize with accumulated message text produces output_item.done", () => { + const translator = new AgentEventTranslator(); + translator.translate({ type: "message_delta", content: "Hello " }); + translator.translate({ type: "message_delta", content: "world" }); + const events = translator.finalize(); + + const doneEvent = events.find( + (e) => e.type === "response.output_item.done", + ); + expect(doneEvent).toBeDefined(); + if ( + doneEvent?.type === "response.output_item.done" && + doneEvent.item.type === "message" + ) { + expect(doneEvent.item.content[0].text).toBe("Hello world"); + } + }); + + test("output_index increments for tool calls", () => { + const translator = new AgentEventTranslator(); + const e1 = translator.translate({ + type: "tool_call", + callId: "c1", + name: "tool1", + args: {}, + }); + const e2 = translator.translate({ + type: "tool_result", + callId: "c1", + result: "ok", + }); + + if ( + e1[0].type === "response.output_item.added" && + e2[0].type === "response.output_item.added" + ) { + expect(e2[0].output_index).toBeGreaterThan(e1[0].output_index); + } + }); +}); + +describe("AgentEventTranslator — monotonic output_index", () => { + /** + * Helper: every emitted `response.output_item.added`/`output_item.done` + * event's `output_index` must be >= every prior add/done `output_index`. + * This is the strict contract Responses-API clients (OpenAI's own SDK + * parser) enforce. + */ + function assertMonotonic(events: ResponseStreamEvent[]) { + let last = -1; + for (const ev of events) { + if ( + ev.type === "response.output_item.added" || + ev.type === "response.output_item.done" + ) { + expect(ev.output_index).toBeGreaterThanOrEqual(last); + last = ev.output_index; + } + } + } + + test("tool_call followed by message_delta emits monotonic indices (regression)", () => { + // Before the fix this produced: tool_call at index 1, then + // message_delta.added at 0 — monotonicity violated. + const t = new AgentEventTranslator(); + const all: ResponseStreamEvent[] = []; + all.push( + ...t.translate({ + type: "tool_call", + callId: "c1", + name: "lookup", + args: { q: "x" }, + }), + ); + all.push( + ...t.translate({ type: "tool_result", callId: "c1", result: "ok" }), + ); + all.push(...t.translate({ type: "message_delta", content: "Result: " })); + all.push(...t.translate({ type: "message_delta", content: "ok." })); + all.push(...t.finalize()); + + assertMonotonic(all); + + const added = all.filter((e) => e.type === "response.output_item.added"); + // Three items: tool_call, tool_result, message. Indices 0/1/2. + expect(added.map((e) => e.output_index)).toEqual([0, 1, 2]); + }); + + test("message interrupted by tool_call is closed before the tool_call opens", () => { + const t = new AgentEventTranslator(); + const all: ResponseStreamEvent[] = []; + all.push(...t.translate({ type: "message_delta", content: "thinking..." })); + all.push( + ...t.translate({ + type: "tool_call", + callId: "c1", + name: "lookup", + args: {}, + }), + ); + all.push( + ...t.translate({ type: "tool_result", callId: "c1", result: "ok" }), + ); + all.push(...t.translate({ type: "message_delta", content: "final" })); + all.push(...t.finalize()); + + assertMonotonic(all); + + // Structure: msg0.added, msg0.delta, msg0.done (closed before tool), + // tool_call.added/done, tool_result.added/done, msg1.added, msg1.delta, + // msg1.done (from finalize), response.completed. + const addedDone = all.filter( + (e) => + e.type === "response.output_item.added" || + e.type === "response.output_item.done", + ); + expect(addedDone.map((e) => `${e.type}@${e.output_index}`)).toEqual([ + "response.output_item.added@0", + "response.output_item.done@0", + "response.output_item.added@1", + "response.output_item.done@1", + "response.output_item.added@2", + "response.output_item.done@2", + "response.output_item.added@3", + "response.output_item.done@3", + ]); + }); + + test("full `message` event after deltas does not double-emit output_item.added", () => { + const t = new AgentEventTranslator(); + const all: ResponseStreamEvent[] = []; + all.push(...t.translate({ type: "message_delta", content: "partial" })); + all.push( + ...t.translate({ type: "message", content: "full final content" }), + ); + all.push(...t.finalize()); + + const added = all.filter((e) => e.type === "response.output_item.added"); + const done = all.filter((e) => e.type === "response.output_item.done"); + // Exactly one added (from the first delta) and one done (from the full + // message). finalize() must not emit a second done for the same item. + expect(added).toHaveLength(1); + expect(done).toHaveLength(1); + if (done[0].type === "response.output_item.done") { + const item = done[0].item; + if (item.type === "message") { + expect(item.content[0].text).toBe("full final content"); + } + } + }); + + test("tool_result coerces undefined result to empty-string output", () => { + const t = new AgentEventTranslator(); + const events = t.translate({ + type: "tool_result", + callId: "c1", + result: undefined, + }); + const done = events.find((e) => e.type === "response.output_item.done"); + if (done?.type === "response.output_item.done") { + const item = done.item; + if (item.type === "function_call_output") { + expect(item.output).toBe(""); + } + } + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/thread-store.test.ts b/packages/appkit/src/plugins/agents/tests/thread-store.test.ts new file mode 100644 index 000000000..ed4f70bab --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/thread-store.test.ts @@ -0,0 +1,138 @@ +import { describe, expect, test } from "vitest"; +import { InMemoryThreadStore } from "../thread-store"; + +describe("InMemoryThreadStore", () => { + test("create() returns a new thread with the given userId", async () => { + const store = new InMemoryThreadStore(); + const thread = await store.create("user-1"); + + expect(thread.id).toBeDefined(); + expect(thread.userId).toBe("user-1"); + expect(thread.messages).toEqual([]); + expect(thread.createdAt).toBeInstanceOf(Date); + expect(thread.updatedAt).toBeInstanceOf(Date); + }); + + test("get() returns the thread for the correct user", async () => { + const store = new InMemoryThreadStore(); + const thread = await store.create("user-1"); + + const retrieved = await store.get(thread.id, "user-1"); + expect(retrieved).toEqual(thread); + }); + + test("get() returns null for wrong user", async () => { + const store = new InMemoryThreadStore(); + const thread = await store.create("user-1"); + + const retrieved = await store.get(thread.id, "user-2"); + expect(retrieved).toBeNull(); + }); + + test("get() returns null for non-existent thread", async () => { + const store = new InMemoryThreadStore(); + const retrieved = await store.get("non-existent", "user-1"); + expect(retrieved).toBeNull(); + }); + + test("list() returns threads sorted by updatedAt desc", async () => { + const store = new InMemoryThreadStore(); + const t1 = await store.create("user-1"); + const t2 = await store.create("user-1"); + + // Make t1 more recently updated + await store.addMessage(t1.id, "user-1", { + id: "msg-1", + role: "user", + content: "hello", + createdAt: new Date(), + }); + + const threads = await store.list("user-1"); + expect(threads).toHaveLength(2); + expect(threads[0].id).toBe(t1.id); + expect(threads[1].id).toBe(t2.id); + }); + + test("list() returns empty for unknown user", async () => { + const store = new InMemoryThreadStore(); + await store.create("user-1"); + + const threads = await store.list("user-2"); + expect(threads).toEqual([]); + }); + + test("addMessage() appends to thread and updates timestamp", async () => { + const store = new InMemoryThreadStore(); + const thread = await store.create("user-1"); + const originalUpdatedAt = thread.updatedAt; + + // Small delay to ensure timestamp differs + await new Promise((r) => setTimeout(r, 5)); + + await store.addMessage(thread.id, "user-1", { + id: "msg-1", + role: "user", + content: "hello", + createdAt: new Date(), + }); + + const updated = await store.get(thread.id, "user-1"); + expect(updated?.messages).toHaveLength(1); + expect(updated?.messages[0].content).toBe("hello"); + expect(updated?.updatedAt.getTime()).toBeGreaterThanOrEqual( + originalUpdatedAt.getTime(), + ); + }); + + test("addMessage() throws for non-existent thread", async () => { + const store = new InMemoryThreadStore(); + + await expect( + store.addMessage("non-existent", "user-1", { + id: "msg-1", + role: "user", + content: "hello", + createdAt: new Date(), + }), + ).rejects.toThrow("Thread non-existent not found"); + }); + + test("delete() removes a thread and returns true", async () => { + const store = new InMemoryThreadStore(); + const thread = await store.create("user-1"); + + const deleted = await store.delete(thread.id, "user-1"); + expect(deleted).toBe(true); + + const retrieved = await store.get(thread.id, "user-1"); + expect(retrieved).toBeNull(); + }); + + test("delete() returns false for non-existent thread", async () => { + const store = new InMemoryThreadStore(); + const deleted = await store.delete("non-existent", "user-1"); + expect(deleted).toBe(false); + }); + + test("delete() returns false for wrong user", async () => { + const store = new InMemoryThreadStore(); + const thread = await store.create("user-1"); + + const deleted = await store.delete(thread.id, "user-2"); + expect(deleted).toBe(false); + }); + + test("threads are isolated per user", async () => { + const store = new InMemoryThreadStore(); + await store.create("user-1"); + await store.create("user-1"); + await store.create("user-2"); + + const user1Threads = await store.list("user-1"); + const user2Threads = await store.list("user-2"); + + expect(user1Threads).toHaveLength(2); + expect(user2Threads).toHaveLength(1); + }); +}); diff --git a/packages/appkit/src/plugins/agents/tests/tool-approval-gate.test.ts b/packages/appkit/src/plugins/agents/tests/tool-approval-gate.test.ts new file mode 100644 index 000000000..1e17ddf63 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/tool-approval-gate.test.ts @@ -0,0 +1,156 @@ +import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; +import { ToolApprovalGate } from "../tool-approval-gate"; + +describe("ToolApprovalGate", () => { + let gate: ToolApprovalGate; + + beforeEach(() => { + vi.useFakeTimers(); + gate = new ToolApprovalGate(); + }); + + afterEach(() => { + vi.useRealTimers(); + }); + + test("resolves with 'approve' when a matching submit arrives", async () => { + const waiter = gate.wait({ + approvalId: "a1", + streamId: "s1", + userId: "alice", + timeoutMs: 60_000, + }); + expect(gate.size).toBe(1); + + const result = gate.submit({ + approvalId: "a1", + userId: "alice", + decision: "approve", + }); + + expect(result).toEqual({ ok: true }); + await expect(waiter).resolves.toBe("approve"); + expect(gate.size).toBe(0); + }); + + test("resolves with 'deny' on explicit deny", async () => { + const waiter = gate.wait({ + approvalId: "a2", + streamId: "s1", + userId: "alice", + timeoutMs: 60_000, + }); + gate.submit({ + approvalId: "a2", + userId: "alice", + decision: "deny", + }); + await expect(waiter).resolves.toBe("deny"); + }); + + test("auto-denies after timeoutMs with no submit", async () => { + const waiter = gate.wait({ + approvalId: "a3", + streamId: "s1", + userId: "alice", + timeoutMs: 1000, + }); + vi.advanceTimersByTime(1000); + await expect(waiter).resolves.toBe("deny"); + expect(gate.size).toBe(0); + }); + + test("refuses a submit from a different user (ownership check)", async () => { + const waiter = gate.wait({ + approvalId: "a4", + streamId: "s1", + userId: "alice", + timeoutMs: 60_000, + }); + const result = gate.submit({ + approvalId: "a4", + userId: "bob", + decision: "approve", + }); + expect(result).toEqual({ ok: false, reason: "forbidden" }); + expect(gate.size).toBe(1); + // Waiter is still pending; cleanup to let fake timers drain. + gate.submit({ + approvalId: "a4", + userId: "alice", + decision: "deny", + }); + await expect(waiter).resolves.toBe("deny"); + }); + + test("returns 'unknown' reason when approvalId is not registered", () => { + expect( + gate.submit({ approvalId: "nope", userId: "x", decision: "approve" }), + ).toEqual({ ok: false, reason: "unknown" }); + }); + + test("abortStream denies every pending gate for that stream", async () => { + const a = gate.wait({ + approvalId: "a5", + streamId: "s1", + userId: "alice", + timeoutMs: 60_000, + }); + const b = gate.wait({ + approvalId: "a6", + streamId: "s1", + userId: "alice", + timeoutMs: 60_000, + }); + const c = gate.wait({ + approvalId: "a7", + streamId: "s2", + userId: "alice", + timeoutMs: 60_000, + }); + gate.abortStream("s1"); + await expect(a).resolves.toBe("deny"); + await expect(b).resolves.toBe("deny"); + expect(gate.size).toBe(1); + // s2's waiter is still pending; settle it to clean up timers. + gate.submit({ approvalId: "a7", userId: "alice", decision: "deny" }); + await expect(c).resolves.toBe("deny"); + }); + + test("abortAll denies every pending gate", async () => { + const a = gate.wait({ + approvalId: "a8", + streamId: "s1", + userId: "alice", + timeoutMs: 60_000, + }); + const b = gate.wait({ + approvalId: "a9", + streamId: "s2", + userId: "bob", + timeoutMs: 60_000, + }); + gate.abortAll(); + await expect(a).resolves.toBe("deny"); + await expect(b).resolves.toBe("deny"); + expect(gate.size).toBe(0); + }); + + test("a timed-out approval cannot be resolved by a late submit", async () => { + const waiter = gate.wait({ + approvalId: "a10", + streamId: "s1", + userId: "alice", + timeoutMs: 500, + }); + vi.advanceTimersByTime(500); + await expect(waiter).resolves.toBe("deny"); + + const late = gate.submit({ + approvalId: "a10", + userId: "alice", + decision: "approve", + }); + expect(late).toEqual({ ok: false, reason: "unknown" }); + }); +}); diff --git a/packages/appkit/src/plugins/agents/thread-store.ts b/packages/appkit/src/plugins/agents/thread-store.ts new file mode 100644 index 000000000..7c4622cd3 --- /dev/null +++ b/packages/appkit/src/plugins/agents/thread-store.ts @@ -0,0 +1,66 @@ +import { randomUUID } from "node:crypto"; +import type { Message, Thread, ThreadStore } from "shared"; + +/** + * In-memory thread store backed by a nested Map. + * + * Outer key: userId, inner key: threadId. Thread history is retained for the + * lifetime of the process with no eviction, caps, or TTL — a chatty user will + * grow the in-memory footprint monotonically, and the server loses every + * thread on restart. **This implementation is intended for local development + * and single-process demos only.** + * + * For any real deployment, pass a persistent `ThreadStore` to `agents({ ... })` + * (e.g. a Lakebase- or Postgres-backed implementation). A bounded + * `InMemoryThreadStore` with eviction policies is tracked as a follow-up. + */ +export class InMemoryThreadStore implements ThreadStore { + private store = new Map>(); + + async create(userId: string): Promise { + const now = new Date(); + const thread: Thread = { + id: randomUUID(), + userId, + messages: [], + createdAt: now, + updatedAt: now, + }; + this.userMap(userId).set(thread.id, thread); + return thread; + } + + async get(threadId: string, userId: string): Promise { + return this.userMap(userId).get(threadId) ?? null; + } + + async list(userId: string): Promise { + return Array.from(this.userMap(userId).values()).sort( + (a, b) => b.updatedAt.getTime() - a.updatedAt.getTime(), + ); + } + + async addMessage( + threadId: string, + userId: string, + message: Message, + ): Promise { + const thread = this.userMap(userId).get(threadId); + if (!thread) throw new Error(`Thread ${threadId} not found`); + thread.messages.push(message); + thread.updatedAt = new Date(); + } + + async delete(threadId: string, userId: string): Promise { + return this.userMap(userId).delete(threadId); + } + + private userMap(userId: string): Map { + let map = this.store.get(userId); + if (!map) { + map = new Map(); + this.store.set(userId, map); + } + return map; + } +} diff --git a/packages/appkit/src/plugins/agents/tool-approval-gate.ts b/packages/appkit/src/plugins/agents/tool-approval-gate.ts new file mode 100644 index 000000000..669f30a96 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tool-approval-gate.ts @@ -0,0 +1,122 @@ +/** + * Server-side state for the human-in-the-loop approval gate on + * `destructive: true` agent tool calls. + * + * Lifecycle: + * + * 1. `wait(...)` is called from inside `executeTool` when a destructive tool + * is about to execute. A `Pending` record is registered and a timer is + * scheduled for auto-deny. The returned promise is what blocks the + * adapter until the decision arrives. + * 2. The client receives an `appkit.approval_pending` SSE event carrying the + * `approvalId` + `streamId` and posts a decision to `POST /chat/approve`. + * The route calls {@link ToolApprovalGate.submit} which resolves the + * pending promise and clears the timer. + * 3. If no submit arrives within `timeoutMs`, the timer fires and the + * promise resolves with `"deny"`. + * + * Security invariants: + * + * - `submit` verifies that the decider's user id matches the user who + * initiated the stream (set by `wait`). Mismatches are rejected without + * resolving the pending promise — this prevents a second user from + * approving (or denying) another user's destructive action. + * - `abort(streamId)` cancels every pending gate for a stream and denies + * each one. Used when the enclosing stream is cancelled or the plugin is + * shutting down. + */ +type ApprovalDecision = "approve" | "deny"; + +interface Pending { + resolve: (decision: ApprovalDecision) => void; + userId: string; + streamId: string; + timeout: ReturnType; +} + +type ApprovalSubmitResult = + | { ok: true } + | { ok: false; reason: "unknown" | "forbidden" }; + +export class ToolApprovalGate { + private pending = new Map(); + + /** + * Register a pending approval and return a promise that resolves with the + * user's decision or with `"deny"` when the timeout elapses. The returned + * promise never rejects. + */ + wait(args: { + approvalId: string; + streamId: string; + userId: string; + timeoutMs: number; + }): Promise { + const { approvalId, streamId, userId, timeoutMs } = args; + return new Promise((resolve) => { + const timeout = setTimeout(() => { + if (this.pending.delete(approvalId)) { + resolve("deny"); + } + }, timeoutMs); + this.pending.set(approvalId, { + resolve, + userId, + streamId, + timeout, + }); + }); + } + + /** + * Settle an approval with a user decision. Returns: + * - `{ ok: true }` if the pending record existed, the userId matched, and + * the promise was resolved. + * - `{ ok: false, reason: "unknown" }` if no pending record matches the id. + * - `{ ok: false, reason: "forbidden" }` if the userId does not match the + * user who initiated the stream. + */ + submit(args: { + approvalId: string; + userId: string; + decision: ApprovalDecision; + }): ApprovalSubmitResult { + const { approvalId, userId, decision } = args; + const p = this.pending.get(approvalId); + if (!p) return { ok: false, reason: "unknown" }; + if (p.userId !== userId) return { ok: false, reason: "forbidden" }; + clearTimeout(p.timeout); + this.pending.delete(approvalId); + p.resolve(decision); + return { ok: true }; + } + + /** + * Cancel all pending gates for a specific stream (e.g., when the user + * cancels the stream). Each gate resolves with `"deny"` so the adapter + * unwinds cleanly. + */ + abortStream(streamId: string): void { + for (const [id, p] of this.pending) { + if (p.streamId === streamId) { + clearTimeout(p.timeout); + this.pending.delete(id); + p.resolve("deny"); + } + } + } + + /** Cancel every pending gate. Used at plugin shutdown. */ + abortAll(): void { + for (const [id, p] of this.pending) { + clearTimeout(p.timeout); + this.pending.delete(id); + p.resolve("deny"); + } + } + + /** Number of pending approvals (test/diagnostic helper). */ + get size(): number { + return this.pending.size; + } +} diff --git a/packages/shared/src/agent.ts b/packages/shared/src/agent.ts index ef532c7c7..2594b3e6d 100644 --- a/packages/shared/src/agent.ts +++ b/packages/shared/src/agent.ts @@ -86,7 +86,21 @@ export type AgentEvent = status: "running" | "waiting" | "complete" | "error"; error?: string; } - | { type: "metadata"; data: Record }; + | { type: "metadata"; data: Record } + | { + /** + * Emitted by the agents plugin (not adapters) when a tool call annotated + * `destructive: true` is awaiting human approval. Clients should render + * an approval prompt and POST to `/chat/approve` with the matching + * `approvalId` and a `decision` of `approve` or `deny`. + */ + type: "approval_pending"; + approvalId: string; + streamId: string; + toolName: string; + args: unknown; + annotations?: ToolAnnotations; + }; // --------------------------------------------------------------------------- // Responses API types (OpenAI-compatible wire format for HTTP boundary) @@ -178,6 +192,23 @@ export interface AppKitMetadataEvent { sequence_number: number; } +/** + * Emitted when a destructive tool call is awaiting human approval. The client + * should render an approval UI and POST the decision to `/chat/approve` with + * `{ streamId, approvalId, decision: "approve" | "deny" }`. If no decision + * arrives before the server-side timeout, the call is auto-denied and the + * agent receives a denial string as the tool output. + */ +export interface AppKitApprovalPendingEvent { + type: "appkit.approval_pending"; + approval_id: string; + stream_id: string; + tool_name: string; + args: unknown; + annotations?: ToolAnnotations; + sequence_number: number; +} + export type ResponseStreamEvent = | ResponseOutputItemAddedEvent | ResponseOutputItemDoneEvent @@ -186,7 +217,8 @@ export type ResponseStreamEvent = | ResponseErrorEvent | ResponseFailedEvent | AppKitThinkingEvent - | AppKitMetadataEvent; + | AppKitMetadataEvent + | AppKitApprovalPendingEvent; // --------------------------------------------------------------------------- // Adapter contract From 3107741e326fdcfaca03854b6e90a70240b93d30 Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Thu, 23 Apr 2026 17:59:39 +0200 Subject: [PATCH 02/46] refactor(appkit): generalize default base system prompt Tool-agnostic guidelines instead of SQL/files-specific defaults; accept full PromptContext in buildBaseSystemPrompt for parity with custom callbacks. Signed-off-by: MarioCadenas --- .../appkit/src/core/agent/system-prompt.ts | 32 +++++++++++++------ .../core/agent/tests/system-prompt.test.ts | 30 ++++++++++++----- packages/appkit/src/plugins/agents/agents.ts | 8 +++-- 3 files changed, 50 insertions(+), 20 deletions(-) diff --git a/packages/appkit/src/core/agent/system-prompt.ts b/packages/appkit/src/core/agent/system-prompt.ts index 634f49c5f..01f3fe9b0 100644 --- a/packages/appkit/src/core/agent/system-prompt.ts +++ b/packages/appkit/src/core/agent/system-prompt.ts @@ -1,28 +1,40 @@ +import type { PromptContext } from "./types"; + /** - * Builds the AppKit base system prompt from active plugin names. + * Default base system prompt: product identity, active AppKit plugins, and + * tool-agnostic behavior hints. * - * The base prompt provides guidelines and app context. It does NOT - * include individual tool descriptions — those are sent via the - * structured `tools` API parameter to the LLM. + * Individual tool definitions and JSON Schemas are still sent through the + * model's `tools` / function-calling channel — this string is not a second + * copy of that list. `ctx.toolNames` is available for custom + * `baseSystemPrompt` callbacks; the default text stays short and does not + * enumerate tools to avoid drift and token bloat. */ -export function buildBaseSystemPrompt(pluginNames: string[]): string { +export function buildBaseSystemPrompt(ctx: PromptContext): string { + const { pluginNames } = ctx; const lines: string[] = [ "You are an AI assistant running on Databricks AppKit.", ]; if (pluginNames.length > 0) { lines.push(""); - lines.push(`Active plugins: ${pluginNames.join(", ")}`); + lines.push(`Active AppKit plugins: ${pluginNames.join(", ")}`); } lines.push(""); lines.push("Guidelines:"); - lines.push("- Use Databricks SQL syntax when writing queries"); lines.push( - "- When results are large, summarize key findings rather than dumping raw data", + "- Be concise: for large or noisy tool output, summarize what matters and how to go deeper instead of pasting everything.", + ); + lines.push( + "- Use each tool as defined: pass required arguments and use the syntax, dialect, or path rules the target system expects (see each tool’s description and schema).", + ); + lines.push( + "- If a tool call fails, explain the error in plain language and suggest a fix or next step.", + ); + lines.push( + "- Respect tool metadata and app policy: read-only vs destructive tools, user/identity context, and any approval or safety flows the app provides.", ); - lines.push("- If a tool call fails, explain the error clearly to the user"); - lines.push("- When browsing files, verify the path exists before reading"); return lines.join("\n"); } diff --git a/packages/appkit/src/core/agent/tests/system-prompt.test.ts b/packages/appkit/src/core/agent/tests/system-prompt.test.ts index 83bf8e192..25724259e 100644 --- a/packages/appkit/src/core/agent/tests/system-prompt.test.ts +++ b/packages/appkit/src/core/agent/tests/system-prompt.test.ts @@ -1,27 +1,41 @@ import { describe, expect, test } from "vitest"; import { buildBaseSystemPrompt, composeSystemPrompt } from "../system-prompt"; +const emptyCtx = { + agentName: "a", + pluginNames: [] as string[], + toolNames: [] as string[], +}; + describe("buildBaseSystemPrompt", () => { test("includes plugin names", () => { - const prompt = buildBaseSystemPrompt(["analytics", "files", "genie"]); - expect(prompt).toContain("Active plugins: analytics, files, genie"); + const prompt = buildBaseSystemPrompt({ + agentName: "assistant", + pluginNames: ["analytics", "files", "genie"], + toolNames: [], + }); + expect(prompt).toContain("Active AppKit plugins: analytics, files, genie"); }); test("includes guidelines", () => { - const prompt = buildBaseSystemPrompt([]); + const prompt = buildBaseSystemPrompt(emptyCtx); expect(prompt).toContain("Guidelines:"); - expect(prompt).toContain("Databricks SQL"); - expect(prompt).toContain("summarize key findings"); + expect(prompt).toContain("syntax, dialect, or path rules"); + expect(prompt).toContain("summarize what matters"); }); test("works with no plugins", () => { - const prompt = buildBaseSystemPrompt([]); + const prompt = buildBaseSystemPrompt(emptyCtx); expect(prompt).toContain("AI assistant running on Databricks AppKit"); - expect(prompt).not.toContain("Active plugins:"); + expect(prompt).not.toContain("Active AppKit plugins:"); }); test("does NOT include individual tool names", () => { - const prompt = buildBaseSystemPrompt(["analytics"]); + const prompt = buildBaseSystemPrompt({ + agentName: "a", + pluginNames: ["analytics"], + toolNames: ["analytics.query"], + }); expect(prompt).not.toContain("analytics.query"); expect(prompt).not.toContain("Available tools:"); }); diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts index 9c3076e6e..7b4006bc8 100644 --- a/packages/appkit/src/plugins/agents/agents.ts +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -27,7 +27,11 @@ import { chatRequestSchema, invocationsRequestSchema, } from "./schemas"; -import { buildBaseSystemPrompt, composeSystemPrompt } from "../../core/agent/system-prompt"; +import { + buildBaseSystemPrompt, + composeSystemPrompt, +} from "../../core/agent/system-prompt"; +import { AppKitMcpClient } from "../../connectors/mcp"; import { InMemoryThreadStore } from "./thread-store"; import { ToolApprovalGate } from "./tool-approval-gate"; import { @@ -1245,7 +1249,7 @@ function composePromptForAgent( } else if (typeof resolved === "function") { base = resolved(ctx); } else { - base = buildBaseSystemPrompt(ctx.pluginNames); + base = buildBaseSystemPrompt(ctx); } return composeSystemPrompt(base, registered.instructions); From 2ba1577c157b2ec27274eda39ee11363a9362eb6 Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Thu, 23 Apr 2026 18:01:42 +0200 Subject: [PATCH 03/46] feat(appkit): optional serving_endpoint on agents plugin manifest Register DATABRICKS_SERVING_ENDPOINT_NAME as optional CAN_QUERY so apps using Databricks-hosted agent models get resource wiring; optional when agents use only external adapters. Sync template/appkit.plugins.json. Signed-off-by: MarioCadenas --- .../appkit/src/plugins/agents/manifest.json | 16 ++++++++++++- template/appkit.plugins.json | 24 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/packages/appkit/src/plugins/agents/manifest.json b/packages/appkit/src/plugins/agents/manifest.json index cb7a43f8b..f2766b808 100644 --- a/packages/appkit/src/plugins/agents/manifest.json +++ b/packages/appkit/src/plugins/agents/manifest.json @@ -5,6 +5,20 @@ "description": "AI agents driven by markdown configs or code, with auto-tool-discovery from registered plugins", "resources": { "required": [], - "optional": [] + "optional": [ + { + "type": "serving_endpoint", + "alias": "Model Serving (agents)", + "resourceKey": "agents-serving-endpoint", + "description": "Databricks Model Serving endpoint for agent runs that use workspace-hosted models (DatabricksAdapter, optional default serving endpoint env, or markdown configs that resolve to serving). Omit when agents rely only on external/custom model adapters.", + "permission": "CAN_QUERY", + "fields": { + "name": { + "env": "DATABRICKS_SERVING_ENDPOINT_NAME", + "description": "Serving endpoint name used for agent LLM inference when configured for Databricks Model Serving" + } + } + } + ] } } diff --git a/template/appkit.plugins.json b/template/appkit.plugins.json index d3c8702f9..918ad83f0 100644 --- a/template/appkit.plugins.json +++ b/template/appkit.plugins.json @@ -2,6 +2,30 @@ "$schema": "https://databricks.github.io/appkit/schemas/template-plugins.schema.json", "version": "1.1", "plugins": { + "agents": { + "name": "agents", + "displayName": "Agents Plugin", + "description": "AI agents driven by markdown configs or code, with auto-tool-discovery from registered plugins", + "package": "@databricks/appkit", + "resources": { + "required": [], + "optional": [ + { + "type": "serving_endpoint", + "alias": "Model Serving (agents)", + "resourceKey": "agents-serving-endpoint", + "description": "Databricks Model Serving endpoint for agent runs that use workspace-hosted models (DatabricksAdapter, optional default serving endpoint env, or markdown configs that resolve to serving). Omit when agents rely only on external/custom model adapters.", + "permission": "CAN_QUERY", + "fields": { + "name": { + "env": "DATABRICKS_SERVING_ENDPOINT_NAME", + "description": "Serving endpoint name used for agent LLM inference when configured for Databricks Model Serving" + } + } + } + ] + } + }, "analytics": { "name": "analytics", "displayName": "Analytics Plugin", From 1c4f8d2e529acb829be84ae6a4a8ea5f1ee9d1ba Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Thu, 23 Apr 2026 18:03:21 +0200 Subject: [PATCH 04/46] fix(appkit): agents manifest uses DATABRICKS_AGENT_ENDPOINT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Align optional serving resource with `DatabricksAdapter.fromModelServing()`, which reads `DATABRICKS_AGENT_ENDPOINT` — not `DATABRICKS_SERVING_ENDPOINT_NAME` (serving plugin). Sync template. Signed-off-by: MarioCadenas --- packages/appkit/src/plugins/agents/manifest.json | 6 +++--- template/appkit.plugins.json | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/packages/appkit/src/plugins/agents/manifest.json b/packages/appkit/src/plugins/agents/manifest.json index f2766b808..f3986c837 100644 --- a/packages/appkit/src/plugins/agents/manifest.json +++ b/packages/appkit/src/plugins/agents/manifest.json @@ -10,12 +10,12 @@ "type": "serving_endpoint", "alias": "Model Serving (agents)", "resourceKey": "agents-serving-endpoint", - "description": "Databricks Model Serving endpoint for agent runs that use workspace-hosted models (DatabricksAdapter, optional default serving endpoint env, or markdown configs that resolve to serving). Omit when agents rely only on external/custom model adapters.", + "description": "Databricks Model Serving endpoint for agents using workspace-hosted models (`DatabricksAdapter.fromModelServing`). Wire the same endpoint name AppKit reads from `DATABRICKS_AGENT_ENDPOINT` when no per-agent model is configured. Omit when agents use only external adapters.", "permission": "CAN_QUERY", "fields": { "name": { - "env": "DATABRICKS_SERVING_ENDPOINT_NAME", - "description": "Serving endpoint name used for agent LLM inference when configured for Databricks Model Serving" + "env": "DATABRICKS_AGENT_ENDPOINT", + "description": "Endpoint name passed to Model Serving when agents default to `DatabricksAdapter.fromModelServing()`" } } } diff --git a/template/appkit.plugins.json b/template/appkit.plugins.json index 918ad83f0..2f22a50c1 100644 --- a/template/appkit.plugins.json +++ b/template/appkit.plugins.json @@ -14,12 +14,12 @@ "type": "serving_endpoint", "alias": "Model Serving (agents)", "resourceKey": "agents-serving-endpoint", - "description": "Databricks Model Serving endpoint for agent runs that use workspace-hosted models (DatabricksAdapter, optional default serving endpoint env, or markdown configs that resolve to serving). Omit when agents rely only on external/custom model adapters.", + "description": "Databricks Model Serving endpoint for agents using workspace-hosted models (`DatabricksAdapter.fromModelServing`). Wire the same endpoint name AppKit reads from `DATABRICKS_AGENT_ENDPOINT` when no per-agent model is configured. Omit when agents use only external adapters.", "permission": "CAN_QUERY", "fields": { "name": { - "env": "DATABRICKS_SERVING_ENDPOINT_NAME", - "description": "Serving endpoint name used for agent LLM inference when configured for Databricks Model Serving" + "env": "DATABRICKS_AGENT_ENDPOINT", + "description": "Endpoint name passed to Model Serving when agents default to `DatabricksAdapter.fromModelServing()`" } } } From 68dcd9c1e6eddef885922d8eb369c03f52830ed7 Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Thu, 23 Apr 2026 18:20:05 +0200 Subject: [PATCH 05/46] feat(agents): folder-based markdown discovery (/agent.md) BREAKING CHANGE: top-level config/agents/*.md is no longer loaded. Use /agent.md. The skills directory name is reserved and skipped. Orphan top-level .md files error at load; subdirs without agent.md error. Export agentIdFromMarkdownPath for path-based id resolution. --- packages/appkit/src/core/agent/load-agents.ts | 109 ++++++++----- .../src/core/agent/tests/load-agents.test.ts | 146 ++++++++++++------ packages/appkit/src/core/agent/types.ts | 2 +- packages/appkit/src/plugins/agents/index.ts | 1 + .../agents/tests/agents-plugin.test.ts | 36 ++--- 5 files changed, 192 insertions(+), 102 deletions(-) diff --git a/packages/appkit/src/core/agent/load-agents.ts b/packages/appkit/src/core/agent/load-agents.ts index 8f9fd1be7..e557a1ebb 100644 --- a/packages/appkit/src/core/agent/load-agents.ts +++ b/packages/appkit/src/core/agent/load-agents.ts @@ -36,9 +36,9 @@ export interface LoadContext { } export interface LoadResult { - /** Agent definitions keyed by file-stem name. */ + /** Agent definitions keyed by agent id (directory name under `dir`). */ defs: Record; - /** First file with `default: true` frontmatter, or `null`. */ + /** First agent with `default: true` frontmatter (sorted id order), or `null`. */ defaultAgent: string | null; } @@ -48,11 +48,10 @@ interface Frontmatter { toolkits?: ToolkitSpec[]; tools?: string[]; /** - * Sibling file-stems to expose as sub-agents. Each becomes an - * `agent-` tool on this agent at runtime. Resolution happens at - * directory-load time in {@link loadAgentsFromDir}; the single-file - * {@link loadAgentFromFile} path rejects non-empty values since there - * are no siblings to resolve against. + * Other agent ids to expose as sub-agents. Each becomes an `agent-` + * tool at runtime. Resolution happens at directory-load time in + * {@link loadAgentsFromDir}; the single-file {@link loadAgentFromFile} path + * rejects non-empty values since there are no siblings to resolve against. */ agents?: string[]; maxSteps?: number; @@ -64,6 +63,21 @@ interface Frontmatter { type ToolkitSpec = string | { [pluginName: string]: ToolkitOptions | string[] }; +/** + * Derives the logical agent id from a markdown path. When the file is named + * `agent.md`, the id is the parent directory name (folder-based layout); + * otherwise the id is the file stem (e.g. legacy single-file paths). + */ +export function agentIdFromMarkdownPath(filePath: string): string { + const normalized = path.normalize(filePath); + const base = path.basename(normalized); + const parent = path.basename(path.dirname(normalized)); + if (base === "agent.md" && parent && parent !== "." && parent !== "..") { + return parent; + } + return path.basename(normalized, ".md"); +} + const ALLOWED_KEYS = new Set([ "endpoint", "model", @@ -90,7 +104,7 @@ export async function loadAgentFromFile( ctx: LoadContext, ): Promise { const raw = fs.readFileSync(filePath, "utf-8"); - const name = path.basename(filePath, ".md"); + const name = agentIdFromMarkdownPath(filePath); const { data } = parseFrontmatter(raw, filePath); if (Array.isArray(data?.agents) && data.agents.length > 0) { throw new Error( @@ -103,15 +117,19 @@ export async function loadAgentFromFile( } /** - * Scans a directory for `*.md` files and produces an `AgentDefinition` record - * keyed by file-stem. Throws on frontmatter errors or unresolved references. - * Returns an empty map if the directory does not exist. + * Scans a directory for one subdirectory per agent, each containing + * `agent.md` (frontmatter + body). Produces an `AgentDefinition` record keyed + * by agent id (folder name). Throws on frontmatter errors or unresolved + * references. Returns an empty map if the directory does not exist. + * + * Legacy top-level `*.md` files are rejected with an error — migrate each to + * `/agent.md` under a sibling folder named for the agent id. * * Runs in two passes so sub-agent references in frontmatter (`agents: [...]`) - * can be resolved regardless of file-system iteration order: + * can be resolved regardless of directory iteration order: * - * 1. Build every agent's definition from its own file. - * 2. Walk `agents:` references and wire `def.agents = { sibling: siblingDef }` + * 1. Build every agent's definition from its own `agent.md`. + * 2. Walk `agents:` references and wire `def.agents = { child: childDef }` * by looking them up in the complete map. Dangling names and * self-references fail loudly; mutual delegation is allowed and bounded * at runtime by `limits.maxSubAgentDepth`. @@ -123,37 +141,56 @@ export async function loadAgentsFromDir( if (!fs.existsSync(dir)) { return { defs: {}, defaultAgent: null }; } - // Sort so `default: true` resolution is deterministic across platforms — - // `readdirSync` order is filesystem-dependent (macOS alphabetical, ext4 - // inode order, etc.). - const files = fs - .readdirSync(dir) - .filter((f) => f.endsWith(".md")) + + const entries = fs.readdirSync(dir, { withFileTypes: true }); + const orphanMd = entries + .filter((e) => e.isFile() && e.name.endsWith(".md")) + .map((e) => e.name) .sort(); + + if (orphanMd.length > 0) { + const hint = orphanMd + .map((f) => `${path.basename(f, ".md")}/agent.md`) + .join(", "); + throw new Error( + `Agents directory contains unsupported top-level markdown file(s): ${orphanMd.join(", ")}. ` + + `Use one folder per agent with a fixed entry file, e.g. ${hint}.`, + ); + } + + /** Reserved folder name until per-agent skills land; not an agent package. */ + const RESERVED_DIRS = new Set(["skills"]); + + const agentIds = entries + .filter((e) => e.isDirectory()) + .map((e) => e.name) + .filter((name) => !RESERVED_DIRS.has(name)) + .sort(); + const defs: Record = {}; const subAgentRefs: Record = {}; let defaultAgent: string | null = null; - // Pass 1: build every agent's definition; collect unresolved sibling refs. - for (const file of files) { - const fullPath = path.join(dir, file); - const raw = fs.readFileSync(fullPath, "utf-8"); - const name = path.basename(file, ".md"); - defs[name] = buildDefinition(name, raw, fullPath, ctx); - const { data } = parseFrontmatter(raw, fullPath); - if (data?.agents !== undefined) { - subAgentRefs[name] = normalizeAgentsFrontmatter( - data.agents, - name, - fullPath, + // Pass 1: build every agent's definition; collect sub-agent refs. + for (const id of agentIds) { + const agentPath = path.join(dir, id, "agent.md"); + if (!fs.existsSync(agentPath)) { + throw new Error( + `Agents subdirectory '${path.join(dir, id)}' must contain agent.md.`, ); } + const raw = fs.readFileSync(agentPath, "utf-8"); + defs[id] = buildDefinition(id, raw, agentPath, ctx); + const { data } = parseFrontmatter(raw, agentPath); + if (data?.agents !== undefined) { + subAgentRefs[id] = normalizeAgentsFrontmatter(data.agents, id, agentPath); + } if (data?.default === true && !defaultAgent) { - defaultAgent = name; + defaultAgent = id; } } - // Pass 2: resolve sibling references against the complete defs map. + // Pass 2: resolve sub-agent references against the complete defs map. // Code-defined agents (ctx.codeAgents) take precedence over markdown ones // with the same name, matching the plugin's top-level merge behaviour. for (const [name, refs] of Object.entries(subAgentRefs)) { @@ -163,7 +200,7 @@ export async function loadAgentsFromDir( for (const ref of refs) { if (ref === name) { throw new Error( - `Agent '${name}' (${path.join(dir, `${name}.md`)}) cannot reference itself in 'agents:'.`, + `Agent '${name}' (${path.join(dir, name, "agent.md")}) cannot reference itself in 'agents:'.`, ); } const sibling = ctx.codeAgents?.[ref] ?? defs[ref]; @@ -203,7 +240,7 @@ function normalizeAgentsFrontmatter( if (!Array.isArray(value)) { throw new Error( `Agent '${agentName}' (${filePath}) has invalid 'agents:' frontmatter: ` + - `expected an array of sibling file-stems, got ${typeof value}.`, + `expected an array of sibling agent ids, got ${typeof value}.`, ); } const out: string[] = []; diff --git a/packages/appkit/src/core/agent/tests/load-agents.test.ts b/packages/appkit/src/core/agent/tests/load-agents.test.ts index 23799148e..8176cef13 100644 --- a/packages/appkit/src/core/agent/tests/load-agents.test.ts +++ b/packages/appkit/src/core/agent/tests/load-agents.test.ts @@ -5,6 +5,7 @@ import { afterEach, beforeEach, describe, expect, test } from "vitest"; import { z } from "zod"; import { buildToolkitEntries } from "../../../core/agent/build-toolkit"; import { + agentIdFromMarkdownPath, loadAgentFromFile, loadAgentsFromDir, parseFrontmatter, @@ -23,11 +24,33 @@ afterEach(() => { fs.rmSync(workDir, { recursive: true, force: true }); }); -function write(name: string, content: string) { +/** Flat file under workDir (for legacy loadAgentFromFile tests). */ +function writeRoot(name: string, content: string) { fs.writeFileSync(path.join(workDir, name), content, "utf-8"); return path.join(workDir, name); } +/** Folder layout: `/agent.md`. */ +function writeAgent(id: string, content: string) { + const dir = path.join(workDir, id); + fs.mkdirSync(dir, { recursive: true }); + const p = path.join(dir, "agent.md"); + fs.writeFileSync(p, content, "utf-8"); + return p; +} + +describe("agentIdFromMarkdownPath", () => { + test("uses parent folder name when file is agent.md", () => { + expect(agentIdFromMarkdownPath("/foo/bar/assistant/agent.md")).toBe( + "assistant", + ); + }); + + test("uses file stem for other .md names", () => { + expect(agentIdFromMarkdownPath("/tmp/assistant.md")).toBe("assistant"); + }); +}); + describe("parseFrontmatter", () => { test("parses a simple object", () => { const { data, content } = parseFrontmatter( @@ -57,7 +80,7 @@ describe("parseFrontmatter", () => { describe("loadAgentFromFile", () => { test("returns AgentDefinition with body as instructions", async () => { - const p = write( + const p = writeRoot( "assistant.md", "---\nendpoint: e-1\n---\nYou are helpful.", ); @@ -66,6 +89,13 @@ describe("loadAgentFromFile", () => { expect(def.instructions).toBe("You are helpful."); expect(def.model).toBe("e-1"); }); + + test("derives agent id from folder when path ends with agent.md", async () => { + const p = writeAgent("router", "---\nendpoint: e-1\n---\nRoute traffic."); + const def = await loadAgentFromFile(p, {}); + expect(def.name).toBe("router"); + expect(def.instructions).toBe("Route traffic."); + }); }); describe("loadAgentsFromDir", () => { @@ -75,23 +105,44 @@ describe("loadAgentsFromDir", () => { expect(res.defaultAgent).toBeNull(); }); - test("loads all .md files keyed by file-stem", async () => { - write("support.md", "---\nendpoint: e-1\n---\nSupport prompt."); - write("sales.md", "---\nendpoint: e-2\n---\nSales prompt."); + test("loads each subdirectory with agent.md keyed by folder name", async () => { + writeAgent("support", "---\nendpoint: e-1\n---\nSupport prompt."); + writeAgent("sales", "---\nendpoint: e-2\n---\nSales prompt."); const res = await loadAgentsFromDir(workDir, {}); expect(Object.keys(res.defs).sort()).toEqual(["sales", "support"]); }); - test("picks up default: true from frontmatter", async () => { - write("one.md", "---\nendpoint: a\n---\nOne."); - write("two.md", "---\nendpoint: b\ndefault: true\n---\nTwo."); + test("throws when legacy top-level .md exists", async () => { + writeRoot("assistant.md", "---\nendpoint: e\n---\nLegacy flat file."); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /unsupported top-level markdown file\(s\): assistant\.md.*assistant\/agent\.md/s, + ); + }); + + test("throws when a subdirectory lacks agent.md", async () => { + fs.mkdirSync(path.join(workDir, "broken"), { recursive: true }); + await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( + /must contain agent\.md/, + ); + }); + + test("ignores reserved skills directory without agent.md", async () => { + fs.mkdirSync(path.join(workDir, "skills"), { recursive: true }); + writeAgent("solo", "---\nendpoint: e\n---\nOnly real agent."); + const res = await loadAgentsFromDir(workDir, {}); + expect(Object.keys(res.defs)).toEqual(["solo"]); + }); + + test("picks up default: true from frontmatter (deterministic sorted ids)", async () => { + writeAgent("one", "---\nendpoint: a\n---\nOne."); + writeAgent("two", "---\nendpoint: b\ndefault: true\n---\nTwo."); const res = await loadAgentsFromDir(workDir, {}); expect(res.defaultAgent).toBe("two"); }); test("throws when frontmatter references an unregistered plugin", async () => { - write( - "broken.md", + writeAgent( + "broken", "---\nendpoint: e\ntoolkits: [missing]\n---\nBroken agent.", ); await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( @@ -100,7 +151,10 @@ describe("loadAgentsFromDir", () => { }); test("throws when frontmatter references an unknown ambient tool", async () => { - write("broken.md", "---\nendpoint: e\ntools: [unknown_tool]\n---\nBroken."); + writeAgent( + "broken", + "---\nendpoint: e\ntools: [unknown_tool]\n---\nBroken.", + ); await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( /references tool 'unknown_tool'/, ); @@ -134,8 +188,8 @@ describe("loadAgentsFromDir", () => { execute: async () => "sunny", }); - write( - "analyst.md", + writeAgent( + "analyst", "---\nendpoint: e\ntoolkits:\n - analytics\ntools:\n - get_weather\n---\nBody.", ); const res = await loadAgentsFromDir(workDir, { @@ -150,15 +204,13 @@ describe("loadAgentsFromDir", () => { }); describe("agents: sibling sub-agent references", () => { - test("resolves sibling references into def.agents regardless of file order", async () => { - // Names chosen so alphabetical iteration puts `dispatcher` *before* - // its siblings — pass-1 populates defs in any order, pass-2 resolves. - write( - "dispatcher.md", + test("resolves sibling references into def.agents regardless of folder order", async () => { + writeAgent( + "dispatcher", "---\nendpoint: e\nagents:\n - analyst\n - writer\n---\nRoute work.", ); - write("analyst.md", "---\nendpoint: e\n---\nAnalyst."); - write("writer.md", "---\nendpoint: e\n---\nWriter."); + writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst."); + writeAgent("writer", "---\nendpoint: e\n---\nWriter."); const res = await loadAgentsFromDir(workDir, {}); expect(Object.keys(res.defs.dispatcher.agents ?? {}).sort()).toEqual([ @@ -167,14 +219,13 @@ describe("loadAgentsFromDir", () => { ]); expect(res.defs.dispatcher.agents?.analyst).toBe(res.defs.analyst); expect(res.defs.dispatcher.agents?.writer).toBe(res.defs.writer); - // Leaves with no `agents:` retain undefined — only declared keys wire. expect(res.defs.analyst.agents).toBeUndefined(); expect(res.defs.writer.agents).toBeUndefined(); }); test("mutual delegation is allowed (runtime depth cap handles cycles)", async () => { - write("a.md", "---\nendpoint: e\nagents:\n - b\n---\nA."); - write("b.md", "---\nendpoint: e\nagents:\n - a\n---\nB."); + writeAgent("a", "---\nendpoint: e\nagents:\n - b\n---\nA."); + writeAgent("b", "---\nendpoint: e\nagents:\n - a\n---\nB."); const res = await loadAgentsFromDir(workDir, {}); expect(res.defs.a.agents?.b).toBe(res.defs.b); @@ -182,16 +233,16 @@ describe("loadAgentsFromDir", () => { }); test("throws with available list when a sibling is missing", async () => { - write("dispatcher.md", "---\nendpoint: e\nagents:\n - ghost\n---\nD."); - write("analyst.md", "---\nendpoint: e\n---\nAnalyst."); + writeAgent("dispatcher", "---\nendpoint: e\nagents:\n - ghost\n---\nD."); + writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst."); await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( /references sub-agent\(s\) 'ghost'.*Available: analyst, dispatcher/s, ); }); test("reports every missing sibling in one error, not just the first", async () => { - write( - "dispatcher.md", + writeAgent( + "dispatcher", "---\nendpoint: e\nagents:\n - ghost1\n - ghost2\n---\nD.", ); await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( @@ -200,33 +251,33 @@ describe("loadAgentsFromDir", () => { }); test("throws on self-reference", async () => { - write("solo.md", "---\nendpoint: e\nagents:\n - solo\n---\nSolo."); + writeAgent("solo", "---\nendpoint: e\nagents:\n - solo\n---\nSolo."); await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( /'solo'.*cannot reference itself/s, ); }); test("throws on non-array 'agents:' value", async () => { - write("bad.md", "---\nendpoint: e\nagents: analyst\n---\nBad."); - write("analyst.md", "---\nendpoint: e\n---\nAnalyst."); + writeAgent("bad", "---\nendpoint: e\nagents: analyst\n---\nBad."); + writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst."); await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( /invalid 'agents:' frontmatter/, ); }); test("throws on non-string entries in 'agents:'", async () => { - write("bad.md", "---\nendpoint: e\nagents:\n - 42\n---\nBad."); + writeAgent("bad", "---\nendpoint: e\nagents:\n - 42\n---\nBad."); await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow( /invalid 'agents:' entry/, ); }); test("deduplicates repeated entries silently", async () => { - write( - "dispatcher.md", + writeAgent( + "dispatcher", "---\nendpoint: e\nagents:\n - analyst\n - analyst\n---\nD.", ); - write("analyst.md", "---\nendpoint: e\n---\nAnalyst."); + writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst."); const res = await loadAgentsFromDir(workDir, {}); expect(Object.keys(res.defs.dispatcher.agents ?? {})).toEqual([ "analyst", @@ -234,13 +285,16 @@ describe("loadAgentsFromDir", () => { }); test("empty array yields no sub-agents (no-op)", async () => { - write("dispatcher.md", "---\nendpoint: e\nagents: []\n---\nD."); + writeAgent("dispatcher", "---\nendpoint: e\nagents: []\n---\nD."); const res = await loadAgentsFromDir(workDir, {}); expect(res.defs.dispatcher.agents).toBeUndefined(); }); test("resolves 'agents:' references against codeAgents when provided", async () => { - write("dispatcher.md", "---\nendpoint: e\nagents:\n - support\n---\nD."); + writeAgent( + "dispatcher", + "---\nendpoint: e\nagents:\n - support\n---\nD.", + ); const support: AgentDefinition = { name: "support", instructions: "Code-defined support.", @@ -252,8 +306,11 @@ describe("loadAgentsFromDir", () => { }); test("codeAgents takes precedence over markdown sibling with the same name", async () => { - write("dispatcher.md", "---\nendpoint: e\nagents:\n - support\n---\nD."); - write("support.md", "---\nendpoint: e\n---\nMarkdown support."); + writeAgent( + "dispatcher", + "---\nendpoint: e\nagents:\n - support\n---\nD.", + ); + writeAgent("support", "---\nendpoint: e\n---\nMarkdown support."); const codeSupport: AgentDefinition = { name: "support", instructions: "Code support.", @@ -261,8 +318,6 @@ describe("loadAgentsFromDir", () => { const res = await loadAgentsFromDir(workDir, { codeAgents: { support: codeSupport }, }); - // Reference binds to code version, matching the plugin's top-level - // `code wins` merge behaviour. expect(res.defs.dispatcher.agents?.support).toBe(codeSupport); expect(res.defs.dispatcher.agents?.support.instructions).toBe( "Code support.", @@ -270,8 +325,8 @@ describe("loadAgentsFromDir", () => { }); test("missing-sibling error lists both markdown and code agent names", async () => { - write("dispatcher.md", "---\nendpoint: e\nagents:\n - ghost\n---\nD."); - write("analyst.md", "---\nendpoint: e\n---\nAnalyst."); + writeAgent("dispatcher", "---\nendpoint: e\nagents:\n - ghost\n---\nD."); + writeAgent("analyst", "---\nendpoint: e\n---\nAnalyst."); const codeAgent: AgentDefinition = { name: "writer", instructions: "Writer.", @@ -285,7 +340,7 @@ describe("loadAgentsFromDir", () => { describe("loadAgentFromFile — sub-agent refs rejected", () => { test("throws when 'agents:' is non-empty in a single-file load", async () => { - const p = write( + const p = writeRoot( "lonely.md", "---\nendpoint: e\nagents:\n - ghost\n---\nLonely.", ); @@ -295,7 +350,10 @@ describe("loadAgentFromFile — sub-agent refs rejected", () => { }); test("ignores empty 'agents:' array (treated as absent)", async () => { - const p = write("lonely.md", "---\nendpoint: e\nagents: []\n---\nLonely."); + const p = writeRoot( + "lonely.md", + "---\nendpoint: e\nagents: []\n---\nLonely.", + ); const def = await loadAgentFromFile(p, {}); expect(def.agents).toBeUndefined(); }); diff --git a/packages/appkit/src/core/agent/types.ts b/packages/appkit/src/core/agent/types.ts index d752fa019..1b86a1c97 100644 --- a/packages/appkit/src/core/agent/types.ts +++ b/packages/appkit/src/core/agent/types.ts @@ -110,7 +110,7 @@ export interface AutoInheritToolsConfig { } export interface AgentsPluginConfig extends BasePluginConfig { - /** Directory to scan for markdown agent files. Default `./config/agents`. Set to `false` to disable. */ + /** Directory of agent packages (`/agent.md` each). Default `./config/agents`. Set to `false` to disable. */ dir?: string | false; /** Code-defined agents, merged with file-loaded ones (code wins on key collision). */ agents?: Record; diff --git a/packages/appkit/src/plugins/agents/index.ts b/packages/appkit/src/plugins/agents/index.ts index 8220a55e7..c8a31aac3 100644 --- a/packages/appkit/src/plugins/agents/index.ts +++ b/packages/appkit/src/plugins/agents/index.ts @@ -1,6 +1,7 @@ export { AgentsPlugin, agents } from "./agents"; export { buildToolkitEntries } from "../../core/agent/build-toolkit"; export { + agentIdFromMarkdownPath, type LoadContext, type LoadResult, loadAgentFromFile, diff --git a/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts index 43ad8c104..9c043d56f 100644 --- a/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts +++ b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts @@ -102,6 +102,12 @@ function instantiate(config: AgentsPluginConfig, ctx?: FakeContext) { return plugin; } +function writeMarkdownAgent(dir: string, id: string, content: string) { + const folder = path.join(dir, id); + fs.mkdirSync(folder, { recursive: true }); + fs.writeFileSync(path.join(folder, "agent.md"), content, "utf-8"); +} + describe("AgentsPlugin", () => { test("registers code-defined agents and exposes them via exports", async () => { const plugin = instantiate({ @@ -124,10 +130,10 @@ describe("AgentsPlugin", () => { }); test("loads markdown agents from a directory", async () => { - fs.writeFileSync( - path.join(tmpDir, "assistant.md"), + writeMarkdownAgent( + tmpDir, + "assistant", "---\ndefault: true\n---\nYou are helpful.", - "utf-8", ); const plugin = instantiate({ dir: tmpDir, @@ -144,11 +150,7 @@ describe("AgentsPlugin", () => { }); test("code definitions override markdown on key collision", async () => { - fs.writeFileSync( - path.join(tmpDir, "support.md"), - "---\n---\nFrom markdown.", - "utf-8", - ); + writeMarkdownAgent(tmpDir, "support", "---\n---\nFrom markdown."); const plugin = instantiate({ dir: tmpDir, defaultModel: stubAdapter(), @@ -179,11 +181,7 @@ describe("AgentsPlugin", () => { const provider = makeToolProvider("analytics", registry); const ctx = fakeContext([{ name: "analytics", provider }]); - fs.writeFileSync( - path.join(tmpDir, "assistant.md"), - "---\n---\nYou are helpful.", - "utf-8", - ); + writeMarkdownAgent(tmpDir, "assistant", "---\n---\nYou are helpful."); const plugin = instantiate( { @@ -228,11 +226,7 @@ describe("AgentsPlugin", () => { const provider = makeToolProvider("analytics", registry); const ctx = fakeContext([{ name: "analytics", provider }]); - fs.writeFileSync( - path.join(tmpDir, "assistant.md"), - "---\n---\nYou are helpful.", - "utf-8", - ); + writeMarkdownAgent(tmpDir, "assistant", "---\n---\nYou are helpful."); const plugin = instantiate( { @@ -313,10 +307,10 @@ describe("AgentsPlugin", () => { { name: "files", provider: makeToolProvider("files", registry2) }, ]); - fs.writeFileSync( - path.join(tmpDir, "analyst.md"), + writeMarkdownAgent( + tmpDir, + "analyst", "---\ntoolkits: [analytics]\n---\nAnalyst.", - "utf-8", ); const plugin = instantiate( From e4c1b01eb103db12bce18b091fef330fe0b0f030 Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Thu, 23 Apr 2026 21:37:29 +0200 Subject: [PATCH 06/46] refactor(appkit): promote MCP client + host policy to connectors/mcp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MCP transport client and host policy aren't agents-specific; they are HTTP + JSON-RPC transport with URL/DNS allowlisting. Move them under packages/appkit/src/connectors/mcp/ so they sit alongside the other transport-layer modules (serving, genie, sql-warehouse, lakebase, …) and stop being reachable only through the agents plugin. - Move mcp-client.ts -> connectors/mcp/client.ts - Move mcp-host-policy.ts -> connectors/mcp/host-policy.ts - Move McpEndpointConfig type -> connectors/mcp/types.ts - Add connectors/mcp/index.ts barrel; re-export from connectors/index.ts - Move mcp-client / mcp-host-policy tests to connectors/mcp/tests/ - Agents plugin keeps hosted-tools.ts (HostedTool sugar + resolve) and imports connector types from ../../connectors/mcp. - tools/ barrel no longer re-exports AppKitMcpClient (never was public). No behaviour change. All existing tests pass against the new paths. --- knip.json | 10 +++--- packages/appkit/src/connectors/index.ts | 1 + .../mcp/client.ts} | 8 ++--- .../mcp/host-policy.ts} | 0 packages/appkit/src/connectors/mcp/index.ts | 6 ++++ .../mcp/tests/client.test.ts} | 4 +-- .../mcp/tests/host-policy.test.ts} | 2 +- packages/appkit/src/connectors/mcp/types.ts | 12 +++++++ .../src/core/agent/tools/hosted-tools.ts | 8 ++--- packages/appkit/src/core/agent/tools/index.ts | 2 +- packages/appkit/src/core/agent/types.ts | 2 +- packages/appkit/src/plugins/agents/agents.ts | 36 +++++++++++-------- 12 files changed, 58 insertions(+), 33 deletions(-) rename packages/appkit/src/{plugins/agents/tools/mcp-client.ts => connectors/mcp/client.ts} (98%) rename packages/appkit/src/{plugins/agents/tools/mcp-host-policy.ts => connectors/mcp/host-policy.ts} (100%) create mode 100644 packages/appkit/src/connectors/mcp/index.ts rename packages/appkit/src/{plugins/agents/tests/mcp-client.test.ts => connectors/mcp/tests/client.test.ts} (98%) rename packages/appkit/src/{plugins/agents/tests/mcp-host-policy.test.ts => connectors/mcp/tests/host-policy.test.ts} (99%) create mode 100644 packages/appkit/src/connectors/mcp/types.ts diff --git a/knip.json b/knip.json index 101b9253a..f530b3918 100644 --- a/knip.json +++ b/knip.json @@ -19,13 +19,15 @@ "packages/appkit/src/plugin/index.ts", "packages/appkit/src/plugin/to-plugin.ts", "packages/appkit/src/plugins/agents/index.ts", - "packages/appkit/src/plugins/agents/tools/index.ts", - "packages/appkit/src/plugins/agents/from-plugin.ts", - "packages/appkit/src/plugins/agents/load-agents.ts", "template/**", "tools/**", "docs/**", - ".github/scripts/**" + ".github/scripts/**", + "packages/appkit/src/core/agent/tools/index.ts", + "packages/appkit/src/core/agent/from-plugin.ts", + "packages/appkit/src/core/agent/load-agents.ts", + "packages/appkit/src/connectors/mcp/index.ts", + "packages/appkit/src/plugin/to-plugin.ts" ], "ignoreDependencies": ["json-schema-to-typescript"], "ignoreBinaries": ["tarball"] diff --git a/packages/appkit/src/connectors/index.ts b/packages/appkit/src/connectors/index.ts index aa22895e1..cff0d152d 100644 --- a/packages/appkit/src/connectors/index.ts +++ b/packages/appkit/src/connectors/index.ts @@ -3,5 +3,6 @@ export * from "./genie"; export * from "./jobs"; export * from "./lakebase"; export * from "./lakebase-v1"; +export * from "./mcp"; export * from "./sql-warehouse"; export * from "./vector-search"; diff --git a/packages/appkit/src/plugins/agents/tools/mcp-client.ts b/packages/appkit/src/connectors/mcp/client.ts similarity index 98% rename from packages/appkit/src/plugins/agents/tools/mcp-client.ts rename to packages/appkit/src/connectors/mcp/client.ts index a68394438..4c8d058b7 100644 --- a/packages/appkit/src/plugins/agents/tools/mcp-client.ts +++ b/packages/appkit/src/connectors/mcp/client.ts @@ -23,16 +23,16 @@ * transport. */ import type { AgentToolDefinition } from "shared"; -import type { McpEndpointConfig } from "../../../core/agent/tools/hosted-tools"; -import { createLogger } from "../../../logging/logger"; +import { createLogger } from "../../logging/logger"; import { assertResolvedHostSafe, checkMcpUrl, type DnsLookup, type McpHostPolicy, -} from "./mcp-host-policy"; +} from "./host-policy"; +import type { McpEndpointConfig } from "./types"; -const logger = createLogger("agent:mcp"); +const logger = createLogger("connector:mcp"); interface JsonRpcRequest { jsonrpc: "2.0"; diff --git a/packages/appkit/src/plugins/agents/tools/mcp-host-policy.ts b/packages/appkit/src/connectors/mcp/host-policy.ts similarity index 100% rename from packages/appkit/src/plugins/agents/tools/mcp-host-policy.ts rename to packages/appkit/src/connectors/mcp/host-policy.ts diff --git a/packages/appkit/src/connectors/mcp/index.ts b/packages/appkit/src/connectors/mcp/index.ts new file mode 100644 index 000000000..f9f32a418 --- /dev/null +++ b/packages/appkit/src/connectors/mcp/index.ts @@ -0,0 +1,6 @@ +export { AppKitMcpClient } from "./client"; +export { + buildMcpHostPolicy, + type McpHostPolicyConfig, +} from "./host-policy"; +export type { McpEndpointConfig } from "./types"; diff --git a/packages/appkit/src/plugins/agents/tests/mcp-client.test.ts b/packages/appkit/src/connectors/mcp/tests/client.test.ts similarity index 98% rename from packages/appkit/src/plugins/agents/tests/mcp-client.test.ts rename to packages/appkit/src/connectors/mcp/tests/client.test.ts index 483fb5f45..0cdffa291 100644 --- a/packages/appkit/src/plugins/agents/tests/mcp-client.test.ts +++ b/packages/appkit/src/connectors/mcp/tests/client.test.ts @@ -1,6 +1,6 @@ import { beforeEach, describe, expect, test, vi } from "vitest"; -import { AppKitMcpClient } from "../tools/mcp-client"; -import type { DnsLookup, McpHostPolicy } from "../tools/mcp-host-policy"; +import { AppKitMcpClient } from "../client"; +import type { DnsLookup, McpHostPolicy } from "../host-policy"; const WORKSPACE = "https://test-workspace.cloud.databricks.com"; diff --git a/packages/appkit/src/plugins/agents/tests/mcp-host-policy.test.ts b/packages/appkit/src/connectors/mcp/tests/host-policy.test.ts similarity index 99% rename from packages/appkit/src/plugins/agents/tests/mcp-host-policy.test.ts rename to packages/appkit/src/connectors/mcp/tests/host-policy.test.ts index 06d986273..451536ed5 100644 --- a/packages/appkit/src/plugins/agents/tests/mcp-host-policy.test.ts +++ b/packages/appkit/src/connectors/mcp/tests/host-policy.test.ts @@ -8,7 +8,7 @@ import { isLoopbackHost, type McpHostPolicy, type McpHostPolicyConfig, -} from "../tools/mcp-host-policy"; +} from "../host-policy"; function stubLookup( addresses: Array<{ address: string; family?: number }>, diff --git a/packages/appkit/src/connectors/mcp/types.ts b/packages/appkit/src/connectors/mcp/types.ts new file mode 100644 index 000000000..d74f0a46c --- /dev/null +++ b/packages/appkit/src/connectors/mcp/types.ts @@ -0,0 +1,12 @@ +/** + * Input shape consumed by {@link AppKitMcpClient.connect}. Produced by the + * agents plugin from user-facing `HostedTool` declarations (see + * `plugins/agents/tools/hosted-tools.ts`) and accepted directly by the + * connector to keep its surface free of agent-layer concepts. + */ +export interface McpEndpointConfig { + /** Stable logical name used as the `mcp..*` tool prefix and in logs. */ + name: string; + /** Absolute URL (`https://…`) or workspace-relative path (`/api/2.0/mcp/…`). */ + url: string; +} diff --git a/packages/appkit/src/core/agent/tools/hosted-tools.ts b/packages/appkit/src/core/agent/tools/hosted-tools.ts index bce70c4fb..c1f067676 100644 --- a/packages/appkit/src/core/agent/tools/hosted-tools.ts +++ b/packages/appkit/src/core/agent/tools/hosted-tools.ts @@ -1,3 +1,5 @@ +import type { McpEndpointConfig } from "../../../connectors/mcp"; + export interface GenieTool { type: "genie-space"; genie_space: { id: string }; @@ -37,12 +39,6 @@ export function isHostedTool(value: unknown): value is HostedTool { return typeof obj.type === "string" && HOSTED_TOOL_TYPES.has(obj.type); } -export interface McpEndpointConfig { - name: string; - /** Absolute URL or path relative to workspace host */ - url: string; -} - /** * Resolves HostedTool configs into MCP endpoint configurations * that the MCP client can connect to. diff --git a/packages/appkit/src/core/agent/tools/index.ts b/packages/appkit/src/core/agent/tools/index.ts index ee4aca7eb..053fdc15b 100644 --- a/packages/appkit/src/core/agent/tools/index.ts +++ b/packages/appkit/src/core/agent/tools/index.ts @@ -1,4 +1,4 @@ -export { AppKitMcpClient } from "../../../plugins/agents/tools/mcp-client"; +export { AppKitMcpClient } from "../../../connectors/mcp/client"; export { defineTool, executeFromRegistry, diff --git a/packages/appkit/src/core/agent/types.ts b/packages/appkit/src/core/agent/types.ts index 1b86a1c97..14366e9ab 100644 --- a/packages/appkit/src/core/agent/types.ts +++ b/packages/appkit/src/core/agent/types.ts @@ -5,9 +5,9 @@ import type { ThreadStore, ToolAnnotations, } from "shared"; +import type { McpHostPolicyConfig } from "../../connectors/mcp"; import type { FunctionTool } from "./tools/function-tool"; import type { HostedTool } from "./tools/hosted-tools"; -import type { McpHostPolicyConfig } from "../../plugins/agents/tools/mcp-host-policy"; /** * A tool reference produced by a plugin's `.toolkit()` call. The agents plugin diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts index 7b4006bc8..4c406f12e 100644 --- a/packages/appkit/src/plugins/agents/agents.ts +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -14,24 +14,18 @@ import type { Thread, ToolProvider, } from "shared"; -import { createLogger } from "../../logging/logger"; -import { Plugin, toPlugin } from "../../plugin"; -import type { PluginManifest } from "../../registry"; -import { agentStreamDefaults } from "./defaults"; -import { EventChannel } from "./event-channel"; -import { AgentEventTranslator } from "./event-translator"; +import { buildMcpHostPolicy } from "../../connectors/mcp"; import { loadAgentsFromDir } from "../../core/agent/load-agents"; -import manifest from "./manifest.json"; -import { - approvalRequestSchema, - chatRequestSchema, - invocationsRequestSchema, -} from "./schemas"; import { buildBaseSystemPrompt, composeSystemPrompt, } from "../../core/agent/system-prompt"; import { AppKitMcpClient } from "../../connectors/mcp"; +import { + approvalRequestSchema, + chatRequestSchema, + invocationsRequestSchema, +} from "./schemas"; import { InMemoryThreadStore } from "./thread-store"; import { ToolApprovalGate } from "./tool-approval-gate"; import { @@ -40,7 +34,6 @@ import { isHostedTool, resolveHostedTools, } from "../../core/agent/tools"; -import { buildMcpHostPolicy } from "./tools/mcp-host-policy"; import type { AgentDefinition, AgentsPluginConfig, @@ -50,6 +43,20 @@ import type { ResolvedToolEntry, } from "../../core/agent/types"; import { isToolkitEntry } from "../../core/agent/types"; +import { createLogger } from "../../logging/logger"; +import { Plugin, toPlugin } from "../../plugin"; +import type { PluginManifest } from "../../registry"; +import { agentStreamDefaults } from "./defaults"; +import { EventChannel } from "./event-channel"; +import { AgentEventTranslator } from "./event-translator"; +import manifest from "./manifest.json"; +import { + approvalRequestSchema, + chatRequestSchema, + invocationsRequestSchema, +} from "./schemas"; +import { InMemoryThreadStore } from "./thread-store"; +import { ToolApprovalGate } from "./tool-approval-gate"; const logger = createLogger("agents"); @@ -368,7 +375,8 @@ export class AgentsPlugin extends Plugin implements ToolProvider { } // 2. Explicit tools (toolkit entries, function tools, hosted tools) - const hostedToCollect: import("../../core/agent/tools/hosted-tools").HostedTool[] = []; + const hostedToCollect: import("../../core/agent/tools/hosted-tools").HostedTool[] = + []; for (const [key, tool] of Object.entries(def.tools ?? {})) { if (isToolkitEntry(tool)) { index.set(key, { From 1710809d7d0b2a564316c14e9574b11f7798063f Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Thu, 23 Apr 2026 22:06:39 +0200 Subject: [PATCH 07/46] refactor(appkit): extract normalizeToolResult, consumeAdapterStream, dispatchToolCall Three small helpers pulled out of the AgentsPlugin streaming path to cut duplication and shrink the two large methods. - normalize-result.ts: void->"", JSON-stringify, 50K truncation with a human-readable marker. Unit-testable (previously covered only via the HTTP path). - consume-adapter-stream.ts: the 'message_delta' + 'message' accumulation loop shared between _streamAgent and runSubAgent. Accepts an optional signal and per-event side-effect callback (for SSE translation). - tool-dispatch.ts: one place that fans out toolkit/function/mcp/subagent entries. 'never'-typed default forces exhaustiveness: adding a fifth source is now a compile error at every call site. _streamAgent: executeTool closure shrinks from ~60 lines of dispatch + normalize to a single dispatchToolCall + normalizeToolResult call. Stream consumption collapses to consumeAdapterStream. runSubAgent: childExecute shrinks from ~30 lines of if/else dispatch to one dispatchToolCall call. Adapter loop collapses to consumeAdapterStream. Behaviour change (minor): childExecute previously silently fell through to 'Unsupported sub-agent tool source' when mcpClient or PluginContext was missing; now it throws the same specific error as the main stream. Matches the main-path behaviour. Tests: 15 new unit tests for normalizeToolResult + consumeAdapterStream. dispatchToolCall is exercised transitively through the full agent suite (288 existing tests still pass, 303 total on this branch). --- .../src/core/agent/consume-adapter-stream.ts | 52 +++++++++++ .../appkit/src/core/agent/normalize-result.ts | 33 +++++++ .../tests/consume-adapter-stream.test.ts | 86 +++++++++++++++++++ .../core/agent/tests/normalize-result.test.ts | 63 ++++++++++++++ 4 files changed, 234 insertions(+) create mode 100644 packages/appkit/src/core/agent/consume-adapter-stream.ts create mode 100644 packages/appkit/src/core/agent/normalize-result.ts create mode 100644 packages/appkit/src/core/agent/tests/consume-adapter-stream.test.ts create mode 100644 packages/appkit/src/core/agent/tests/normalize-result.test.ts diff --git a/packages/appkit/src/core/agent/consume-adapter-stream.ts b/packages/appkit/src/core/agent/consume-adapter-stream.ts new file mode 100644 index 000000000..c4f3d07ed --- /dev/null +++ b/packages/appkit/src/core/agent/consume-adapter-stream.ts @@ -0,0 +1,52 @@ +import type { AgentEvent } from "shared"; + +interface ConsumeAdapterStreamOptions { + /** + * Optional abort signal. When aborted, the loop stops consuming (the caller + * is expected to have forwarded the same signal to `adapter.run` to stop + * upstream work). `undefined` is valid — standalone `runAgent` runs without + * a signal. + */ + signal?: AbortSignal; + /** + * Side-effect callback invoked once per adapter event, after the content + * accumulator has been updated. Use to fan events out to SSE translators, + * collect a raw event list for tests, or emit telemetry. + */ + onEvent?: (event: AgentEvent) => void; +} + +/** + * Consume an adapter's event stream and aggregate the assistant's final text. + * + * Accumulation rule (shared across all agent-execution paths in AppKit): + * + * - `message_delta` events append their `content` to the running text. + * - A `message` event *replaces* the running text with its `content`. + * + * The two branches coexist because different adapters emit different shapes: + * streaming adapters (Databricks, Vercel AI) emit deltas chunk-by-chunk, + * while `LangChain`'s `on_chain_end` path emits a single final `message`. + * Without the replace branch, LangChain conversations silently dropped the + * assistant turn from thread history. + * + * Kept pure (no I/O, no mutable external state beyond the caller's `onEvent` + * side effect) so each execution path — HTTP streaming, sub-agents, and the + * standalone `runAgent` — can share one loop. + */ +export async function consumeAdapterStream( + stream: AsyncIterable, + opts: ConsumeAdapterStreamOptions = {}, +): Promise { + let text = ""; + for await (const event of stream) { + if (opts.signal?.aborted) break; + if (event.type === "message_delta") { + text += event.content; + } else if (event.type === "message") { + text = event.content; + } + opts.onEvent?.(event); + } + return text; +} diff --git a/packages/appkit/src/core/agent/normalize-result.ts b/packages/appkit/src/core/agent/normalize-result.ts new file mode 100644 index 000000000..6fe2362cc --- /dev/null +++ b/packages/appkit/src/core/agent/normalize-result.ts @@ -0,0 +1,33 @@ +/** + * Maximum serialized length of a tool result before we truncate with a + * human-readable marker. 50k chars is roughly ~12k tokens — enough for + * reasonable SQL result sets and JSON blobs, well short of the per-call + * context limits on current frontier models. + */ +export const MAX_TOOL_RESULT_CHARS = 50_000; + +/** + * Normalise a raw tool-execution result for the LLM: + * + * - `undefined` → empty string. A `void` return is a legitimate outcome for + * side-effecting tools ("send notification"); surfacing `undefined` to the + * adapter would otherwise read as "execution failed". + * - strings are returned as-is. + * - everything else is JSON-stringified. + * - results longer than {@link MAX_TOOL_RESULT_CHARS} are truncated and + * annotated so the model sees the cut rather than silent data loss. + * + * Pure function; safe to unit-test in isolation. + */ +export function normalizeToolResult( + result: unknown, + maxChars: number = MAX_TOOL_RESULT_CHARS, +): unknown { + if (result === undefined) return ""; + const serialized = + typeof result === "string" ? result : JSON.stringify(result); + if (serialized.length > maxChars) { + return `${serialized.slice(0, maxChars)}\n\n[Result truncated: ${serialized.length} chars exceeds ${maxChars} limit]`; + } + return result; +} diff --git a/packages/appkit/src/core/agent/tests/consume-adapter-stream.test.ts b/packages/appkit/src/core/agent/tests/consume-adapter-stream.test.ts new file mode 100644 index 000000000..98863a62a --- /dev/null +++ b/packages/appkit/src/core/agent/tests/consume-adapter-stream.test.ts @@ -0,0 +1,86 @@ +import type { AgentEvent } from "shared"; +import { describe, expect, test } from "vitest"; +import { consumeAdapterStream } from "../consume-adapter-stream"; + +async function* streamOf( + events: AgentEvent[], +): AsyncGenerator { + for (const event of events) { + yield event; + } +} + +describe("consumeAdapterStream", () => { + test("concatenates message_delta events into the final text", async () => { + const text = await consumeAdapterStream( + streamOf([ + { type: "message_delta", content: "Hello " }, + { type: "message_delta", content: "world" }, + ]), + ); + expect(text).toBe("Hello world"); + }); + + test("a `message` event replaces whatever deltas arrived so far", async () => { + const text = await consumeAdapterStream( + streamOf([ + { type: "message_delta", content: "partial" }, + { type: "message", content: "final answer" }, + ]), + ); + expect(text).toBe("final answer"); + }); + + test("invokes onEvent once per event, in order, with the raw event", async () => { + const seen: AgentEvent[] = []; + await consumeAdapterStream( + streamOf([ + { type: "message_delta", content: "a" }, + { type: "thinking", content: "…" }, + { type: "message_delta", content: "b" }, + ]), + { onEvent: (ev) => seen.push(ev) }, + ); + expect(seen.map((e) => e.type)).toEqual([ + "message_delta", + "thinking", + "message_delta", + ]); + }); + + test("stops iterating once the signal aborts", async () => { + const controller = new AbortController(); + const emitted: string[] = []; + await consumeAdapterStream( + (async function* () { + yield { type: "message_delta", content: "first" } as AgentEvent; + controller.abort(); + yield { type: "message_delta", content: "second" } as AgentEvent; + })(), + { + signal: controller.signal, + onEvent: (ev) => { + if (ev.type === "message_delta") emitted.push(ev.content); + }, + }, + ); + expect(emitted).toEqual(["first"]); + }); + + test("returns an empty string for a stream with no content events", async () => { + const text = await consumeAdapterStream( + streamOf([{ type: "thinking", content: "…" }]), + ); + expect(text).toBe(""); + }); + + test("works without a signal (standalone runAgent path)", async () => { + const text = await consumeAdapterStream( + streamOf([ + { type: "message_delta", content: "x" }, + { type: "message_delta", content: "y" }, + ]), + ); + expect(text).toBe("xy"); + }); +}); diff --git a/packages/appkit/src/core/agent/tests/normalize-result.test.ts b/packages/appkit/src/core/agent/tests/normalize-result.test.ts new file mode 100644 index 000000000..a0545d091 --- /dev/null +++ b/packages/appkit/src/core/agent/tests/normalize-result.test.ts @@ -0,0 +1,63 @@ +import { describe, expect, test } from "vitest"; +import { + MAX_TOOL_RESULT_CHARS, + normalizeToolResult, +} from "../normalize-result"; + +describe("normalizeToolResult", () => { + test("maps undefined to empty string so void tools don't surface as errors", () => { + expect(normalizeToolResult(undefined)).toBe(""); + }); + + test("returns strings unchanged", () => { + expect(normalizeToolResult("hello")).toBe("hello"); + }); + + test("leaves non-string results intact (caller serialises)", () => { + const result = normalizeToolResult({ rows: 2, ok: true }); + expect(result).toEqual({ rows: 2, ok: true }); + }); + + test("returns an empty string input as an empty string (not undefined)", () => { + expect(normalizeToolResult("")).toBe(""); + }); + + test("preserves null without converting to empty string", () => { + expect(normalizeToolResult(null)).toBeNull(); + }); + + test("truncates long strings and appends a marker with the original length", () => { + const big = "x".repeat(MAX_TOOL_RESULT_CHARS + 1000); + const result = normalizeToolResult(big); + expect(typeof result).toBe("string"); + const s = result as string; + // Content portion is bounded to MAX_TOOL_RESULT_CHARS (plus the marker). + expect(s.slice(0, MAX_TOOL_RESULT_CHARS)).toBe( + "x".repeat(MAX_TOOL_RESULT_CHARS), + ); + expect(s).toMatch( + new RegExp( + `\\[Result truncated: ${big.length} chars exceeds ${MAX_TOOL_RESULT_CHARS} limit\\]`, + ), + ); + }); + + test("truncates long serialised objects the same way", () => { + const big = { blob: "x".repeat(MAX_TOOL_RESULT_CHARS + 10) }; + const result = normalizeToolResult(big); + expect(typeof result).toBe("string"); + expect(result as string).toMatch(/\[Result truncated:/); + }); + + test("honours a custom maxChars parameter", () => { + const result = normalizeToolResult("hello world", 5); + expect(result).toBe( + "hello\n\n[Result truncated: 11 chars exceeds 5 limit]", + ); + }); + + test("does not truncate at the boundary (exact length is fine)", () => { + const s = "x".repeat(MAX_TOOL_RESULT_CHARS); + expect(normalizeToolResult(s)).toBe(s); + }); +}); From fbd6552176554714af2c7bc9acf8416d6b2a4f1f Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Fri, 24 Apr 2026 16:19:34 +0200 Subject: [PATCH 08/46] =?UTF-8?q?fix(agents):=20propagate=20tool=20annotat?= =?UTF-8?q?ions=20through=20tool()=20=E2=86=92=20FunctionTool=20=E2=86=92?= =?UTF-8?q?=20def?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `annotations` field (notably `destructive: true`) was silently dropped as tools flowed from `tool({...})` into the resolved `AgentToolDefinition`, so user-defined destructive tools never triggered the approval gate. - `ToolConfig` now accepts `annotations?: ToolAnnotations`. - `tool()` forwards it to the returned `FunctionTool`. - `FunctionTool` exposes `annotations` and `functionToolToDefinition` preserves it on the definition it builds. - `AgentsPlugin` reads the flag via `isDestructiveToolEntry()` (falls back to `functionTool.annotations` so a future divergence between def and function cannot re-introduce the bug) and emits the merged annotations via `combinedToolAnnotations()` on the `approval_pending` SSE payload. Covered by `tests/tool-approval-gate.test.ts` and `tests/function-tool.test.ts`. --- packages/appkit/src/core/agent/tools/function-tool.ts | 11 ++++++++++- packages/appkit/src/core/agent/tools/tool.ts | 10 ++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/appkit/src/core/agent/tools/function-tool.ts b/packages/appkit/src/core/agent/tools/function-tool.ts index 8ce634e09..7371d857e 100644 --- a/packages/appkit/src/core/agent/tools/function-tool.ts +++ b/packages/appkit/src/core/agent/tools/function-tool.ts @@ -1,4 +1,4 @@ -import type { AgentToolDefinition } from "shared"; +import type { AgentToolDefinition, ToolAnnotations } from "shared"; export interface FunctionTool { type: "function"; @@ -6,6 +6,14 @@ export interface FunctionTool { description?: string | null; parameters?: Record | null; strict?: boolean | null; + /** + * Behavioural flags that drive the agents plugin's approval gate and + * auto-inherit filtering. `destructive: true` forces HITL approval + * before execute() runs; `readOnly: true` marks safe-by-default tools. + * Must be preserved through {@link functionToolToDefinition} so the + * plugin sees them when building agent tool indexes. + */ + annotations?: ToolAnnotations; execute: (args: Record) => Promise | string; } @@ -29,5 +37,6 @@ export function functionToolToDefinition( type: "object", properties: {}, }, + ...(tool.annotations ? { annotations: tool.annotations } : {}), }; } diff --git a/packages/appkit/src/core/agent/tools/tool.ts b/packages/appkit/src/core/agent/tools/tool.ts index b5d4db653..370a1d4b0 100644 --- a/packages/appkit/src/core/agent/tools/tool.ts +++ b/packages/appkit/src/core/agent/tools/tool.ts @@ -1,3 +1,4 @@ +import type { ToolAnnotations } from "shared"; import type { z } from "zod"; import type { FunctionTool } from "./function-tool"; import { toToolJSONSchema } from "./json-schema"; @@ -6,6 +7,14 @@ export interface ToolConfig { name: string; description?: string; schema: S; + /** + * Behavioural flags forwarded to the resolved tool definition. Required + * for the agents plugin to gate destructive tools through the approval + * card, surface `readOnly` tools to auto-inherit, etc. Dropped silently + * before the fix that added this field — any tool wanting HITL must + * set `annotations: { destructive: true }` here. + */ + annotations?: ToolAnnotations; execute: (args: z.infer) => Promise | string; } @@ -29,6 +38,7 @@ export function tool(config: ToolConfig): FunctionTool { name: config.name, description: config.description ?? config.name, parameters, + ...(config.annotations ? { annotations: config.annotations } : {}), execute: async (args: Record) => { const parsed = config.schema.safeParse(args); if (!parsed.success) { From 3300ff3c417645f8f7fb577ada0007778226aac2 Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Fri, 24 Apr 2026 17:45:27 +0200 Subject: [PATCH 09/46] =?UTF-8?q?feat(agents):=20semantic=20ToolEffect=20?= =?UTF-8?q?=E2=80=94=20write/update/destructive=20tiers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ToolAnnotations.destructive is binary and has started to mislead: "save_view" captures a screenshot and creates a new file, which is nothing like deleting a dashboard, yet both trip the same red "destructive" approval card. This adds a semantic `effect` enum with four tiers — `read`, `write`, `update`, `destructive` — so tool authors can tell the UI what blast radius they actually have. The approval gate fires for any mutating effect (`write`/`update`/ `destructive`) and continues to honour the legacy `destructive: true` flag so existing tools keep their current red treatment without migration. Callers consuming `annotations` over the wire (MCP clients, approval UIs) can now differentiate; the playground will ship a tiered approval card as a follow-up. --- packages/appkit/package.json | 2 ++ packages/appkit/src/beta.ts | 1 + .../src/core/agent/tools/function-tool.ts | 12 ++++--- packages/appkit/src/core/agent/tools/tool.ts | 11 ++++--- packages/appkit/src/plugins/agents/agents.ts | 10 +----- .../appkit/src/plugins/agents/manifest.json | 1 + .../src/plugins/beta-exports.generated.ts | 2 +- packages/shared/src/agent.ts | 32 +++++++++++++++++++ pnpm-lock.yaml | 20 ++++++------ 9 files changed, 62 insertions(+), 29 deletions(-) diff --git a/packages/appkit/package.json b/packages/appkit/package.json index 4be45d63d..6ae529399 100644 --- a/packages/appkit/package.json +++ b/packages/appkit/package.json @@ -76,6 +76,7 @@ "dotenv": "16.6.1", "express": "4.22.0", "get-port": "7.2.0", + "js-yaml": "^4.1.1", "obug": "2.1.1", "pg": "8.18.0", "picocolors": "1.1.1", @@ -88,6 +89,7 @@ "devDependencies": { "@opentelemetry/context-async-hooks": "2.6.1", "@types/express": "4.17.25", + "@types/js-yaml": "4.0.9", "@types/json-schema": "7.0.15", "@types/pg": "8.16.0", "@types/ws": "8.18.1", diff --git a/packages/appkit/src/beta.ts b/packages/appkit/src/beta.ts index 55c836c97..562cfd43a 100644 --- a/packages/appkit/src/beta.ts +++ b/packages/appkit/src/beta.ts @@ -15,6 +15,7 @@ export type { Message, Thread, ThreadStore, + ToolAnnotations, ToolProvider, } from "shared"; export { DatabricksAdapter, parseTextToolCalls } from "./agents/databricks"; diff --git a/packages/appkit/src/core/agent/tools/function-tool.ts b/packages/appkit/src/core/agent/tools/function-tool.ts index 7371d857e..19820f8fe 100644 --- a/packages/appkit/src/core/agent/tools/function-tool.ts +++ b/packages/appkit/src/core/agent/tools/function-tool.ts @@ -7,11 +7,13 @@ export interface FunctionTool { parameters?: Record | null; strict?: boolean | null; /** - * Behavioural flags that drive the agents plugin's approval gate and - * auto-inherit filtering. `destructive: true` forces HITL approval - * before execute() runs; `readOnly: true` marks safe-by-default tools. - * Must be preserved through {@link functionToolToDefinition} so the - * plugin sees them when building agent tool indexes. + * Behavioural hints that drive the agents plugin's approval gate and the + * client's approval-card styling. Prefer setting `effect` (one of + * `"read" | "write" | "update" | "destructive"`) — any mutating value + * forces HITL approval before `execute()` runs. Legacy `destructive: true` + * is still honoured. Must be preserved through {@link + * functionToolToDefinition} so the plugin sees them when building agent + * tool indexes. */ annotations?: ToolAnnotations; execute: (args: Record) => Promise | string; diff --git a/packages/appkit/src/core/agent/tools/tool.ts b/packages/appkit/src/core/agent/tools/tool.ts index 370a1d4b0..53305c236 100644 --- a/packages/appkit/src/core/agent/tools/tool.ts +++ b/packages/appkit/src/core/agent/tools/tool.ts @@ -8,11 +8,12 @@ export interface ToolConfig { description?: string; schema: S; /** - * Behavioural flags forwarded to the resolved tool definition. Required - * for the agents plugin to gate destructive tools through the approval - * card, surface `readOnly` tools to auto-inherit, etc. Dropped silently - * before the fix that added this field — any tool wanting HITL must - * set `annotations: { destructive: true }` here. + * Behavioural hints forwarded to the resolved tool definition. Prefer + * `effect` (`"read" | "write" | "update" | "destructive"`) — any mutating + * value forces the agents-plugin approval gate before `execute()` runs + * and the client's approval card will colour itself accordingly. Legacy + * `destructive: true` still gates. Dropped silently before the fix that + * added this field. */ annotations?: ToolAnnotations; execute: (args: z.infer) => Promise | string; diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts index 4c406f12e..3bfe45cbf 100644 --- a/packages/appkit/src/plugins/agents/agents.ts +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -14,20 +14,12 @@ import type { Thread, ToolProvider, } from "shared"; -import { buildMcpHostPolicy } from "../../connectors/mcp"; +import { AppKitMcpClient, buildMcpHostPolicy } from "../../connectors/mcp"; import { loadAgentsFromDir } from "../../core/agent/load-agents"; import { buildBaseSystemPrompt, composeSystemPrompt, } from "../../core/agent/system-prompt"; -import { AppKitMcpClient } from "../../connectors/mcp"; -import { - approvalRequestSchema, - chatRequestSchema, - invocationsRequestSchema, -} from "./schemas"; -import { InMemoryThreadStore } from "./thread-store"; -import { ToolApprovalGate } from "./tool-approval-gate"; import { functionToolToDefinition, isFunctionTool, diff --git a/packages/appkit/src/plugins/agents/manifest.json b/packages/appkit/src/plugins/agents/manifest.json index f3986c837..4571031a8 100644 --- a/packages/appkit/src/plugins/agents/manifest.json +++ b/packages/appkit/src/plugins/agents/manifest.json @@ -2,6 +2,7 @@ "$schema": "https://databricks.github.io/appkit/schemas/plugin-manifest.schema.json", "name": "agents", "displayName": "Agents Plugin", + "stability": "beta", "description": "AI agents driven by markdown configs or code, with auto-tool-discovery from registered plugins", "resources": { "required": [], diff --git a/packages/appkit/src/plugins/beta-exports.generated.ts b/packages/appkit/src/plugins/beta-exports.generated.ts index 7fff0af71..82f6c4a78 100644 --- a/packages/appkit/src/plugins/beta-exports.generated.ts +++ b/packages/appkit/src/plugins/beta-exports.generated.ts @@ -5,4 +5,4 @@ // subpath ships each plugin. Editing this file by hand will drift it from the // manifests and the synced appkit.plugins.json. -export {}; +export { agents } from "./agents"; diff --git a/packages/shared/src/agent.ts b/packages/shared/src/agent.ts index 2594b3e6d..74fc500aa 100644 --- a/packages/shared/src/agent.ts +++ b/packages/shared/src/agent.ts @@ -4,8 +4,40 @@ import type { JSONSchema7 } from "json-schema"; // Tool definitions // --------------------------------------------------------------------------- +/** + * Semantic hint for what the tool does to the world. Drives both the + * agents-plugin approval gate and the client's approval-card styling. + * + * - `read` — observes only; never needs approval. + * - `write` — creates or appends new state (e.g. saving a new view). Approval + * required by default. Rendered as a low-severity "writes" card. + * - `update` — mutates existing state in place (e.g. renaming, toggling). + * Approval required. Rendered as a medium-severity "updates" card. + * - `destructive` — deletes or irreversibly mutates (e.g. dropping a view). + * Approval required. Rendered as a high-severity "destructive" card. + * + * Prefer this over the legacy `readOnly`/`destructive` booleans: it lets the + * UI distinguish "captured a screenshot" from "deleted a dashboard", both of + * which today are lumped under a single red "destructive" label. + */ +export type ToolEffect = "read" | "write" | "update" | "destructive"; + export interface ToolAnnotations { + /** + * Preferred semantic label. When set, drives both the approval gate (fires + * for `write`/`update`/`destructive`) and the approval-card styling. + */ + effect?: ToolEffect; + /** + * @deprecated Prefer {@link effect}. Retained for backward compatibility + * with tools authored against the original flags and for MCP interop. + */ readOnly?: boolean; + /** + * @deprecated Prefer {@link effect} with value `"destructive"`. Retained + * so existing annotations continue to force the approval gate, and so + * MCP-style consumers that only read `destructive` still see the hint. + */ destructive?: boolean; idempotent?: boolean; requiresUserContext?: boolean; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ba4b8ef8e..635fb38a7 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -305,9 +305,9 @@ importers: express: specifier: 4.22.0 version: 4.22.0 - get-port: - specifier: 7.2.0 - version: 7.2.0 + js-yaml: + specifier: ^4.1.1 + version: 4.1.1 obug: specifier: 2.1.1 version: 2.1.1 @@ -339,6 +339,9 @@ importers: '@types/express': specifier: 4.17.25 version: 4.17.25 + '@types/js-yaml': + specifier: 4.0.9 + version: 4.0.9 '@types/json-schema': specifier: 7.0.15 version: 7.0.15 @@ -4915,6 +4918,9 @@ packages: '@types/istanbul-reports@3.0.4': resolution: {integrity: sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==} + '@types/js-yaml@4.0.9': + resolution: {integrity: sha512-k4MGaQl5TGo/iipqb2UDG2UwjXziSWkh0uysQelTlJpX1qGlpUZYm8PnO4DxG1qBomtJUdYJ6qR6xdIah10JLg==} + '@types/jsesc@2.5.1': resolution: {integrity: sha512-9VN+6yxLOPLOav+7PwjZbxiID2bVaeq0ED4qSQmdQTdjnXJSaCVKTR58t15oqH1H5t8Ng2ZX1SabJVoN9Q34bw==} @@ -7284,10 +7290,6 @@ packages: get-own-enumerable-property-symbols@3.0.2: resolution: {integrity: sha512-I0UBV/XOz1XkIJHEUDMZAbzCThU/H8DxmSfmdGcKPnVhu2VfFqr34jr9777IyaTYvxjedWhqVIilEDsCdP5G6g==} - get-port@7.2.0: - resolution: {integrity: sha512-afP4W205ONCuMoPBqcR6PSXnzX35KTcJygfJfcp+QY+uwm3p20p1YczWXhlICIzGMCxYBQcySEcOgsJcrkyobg==} - engines: {node: '>=16'} - get-proto@1.0.1: resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==} engines: {node: '>= 0.4'} @@ -17262,6 +17264,8 @@ snapshots: dependencies: '@types/istanbul-lib-report': 3.0.3 + '@types/js-yaml@4.0.9': {} + '@types/jsesc@2.5.1': {} '@types/json-schema@7.0.15': {} @@ -19876,8 +19880,6 @@ snapshots: get-own-enumerable-property-symbols@3.0.2: {} - get-port@7.2.0: {} - get-proto@1.0.1: dependencies: dunder-proto: 1.0.1 From a677c0e02b2313dd62f670570e4942e6bbd5b5bf Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Tue, 21 Apr 2026 19:52:50 +0200 Subject: [PATCH 10/46] feat(appkit): fromPlugin() DX, runAgent plugins arg, shared toolkit-resolver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DX centerpiece. Introduces the symbol-marker pattern that collapses plugin tool references in code-defined agents from a three-touch dance to a single line, and extracts the shared resolver that the agents plugin, auto-inherit, and standalone runAgent all now go through. `packages/appkit/src/plugins/agents/from-plugin.ts`. Returns a spread- friendly `{ [Symbol()]: FromPluginMarker }` record. The symbol key is freshly generated per call, so multiple spreads of the same plugin coexist safely. The marker's brand is a globally-interned `Symbol.for("@databricks/appkit.fromPluginMarker")` — stable across module boundaries. `packages/appkit/src/plugins/agents/toolkit-resolver.ts`. Single source of truth for "turn a ToolProvider into a keyed record of `ToolkitEntry` markers". Prefers `provider.toolkit(opts)` when available (core plugins implement it), falls back to walking `getAgentTools()` and synthesizing namespaced keys (`${pluginName}.${localName}`) for third-party providers, honoring `only` / `except` / `rename` / `prefix` the same way. Used by three call sites, previously all copy-pasted: 1. `AgentsPlugin.buildToolIndex` — fromPlugin marker resolution pass 2. `AgentsPlugin.applyAutoInherit` — markdown auto-inherit path 3. `runAgent` — standalone-mode plugin tool dispatch Before the existing string-key iteration, `buildToolIndex` now walks `Object.getOwnPropertySymbols(def.tools)`. For each `FromPluginMarker`, it looks up the plugin by name in `PluginContext.getToolProviders()`, calls `resolveToolkitFromProvider`, and merges the resulting entries into the per-agent index. Missing plugins throw at setup time with a clear `Available: ...` listing — wiring errors surface on boot, not mid-request. `hasExplicitTools` now counts symbol keys too, so a `tools: { ...fromPlugin(x) }` record correctly disables auto-inherit on code-defined agents. - `AgentTools` type: `{ [key: string]: AgentTool } & { [key: symbol]: FromPluginMarker }`. Preserves string-key autocomplete while accepting marker spreads under strict TS. - `AgentDefinition.tools` switched to `AgentTools`. `packages/appkit/src/core/run-agent.ts`. When an agent def contains `fromPlugin` markers, the caller passes plugins via `RunAgentInput.plugins`. A local provider cache constructs each plugin and dispatches tool calls via `provider.executeAgentTool()`. Runs as service principal (no OBO — there's no HTTP request). If a def contains markers but `plugins` is absent, throws with guidance. `fromPlugin`, `FromPluginMarker`, `isFromPluginMarker`, `AgentTools` added to the main barrel. - 14 new tests: marker shape, symbol uniqueness, type guard, factory-without-pluginName error, fromPlugin marker resolution in AgentsPlugin, fallback to getAgentTools for providers without .toolkit(), symbol-only tools disables auto-inherit, runAgent standalone marker resolution via `plugins` arg, guidance error when missing. - Full appkit vitest suite: 1311 tests passing. - Typecheck clean. Signed-off-by: MarioCadenas --- packages/appkit/src/beta.ts | 4 + packages/appkit/src/core/agent/from-plugin.ts | 97 ++++++++ packages/appkit/src/core/agent/run-agent.ts | 152 ++++++++++-- .../src/core/agent/tests/run-agent.test.ts | 100 +++++++- .../appkit/src/core/agent/toolkit-resolver.ts | 62 +++++ packages/appkit/src/core/agent/types.ts | 13 +- packages/appkit/src/plugins/agents/agents.ts | 101 +++++--- packages/appkit/src/plugins/agents/index.ts | 10 +- .../agents/tests/agents-plugin.test.ts | 220 +++++++++++++++++- .../plugins/agents/tests/from-plugin.test.ts | 80 +++++++ 10 files changed, 789 insertions(+), 50 deletions(-) create mode 100644 packages/appkit/src/core/agent/from-plugin.ts create mode 100644 packages/appkit/src/core/agent/toolkit-resolver.ts create mode 100644 packages/appkit/src/plugins/agents/tests/from-plugin.test.ts diff --git a/packages/appkit/src/beta.ts b/packages/appkit/src/beta.ts index 562cfd43a..9295a43ed 100644 --- a/packages/appkit/src/beta.ts +++ b/packages/appkit/src/beta.ts @@ -52,8 +52,10 @@ export type { AgentDefinition, AgentsPluginConfig, AgentTool, + AgentTools, AutoInheritToolsConfig, BaseSystemPromptOption, + FromPluginMarker, PromptContext, RegisteredAgent, ResolvedToolEntry, @@ -62,6 +64,8 @@ export type { } from "./plugins/agents"; export { agentIdFromMarkdownPath, + fromPlugin, + isFromPluginMarker, isToolkitEntry, loadAgentFromFile, loadAgentsFromDir, diff --git a/packages/appkit/src/core/agent/from-plugin.ts b/packages/appkit/src/core/agent/from-plugin.ts new file mode 100644 index 000000000..b11285941 --- /dev/null +++ b/packages/appkit/src/core/agent/from-plugin.ts @@ -0,0 +1,97 @@ +import type { NamedPluginFactory } from "../../plugin/to-plugin"; +import type { ToolkitOptions } from "./types"; + +/** + * Symbol brand for the `fromPlugin` marker. Using a globally-interned symbol + * (`Symbol.for`) keeps the brand stable across module boundaries / bundle + * duplicates so `isFromPluginMarker` stays reliable. + */ +export const FROM_PLUGIN_MARKER = Symbol.for( + "@databricks/appkit.fromPluginMarker", +); + +/** + * A lazy reference to a plugin's tools, produced by {@link fromPlugin} and + * resolved to concrete `ToolkitEntry`s at `AgentsPlugin.setup()` time. + * + * The marker is spread under a unique symbol key so multiple calls to + * `fromPlugin` (even for the same plugin) coexist in an `AgentDefinition.tools` + * record without colliding. + */ +export interface FromPluginMarker { + readonly [FROM_PLUGIN_MARKER]: true; + readonly pluginName: string; + readonly opts: ToolkitOptions | undefined; +} + +/** + * Record shape returned by {@link fromPlugin} — a single symbol-keyed entry + * suitable for spreading into `AgentDefinition.tools`. + */ +export type FromPluginSpread = { readonly [key: symbol]: FromPluginMarker }; + +/** + * Reference a plugin's tools inside an `AgentDefinition.tools` record without + * naming the plugin instance. The returned spread-friendly object carries a + * symbol-keyed marker that the agents plugin resolves against registered + * `ToolProvider`s at setup time. + * + * The factory argument must come from `toPlugin` (or any function that + * carries a `pluginName` field). `fromPlugin` reads `factory.pluginName` + * synchronously — it does not construct an instance. + * + * If the referenced plugin is also registered in `createApp({ plugins })`, the + * same runtime instance is used for dispatch. If the plugin is missing, + * `AgentsPlugin.setup()` throws with a clear `Available: …` listing. + * + * @example + * ```ts + * import { analytics, createAgent, files, fromPlugin, tool } from "@databricks/appkit"; + * + * const support = createAgent({ + * instructions: "You help customers.", + * tools: { + * ...fromPlugin(analytics), + * ...fromPlugin(files, { only: ["uploads.read"] }), + * get_weather: tool({ ... }), + * }, + * }); + * ``` + * + * @param factory A plugin factory produced by `toPlugin`. Must expose a + * `pluginName` field. + * @param opts Optional toolkit scoping — `prefix`, `only`, `except`, `rename`. + * Same shape as the `.toolkit()` method. + */ +export function fromPlugin( + factory: F, + opts?: ToolkitOptions, +): FromPluginSpread { + if ( + !factory || + typeof factory.pluginName !== "string" || + !factory.pluginName + ) { + throw new Error( + "fromPlugin(): factory is missing pluginName. Pass a factory created by toPlugin().", + ); + } + const pluginName = factory.pluginName; + const marker: FromPluginMarker = { + [FROM_PLUGIN_MARKER]: true, + pluginName, + opts, + }; + return { [Symbol(`fromPlugin:${pluginName}`)]: marker }; +} + +/** + * Type guard for {@link FromPluginMarker}. + */ +export function isFromPluginMarker(value: unknown): value is FromPluginMarker { + return ( + typeof value === "object" && + value !== null && + (value as Record)[FROM_PLUGIN_MARKER] === true + ); +} diff --git a/packages/appkit/src/core/agent/run-agent.ts b/packages/appkit/src/core/agent/run-agent.ts index 5ef4109cf..4646aec74 100644 --- a/packages/appkit/src/core/agent/run-agent.ts +++ b/packages/appkit/src/core/agent/run-agent.ts @@ -4,18 +4,20 @@ import type { AgentEvent, AgentToolDefinition, Message, + PluginConstructor, + PluginData, + ToolProvider, } from "shared"; +import { consumeAdapterStream } from "./consume-adapter-stream"; +import { isFromPluginMarker } from "./from-plugin"; +import { resolveToolkitFromProvider } from "./toolkit-resolver"; import { type FunctionTool, functionToolToDefinition, isFunctionTool, } from "./tools/function-tool"; import { isHostedTool } from "./tools/hosted-tools"; -import type { - AgentDefinition, - AgentTool, - ToolkitEntry, -} from "./types"; +import type { AgentDefinition, AgentTool, ToolkitEntry } from "./types"; import { isToolkitEntry } from "./types"; export interface RunAgentInput { @@ -23,6 +25,14 @@ export interface RunAgentInput { messages: string | Message[]; /** Abort signal for cancellation. */ signal?: AbortSignal; + /** + * Optional plugin list used to resolve `fromPlugin` markers in `def.tools`. + * Required when the def contains any `...fromPlugin(factory)` spreads; + * ignored otherwise. `runAgent` constructs a fresh instance per plugin + * and dispatches tool calls against it as the service principal (no + * OBO — there is no HTTP request in standalone mode). + */ + plugins?: PluginData[]; } export interface RunAgentResult { @@ -39,11 +49,12 @@ export interface RunAgentResult { * Limitations vs. running through the agents() plugin: * - No OBO: there is no HTTP request, so plugin tools run as the service * principal (when they work at all). - * - Plugin tools (`ToolkitEntry`) are not supported — they require a live - * `PluginContext` that only exists when registered in a `createApp` - * instance. This function throws a clear error if encountered. + * - Hosted tools (MCP) are not supported — they require a live MCP client + * that only exists inside the agents plugin. * - Sub-agents (`agents: { ... }` on the def) are executed as nested * `runAgent` calls with no shared thread state. + * - Plugin tools (`fromPlugin` markers or `ToolkitEntry` spreads) require + * passing `plugins: [...]` via `RunAgentInput`. */ export async function runAgent( def: AgentDefinition, @@ -51,7 +62,7 @@ export async function runAgent( ): Promise { const adapter = await resolveAdapter(def); const messages = normalizeMessages(input.messages, def.instructions); - const toolIndex = buildStandaloneToolIndex(def); + const toolIndex = buildStandaloneToolIndex(def, input.plugins ?? []); const tools = Array.from(toolIndex.values()).map((e) => e.def); const signal = input.signal; @@ -62,6 +73,13 @@ export async function runAgent( if (entry.kind === "function") { return entry.tool.execute(args as Record); } + if (entry.kind === "toolkit") { + return entry.provider.executeAgentTool( + entry.localName, + args as Record, + signal, + ); + } if (entry.kind === "subagent") { const subInput: RunAgentInput = { messages: @@ -71,13 +89,14 @@ export async function runAgent( ? (args as { input: string }).input : JSON.stringify(args), signal, + plugins: input.plugins, }; const res = await runAgent(entry.agentDef, subInput); return res.text; } throw new Error( `runAgent: tool "${name}" is a ${entry.kind} tool. ` + - "Plugin toolkits and MCP tools are only usable via createApp({ plugins: [..., agents(...)] }).", + "Hosted/MCP tools are only usable via createApp({ plugins: [..., agents(...)] }).", ); }; @@ -158,20 +177,61 @@ type StandaloneEntry = | { kind: "toolkit"; def: AgentToolDefinition; - entry: ToolkitEntry; + provider: ToolProvider; + pluginName: string; + localName: string; } | { kind: "hosted"; def: AgentToolDefinition; }; +/** + * Resolves `def.tools` (string-keyed entries + symbol-keyed `fromPlugin` + * markers) and `def.agents` (sub-agents) into a flat dispatch index. + * Symbol-keyed markers are resolved against `plugins`; missing references + * throw with an `Available: …` listing. + */ function buildStandaloneToolIndex( def: AgentDefinition, + plugins: PluginData[], ): Map { const index = new Map(); + const tools = def.tools; + + const symbolKeys = tools ? Object.getOwnPropertySymbols(tools) : []; + if (symbolKeys.length > 0) { + const providerCache = new Map(); + for (const sym of symbolKeys) { + const marker = (tools as Record)[sym]; + if (!isFromPluginMarker(marker)) continue; - for (const [key, tool] of Object.entries(def.tools ?? {})) { - index.set(key, classifyTool(key, tool)); + const provider = resolveStandaloneProvider( + marker.pluginName, + plugins, + providerCache, + ); + const entries = resolveToolkitFromProvider( + marker.pluginName, + provider, + marker.opts, + ); + for (const [key, entry] of Object.entries(entries)) { + index.set(key, { + kind: "toolkit", + provider, + pluginName: entry.pluginName, + localName: entry.localName, + def: { ...entry.def, name: key }, + }); + } + } + } + + if (tools) { + for (const [key, tool] of Object.entries(tools)) { + index.set(key, classifyTool(key, tool)); + } } for (const [childKey, child] of Object.entries(def.agents ?? {})) { @@ -203,7 +263,7 @@ function buildStandaloneToolIndex( function classifyTool(key: string, tool: AgentTool): StandaloneEntry { if (isToolkitEntry(tool)) { - return { kind: "toolkit", def: { ...tool.def, name: key }, entry: tool }; + return toolkitEntryToStandalone(key, tool); } if (isFunctionTool(tool)) { return { @@ -224,3 +284,67 @@ function classifyTool(key: string, tool: AgentTool): StandaloneEntry { } throw new Error(`runAgent: unrecognized tool shape at key "${key}"`); } + +/** + * Pre-`fromPlugin` code could reach a `ToolkitEntry` by calling + * `.toolkit()` at module scope (which requires an instance). Those entries + * still flow through `def.tools` but without a provider we can dispatch + * against — runAgent cannot execute them and errors clearly. + */ +function toolkitEntryToStandalone( + key: string, + entry: ToolkitEntry, +): StandaloneEntry { + const def: AgentToolDefinition = { ...entry.def, name: key }; + return { + kind: "hosted", + def: { + ...def, + description: + `${def.description ?? ""} ` + + `[runAgent: this ToolkitEntry refers to plugin '${entry.pluginName}' but ` + + "runAgent cannot dispatch it without the plugin instance. Pass the " + + "plugin via plugins: [...] and use fromPlugin(factory) instead of " + + ".toolkit() spreads.]".trim(), + }, + }; +} + +function resolveStandaloneProvider( + pluginName: string, + plugins: PluginData[], + cache: Map, +): ToolProvider { + const cached = cache.get(pluginName); + if (cached) return cached; + + const match = plugins.find((p) => p.name === pluginName); + if (!match) { + const available = plugins.map((p) => p.name).join(", ") || "(none)"; + throw new Error( + `runAgent: agent references plugin '${pluginName}' via fromPlugin(), but ` + + "that plugin is missing from RunAgentInput.plugins. " + + `Available: ${available}.`, + ); + } + + const instance = new match.plugin({ + ...(match.config ?? {}), + name: pluginName, + }); + const provider = instance as unknown as ToolProvider; + if ( + typeof (provider as { getAgentTools?: unknown }).getAgentTools !== + "function" || + typeof (provider as { executeAgentTool?: unknown }).executeAgentTool !== + "function" + ) { + throw new Error( + `runAgent: plugin '${pluginName}' is not a ToolProvider ` + + "(missing getAgentTools/executeAgentTool). Only ToolProvider plugins " + + "are supported via fromPlugin() in runAgent.", + ); + } + cache.set(pluginName, provider); + return provider; +} diff --git a/packages/appkit/src/core/agent/tests/run-agent.test.ts b/packages/appkit/src/core/agent/tests/run-agent.test.ts index 55b3e7b39..5324dde21 100644 --- a/packages/appkit/src/core/agent/tests/run-agent.test.ts +++ b/packages/appkit/src/core/agent/tests/run-agent.test.ts @@ -3,13 +3,18 @@ import type { AgentEvent, AgentInput, AgentRunContext, + AgentToolDefinition, + PluginConstructor, + PluginData, + ToolProvider, } from "shared"; import { describe, expect, test, vi } from "vitest"; import { z } from "zod"; +import type { ToolkitEntry } from "../../../core/agent/types"; import { createAgent } from "../create-agent"; +import { fromPlugin } from "../from-plugin"; import { runAgent } from "../run-agent"; -import { tool } from "../../../core/agent/tools/tool"; -import type { ToolkitEntry } from "../../../core/agent/types"; +import { tool } from "../tools/tool"; function scriptedAdapter(events: AgentEvent[]): AgentAdapter { return { @@ -84,6 +89,97 @@ describe("runAgent", () => { expect(weatherFn).toHaveBeenCalledWith({ city: "NYC" }); }); + test("resolves fromPlugin markers against RunAgentInput.plugins", async () => { + const pingExec = vi.fn(async () => "pong"); + class FakePlugin implements ToolProvider { + static manifest = { name: "ping" }; + static DEFAULT_CONFIG = {}; + name = "ping"; + constructor(public config: unknown) {} + async setup() {} + injectRoutes() {} + getEndpoints() { + return {}; + } + getAgentTools(): AgentToolDefinition[] { + return [ + { + name: "ping", + description: "ping", + parameters: { type: "object", properties: {} }, + }, + ]; + } + executeAgentTool = pingExec; + } + + const factory = () => ({ + plugin: FakePlugin as unknown as PluginConstructor, + config: {}, + name: "ping" as const, + }); + Object.defineProperty(factory, "pluginName", { + value: "ping", + enumerable: true, + }); + + let capturedCtx: AgentRunContext | null = null; + const adapter: AgentAdapter = { + async *run(_input, context) { + capturedCtx = context; + yield { type: "message_delta", content: "" }; + }, + }; + + const def = createAgent({ + instructions: "x", + model: adapter, + tools: { + ...fromPlugin(factory as unknown as { readonly pluginName: string }), + }, + }); + + const pluginData = factory() as PluginData< + PluginConstructor, + unknown, + string + >; + + await runAgent(def, { messages: "hi", plugins: [pluginData] }); + expect(capturedCtx).not.toBeNull(); + // biome-ignore lint/style/noNonNullAssertion: asserted above + const result = await capturedCtx!.executeTool("ping.ping", {}); + expect(result).toBe("pong"); + expect(pingExec).toHaveBeenCalled(); + }); + + test("throws with guidance when fromPlugin marker has no matching plugin", async () => { + const factory = () => ({ name: "absent" as const }); + Object.defineProperty(factory, "pluginName", { + value: "absent", + enumerable: true, + }); + + const adapter: AgentAdapter = { + async *run(_input, _context) { + yield { type: "message_delta", content: "" }; + }, + }; + + const def = createAgent({ + instructions: "x", + model: adapter, + tools: { + ...fromPlugin(factory as unknown as { readonly pluginName: string }), + }, + }); + + await expect(runAgent(def, { messages: "hi" })).rejects.toThrow(/absent/); + await expect(runAgent(def, { messages: "hi" })).rejects.toThrow( + /Available:/, + ); + }); + test("throws a clear error when a ToolkitEntry is invoked", async () => { const toolkitEntry: ToolkitEntry = { __toolkitRef: true, diff --git a/packages/appkit/src/core/agent/toolkit-resolver.ts b/packages/appkit/src/core/agent/toolkit-resolver.ts new file mode 100644 index 000000000..8ec8cf1f7 --- /dev/null +++ b/packages/appkit/src/core/agent/toolkit-resolver.ts @@ -0,0 +1,62 @@ +import type { ToolProvider } from "shared"; +import type { ToolkitEntry, ToolkitOptions } from "./types"; + +/** + * Internal interface: a `ToolProvider` that optionally exposes a typed + * `.toolkit(opts)` method. Core plugins (analytics, files, genie, lakebase) + * implement this; third-party `ToolProvider`s may not. + */ +type MaybeToolkitProvider = ToolProvider & { + toolkit?: (opts?: ToolkitOptions) => Record; +}; + +/** + * Resolve a plugin's tools into a keyed record of {@link ToolkitEntry} markers + * ready to be merged into an agent's tool index. + * + * Preferred path: call the plugin's own `.toolkit(opts)` method, which + * typically delegates to `buildToolkitEntries` with full `ToolkitOptions` + * support (prefix, only, except, rename). + * + * Fallback path: when the plugin doesn't expose `.toolkit()` (e.g. a + * third-party `ToolProvider` built with plain `toPlugin`), walk + * `getAgentTools()` and synthesize namespaced keys (`${pluginName}.${name}`) + * while still honoring `only` / `except` / `rename` / `prefix`. + * + * This helper is the single source of truth for "turn a provider into a + * toolkit entry record" and is used by `AgentsPlugin.buildToolIndex` + * (both the `fromPlugin` resolution pass and auto-inherit) and by the + * standalone `runAgent` executor. + */ +export function resolveToolkitFromProvider( + pluginName: string, + provider: ToolProvider, + opts?: ToolkitOptions, +): Record { + const withToolkit = provider as MaybeToolkitProvider; + if (typeof withToolkit.toolkit === "function") { + return withToolkit.toolkit(opts); + } + + const only = opts?.only ? new Set(opts.only) : null; + const except = opts?.except ? new Set(opts.except) : null; + const rename = opts?.rename ?? {}; + const prefix = opts?.prefix ?? `${pluginName}.`; + + const out: Record = {}; + for (const tool of provider.getAgentTools()) { + if (only && !only.has(tool.name)) continue; + if (except?.has(tool.name)) continue; + + const keyAfterPrefix = `${prefix}${tool.name}`; + const key = rename[tool.name] ?? keyAfterPrefix; + + out[key] = { + __toolkitRef: true, + pluginName, + localName: tool.name, + def: { ...tool, name: key }, + }; + } + return out; +} diff --git a/packages/appkit/src/core/agent/types.ts b/packages/appkit/src/core/agent/types.ts index 14366e9ab..ce3d0463f 100644 --- a/packages/appkit/src/core/agent/types.ts +++ b/packages/appkit/src/core/agent/types.ts @@ -6,6 +6,7 @@ import type { ToolAnnotations, } from "shared"; import type { McpHostPolicyConfig } from "../../connectors/mcp"; +import type { FromPluginMarker } from "./from-plugin"; import type { FunctionTool } from "./tools/function-tool"; import type { HostedTool } from "./tools/hosted-tools"; @@ -62,6 +63,16 @@ export type BaseSystemPromptOption = | string | ((ctx: PromptContext) => string); +/** + * Per-agent tool record. String keys map to inline tools, toolkit entries, + * hosted tools, etc. Symbol keys hold `FromPluginMarker` references produced + * by `fromPlugin(factory)` spreads — these are resolved at + * `AgentsPlugin.setup()` time against registered `ToolProvider` plugins. + */ +export type AgentTools = { [key: string]: AgentTool } & { + [key: symbol]: FromPluginMarker; +}; + export interface AgentDefinition { /** Filled in from the enclosing key when used in `agents: { foo: def }`. */ name?: string; @@ -74,7 +85,7 @@ export interface AgentDefinition { */ model?: AgentAdapter | Promise | string; /** Per-agent tool record. Key is the LLM-visible tool-call name. */ - tools?: Record; + tools?: AgentTools; /** Sub-agents, exposed as `agent-` tools on this agent. */ agents?: Record; /** Override the plugin's baseSystemPrompt for this agent only. */ diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts index 3bfe45cbf..309589e0f 100644 --- a/packages/appkit/src/plugins/agents/agents.ts +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -15,11 +15,14 @@ import type { ToolProvider, } from "shared"; import { AppKitMcpClient, buildMcpHostPolicy } from "../../connectors/mcp"; +import { getWorkspaceClient } from "../../context"; +import { isFromPluginMarker } from "../../core/agent/from-plugin"; import { loadAgentsFromDir } from "../../core/agent/load-agents"; import { buildBaseSystemPrompt, composeSystemPrompt, } from "../../core/agent/system-prompt"; +import { resolveToolkitFromProvider } from "../../core/agent/toolkit-resolver"; import { functionToolToDefinition, isFunctionTool, @@ -38,10 +41,12 @@ import { isToolkitEntry } from "../../core/agent/types"; import { createLogger } from "../../logging/logger"; import { Plugin, toPlugin } from "../../plugin"; import type { PluginManifest } from "../../registry"; +import { consumeAdapterStream } from "../../core/agent/consume-adapter-stream"; import { agentStreamDefaults } from "./defaults"; import { EventChannel } from "./event-channel"; import { AgentEventTranslator } from "./event-translator"; import manifest from "./manifest.json"; +import { normalizeToolResult } from "../../core/agent/normalize-result"; import { approvalRequestSchema, chatRequestSchema, @@ -327,7 +332,11 @@ export class AgentsPlugin extends Plugin implements ToolProvider { src: AgentSource, ): Promise> { const index = new Map(); - const hasExplicitTools = def.tools && Object.keys(def.tools).length > 0; + const toolsRecord = def.tools ?? {}; + const hasExplicitTools = + def.tools !== undefined && + (Object.keys(toolsRecord).length > 0 || + Object.getOwnPropertySymbols(toolsRecord).length > 0); const hasExplicitSubAgents = def.agents && Object.keys(def.agents).length > 0; @@ -366,10 +375,14 @@ export class AgentsPlugin extends Plugin implements ToolProvider { }); } - // 2. Explicit tools (toolkit entries, function tools, hosted tools) + // 2. fromPlugin markers — resolve against registered ToolProviders first so + // explicit string-keyed tools can still overwrite on the same key. + this.resolveFromPluginMarkers(agentName, toolsRecord, index); + + // 3. Explicit tools (toolkit entries, function tools, hosted tools) const hostedToCollect: import("../../core/agent/tools/hosted-tools").HostedTool[] = []; - for (const [key, tool] of Object.entries(def.tools ?? {})) { + for (const [key, tool] of Object.entries(toolsRecord)) { if (isToolkitEntry(tool)) { index.set(key, { source: "toolkit", @@ -421,32 +434,19 @@ export class AgentsPlugin extends Plugin implements ToolProvider { provider, } of this.context.getToolProviders()) { if (pluginName === this.name) continue; - const withToolkit = provider as ToolProvider & { - toolkit?: (opts?: unknown) => Record; - }; - if (typeof withToolkit.toolkit === "function") { - const entries = withToolkit.toolkit() as Record; - for (const [key, maybeEntry] of Object.entries(entries)) { - if (!isToolkitEntry(maybeEntry)) continue; - if (maybeEntry.autoInheritable !== true) { - recordSkip(maybeEntry.pluginName, maybeEntry.localName); - continue; - } - index.set(key, { - source: "toolkit", - pluginName: maybeEntry.pluginName, - localName: maybeEntry.localName, - def: { ...maybeEntry.def, name: key }, - }); - inherited.push(key); + const entries = resolveToolkitFromProvider(pluginName, provider); + for (const [key, entry] of Object.entries(entries)) { + if (entry.autoInheritable !== true) { + recordSkip(entry.pluginName, entry.localName); + continue; } - continue; - } - // Fallback: providers without a toolkit() still expose getAgentTools(). - // These cannot be selectively opted in per tool, so we conservatively - // skip them during auto-inherit and require explicit `tools:` wiring. - for (const tool of provider.getAgentTools()) { - recordSkip(pluginName, tool.name); + index.set(key, { + source: "toolkit", + pluginName: entry.pluginName, + localName: entry.localName, + def: { ...entry.def, name: key }, + }); + inherited.push(key); } } @@ -474,6 +474,51 @@ export class AgentsPlugin extends Plugin implements ToolProvider { } } + /** + * Walks the symbol-keyed `fromPlugin` markers in an agent's `tools` record + * and resolves each one against a registered `ToolProvider`. Throws with a + * helpful `Available: …` listing if a referenced plugin isn't registered. + */ + private resolveFromPluginMarkers( + agentName: string, + toolsRecord: Record, + index: Map, + ): void { + const symbolKeys = Object.getOwnPropertySymbols(toolsRecord); + if (symbolKeys.length === 0) return; + + const providers = this.context?.getToolProviders() ?? []; + + for (const sym of symbolKeys) { + const marker = (toolsRecord as Record)[sym]; + if (!isFromPluginMarker(marker)) continue; + + const providerEntry = providers.find((p) => p.name === marker.pluginName); + if (!providerEntry) { + const available = providers.map((p) => p.name).join(", ") || "(none)"; + throw new Error( + `Agent '${agentName}' references plugin '${marker.pluginName}' via ` + + `fromPlugin(), but that plugin is not registered in createApp. ` + + `Available: ${available}.`, + ); + } + + const entries = resolveToolkitFromProvider( + marker.pluginName, + providerEntry.provider, + marker.opts, + ); + for (const [key, entry] of Object.entries(entries)) { + index.set(key, { + source: "toolkit", + pluginName: entry.pluginName, + localName: entry.localName, + def: { ...entry.def, name: key }, + }); + } + } + } + private async connectHostedTools( hostedTools: import("../../core/agent/tools/hosted-tools").HostedTool[], index: Map, diff --git a/packages/appkit/src/plugins/agents/index.ts b/packages/appkit/src/plugins/agents/index.ts index c8a31aac3..404da2432 100644 --- a/packages/appkit/src/plugins/agents/index.ts +++ b/packages/appkit/src/plugins/agents/index.ts @@ -1,5 +1,11 @@ -export { AgentsPlugin, agents } from "./agents"; export { buildToolkitEntries } from "../../core/agent/build-toolkit"; +export { + FROM_PLUGIN_MARKER, + type FromPluginMarker, + type FromPluginSpread, + fromPlugin, + isFromPluginMarker, +} from "../../core/agent/from-plugin"; export { agentIdFromMarkdownPath, type LoadContext, @@ -12,6 +18,7 @@ export { type AgentDefinition, type AgentsPluginConfig, type AgentTool, + type AgentTools, type AutoInheritToolsConfig, type BaseSystemPromptOption, isToolkitEntry, @@ -21,3 +28,4 @@ export { type ToolkitEntry, type ToolkitOptions, } from "../../core/agent/types"; +export { AgentsPlugin, agents } from "./agents"; diff --git a/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts index 9c043d56f..42cb6b127 100644 --- a/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts +++ b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts @@ -11,12 +11,26 @@ import type { import { afterEach, beforeEach, describe, expect, test, vi } from "vitest"; import { z } from "zod"; import { CacheManager } from "../../../cache"; -// Import the class directly so we can construct it without a createApp -import { AgentsPlugin } from "../agents"; import { buildToolkitEntries } from "../../../core/agent/build-toolkit"; -import { defineTool, type ToolRegistry } from "../../../core/agent/tools/define-tool"; -import type { AgentsPluginConfig, ToolkitEntry } from "../../../core/agent/types"; +import { fromPlugin } from "../../../core/agent/from-plugin"; +import { + defineTool, + type ToolRegistry, +} from "../../../core/agent/tools/define-tool"; +import { tool } from "../../../core/agent/tools/tool"; +import type { + AgentsPluginConfig, + ToolkitEntry, +} from "../../../core/agent/types"; import { isToolkitEntry } from "../../../core/agent/types"; +// Import the class directly so we can construct it without a createApp +import { AgentsPlugin } from "../agents"; + +function namedFactory(name: string) { + const f = () => ({ name }); + Object.defineProperty(f, "pluginName", { value: name, enumerable: true }); + return f as typeof f & { readonly pluginName: string }; +} interface FakeContext { providers: Array<{ name: string; provider: ToolProvider }>; @@ -364,4 +378,202 @@ describe("AgentsPlugin", () => { expect(isToolkitEntry({ foo: 1 })).toBe(false); expect(isToolkitEntry(null)).toBe(false); }); + + describe("fromPlugin markers", () => { + test("spreading fromPlugin registers all tools from the referenced plugin", async () => { + const registry: ToolRegistry = { + query: defineTool({ + description: "q", + schema: z.object({ sql: z.string() }), + handler: () => "ok", + }), + }; + const ctx = fakeContext([ + { + name: "analytics", + provider: makeToolProvider("analytics", registry), + }, + ]); + + const plugin = instantiate( + { + dir: false, + agents: { + support: { + instructions: "...", + model: stubAdapter(), + tools: { ...fromPlugin(namedFactory("analytics")) }, + }, + }, + }, + ctx, + ); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const agent = api.get("support"); + expect(agent?.toolIndex.has("analytics.query")).toBe(true); + }); + + test("mixed inline + fromPlugin tools coexist", async () => { + const registry: ToolRegistry = { + query: defineTool({ + description: "q", + schema: z.object({ sql: z.string() }), + handler: () => "ok", + }), + }; + const ctx = fakeContext([ + { + name: "analytics", + provider: makeToolProvider("analytics", registry), + }, + ]); + + const plugin = instantiate( + { + dir: false, + agents: { + support: { + instructions: "...", + model: stubAdapter(), + tools: { + ...fromPlugin(namedFactory("analytics")), + get_weather: tool({ + name: "get_weather", + description: "Weather", + schema: z.object({ city: z.string() }), + execute: async ({ city }) => `Sunny in ${city}`, + }), + }, + }, + }, + }, + ctx, + ); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const agent = api.get("support"); + expect(agent?.toolIndex.has("analytics.query")).toBe(true); + expect(agent?.toolIndex.has("get_weather")).toBe(true); + }); + + test("missing plugin throws at setup with Available: listing", async () => { + const ctx = fakeContext([ + { + name: "files", + provider: makeToolProvider("files", {}), + }, + ]); + + const plugin = instantiate( + { + dir: false, + agents: { + support: { + instructions: "...", + model: stubAdapter(), + tools: { ...fromPlugin(namedFactory("analytics")) }, + }, + }, + }, + ctx, + ); + await expect(plugin.setup()).rejects.toThrow(/analytics/); + await expect(plugin.setup()).rejects.toThrow(/Available:/); + await expect(plugin.setup()).rejects.toThrow(/files/); + }); + + test("symbol-only tools record disables auto-inherit", async () => { + const analyticsReg: ToolRegistry = { + query: defineTool({ + description: "q", + schema: z.object({ sql: z.string() }), + handler: () => "ok", + }), + }; + const filesReg: ToolRegistry = { + list: defineTool({ + description: "l", + schema: z.object({}), + handler: () => [], + }), + }; + const ctx = fakeContext([ + { + name: "analytics", + provider: makeToolProvider("analytics", analyticsReg), + }, + { + name: "files", + provider: makeToolProvider("files", filesReg), + }, + ]); + + const plugin = instantiate( + { + dir: false, + autoInheritTools: { code: true }, + agents: { + support: { + instructions: "...", + model: stubAdapter(), + tools: { ...fromPlugin(namedFactory("analytics")) }, + }, + }, + }, + ctx, + ); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const agent = api.get("support"); + const toolNames = Array.from(agent?.toolIndex.keys() ?? []); + expect(toolNames.some((n) => n.startsWith("analytics."))).toBe(true); + expect(toolNames.some((n) => n.startsWith("files."))).toBe(false); + }); + + test("falls back to getAgentTools() for providers without toolkit()", async () => { + // Provider lacks .toolkit() — only getAgentTools/executeAgentTool. + const bareProvider: ToolProvider = { + getAgentTools: () => [ + { + name: "ping", + description: "ping", + parameters: { type: "object", properties: {} }, + }, + ], + executeAgentTool: vi.fn(async () => "pong"), + }; + const ctx = fakeContext([{ name: "bare", provider: bareProvider }]); + + const plugin = instantiate( + { + dir: false, + agents: { + support: { + instructions: "...", + model: stubAdapter(), + tools: { ...fromPlugin(namedFactory("bare")) }, + }, + }, + }, + ctx, + ); + await plugin.setup(); + + const api = plugin.exports() as { + get: (name: string) => { toolIndex: Map } | null; + }; + const agent = api.get("support"); + expect(agent?.toolIndex.has("bare.ping")).toBe(true); + }); + }); }); diff --git a/packages/appkit/src/plugins/agents/tests/from-plugin.test.ts b/packages/appkit/src/plugins/agents/tests/from-plugin.test.ts new file mode 100644 index 000000000..eb31d0f7d --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/from-plugin.test.ts @@ -0,0 +1,80 @@ +import { describe, expect, test } from "vitest"; +import { + FROM_PLUGIN_MARKER, + fromPlugin, + isFromPluginMarker, +} from "../../../core/agent/from-plugin"; + +function fakeFactory(name: string) { + const f = () => ({ name }); + Object.defineProperty(f, "pluginName", { value: name, enumerable: true }); + return f as typeof f & { readonly pluginName: string }; +} + +describe("fromPlugin", () => { + test("returns a spread-friendly object with a single symbol-keyed marker", () => { + const spread = fromPlugin(fakeFactory("analytics")); + + expect(Object.keys(spread)).toHaveLength(0); + const syms = Object.getOwnPropertySymbols(spread); + expect(syms).toHaveLength(1); + + const marker = (spread as Record)[syms[0]!]; + expect(isFromPluginMarker(marker)).toBe(true); + expect((marker as { pluginName: string }).pluginName).toBe("analytics"); + }); + + test("multiple calls produce distinct symbol keys (spreads coexist)", () => { + const spread = { + ...fromPlugin(fakeFactory("analytics")), + ...fromPlugin(fakeFactory("analytics")), + ...fromPlugin(fakeFactory("files")), + }; + + const syms = Object.getOwnPropertySymbols(spread); + expect(syms).toHaveLength(3); + }); + + test("passes opts through to the marker", () => { + const spread = fromPlugin(fakeFactory("analytics"), { + only: ["query"], + prefix: "q_", + }); + const sym = Object.getOwnPropertySymbols(spread)[0]!; + const marker = (spread as Record)[sym] as { + opts: { only: string[]; prefix: string }; + }; + expect(marker.opts.only).toEqual(["query"]); + expect(marker.opts.prefix).toBe("q_"); + }); + + test("throws when factory has no pluginName", () => { + const missing = () => ({ name: "nope" }); + expect(() => + fromPlugin(missing as unknown as { readonly pluginName: string }), + ).toThrow(/missing pluginName/); + }); + + test("FROM_PLUGIN_MARKER is a globally-interned symbol", () => { + expect(FROM_PLUGIN_MARKER).toBe( + Symbol.for("@databricks/appkit.fromPluginMarker"), + ); + }); +}); + +describe("isFromPluginMarker", () => { + test("returns true for real markers", () => { + const spread = fromPlugin(fakeFactory("analytics")); + const sym = Object.getOwnPropertySymbols(spread)[0]!; + expect(isFromPluginMarker((spread as Record)[sym])).toBe( + true, + ); + }); + + test("returns false for objects without the brand", () => { + expect(isFromPluginMarker({ pluginName: "x" })).toBe(false); + expect(isFromPluginMarker(null)).toBe(false); + expect(isFromPluginMarker(undefined)).toBe(false); + expect(isFromPluginMarker("string")).toBe(false); + }); +}); From f45ac9243ac18ed0dc376c643230187cbbda9a17 Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Thu, 23 Apr 2026 22:07:53 +0200 Subject: [PATCH 11/46] refactor(appkit): use consumeAdapterStream in core/run-agent runAgent()'s adapter-consumption loop is now the same consumeAdapterStream helper introduced in the agents-plugin layer. One loop covers all three execution paths: HTTP streaming (_streamAgent), sub-agents (runSubAgent), and standalone runAgent. The message_delta + message accumulation rule (with its LangChain on_chain_end quirk) lives in exactly one place. --- packages/appkit/src/core/agent/run-agent.ts | 28 +++--- .../appkit/src/core/agent/tool-dispatch.ts | 97 +++++++++++++++++++ 2 files changed, 108 insertions(+), 17 deletions(-) create mode 100644 packages/appkit/src/core/agent/tool-dispatch.ts diff --git a/packages/appkit/src/core/agent/run-agent.ts b/packages/appkit/src/core/agent/run-agent.ts index 4646aec74..9c7c655fd 100644 --- a/packages/appkit/src/core/agent/run-agent.ts +++ b/packages/appkit/src/core/agent/run-agent.ts @@ -101,28 +101,22 @@ export async function runAgent( }; const events: AgentEvent[] = []; - let text = ""; - - const stream = adapter.run( + const text = await consumeAdapterStream( + adapter.run( + { + messages, + tools, + threadId: randomUUID(), + signal, + }, + { executeTool, signal }, + ), { - messages, - tools, - threadId: randomUUID(), signal, + onEvent: (event) => events.push(event), }, - { executeTool, signal }, ); - for await (const event of stream) { - if (signal?.aborted) break; - events.push(event); - if (event.type === "message_delta") { - text += event.content; - } else if (event.type === "message") { - text = event.content; - } - } - return { text, events }; } diff --git a/packages/appkit/src/core/agent/tool-dispatch.ts b/packages/appkit/src/core/agent/tool-dispatch.ts new file mode 100644 index 000000000..a3e220bbd --- /dev/null +++ b/packages/appkit/src/core/agent/tool-dispatch.ts @@ -0,0 +1,97 @@ +import type express from "express"; +import type { AppKitMcpClient } from "../../connectors/mcp"; +import type { PluginContext } from "../../core/plugin-context"; +import type { ResolvedToolEntry } from "./types"; + +interface ToolDispatchContext { + /** + * The originating HTTP request. Used by `toolkit` entries to scope execution + * to the caller's user context (`asUser(req)`) and by `mcp` entries to pick + * up the OBO bearer token from `x-forwarded-access-token`. + */ + req: express.Request; + /** Cancellation signal, forwarded to the tool implementation. */ + signal: AbortSignal; + /** + * PluginContext mediator — required to dispatch `toolkit` entries. Absent in + * unit tests that construct `AgentsPlugin` directly; callers may pass + * `null` / `undefined`, in which case toolkit calls throw a clear error. + */ + pluginContext?: PluginContext | null; + /** Live MCP client. Required for `mcp` entries. */ + mcpClient?: AppKitMcpClient | null; + /** + * Delegates a sub-agent invocation. The closure owns the recursion depth so + * the dispatcher itself remains depth-agnostic — the top-level caller + * passes `depth = 1`, and a sub-agent's inner dispatcher passes `depth + 1`. + */ + runSubAgent: (agentName: string, args: unknown) => Promise; +} + +/** + * Fan-out a resolved tool entry to the correct executor. One place to add a + * fifth `source` variant; `never`-typed default forces every caller to + * update in lockstep. + * + * This only handles dispatch — result normalisation (`normalizeToolResult`), + * budget counting, and approval gating remain at the call site, where each + * stream has different policies. + */ +export async function dispatchToolCall( + entry: ResolvedToolEntry, + args: unknown, + ctx: ToolDispatchContext, +): Promise { + switch (entry.source) { + case "toolkit": { + if (!ctx.pluginContext) { + throw new Error( + "Plugin tool execution requires PluginContext; " + + "this should never happen through createApp.", + ); + } + return ctx.pluginContext.executeTool( + ctx.req, + entry.pluginName, + entry.localName, + args, + ctx.signal, + ); + } + case "function": + return entry.functionTool.execute(args as Record); + case "mcp": { + if (!ctx.mcpClient) throw new Error("MCP client not connected"); + return ctx.mcpClient.callTool( + entry.mcpToolName, + args, + extractOboMcpAuth(ctx.req), + ); + } + case "subagent": + return ctx.runSubAgent(entry.agentName, args); + default: { + // Exhaustiveness guard: adding a new `source` to ResolvedToolEntry + // without teaching this switch breaks the build here. + const _exhaustive: never = entry; + throw new Error( + `Unsupported tool source: ${(_exhaustive as ResolvedToolEntry).source}`, + ); + } + } +} + +/** + * Extracts the caller's OBO bearer token from the standard Databricks Apps + * forwarded-auth header. MCP destinations that `forwardWorkspaceAuth` admits + * as same-origin will receive this header; non-workspace destinations drop + * it inside {@link AppKitMcpClient.callTool}. + */ +function extractOboMcpAuth( + req: express.Request, +): Record | undefined { + const oboToken = req.headers["x-forwarded-access-token"]; + return typeof oboToken === "string" + ? { Authorization: `Bearer ${oboToken}` } + : undefined; +} From 3e23d0c9f7bada1a076de6835884f48342c91972 Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Mon, 4 May 2026 14:59:08 +0200 Subject: [PATCH 12/46] fix(agents): update agents.ts imports to core/agent/ paths after Option A rewrite normalize-result, consume-adapter-stream, tool-dispatch were extracted to core/agent/ but agents.ts still imported them from plugins/agents/. Update the import paths to match the final file locations. --- packages/appkit/src/plugins/agents/agents.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts index 309589e0f..d7218eec7 100644 --- a/packages/appkit/src/plugins/agents/agents.ts +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -16,12 +16,15 @@ import type { } from "shared"; import { AppKitMcpClient, buildMcpHostPolicy } from "../../connectors/mcp"; import { getWorkspaceClient } from "../../context"; +import { consumeAdapterStream } from "../../core/agent/consume-adapter-stream"; import { isFromPluginMarker } from "../../core/agent/from-plugin"; import { loadAgentsFromDir } from "../../core/agent/load-agents"; +import { normalizeToolResult } from "../../core/agent/normalize-result"; import { buildBaseSystemPrompt, composeSystemPrompt, } from "../../core/agent/system-prompt"; +import { dispatchToolCall } from "../../core/agent/tool-dispatch"; import { resolveToolkitFromProvider } from "../../core/agent/toolkit-resolver"; import { functionToolToDefinition, @@ -41,12 +44,10 @@ import { isToolkitEntry } from "../../core/agent/types"; import { createLogger } from "../../logging/logger"; import { Plugin, toPlugin } from "../../plugin"; import type { PluginManifest } from "../../registry"; -import { consumeAdapterStream } from "../../core/agent/consume-adapter-stream"; import { agentStreamDefaults } from "./defaults"; import { EventChannel } from "./event-channel"; import { AgentEventTranslator } from "./event-translator"; import manifest from "./manifest.json"; -import { normalizeToolResult } from "../../core/agent/normalize-result"; import { approvalRequestSchema, chatRequestSchema, From e22db6239fd28e0d5f3a44c2737721e019c5dddd Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Mon, 4 May 2026 19:49:58 +0200 Subject: [PATCH 13/46] feat(appkit): sub-agent approval gate and HttpToolExecutor path (SDK) Signed-off-by: MarioCadenas --- packages/appkit/src/plugins/agents/agents.ts | 373 ++++++++++--------- 1 file changed, 206 insertions(+), 167 deletions(-) diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts index d7218eec7..684315229 100644 --- a/packages/appkit/src/plugins/agents/agents.ts +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -12,49 +12,47 @@ import type { PluginPhase, ResponseStreamEvent, Thread, + ToolAnnotations, ToolProvider, } from "shared"; import { AppKitMcpClient, buildMcpHostPolicy } from "../../connectors/mcp"; import { getWorkspaceClient } from "../../context"; -import { consumeAdapterStream } from "../../core/agent/consume-adapter-stream"; -import { isFromPluginMarker } from "../../core/agent/from-plugin"; -import { loadAgentsFromDir } from "../../core/agent/load-agents"; -import { normalizeToolResult } from "../../core/agent/normalize-result"; -import { - buildBaseSystemPrompt, - composeSystemPrompt, -} from "../../core/agent/system-prompt"; -import { dispatchToolCall } from "../../core/agent/tool-dispatch"; -import { resolveToolkitFromProvider } from "../../core/agent/toolkit-resolver"; -import { - functionToolToDefinition, - isFunctionTool, - isHostedTool, - resolveHostedTools, -} from "../../core/agent/tools"; -import type { - AgentDefinition, - AgentsPluginConfig, - BaseSystemPromptOption, - PromptContext, - RegisteredAgent, - ResolvedToolEntry, -} from "../../core/agent/types"; -import { isToolkitEntry } from "../../core/agent/types"; import { createLogger } from "../../logging/logger"; import { Plugin, toPlugin } from "../../plugin"; import type { PluginManifest } from "../../registry"; +import { consumeAdapterStream } from "./consume-adapter-stream"; import { agentStreamDefaults } from "./defaults"; import { EventChannel } from "./event-channel"; import { AgentEventTranslator } from "./event-translator"; +import { isFromPluginMarker } from "./from-plugin"; +import { loadAgentsFromDir } from "./load-agents"; import manifest from "./manifest.json"; +import { normalizeToolResult } from "./normalize-result"; import { approvalRequestSchema, chatRequestSchema, invocationsRequestSchema, } from "./schemas"; +import { buildBaseSystemPrompt, composeSystemPrompt } from "./system-prompt"; import { InMemoryThreadStore } from "./thread-store"; import { ToolApprovalGate } from "./tool-approval-gate"; +import { dispatchToolCall } from "./tool-dispatch"; +import { resolveToolkitFromProvider } from "./toolkit-resolver"; +import { + functionToolToDefinition, + isFunctionTool, + isHostedTool, + resolveHostedTools, +} from "./tools"; +import type { + AgentDefinition, + AgentsPluginConfig, + BaseSystemPromptOption, + PromptContext, + RegisteredAgent, + ResolvedToolEntry, +} from "./types"; +import { isToolkitEntry } from "./types"; const logger = createLogger("agents"); @@ -381,8 +379,7 @@ export class AgentsPlugin extends Plugin implements ToolProvider { this.resolveFromPluginMarkers(agentName, toolsRecord, index); // 3. Explicit tools (toolkit entries, function tools, hosted tools) - const hostedToCollect: import("../../core/agent/tools/hosted-tools").HostedTool[] = - []; + const hostedToCollect: import("./tools/hosted-tools").HostedTool[] = []; for (const [key, tool] of Object.entries(toolsRecord)) { if (isToolkitEntry(tool)) { index.set(key, { @@ -521,38 +518,28 @@ export class AgentsPlugin extends Plugin implements ToolProvider { } private async connectHostedTools( - hostedTools: import("../../core/agent/tools/hosted-tools").HostedTool[], + hostedTools: import("./tools/hosted-tools").HostedTool[], index: Map, ): Promise { - let host: string | undefined; - let authenticate: () => Promise>; - - try { - const { getWorkspaceClient } = await import("../../context"); - const wsClient = getWorkspaceClient(); - await wsClient.config.ensureResolved(); - host = wsClient.config.host; - authenticate = async () => { - const headers = new Headers(); - await wsClient.config.authenticate(headers); - return Object.fromEntries(headers.entries()); - }; - } catch { - host = process.env.DATABRICKS_HOST; - authenticate = async (): Promise> => { - const token = process.env.DATABRICKS_TOKEN; - return token ? { Authorization: `Bearer ${token}` } : {}; - }; - } + const wsClient = await this.resolveWorkspaceClient(); + await wsClient.config.ensureResolved(); + const host = wsClient.config.host; if (!host) { logger.warn( - "No Databricks host available — skipping %d hosted tool(s)", + "No Databricks host available — skipping %d hosted tool(s). " + + "Set DATABRICKS_HOST or configure a profile in ~/.databrickscfg.", hostedTools.length, ); return; } + const authenticate = async (): Promise> => { + const headers = new Headers(); + await wsClient.config.authenticate(headers); + return Object.fromEntries(headers.entries()); + }; + if (!this.mcpClient) { const policy = buildMcpHostPolicy(this.config.mcp, host); this.mcpClient = new AppKitMcpClient(host, authenticate, policy); @@ -570,6 +557,23 @@ export class AgentsPlugin extends Plugin implements ToolProvider { } } + /** + * Return the ambient workspace client from {@link getWorkspaceClient} when + * `ServiceContext` is initialized (the normal `createApp` path). Fall back + * to a fresh `WorkspaceClient()` that walks the SDK's credential chain — + * `DATABRICKS_HOST` / `DATABRICKS_TOKEN`, `~/.databrickscfg` profiles, + * DAB auth, OAuth, metadata service — for test rigs and manual embeds + * that never ran through `createApp`. + */ + private async resolveWorkspaceClient() { + try { + return getWorkspaceClient(); + } catch { + const { WorkspaceClient } = await import("@databricks/sdk-experimental"); + return new WorkspaceClient({}); + } + } + // ----------------- ToolProvider (no tools of our own) -------------------- getAgentTools(): AgentToolDefinition[] { @@ -793,83 +797,76 @@ export class AgentsPlugin extends Plugin implements ToolProvider { const entry = registered.toolIndex.get(name); if (!entry) throw new Error(`Unknown tool: ${name}`); - if ( - approvalPolicy.requireForDestructive && - entry.def.annotations?.destructive === true - ) { + // Approval flow used by BOTH the parent stream and any sub-agents + // delegated to from it. Sub-agents were previously running destructive + // tools without ever surfacing the gate; this closure lifts the check + // so `runSubAgent.childExecute` can reuse the exact same semantics + // (event emission + gate.wait + deny string). + const checkApproval = async ( + toolEntry: ResolvedToolEntry, + toolArgs: unknown, + ): Promise<"approve" | "deny" | null> => { + if (!approvalPolicy.requireForDestructive) return null; + if (!isDestructiveToolEntry(toolEntry)) return null; const approvalId = randomUUID(); for (const ev of translator.translate({ type: "approval_pending", approvalId, streamId: requestId, - toolName: name, - args, - annotations: entry.def.annotations, + toolName: toolEntry.def.name, + args: toolArgs, + annotations: combinedToolAnnotations(toolEntry), })) { outboundEvents.push(ev); } - const decision = await this.approvalGate.wait({ + return this.approvalGate.wait({ approvalId, streamId: requestId, userId, timeoutMs: approvalPolicy.timeoutMs, }); - if (decision === "deny") { - return `Tool execution denied by user approval gate (tool: ${name}).`; - } + }; + + const decision = await checkApproval(entry, args); + if (decision === "deny") { + return `Tool execution denied by user approval gate (tool: ${name}).`; } - let result: unknown; - if (entry.source === "toolkit") { - if (!this.context) { - throw new Error( - "Plugin tool execution requires PluginContext; this should never happen through createApp", - ); + // Forward tool-call / tool-result events from nested sub-agents into + // the parent's outbound SSE stream. Without this the client only sees + // the outer `agent-` function call and never the inner tool + // invocations the sub-agent makes — so UI-action tools (apply_filter, + // highlight_period, etc.) that rely on SSE-based dispatch are + // invisible to the browser. Message deltas and metadata are + // deliberately NOT forwarded: that would bleed the sub-agent's + // assistant text into the parent's chat and double-emit threadIds. + const forwardSubAgentToolEvent = (ev: AgentEvent): void => { + if (ev.type !== "tool_call" && ev.type !== "tool_result") return; + for (const translated of translator.translate(ev)) { + outboundEvents.push(translated); } - result = await this.context.executeTool( - req, - entry.pluginName, - entry.localName, - args, - signal, - ); - } else if (entry.source === "function") { - result = await entry.functionTool.execute( - args as Record, - ); - } else if (entry.source === "mcp") { - if (!this.mcpClient) throw new Error("MCP client not connected"); - const oboToken = req.headers["x-forwarded-access-token"]; - const mcpAuth = - typeof oboToken === "string" - ? { Authorization: `Bearer ${oboToken}` } - : undefined; - result = await this.mcpClient.callTool( - entry.mcpToolName, - args, - mcpAuth, - ); - } else if (entry.source === "subagent") { - const childAgent = this.agents.get(entry.agentName); - if (!childAgent) - throw new Error(`Sub-agent not found: ${entry.agentName}`); - result = await this.runSubAgent(req, childAgent, args, signal, 1); - } + }; - // A `void` / `undefined` return is a legitimate tool outcome (e.g., a - // "send notification" side-effecting tool). Return an empty string so - // the LLM sees a successful-but-empty result rather than a bogus - // "execution failed" error. - if (result === undefined) { - return ""; - } - const MAX = 50_000; - const serialized = - typeof result === "string" ? result : JSON.stringify(result); - if (serialized.length > MAX) { - return `${serialized.slice(0, MAX)}\n\n[Result truncated: ${serialized.length} chars exceeds ${MAX} limit]`; - } - return result; + const raw = await dispatchToolCall(entry, args, { + req, + signal, + pluginContext: this.context, + mcpClient: this.mcpClient, + runSubAgent: (agentName, subArgs) => { + const childAgent = this.agents.get(agentName); + if (!childAgent) throw new Error(`Sub-agent not found: ${agentName}`); + return this.runSubAgent( + req, + childAgent, + subArgs, + signal, + 1, + forwardSubAgentToolEvent, + checkApproval, + ); + }, + }); + return normalizeToolResult(raw); }; // Drive the adapter and the approval-event side-channel concurrently. @@ -920,26 +917,14 @@ export class AgentsPlugin extends Plugin implements ToolProvider { { executeTool, signal }, ); - // Accumulate assistant output from BOTH streaming and non-streaming - // adapters. Delta-based adapters (Databricks, Vercel AI) emit - // `message_delta` chunks that we concatenate; adapters that yield a - // single final assistant message (e.g. LangChain's `on_chain_end` - // path) emit a `message` event whose content replaces whatever - // deltas already arrived. Without the `message` branch, multi-turn - // LangChain conversations silently dropped the assistant turn from - // thread history. - let fullContent = ""; - for await (const event of stream) { - if (signal.aborted) break; - if (event.type === "message_delta") { - fullContent += event.content; - } else if (event.type === "message") { - fullContent = event.content; - } - for (const translated of translator.translate(event)) { - outboundEvents.push(translated); - } - } + const fullContent = await consumeAdapterStream(stream, { + signal, + onEvent: (event) => { + for (const translated of translator.translate(event)) { + outboundEvents.push(translated); + } + }, + }); if (fullContent) { await this.threadStore.addMessage(thread.id, userId, { @@ -1017,6 +1002,26 @@ export class AgentsPlugin extends Plugin implements ToolProvider { args: unknown, signal: AbortSignal, depth: number, + /** + * Optional per-event sink installed by the parent `_streamAgent`. When + * supplied, each adapter event the child yields is passed through — + * the parent's closure filters it to `tool_call` / `tool_result` so + * inner tool invocations surface to the client's SSE stream without + * also bleeding the sub-agent's assistant text. + */ + onEvent?: (event: AgentEvent) => void, + /** + * Optional approval gate injected by the parent `_streamAgent`. When + * present, sub-agent tool calls annotated `destructive: true` fire + * `appkit.approval_pending` through the parent's outbound channel and + * await the user's decision, exactly like the parent's own executeTool. + * Absent (or returning `null`) means no gate — non-destructive tools + * or approval disabled policy-wide. + */ + checkApproval?: ( + entry: ResolvedToolEntry, + toolArgs: unknown, + ) => Promise<"approve" | "deny" | null>, ): Promise { const limits = this.resolvedLimits; if (depth > limits.maxSubAgentDepth) { @@ -1040,33 +1045,33 @@ export class AgentsPlugin extends Plugin implements ToolProvider { ): Promise => { const entry = child.toolIndex.get(name); if (!entry) throw new Error(`Unknown tool in sub-agent: ${name}`); - if (entry.source === "toolkit" && this.context) { - return this.context.executeTool( - req, - entry.pluginName, - entry.localName, - childArgs, - signal, - ); - } - if (entry.source === "function") { - return entry.functionTool.execute(childArgs as Record); - } - if (entry.source === "subagent") { - const grandchild = this.agents.get(entry.agentName); - if (!grandchild) - throw new Error(`Sub-agent not found: ${entry.agentName}`); - return this.runSubAgent(req, grandchild, childArgs, signal, depth + 1); - } - if (entry.source === "mcp" && this.mcpClient) { - const oboToken = req.headers["x-forwarded-access-token"]; - const mcpAuth = - typeof oboToken === "string" - ? { Authorization: `Bearer ${oboToken}` } - : undefined; - return this.mcpClient.callTool(entry.mcpToolName, childArgs, mcpAuth); + + if (checkApproval) { + const decision = await checkApproval(entry, childArgs); + if (decision === "deny") { + return `Tool execution denied by user approval gate (tool: ${name}).`; + } } - throw new Error(`Unsupported sub-agent tool source: ${entry.source}`); + + return dispatchToolCall(entry, childArgs, { + req, + signal, + pluginContext: this.context, + mcpClient: this.mcpClient, + runSubAgent: (agentName, args) => { + const grandchild = this.agents.get(agentName); + if (!grandchild) throw new Error(`Sub-agent not found: ${agentName}`); + return this.runSubAgent( + req, + grandchild, + args, + signal, + depth + 1, + onEvent, + checkApproval, + ); + }, + }); }; const runContext: AgentRunContext = { executeTool: childExecute, signal }; @@ -1101,17 +1106,13 @@ export class AgentsPlugin extends Plugin implements ToolProvider { }, ]; - let output = ""; - const events: AgentEvent[] = []; - for await (const event of child.adapter.run( - { messages, tools: childTools, threadId: randomUUID(), signal }, - runContext, - )) { - events.push(event); - if (event.type === "message_delta") output += event.content; - else if (event.type === "message") output = event.content; - } - return output; + return consumeAdapterStream( + child.adapter.run( + { messages, tools: childTools, threadId: randomUUID(), signal }, + runContext, + ), + { signal, onEvent }, + ); } private async _handleCancel(req: express.Request, res: express.Response) { @@ -1265,6 +1266,44 @@ export class AgentsPlugin extends Plugin implements ToolProvider { } } +/** + * True when the tool should go through the approval gate. Historically + * scoped to `destructive: true` — hence the name — but now also fires for + * the semantic `effect` enum on {@link ToolAnnotations}. Any effect that + * mutates the world (`write` | `update` | `destructive`) gates; `read` and + * unannotated tools do not. `def.annotations` is the normal path; for + * `function` tools we also read `functionTool.annotations` so a mismatch + * between the spread def and the original {@link FunctionTool} cannot drop + * the hint. + */ +function isDestructiveToolEntry(entry: ResolvedToolEntry): boolean { + const defAnn = entry.def.annotations; + const fnAnn = + entry.source === "function" ? entry.functionTool.annotations : undefined; + + const effect = defAnn?.effect ?? fnAnn?.effect; + if (effect === "write" || effect === "update" || effect === "destructive") { + return true; + } + if (defAnn?.destructive === true) return true; + if (fnAnn?.destructive === true) return true; + return false; +} + +/** Merged annotations for the approval SSE payload (client UI + debugging). */ +function combinedToolAnnotations( + entry: ResolvedToolEntry, +): ToolAnnotations | undefined { + if (entry.source === "function") { + const merged: ToolAnnotations = { + ...entry.functionTool.annotations, + ...entry.def.annotations, + }; + return Object.keys(merged).length > 0 ? merged : undefined; + } + return entry.def.annotations; +} + function normalizeAutoInherit(value: AgentsPluginConfig["autoInheritTools"]): { file: boolean; code: boolean; @@ -1302,7 +1341,7 @@ function composePromptForAgent( } /** - * Plugin factory for the agents plugin. Reads `config/agents/*.md` by default, + * Plugin factory for the agents plugin. Reads `config/agents//agent.md` by default, * resolves toolkits/tools from registered plugins, exposes `appkit.agents.*` * runtime API and mounts `/invocations`. * From 62dc7f3e1e9788e7d709ff9aba92a2e6f6a73ba7 Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Mon, 4 May 2026 19:50:02 +0200 Subject: [PATCH 14/46] fix(appkit): forward all sub-agent events except metadata (SDK) Signed-off-by: MarioCadenas --- packages/appkit/src/plugins/agents/agents.ts | 33 +++++++++++--------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts index 684315229..ab264b295 100644 --- a/packages/appkit/src/plugins/agents/agents.ts +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -832,16 +832,21 @@ export class AgentsPlugin extends Plugin implements ToolProvider { return `Tool execution denied by user approval gate (tool: ${name}).`; } - // Forward tool-call / tool-result events from nested sub-agents into - // the parent's outbound SSE stream. Without this the client only sees - // the outer `agent-` function call and never the inner tool - // invocations the sub-agent makes — so UI-action tools (apply_filter, - // highlight_period, etc.) that rely on SSE-based dispatch are - // invisible to the browser. Message deltas and metadata are - // deliberately NOT forwarded: that would bleed the sub-agent's - // assistant text into the parent's chat and double-emit threadIds. - const forwardSubAgentToolEvent = (ev: AgentEvent): void => { - if (ev.type !== "tool_call" && ev.type !== "tool_result") return; + // Forward events from nested sub-agents into the parent's outbound + // SSE stream so the client sees inner tool calls AND the sub-agent's + // streaming text as it's generated. Without this the user stares at + // "thinking…" for the full duration of the sub-agent run. + // + // The one exception is `metadata`: sub-agents have their own + // threadId, and forwarding it would overwrite the parent's thread + // state on the client and break multi-turn continuity. + // + // `approval_pending` is not emitted by adapters directly — it comes + // through `checkApproval()` which already pushes to the parent's + // outboundEvents — so sub-agent destructive approvals surface + // independently of this forwarder. + const forwardSubAgentEvent = (ev: AgentEvent): void => { + if (ev.type === "metadata") return; for (const translated of translator.translate(ev)) { outboundEvents.push(translated); } @@ -861,7 +866,7 @@ export class AgentsPlugin extends Plugin implements ToolProvider { subArgs, signal, 1, - forwardSubAgentToolEvent, + forwardSubAgentEvent, checkApproval, ); }, @@ -1005,9 +1010,9 @@ export class AgentsPlugin extends Plugin implements ToolProvider { /** * Optional per-event sink installed by the parent `_streamAgent`. When * supplied, each adapter event the child yields is passed through — - * the parent's closure filters it to `tool_call` / `tool_result` so - * inner tool invocations surface to the client's SSE stream without - * also bleeding the sub-agent's assistant text. + * the parent's closure forwards everything except `metadata` so the + * sub-agent's streaming text, tool invocations, and thinking blocks + * all surface to the client's SSE stream in real time. */ onEvent?: (event: AgentEvent) => void, /** From 732498d3951aa80d4c967404fd93201a9dec049c Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Wed, 29 Apr 2026 18:19:25 +0200 Subject: [PATCH 15/46] refactor(appkit): move agent runtime to core/agent/ Flips the layering: agent types, helpers, and the standalone runner now live in core/agent/ instead of plugins/agents/. The HTTP-facing agents() plugin still owns its routes/streaming/threads but no longer re-exports framework primitives that peer plugins depend on. Moved (with git mv to preserve history): - plugins/agents/{types,from-plugin,build-toolkit,toolkit-resolver, consume-adapter-stream,normalize-result,tool-dispatch,system-prompt, load-agents}.ts -> core/agent/ - plugins/agents/tools/{tool,define-tool,function-tool,hosted-tools, sql-policy,json-schema,index}.ts -> core/agent/tools/ - core/{run-agent,create-agent-def}.ts -> core/agent/{run-agent,create-agent}.ts - 14 corresponding test files -> core/agent/tests/ Stayed in plugins/agents/ (HTTP/route concerns): - agents.ts, event-channel.ts, event-translator.ts, tool-approval-gate.ts, thread-store.ts, schemas.ts, defaults.ts, manifest.json, index.ts Updated imports across analytics, files, genie, lakebase to source from core/agent/ directly. plugins/agents/index.ts stays as a back-compat barrel that re-exports the moved primitives, so the public package surface (@databricks/appkit) is byte-identical. Verified: tsc --noEmit clean, 1581/1581 appkit tests pass. --- knip.json | 10 +-- packages/appkit/src/connectors/mcp/types.ts | 2 +- packages/appkit/src/core/agent/index.ts | 64 +++++++++++++++++++ .../src/core/agent/tests/create-agent.test.ts | 4 +- .../agent}/tests/from-plugin.test.ts | 0 .../agent}/tests/mcp-server-helper.test.ts | 0 .../src/core/agent/tests/run-agent.test.ts | 2 +- .../appkit/src/core/agent/tool-dispatch.ts | 2 +- packages/appkit/src/plugins/agents/agents.ts | 52 ++++++++------- packages/appkit/src/plugins/agents/index.ts | 3 + 10 files changed, 105 insertions(+), 34 deletions(-) create mode 100644 packages/appkit/src/core/agent/index.ts rename packages/appkit/src/{plugins/agents => core/agent}/tests/from-plugin.test.ts (100%) rename packages/appkit/src/{plugins/agents => core/agent}/tests/mcp-server-helper.test.ts (100%) diff --git a/knip.json b/knip.json index f530b3918..0a2351b79 100644 --- a/knip.json +++ b/knip.json @@ -19,15 +19,15 @@ "packages/appkit/src/plugin/index.ts", "packages/appkit/src/plugin/to-plugin.ts", "packages/appkit/src/plugins/agents/index.ts", - "template/**", - "tools/**", - "docs/**", - ".github/scripts/**", + "packages/appkit/src/core/agent/index.ts", "packages/appkit/src/core/agent/tools/index.ts", "packages/appkit/src/core/agent/from-plugin.ts", "packages/appkit/src/core/agent/load-agents.ts", "packages/appkit/src/connectors/mcp/index.ts", - "packages/appkit/src/plugin/to-plugin.ts" + "template/**", + "tools/**", + "docs/**", + ".github/scripts/**" ], "ignoreDependencies": ["json-schema-to-typescript"], "ignoreBinaries": ["tarball"] diff --git a/packages/appkit/src/connectors/mcp/types.ts b/packages/appkit/src/connectors/mcp/types.ts index d74f0a46c..aeb61788e 100644 --- a/packages/appkit/src/connectors/mcp/types.ts +++ b/packages/appkit/src/connectors/mcp/types.ts @@ -1,7 +1,7 @@ /** * Input shape consumed by {@link AppKitMcpClient.connect}. Produced by the * agents plugin from user-facing `HostedTool` declarations (see - * `plugins/agents/tools/hosted-tools.ts`) and accepted directly by the + * `core/agent/tools/hosted-tools.ts`) and accepted directly by the * connector to keep its surface free of agent-layer concepts. */ export interface McpEndpointConfig { diff --git a/packages/appkit/src/core/agent/index.ts b/packages/appkit/src/core/agent/index.ts new file mode 100644 index 000000000..c11c48282 --- /dev/null +++ b/packages/appkit/src/core/agent/index.ts @@ -0,0 +1,64 @@ +/** + * Agent runtime primitives. All framework-level agent types, tool helpers, + * and the standalone runner live here. The HTTP-facing `agents()` plugin in + * `plugins/agents/` consumes these but does not own them — peer plugins + * (analytics, files, genie, lakebase) can depend on this module without + * reaching across the sibling boundary. + */ +export { buildToolkitEntries } from "./build-toolkit"; +export { consumeAdapterStream } from "./consume-adapter-stream"; +export { createAgent } from "./create-agent"; +export { + FROM_PLUGIN_MARKER, + type FromPluginMarker, + type FromPluginSpread, + fromPlugin, + isFromPluginMarker, +} from "./from-plugin"; +export { + agentIdFromMarkdownPath, + type LoadContext, + type LoadResult, + loadAgentFromFile, + loadAgentsFromDir, + parseFrontmatter, +} from "./load-agents"; +export { normalizeToolResult } from "./normalize-result"; +export { + type RunAgentInput, + type RunAgentResult, + runAgent, +} from "./run-agent"; +export { buildBaseSystemPrompt, composeSystemPrompt } from "./system-prompt"; +export { dispatchToolCall } from "./tool-dispatch"; +export { resolveToolkitFromProvider } from "./toolkit-resolver"; +export { + defineTool, + executeFromRegistry, + type FunctionTool, + functionToolToDefinition, + type HostedTool, + isFunctionTool, + isHostedTool, + mcpServer, + resolveHostedTools, + type ToolConfig, + type ToolEntry, + type ToolRegistry, + tool, + toolsFromRegistry, +} from "./tools"; +export { + type AgentDefinition, + type AgentsPluginConfig, + type AgentTool, + type AgentTools, + type AutoInheritToolsConfig, + type BaseSystemPromptOption, + isToolkitEntry, + type PromptContext, + type RegisteredAgent, + type ResolvedToolEntry, + type ToolkitEntry, + type ToolkitOptions, +} from "./types"; diff --git a/packages/appkit/src/core/agent/tests/create-agent.test.ts b/packages/appkit/src/core/agent/tests/create-agent.test.ts index 30bcc98e9..df920369e 100644 --- a/packages/appkit/src/core/agent/tests/create-agent.test.ts +++ b/packages/appkit/src/core/agent/tests/create-agent.test.ts @@ -1,8 +1,8 @@ import { describe, expect, test } from "vitest"; import { z } from "zod"; import { createAgent } from "../create-agent"; -import { tool } from "../../../core/agent/tools/tool"; -import type { AgentDefinition } from "../../../core/agent/types"; +import { tool } from "../tools/tool"; +import type { AgentDefinition } from "../types"; describe("createAgent", () => { test("returns the definition unchanged for a simple agent", () => { diff --git a/packages/appkit/src/plugins/agents/tests/from-plugin.test.ts b/packages/appkit/src/core/agent/tests/from-plugin.test.ts similarity index 100% rename from packages/appkit/src/plugins/agents/tests/from-plugin.test.ts rename to packages/appkit/src/core/agent/tests/from-plugin.test.ts diff --git a/packages/appkit/src/plugins/agents/tests/mcp-server-helper.test.ts b/packages/appkit/src/core/agent/tests/mcp-server-helper.test.ts similarity index 100% rename from packages/appkit/src/plugins/agents/tests/mcp-server-helper.test.ts rename to packages/appkit/src/core/agent/tests/mcp-server-helper.test.ts diff --git a/packages/appkit/src/core/agent/tests/run-agent.test.ts b/packages/appkit/src/core/agent/tests/run-agent.test.ts index 5324dde21..9ead2c177 100644 --- a/packages/appkit/src/core/agent/tests/run-agent.test.ts +++ b/packages/appkit/src/core/agent/tests/run-agent.test.ts @@ -10,11 +10,11 @@ import type { } from "shared"; import { describe, expect, test, vi } from "vitest"; import { z } from "zod"; -import type { ToolkitEntry } from "../../../core/agent/types"; import { createAgent } from "../create-agent"; import { fromPlugin } from "../from-plugin"; import { runAgent } from "../run-agent"; import { tool } from "../tools/tool"; +import type { ToolkitEntry } from "../types"; function scriptedAdapter(events: AgentEvent[]): AgentAdapter { return { diff --git a/packages/appkit/src/core/agent/tool-dispatch.ts b/packages/appkit/src/core/agent/tool-dispatch.ts index a3e220bbd..728f57990 100644 --- a/packages/appkit/src/core/agent/tool-dispatch.ts +++ b/packages/appkit/src/core/agent/tool-dispatch.ts @@ -1,6 +1,6 @@ import type express from "express"; import type { AppKitMcpClient } from "../../connectors/mcp"; -import type { PluginContext } from "../../core/plugin-context"; +import type { PluginContext } from "../plugin-context"; import type { ResolvedToolEntry } from "./types"; interface ToolDispatchContext { diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts index ab264b295..a3af8a78a 100644 --- a/packages/appkit/src/plugins/agents/agents.ts +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -17,42 +17,45 @@ import type { } from "shared"; import { AppKitMcpClient, buildMcpHostPolicy } from "../../connectors/mcp"; import { getWorkspaceClient } from "../../context"; +import { consumeAdapterStream } from "../../core/agent/consume-adapter-stream"; +import { isFromPluginMarker } from "../../core/agent/from-plugin"; +import { loadAgentsFromDir } from "../../core/agent/load-agents"; +import { normalizeToolResult } from "../../core/agent/normalize-result"; +import { + buildBaseSystemPrompt, + composeSystemPrompt, +} from "../../core/agent/system-prompt"; +import { dispatchToolCall } from "../../core/agent/tool-dispatch"; +import { resolveToolkitFromProvider } from "../../core/agent/toolkit-resolver"; +import { + functionToolToDefinition, + isFunctionTool, + isHostedTool, + resolveHostedTools, +} from "../../core/agent/tools"; +import type { + AgentDefinition, + AgentsPluginConfig, + BaseSystemPromptOption, + PromptContext, + RegisteredAgent, + ResolvedToolEntry, +} from "../../core/agent/types"; +import { isToolkitEntry } from "../../core/agent/types"; import { createLogger } from "../../logging/logger"; import { Plugin, toPlugin } from "../../plugin"; import type { PluginManifest } from "../../registry"; -import { consumeAdapterStream } from "./consume-adapter-stream"; import { agentStreamDefaults } from "./defaults"; import { EventChannel } from "./event-channel"; import { AgentEventTranslator } from "./event-translator"; -import { isFromPluginMarker } from "./from-plugin"; -import { loadAgentsFromDir } from "./load-agents"; import manifest from "./manifest.json"; -import { normalizeToolResult } from "./normalize-result"; import { approvalRequestSchema, chatRequestSchema, invocationsRequestSchema, } from "./schemas"; -import { buildBaseSystemPrompt, composeSystemPrompt } from "./system-prompt"; import { InMemoryThreadStore } from "./thread-store"; import { ToolApprovalGate } from "./tool-approval-gate"; -import { dispatchToolCall } from "./tool-dispatch"; -import { resolveToolkitFromProvider } from "./toolkit-resolver"; -import { - functionToolToDefinition, - isFunctionTool, - isHostedTool, - resolveHostedTools, -} from "./tools"; -import type { - AgentDefinition, - AgentsPluginConfig, - BaseSystemPromptOption, - PromptContext, - RegisteredAgent, - ResolvedToolEntry, -} from "./types"; -import { isToolkitEntry } from "./types"; const logger = createLogger("agents"); @@ -379,7 +382,8 @@ export class AgentsPlugin extends Plugin implements ToolProvider { this.resolveFromPluginMarkers(agentName, toolsRecord, index); // 3. Explicit tools (toolkit entries, function tools, hosted tools) - const hostedToCollect: import("./tools/hosted-tools").HostedTool[] = []; + const hostedToCollect: import("../../core/agent/tools/hosted-tools").HostedTool[] = + []; for (const [key, tool] of Object.entries(toolsRecord)) { if (isToolkitEntry(tool)) { index.set(key, { @@ -518,7 +522,7 @@ export class AgentsPlugin extends Plugin implements ToolProvider { } private async connectHostedTools( - hostedTools: import("./tools/hosted-tools").HostedTool[], + hostedTools: import("../../core/agent/tools/hosted-tools").HostedTool[], index: Map, ): Promise { const wsClient = await this.resolveWorkspaceClient(); diff --git a/packages/appkit/src/plugins/agents/index.ts b/packages/appkit/src/plugins/agents/index.ts index 404da2432..f630cc681 100644 --- a/packages/appkit/src/plugins/agents/index.ts +++ b/packages/appkit/src/plugins/agents/index.ts @@ -1,3 +1,6 @@ +// Re-exports of agent primitives that now live in core/agent/. Kept here so +// the public package barrel (`@databricks/appkit`) and any callers that +// already imported via `./plugins/agents` continue to resolve unchanged. export { buildToolkitEntries } from "../../core/agent/build-toolkit"; export { FROM_PLUGIN_MARKER, From 3e4eaca326a781ebb562459b728e2b3dd26ffb4e Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Wed, 29 Apr 2026 18:35:43 +0200 Subject: [PATCH 16/46] refactor(appkit): extract AgentRunner + ToolExecutor strategy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collapses the two parallel agent loops (`_streamAgent` in the plugin and `runAgent` in core) onto a single AgentRunner that drives the adapter to completion and surfaces events. Tool dispatch policy moves behind a ToolExecutor strategy injected by the caller. New: - core/agent/runner.ts (AgentRunner + ToolExecutor interface, ~65 lines) - core/agent/standalone-tool-executor.ts (in-process dispatch, ~78 lines) - plugins/agents/http-tool-executor.ts (HTTP-path executor: budget + approval gate + OBO dispatch + sub-agent recursion, ~243 lines) - plugins/agents/tests/http-tool-executor.test.ts (8 focused tests including sub-agent approval forwarding — was effectively untestable pre-refactor because the logic lived inside a private nested closure) Refactored: - core/agent/run-agent.ts: 348 -> 296 lines; the ~120-line executeTool closure is now a StandaloneToolExecutor + AgentRunner instantiation (~25 lines). - plugins/agents/agents.ts: 1362 -> 1262 lines; `_streamAgent` shrinks from 233 lines (with a 95-line nested executeTool closure) to ~150 lines that build an HttpToolExecutor + AgentRunner. Behaviour preserved: - Top-level budget enforcement (sub-agents pass budget=null, mirroring the original closure that only counted at the outer executeTool) - Approval gate fires on `effect: write|update|destructive` and the legacy `destructive: true` flag - Sub-agents reuse the parent's checkApproval + outboundEvents + translator + abortController so destructive sub-agent tools surface approval_pending on the parent's SSE stream - Sub-agent event forwarding skips `metadata` to avoid clobbering the parent thread state, matching the prior closure exactly Verified: tsc --noEmit clean, knip clean, 1589/1589 appkit tests pass. --- packages/appkit/src/core/agent/run-agent.ts | 97 ++--- packages/appkit/src/core/agent/runner.ts | 65 ++++ .../core/agent/standalone-tool-executor.ts | 78 ++++ packages/appkit/src/plugins/agents/agents.ts | 210 +++-------- .../src/plugins/agents/http-tool-executor.ts | 243 ++++++++++++ .../agents/tests/http-tool-executor.test.ts | 357 ++++++++++++++++++ 6 files changed, 822 insertions(+), 228 deletions(-) create mode 100644 packages/appkit/src/core/agent/runner.ts create mode 100644 packages/appkit/src/core/agent/standalone-tool-executor.ts create mode 100644 packages/appkit/src/plugins/agents/http-tool-executor.ts create mode 100644 packages/appkit/src/plugins/agents/tests/http-tool-executor.test.ts diff --git a/packages/appkit/src/core/agent/run-agent.ts b/packages/appkit/src/core/agent/run-agent.ts index 9c7c655fd..2c7e60f07 100644 --- a/packages/appkit/src/core/agent/run-agent.ts +++ b/packages/appkit/src/core/agent/run-agent.ts @@ -8,11 +8,14 @@ import type { PluginData, ToolProvider, } from "shared"; -import { consumeAdapterStream } from "./consume-adapter-stream"; import { isFromPluginMarker } from "./from-plugin"; +import { AgentRunner } from "./runner"; +import { + type StandaloneEntry, + StandaloneToolExecutor, +} from "./standalone-tool-executor"; import { resolveToolkitFromProvider } from "./toolkit-resolver"; import { - type FunctionTool, functionToolToDefinition, isFunctionTool, } from "./tools/function-tool"; @@ -65,58 +68,33 @@ export async function runAgent( const toolIndex = buildStandaloneToolIndex(def, input.plugins ?? []); const tools = Array.from(toolIndex.values()).map((e) => e.def); - const signal = input.signal; + // `runAgent` historically allowed callers to omit a signal. Synthesize one + // so AgentRunner / ToolExecutor can rely on always having a real signal. + const signal = input.signal ?? new AbortController().signal; - const executeTool = async (name: string, args: unknown): Promise => { - const entry = toolIndex.get(name); - if (!entry) throw new Error(`Unknown tool: ${name}`); - if (entry.kind === "function") { - return entry.tool.execute(args as Record); - } - if (entry.kind === "toolkit") { - return entry.provider.executeAgentTool( - entry.localName, - args as Record, - signal, - ); - } - if (entry.kind === "subagent") { - const subInput: RunAgentInput = { - messages: - typeof args === "object" && - args !== null && - typeof (args as { input?: unknown }).input === "string" - ? (args as { input: string }).input - : JSON.stringify(args), - signal, + const events: AgentEvent[] = []; + + const executor = new StandaloneToolExecutor( + toolIndex, + async (subDef, subInput, subSignal) => { + const res = await runAgent(subDef, { + messages: subInput, + signal: subSignal, plugins: input.plugins, - }; - const res = await runAgent(entry.agentDef, subInput); + }); return res.text; - } - throw new Error( - `runAgent: tool "${name}" is a ${entry.kind} tool. ` + - "Hosted/MCP tools are only usable via createApp({ plugins: [..., agents(...)] }).", - ); - }; - - const events: AgentEvent[] = []; - const text = await consumeAdapterStream( - adapter.run( - { - messages, - tools, - threadId: randomUUID(), - signal, - }, - { executeTool, signal }, - ), - { - signal, - onEvent: (event) => events.push(event), }, ); + const runner = new AgentRunner({ + adapter, + tools, + executeTool: executor, + signal, + onEvent: (event) => events.push(event), + }); + + const text = await runner.run({ messages, threadId: randomUUID() }); return { text, events }; } @@ -157,29 +135,6 @@ function normalizeMessages( return [systemMessage, ...input]; } -type StandaloneEntry = - | { - kind: "function"; - def: AgentToolDefinition; - tool: FunctionTool; - } - | { - kind: "subagent"; - def: AgentToolDefinition; - agentDef: AgentDefinition; - } - | { - kind: "toolkit"; - def: AgentToolDefinition; - provider: ToolProvider; - pluginName: string; - localName: string; - } - | { - kind: "hosted"; - def: AgentToolDefinition; - }; - /** * Resolves `def.tools` (string-keyed entries + symbol-keyed `fromPlugin` * markers) and `def.agents` (sub-agents) into a flat dispatch index. diff --git a/packages/appkit/src/core/agent/runner.ts b/packages/appkit/src/core/agent/runner.ts new file mode 100644 index 000000000..ac6f2ed3f --- /dev/null +++ b/packages/appkit/src/core/agent/runner.ts @@ -0,0 +1,65 @@ +import type { + AgentAdapter, + AgentEvent, + AgentToolDefinition, + Message, +} from "shared"; +import { consumeAdapterStream } from "./consume-adapter-stream"; + +/** + * Execution strategy for a tool call. Lives behind {@link AgentRunner} so + * the runner doesn't care whether the tool ends up dispatched via HTTP + * plumbing (approval gate, OBO, MCP client) or by direct in-process call. + * + * The runner injects the adapter's per-invocation {@link AbortSignal} so + * implementations can wire it through to long-running work. + */ +export interface ToolExecutor { + execute(name: string, args: unknown, signal: AbortSignal): Promise; +} + +interface AgentRunnerInput { + messages: Message[]; + threadId: string; +} + +interface AgentRunnerDeps { + adapter: AgentAdapter; + tools: AgentToolDefinition[]; + executeTool: ToolExecutor; + signal: AbortSignal; + /** Called for every event the adapter emits, in order. */ + onEvent?: (event: AgentEvent) => void; +} + +/** + * Single execution loop for an AgentDefinition. Intentionally thin — its + * only job is to drive the adapter to completion and surface events. + * + * Tool-dispatch policy (approval gating, per-user budget, OBO, MCP) + * is owned by the injected {@link ToolExecutor}. The plugin layer wires an + * `HttpToolExecutor` for the streaming chat path; `runAgent()` wires a + * `StandaloneToolExecutor` for in-process scripts. + */ +export class AgentRunner { + constructor(private deps: AgentRunnerDeps) {} + + async run(input: AgentRunnerInput): Promise { + const { adapter, tools, executeTool, signal, onEvent } = this.deps; + return consumeAdapterStream( + adapter.run( + { + messages: input.messages, + tools, + threadId: input.threadId, + signal, + }, + { + executeTool: (name, args) => executeTool.execute(name, args, signal), + signal, + }, + ), + { signal, onEvent }, + ); + } +} diff --git a/packages/appkit/src/core/agent/standalone-tool-executor.ts b/packages/appkit/src/core/agent/standalone-tool-executor.ts new file mode 100644 index 000000000..d78e0e6bc --- /dev/null +++ b/packages/appkit/src/core/agent/standalone-tool-executor.ts @@ -0,0 +1,78 @@ +import type { AgentToolDefinition, ToolProvider } from "shared"; +import type { ToolExecutor } from "./runner"; +import type { FunctionTool } from "./tools/function-tool"; +import type { AgentDefinition } from "./types"; + +/** + * Tool entry shape used by `runAgent`'s in-process dispatcher. Distinct + * from {@link import("./types").ResolvedToolEntry} because the standalone + * path holds live `provider`/`agentDef` references at index-build time + * (no PluginContext to resolve from at dispatch time). + */ +export type StandaloneEntry = + | { kind: "function"; def: AgentToolDefinition; tool: FunctionTool } + | { kind: "subagent"; def: AgentToolDefinition; agentDef: AgentDefinition } + | { + kind: "toolkit"; + def: AgentToolDefinition; + provider: ToolProvider; + pluginName: string; + localName: string; + } + | { kind: "hosted"; def: AgentToolDefinition }; + +/** + * In-process tool executor for {@link import("./run-agent").runAgent}. + * + * No approval gate, no per-user budget, no OBO — there is no HTTP + * request in standalone mode. Hosted/MCP tools error with a clear + * message because they require a live MCP client owned by the + * `agents()` plugin. + * + * Sub-agent recursion delegates back to the caller (which is `runAgent` + * itself); this keeps the executor free of a circular import on the + * top-level entry point. + */ +export class StandaloneToolExecutor implements ToolExecutor { + constructor( + private readonly toolIndex: Map, + private readonly subAgentRunner: ( + def: AgentDefinition, + input: string, + signal: AbortSignal, + ) => Promise, + ) {} + + async execute( + name: string, + args: unknown, + signal: AbortSignal, + ): Promise { + const entry = this.toolIndex.get(name); + if (!entry) throw new Error(`Unknown tool: ${name}`); + + if (entry.kind === "function") { + return entry.tool.execute(args as Record); + } + if (entry.kind === "toolkit") { + return entry.provider.executeAgentTool( + entry.localName, + args as Record, + signal, + ); + } + if (entry.kind === "subagent") { + const sub = + typeof args === "object" && + args !== null && + typeof (args as { input?: unknown }).input === "string" + ? (args as { input: string }).input + : JSON.stringify(args); + return this.subAgentRunner(entry.agentDef, sub, signal); + } + throw new Error( + `runAgent: tool "${name}" is a ${entry.kind} tool. ` + + "Hosted/MCP tools are only usable via createApp({ plugins: [..., agents(...)] }).", + ); + } +} diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts index a3af8a78a..5b56c48d5 100644 --- a/packages/appkit/src/plugins/agents/agents.ts +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -12,7 +12,6 @@ import type { PluginPhase, ResponseStreamEvent, Thread, - ToolAnnotations, ToolProvider, } from "shared"; import { AppKitMcpClient, buildMcpHostPolicy } from "../../connectors/mcp"; @@ -20,7 +19,7 @@ import { getWorkspaceClient } from "../../context"; import { consumeAdapterStream } from "../../core/agent/consume-adapter-stream"; import { isFromPluginMarker } from "../../core/agent/from-plugin"; import { loadAgentsFromDir } from "../../core/agent/load-agents"; -import { normalizeToolResult } from "../../core/agent/normalize-result"; +import { AgentRunner } from "../../core/agent/runner"; import { buildBaseSystemPrompt, composeSystemPrompt, @@ -48,6 +47,11 @@ import type { PluginManifest } from "../../registry"; import { agentStreamDefaults } from "./defaults"; import { EventChannel } from "./event-channel"; import { AgentEventTranslator } from "./event-translator"; +import { + type ApprovalCheck, + HttpToolExecutor, + type ToolBudget, +} from "./http-tool-executor"; import manifest from "./manifest.json"; import { approvalRequestSchema, @@ -773,110 +777,53 @@ export class AgentsPlugin extends Plugin implements ToolProvider { this.activeStreams.set(requestId, { controller: abortController, userId }); const tools = Array.from(registered.toolIndex.values()).map((e) => e.def); - const approvalPolicy = this.resolvedApprovalPolicy; const limits = this.resolvedLimits; const outboundEvents = new EventChannel(); const translator = new AgentEventTranslator(); - // Per-run tool-call budget (shared across the top-level adapter and any - // sub-agents it delegates to). Counted pre-dispatch so a prompt-injected + // Per-run tool-call budget shared across the top-level adapter and any + // sub-agents it delegates to. Counted pre-dispatch so a prompt-injected // agent cannot drain the budget silently via denied calls. - let toolCallsUsed = 0; - - const executeTool = async ( - name: string, - args: unknown, - ): Promise => { - if (toolCallsUsed >= limits.maxToolCalls) { - abortController.abort( - new Error( - `Tool-call budget exhausted (limit ${limits.maxToolCalls}).`, - ), - ); - throw new Error( - `Tool-call budget exhausted (limit ${limits.maxToolCalls}). Raise agents({ limits: { maxToolCalls } }) or review the agent's tool-selection logic.`, + const budget: ToolBudget = { used: 0, limit: limits.maxToolCalls }; + + const executor = new HttpToolExecutor({ + toolIndex: registered.toolIndex, + approvalPolicy: this.resolvedApprovalPolicy, + approvalGate: this.approvalGate, + translator, + outboundEvents, + abortController, + budget, + req, + streamId: requestId, + userId, + pluginContext: this.context, + mcpClient: this.mcpClient, + runSubAgent: (agentName, subArgs, subSignal, forwardEvent, check) => { + const childAgent = this.agents.get(agentName); + if (!childAgent) throw new Error(`Sub-agent not found: ${agentName}`); + return this.runSubAgent( + req, + childAgent, + subArgs, + subSignal, + 1, + forwardEvent, + check, ); - } - toolCallsUsed++; - - const entry = registered.toolIndex.get(name); - if (!entry) throw new Error(`Unknown tool: ${name}`); - - // Approval flow used by BOTH the parent stream and any sub-agents - // delegated to from it. Sub-agents were previously running destructive - // tools without ever surfacing the gate; this closure lifts the check - // so `runSubAgent.childExecute` can reuse the exact same semantics - // (event emission + gate.wait + deny string). - const checkApproval = async ( - toolEntry: ResolvedToolEntry, - toolArgs: unknown, - ): Promise<"approve" | "deny" | null> => { - if (!approvalPolicy.requireForDestructive) return null; - if (!isDestructiveToolEntry(toolEntry)) return null; - const approvalId = randomUUID(); - for (const ev of translator.translate({ - type: "approval_pending", - approvalId, - streamId: requestId, - toolName: toolEntry.def.name, - args: toolArgs, - annotations: combinedToolAnnotations(toolEntry), - })) { - outboundEvents.push(ev); - } - return this.approvalGate.wait({ - approvalId, - streamId: requestId, - userId, - timeoutMs: approvalPolicy.timeoutMs, - }); - }; - - const decision = await checkApproval(entry, args); - if (decision === "deny") { - return `Tool execution denied by user approval gate (tool: ${name}).`; - } + }, + }); - // Forward events from nested sub-agents into the parent's outbound - // SSE stream so the client sees inner tool calls AND the sub-agent's - // streaming text as it's generated. Without this the user stares at - // "thinking…" for the full duration of the sub-agent run. - // - // The one exception is `metadata`: sub-agents have their own - // threadId, and forwarding it would overwrite the parent's thread - // state on the client and break multi-turn continuity. - // - // `approval_pending` is not emitted by adapters directly — it comes - // through `checkApproval()` which already pushes to the parent's - // outboundEvents — so sub-agent destructive approvals surface - // independently of this forwarder. - const forwardSubAgentEvent = (ev: AgentEvent): void => { - if (ev.type === "metadata") return; - for (const translated of translator.translate(ev)) { + const runner = new AgentRunner({ + adapter: registered.adapter, + tools, + executeTool: executor, + signal, + onEvent: (event) => { + for (const translated of translator.translate(event)) { outboundEvents.push(translated); } - }; - - const raw = await dispatchToolCall(entry, args, { - req, - signal, - pluginContext: this.context, - mcpClient: this.mcpClient, - runSubAgent: (agentName, subArgs) => { - const childAgent = this.agents.get(agentName); - if (!childAgent) throw new Error(`Sub-agent not found: ${agentName}`); - return this.runSubAgent( - req, - childAgent, - subArgs, - signal, - 1, - forwardSubAgentEvent, - checkApproval, - ); - }, - }); - return normalizeToolResult(raw); - }; + }, + }); // Drive the adapter and the approval-event side-channel concurrently. // Outbound events from both sources flow through `outboundEvents`; the @@ -916,23 +863,9 @@ export class AgentsPlugin extends Plugin implements ToolProvider { ...thread.messages, ]; - const stream = registered.adapter.run( - { - messages: messagesWithSystem, - tools, - threadId: thread.id, - signal, - }, - { executeTool, signal }, - ); - - const fullContent = await consumeAdapterStream(stream, { - signal, - onEvent: (event) => { - for (const translated of translator.translate(event)) { - outboundEvents.push(translated); - } - }, + const fullContent = await runner.run({ + messages: messagesWithSystem, + threadId: thread.id, }); if (fullContent) { @@ -1027,10 +960,7 @@ export class AgentsPlugin extends Plugin implements ToolProvider { * Absent (or returning `null`) means no gate — non-destructive tools * or approval disabled policy-wide. */ - checkApproval?: ( - entry: ResolvedToolEntry, - toolArgs: unknown, - ) => Promise<"approve" | "deny" | null>, + checkApproval?: ApprovalCheck, ): Promise { const limits = this.resolvedLimits; if (depth > limits.maxSubAgentDepth) { @@ -1048,6 +978,10 @@ export class AgentsPlugin extends Plugin implements ToolProvider { : JSON.stringify(args); const childTools = Array.from(child.toolIndex.values()).map((e) => e.def); + // Sub-agent dispatch reuses the parent's approval check so a destructive + // tool fires `approval_pending` on the parent's SSE stream. Sub-agents + // do not enforce their own budget — the parent already counted the + // `agent-` invocation. const childExecute = async ( name: string, childArgs: unknown, @@ -1275,44 +1209,6 @@ export class AgentsPlugin extends Plugin implements ToolProvider { } } -/** - * True when the tool should go through the approval gate. Historically - * scoped to `destructive: true` — hence the name — but now also fires for - * the semantic `effect` enum on {@link ToolAnnotations}. Any effect that - * mutates the world (`write` | `update` | `destructive`) gates; `read` and - * unannotated tools do not. `def.annotations` is the normal path; for - * `function` tools we also read `functionTool.annotations` so a mismatch - * between the spread def and the original {@link FunctionTool} cannot drop - * the hint. - */ -function isDestructiveToolEntry(entry: ResolvedToolEntry): boolean { - const defAnn = entry.def.annotations; - const fnAnn = - entry.source === "function" ? entry.functionTool.annotations : undefined; - - const effect = defAnn?.effect ?? fnAnn?.effect; - if (effect === "write" || effect === "update" || effect === "destructive") { - return true; - } - if (defAnn?.destructive === true) return true; - if (fnAnn?.destructive === true) return true; - return false; -} - -/** Merged annotations for the approval SSE payload (client UI + debugging). */ -function combinedToolAnnotations( - entry: ResolvedToolEntry, -): ToolAnnotations | undefined { - if (entry.source === "function") { - const merged: ToolAnnotations = { - ...entry.functionTool.annotations, - ...entry.def.annotations, - }; - return Object.keys(merged).length > 0 ? merged : undefined; - } - return entry.def.annotations; -} - function normalizeAutoInherit(value: AgentsPluginConfig["autoInheritTools"]): { file: boolean; code: boolean; diff --git a/packages/appkit/src/plugins/agents/http-tool-executor.ts b/packages/appkit/src/plugins/agents/http-tool-executor.ts new file mode 100644 index 000000000..5217ab414 --- /dev/null +++ b/packages/appkit/src/plugins/agents/http-tool-executor.ts @@ -0,0 +1,243 @@ +import { randomUUID } from "node:crypto"; +import type express from "express"; +import type { AgentEvent, ResponseStreamEvent, ToolAnnotations } from "shared"; +import type { AppKitMcpClient } from "../../connectors/mcp"; +import { normalizeToolResult } from "../../core/agent/normalize-result"; +import type { ToolExecutor } from "../../core/agent/runner"; +import { dispatchToolCall } from "../../core/agent/tool-dispatch"; +import type { ResolvedToolEntry } from "../../core/agent/types"; +import type { PluginContext } from "../../core/plugin-context"; +import type { EventChannel } from "./event-channel"; +import type { AgentEventTranslator } from "./event-translator"; +import type { ToolApprovalGate } from "./tool-approval-gate"; + +/** + * Decision returned by the approval check. `null` means "no gate fires" + * (tool isn't gated, or policy disabled gating). `"approve"` / `"deny"` + * mirror the user's submission via `POST /approve`. + */ +export type ApprovalDecision = "approve" | "deny" | null; + +/** + * Approval-check function reused by both the parent stream's executor and + * any sub-agent executors it spawns. Lifted to a callable so sub-agents + * can share the parent's translator + outboundEvents + approvalGate. + */ +export type ApprovalCheck = ( + entry: ResolvedToolEntry, + args: unknown, +) => Promise; + +/** + * Sub-agent runner injected by the plugin. Returns the sub-agent's + * concatenated text output to hand back to the parent adapter as the + * tool result. Hidden behind a callback so the executor doesn't need to + * import the plugin class (cycle). + */ +type RunSubAgentFn = ( + agentName: string, + args: unknown, + signal: AbortSignal, + forwardEvent: (e: AgentEvent) => void, + checkApproval: ApprovalCheck, +) => Promise; + +/** + * Mutable per-run tool-call budget. Shared by reference between the + * top-level executor and any sub-agent executors so `maxToolCalls` is + * enforced across the whole run, not per-agent. + */ +export interface ToolBudget { + used: number; + limit: number; +} + +interface HttpToolExecutorDeps { + toolIndex: Map; + /** Approval policy as resolved from `agents({ approval: ... })`. */ + approvalPolicy: { requireForDestructive: boolean; timeoutMs: number }; + approvalGate: ToolApprovalGate; + /** Translator used to emit `approval_pending` to the SSE stream. */ + translator: AgentEventTranslator; + /** Channel the SSE stream drains. Approval events are pushed here. */ + outboundEvents: EventChannel; + /** Aborted on budget exhaustion to unwind the adapter promptly. */ + abortController: AbortController; + /** + * Shared tool-call budget. Pass the same object to every executor in the + * run (top-level + sub-agents) so the cap is global. Pass `null` for + * sub-agent executors that should not count against the budget — only + * the parent enforces, mirroring the original closure's behaviour. + */ + budget: ToolBudget | null; + /** OBO source: forwarded to dispatchToolCall for plugin-tool dispatch. */ + req: express.Request; + /** SSE stream id (used for approval gate scoping + telemetry). */ + streamId: string; + /** Authenticated user id, scoped per-stream by `_handleApprove`. */ + userId: string; + /** PluginContext for OBO tool dispatch. May be undefined in tests. */ + pluginContext: PluginContext | undefined; + /** MCP client for hosted-tool dispatch. May be null pre-connect. */ + mcpClient: AppKitMcpClient | null; + /** Plugin-supplied factory that runs a sub-agent. */ + runSubAgent: RunSubAgentFn; +} + +/** + * HTTP-path tool executor for the streaming chat surface. + * + * Wraps the same logic that used to live as a closure inside + * `_streamAgent`: per-run budget, the approval gate, OBO dispatch via + * {@link dispatchToolCall}, sub-agent recursion, and event forwarding. + * + * Sub-agents share the parent's `translator`, `outboundEvents`, + * `approvalGate`, and `abortController` (so a sub-agent's destructive + * tool surfaces an `approval_pending` event on the parent's SSE stream + * and a sub-agent's budget exhaustion aborts the whole run). The + * `budget` is null for sub-agents so they don't double-count against + * the top-level cap — the parent already incremented when it dispatched + * the `agent-` call. + */ +export class HttpToolExecutor implements ToolExecutor { + constructor(private deps: HttpToolExecutorDeps) {} + + async execute( + name: string, + args: unknown, + signal: AbortSignal, + ): Promise { + const { budget, abortController } = this.deps; + + if (budget) { + if (budget.used >= budget.limit) { + abortController.abort( + new Error(`Tool-call budget exhausted (limit ${budget.limit}).`), + ); + throw new Error( + `Tool-call budget exhausted (limit ${budget.limit}). ` + + "Raise agents({ limits: { maxToolCalls } }) or review the agent's tool-selection logic.", + ); + } + budget.used++; + } + + const entry = this.deps.toolIndex.get(name); + if (!entry) throw new Error(`Unknown tool: ${name}`); + + const decision = await this.checkApproval(entry, args); + if (decision === "deny") { + return `Tool execution denied by user approval gate (tool: ${name}).`; + } + + // Forward events from nested sub-agents into the parent's outbound SSE + // stream so the client sees inner tool calls AND the sub-agent's + // streaming text as it's generated. Without this the user stares at + // "thinking…" for the full duration of the sub-agent run. + // + // The one exception is `metadata`: sub-agents have their own threadId, + // and forwarding it would overwrite the parent's thread state on the + // client and break multi-turn continuity. + // + // `approval_pending` is not emitted by adapters directly — it comes + // through `checkApproval()` which already pushes to the parent's + // outboundEvents — so sub-agent destructive approvals surface + // independently of this forwarder. + const forwardSubAgentEvent = (ev: AgentEvent): void => { + if (ev.type === "metadata") return; + for (const translated of this.deps.translator.translate(ev)) { + this.deps.outboundEvents.push(translated); + } + }; + + const raw = await dispatchToolCall(entry, args, { + req: this.deps.req, + signal, + pluginContext: this.deps.pluginContext, + mcpClient: this.deps.mcpClient, + runSubAgent: (agentName, subArgs) => + this.deps.runSubAgent( + agentName, + subArgs, + signal, + forwardSubAgentEvent, + this.checkApproval, + ), + }); + return normalizeToolResult(raw); + } + + /** + * Approval gate hook. Bound as an arrow so sub-agent executors can pass + * it through to {@link RunSubAgentFn} and the gate fires using the + * parent's translator + outboundEvents + approvalGate. Public so tests + * can drive it directly. + */ + readonly checkApproval: ApprovalCheck = async (entry, args) => { + const { + approvalPolicy, + approvalGate, + translator, + outboundEvents, + streamId, + userId, + } = this.deps; + if (!approvalPolicy.requireForDestructive) return null; + if (!isDestructiveToolEntry(entry)) return null; + const approvalId = randomUUID(); + for (const ev of translator.translate({ + type: "approval_pending", + approvalId, + streamId, + toolName: entry.def.name, + args, + annotations: combinedToolAnnotations(entry), + })) { + outboundEvents.push(ev); + } + return approvalGate.wait({ + approvalId, + streamId, + userId, + timeoutMs: approvalPolicy.timeoutMs, + }); + }; +} + +/** + * True when the tool should go through the approval gate. Historically + * scoped to `destructive: true` — hence the name — but now also fires for + * the semantic `effect` enum on {@link ToolAnnotations}. Any effect that + * mutates the world (`write` | `update` | `destructive`) gates; `read` and + * unannotated tools do not. `def.annotations` is the normal path; for + * `function` tools we also read `functionTool.annotations` so a mismatch + * between the spread def and the original {@link FunctionTool} cannot drop + * the hint. + */ +function isDestructiveToolEntry(entry: ResolvedToolEntry): boolean { + const defAnn = entry.def.annotations; + const fnAnn = + entry.source === "function" ? entry.functionTool.annotations : undefined; + + const effect = defAnn?.effect ?? fnAnn?.effect; + if (effect === "write" || effect === "update" || effect === "destructive") { + return true; + } + if (defAnn?.destructive === true) return true; + if (fnAnn?.destructive === true) return true; + return false; +} + +/** Merged annotations for the approval SSE payload (client UI + debugging). */ +function combinedToolAnnotations( + entry: ResolvedToolEntry, +): ToolAnnotations | undefined { + if (entry.source === "function") { + const merged: ToolAnnotations = { + ...entry.functionTool.annotations, + ...entry.def.annotations, + }; + return Object.keys(merged).length > 0 ? merged : undefined; + } + return entry.def.annotations; +} diff --git a/packages/appkit/src/plugins/agents/tests/http-tool-executor.test.ts b/packages/appkit/src/plugins/agents/tests/http-tool-executor.test.ts new file mode 100644 index 000000000..90b6d0ff4 --- /dev/null +++ b/packages/appkit/src/plugins/agents/tests/http-tool-executor.test.ts @@ -0,0 +1,357 @@ +import type express from "express"; +import type { ResponseStreamEvent } from "shared"; +import { describe, expect, test, vi } from "vitest"; +import type { ResolvedToolEntry } from "../../../core/agent/types"; +import { EventChannel } from "../event-channel"; +import { AgentEventTranslator } from "../event-translator"; +import { HttpToolExecutor, type ToolBudget } from "../http-tool-executor"; +import { ToolApprovalGate } from "../tool-approval-gate"; + +/** + * Focused tests for HttpToolExecutor — particularly the sub-agent approval + * forwarding path. The runner-level abstraction makes it tractable to drive + * the executor without spinning up a full HTTP stream: + * + * - Per-run budget gating (top-level enforces, sub-agents skip) + * - approval_pending emission to the parent's outbound channel + * - approve / deny decision flow + * - Sub-agent dispatch reuses the parent's checkApproval (the bit that + * used to be a private nested closure inside `_streamAgent` and was + * hard to test pre-refactor) + */ + +function functionEntry( + name: string, + opts?: { effect?: "write" | "destructive" }, +) { + const ann = opts?.effect ? { effect: opts.effect } : undefined; + return { + source: "function", + def: { + name, + description: `${name} tool`, + parameters: { type: "object", properties: {} }, + annotations: ann, + }, + functionTool: { + name, + description: `${name} tool`, + schema: { type: "object", properties: {} }, + annotations: ann, + execute: vi.fn(async () => `${name}-result`), + }, + } as unknown as ResolvedToolEntry; +} + +function subAgentEntry(name: string) { + return { + source: "subagent", + agentName: name, + def: { + name: `agent-${name}`, + description: `Delegate to ${name}`, + parameters: { type: "object", properties: {} }, + }, + } as unknown as ResolvedToolEntry; +} + +function mockReq(): express.Request { + return { + headers: { "x-forwarded-user": "alice" }, + header: () => "alice", + } as unknown as express.Request; +} + +function fixture(opts?: { + budget?: ToolBudget | null; + requireForDestructive?: boolean; + toolIndex?: Map; + runSubAgent?: HttpToolExecutorDepsRunSubAgent; +}) { + const outboundEvents = new EventChannel(); + const translator = new AgentEventTranslator(); + const approvalGate = new ToolApprovalGate(); + const abortController = new AbortController(); + const toolIndex = + opts?.toolIndex ?? + new Map([ + ["safe", functionEntry("safe")], + ["risky", functionEntry("risky", { effect: "write" })], + ]); + + const executor = new HttpToolExecutor({ + toolIndex, + approvalPolicy: { + requireForDestructive: opts?.requireForDestructive ?? true, + timeoutMs: 5_000, + }, + approvalGate, + translator, + outboundEvents, + abortController, + budget: opts?.budget === undefined ? { used: 0, limit: 50 } : opts.budget, + req: mockReq(), + streamId: "stream-1", + userId: "alice", + pluginContext: undefined, + mcpClient: null, + runSubAgent: + opts?.runSubAgent ?? + ((_n, _a, _s, _f, _c) => Promise.resolve("(no sub-agent)")), + }); + + return { + executor, + outboundEvents, + approvalGate, + abortController, + toolIndex, + drainEvents: async () => { + const events: ResponseStreamEvent[] = []; + // Cap reads so a hang here surfaces as a test timeout, not a leak. + for (let i = 0; i < 100; i++) { + const next = await Promise.race([ + (async () => { + for await (const ev of outboundEvents) return ev; + return null; + })(), + new Promise((r) => setTimeout(() => r(null), 10)), + ]); + if (!next) break; + events.push(next); + } + return events; + }, + }; +} + +type HttpToolExecutorDepsRunSubAgent = ConstructorParameters< + typeof HttpToolExecutor +>[0]["runSubAgent"]; + +describe("HttpToolExecutor", () => { + describe("budget", () => { + test("rejects + aborts when top-level budget is exhausted", async () => { + const { executor, abortController } = fixture({ + budget: { used: 50, limit: 50 }, + }); + + await expect( + executor.execute("safe", {}, abortController.signal), + ).rejects.toThrow(/Tool-call budget exhausted/); + + expect(abortController.signal.aborted).toBe(true); + }); + + test("budget=null skips counting (sub-agent semantics)", async () => { + const { executor, abortController } = fixture({ budget: null }); + + const r1 = await executor.execute("safe", {}, abortController.signal); + const r2 = await executor.execute("safe", {}, abortController.signal); + expect(r1).toBe("safe-result"); + expect(r2).toBe("safe-result"); + expect(abortController.signal.aborted).toBe(false); + }); + }); + + describe("approval gate", () => { + test("non-destructive tools bypass the gate", async () => { + const { executor, abortController, outboundEvents } = fixture(); + + const result = await executor.execute("safe", {}, abortController.signal); + + expect(result).toBe("safe-result"); + // Drain — there should be no approval_pending event in the channel. + outboundEvents.close(); + const events: ResponseStreamEvent[] = []; + for await (const ev of outboundEvents) events.push(ev); + const approvals = events.filter( + (e) => e.type === "appkit.approval_pending", + ); + expect(approvals).toEqual([]); + }); + + test("write-effect tool emits approval_pending and waits for decision", async () => { + const { executor, abortController, approvalGate, outboundEvents } = + fixture(); + + const promise = executor.execute("risky", {}, abortController.signal); + + // The executor pushes approval_pending synchronously and then awaits + // the gate. Settle the gate by reading the approvalId from the SSE + // payload — this mirrors what `POST /approve` does in production. + const approvalId = await readApprovalId(outboundEvents); + expect(approvalId).toBeDefined(); + + approvalGate.submit({ + approvalId, + userId: "alice", + decision: "approve", + }); + + const result = await promise; + expect(result).toBe("risky-result"); + }); + + test("denied destructive tool returns a deny string instead of dispatching", async () => { + const { executor, abortController, approvalGate, outboundEvents } = + fixture(); + + const promise = executor.execute("risky", {}, abortController.signal); + + const approvalId = await readApprovalId(outboundEvents); + + approvalGate.submit({ + approvalId, + userId: "alice", + decision: "deny", + }); + + const result = await promise; + expect(result).toMatch(/denied by user approval gate/); + }); + + test("requireForDestructive=false short-circuits the gate even on write tools", async () => { + const { executor, abortController, outboundEvents } = fixture({ + requireForDestructive: false, + }); + + const result = await executor.execute( + "risky", + {}, + abortController.signal, + ); + expect(result).toBe("risky-result"); + + outboundEvents.close(); + const seen: ResponseStreamEvent[] = []; + for await (const ev of outboundEvents) seen.push(ev); + expect(seen.some((e) => e.type === "appkit.approval_pending")).toBe( + false, + ); + }); + }); + + describe("sub-agent approval forwarding", () => { + test("destructive tool inside a sub-agent surfaces approval_pending on the parent's stream", async () => { + // This is the bit that used to be a private nested closure inside + // `_streamAgent` and was effectively untestable pre-refactor: the + // parent's `checkApproval` is passed *into* the sub-agent's runner + // so the SSE payload lands on the parent's outbound channel. + const childIndex = new Map([ + ["destroy", functionEntry("destroy", { effect: "destructive" })], + ]); + + const parentIndex = new Map([ + ["agent-worker", subAgentEntry("worker")], + ]); + + // Spy on what the runSubAgent factory receives. + const runSubAgentSpy = vi.fn( + async (_name, _args, signal, _forwardEvent, checkApproval) => { + // Sub-agent invokes its destructive tool through the parent's + // approval check, exactly as `runSubAgent.childExecute` does. + const childEntry = childIndex.get("destroy"); + if (!childEntry) throw new Error("destroy missing from child index"); + const decision = await checkApproval(childEntry, { x: 1 }); + if (decision === "deny") return "denied"; + if (signal.aborted) throw new Error("aborted"); + return "destroyed"; + }, + ); + + const { executor, approvalGate, outboundEvents, abortController } = + fixture({ + toolIndex: parentIndex, + runSubAgent: runSubAgentSpy, + }); + + const promise = executor.execute( + "agent-worker", + { input: "do it" }, + abortController.signal, + ); + + const { approvalId, toolName } = + await readApprovalDetails(outboundEvents); + + expect(toolName, "approval_pending must surface on parent stream").toBe( + "destroy", + ); + + approvalGate.submit({ + approvalId, + userId: "alice", + decision: "approve", + }); + + await expect(promise).resolves.toBe("destroyed"); + expect(runSubAgentSpy).toHaveBeenCalledTimes(1); + }); + + test("denied sub-agent tool yields a deny string handled inside the sub-agent", async () => { + const childIndex = new Map([ + ["destroy", functionEntry("destroy", { effect: "destructive" })], + ]); + const parentIndex = new Map([ + ["agent-worker", subAgentEntry("worker")], + ]); + + const runSubAgentSpy = vi.fn( + async (_name, _args, _signal, _forward, checkApproval) => { + const childEntry = childIndex.get("destroy"); + if (!childEntry) throw new Error("destroy missing from child index"); + const decision = await checkApproval(childEntry, {}); + if (decision === "deny") return "child-saw-deny"; + return "should-not-happen"; + }, + ); + + const { executor, approvalGate, outboundEvents, abortController } = + fixture({ + toolIndex: parentIndex, + runSubAgent: runSubAgentSpy, + }); + + const promise = executor.execute( + "agent-worker", + { input: "x" }, + abortController.signal, + ); + + const approvalId = await readApprovalId(outboundEvents); + + approvalGate.submit({ + approvalId, + userId: "alice", + decision: "deny", + }); + + await expect(promise).resolves.toBe("child-saw-deny"); + }); + }); +}); + +interface ApprovalEvent { + type: "appkit.approval_pending"; + approval_id: string; + tool_name: string; +} + +async function readApprovalDetails( + channel: EventChannel, +): Promise<{ approvalId: string; toolName: string }> { + for await (const ev of channel) { + if (ev.type === "appkit.approval_pending") { + const a = ev as unknown as ApprovalEvent; + return { approvalId: a.approval_id, toolName: a.tool_name }; + } + } + throw new Error("Channel closed before approval_pending arrived"); +} + +async function readApprovalId( + channel: EventChannel, +): Promise { + return (await readApprovalDetails(channel)).approvalId; +} From 8993db241ab2c99b782ca3ca4c733003e5ff151f Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Wed, 29 Apr 2026 18:41:56 +0200 Subject: [PATCH 17/46] refactor(appkit): split agents.ts helpers into separate modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracts `composePromptForAgent` + `normalizeAutoInherit` into plugins/agents/prompt.ts and `printRegistry` into plugins/agents/registry-printer.ts. These were free-function helpers at the bottom of agents.ts with no dependency on plugin state — pure candidates for extraction. Also opens the door for the bigger split (route handlers and `_streamAgent`/`runSubAgent` extracted into routes/*.ts and tool-execution.ts) by relaxing the access modifier on plugin members those modules will need (`agents`, `activeStreams`, `mcpClient`, `threadStore`, `approvalGate`, `resolvedApprovalPolicy`, `resolvedLimits`, `countUserStreams`). All marked `@internal` to keep the public surface unchanged. Note: the full split into `routes/` and `tool-execution.ts` proposed in plans/agent-architecture-followup.md is deferred. Route handlers and `_streamAgent`/`runSubAgent` remain as methods on AgentsPlugin because they have heavy plugin-state coupling and cross-call patterns (`runSubAgent` recurses, `_handleChat` calls `_streamAgent`, etc.) that don't translate cleanly to free functions without a larger refactor. Tracked as a follow-up. agents.ts: 1262 -> 1212 lines (-50). The plan's aspirational target of <=280 isn't met because the per-route extraction pass is deferred, but the helper extraction + access-modifier relaxation lays the groundwork. Verified: tsc --noEmit clean, 1589/1589 appkit tests pass. --- packages/appkit/src/plugins/agents/agents.ts | 92 +++++-------------- packages/appkit/src/plugins/agents/prompt.ts | 57 ++++++++++++ .../src/plugins/agents/registry-printer.ts | 25 +++++ 3 files changed, 103 insertions(+), 71 deletions(-) create mode 100644 packages/appkit/src/plugins/agents/prompt.ts create mode 100644 packages/appkit/src/plugins/agents/registry-printer.ts diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts index 5b56c48d5..d2a113f4c 100644 --- a/packages/appkit/src/plugins/agents/agents.ts +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -20,10 +20,6 @@ import { consumeAdapterStream } from "../../core/agent/consume-adapter-stream"; import { isFromPluginMarker } from "../../core/agent/from-plugin"; import { loadAgentsFromDir } from "../../core/agent/load-agents"; import { AgentRunner } from "../../core/agent/runner"; -import { - buildBaseSystemPrompt, - composeSystemPrompt, -} from "../../core/agent/system-prompt"; import { dispatchToolCall } from "../../core/agent/tool-dispatch"; import { resolveToolkitFromProvider } from "../../core/agent/toolkit-resolver"; import { @@ -35,8 +31,6 @@ import { import type { AgentDefinition, AgentsPluginConfig, - BaseSystemPromptOption, - PromptContext, RegisteredAgent, ResolvedToolEntry, } from "../../core/agent/types"; @@ -53,6 +47,8 @@ import { type ToolBudget, } from "./http-tool-executor"; import manifest from "./manifest.json"; +import { composePromptForAgent, normalizeAutoInherit } from "./prompt"; +import { printRegistry } from "./registry-printer"; import { approvalRequestSchema, chatRequestSchema, @@ -80,15 +76,21 @@ export class AgentsPlugin extends Plugin implements ToolProvider { protected declare config: AgentsPluginConfig; - private agents = new Map(); - private defaultAgentName: string | null = null; - private activeStreams = new Map< + /** @internal - Mutated by route handlers and tool-execution helpers. */ + agents = new Map(); + /** @internal */ + defaultAgentName: string | null = null; + /** @internal */ + activeStreams = new Map< string, { controller: AbortController; userId: string } >(); - private mcpClient: AppKitMcpClient | null = null; - private threadStore; - private approvalGate = new ToolApprovalGate(); + /** @internal */ + mcpClient: AppKitMcpClient | null = null; + /** @internal */ + threadStore; + /** @internal */ + approvalGate = new ToolApprovalGate(); constructor(config: AgentsPluginConfig) { super(config); @@ -111,8 +113,8 @@ export class AgentsPlugin extends Plugin implements ToolProvider { } } - /** Effective approval policy with defaults applied. */ - private get resolvedApprovalPolicy(): { + /** Effective approval policy with defaults applied. @internal */ + get resolvedApprovalPolicy(): { requireForDestructive: boolean; timeoutMs: number; } { @@ -123,8 +125,8 @@ export class AgentsPlugin extends Plugin implements ToolProvider { }; } - /** Effective DoS limits with defaults applied. */ - private get resolvedLimits(): { + /** Effective DoS limits with defaults applied. @internal */ + get resolvedLimits(): { maxConcurrentStreamsPerUser: number; maxToolCalls: number; maxSubAgentDepth: number; @@ -137,8 +139,8 @@ export class AgentsPlugin extends Plugin implements ToolProvider { }; } - /** Count active streams owned by a given user. */ - private countUserStreams(userId: string): number { + /** Count active streams owned by a given user. @internal */ + countUserStreams(userId: string): number { let n = 0; for (const entry of this.activeStreams.values()) { if (entry.userId === userId) n++; @@ -149,7 +151,7 @@ export class AgentsPlugin extends Plugin implements ToolProvider { async setup() { await this.loadAgents(); this.mountInvocationsRoute(); - this.printRegistry(); + printRegistry(this.agents, this.defaultAgentName); } /** @@ -1161,22 +1163,6 @@ export class AgentsPlugin extends Plugin implements ToolProvider { return first.done ? null : first.value; } - private printRegistry(): void { - if (this.agents.size === 0) return; - console.log(""); - console.log(` ${pc.bold("Agents")} ${pc.dim(`(${this.agents.size})`)}`); - console.log(` ${pc.dim("─".repeat(60))}`); - for (const [name, reg] of this.agents) { - const tools = reg.toolIndex.size; - const marker = name === this.defaultAgentName ? pc.green("●") : " "; - console.log( - ` ${marker} ${pc.bold(name.padEnd(24))} ${pc.dim(`${tools} tools`)}`, - ); - } - console.log(` ${pc.dim("─".repeat(60))}`); - console.log(""); - } - async shutdown(): Promise { this.approvalGate.abortAll(); if (this.mcpClient) { @@ -1209,42 +1195,6 @@ export class AgentsPlugin extends Plugin implements ToolProvider { } } -function normalizeAutoInherit(value: AgentsPluginConfig["autoInheritTools"]): { - file: boolean; - code: boolean; -} { - // Default is opt-out for both origins. A markdown agent or code-defined - // agent with no declared `tools:` gets an empty tool index unless the - // developer explicitly flips `autoInheritTools` on. Even then, only tools - // whose plugin author marked `autoInheritable: true` are spread — see - // `applyAutoInherit` for the filter. - if (value === undefined) return { file: false, code: false }; - if (typeof value === "boolean") return { file: value, code: value }; - return { file: value.file ?? false, code: value.code ?? false }; -} - -function composePromptForAgent( - registered: RegisteredAgent, - pluginLevel: BaseSystemPromptOption | undefined, - ctx: PromptContext, -): string { - const perAgent = registered.baseSystemPrompt; - const resolved = perAgent !== undefined ? perAgent : pluginLevel; - - let base = ""; - if (resolved === false) { - base = ""; - } else if (typeof resolved === "string") { - base = resolved; - } else if (typeof resolved === "function") { - base = resolved(ctx); - } else { - base = buildBaseSystemPrompt(ctx); - } - - return composeSystemPrompt(base, registered.instructions); -} - /** * Plugin factory for the agents plugin. Reads `config/agents//agent.md` by default, * resolves toolkits/tools from registered plugins, exposes `appkit.agents.*` diff --git a/packages/appkit/src/plugins/agents/prompt.ts b/packages/appkit/src/plugins/agents/prompt.ts new file mode 100644 index 000000000..d38c6e645 --- /dev/null +++ b/packages/appkit/src/plugins/agents/prompt.ts @@ -0,0 +1,57 @@ +import { + buildBaseSystemPrompt, + composeSystemPrompt, +} from "../../core/agent/system-prompt"; +import type { + AgentsPluginConfig, + BaseSystemPromptOption, + PromptContext, + RegisteredAgent, +} from "../../core/agent/types"; + +/** + * Resolves the per-agent and plugin-level base prompt options into the + * final system prompt sent to the adapter. Per-agent setting wins over + * plugin-level; `false` opts out entirely; functions receive the same + * `PromptContext` that the default builder uses. + */ +export function composePromptForAgent( + registered: RegisteredAgent, + pluginLevel: BaseSystemPromptOption | undefined, + ctx: PromptContext, +): string { + const perAgent = registered.baseSystemPrompt; + const resolved = perAgent !== undefined ? perAgent : pluginLevel; + + let base = ""; + if (resolved === false) { + base = ""; + } else if (typeof resolved === "string") { + base = resolved; + } else if (typeof resolved === "function") { + base = resolved(ctx); + } else { + base = buildBaseSystemPrompt(ctx); + } + + return composeSystemPrompt(base, registered.instructions); +} + +/** + * Resolves the plugin-level `autoInheritTools` config into a per-origin + * decision. Default is opt-out for both origins. A markdown agent or + * code-defined agent with no declared `tools:` gets an empty tool index + * unless the developer explicitly flips `autoInheritTools` on. Even then, + * only tools whose plugin author marked `autoInheritable: true` are + * spread — see `applyAutoInherit` for the filter. + */ +export function normalizeAutoInherit( + value: AgentsPluginConfig["autoInheritTools"], +): { + file: boolean; + code: boolean; +} { + if (value === undefined) return { file: false, code: false }; + if (typeof value === "boolean") return { file: value, code: value }; + return { file: value.file ?? false, code: value.code ?? false }; +} diff --git a/packages/appkit/src/plugins/agents/registry-printer.ts b/packages/appkit/src/plugins/agents/registry-printer.ts new file mode 100644 index 000000000..9231ee077 --- /dev/null +++ b/packages/appkit/src/plugins/agents/registry-printer.ts @@ -0,0 +1,25 @@ +import pc from "picocolors"; +import type { RegisteredAgent } from "../../core/agent/types"; + +/** + * Pretty-prints the registered agent set during plugin setup. Decorative — + * no behaviour change if it's skipped (e.g., from tests). + */ +export function printRegistry( + agents: Map, + defaultAgentName: string | null, +): void { + if (agents.size === 0) return; + console.log(""); + console.log(` ${pc.bold("Agents")} ${pc.dim(`(${agents.size})`)}`); + console.log(` ${pc.dim("─".repeat(60))}`); + for (const [name, reg] of agents) { + const tools = reg.toolIndex.size; + const marker = name === defaultAgentName ? pc.green("●") : " "; + console.log( + ` ${marker} ${pc.bold(name.padEnd(24))} ${pc.dim(`${tools} tools`)}`, + ); + } + console.log(` ${pc.dim("─".repeat(60))}`); + console.log(""); +} From b739024308b2c05fd2d24396701c971cd12302f6 Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Mon, 4 May 2026 19:50:25 +0200 Subject: [PATCH 18/46] feat(appkit): unify on DATABRICKS_SERVING_ENDPOINT_NAME (SDK + template manifest) Signed-off-by: MarioCadenas --- packages/appkit/src/plugins/agents/agents.ts | 2 +- packages/appkit/src/plugins/agents/manifest.json | 6 +++--- template/appkit.plugins.json | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts index d2a113f4c..80cda55e4 100644 --- a/packages/appkit/src/plugins/agents/agents.ts +++ b/packages/appkit/src/plugins/agents/agents.ts @@ -317,7 +317,7 @@ export class AgentsPlugin extends Plugin implements ToolProvider { ); } catch (err) { throw new Error( - `Agent '${name}' has no model configured and no DATABRICKS_AGENT_ENDPOINT default available`, + `Agent '${name}' has no model configured and no DATABRICKS_SERVING_ENDPOINT_NAME default available`, { cause: err instanceof Error ? err : undefined }, ); } diff --git a/packages/appkit/src/plugins/agents/manifest.json b/packages/appkit/src/plugins/agents/manifest.json index 4571031a8..01e843a85 100644 --- a/packages/appkit/src/plugins/agents/manifest.json +++ b/packages/appkit/src/plugins/agents/manifest.json @@ -11,12 +11,12 @@ "type": "serving_endpoint", "alias": "Model Serving (agents)", "resourceKey": "agents-serving-endpoint", - "description": "Databricks Model Serving endpoint for agents using workspace-hosted models (`DatabricksAdapter.fromModelServing`). Wire the same endpoint name AppKit reads from `DATABRICKS_AGENT_ENDPOINT` when no per-agent model is configured. Omit when agents use only external adapters.", + "description": "Databricks Model Serving endpoint for agents using workspace-hosted models (`DatabricksAdapter.fromModelServing`). Wire the same endpoint name AppKit reads from `DATABRICKS_SERVING_ENDPOINT_NAME` when no per-agent model is configured. The same env var the `serving` plugin reads — one value covers both. Omit when agents use only external adapters.", "permission": "CAN_QUERY", "fields": { "name": { - "env": "DATABRICKS_AGENT_ENDPOINT", - "description": "Endpoint name passed to Model Serving when agents default to `DatabricksAdapter.fromModelServing()`" + "env": "DATABRICKS_SERVING_ENDPOINT_NAME", + "description": "Endpoint name passed to Model Serving when agents default to `DatabricksAdapter.fromModelServing()`. Shared with the `serving` plugin." } } } diff --git a/template/appkit.plugins.json b/template/appkit.plugins.json index 2f22a50c1..87e4304ac 100644 --- a/template/appkit.plugins.json +++ b/template/appkit.plugins.json @@ -14,12 +14,12 @@ "type": "serving_endpoint", "alias": "Model Serving (agents)", "resourceKey": "agents-serving-endpoint", - "description": "Databricks Model Serving endpoint for agents using workspace-hosted models (`DatabricksAdapter.fromModelServing`). Wire the same endpoint name AppKit reads from `DATABRICKS_AGENT_ENDPOINT` when no per-agent model is configured. Omit when agents use only external adapters.", + "description": "Databricks Model Serving endpoint for agents using workspace-hosted models (`DatabricksAdapter.fromModelServing`). Wire the same endpoint name AppKit reads from `DATABRICKS_SERVING_ENDPOINT_NAME` when no per-agent model is configured. The same env var the `serving` plugin reads — one value covers both. Omit when agents use only external adapters.", "permission": "CAN_QUERY", "fields": { "name": { - "env": "DATABRICKS_AGENT_ENDPOINT", - "description": "Endpoint name passed to Model Serving when agents default to `DatabricksAdapter.fromModelServing()`" + "env": "DATABRICKS_SERVING_ENDPOINT_NAME", + "description": "Endpoint name passed to Model Serving when agents default to `DatabricksAdapter.fromModelServing()`. Shared with the `serving` plugin." } } } From e460364e3bb2accb2804051979a4d1fbfe2a62ae Mon Sep 17 00:00:00 2001 From: MarioCadenas Date: Tue, 21 Apr 2026 19:58:04 +0200 Subject: [PATCH 19/46] feat(appkit): reference agent-app, dev-playground chat UI, docs, and template MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Final layer of the agents feature stack. Everything needed to exercise, demonstrate, and learn the feature. `apps/agent-app/` — a standalone app purpose-built around the agents feature. Ships with: - `server.ts` — full example of code-defined agents via `fromPlugin`: ```ts const support = createAgent({ instructions: "…", tools: { ...fromPlugin(analytics), ...fromPlugin(files), get_weather, "mcp.vector-search": mcpServer("vector-search", "https://…"), }, }); await createApp({ plugins: [server({ port }), analytics(), files(), agents({ agents: { support } })], }); ``` - `config/agents/assistant.md` — markdown-driven agent alongside the code-defined one, showing the asymmetric auto-inherit default. - Vite + React 19 + TailwindCSS frontend with a chat UI. - Databricks deployment config (`databricks.yml`, `app.yaml`) and deploy scripts. `apps/dev-playground/client/src/routes/agent.route.tsx` — chat UI with inline autocomplete (hits the `autocomplete` markdown agent) and a full threaded conversation panel (hits the default agent). `apps/dev-playground/server/index.ts` — adds a code-defined `helper` agent using `fromPlugin(analytics)` alongside the markdown-driven `autocomplete` agent in `config/agents/`. Exercises the mixed-style setup (markdown + code) against the same plugin list. `apps/dev-playground/config/agents/*.md` — both agents defined with valid YAML frontmatter. `docs/docs/plugins/agents.md` — progressive five-level guide: 1. Drop a markdown file → it just works. 2. Scope tools via `toolkits:` / `tools:` frontmatter. 3. Code-defined agents with `fromPlugin()`. 4. Sub-agents. 5. Standalone `runAgent()` (no `createApp` or HTTP). Plus a configuration reference, runtime API reference, and frontmatter schema table. `docs/docs/api/appkit/` — regenerated typedoc for the new public surface (fromPlugin, runAgent, AgentDefinition, AgentsPluginConfig, ToolkitEntry, ToolkitOptions, all adapter types, and the agents plugin factory). `template/appkit.plugins.json` — adds the `agent` plugin entry so `npx @databricks/appkit init --features agent` scaffolds the plugin correctly. - Full appkit vitest suite: 1311 tests passing - Typecheck clean across all 8 workspace projects - `pnpm docs:build` clean (no broken links) - `pnpm --filter=@databricks/appkit build:package` clean, publint clean Signed-off-by: MarioCadenas Documents the new `mcp` configuration block and the rules it enforces: same-origin-only by default, explicit `trustedHosts` for external MCP servers, plaintext `http://` refused outside localhost-in-dev, and DNS-level blocking of private / link-local IP ranges (covers cloud metadata services). See PR #302 for the policy implementation and PR #304 for the `AgentsPluginConfig.mcp` wiring. Signed-off-by: MarioCadenas - `docs/docs/plugins/agents.md`: new "SQL agent tools" subsection covering `analytics.query` readOnly enforcement, `lakebase.query` opt-in via `exposeAsAgentTool`, and the approval flow. New "Human-in-the-loop approval for destructive tools" subsection documents the config, SSE event shape, and `POST /chat/approve` contract. - `apps/agent-app`: approval-card component rendered inline in the chat stream whenever an `appkit.approval_pending` event arrives. Destructive badge + Approve/Deny buttons POST to `/api/agent/approve` with the carried `streamId`/`approvalId`. - `apps/dev-playground/client`: matching approval-card on the agent route, using the existing appkit-ui `Button` component and Tailwind utility classes. Signed-off-by: MarioCadenas Updates `docs/docs/plugins/agents.md` to document the new two-key auto-inherit model introduced in PR #302 (per-tool `autoInheritable` flag) and PR #304 (safe-by-default `autoInheritTools: { file: false, code: false }`). Adds an "Auto-inherit posture" subsection explaining that the developer must opt into `autoInheritTools` AND the plugin author must mark each tool `autoInheritable: true` for a tool to spread without explicit wiring. Includes a table documenting the `autoInheritable` marking on each core plugin tool, plus an example of the setup-time audit log so operators can see exactly what's inherited vs. skipped. Signed-off-by: MarioCadenas - **Reference app no longer ships hardcoded dogfood URLs.** The three `https://e2-dogfood.staging.cloud.databricks.com/...` and `https://mario-mcp-hello-*.staging.aws.databricksapps.com/...` MCP URLs in `apps/agent-app/server.ts` are replaced with optional env-driven `VECTOR_SEARCH_MCP_URL` / `CUSTOM_MCP_URL` config. When set, their hostnames are auto-added to `agents({ mcp: { trustedHosts } })`. `.env.example` uses placeholder values the reader can replace instead of another team's workspace. - **`appkit.agent` → `appkit.agents` in the reference app.** The prior `appkit.agent as { list, getDefault }` cast papered over the plugin-name mismatch fixed in PR #304. The runtime key now matches the docs, the manifest, and the factory name; the cast is gone. - **Auto-inherit opt-in added to the reference config.** Since the defaults flipped to `{ file: false, code: false }` (PR #304, S-3), the reference now explicitly enables `autoInheritTools: { file: true }` so the markdown agents that ship alongside the code-defined one still pick up the analytics / files read-only tools. This is the pattern a real deployment should follow — opt in deliberately. Signed-off-by: MarioCadenas - `apps/dev-playground/config/agents/autocomplete.md` sets `ephemeral: true`. Each debounced autocomplete keystroke no longer leaves an orphan thread in `InMemoryThreadStore` — the server now deletes the thread in the stream's `finally` (PR #304). Closes R1 from the MVP re-review. - `docs/docs/plugins/agents.md` documents the new `ephemeral` frontmatter key alongside the other AgentDefinition knobs. Signed-off-by: MarioCadenas Documents the MVP resource caps landed in PR #304: the static request-body caps (enforced by the Zod schemas) and the three configurable runtime limits (`maxConcurrentStreamsPerUser`, `maxToolCalls`, `maxSubAgentDepth`). Includes the config-block shape in the main reference and a new "Resource limits" subsection under the Configuration section explaining the intent and per-user semantics of each cap. Signed-off-by: MarioCadenas --- apps/agent-app/.env.example | 16 + apps/agent-app/.gitignore | 3 + apps/agent-app/app.yaml | 8 + apps/agent-app/config/agents/assistant.md | 23 + apps/agent-app/config/agents/support.md | 17 + apps/agent-app/databricks.yml | 50 ++ apps/agent-app/index.html | 12 + apps/agent-app/package.json | 40 ++ apps/agent-app/postcss.config.js | 6 + apps/agent-app/server.ts | 100 +++ apps/agent-app/src/App.css | 440 ++++++++++++++ apps/agent-app/src/App.tsx | 405 +++++++++++++ .../src/components/theme-selector.tsx | 135 +++++ apps/agent-app/src/index.css | 1 + apps/agent-app/src/main.tsx | 15 + apps/agent-app/tailwind.config.ts | 11 + apps/agent-app/tsconfig.app.json | 24 + apps/agent-app/tsconfig.json | 7 + apps/agent-app/tsconfig.node.json | 22 + apps/agent-app/vite.config.ts | 31 + .../client/src/routeTree.gen.ts | 21 + .../client/src/routes/__root.tsx | 8 + .../client/src/routes/agent.route.tsx | 567 ++++++++++++++++++ .../client/src/routes/index.tsx | 29 +- .../dev-playground/config/agents/assistant.md | 6 + .../config/agents/autocomplete.md | 7 + apps/dev-playground/server/index.ts | 25 +- docs/docs/api/appkit/Function.createAgent.md | 35 ++ docs/docs/api/appkit/Function.fromPlugin.md | 50 ++ .../api/appkit/Function.isFromPluginMarker.md | 17 + .../api/appkit/Function.isFunctionTool.md | 15 + docs/docs/api/appkit/Function.isHostedTool.md | 15 + .../api/appkit/Function.isToolkitEntry.md | 18 + .../api/appkit/Function.loadAgentFromFile.md | 23 + .../api/appkit/Function.loadAgentsFromDir.md | 29 + docs/docs/api/appkit/Function.mcpServer.md | 26 + docs/docs/api/appkit/Function.runAgent.md | 29 + docs/docs/api/appkit/Function.tool.md | 29 + .../docs/api/appkit/Interface.AgentAdapter.md | 20 + .../api/appkit/Interface.AgentDefinition.md | 97 +++ docs/docs/api/appkit/Interface.AgentInput.md | 33 + .../api/appkit/Interface.AgentRunContext.md | 28 + .../appkit/Interface.AgentToolDefinition.md | 33 + .../appkit/Interface.AgentsPluginConfig.md | 227 +++++++ .../api/appkit/Interface.BasePluginConfig.md | 1 + .../api/appkit/Interface.FromPluginMarker.md | 32 + .../docs/api/appkit/Interface.FunctionTool.md | 59 ++ docs/docs/api/appkit/Interface.Message.md | 49 ++ .../api/appkit/Interface.PromptContext.md | 27 + .../api/appkit/Interface.RunAgentInput.md | 35 ++ .../api/appkit/Interface.RunAgentResult.md | 21 + docs/docs/api/appkit/Interface.Thread.md | 41 ++ docs/docs/api/appkit/Interface.ThreadStore.md | 98 +++ docs/docs/api/appkit/Interface.ToolConfig.md | 49 ++ .../docs/api/appkit/Interface.ToolProvider.md | 36 ++ .../docs/api/appkit/Interface.ToolkitEntry.md | 59 ++ .../api/appkit/Interface.ToolkitOptions.md | 41 ++ docs/docs/api/appkit/TypeAlias.AgentEvent.md | 268 +++++++++ docs/docs/api/appkit/TypeAlias.AgentTool.md | 12 + docs/docs/api/appkit/TypeAlias.AgentTools.md | 14 + .../TypeAlias.BaseSystemPromptOption.md | 8 + docs/docs/api/appkit/TypeAlias.HostedTool.md | 9 + docs/docs/api/appkit/Variable.agents.md | 19 + docs/docs/api/appkit/index.md | 35 ++ docs/docs/api/appkit/typedoc-sidebar.ts | 175 ++++++ docs/docs/plugins/agents.md | 398 ++++++++++++ pnpm-lock.yaml | 336 ++++++++++- 67 files changed, 4521 insertions(+), 54 deletions(-) create mode 100644 apps/agent-app/.env.example create mode 100644 apps/agent-app/.gitignore create mode 100644 apps/agent-app/app.yaml create mode 100644 apps/agent-app/config/agents/assistant.md create mode 100644 apps/agent-app/config/agents/support.md create mode 100644 apps/agent-app/databricks.yml create mode 100644 apps/agent-app/index.html create mode 100644 apps/agent-app/package.json create mode 100644 apps/agent-app/postcss.config.js create mode 100644 apps/agent-app/server.ts create mode 100644 apps/agent-app/src/App.css create mode 100644 apps/agent-app/src/App.tsx create mode 100644 apps/agent-app/src/components/theme-selector.tsx create mode 100644 apps/agent-app/src/index.css create mode 100644 apps/agent-app/src/main.tsx create mode 100644 apps/agent-app/tailwind.config.ts create mode 100644 apps/agent-app/tsconfig.app.json create mode 100644 apps/agent-app/tsconfig.json create mode 100644 apps/agent-app/tsconfig.node.json create mode 100644 apps/agent-app/vite.config.ts create mode 100644 apps/dev-playground/client/src/routes/agent.route.tsx create mode 100644 apps/dev-playground/config/agents/assistant.md create mode 100644 apps/dev-playground/config/agents/autocomplete.md create mode 100644 docs/docs/api/appkit/Function.createAgent.md create mode 100644 docs/docs/api/appkit/Function.fromPlugin.md create mode 100644 docs/docs/api/appkit/Function.isFromPluginMarker.md create mode 100644 docs/docs/api/appkit/Function.isFunctionTool.md create mode 100644 docs/docs/api/appkit/Function.isHostedTool.md create mode 100644 docs/docs/api/appkit/Function.isToolkitEntry.md create mode 100644 docs/docs/api/appkit/Function.loadAgentFromFile.md create mode 100644 docs/docs/api/appkit/Function.loadAgentsFromDir.md create mode 100644 docs/docs/api/appkit/Function.mcpServer.md create mode 100644 docs/docs/api/appkit/Function.runAgent.md create mode 100644 docs/docs/api/appkit/Function.tool.md create mode 100644 docs/docs/api/appkit/Interface.AgentAdapter.md create mode 100644 docs/docs/api/appkit/Interface.AgentDefinition.md create mode 100644 docs/docs/api/appkit/Interface.AgentInput.md create mode 100644 docs/docs/api/appkit/Interface.AgentRunContext.md create mode 100644 docs/docs/api/appkit/Interface.AgentToolDefinition.md create mode 100644 docs/docs/api/appkit/Interface.AgentsPluginConfig.md create mode 100644 docs/docs/api/appkit/Interface.FromPluginMarker.md create mode 100644 docs/docs/api/appkit/Interface.FunctionTool.md create mode 100644 docs/docs/api/appkit/Interface.Message.md create mode 100644 docs/docs/api/appkit/Interface.PromptContext.md create mode 100644 docs/docs/api/appkit/Interface.RunAgentInput.md create mode 100644 docs/docs/api/appkit/Interface.RunAgentResult.md create mode 100644 docs/docs/api/appkit/Interface.Thread.md create mode 100644 docs/docs/api/appkit/Interface.ThreadStore.md create mode 100644 docs/docs/api/appkit/Interface.ToolConfig.md create mode 100644 docs/docs/api/appkit/Interface.ToolProvider.md create mode 100644 docs/docs/api/appkit/Interface.ToolkitEntry.md create mode 100644 docs/docs/api/appkit/Interface.ToolkitOptions.md create mode 100644 docs/docs/api/appkit/TypeAlias.AgentEvent.md create mode 100644 docs/docs/api/appkit/TypeAlias.AgentTool.md create mode 100644 docs/docs/api/appkit/TypeAlias.AgentTools.md create mode 100644 docs/docs/api/appkit/TypeAlias.BaseSystemPromptOption.md create mode 100644 docs/docs/api/appkit/TypeAlias.HostedTool.md create mode 100644 docs/docs/api/appkit/Variable.agents.md create mode 100644 docs/docs/plugins/agents.md diff --git a/apps/agent-app/.env.example b/apps/agent-app/.env.example new file mode 100644 index 000000000..055bb94c1 --- /dev/null +++ b/apps/agent-app/.env.example @@ -0,0 +1,16 @@ +# Databricks workspace (auto-injected by platform on deploy) +DATABRICKS_HOST=https://your-workspace.cloud.databricks.com + +# Agent LLM endpoint (Model Serving endpoint name) +DATABRICKS_AGENT_ENDPOINT=databricks-claude-sonnet-4-5 + +# Analytics plugin — SQL warehouse ID +DATABRICKS_WAREHOUSE_ID=your-warehouse-id + +# Files plugin — Volume path (catalog.schema.volume) +DATABRICKS_VOLUME_FILES=/Volumes/your-catalog/your-schema/your-volume + +# Optional: Custom MCP servers the agent can call. When set, the hostname +# is automatically added to agents({ mcp: { trustedHosts } }). +# VECTOR_SEARCH_MCP_URL=https:///api/2.0/mcp/vector-search/// +# CUSTOM_MCP_URL=https:///mcp diff --git a/apps/agent-app/.gitignore b/apps/agent-app/.gitignore new file mode 100644 index 000000000..9c97bbd46 --- /dev/null +++ b/apps/agent-app/.gitignore @@ -0,0 +1,3 @@ +node_modules +dist +.env diff --git a/apps/agent-app/app.yaml b/apps/agent-app/app.yaml new file mode 100644 index 000000000..215b89ec3 --- /dev/null +++ b/apps/agent-app/app.yaml @@ -0,0 +1,8 @@ +command: ['node', '--import', 'tsx', 'server.ts'] +env: + - name: DATABRICKS_WAREHOUSE_ID + valueFrom: sql-warehouse + - name: DATABRICKS_AGENT_ENDPOINT + valueFrom: serving-endpoint + - name: DATABRICKS_VOLUME_FILES + valueFrom: volume diff --git a/apps/agent-app/config/agents/assistant.md b/apps/agent-app/config/agents/assistant.md new file mode 100644 index 000000000..1eb5a84fc --- /dev/null +++ b/apps/agent-app/config/agents/assistant.md @@ -0,0 +1,23 @@ +--- +endpoint: databricks-claude-sonnet-4-5 +default: true +toolkits: + - files: [files.list, files.upload, files.delete] +agents: + - support + - researcher +--- + +You are a front-desk dispatcher running on Databricks. + +Delegate requests to the right specialist: + +- `agent-support` — data analysis (SQL via analytics), file browsing, and general questions. +- `agent-researcher` — research and knowledge lookups that benefit from MCP-hosted tools (vector search, custom endpoints). + +Only use your own tools (`files.upload`, `files.delete`, `files.list`) for +file-management actions the user explicitly asks for. Destructive ones +(`upload`, `delete`) will prompt the user for approval before running. + +Keep your own responses short — mostly routing decisions plus a brief summary +of what the specialist returned. diff --git a/apps/agent-app/config/agents/support.md b/apps/agent-app/config/agents/support.md new file mode 100644 index 000000000..68d7e964d --- /dev/null +++ b/apps/agent-app/config/agents/support.md @@ -0,0 +1,17 @@ +--- +endpoint: databricks-claude-sonnet-4-5 +toolkits: + - analytics + - files +tools: + - get_weather + # Optional MCP servers — uncomment the ones whose env vars are set in + # .env (VECTOR_SEARCH_MCP_URL, CUSTOM_MCP_URL). `server.ts` only + # registers each as ambient when its URL is configured, so leaving a + # reference here while the env var is unset will fail at startup. + # - mcp.vector-search + # - mcp.custom +--- + +You help customers with data analysis, file browsing, and general questions. +Use the available tools as needed and summarize results concisely. diff --git a/apps/agent-app/databricks.yml b/apps/agent-app/databricks.yml new file mode 100644 index 000000000..3ed6e50ad --- /dev/null +++ b/apps/agent-app/databricks.yml @@ -0,0 +1,50 @@ +bundle: + name: appkit-agent-app + +variables: + sql_warehouse_id: + description: SQL Warehouse ID for analytics queries + serving_endpoint_name: + description: Model Serving endpoint name for the agent LLM + volume_full_name: + description: "UC Volume full name (e.g. catalog.schema.volume_name)" + +resources: + apps: + agent_app: + name: "appkit-agent-app" + description: "AppKit agent with auto-discovered tools from analytics, files, and genie plugins" + source_code_path: ./ + + user_api_scopes: + - sql + - files.files + - dashboards.genie + + resources: + - name: sql-warehouse + sql_warehouse: + id: ${var.sql_warehouse_id} + permission: CAN_USE + + - name: serving-endpoint + serving_endpoint: + name: ${var.serving_endpoint_name} + permission: CAN_QUERY + + - name: volume + uc_securable: + securable_type: VOLUME + securable_full_name: ${var.volume_full_name} + permission: WRITE_VOLUME + +targets: + dogfood: + default: true + workspace: + host: https://e2-dogfood.staging.cloud.databricks.com + + variables: + sql_warehouse_id: dd43ee29fedd958d + serving_endpoint_name: databricks-claude-sonnet-4-5 + volume_full_name: main.mario.mario-vol diff --git a/apps/agent-app/index.html b/apps/agent-app/index.html new file mode 100644 index 000000000..80e54faf6 --- /dev/null +++ b/apps/agent-app/index.html @@ -0,0 +1,12 @@ + + + + + + AppKit Agent + + +
+ + + diff --git a/apps/agent-app/package.json b/apps/agent-app/package.json new file mode 100644 index 000000000..ed159ca8d --- /dev/null +++ b/apps/agent-app/package.json @@ -0,0 +1,40 @@ +{ + "name": "agent-app", + "private": true, + "version": "0.0.0", + "type": "module", + "scripts": { + "dev": "NODE_ENV=development tsx watch server.ts", + "build": "tsc -b && vite build", + "preview": "vite preview" + }, + "dependencies": { + "@databricks/appkit": "workspace:*", + "@databricks/appkit-ui": "workspace:*", + "@databricks/sdk-experimental": "^0.16.0", + "dotenv": "^16.6.1", + "lucide-react": "^0.511.0", + "react": "19.2.0", + "react-dom": "19.2.0", + "marked": "^15.0.0", + "zod": "^4.0.0" + }, + "devDependencies": { + "@tailwindcss/postcss": "4.1.17", + "@types/node": "24.10.1", + "@types/react": "19.2.7", + "@types/react-dom": "19.2.3", + "@vitejs/plugin-react": "5.1.1", + "autoprefixer": "10.4.21", + "postcss": "8.5.6", + "tailwindcss": "4.1.17", + "tailwindcss-animate": "1.0.7", + "tw-animate-css": "1.4.0", + "tsx": "4.20.6", + "typescript": "5.9.3", + "vite": "npm:rolldown-vite@7.1.14" + }, + "overrides": { + "vite": "npm:rolldown-vite@7.1.14" + } +} diff --git a/apps/agent-app/postcss.config.js b/apps/agent-app/postcss.config.js new file mode 100644 index 000000000..f69c5d411 --- /dev/null +++ b/apps/agent-app/postcss.config.js @@ -0,0 +1,6 @@ +export default { + plugins: { + "@tailwindcss/postcss": {}, + autoprefixer: {}, + }, +}; diff --git a/apps/agent-app/server.ts b/apps/agent-app/server.ts new file mode 100644 index 000000000..8ea63c224 --- /dev/null +++ b/apps/agent-app/server.ts @@ -0,0 +1,100 @@ +import { + agents, + analytics, + createAgent, + createApp, + files, + mcpServer, + server, + tool, +} from "@databricks/appkit"; +import { z } from "zod"; + +const port = Number(process.env.DATABRICKS_APP_PORT) || 8003; + +// Ambient function tool. Referenced from `config/agents/support.md` under +// `tools: [get_weather]`. Markdown frontmatter looks up this name against +// the `tools:` record passed to `agents({ tools: { get_weather } })` below. +const get_weather = tool({ + name: "get_weather", + description: "Get the current weather for a city", + schema: z.object({ + city: z.string().describe("City name"), + }), + execute: async ({ city }) => `The weather in ${city} is sunny, 22°C`, +}); + +// MCP servers are conditional on runtime env vars — something markdown +// frontmatter can't express. This is the motivating case for defining +// the `researcher` agent in code below: it wires whatever MCP tools are +// configured at boot, and is always callable (with a graceful fallback +// when nothing is wired). +// +// Any MCP URL configured here must also be allowlisted via +// `agents({ mcp: { trustedHosts: [...] } })` before outbound calls will +// be allowed by the zero-trust host policy. +const customMcpServers: Record> = {}; +if (process.env.VECTOR_SEARCH_MCP_URL) { + customMcpServers["mcp.vector-search"] = mcpServer( + "vector-search", + process.env.VECTOR_SEARCH_MCP_URL, + ); +} +if (process.env.CUSTOM_MCP_URL) { + customMcpServers["mcp.custom"] = mcpServer( + "custom", + process.env.CUSTOM_MCP_URL, + ); +} + +// Code-defined research specialist. `assistant.md` references this by name +// under `agents: [researcher]`; the agents plugin resolves that reference +// against both markdown siblings and code-defined agents, with code winning +// on collision. Defined in code so its MCP toolset can flex on env vars. +const researcher = createAgent({ + instructions: + "You are a research specialist. When MCP tools are available " + + "(vector search, custom endpoints), prefer them for knowledge lookups. " + + "If no MCP tools are configured, say so briefly and answer from general " + + "knowledge. Always include your source or note when you're answering " + + "without search.", + tools: { + get_weather, + ...customMcpServers, + }, +}); + +const trustedMcpHosts = [ + process.env.VECTOR_SEARCH_MCP_URL, + process.env.CUSTOM_MCP_URL, +] + .filter((u): u is string => typeof u === "string" && u.length > 0) + .map((u) => new URL(u).hostname); + +const appkit = await createApp({ + plugins: [ + server({ port }), + analytics(), + files(), + agents({ + // Code-defined agents merged with markdown agents; code wins on name + // collision. Markdown `agents: [...]` frontmatter can reference either. + agents: { researcher }, + // Ambient tool library for markdown agents referencing names under + // their `tools:` frontmatter. + tools: { get_weather, ...customMcpServers }, + // Enables auto-inherit of read-only plugin tools (analytics/files) into + // markdown agents that declare no explicit `toolkits:` / `tools:`. Both + // assistant.md and support.md are explicit, so this is a no-op today, + // but kept as a knob markdown authors can rely on. + autoInheritTools: { file: true }, + mcp: { trustedHosts: trustedMcpHosts }, + }), + ], +}); + +console.log( + `Agent app running on port ${port}. ` + + `Agents: ${appkit.agents.list().join(", ") || "(none)"}. ` + + `Default: ${appkit.agents.getDefault() ?? "(none)"}.`, +); diff --git a/apps/agent-app/src/App.css b/apps/agent-app/src/App.css new file mode 100644 index 000000000..545b438cd --- /dev/null +++ b/apps/agent-app/src/App.css @@ -0,0 +1,440 @@ +:root { + --bg: #fafafa; + --card: #ffffff; + --border: #e5e5e5; + --text: #171717; + --text-muted: #737373; + --text-faint: #a3a3a3; + --primary: #2563eb; + --primary-fg: #ffffff; + --muted: #f5f5f5; + --ring: #93c5fd; + --radius: 10px; + --font: system-ui, -apple-system, sans-serif; + --mono: "SF Mono", "Cascadia Code", "Fira Code", monospace; +} + +:root.dark { + --bg: #0a0a0a; + --card: #171717; + --border: #262626; + --text: #fafafa; + --text-muted: #a3a3a3; + --text-faint: #525252; + --primary: #3b82f6; + --primary-fg: #ffffff; + --muted: #262626; + --ring: #1d4ed8; +} + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: var(--font); + background: var(--bg); + color: var(--text); + -webkit-font-smoothing: antialiased; +} + +.app { + min-height: 100vh; +} + +.container { + max-width: 1100px; + margin: 0 auto; + padding: 2.5rem 1.5rem; +} + +.header { + margin-bottom: 1.5rem; + display: flex; + align-items: flex-start; + justify-content: space-between; +} + +.header h1 { + font-size: 1.75rem; + font-weight: 700; + letter-spacing: -0.025em; +} + +.subtitle { + color: var(--text-muted); + font-size: 0.875rem; + margin-top: 0.25rem; +} + +.thread-id { + font-family: var(--mono); + font-size: 0.75rem; + opacity: 0.6; +} + +.main-layout { + display: flex; + gap: 1.25rem; + height: 700px; +} + +.chat-panel { + flex: 1; + display: flex; + flex-direction: column; + border: 1px solid var(--border); + border-radius: var(--radius); + background: var(--card); + min-width: 0; + overflow: hidden; +} + +.messages { + flex: 1; + overflow-y: auto; + padding: 1.25rem; + display: flex; + flex-direction: column; + gap: 1rem; +} + +.empty-state { + text-align: center; + padding: 5rem 1rem; + color: var(--text-muted); +} + +.empty-title { + font-size: 1.1rem; + font-weight: 500; +} + +.empty-sub { + font-size: 0.85rem; + margin-top: 0.5rem; + color: var(--text-faint); +} + +.message-row { + display: flex; +} + +.message-row.user { + justify-content: flex-end; +} + +.message-row.assistant { + justify-content: flex-start; +} + +.bubble { + max-width: 80%; + padding: 0.625rem 0.875rem; + border-radius: var(--radius); + font-size: 0.875rem; + line-height: 1.5; + word-break: break-word; +} + +.bubble.user { + white-space: pre-wrap; + background: var(--primary); + color: var(--primary-fg); + border-bottom-right-radius: 3px; +} + +.bubble.assistant { + background: var(--muted); + color: var(--text); + border-bottom-left-radius: 3px; +} + +.bubble.thinking { + color: var(--text-muted); + animation: pulse 1.5s ease-in-out infinite; +} + +.bubble.approval-card { + border: 1px solid #d96b3a; + background: color-mix(in srgb, #d96b3a 10%, var(--muted)); +} + +.approval-header { + display: flex; + align-items: center; + gap: 8px; + margin-bottom: 8px; +} + +.approval-badge { + display: inline-block; + padding: 2px 8px; + font-size: 0.75rem; + font-weight: 600; + letter-spacing: 0.02em; + text-transform: uppercase; + color: #fff; + background: #d96b3a; + border-radius: 3px; +} + +.approval-body { + font-size: 0.9rem; +} + +.approval-args { + margin: 6px 0 0; + padding: 8px; + font-size: 0.8rem; + max-height: 220px; + overflow: auto; + background: var(--bg); + border-radius: 4px; + white-space: pre-wrap; + word-break: break-word; +} + +.approval-actions { + display: flex; + gap: 8px; + margin-top: 10px; + justify-content: flex-end; +} + +.approval-actions button { + padding: 6px 14px; + font-size: 0.85rem; + font-weight: 500; + border-radius: 4px; + border: 1px solid transparent; + cursor: pointer; + transition: + background 0.15s, + border-color 0.15s; +} + +.approval-deny { + background: transparent; + color: var(--text); + border-color: var(--border); +} + +.approval-deny:hover { + background: var(--muted); +} + +.approval-approve { + background: #d96b3a; + color: #fff; +} + +.approval-approve:hover { + background: #c35a2b; +} + +.bubble.assistant > * + * { + margin-top: 0.5em; +} + +.bubble.assistant p { + margin: 0; +} + +.bubble.assistant p + p { + margin-top: 0.4em; +} + +.bubble.assistant code { + font-family: var(--mono); + font-size: 0.8em; + background: color-mix(in srgb, var(--text) 8%, transparent); + padding: 0.15em 0.35em; + border-radius: 4px; +} + +.bubble.assistant pre { + margin: 0.5em 0; + padding: 0.75em; + border-radius: 6px; + background: color-mix(in srgb, var(--text) 6%, transparent); + overflow-x: auto; +} + +.bubble.assistant pre code { + background: none; + padding: 0; + font-size: 0.8em; +} + +.bubble.assistant ul, +.bubble.assistant ol { + margin: 0.4em 0; + padding-left: 1.5em; +} + +.bubble.assistant li { + margin: 0.15em 0; +} + +.bubble.assistant h1, +.bubble.assistant h2, +.bubble.assistant h3 { + font-weight: 600; +} + +.bubble.assistant h1 { + font-size: 1.1em; +} +.bubble.assistant h2 { + font-size: 1em; +} +.bubble.assistant h3 { + font-size: 0.95em; +} + +.bubble.assistant blockquote { + margin: 0.4em 0; + padding-left: 0.75em; + border-left: 3px solid var(--border); + color: var(--text-muted); +} + +.bubble.assistant table { + border-collapse: collapse; + margin: 0.5em 0; + font-size: 0.85em; +} + +.bubble.assistant th, +.bubble.assistant td { + border: 1px solid var(--border); + padding: 0.35em 0.6em; +} + +.bubble.assistant th { + background: color-mix(in srgb, var(--text) 4%, transparent); + font-weight: 600; +} + +@keyframes pulse { + 0%, + 100% { + opacity: 1; + } + 50% { + opacity: 0.5; + } +} + +.input-bar { + display: flex; + gap: 0.5rem; + padding: 0.875rem 1rem; + border-top: 1px solid var(--border); +} + +.input-bar textarea { + flex: 1; + padding: 0.5rem 0.75rem; + border: 1px solid var(--border); + border-radius: 8px; + background: var(--bg); + color: var(--text); + font-family: var(--font); + font-size: 0.875rem; + resize: none; + outline: none; + transition: border-color 0.15s; +} + +.input-bar textarea:focus { + border-color: var(--ring); + box-shadow: 0 0 0 2px color-mix(in srgb, var(--ring) 25%, transparent); +} + +.input-bar textarea:disabled { + opacity: 0.5; +} + +.input-bar button { + padding: 0.5rem 1rem; + border: none; + border-radius: 8px; + background: var(--primary); + color: var(--primary-fg); + font-family: var(--font); + font-size: 0.875rem; + font-weight: 500; + cursor: pointer; + transition: opacity 0.15s; + align-self: flex-end; +} + +.input-bar button:hover:not(:disabled) { + opacity: 0.9; +} + +.input-bar button:disabled { + opacity: 0.4; + cursor: not-allowed; +} + +.event-panel { + width: 300px; + flex-shrink: 0; + display: flex; + flex-direction: column; + border: 1px solid var(--border); + border-radius: var(--radius); + background: var(--card); + overflow: hidden; +} + +.event-header { + padding: 0.625rem 0.875rem; + border-bottom: 1px solid var(--border); + font-size: 0.8rem; + font-weight: 600; + color: var(--text-muted); + text-transform: uppercase; + letter-spacing: 0.05em; +} + +.event-list { + flex: 1; + overflow-y: auto; + padding: 0.75rem; + display: flex; + flex-direction: column; + gap: 0.25rem; +} + +.event-empty { + text-align: center; + padding: 2.5rem 0; + font-size: 0.75rem; + color: var(--text-faint); +} + +.event-row { + font-family: var(--mono); + font-size: 0.7rem; + line-height: 1.4; + display: flex; + gap: 0.5rem; +} + +.event-type { + flex-shrink: 0; + width: 90px; + text-align: right; + color: var(--text-faint); +} + +.event-detail { + color: var(--text-muted); + word-break: break-all; +} diff --git a/apps/agent-app/src/App.tsx b/apps/agent-app/src/App.tsx new file mode 100644 index 000000000..1de373c94 --- /dev/null +++ b/apps/agent-app/src/App.tsx @@ -0,0 +1,405 @@ +import { TooltipProvider } from "@databricks/appkit-ui/react"; +import { useCallback, useEffect, useRef, useState } from "react"; +import "./App.css"; +import { ThemeSelector } from "./components/theme-selector"; + +interface SSEEvent { + type: string; + delta?: string; + item_id?: string; + item?: { + type?: string; + id?: string; + call_id?: string; + name?: string; + arguments?: string; + output?: string; + status?: string; + }; + content?: string; + data?: Record; + error?: string; + sequence_number?: number; + output_index?: number; + approval_id?: string; + stream_id?: string; + tool_name?: string; + args?: unknown; + annotations?: { + readOnly?: boolean; + destructive?: boolean; + idempotent?: boolean; + }; +} + +interface ChatMessage { + id: number; + role: "user" | "assistant"; + content: string; +} + +interface PendingApproval { + approvalId: string; + streamId: string; + toolName: string; + args: unknown; + annotations?: { + readOnly?: boolean; + destructive?: boolean; + idempotent?: boolean; + }; +} + +export default function App() { + const [messages, setMessages] = useState([]); + const [events, setEvents] = useState([]); + const [input, setInput] = useState(""); + const [isLoading, setIsLoading] = useState(false); + const [threadId, setThreadId] = useState(null); + const [pendingApprovals, setPendingApprovals] = useState( + [], + ); + const currentStreamIdRef = useRef(null); + const messagesEndRef = useRef(null); + const idRef = useRef(0); + + const [toolCount, setToolCount] = useState(0); + + const decideApproval = useCallback( + async (approvalId: string, decision: "approve" | "deny") => { + const approval = pendingApprovals.find( + (a) => a.approvalId === approvalId, + ); + if (!approval) return; + try { + await fetch("/api/agent/approve", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + streamId: approval.streamId, + approvalId, + decision, + }), + }); + } finally { + setPendingApprovals((prev) => + prev.filter((a) => a.approvalId !== approvalId), + ); + } + }, + [pendingApprovals], + ); + + useEffect(() => { + const timer = setTimeout(() => { + fetch("/api/agent/info") + .then((r) => r.json()) + .then((data) => setToolCount(data.toolCount ?? 0)) + .catch(() => {}); + }, 500); + return () => clearTimeout(timer); + }, []); + + // biome-ignore lint/correctness/useExhaustiveDependencies: scroll on new messages + useEffect(() => { + messagesEndRef.current?.scrollIntoView({ behavior: "smooth" }); + }, [messages]); + + const sendMessage = useCallback(async () => { + if (!input.trim() || isLoading) return; + + const text = input.trim(); + setInput(""); + setMessages((prev) => [ + ...prev, + { id: ++idRef.current, role: "user", content: text }, + ]); + setEvents([]); + setIsLoading(true); + + try { + const res = await fetch("/api/agent/chat", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + message: text, + ...(threadId && { threadId }), + }), + }); + + if (!res.ok) { + const err = await res.json(); + setMessages((prev) => [ + ...prev, + { + id: ++idRef.current, + role: "assistant", + content: `Error: ${err.error}`, + }, + ]); + return; + } + + const reader = res.body?.getReader(); + if (!reader) return; + + const decoder = new TextDecoder(); + let content = ""; + let buffer = ""; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + const lines = buffer.split("\n"); + buffer = lines.pop() ?? ""; + + for (const line of lines) { + if (!line.startsWith("data: ")) continue; + const data = line.slice(6).trim(); + if (!data || data === "[DONE]") continue; + try { + const event: SSEEvent = JSON.parse(data); + if (!event.type) continue; + setEvents((prev) => [...prev, event]); + + if (event.type === "appkit.metadata" && event.data?.threadId) { + setThreadId(event.data.threadId as string); + if (typeof event.data.streamId === "string") { + currentStreamIdRef.current = event.data.streamId; + } + } + if ( + event.type === "appkit.approval_pending" && + event.approval_id && + event.stream_id && + event.tool_name + ) { + currentStreamIdRef.current = event.stream_id; + setPendingApprovals((prev) => [ + ...prev, + { + approvalId: event.approval_id as string, + streamId: event.stream_id as string, + toolName: event.tool_name as string, + args: event.args, + annotations: event.annotations, + }, + ]); + } + if (event.type === "response.output_text.delta" && event.delta) { + content += event.delta; + setMessages((prev) => { + const updated = [...prev]; + const last = updated[updated.length - 1]; + if (last?.role === "assistant") { + updated[updated.length - 1] = { ...last, content }; + } else { + updated.push({ + id: ++idRef.current, + role: "assistant", + content, + }); + } + return updated; + }); + } + } catch { + /* skip */ + } + } + } + } catch (err) { + setMessages((prev) => [ + ...prev, + { + id: ++idRef.current, + role: "assistant", + content: `Error: ${err instanceof Error ? err.message : "Unknown error"}`, + }, + ]); + } finally { + setIsLoading(false); + } + }, [input, isLoading, threadId]); + + return ( + +
+
+
+
+

Agent Chat

+

+ AI agent with {toolCount} auto-discovered tools + {threadId && ( + + {" "} + · Thread {threadId.slice(0, 8)} + + )} +

+
+ +
+ +
+
+
+ {messages.length === 0 && ( +
+

+ Send a message to start a conversation +

+

+ The agent can query data, browse files, and more +

+
+ )} + + {messages.map((msg) => ( +
+
+

{msg.content}

+
+
+ ))} + + {pendingApprovals.map((approval) => ( +
+
+
+ + Destructive tool — approval required + +
+
+ {approval.toolName} +
+                          {JSON.stringify(approval.args, null, 2)}
+                        
+
+
+ + +
+
+
+ ))} + + {isLoading && + pendingApprovals.length === 0 && + messages[messages.length - 1]?.role === "user" && ( +
+
+ Thinking... +
+
+ )} + +
+
+ +
{ + e.preventDefault(); + sendMessage(); + }} + > +