diff --git a/apps/website/content/docs/chat/api/api-docs.json b/apps/website/content/docs/chat/api/api-docs.json index 221d785b8..a181023bd 100644 --- a/apps/website/content/docs/chat/api/api-docs.json +++ b/apps/website/content/docs/chat/api/api-docs.json @@ -1759,6 +1759,19 @@ "description": "Clears local view state (classifiers, surface store, lifecycle counters)\nfor a new thread.\n\nResets messageCount to 0 and inputSubmittedAt to null. componentReady and\nfirstMessageSent are NOT reset (sticky for the chat instance lifetime).", "params": [] }, + { + "name": "humanContent", + "signature": "humanContent(message: unknown)", + "description": "Renderable content for a human-role message bubble. Most human\nmessages are typed prompts and pass through `messageContent`\nunchanged. A2UI action messages (e.g. form submits, button clicks\non a rendered surface) flow through the same submit channel and\nland in the message stream as a HumanMessage whose content is a\nJSON-serialized `A2uiActionMessage`. Showing the raw JSON as if\nthe user typed it leaks the protocol; per the A2UI v0.9 spec\nthose events resemble tool calls more than user utterances.\n\n`a2uiActionLabel` returns a short human-readable label for\nrecognized action shapes (\"Search flights\", \"Selected flight UA123\",\netc.) 
// SPDX-License-Identifier: MIT
/**
 * Synthesize a short human-readable label for a serialized A2UI action
 * message, so the chat composition can render "Search flights" instead
 * of a raw `{"version":"v1","action":...}` JSON dump as a user bubble.
 *
 * Per the A2UI v0.9 spec, action messages flow on the client → agent
 * return channel and are framed as typed events (closer to tool calls
 * than user utterances). The spec is silent on chat-bubble rendering;
 * Google's "A2UI in Practice" article and the Stream Chat reference
 * both warn against modeling actions as chat-history user turns.
 *
 * This helper returns null for any content that isn't a v1 A2UI action
 * message; callers should fall back to the original content in that case.
 *
 * Sources:
 *   - https://a2ui.org/specification/v0.9-a2ui/
 *   - https://medium.com/google-cloud/a2ui-in-practice-patterns-pitfalls-and-the-messages-that-hold-it-together-658720b83789
 *   - https://getstream.io/blog/a2ui-chat-integration/
 */

/**
 * Action names that have a curated label. Each builder receives the
 * action's raw `context` value. Any other action name falls back to a
 * camelCase → sentence-case humanization ("addToCart" → "Add to cart").
 */
const KNOWN_LABELS: Record<string, (ctx: unknown) => string> = {
  bookingSubmit: () => 'Search flights',
  flightSelect: (ctx) => {
    const id = unwrapContextString(ctx, 'flightId') ?? unwrapContextString(ctx, 'flight_id');
    // An empty id is treated the same as a missing one.
    return id ? `Selected flight ${id}` : 'Selected flight';
  },
  modifySearch: () => 'Modify search',
};

/**
 * Build a display label for a JSON-serialized A2UI action message.
 *
 * @param content Raw message text; may be prose, markdown, or JSON.
 * @returns The curated label for a known action name, a humanized form of
 *   the name for any other action, or `null` when `content` is not a JSON
 *   object with `version === 'v1'` and a non-empty string `action.name`.
 */
export function a2uiActionLabel(content: string): string | null {
  if (typeof content !== 'string' || content.length === 0) return null;
  // Cheap pre-check to skip parsing non-JSON content (markdown, prose, etc).
  const trimmed = content.trimStart();
  if (!trimmed.startsWith('{')) return null;
  let parsed: unknown;
  try {
    parsed = JSON.parse(trimmed);
  } catch {
    return null;
  }
  if (!isRecord(parsed)) return null;
  if (parsed['version'] !== 'v1') return null;
  const action = parsed['action'];
  if (!isRecord(action)) return null;
  const name = action['name'];
  if (typeof name !== 'string' || name.length === 0) return null;

  // `name` comes from message content, so only own keys may match. A bare
  // `KNOWN_LABELS[name]` would also resolve inherited Object.prototype
  // members ("constructor", "toString", "hasOwnProperty", "__proto__"),
  // yielding non-string results or throwing.
  if (Object.prototype.hasOwnProperty.call(KNOWN_LABELS, name)) {
    const known = KNOWN_LABELS[name];
    if (known) return known(action['context']);
  }
  return humanizeCamelCase(name);
}

/** True for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return value != null && typeof value === 'object' && !Array.isArray(value);
}

/** "bookingSubmit" → "Booking submit". "addToCart" → "Add to cart". */
function humanizeCamelCase(name: string): string {
  // Insert a space at each lowercase→uppercase boundary, then sentence-case.
  const spaced = name.replace(/([a-z])([A-Z])/g, '$1 $2');
  const lower = spaced.toLowerCase();
  return lower.charAt(0).toUpperCase() + lower.slice(1);
}

/**
 * Extract a string-typed value from an A2UI context structure. The v1
 * wire shape carries each value as a DynamicValue (`{literalString: ...}`,
 * `{literalNumber: ...}`, `{path: ...}`); we want the literal string only.
 *
 * Context can be either:
 *   - a dict: `{ key1: {literalString: "..."}, key2: ... }` (compact form)
 *   - an array of entries: `[{key, value: {literalString: "..."}}, ...]`
 *     (the spec's canonical wire shape for A2uiActionContextEntry[])
 *
 * @returns The string for `key`, or `null` when the key is absent, the
 *   value is not a literal string, or `context` is neither shape.
 */
function unwrapContextString(context: unknown, key: string): string | null {
  if (Array.isArray(context)) {
    // First entry whose `key` matches wins.
    const entry = context.find(
      (e): e is Record<string, unknown> => isRecord(e) && e['key'] === key,
    );
    if (!entry) return null;
    return readLiteralString(entry['value']);
  }
  if (isRecord(context)) {
    return readLiteralString(context[key]);
  }
  return null;
}

/**
 * Read a string from either a bare string or a `{literalString: "..."}`
 * wrapper; anything else yields `null`.
 */
function readLiteralString(value: unknown): string | null {
  if (typeof value === 'string') return value;
  if (isRecord(value)) {
    const literal = value['literalString'];
    if (typeof literal === 'string') return literal;
  }
  return null;
}
/**
 * Text to show inside a human-role message bubble.
 *
 * Typed prompts pass through `messageContent` unchanged. A2UI action
 * messages (form submits, button clicks on a rendered surface) arrive on
 * the same submit channel as a HumanMessage whose content is a
 * JSON-serialized `A2uiActionMessage`; rendering that JSON verbatim would
 * leak the protocol into the transcript. When `a2uiActionLabel`
 * recognizes the content it supplies a short label ("Search flights",
 * "Selected flight UA123", ...); when it returns null the original text
 * is shown instead.
 */
protected humanContent(message: unknown): string {
  // `messageContent` is typed against LangChain's BaseMessage while the
  // template iterates the chat-lib's looser `Message` shape, hence the cast.
  const text = messageContent(message as Parameters<typeof messageContent>[0]);
  const label = a2uiActionLabel(text);
  return label ?? text;
}