From 16d6f92cb5e2d06d20e85abc982c1566516c30b3 Mon Sep 17 00:00:00 2001 From: William Date: Fri, 29 May 2026 18:06:19 +0800 Subject: [PATCH 1/3] feat(discord): inbound + outbound media attachments Discord was fully text-only: inbound attachments were ignored and the agent couldn't send files. Wire the channel to the existing attachment infrastructure in both directions. Inbound: message attachments (guild messages + DM gateway dispatch) are fetched from the Discord CDN and uploaded as session attachments (images become vision input); audio attachments are transcribed via the agent's STT and appended as text. Media-only messages now trigger the agent. Attachments over the 25 MiB cap are skipped. Outbound: the agent's attachment_send deliveries (SSE 'attachment' event) are sent back as Discord file uploads via AttachmentBuilder, with any caption as the message content. Discord is bundled into the CLI (private package), so this ships with the next openhermit release rather than a standalone publish. Bumps 0.2.0 -> 0.3.0 for changelog clarity. Adds the first unit tests for this package. 
Co-Authored-By: Claude Opus 4.8 --- apps/channels/discord/README.md | 1 + apps/channels/discord/package.json | 3 +- apps/channels/discord/src/bot.ts | 50 +++++++++- apps/channels/discord/src/bridge.ts | 112 ++++++++++++++++++++++- apps/channels/discord/src/discord-api.ts | 26 ++++++ apps/channels/discord/test/bot.test.ts | 25 +++++ docs/channel-adapter.md | 3 +- docs/manual/17-channels.md | 1 + 8 files changed, 212 insertions(+), 9 deletions(-) create mode 100644 apps/channels/discord/test/bot.test.ts diff --git a/apps/channels/discord/README.md b/apps/channels/discord/README.md index 3e3fa120..c4061f2c 100644 --- a/apps/channels/discord/README.md +++ b/apps/channels/discord/README.md @@ -11,6 +11,7 @@ - sender identity extraction from Discord user IDs and display names - optional `allowed_channel_ids` allow-list - outbound delivery for `session_send` +- **media**: inbound file/image attachments are uploaded to the agent (images become vision input) and audio attachments are transcribed via STT; the agent's `attachment_send` deliveries are sent back as Discord file uploads. Attachments over the 25 MiB cap are skipped. 
## Configuration diff --git a/apps/channels/discord/package.json b/apps/channels/discord/package.json index e6b207c2..038eedee 100644 --- a/apps/channels/discord/package.json +++ b/apps/channels/discord/package.json @@ -1,12 +1,13 @@ { "name": "@openhermit/channel-discord", "private": true, - "version": "0.2.0", + "version": "0.3.0", "type": "module", "main": "src/index.ts", "scripts": { "build": "tsc -b", "typecheck": "tsc -p tsconfig.typecheck.json --pretty false", + "test": "node --import tsx --test test/*.test.ts", "dev": "tsx src/index.ts" }, "dependencies": { diff --git a/apps/channels/discord/src/bot.ts b/apps/channels/discord/src/bot.ts index 59d98c90..09558f7a 100644 --- a/apps/channels/discord/src/bot.ts +++ b/apps/channels/discord/src/bot.ts @@ -1,6 +1,6 @@ import { ChannelType, Events, type Message } from 'discord.js'; -import type { DiscordApi, DiscordMessageEvent } from './discord-api.js'; +import type { DiscordApi, DiscordIncomingAttachment, DiscordMessageEvent } from './discord-api.js'; import type { DiscordBridge } from './bridge.js'; export interface BotOptions { @@ -32,18 +32,22 @@ export class DiscordBot { // Partials.Channel — handle them via the raw gateway dispatch instead. this.discord.client.on('raw' as any, (packet: any) => { if (packet.t !== 'MESSAGE_CREATE') return; - const { guild_id: guildId, author, content, channel_id: channelId, id: messageId } = packet.d ?? {}; - if (guildId || !author || author.bot || !content) return; + const { guild_id: guildId, author, content, channel_id: channelId, id: messageId, attachments } = packet.d ?? {}; + if (guildId || !author || author.bot) return; + const mapped = mapRawAttachments(attachments); + // Allow media-only DMs (no text) through when files are attached. + if (!content && mapped.length === 0) return; const event: DiscordMessageEvent = { channelId, userId: author.id, username: author.username, displayName: author.global_name ?? author.username, - text: content, + text: content ?? 
'', messageId, isDm: true, mentioned: true, + ...(mapped.length > 0 ? { attachments: mapped } : {}), }; void this.bridge.handleMessage(event).catch((err: Error) => { this.log(`error handling DM: ${err.message}`); @@ -68,11 +72,14 @@ export class DiscordBot { } } if (message.author.bot) return; - if (!message.content) return; // DMs are handled via the raw gateway dispatch above. if (message.channel.type === ChannelType.DM) return; + const mapped = mapMessageAttachments(message); + // Allow media-only messages (no text) through when files are attached. + if (!message.content && mapped.length === 0) return; + const mentioned = this.isMentioned(message); const text = this.stripMention(message.content); @@ -96,6 +103,7 @@ export class DiscordBot { isDm: false, mentioned, ...(message.guildId ? { guildId: message.guildId } : {}), + ...(mapped.length > 0 ? { attachments: mapped } : {}), }; try { @@ -111,6 +119,8 @@ export class DiscordBot { } } + // (helpers below the class) + private isMentioned(message: Message): boolean { const botId = this.discord.botUserId; if (!botId) return false; @@ -123,3 +133,33 @@ export class DiscordBot { return text.replace(new RegExp(`<@!?${botId}>\\s*`, 'g'), '').trim(); } } + +/** Map discord.js Message attachments to the channel-neutral shape. */ +export function mapMessageAttachments(message: Message): DiscordIncomingAttachment[] { + const out: DiscordIncomingAttachment[] = []; + for (const att of message.attachments.values()) { + out.push({ + url: att.url, + name: att.name ?? 'attachment', + ...(att.contentType ? { contentType: att.contentType } : {}), + ...(typeof att.size === 'number' ? { size: att.size } : {}), + }); + } + return out; +} + +/** Map raw gateway dispatch attachments (snake_case) to the neutral shape. 
*/ +export function mapRawAttachments(attachments: unknown): DiscordIncomingAttachment[] { + if (!Array.isArray(attachments)) return []; + const out: DiscordIncomingAttachment[] = []; + for (const att of attachments) { + if (!att || typeof att.url !== 'string') continue; + out.push({ + url: att.url, + name: typeof att.filename === 'string' ? att.filename : 'attachment', + ...(typeof att.content_type === 'string' ? { contentType: att.content_type } : {}), + ...(typeof att.size === 'number' ? { size: att.size } : {}), + }); + } + return out; +} diff --git a/apps/channels/discord/src/bridge.ts b/apps/channels/discord/src/bridge.ts index aa8d2ae6..2b948591 100644 --- a/apps/channels/discord/src/bridge.ts +++ b/apps/channels/discord/src/bridge.ts @@ -7,11 +7,20 @@ import { stripSilenceTokens } from '@openhermit/shared'; import type { DiscordApi, DiscordMessageEvent } from './discord-api.js'; import { formatAgentResponse, markdownToDiscord } from './formatting.js'; +/** Gateway-enforced attachment cap (25 MiB). Skip oversized media early. */ +const MAX_MEDIA_BYTES = 25 * 1024 * 1024; + interface TurnResult { text: string | undefined; error: string | undefined; } +/** Outcome of resolving an inbound message's attachments. 
*/ +interface ResolvedInbound { + text: string; + attachments?: { type: 'file'; id: string }[]; +} + export class DiscordBridge implements ChannelOutbound { readonly channel = 'discord'; @@ -80,12 +89,66 @@ export class DiscordBridge implements ChannelOutbound { async handleMessage(event: DiscordMessageEvent): Promise { const text = event.text.trim(); - if (!text) return; + if (!text && !(event.attachments && event.attachments.length > 0)) return; const sessionId = await this.getSessionId(event.channelId); await this.runInChannelQueue(event.channelId, () => this.sendToAgent(event, sessionId, text)); } + /** + * Fetch each inbound attachment from the Discord CDN and either transcribe + * it (audio) or upload it as a durable session attachment (everything else). + */ + private async resolveInbound( + sessionId: string, + event: DiscordMessageEvent, + baseText: string, + ): Promise { + let text = baseText; + const ids: { type: 'file'; id: string }[] = []; + const transcripts: string[] = []; + + for (const att of event.attachments ?? []) { + if (att.size && att.size > MAX_MEDIA_BYTES) { + this.log(`skipping oversized attachment ${att.name} (${att.size} bytes)`); + continue; + } + let bytes: Uint8Array; + try { + const res = await fetch(att.url); + if (!res.ok) throw new Error(`status ${res.status}`); + bytes = new Uint8Array(await res.arrayBuffer()); + } catch (err) { + this.log(`failed to fetch attachment ${att.name}: ${err instanceof Error ? err.message : String(err)}`); + continue; + } + const mime = att.contentType ?? 'application/octet-stream'; + if (mime.startsWith('audio/')) { + try { + const { text: transcript } = await this.client.transcribeAudio({ bytes, mimeType: mime }); + if (transcript.trim()) transcripts.push(transcript.trim()); + } catch (err) { + this.log(`stt failed for ${att.name}: ${err instanceof Error ? 
err.message : String(err)}`); + } + } else { + try { + const blob = new Blob([bytes as unknown as BlobPart], { type: mime }); + const uploaded = await this.client.uploadAttachment(sessionId, blob, att.name); + ids.push({ type: 'file', id: uploaded.id! }); + } catch (err) { + this.log(`upload failed for ${att.name}: ${err instanceof Error ? err.message : String(err)}`); + } + } + } + + if (transcripts.length > 0) { + const joined = transcripts.join('\n\n'); + text = text ? `${text}\n\n[Transcribed voice message]\n${joined}` : `[Transcribed voice message]\n${joined}`; + } + + return { text, ...(ids.length > 0 ? { attachments: ids } : {}) }; + } + private async runInChannelQueue(channelId: string, task: () => Promise): Promise { const previousTurn = this.turnQueues.get(channelId); const currentTurn = this.runAfterPreviousTurn(previousTurn, task).finally(() => { @@ -135,9 +198,14 @@ export class DiscordBridge implements ChannelOutbound { ): Promise { await this.ensureSession(sessionId, event); + const resolved = await this.resolveInbound(sessionId, event, text); + // Nothing usable (e.g. all attachments failed to fetch and no text). + if (!resolved.text && !resolved.attachments) return; + const postResult = await this.client.postMessage(sessionId, { - text, + text: resolved.text, mentioned: event.mentioned, + ...(resolved.attachments ? { attachments: resolved.attachments } : {}), sender: { channel: 'discord', channelUserId: event.userId, @@ -158,6 +226,35 @@ export class DiscordBridge implements ChannelOutbound { } } + /** + * Deliver an outbound `attachment` SSE event as a Discord file upload. + * Bytes are pulled lazily from the agent-local API. + */ + private async deliverAttachment( + channelId: string, + payload: Record, + ): Promise { + const sessionId = String(payload.sessionId ?? ''); + const attachmentId = String(payload.attachmentId ?? 
''); + if (!sessionId || !attachmentId) { + this.log('attachment event missing sessionId/attachmentId'); + return; + } + const caption = + typeof payload.caption === 'string' && payload.caption.length > 0 + ? payload.caption + : undefined; + const hintedName = + typeof payload.name === 'string' && payload.name.length > 0 ? payload.name : undefined; + + const { bytes, filename } = await this.client.downloadAttachmentBytes(sessionId, attachmentId); + await this.discord.sendFile(channelId, { + bytes, + filename: hintedName ?? filename ?? 'attachment', + ...(caption ? { caption } : {}), + }); + } + private async ensureSession( sessionId: string, event: DiscordMessageEvent, @@ -283,6 +380,17 @@ export class DiscordBridge implements ChannelOutbound { continue; } + if (frame.event === 'attachment') { + try { + await this.deliverAttachment(channelId, payload); + } catch (err) { + this.log( + `attachment delivery failed: ${err instanceof Error ? err.message : String(err)}`, + ); + } + continue; + } + if (frame.event === 'agent_end') { sawAgentEnd = true; continue; diff --git a/apps/channels/discord/src/discord-api.ts b/apps/channels/discord/src/discord-api.ts index 850c4cb1..06f935b9 100644 --- a/apps/channels/discord/src/discord-api.ts +++ b/apps/channels/discord/src/discord-api.ts @@ -1,10 +1,19 @@ import { + AttachmentBuilder, Client, GatewayIntentBits, Partials, type Message, } from 'discord.js'; +/** An inbound file attached to a Discord message (CDN-hosted). 
*/ +export interface DiscordIncomingAttachment { + url: string; + name: string; + contentType?: string; + size?: number; +} + export interface DiscordMessageEvent { channelId: string; userId: string; @@ -15,6 +24,7 @@ export interface DiscordMessageEvent { isDm: boolean; mentioned: boolean; guildId?: string; + attachments?: DiscordIncomingAttachment[]; } export class DiscordApi { @@ -62,6 +72,22 @@ export class DiscordApi { return (channel as any).send(text) as Promise; } + /** Send a file attachment, optionally with caption text in the same message. */ + async sendFile( + channelId: string, + file: { bytes: Uint8Array; filename: string; caption?: string }, + ): Promise { + const channel = await this.client.channels.fetch(channelId); + if (!channel || !('send' in channel)) { + throw new Error(`Channel ${channelId} not found or not text-based`); + } + const attachment = new AttachmentBuilder(Buffer.from(file.bytes), { name: file.filename }); + const payload: { files: AttachmentBuilder[]; content?: string } = { files: [attachment] }; + if (file.caption && file.caption.length > 0) payload.content = file.caption; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + return (channel as any).send(payload) as Promise; + } + async editMessage(channelId: string, messageId: string, text: string): Promise { const channel = await this.client.channels.fetch(channelId); if (!channel || !('messages' in channel)) return; diff --git a/apps/channels/discord/test/bot.test.ts b/apps/channels/discord/test/bot.test.ts new file mode 100644 index 00000000..eefc9227 --- /dev/null +++ b/apps/channels/discord/test/bot.test.ts @@ -0,0 +1,25 @@ +import assert from 'node:assert/strict'; +import { test } from 'node:test'; + +import { mapRawAttachments } from '../src/bot.js'; + +test('mapRawAttachments maps gateway-dispatch attachments to the neutral shape', () => { + const mapped = mapRawAttachments([ + { url: 'https://cdn.discordapp.com/a.png', filename: 'a.png', content_type: 
'image/png', size: 1234 }, + { url: 'https://cdn.discordapp.com/b.pdf', filename: 'b.pdf', content_type: 'application/pdf', size: 99 }, + ]); + assert.deepEqual(mapped, [ + { url: 'https://cdn.discordapp.com/a.png', name: 'a.png', contentType: 'image/png', size: 1234 }, + { url: 'https://cdn.discordapp.com/b.pdf', name: 'b.pdf', contentType: 'application/pdf', size: 99 }, + ]); +}); + +test('mapRawAttachments tolerates missing fields and non-arrays', () => { + assert.deepEqual(mapRawAttachments(undefined), []); + assert.deepEqual(mapRawAttachments('nope'), []); + assert.deepEqual(mapRawAttachments([{ url: 'https://x/y' }]), [ + { url: 'https://x/y', name: 'attachment' }, + ]); + // Entries without a url are dropped. + assert.deepEqual(mapRawAttachments([{ filename: 'no-url.txt' }]), []); +}); diff --git a/docs/channel-adapter.md b/docs/channel-adapter.md index 6f81a910..4ebf3365 100644 --- a/docs/channel-adapter.md +++ b/docs/channel-adapter.md @@ -11,7 +11,7 @@ These ship inside the CLI binary and are registered automatically: | Platform | Package | Connection | |----------|---------|------------| | Telegram | `@openhermit/channel-telegram` | polling or webhook | -| Discord | `@openhermit/channel-discord` | Discord gateway via `discord.js` | +| Discord | `@openhermit/channel-discord` | Discord gateway via `discord.js`; text + media (files/images, audio transcribed) | | Slack | `@openhermit/channel-slack` | Slack Socket Mode | ## External Plugin Adapters @@ -166,6 +166,7 @@ Discord: - guild messages and DMs - mention detection before routing - optional `allowed_channel_ids` +- media inbound (CDN attachments uploaded as session attachments; images become vision input; audio transcribed via STT) and outbound (`attachment_send` → Discord file upload); media over the 25 MiB cap is skipped Slack: diff --git a/docs/manual/17-channels.md b/docs/manual/17-channels.md index 130b343b..2a339979 100644 --- a/docs/manual/17-channels.md +++ b/docs/manual/17-channels.md @@ -61,6 
+61,7 @@ A gateway restart (`hermit gateway stop && hermit gateway start`) is required fo - Bot applications are per agent. - Slash commands optional; the agent works in plain channel chat once invited. +- Media: file/image attachments are uploaded to the agent (images become vision input) and audio attachments are transcribed; the agent can send files back. Attachments over 25 MiB are skipped. ### Slack From 5486f64cd441af7a5703d48585742f29b2ab8bb5 Mon Sep 17 00:00:00 2001 From: William Date: Fri, 29 May 2026 18:59:28 +0800 Subject: [PATCH 2/3] fix(discord): bump gateway pin + lockfile to 0.3.0 The gateway optionalDependencies still pinned @openhermit/channel-discord at 0.2.0 while the workspace moved to 0.3.0, so a fresh install wouldn't link the updated adapter and the media changes wouldn't take effect in the bundled CLI. Bump the gateway pin and lockfile to match. Co-Authored-By: Claude Opus 4.8 --- apps/gateway/package.json | 2 +- package-lock.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/gateway/package.json b/apps/gateway/package.json index 7ed9db1d..b8d227d4 100644 --- a/apps/gateway/package.json +++ b/apps/gateway/package.json @@ -39,6 +39,6 @@ "optionalDependencies": { "@openhermit/channel-telegram": "0.2.0", "@openhermit/channel-slack": "0.2.0", - "@openhermit/channel-discord": "0.2.0" + "@openhermit/channel-discord": "0.3.0" } } diff --git a/package-lock.json b/package-lock.json index 4e37a0dd..68678d30 100644 --- a/package-lock.json +++ b/package-lock.json @@ -53,7 +53,7 @@ }, "apps/channels/discord": { "name": "@openhermit/channel-discord", - "version": "0.2.0", + "version": "0.3.0", "dependencies": { "@openhermit/sdk": "^0.6.0", "@openhermit/shared": "0.2.0", @@ -324,7 +324,7 @@ "vite": "^6.3.2" }, "optionalDependencies": { - "@openhermit/channel-discord": "0.2.0", + "@openhermit/channel-discord": "0.3.0", "@openhermit/channel-slack": "0.2.0", "@openhermit/channel-telegram": "0.2.0" } From 
b2628305d01ea30ea49e3453c94513c401deb660 Mon Sep 17 00:00:00 2001 From: William Date: Fri, 29 May 2026 19:28:29 +0800 Subject: [PATCH 3/3] fix(discord): bound CDN attachment fetch with a timeout A stalled Discord CDN connection during inbound attachment download could block the per-channel message queue indefinitely. Add a 15s AbortSignal.timeout() to the fetch. Co-Authored-By: Claude Opus 4.8 --- apps/channels/discord/src/bridge.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/channels/discord/src/bridge.ts b/apps/channels/discord/src/bridge.ts index 2b948591..7ac8393b 100644 --- a/apps/channels/discord/src/bridge.ts +++ b/apps/channels/discord/src/bridge.ts @@ -10,6 +10,9 @@ import { formatAgentResponse, markdownToDiscord } from './formatting.js'; /** Gateway-enforced attachment cap (25 MiB). Skip oversized media early. */ const MAX_MEDIA_BYTES = 25 * 1024 * 1024; +/** Bound CDN attachment fetches so a stalled connection can't block the queue. */ +const MEDIA_FETCH_TIMEOUT_MS = 15_000; + interface TurnResult { text: string | undefined; error: string | undefined; @@ -115,7 +118,8 @@ export class DiscordBridge implements ChannelOutbound { } let bytes: Uint8Array; try { - const res = await fetch(att.url); + // Bound the CDN fetch so a stalled connection can't block the queue. + const res = await fetch(att.url, { signal: AbortSignal.timeout(MEDIA_FETCH_TIMEOUT_MS) }); if (!res.ok) throw new Error(`status ${res.status}`); bytes = new Uint8Array(await res.arrayBuffer()); } catch (err) {