CodebuffAI
diff --git a/‎packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts‎
Lines changed: 0 additions & 40 deletions b/‎packages/agent-runtime/src/__tests__/tool-stream-parser.test.ts‎
Lines changed: 0 additions & 40 deletions
diff --git a/‎packages/agent-runtime/src/__tests__/tool-validation-error.test.ts‎
Lines changed: 128 additions & 0 deletions b/‎packages/agent-runtime/src/__tests__/tool-validation-error.test.ts‎
Lines changed: 128 additions & 0 deletions
diff --git a/‎packages/agent-runtime/src/__tests__/xml-tool-result-ordering.test.ts‎
Lines changed: 0 additions & 3 deletions b/‎packages/agent-runtime/src/__tests__/xml-tool-result-ordering.test.ts‎
Lines changed: 0 additions & 3 deletions
@@ -47,10 +47,6 @@ describe('processStreamWithTags', () => {
       },
     }
 
-    function onError(name: string, error: string) {
-      events.push({ name, error })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -70,7 +66,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -114,10 +109,6 @@ describe('processStreamWithTags', () => {
       },
     }
 
-    function onError(name: string, error: string) {
-      events.push({ name, error })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -137,7 +128,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -191,10 +181,6 @@ describe('processStreamWithTags', () => {
       },
     }
 
-    function onError(name: string, error: string) {
-      events.push({ name, error })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -214,7 +200,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -267,10 +252,6 @@ describe('processStreamWithTags', () => {
       },
     }
 
-    function onError(name: string, error: string) {
-      events.push({ name, error, type: 'error' })
-    }
-
     const responseChunks: any[] = []
 
     function onResponseChunk(chunk: any) {
@@ -295,7 +276,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -341,10 +321,6 @@ describe('processStreamWithTags', () => {
       },
     }
 
-    function onError(name: string, error: string) {
-      events.push({ name, error, type: 'error' })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -364,7 +340,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -414,10 +389,6 @@ describe('processStreamWithTags', () => {
       },
     }
 
-    function onError(name: string, error: string) {
-      events.push({ name, error, type: 'error' })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -437,7 +408,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -468,10 +438,6 @@ describe('processStreamWithTags', () => {
 
     const processors = {}
 
-    function onError(name: string, error: string) {
-      events.push({ name, error, type: 'error' })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -491,7 +457,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
@@ -515,10 +480,6 @@ describe('processStreamWithTags', () => {
 
     const processors = {}
 
-    function onError(name: string, error: string) {
-      events.push({ name, error, type: 'error' })
-    }
-
     const result: string[] = []
     const responseChunks: any[] = []
 
@@ -538,7 +499,6 @@ describe('processStreamWithTags', () => {
       stream,
       processors,
       defaultProcessor,
-      onError,
       onResponseChunk,
       executeXmlToolCall: async () => {},
     })) {
 
@@ -1,6 +1,7 @@
 import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime'
 import { getInitialSessionState } from '@codebuff/common/types/session-state'
 import { promptSuccess } from '@codebuff/common/util/error'
+import { jsonToolResult } from '@codebuff/common/util/messages'
 import { beforeEach, describe, expect, it } from 'bun:test'
 
 import { mockFileContext } from './test-utils'
@@ -12,6 +13,10 @@ import type {
   AgentRuntimeScopedDeps,
 } from '@codebuff/common/types/contracts/agent-runtime'
 import type { StreamChunk } from '@codebuff/common/types/contracts/llm'
+import type {
+  AssistantMessage,
+  ToolMessage,
+} from '@codebuff/common/types/messages/codebuff-message'
 import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
 
 describe('tool validation error handling', () => {
@@ -225,4 +230,127 @@ describe('tool validation error handling', () => {
     )
     expect(errorEvents.length).toBe(0)
   })
+
+  it('should preserve tool_call/tool_result ordering when custom tool setup is async', async () => { // Checks messageHistory holds the tool call before its result.
+    const toolName = 'delayed_custom_tool'
+    const agentWithCustomTool: AgentTemplate = { // testAgentTemplate with toolNames overridden
+      ...testAgentTemplate,
+      toolNames: [toolName, 'end_turn'],
+    }
+
+    const delayedToolCallChunk: StreamChunk = { // the only chunk the mocked stream yields
+      type: 'tool-call',
+      toolName,
+      toolCallId: 'delayed-custom-tool-call-id',
+      input: {
+        query: 'test',
+      },
+    }
+
+    async function* mockStream() { // passed to processStream as `stream`
+      yield delayedToolCallChunk
+      return promptSuccess('mock-message-id')
+    }
+
+    const fileContextWithCustomTool = { // mockFileContext plus a definition for the custom tool
+      ...mockFileContext,
+      customToolDefinitions: {
+        [toolName]: {
+          inputSchema: {
+            type: 'object',
+            properties: {
+              query: { type: 'string' },
+            },
+            required: ['query'],
+            additionalProperties: false,
+          },
+          endsAgentStep: false,
+          description: 'A delayed custom tool for ordering tests',
+        },
+      },
+    }
+
+    const sessionState = getInitialSessionState(fileContextWithCustomTool)
+    const agentState = sessionState.mainAgentState // its messageHistory is inspected after processStream
+
+    agentRuntimeImpl.requestMcpToolData = async () => {
+      // Force an async gap so tool_call emission happens after stream completion.
+      await new Promise((resolve) => setTimeout(resolve, 20))
+      return []
+    }
+    agentRuntimeImpl.requestToolCall = async () => ({ // stubbed tool execution: fixed JSON result
+      output: jsonToolResult({ ok: true }),
+    })
+
+    await processStream({ // NOTE(review): presumably appends to agentState.messageHistory in place - confirm
+      ...agentRuntimeImpl,
+      agentContext: {},
+      agentState,
+      agentStepId: 'test-step-id',
+      agentTemplate: agentWithCustomTool,
+      ancestorRunIds: [],
+      clientSessionId: 'test-session',
+      fileContext: fileContextWithCustomTool,
+      fingerprintId: 'test-fingerprint',
+      fullResponse: '',
+      localAgentTemplates: { 'test-agent': agentWithCustomTool },
+      messages: [],
+      prompt: 'test prompt',
+      repoId: undefined,
+      repoUrl: undefined,
+      runId: 'test-run-id',
+      signal: new AbortController().signal,
+      stream: mockStream(),
+      system: 'test system',
+      tools: {},
+      userId: 'test-user',
+      userInputId: 'test-input-id',
+      onCostCalculated: async () => {},
+      onResponseChunk: () => {},
+    })
+
+    const assistantToolCallMessages = agentState.messageHistory.filter( // assistant messages with a tool-call part for the custom tool
+      (m): m is AssistantMessage =>
+        m.role === 'assistant' &&
+        m.content.some((c) => c.type === 'tool-call' && c.toolName === toolName),
+    )
+    const toolMessages = agentState.messageHistory.filter( // tool messages whose toolName is the custom tool
+      (m): m is ToolMessage => m.role === 'tool' && m.toolName === toolName,
+    )
+
+    expect(assistantToolCallMessages.length).toBe(1) // exactly one call message ...
+    expect(toolMessages.length).toBe(1) // ... and exactly one result message
+
+    const assistantToolCallPart = assistantToolCallMessages[0].content.find(
+      (
+        c,
+      ): c is Extract<AssistantMessage['content'][number], { type: 'tool-call' }> =>
+        c.type === 'tool-call' && c.toolName === toolName,
+    )
+    expect(assistantToolCallPart).toBeDefined()
+    expect(toolMessages[0].toolCallId).toBe(assistantToolCallPart!.toolCallId) // result must reference the call's id
+
+    const assistantIndex = agentState.messageHistory.indexOf(
+      assistantToolCallMessages[0],
+    )
+    const toolResultIndex = agentState.messageHistory.indexOf(toolMessages[0])
+    expect(assistantIndex).toBeGreaterThanOrEqual(0)
+    expect(toolResultIndex).toBeGreaterThan(assistantIndex) // result must sit later in history than the call
+
+    const assistantToolCallIds = new Set( // every toolCallId found in any assistant tool-call part
+      agentState.messageHistory.flatMap((message) => {
+        if (message.role !== 'assistant') {
+          return []
+        }
+        return message.content.flatMap((part) =>
+          part.type === 'tool-call' ? [part.toolCallId] : [],
+        )
+      }),
+    )
+    const orphanToolResults = agentState.messageHistory.filter( // tool messages whose toolCallId matches no assistant tool-call
+      (message): message is ToolMessage =>
+        message.role === 'tool' && !assistantToolCallIds.has(message.toolCallId),
+    )
+    expect(orphanToolResults.length).toBe(0)
+  })
 })
@@ -59,7 +59,6 @@ describe('XML tool result ordering', () => {
       stream,
       processors: {},
       defaultProcessor,
-      onError: () => {},
       onResponseChunk,
       executeXmlToolCall: async ({ toolName, input }) => {
         executionOrder.push(`executeXmlToolCall:${toolName}`)
@@ -136,7 +135,6 @@ describe('XML tool result ordering', () => {
       stream,
       processors: {},
       defaultProcessor,
-      onError: () => {},
       onResponseChunk,
       executeXmlToolCall: async ({ toolName }) => {
         // Simulate tool_call event
@@ -206,7 +204,6 @@ describe('XML tool result ordering', () => {
         stream,
         processors: {},
         defaultProcessor: () => ({ onTagStart: () => {}, onTagEnd: () => {} }),
-        onError: () => {},
         onResponseChunk: () => {},
         executeXmlToolCall: async () => {
           // Simulate tool execution with async work