getsentry · cameroncooke · Apr 8, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/.cursor/BUGBOT.md b/.cursor/BUGBOT.md
@@ -12,7 +12,7 @@ For full details see [README.md](README.md) and [docs/ARCHITECTURE.md](docs/ARCH
 ## 1. Security Checklist — Critical
 
 * No hard-coded secrets, tokens or DSNs.
-* All shell commands must flow through `CommandExecutor` with validated arguments (no direct `child_process` calls).
+* MCP tool logic functions that orchestrate long-running processes with sub-processes (e.g., `xcodebuild`) must flow through `CommandExecutor` with validated arguments. Standalone utility modules that invoke simple, short-lived commands may use direct `child_process`/`fs` imports and standard vitest mocking.
 * Paths must be sanitised via helpers in `src/utils/validation.ts`.
 * Sentry breadcrumbs / logs must **NOT** include user PII.
 
@@ -22,7 +22,7 @@ For full details see [README.md](README.md) and [docs/ARCHITECTURE.md](docs/ARCH
 
 | Rule | Quick diff heuristic |
 |------|----------------------|
-| Dependency injection only | New `child_process` \| `fs` import ⇒ **critical** |
+| Dependency injection for tool logic | New `child_process` \| `fs` import in MCP tool logic ⇒ **warning** (check if process is complex/long-running) |
 | Handler / Logic split | `handler` > 20 LOC or contains branching ⇒ **critical** |
 | Plugin auto-registration | Manual `registerTool(...)` / `registerResource(...)` ⇒ **critical** |
 
@@ -45,7 +45,8 @@ export const handler = (p: FooBarParams) => fooBarLogic(p);
 
 ## 3. Testing Checklist
 
-* **External-boundary rule**: Use `createMockExecutor` / `createMockFileSystemExecutor` for command execution and filesystem side effects.
+* **External-boundary rule for tool logic**: Use `createMockExecutor` / `createMockFileSystemExecutor` for complex process orchestration (xcodebuild, multi-step pipelines) in MCP tool logic functions.
+* **Simple utilities**: Standalone utility modules with simple command calls can use direct imports and standard vitest mocking.
 * **Internal mocking is allowed**: `vi.mock`, `vi.fn`, `vi.spyOn`, and `.mock*` are acceptable for internal modules/collaborators.
 * Each tool must have tests covering happy-path **and** at least one failure path.
 * Avoid the `any` type unless justified with an inline comment.
@@ -66,15 +67,15 @@ export const handler = (p: FooBarParams) => fooBarLogic(p);
 |--------------|--------------------|
 | Complex logic in `handler` | Move to `*Logic` function |
 | Re-implementing logging | Use `src/utils/logger.ts` |
-| Direct `fs` / `child_process` usage | Inject `FileSystemExecutor` / `CommandExecutor` |
+| Direct `fs` / `child_process` in tool logic orchestrating complex processes | Inject `FileSystemExecutor` / `CommandExecutor` |
 | Chained re-exports | Export directly from source |
 
 ---
 
 ### How Bugbot Can Verify Rules
 
 1. **External-boundary violations**: confirm tests use injected executors/filesystem for external side effects.
-2. **DI compliance**: search for direct `child_process` / `fs` imports outside approved patterns.
+2. **DI compliance**: check direct `child_process` / `fs` imports in MCP tool logic; standalone utilities with simple commands are acceptable.
 3. **Docs accuracy**: compare `docs/TOOLS.md` against `src/mcp/tools/**`.
 4. **Style**: ensure ESLint and Prettier pass (`npm run lint`, `npm run format:check`).
 

diff --git a/docs/dev/ARCHITECTURE.md b/docs/dev/ARCHITECTURE.md
@@ -418,13 +418,13 @@ Not all parts are required for every tool. For example, `swift_package_build` ha
 
 ### Testing Principles
 
-XcodeBuildMCP uses a **Dependency Injection (DI)** pattern for testing external boundaries (command execution, filesystem, and other side effects). Vitest mocking libraries (`vi.mock`, `vi.fn`, etc.) are acceptable for internal collaborators when needed. This keeps tests robust while preserving deterministic behavior at external boundaries.
+XcodeBuildMCP uses a **Dependency Injection (DI)** pattern for MCP tool logic functions that orchestrate complex, long-running processes with sub-processes (e.g., `xcodebuild`), where standard vitest mocking produces race conditions. Standalone utility modules with simple commands may use direct `child_process`/`fs` imports and standard vitest mocking. Vitest mocking libraries (`vi.mock`, `vi.fn`, etc.) are also acceptable for internal collaborators.
 
 For detailed guidelines, see the [Testing Guide](TESTING.md).
 
 ### Test Structure Example
 
-Tests inject mock "executors" for external interactions like command-line execution or file system access. This allows for deterministic testing of tool logic without mocking the implementation itself. The project provides helper functions like `createMockExecutor` and `createMockFileSystemExecutor` in `src/test-utils/mock-executors.ts` to facilitate this pattern.
+Tests for MCP tool logic inject mock "executors" for complex process orchestration (e.g., xcodebuild). This allows for deterministic testing without race conditions from non-deterministic sub-process ordering. The project provides helper functions like `createMockExecutor` and `createMockFileSystemExecutor` in `src/test-utils/mock-executors.ts`. Standalone utility modules with simple commands use standard vitest mocking.
 
 ```typescript
 import { describe, it, expect } from 'vitest';

diff --git a/docs/dev/CODE_QUALITY.md b/docs/dev/CODE_QUALITY.md
@@ -53,17 +53,18 @@ XcodeBuildMCP enforces several architectural patterns that cannot be expressed t
 
 ### 1. Dependency Injection Pattern
 
-**Rule**: All tools must use dependency injection for external interactions.
+**Rule**: MCP tool logic functions that orchestrate complex, long-running processes with sub-processes (e.g., `xcodebuild`) must use dependency injection for external interactions. This is because standard vitest mocking produces race conditions when sub-process ordering is non-deterministic.
+
+Standalone utility modules that invoke simple, short-lived commands (e.g., `xcrun devicectl list`, `xcrun xcresulttool get`) may use direct `child_process`/`fs` imports and be tested with standard vitest mocking.
 
 ✅ **Allowed**:
-- `createMockExecutor()` for command execution mocking
-- `createMockFileSystemExecutor()` for file system mocking
-- Logic functions accepting `executor?: CommandExecutor` parameter
+- `createMockExecutor()` / `createMockFileSystemExecutor()` for complex process orchestration in tool logic
+- Logic functions accepting `executor?: CommandExecutor` parameter for xcodebuild and similar pipelines
+- Direct `child_process`/`fs` imports in standalone utility modules with simple commands, tested via vitest mocking
 
 ❌ **Forbidden**:
-- Direct calls to `execSync`, `spawn`, or `exec` in production code
 - Testing handler functions directly
-- Real external side effects in unit tests (xcodebuild/xcrun/filesystem writes outside mocks)
+- Real external side effects in unit tests (real `xcodebuild`, `xcrun`, AXe, filesystem writes/reads outside test harness)
 
 ### 2. Handler Signature Compliance
 

diff --git a/docs/dev/CONTRIBUTING.md b/docs/dev/CONTRIBUTING.md
@@ -270,7 +270,7 @@ Before making changes, please familiarize yourself with:
 All contributions must adhere to the testing standards outlined in the [**XcodeBuildMCP Plugin Testing Guidelines (TESTING.md)**](TESTING.md). This is the canonical source of truth for all testing practices.
 
 **Key Principles (Summary):**
-- **Dependency Injection for External Boundaries**: All external dependencies (command execution, file system access) must be injected into tool logic functions using the `CommandExecutor` and `FileSystemExecutor` patterns.
+- **Dependency Injection for Complex Processes**: MCP tool logic functions that orchestrate complex, long-running processes with sub-processes (e.g., `xcodebuild`) must use injected `CommandExecutor` and `FileSystemExecutor` patterns. Standalone utility modules with simple commands may use direct imports and standard vitest mocking.
 - **Internal Mocking Is Allowed**: Vitest mocking (`vi.mock`, `vi.fn`, `vi.spyOn`, etc.) is acceptable for internal modules/collaborators.
 - **Test Production Code**: Tests must import and execute the actual tool logic, not mock implementations.
 - **Comprehensive Coverage**: Tests must cover input validation, command generation, and output processing.

diff --git a/docs/dev/TESTING.md b/docs/dev/TESTING.md
@@ -20,28 +20,32 @@ This document provides comprehensive testing guidelines for XcodeBuildMCP plugin
 
 ### 🚨 CRITICAL: External Dependency Mocking Rules
 
-### ABSOLUTE RULE: External side effects must use dependency injection utilities
+### When to use dependency-injection executors
 
-### Use dependency-injection mocks for EXTERNAL dependencies:
-- `createMockExecutor()` / `createNoopExecutor()` for command execution (`xcrun`, `xcodebuild`, AXe, etc.)
-- `createMockFileSystemExecutor()` / `createNoopFileSystemExecutor()` for file system interactions
+`CommandExecutor` / `FileSystemExecutor` DI is required for **MCP tool logic functions** that orchestrate complex, long-running processes with sub-processes (e.g., `xcodebuild`, multi-step build pipelines). Standard vitest mocking produces race conditions with these because sub-process ordering is non-deterministic.
+
+- `createMockExecutor()` / `createNoopExecutor()` for command execution in tool logic
+- `createMockFileSystemExecutor()` / `createNoopFileSystemExecutor()` for file system interactions in tool logic
+
+### When standard vitest mocking is fine
+
+Standalone utility modules that invoke simple, short-lived commands (e.g., `xcrun devicectl list`, `xcrun xcresulttool get`) may use direct `child_process`/`fs` imports and be tested with standard vitest mocking (`vi.fn`, `vi.mock`, `vi.spyOn`, etc.). This is simpler and perfectly adequate for deterministic, single-command utilities.
 
 ### Internal mocking guidance:
 - Vitest mocking (`vi.fn`, `vi.mock`, `vi.spyOn`, `.mockResolvedValue`, etc.) is allowed for internal modules and in-memory collaborators
 - Prefer straightforward, readable test doubles over over-engineered mocks
 
 ### Still forbidden:
 - Hitting real external systems in unit tests (real `xcodebuild`, `xcrun`, AXe, filesystem writes/reads outside test harness)
-- Bypassing dependency injection for external effects
 
 ### OUR CORE PRINCIPLE
 
-**Simple Rule**: Use dependency-injection mock executors for external boundaries; use Vitest mocking only for internal behavior.
+**Simple Rule**: Use dependency-injection mock executors for complex process orchestration in tool logic; use standard vitest mocking for simple utility modules and internal behavior.
 
 **Why This Rule Exists**:
-1. **Reliability**: External side effects stay deterministic and hermetic
-2. **Clarity**: Internal collaboration assertions remain concise and readable
-3. **Architectural Enforcement**: External boundaries are explicit in tool logic signatures
+1. **Reliability**: Complex multi-process orchestration stays deterministic and hermetic via DI executors
+2. **Simplicity**: Simple utilities use standard vitest mocking without unnecessary abstraction
+3. **Architectural Enforcement**: External boundaries for complex processes are explicit in tool logic signatures
 4. **Maintainability**: Tests fail for behavior regressions, not incidental environment differences
 
 ### Integration Testing with Dependency Injection
@@ -111,7 +115,7 @@ Test → Plugin Handler → utilities → [DEPENDENCY INJECTION] createMockExecu
 
 ### Handler Requirements
 
-All plugin handlers must support dependency injection:
+MCP tool logic functions that orchestrate complex processes must support dependency injection:
 
 ```typescript
 export function tool_nameLogic(
@@ -134,12 +138,9 @@ export default {
 };
 ```
 
-**Important**: The dependency injection pattern applies to ALL handlers, including:
-- Tool handlers
-- Resource handlers
-- Any future handler types (prompts, etc.)
+**Important**: The dependency injection pattern applies to tool and resource handler logic that orchestrates complex, long-running processes (e.g., `xcodebuild`). Standalone utility modules with simple commands may use direct imports and standard vitest mocking.
 
-Always use default parameter values (e.g., `= getDefaultCommandExecutor()`) to ensure production code works without explicit executor injection, while tests can override with mock executors.
+Always use default parameter values (e.g., `= getDefaultCommandExecutor()`) in tool logic to ensure production code works without explicit executor injection, while tests can override with mock executors.
 
 ### Test Requirements
 

diff --git a/src/test-utils/test-helpers.ts b/src/test-utils/test-helpers.ts
@@ -0,0 +1,204 @@
+/**
+ * Shared test helpers for extracting text content from tool responses.
+ */
+
+import { expect } from 'vitest';
+import type { ToolHandlerContext, ImageAttachment } from '../rendering/types.ts';
+import type { PipelineEvent } from '../types/pipeline-events.ts';
+import type { ToolResponse, NextStepParamsMap } from '../types/common.ts';
+import type { ToolHandler } from '../utils/typed-tool-factory.ts';
+import { renderEvents } from '../rendering/render.ts';
+import { createRenderSession } from '../rendering/render.ts';
+import { handlerContextStorage } from '../utils/typed-tool-factory.ts';
+
+/**
+ * Extract and join all text content items from a tool response.
+ */
+export function allText(result: {
+  content: ReadonlyArray<{ type: string; text?: string; [key: string]: unknown }>;
+}): string {
+  return result.content
+    .filter(
+      (c): c is { type: 'text'; text: string } => c.type === 'text' && typeof c.text === 'string',
+    )
+    .map((c) => c.text)
+    .join('\n');
+}
+
+/**
+ * Assert that a tool response represents a pending xcodebuild result
+ * with an optional next-step tool reference.
+ */
+export interface MockToolHandlerResult {
+  events: PipelineEvent[];
+  attachments: ImageAttachment[];
+  nextStepParams?: NextStepParamsMap;
+  text(): string;
+  isError(): boolean;
+}
+
+export function createMockToolHandlerContext(): {
+  ctx: ToolHandlerContext;
+  result: MockToolHandlerResult;
+  run: <T>(fn: () => Promise<T>) => Promise<T>;
+} {
+  const events: PipelineEvent[] = [];
+  const attachments: ImageAttachment[] = [];
+  const ctx: ToolHandlerContext = {
+    emit: (event) => {
+      events.push(event);
+    },
+    attach: (image) => {
+      attachments.push(image);
+    },
+  };
+  const resultObj: MockToolHandlerResult = {
+    events,
+    attachments,
+    get nextStepParams() {
+      return ctx.nextStepParams;
+    },
+    text() {
+      return renderEvents(events, 'text');
+    },
+    isError() {
+      return events.some(
+        (e) =>
+          (e.type === 'status-line' && e.level === 'error') ||
+          (e.type === 'summary' && e.status === 'FAILED'),
+      );
+    },
+  };
+  return {
+    ctx,
+    result: resultObj,
+    run: async <T>(fn: () => Promise<T>): Promise<T> => {
+      return handlerContextStorage.run(ctx, fn);
+    },
+  };
+}
+
+export async function runToolLogic<T>(logic: () => Promise<T>): Promise<{
+  response: T;
+  result: MockToolHandlerResult;
+}> {
+  const { result, run } = createMockToolHandlerContext();
+  const response = await run(logic);
+  return { response, result };
+}
+
+export interface RunLogicResult {
+  content: Array<{ type: string; text?: string; data?: string; mimeType?: string }>;
+  isError?: boolean;
+  nextStepParams?: NextStepParamsMap;
+  attachments?: ImageAttachment[];
+  text?: string;
+}
+
+/**
+ * Run a tool's logic function in a mock handler context and return a
+ * ToolResponse-shaped result for backward-compatible test assertions.
+ */
+export async function runLogic(logic: () => Promise<unknown>): Promise<RunLogicResult> {
+  const { result, run } = createMockToolHandlerContext();
+  const response = await run(logic);
+
+  if (
+    response &&
+    typeof response === 'object' &&
+    'content' in (response as Record<string, unknown>)
+  ) {
+    return response as RunLogicResult;
+  }
+
+  const text = result.text();
+  const textContent = text.length > 0 ? [{ type: 'text' as const, text }] : [];
+  const imageContent = result.attachments.map((attachment) => ({
+    type: 'image' as const,
+    data: attachment.data,
+    mimeType: attachment.mimeType,
+  }));
+
+  return {
+    content: [...textContent, ...imageContent],
+    isError: result.isError() ? true : undefined,
+    nextStepParams: result.nextStepParams,
+    attachments: result.attachments,
+    text,
+  };
+}
+
+export interface CallHandlerResult {
+  content: Array<{ type: 'text'; text: string }>;
+  isError?: boolean;
+  nextStepParams?: NextStepParamsMap;
+}
+
+/**
+ * Call a tool handler in test mode, providing a session context and
+ * returning a ToolResponse-shaped result for backward-compatible assertions.
+ */
+export async function callHandler(
+  handler:
+    | ToolHandler
+    | ((args: Record<string, unknown>, ctx?: ToolHandlerContext) => Promise<void>),
+  args: Record<string, unknown>,
+): Promise<CallHandlerResult> {
+  const session = createRenderSession('text');
+  const ctx: ToolHandlerContext = {
+    emit: (event) => session.emit(event),
+    attach: (image) => session.attach(image),
+  };
+  await handler(args, ctx);
+  const text = session.finalize();
+  return {
+    content: text ? [{ type: 'text' as const, text }] : [],
+    isError: session.isError() || undefined,
+    nextStepParams: ctx.nextStepParams,
+  };
+}
+
+function isMockToolHandlerResult(
+  result: ToolResponse | MockToolHandlerResult,
+): result is MockToolHandlerResult {
+  return 'events' in result && Array.isArray(result.events) && typeof result.text === 'function';
+}
+
+export function expectPendingBuildResponse(
+  result: ToolResponse | MockToolHandlerResult,
+  nextStepToolId?: string,
+): void {
+  if (isMockToolHandlerResult(result)) {
+    expect(result.events.some((event) => event.type === 'summary')).toBe(true);
+
+    if (nextStepToolId) {
+      expect(result.nextStepParams).toEqual(
+        expect.objectContaining({
+          [nextStepToolId]: expect.any(Object),
+        }),
+      );
+    } else {
+      expect(result.nextStepParams).toBeUndefined();
+    }
+    return;
+  }
+
+  expect(result.content).toEqual([]);
+  expect(result._meta).toEqual(
+    expect.objectContaining({
+      pendingXcodebuild: expect.objectContaining({
+        kind: 'pending-xcodebuild',
+      }),
+    }),
+  );
+
+  if (nextStepToolId) {
+    expect(result.nextStepParams).toEqual(
+      expect.objectContaining({
+        [nextStepToolId]: expect.any(Object),
+      }),
+    );
+  } else {
+    expect(result.nextStepParams).toBeUndefined();
+  }
+}
diff --git a/src/types/common.ts b/src/types/common.ts
@@ -137,6 +137,7 @@ export interface PlatformBuildOptions {
   simulatorId?: string;
   deviceId?: string;
   useLatestOS?: boolean;
+  packageCachePath?: string;
   arch?: string;
   logPrefix: string;
 }