diff --git a/issues/2.md b/issues/2.md new file mode 100644 index 0000000..8f0bffc --- /dev/null +++ b/issues/2.md @@ -0,0 +1,350 @@ +## Research: Intent-to-Action Transformation — How a Generic Workflow Step Becomes Concrete + +### 1. The Core Problem + +A coding agent receives a generic intent such as **"verify-build"**. This is a template name +that means "compile the project and confirm whether the build succeeds or fails". But the +_concrete steps_ vary entirely by workspace: + +- For a CMake project with presets: `cmake --preset linux-release && cmake --build --preset linux-build -j2` +- For a Node.js project: `npm ci && npm run build` +- For a Rust project: `cargo build` +- For a Gradle project: `./gradlew assemble` + +The question is: **what is the correct point in the machinery to perform this transformation, +and which components are responsible for deriving the concrete steps?** + +--- + +### 2. What Must NOT Happen — No Hardcoded Instantiation + +The transformation must not be done by pre-wiring cmake commands (or any other build-system +commands) into static configuration files. A hardcoded solution: + +- Cannot adapt when a project changes its build system or adds presets. +- Does not scale across projects or workspaces. +- Defeats the purpose of a coding agent that is supposed to _reason_ about its environment. + +The transformation must be agent-driven and workspace-aware at runtime. + +--- + +### 3. Current agentloop Machinery and the Transformation Points + +agentloop already has the primitives for this transformation. The pipeline is: + +``` +Generic intent: "verify-build" + │ + ▼ +[1] ProjectExplorer.exploreWorkspace() ← LLM-driven exploration via file-list + file-read + │ WorkspaceContext { + │ workspaceInfo: { language="cmake", packageManager="cmake", ... }, + │ buildSystems: [ + │ { name="cmake", configFile="CMakeLists.txt", + │ notes="CMakePresets.json present. Use preset 'linux-release' for release." } + │ ], + │ explorerNotes: "..." 
+ │ } + ▼ +[2] generatePlan() / planner ← LLM reasons: intent + WorkspaceContext → concrete PlanStep[] + │ PlanStep { description="Configure with cmake --preset linux-release …", + │ toolsNeeded=["shell"], agentProfile="build-verify" } + ▼ +[3] executePlan() / orchestrator ← activates per-step agent profile, runs subagent + │ build-verify profile → shell tool, low temperature, build-verify skill + ▼ +[4] StepResult { output="…compiler output…", status="success"|"failed" } +``` + +#### Step [1] — `ProjectExplorer` in `src/agents/project-explorer.ts` + +This is the **workspace probe** — an LLM-driven agent, not a hardcoded heuristic. +`exploreWorkspace({ registry, llm })` runs a `project-explorer` subagent with access to the +`file-list` and `file-read` tools. The agent: + +1. Lists the root directory tree with `file-list` to understand project layout. +2. Identifies and reads all key manifest/configuration files (package.json, Cargo.toml, + CMakeLists.txt, CMakePresets.json, build.gradle, pom.xml, go.mod, etc.) with `file-read`. +3. Reasons from the file contents to produce a **`WorkspaceContext`** — a generic map with a + `workspaceInfo: WorkspaceInfo` key (the flat summary the planner needs) plus additional + keys: `buildSystems[]` (one entry per detected build system, with `name`, `configFile`, and + human-readable `notes`) and `explorerNotes` (free-form observations, e.g. monorepo layout). + +**Key properties**: +- **No hardcoded commands**: the agent reads `CMakeLists.txt` and `CMakePresets.json` and + reasons about which preset names to use, rather than assuming `cmake --preset default`. + If CMake is not on PATH (e.g. on Windows with a non-standard installation), the notes will + reflect that the invocation style must be determined at execution time. +- **Multi-build-system support**: a monorepo with a C++ core (CMake) and Rust bindings + (Cargo) produces two `buildSystems` entries, both forwarded to the planner. 
+- **One project, one call**: exploration happens once before planning; the resulting + `WorkspaceContext` is reused across all planning and re-planning calls. + +`WorkspaceContext` is defined in `src/workspace.ts` as: +```typescript +export interface WorkspaceContext extends Record<string, unknown> { + workspaceInfo: WorkspaceInfo; +} +``` +The `extends Record<string, unknown>` part allows exploration agents to contribute arbitrary +additional keys (e.g. `buildSystems`, `explorerNotes`, `dependencyGraph`) without requiring +changes to the planner or orchestrator interfaces. + +For backward compatibility, `toWorkspaceContext(info: WorkspaceInfo): WorkspaceContext` wraps +the result of the existing `analyzeWorkspace()` heuristic into a minimal context when the +LLM-driven explorer is not available. + +#### Step [2] — `generatePlan()` / `buildPlannerTask()` in `src/subagents/planner.ts` + +The planner LLM receives the generic intent plus the full `WorkspaceContext` and reasons about +what concrete steps to produce. This is the **intent-to-steps transformation**: + +``` +Task: verify-build +Workspace: language=cmake, packageManager=cmake, gitInitialized=true +Detected build systems: + - cmake (CMakeLists.txt): CMakePresets.json present. Use preset 'linux-release' for + release builds. Preset 'linux-test' for tests via CTest. +Available tools: shell, file-read, file-list +``` + +When the context was produced by `ProjectExplorer`, the `buildSystems[].notes` strings give +the planner the information it needs to derive concrete commands (e.g. "use the linux-release +preset") without any commands being hardcoded anywhere in the system. + +#### Step [3] — `executePlan()` / `runStep()` in `src/orchestrator.ts` + +The orchestrator executes each step as a `runSubagent` call. When a step carries an +`agentProfile` annotation, `activateProfile()` loads the profile (tools, model, temperature, +skills). 
The agent then has both the **concrete step description** (from the planner) and the +**domain guidance** (from its skill) to execute reliably. + +--- + +### 4. Which Interaction Patterns from the Baseline Research Are Essential + +The baseline branch (`copilot/research-agent-fws`) identified eight gaps in agentloop. For +intent-to-action transformation, four of them matter, listed below in decreasing order of importance: + +--- + +#### 4.1 Plan-Execute-Verify Loop (Baseline Issue 3) — **Critical** + +**Why it matters for "verify-build"**: The word "verify" in the intent means the agent must +_confirm_ that the build succeeded — not merely that the build process ran without throwing an +exception. Today, `executePlan()` marks a step as `status: "success"` as soon as the subagent +returns without throwing. A build that silently failed (zero exit code but wrong output, a +`make` that skipped targets, a test that passed vacuously) is indistinguishable from a correct +build. + +**The missing piece**: A `VerificationAgent` (proposed in Issue 3) runs after each step and +produces a structured `VerificationResult { passed, reasoning, issues[] }`. For a build step, +the verifier checks: "Does the output contain evidence of a successful compilation? Are there +error messages? Is the binary present?" + +**Dynamic replanning on failure**: When the verifier flags the build as failed, the system +calls `refinePlan()` with the verifier's feedback (e.g., "missing dependency X"). The +orchestrator replaces the remaining steps with a revised plan that installs the dependency and +retries the build. This is the essential self-correction loop for "verify-build". 
+ +**Interaction pattern** (from Issue 3): +``` +executePlan() + └─ for each step: + ├─ runStep() ← executes the build command + ├─ verifyStep() ← checks build output for success/failure + │ ├─ pass → next step + │ └─ fail → refinePlan(feedback) → re-execute + └─ checkpoint.save() +``` + +**Without this pattern**, a "verify-build" intent can only execute the build — it cannot +actually verify the outcome. + +--- + +#### 4.2 Dynamic Task Decomposition (Baseline Issue 4) — **Important** + +**Why it matters**: The "verify-build" intent may require sub-steps that cannot be known at +planning time. For example: +- The planner produces a step "run the build" +- During execution, the build fails with "submodules not initialised" +- The agent needs to inject a sub-step "git submodule update --init --recursive" _before_ + retrying the build + +This is addressed by **Dynamic Task Decomposition** (Issue 4, section 4): a complex step can +call a `decompose_task` tool at runtime to inject new sub-steps immediately after the current +step. The orchestrator's `executePlan()` maintains a mutable steps list and inserts the new +steps in-place. + +**Interaction pattern**: +``` +executePlan() + ├─ steps = [... mutable list ...] + └─ step i: "Run build" + └─ subagent calls decompose_task({newSteps: [ + { description: "Init submodules", … }, + { description: "Re-run build", … } + ]}) + → steps[i+1..] = [init-submodules, re-run-build, ...original-remaining-steps] +``` + +**Without this pattern**, intent-to-action transformation is only as good as the planner's +initial plan. When the environment deviates from expectations (missing deps, wrong tool +version, first-time setup required), the agent has no way to adapt mid-execution. 
+ +--- + +#### 4.3 Hierarchical Delegation (Baseline Issue 4) — **Architectural → Partially Implemented** + +**Why it matters**: At a higher level of organisation, a _coordinator agent_ can receive the +"verify-build" intent and delegate workspace analysis and step instantiation to a child agent. +This is the **Hierarchical pattern** from Issue 4. + +**Interaction pattern**: +``` +Coordinator receives: "verify-build" + └─ calls exploreWorkspace({ registry }) ← ProjectExplorer subagent (NEW) + └─ project-explorer: reads workspace via file-list + file-read + → WorkspaceContext { workspaceInfo, buildSystems, explorerNotes } + └─ calls generatePlan("verify-build", context, registry, profileRegistry) + → Plan with concrete step descriptions derived from buildSystems notes + └─ calls executePlan(plan, registry, { profileRegistry }) + └─ per-step subagents execute build/test/lint commands + └─ coordinator synthesises final report +``` + +**What has been implemented**: `ProjectExplorer` (`src/agents/project-explorer.ts`) is the +workspace-analyst child agent. It uses `file-list` and `file-read` tools — no hardcoded +command strings — and produces a `WorkspaceContext` that `coordinatedExecute()` (updated to +accept `workspaceContext: WorkspaceContext`) forwards directly to the planner. + +**What remains**: The higher-level coordinator that orchestrates `ProjectExplorer` → +`generatePlan` → `executePlan` as a single flow. Today these are called in sequence in user +code; wrapping them in a `coordinatedWorkflowExecute()` function would complete this pattern. + +--- + +#### 4.4 Toolbox Refiner (Baseline Issue 5) — **Supporting** + +**Why it matters**: The build-verify agent only needs `shell`, `file-read`, and `file-list`. +Exposing all 16+ registered tools dilutes the agent's focus and wastes context budget. The +**Toolbox Refiner** (Issue 5) dynamically narrows the exposed tool set per invocation based on +the step's declared `toolsNeeded` list and the task description. 
+ +This is already partially addressed by the profile-based `tools[]` list in agent profiles. +The Toolbox Refiner would make this dynamic (keyword or embedding matching) rather than +requiring a manually-maintained allowlist per profile. + +--- + +### 5. The Role of Templates in the Dynamic System + +Templates (agent profiles + skills) play a supporting role — they are **not** the source of +concrete steps. Their actual function is: + +| Template element | Role | +|---|---| +| Agent profile (`tools`, `temperature`, `maxIterations`) | Shapes the execution environment for a step | +| Skill (`promptFragment`) | Provides domain guidance to the agent running the step — what to look for, what errors mean, how to report | + +The **concrete steps** always come from the planner, which derives them from: +1. The generic intent ("verify-build") +2. The workspace context (`WorkspaceContext` from `exploreWorkspace()`, or from `analyzeWorkspace()` wrapped by `toWorkspaceContext()` as a fallback) +3. The available agent profiles (the planner can annotate `agentProfile` per step) + +A `build-verify` profile + skill gives the executing agent the knowledge to: +- Identify which build system is in use (from the workspace `language` field) +- Interpret compiler output (error triage heuristics in the skill) +- Produce a structured success/failure report + +But the specific commands to run come from the workspace analysis, injected into the planner +context at planning time. + +--- + +### 6. Recommended Interaction Pattern: Full "verify-build" Flow + +Combining the above, the complete agent-driven "verify-build" flow using agentloop components: + +``` +User: "verify-build" + │ + ▼ +[A] exploreWorkspace({ registry, llm }) ← ProjectExplorer reads workspace via tools + → WorkspaceContext { + workspaceInfo: { language="cmake", ... }, + buildSystems: [{ name="cmake", configFile="CMakeLists.txt", + notes="CMakePresets.json present. Use preset linux-release." 
}] + } + │ + ▼ +[B] generatePlan("verify-build", context, registry, profileRegistry) + → Plan { + steps: [ + { description: "Configure and build: cmake --preset linux-release …", + toolsNeeded: ["shell"], agentProfile: "build-verify" }, + { description: "Report build result", + toolsNeeded: [], agentProfile: "build-verify" } + ] + } + │ + ▼ +[C] executePlan(plan, registry, { verificationEnabled: true, task: "verify-build", context }) + │ + ├─ step 0: runStep() → shell("cmake --preset …") → output + │ verifyStep() → VerificationResult { passed, reasoning, issues } + │ └─ fail? → refinePlan(feedback, context) → re-execute + │ + └─ step 1: runStep() → agent synthesises report from step 0 output + verifyStep() → confirm report contains success/failure conclusion + │ + ▼ +ExecutionResult { stepResults, success, verificationResults } +``` + +The key properties of this flow: +- **Generic intent, concrete execution**: "verify-build" is never mapped to cmake commands in + config — the ProjectExplorer reads the project's own files and the planner derives commands + from what the explorer found. +- **Platform-agnostic**: the explorer doesn't assume `cmake` is on PATH; it reads + `CMakePresets.json` and notes which presets are available; the executing agent resolves the + invocation for the target platform. +- **Multi-build-system aware**: the explorer reports all detected build systems; the planner + can produce steps for each. +- **Self-correcting**: the PEV loop (Issue 3) catches silent failures and replans using the + same `WorkspaceContext`. +- **Extensible**: adding support for a new build system only requires the explorer's LLM to + recognise a new config file format — no TypeScript code changes needed. +- **Composable**: the same flow applies to "run-tests", "lint", or any other lifecycle intent. + +--- + +### 7. Gap Summary Relative to Baseline Research + +| Baseline Issue | Pattern | Essential for "verify-build"? 
| Current status | +|---|---|---|---| +| Issue 3 | Plan-Execute-Verify loop | ✅ Critical — without it, "verify" is just "run" | ❌ Not yet implemented | +| Issue 3 | Dynamic replanning on verification failure | ✅ Critical — enables self-correction | ❌ Not yet implemented | +| Issue 4 | Dynamic task decomposition | ✅ Important — handles mid-execution surprises | ❌ Not yet implemented | +| Issue 4 | Hierarchical delegation / ProjectExplorer | 🔶 Architectural — enables active workspace probing | ✅ **Implemented** (this PR) | +| Issue 5 | Toolbox Refiner | 🔶 Supporting — reduces noise in build agent | ❌ Not yet implemented | +| Issue 2 | Persistent memory | 🔶 Optional — cache workspace analysis across sessions | ❌ Not yet implemented | + +### 8. What Has Been Improved in This PR + +| Change | Effect | +|---|---| +| `ProjectExplorer` agent (`src/agents/project-explorer.ts`) | LLM-driven workspace exploration using `file-list` + `file-read` tools; no hardcoded command strings; supports multi-build-system workspaces | +| `WorkspaceContext` type + `toWorkspaceContext()` in `src/workspace.ts` | Generic context map that flows through the planning pipeline; holds `workspaceInfo` plus arbitrary explorer-contributed keys (`buildSystems`, `explorerNotes`) | +| `generatePlan()` / `refinePlan()` accept `WorkspaceContext` (updated in `src/subagents/planner.ts`) | Planner task now includes `buildSystems[].notes` and `explorerNotes` when present → LLM derives concrete commands from actual file content, not hardcoded heuristics | +| `coordinatedExecute()` accepts `workspaceContext: WorkspaceContext` (updated in `src/agents/coordinator.ts`) | High-level entry point is now compatible with the `ProjectExplorer` output | +| `plan` and `plan-and-run` tools wrap `analyzeWorkspace()` with `toWorkspaceContext()` | Backward-compatible fallback for simple workspaces (Node/Python/Go) where LLM-driven exploration isn't needed | +| `build-verify` and `test-runner` agent profiles | Execution 
environment for build/test steps — define which tools and temperature are appropriate | +| `build-verify` skill | Domain guidance injected into the build agent — how to identify the build system, interpret output, triage errors | + +These improvements implement the Hierarchical Delegation pattern (Step [1] workspace probe) +and advance Step [2] (planner context) of the transformation pipeline. The verification loop +and dynamic replanning (Steps [3] and [4]) require the Plan-Execute-Verify implementation +from Issue 3 to be complete. diff --git a/src/__tests__/builtin-agent-profiles.test.ts b/src/__tests__/builtin-agent-profiles.test.ts index ed5fa0d..1078a49 100644 --- a/src/__tests__/builtin-agent-profiles.test.ts +++ b/src/__tests__/builtin-agent-profiles.test.ts @@ -24,8 +24,8 @@ beforeAll(async () => { }); describe("builtin agent profiles", () => { - it("loads exactly 5 builtin profiles", () => { - expect(registry.list()).toHaveLength(5); + it("loads exactly 7 builtin profiles", () => { + expect(registry.list()).toHaveLength(7); }); it("coder profile has name === 'coder' and model === 'gpt-4o'", () => { diff --git a/src/__tests__/builtin-skills.test.ts b/src/__tests__/builtin-skills.test.ts index 2d6e48d..3c96455 100644 --- a/src/__tests__/builtin-skills.test.ts +++ b/src/__tests__/builtin-skills.test.ts @@ -20,9 +20,10 @@ describe("built-in skill library", () => { "test-writer", "git-workflow", "security-auditor", + "build-verify", ]; - it("loads all 5 built-in skills", () => { + it("loads all 6 built-in skills", () => { const names = registry.list().map((s) => s.name); for (const name of BUILTIN_NAMES) { expect(names).toContain(name); diff --git a/src/__tests__/fixtures/workspace-cargo/Cargo.toml b/src/__tests__/fixtures/workspace-cargo/Cargo.toml new file mode 100644 index 0000000..965b593 --- /dev/null +++ b/src/__tests__/fixtures/workspace-cargo/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "my-app" +version = "0.1.0" +edition = "2021" diff --git 
a/src/__tests__/fixtures/workspace-cargo/tests/.keep b/src/__tests__/fixtures/workspace-cargo/tests/.keep new file mode 100644 index 0000000..e69de29 diff --git a/src/__tests__/fixtures/workspace-cmake-presets/CMakeLists.txt b/src/__tests__/fixtures/workspace-cmake-presets/CMakeLists.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/__tests__/fixtures/workspace-cmake-presets/CMakePresets.json b/src/__tests__/fixtures/workspace-cmake-presets/CMakePresets.json new file mode 100644 index 0000000..62e6c2f --- /dev/null +++ b/src/__tests__/fixtures/workspace-cmake-presets/CMakePresets.json @@ -0,0 +1 @@ +{"version":3,"cmakeMinimumRequired":{"major":3,"minor":21},"configurePresets":[{"name":"default","binaryDir":"build"}],"buildPresets":[{"name":"default","configurePreset":"default"}],"testPresets":[{"name":"default","configurePreset":"default"}]} diff --git a/src/__tests__/fixtures/workspace-cmake/CMakeLists.txt b/src/__tests__/fixtures/workspace-cmake/CMakeLists.txt new file mode 100644 index 0000000..e69de29 diff --git a/src/__tests__/fixtures/workspace-cmake/tests/.keep b/src/__tests__/fixtures/workspace-cmake/tests/.keep new file mode 100644 index 0000000..e69de29 diff --git a/src/__tests__/fixtures/workspace-gradle-kotlin/build.gradle.kts b/src/__tests__/fixtures/workspace-gradle-kotlin/build.gradle.kts new file mode 100644 index 0000000..5b1dae2 --- /dev/null +++ b/src/__tests__/fixtures/workspace-gradle-kotlin/build.gradle.kts @@ -0,0 +1 @@ +plugins { kotlin("jvm") version "1.9.0" } diff --git a/src/__tests__/fixtures/workspace-gradle-kotlin/src/test/.keep b/src/__tests__/fixtures/workspace-gradle-kotlin/src/test/.keep new file mode 100644 index 0000000..e69de29 diff --git a/src/__tests__/fixtures/workspace-gradle/build.gradle b/src/__tests__/fixtures/workspace-gradle/build.gradle new file mode 100644 index 0000000..b95276a --- /dev/null +++ b/src/__tests__/fixtures/workspace-gradle/build.gradle @@ -0,0 +1 @@ +plugins { id("java") } diff --git 
a/src/__tests__/fixtures/workspace-gradle/src/test/.keep b/src/__tests__/fixtures/workspace-gradle/src/test/.keep new file mode 100644 index 0000000..e69de29 diff --git a/src/__tests__/fixtures/workspace-maven/pom.xml b/src/__tests__/fixtures/workspace-maven/pom.xml new file mode 100644 index 0000000..12ac61c --- /dev/null +++ b/src/__tests__/fixtures/workspace-maven/pom.xml @@ -0,0 +1 @@ +4.0.0com.examplemy-app1.0 diff --git a/src/__tests__/fixtures/workspace-maven/src/test/.keep b/src/__tests__/fixtures/workspace-maven/src/test/.keep new file mode 100644 index 0000000..e69de29 diff --git a/src/__tests__/planner.test.ts b/src/__tests__/planner.test.ts index a2fa856..236e82f 100644 --- a/src/__tests__/planner.test.ts +++ b/src/__tests__/planner.test.ts @@ -9,7 +9,7 @@ import type { BaseChatModel } from "@langchain/core/language_models/chat_models" import { ToolRegistry } from "../tools/registry"; import { generatePlan, validatePlan, refinePlan } from "../subagents/planner"; import type { Plan } from "../subagents/planner"; -import type { WorkspaceInfo } from "../workspace"; +import type { WorkspaceContext } from "../workspace"; // ───────────────────────────────────────────────────────────────────────────── // Helpers @@ -36,17 +36,19 @@ function makeRegistry(...names: string[]): ToolRegistry { return registry; } -/** A representative workspace used across tests. */ -const MOCK_WORKSPACE: WorkspaceInfo = { - language: "node", - framework: "express", - packageManager: "npm", - hasTests: true, - testCommand: "npm test", - lintCommand: "npm run lint", - buildCommand: "npm run build", - entryPoints: ["src/index.ts"], - gitInitialized: true, +/** A representative workspace context used across tests. 
*/ +const MOCK_WORKSPACE: WorkspaceContext = { + workspaceInfo: { + language: "node", + framework: "express", + packageManager: "npm", + hasTests: true, + testCommand: "npm test", + lintCommand: "npm run lint", + buildCommand: "npm run build", + entryPoints: ["src/index.ts"], + gitInitialized: true, + }, }; // ───────────────────────────────────────────────────────────────────────────── @@ -461,3 +463,92 @@ describe("generatePlan — agentProfile field", () => { expect(userMsg!.content).toContain("devops"); }); }); + +// ───────────────────────────────────────────────────────────────────────────── +// WorkspaceContext — richer context from ProjectExplorer +// ───────────────────────────────────────────────────────────────────────────── + +describe("generatePlan — WorkspaceContext with buildSystems", () => { + it("includes buildSystems notes in the planner task when context has buildSystems", async () => { + const invoke = jest.fn().mockResolvedValueOnce({ + content: JSON.stringify({ + steps: [{ description: "build the project", toolsNeeded: [], estimatedComplexity: "medium" }], + }), + tool_calls: [], + }); + + const mockLlm = { + bindTools: jest.fn().mockReturnValue({ invoke }), + } as unknown as BaseChatModel; + + const richContext: WorkspaceContext = { + workspaceInfo: { + language: "rust", + framework: "none", + packageManager: "cargo", + hasTests: true, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: true, + }, + buildSystems: [ + { + name: "cargo", + configFile: "Cargo.toml", + notes: "Workspace with members: core, cli. 
Use --workspace flag for full builds.", + }, + ], + }; + + await generatePlan("verify the build", richContext, new ToolRegistry(), mockLlm); + + const messages: Array<{ content: string }> = invoke.mock.calls[0][0]; + const userMsg = messages.find( + (m) => typeof m.content === "string" && m.content.includes("verify the build") + ); + expect(userMsg).toBeDefined(); + // buildSystems notes should appear in the task + expect(userMsg!.content).toContain("cargo"); + expect(userMsg!.content).toContain("Cargo.toml"); + expect(userMsg!.content).toContain("--workspace"); + }); + + it("includes explorerNotes in the planner task when context has explorerNotes", async () => { + const invoke = jest.fn().mockResolvedValueOnce({ + content: JSON.stringify({ + steps: [{ description: "step", toolsNeeded: [], estimatedComplexity: "low" }], + }), + tool_calls: [], + }); + + const mockLlm = { + bindTools: jest.fn().mockReturnValue({ invoke }), + } as unknown as BaseChatModel; + + const contextWithNotes: WorkspaceContext = { + workspaceInfo: { + language: "cpp", + framework: "none", + packageManager: "cmake", + hasTests: false, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: false, + }, + explorerNotes: "Multi-language monorepo: C++ core with Python bindings", + }; + + await generatePlan("verify-the-cpp-bindings", contextWithNotes, new ToolRegistry(), mockLlm); + + const messages: Array<{ content: string }> = invoke.mock.calls[0][0]; + const userMsg = messages.find( + (m) => typeof m.content === "string" && m.content.includes("verify-the-cpp-bindings") + ); + expect(userMsg).toBeDefined(); + expect(userMsg!.content).toContain("Multi-language monorepo"); + }); +}); diff --git a/src/__tests__/project-explorer.test.ts b/src/__tests__/project-explorer.test.ts new file mode 100644 index 0000000..10b3bba --- /dev/null +++ b/src/__tests__/project-explorer.test.ts @@ -0,0 +1,303 @@ +// Mock ChatMistralAI before any imports — prevents ESM parse 
errors in Jest +jest.mock("@langchain/mistralai", () => ({ + ChatMistralAI: jest.fn(), +})); + +import { z } from "zod"; +import type { BaseChatModel } from "@langchain/core/language_models/chat_models"; + +import { ToolRegistry } from "../tools/registry"; +import { exploreWorkspace } from "../agents/project-explorer"; +import type { WorkspaceContext } from "../workspace"; + +// ───────────────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────────────── + +function makeMockLlm(invokeFn: jest.Mock): BaseChatModel { + return { + bindTools: jest.fn().mockReturnValue({ invoke: invokeFn }), + } as unknown as BaseChatModel; +} + +/** Minimal registry with file-list and file-read stubs for unit tests. */ +function makeExplorerRegistry(): ToolRegistry { + const registry = new ToolRegistry(); + registry.register({ + name: "file-list", + description: "List directory contents", + schema: z.object({ path: z.string().optional(), recursive: z.boolean().optional() }), + execute: async () => JSON.stringify({ entries: [] }), + }); + registry.register({ + name: "file-read", + description: "Read a file", + schema: z.object({ path: z.string() }), + execute: async () => JSON.stringify({ content: "", encoding: "utf-8", sizeBytes: 0 }), + }); + return registry; +} + +// ───────────────────────────────────────────────────────────────────────────── +// exploreWorkspace +// ───────────────────────────────────────────────────────────────────────────── + +describe("exploreWorkspace", () => { + it("returns a WorkspaceContext with workspaceInfo from the LLM output", async () => { + const explorerOutput = JSON.stringify({ + workspaceInfo: { + language: "rust", + framework: "none", + packageManager: "cargo", + hasTests: true, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: ["src/main.rs"], + gitInitialized: true, + }, + buildSystems: [ + { + name: "cargo", + configFile: 
"Cargo.toml", + notes: "Single-crate project. Use 'cargo build' and 'cargo test'.", + }, + ], + explorerNotes: "Standard Rust project layout.", + }); + + const invoke = jest.fn().mockResolvedValueOnce({ content: explorerOutput, tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + + expect(context.workspaceInfo.language).toBe("rust"); + expect(context.workspaceInfo.packageManager).toBe("cargo"); + expect(context.workspaceInfo.hasTests).toBe(true); + expect(context.workspaceInfo.entryPoints).toContain("src/main.rs"); + expect(context.workspaceInfo.gitInitialized).toBe(true); + }); + + it("captures buildSystems from LLM output in the context map", async () => { + const explorerOutput = JSON.stringify({ + workspaceInfo: { + language: "cpp", + framework: "none", + packageManager: "cmake", + hasTests: false, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: false, + }, + buildSystems: [ + { + name: "cmake", + configFile: "CMakeLists.txt", + notes: "CMakePresets.json present. 
Use preset 'linux-release' for release builds.", + }, + ], + }); + + const invoke = jest.fn().mockResolvedValueOnce({ content: explorerOutput, tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + + expect(context.workspaceInfo.language).toBe("cpp"); + const buildSystems = context.buildSystems as Array<{ name: string; configFile: string; notes: string }>; + expect(Array.isArray(buildSystems)).toBe(true); + expect(buildSystems).toHaveLength(1); + expect(buildSystems[0].name).toBe("cmake"); + expect(buildSystems[0].notes).toContain("linux-release"); + }); + + it("captures explorerNotes in the context map when present", async () => { + const explorerOutput = JSON.stringify({ + workspaceInfo: { + language: "java", + framework: "spring", + packageManager: "gradle", + hasTests: true, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: true, + }, + explorerNotes: "Gradle wrapper (./gradlew) is present. 
Kotlin DSL (build.gradle.kts).", + }); + + const invoke = jest.fn().mockResolvedValueOnce({ content: explorerOutput, tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + + expect(context.explorerNotes).toContain("Gradle wrapper"); + expect(context.explorerNotes).toContain("Kotlin DSL"); + }); + + it("handles multiple build systems in a monorepo", async () => { + const explorerOutput = JSON.stringify({ + workspaceInfo: { + language: "cpp", + framework: "none", + packageManager: "cmake", + hasTests: true, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: true, + }, + buildSystems: [ + { + name: "cmake", + configFile: "CMakeLists.txt", + notes: "C++ core library.", + }, + { + name: "cargo", + configFile: "bindings/rust/Cargo.toml", + notes: "Rust FFI bindings to the C++ core.", + }, + ], + explorerNotes: "Hybrid C++/Rust project.", + }); + + const invoke = jest.fn().mockResolvedValueOnce({ content: explorerOutput, tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + + const buildSystems = context.buildSystems as Array<{ name: string }>; + expect(buildSystems).toHaveLength(2); + expect(buildSystems.map((b) => b.name)).toContain("cmake"); + expect(buildSystems.map((b) => b.name)).toContain("cargo"); + }); + + it("strips markdown code fences from the LLM output", async () => { + const inner = JSON.stringify({ + workspaceInfo: { + language: "go", + framework: "none", + packageManager: "go mod", + hasTests: false, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: false, + }, + }); + const fenced = `\`\`\`json\n${inner}\n\`\`\``; + + const invoke = jest.fn().mockResolvedValueOnce({ content: fenced, tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await 
exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + expect(context.workspaceInfo.language).toBe("go"); + }); + + it("returns a fallback context when the LLM output is not valid JSON", async () => { + const invoke = jest.fn().mockResolvedValueOnce({ content: "I could not explore", tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + + expect(context.workspaceInfo.language).toBe("unknown"); + expect(context.workspaceInfo.framework).toBe("none"); + expect(context.buildSystems).toBeUndefined(); + }); + + it("returns a fallback context when workspaceInfo is missing from JSON", async () => { + const invoke = jest + .fn() + .mockResolvedValueOnce({ content: JSON.stringify({ noInfo: true }), tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + + expect(context.workspaceInfo.language).toBe("unknown"); + }); + + it("uses the file-list and file-read tools (registered by name) when exploring", async () => { + const explorerOutput = JSON.stringify({ + workspaceInfo: { + language: "node", + framework: "none", + packageManager: "npm", + hasTests: false, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: false, + }, + }); + + const invoke = jest + .fn() + .mockResolvedValueOnce({ content: explorerOutput, tool_calls: [] }); + + const mockLlm = { + bindTools: jest.fn().mockReturnValue({ invoke }), + } as unknown as BaseChatModel; + + const registry = makeExplorerRegistry(); + await exploreWorkspace({ registry, llm: mockLlm }); + + // The LLM should have been bound with the file-list and file-read tools + expect(mockLlm.bindTools).toHaveBeenCalledTimes(1); + const boundTools = (mockLlm.bindTools as jest.Mock).mock.calls[0][0] as Array<{ name: string }>; + const toolNames = boundTools.map((t) => t.name); + expect(toolNames).toContain("file-list"); 
+ expect(toolNames).toContain("file-read"); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// WorkspaceContext type +// ───────────────────────────────────────────────────────────────────────────── + +describe("WorkspaceContext type", () => { + it("workspaceInfo key is required", () => { + const ctx: WorkspaceContext = { + workspaceInfo: { + language: "python", + framework: "django", + packageManager: "poetry", + hasTests: true, + testCommand: "pytest", + lintCommand: "flake8", + buildCommand: "", + entryPoints: [], + gitInitialized: false, + }, + }; + expect(ctx.workspaceInfo.language).toBe("python"); + }); + + it("allows arbitrary additional keys in the context map", () => { + const ctx: WorkspaceContext = { + workspaceInfo: { + language: "go", + framework: "none", + packageManager: "go mod", + hasTests: false, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: true, + }, + buildSystems: [{ name: "go", configFile: "go.mod", notes: "Standard Go module." 
}], + explorerNotes: "Simple CLI tool.", + customKey: { nested: true }, + }; + expect(ctx.buildSystems).toBeDefined(); + expect(ctx.explorerNotes).toBe("Simple CLI tool."); + expect(ctx.customKey).toEqual({ nested: true }); + }); +}); diff --git a/src/__tests__/workspace.test.ts b/src/__tests__/workspace.test.ts index 718b616..df50e20 100644 --- a/src/__tests__/workspace.test.ts +++ b/src/__tests__/workspace.test.ts @@ -118,3 +118,163 @@ describe("analyzeWorkspace — git detection", () => { expect(info.gitInitialized).toBe(true); }); }); + +describe("analyzeWorkspace — Rust/Cargo project", () => { + const root = path.join(fixturesDir, "workspace-cargo"); + + let info: WorkspaceInfo; + beforeAll(async () => { + info = await analyzeWorkspace(root); + }); + + it("detects language as 'rust'", () => { + expect(info.language).toBe("rust"); + }); + + it("uses 'cargo' as the package manager", () => { + expect(info.packageManager).toBe("cargo"); + }); + + it("defaults the build command to 'cargo build'", () => { + expect(info.buildCommand).toBe("cargo build"); + }); + + it("defaults the test command to 'cargo test'", () => { + expect(info.testCommand).toBe("cargo test"); + }); + + it("defaults the lint command to 'cargo clippy'", () => { + expect(info.lintCommand).toBe("cargo clippy"); + }); + + it("reports hasTests as true when a tests/ directory exists", () => { + expect(info.hasTests).toBe(true); + }); +}); + +describe("analyzeWorkspace — CMake project (no presets)", () => { + const root = path.join(fixturesDir, "workspace-cmake"); + + let info: WorkspaceInfo; + beforeAll(async () => { + info = await analyzeWorkspace(root); + }); + + it("detects language as 'cmake'", () => { + expect(info.language).toBe("cmake"); + }); + + it("uses 'cmake' as the package manager", () => { + expect(info.packageManager).toBe("cmake"); + }); + + it("uses classic out-of-source build command when no presets file is present", () => { + expect(info.buildCommand).toBe("cmake -S . 
-B build && cmake --build build"); + }); + + it("defaults the test command to ctest", () => { + expect(info.testCommand).toBe("ctest --output-on-failure"); + }); + + it("reports hasTests as true when a tests/ directory exists", () => { + expect(info.hasTests).toBe(true); + }); +}); + +describe("analyzeWorkspace — CMake project (with CMakePresets.json)", () => { + const root = path.join(fixturesDir, "workspace-cmake-presets"); + + let info: WorkspaceInfo; + beforeAll(async () => { + info = await analyzeWorkspace(root); + }); + + it("detects language as 'cmake'", () => { + expect(info.language).toBe("cmake"); + }); + + it("uses preset-based build command when CMakePresets.json is present", () => { + expect(info.buildCommand).toBe( + "cmake --preset default && cmake --build --preset default" + ); + }); + + it("uses preset-based test command when CMakePresets.json is present", () => { + expect(info.testCommand).toBe("ctest --preset default"); + }); +}); + +describe("analyzeWorkspace — Gradle (Java) project", () => { + const root = path.join(fixturesDir, "workspace-gradle"); + + let info: WorkspaceInfo; + beforeAll(async () => { + info = await analyzeWorkspace(root); + }); + + it("detects language as 'java'", () => { + expect(info.language).toBe("java"); + }); + + it("uses 'gradle' as the package manager", () => { + expect(info.packageManager).toBe("gradle"); + }); + + it("uses 'gradle build' as the build command (no gradlew wrapper)", () => { + expect(info.buildCommand).toBe("gradle build"); + }); + + it("uses 'gradle test' as the test command", () => { + expect(info.testCommand).toBe("gradle test"); + }); + + it("reports hasTests as true when src/test exists", () => { + expect(info.hasTests).toBe(true); + }); +}); + +describe("analyzeWorkspace — Gradle (Kotlin DSL) project", () => { + const root = path.join(fixturesDir, "workspace-gradle-kotlin"); + + let info: WorkspaceInfo; + beforeAll(async () => { + info = await analyzeWorkspace(root); + }); + + it("detects 
language as 'kotlin'", () => { + expect(info.language).toBe("kotlin"); + }); + + it("uses 'gradle' as the package manager", () => { + expect(info.packageManager).toBe("gradle"); + }); +}); + +describe("analyzeWorkspace — Maven project", () => { + const root = path.join(fixturesDir, "workspace-maven"); + + let info: WorkspaceInfo; + beforeAll(async () => { + info = await analyzeWorkspace(root); + }); + + it("detects language as 'java'", () => { + expect(info.language).toBe("java"); + }); + + it("uses 'maven' as the package manager", () => { + expect(info.packageManager).toBe("maven"); + }); + + it("uses 'mvn package -DskipTests' as the build command (no wrapper)", () => { + expect(info.buildCommand).toBe("mvn package -DskipTests"); + }); + + it("uses 'mvn test' as the test command", () => { + expect(info.testCommand).toBe("mvn test"); + }); + + it("reports hasTests as true when src/test exists", () => { + expect(info.hasTests).toBe(true); + }); +}); diff --git a/src/agents/builtin/build-verify.agent.json b/src/agents/builtin/build-verify.agent.json new file mode 100644 index 0000000..6fec92e --- /dev/null +++ b/src/agents/builtin/build-verify.agent.json @@ -0,0 +1,12 @@ +{ + "name": "build-verify", + "description": "Build verification agent — compiles the workspace and reports success or failure with compiler diagnostics", + "version": "1.0.0", + "temperature": 0.1, + "skills": ["build-verify"], + "tools": ["shell", "file-read", "file-list"], + "maxIterations": 10, + "constraints": { + "requireConfirmation": [] + } +} diff --git a/src/agents/builtin/test-runner.agent.json b/src/agents/builtin/test-runner.agent.json new file mode 100644 index 0000000..64a992e --- /dev/null +++ b/src/agents/builtin/test-runner.agent.json @@ -0,0 +1,12 @@ +{ + "name": "test-runner", + "description": "Test execution agent — runs the project test suite, reports failures, and suggests targeted fixes", + "version": "1.0.0", + "temperature": 0.2, + "skills": ["test-writer"], + "tools": 
["shell", "file-read", "file-write", "file-edit", "file-list", "code-search"], + "maxIterations": 20, + "constraints": { + "requireConfirmation": [] + } +} diff --git a/src/agents/coordinator.ts b/src/agents/coordinator.ts index e3e07b2..c4a997c 100644 --- a/src/agents/coordinator.ts +++ b/src/agents/coordinator.ts @@ -6,7 +6,7 @@ import { runSubagent } from "../subagents/runner"; import { generatePlan } from "../subagents/planner"; import { executePlan } from "../orchestrator"; import type { ExecutionOptions, ExecutionResult } from "../orchestrator"; -import type { WorkspaceInfo } from "../workspace"; +import type { WorkspaceContext } from "../workspace"; import type { AgentProfile } from "./types"; import type { AgentProfileRegistry } from "./registry"; @@ -122,8 +122,13 @@ export interface CoordinatedExecuteOptions { registry: ToolRegistry; /** Profile registry used for routing and per-step profile resolution. */ profileRegistry: AgentProfileRegistry; - /** Workspace analysis result (language, framework, etc.) — required by the planner. */ - workspaceInfo: WorkspaceInfo; + /** + * Workspace context for the planner. Use `toWorkspaceContext(await analyzeWorkspace(root))` + * for a quick heuristic analysis, or pass the result of `exploreWorkspace()` (ProjectExplorer + * agent) for a richer, LLM-derived context that handles multi-build-system workspaces without + * any hardcoded command strings. + */ + workspaceContext: WorkspaceContext; /** Optional LLM instance; created from config when omitted. 
*/ llm?: BaseChatModel; /** @@ -168,7 +173,7 @@ export async function coordinatedExecute( const { registry, profileRegistry, - workspaceInfo, + workspaceContext, llm, executionOptions = {}, invoke, @@ -182,7 +187,7 @@ export async function coordinatedExecute( const profile = await routeRequest(request, profileRegistry, registry, llm); // Step 2: Generate a plan (with profile annotations when a registry is available) - const plan = await generatePlan(request, workspaceInfo, registry, llm, profileRegistry); + const plan = await generatePlan(request, workspaceContext, registry, llm, profileRegistry); // Step 3: Choose execution path based on plan size if (plan.steps.length <= planThreshold) { diff --git a/src/agents/project-explorer.ts b/src/agents/project-explorer.ts new file mode 100644 index 0000000..ffda07a --- /dev/null +++ b/src/agents/project-explorer.ts @@ -0,0 +1,202 @@ +import type { BaseChatModel } from "@langchain/core/language_models/chat_models"; + +import { logger } from "../logger"; +import { ToolRegistry } from "../tools/registry"; +import { runSubagent } from "../subagents/runner"; +import type { WorkspaceInfo, WorkspaceContext } from "../workspace"; + +// ───────────────────────────────────────────────────────────────────────────── +// Types +// ───────────────────────────────────────────────────────────────────────────── + +/** + * One detected build system within a workspace. + * A workspace may contain several (e.g. a C++ core + Python bindings). + */ +export interface BuildSystemInfo { + /** Short identifier for the build system (e.g. 'cargo', 'cmake', 'gradle'). */ + name: string; + /** Workspace-root-relative path to the primary config file. */ + configFile: string; + /** Relevant details the planner should use when choosing how to invoke this system. 
*/ + notes: string; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Prompt +// ───────────────────────────────────────────────────────────────────────────── + +const EXPLORER_SYSTEM_PROMPT = + `You are a workspace exploration agent. Your job is to analyze a software project's file system ` + + `using the available tools and produce a structured JSON description of the workspace.\n\n` + + `Exploration steps (use the tools to gather information):\n` + + `1. Call file-list with recursive=true to understand the top-level directory structure.\n` + + `2. Identify key project files (package.json, Cargo.toml, CMakeLists.txt, CMakePresets.json, ` + + `build.gradle, build.gradle.kts, pom.xml, go.mod, pyproject.toml, requirements.txt, Makefile, etc.).\n` + + `3. Call file-read on each identified key file to extract language, framework, build system, ` + + `package manager, and test setup information.\n` + + `4. A project may contain MORE THAN ONE build system — report all of them.\n\n` + + `After exploration, respond ONLY with a valid JSON object matching this exact schema (no prose, ` + + `no markdown fences):\n` + + `{\n` + + ` "workspaceInfo": {\n` + + ` "language": "primary language (node|python|go|rust|java|kotlin|cpp|unknown)",\n` + + ` "framework": "detected framework or 'none'",\n` + + ` "packageManager": "package manager (npm|yarn|pnpm|cargo|gradle|maven|poetry|pip|go mod|unknown)",\n` + + ` "hasTests": true|false,\n` + + ` "testCommand": "empty string — the planner derives the actual command from buildSystems notes",\n` + + ` "lintCommand": "empty string — same rationale",\n` + + ` "buildCommand": "empty string — same rationale",\n` + + ` "entryPoints": ["list of main entry-point files relative to root, or empty"],\n` + + ` "gitInitialized": true|false\n` + + ` },\n` + + ` "buildSystems": [\n` + + ` {\n` + + ` "name": "build system identifier",\n` + + ` "configFile": "root-relative path to primary config file",\n` + + ` 
"notes": "concise details the planner needs: preset names, wrapper scripts, workspace members, etc."\n` + + ` }\n` + + ` ],\n` + + ` "explorerNotes": "optional free-form observations (e.g. multi-language monorepo, unusual layout)"\n` + + `}\n\n` + + `Rules:\n` + + `- Leave testCommand, buildCommand, and lintCommand as empty strings. The planner LLM will derive\n` + + ` the actual commands from the buildSystems[].notes you provide.\n` + + `- Include ALL detected build systems. Do not skip secondary ones.\n` + + `- For CMake: note whether CMakePresets.json exists and list relevant preset names from it.\n` + + `- For Cargo: note whether it is a workspace (multiple members) or a single crate.\n` + + `- For Gradle: note whether ./gradlew wrapper is present and whether Kotlin DSL is used.\n` + + `- For Maven: note whether ./mvnw wrapper is present.\n` + + `- Produce at least the workspaceInfo object even if no build system was found.`; + +// ───────────────────────────────────────────────────────────────────────────── +// Public API +// ───────────────────────────────────────────────────────────────────────────── + +/** Options for `exploreWorkspace()`. */ +export interface ExploreWorkspaceOptions { + /** Tool registry that must contain 'file-list' and 'file-read'. */ + registry: ToolRegistry; + /** Optional LLM — created from config when omitted. */ + llm?: BaseChatModel; + /** + * Maximum LLM iterations for the explorer subagent. + * Each iteration may call tools; defaults to 10. + */ + maxIterations?: number; +} + +/** + * Run the ProjectExplorer agent to produce a rich `WorkspaceContext`. + * + * The agent uses `file-list` and `file-read` tools to examine the workspace + * and derives build system information through LLM reasoning — no hardcoded + * commands are baked in. The resulting `WorkspaceContext` can be passed + * directly to `generatePlan()`. 
/**
 * Run the ProjectExplorer subagent and convert its final answer into a
 * `WorkspaceContext`.
 *
 * The subagent is started under the name "project-explorer" with the
 * `file-list` and `file-read` tools and the explorer system prompt. Its textual
 * output is handed to `parseExplorerOutput`, which falls back to an "unknown"
 * context instead of throwing when the output is not usable JSON.
 *
 * @param options Registry, optional LLM, and optional iteration limit (default 10).
 * @returns The parsed workspace context, ready to be passed to `generatePlan()`.
 */
export async function exploreWorkspace(options: ExploreWorkspaceOptions): Promise<WorkspaceContext> {
  const { registry, llm, maxIterations = 10 } = options;

  const task =
    `Explore the project workspace using file-list and file-read.\n` +
    `Steps:\n` +
    `a) Call file-list (recursive=true) to see the full directory tree.\n` +
    `b) Identify and read all key project manifest / configuration files.\n` +
    `c) Identify all build systems present (there may be more than one).\n` +
    `Then produce the final JSON object as described in the system prompt.`;

  const result = await runSubagent(
    {
      name: "project-explorer",
      systemPrompt: EXPLORER_SYSTEM_PROMPT,
      tools: ["file-list", "file-read"],
      maxIterations,
    },
    task,
    registry,
    llm
  );

  logger.info({ subagent: "project-explorer" }, "Workspace exploration complete");

  return parseExplorerOutput(result.output);
}

// ─────────────────────────────────────────────────────────────────────────────
// Internal helpers
// ─────────────────────────────────────────────────────────────────────────────

/** Build the context returned whenever the explorer output cannot be used. */
function buildFallbackContext(): WorkspaceContext {
  const workspaceInfo: WorkspaceInfo = {
    language: "unknown",
    framework: "none",
    packageManager: "unknown",
    hasTests: false,
    testCommand: "",
    lintCommand: "",
    buildCommand: "",
    entryPoints: [],
    gitInitialized: false,
  };
  return { workspaceInfo };
}

/**
 * Coerce an untrusted value into a complete `WorkspaceInfo`.
 *
 * Every field is type-checked individually; a missing or wrongly typed field
 * takes the corresponding fallback value, so the result is always fully
 * populated. Non-string members of `entryPoints` are dropped.
 *
 * @param raw The `workspaceInfo` member of the parsed explorer JSON (may be anything).
 */
function parseWorkspaceInfo(raw: unknown): WorkspaceInfo {
  const base: WorkspaceInfo = buildFallbackContext().workspaceInfo;
  if (typeof raw !== "object" || raw === null) return base;
  const obj = raw as Record<string, unknown>;

  const str = (value: unknown, fallback: string): string =>
    typeof value === "string" ? value : fallback;
  const bool = (value: unknown, fallback: boolean): boolean =>
    typeof value === "boolean" ? value : fallback;

  return {
    language: str(obj.language, base.language),
    framework: str(obj.framework, base.framework),
    packageManager: str(obj.packageManager, base.packageManager),
    hasTests: bool(obj.hasTests, base.hasTests),
    testCommand: str(obj.testCommand, base.testCommand),
    lintCommand: str(obj.lintCommand, base.lintCommand),
    buildCommand: str(obj.buildCommand, base.buildCommand),
    entryPoints: Array.isArray(obj.entryPoints)
      ? (obj.entryPoints as unknown[]).filter((e): e is string => typeof e === "string")
      : base.entryPoints,
    gitInitialized: bool(obj.gitInitialized, base.gitInitialized),
  };
}

/**
 * Validate the `buildSystems` array emitted by the explorer LLM.
 *
 * Entries that are not objects or that lack a non-empty string `name` are
 * discarded; a missing or non-string `configFile` / `notes` becomes an empty
 * string. This keeps `undefined` out of the planner prompt, which interpolates
 * all three fields verbatim.
 *
 * @param raw Array taken from the parsed explorer JSON.
 */
function parseBuildSystems(raw: unknown[]): BuildSystemInfo[] {
  const systems: BuildSystemInfo[] = [];
  for (const entry of raw) {
    if (typeof entry !== "object" || entry === null) continue;
    const { name, configFile, notes } = entry as Record<string, unknown>;
    if (typeof name !== "string" || name.trim() === "") continue;
    systems.push({
      name,
      configFile: typeof configFile === "string" ? configFile : "",
      notes: typeof notes === "string" ? notes : "",
    });
  }
  return systems;
}

/**
 * Parse the explorer subagent's final message into a `WorkspaceContext`.
 *
 * Accepts bare JSON or JSON wrapped in a Markdown code fence. Returns the
 * fallback context when the text is not valid JSON or is not a JSON object.
 *
 * @param output Raw text returned by the subagent.
 */
function parseExplorerOutput(output: string): WorkspaceContext {
  // Trim BEFORE stripping: the fence patterns are anchored to the very start and
  // end of the string, so surrounding whitespace would otherwise defeat them.
  const stripped = output
    .trim()
    .replace(/^```(?:json)?\s*/i, "")
    .replace(/\s*```$/, "")
    .trim();

  let parsed: unknown;
  try {
    parsed = JSON.parse(stripped);
  } catch {
    logger.warn(
      { preview: stripped.slice(0, 200) },
      "ProjectExplorer output is not valid JSON; returning fallback context"
    );
    return buildFallbackContext();
  }

  if (typeof parsed !== "object" || parsed === null) {
    return buildFallbackContext();
  }

  const raw = parsed as Record<string, unknown>;
  const workspaceInfo = parseWorkspaceInfo(raw.workspaceInfo);
  const context: WorkspaceContext = { workspaceInfo };

  // Preserve additional discovered data in the context map. A non-array
  // `buildSystems` is dropped rather than forwarded under a false type.
  if (Array.isArray(raw.buildSystems)) {
    context.buildSystems = parseBuildSystems(raw.buildSystems);
  }
  if (typeof raw.explorerNotes === "string" && raw.explorerNotes.trim() !== "") {
    context.explorerNotes = raw.explorerNotes;
  }

  return context;
}
b/src/skills/builtin/build-verify.skill.md @@ -0,0 +1,43 @@ +--- +name: build-verify +description: Workflow guidance for verifying that a project compiles and links correctly +version: 1.0.0 +slot: section +--- + +## Build Verification Workflow + +The goal of this workflow is to confirm the project compiles cleanly and to surface any errors with actionable context. + +### Step sequence + +1. **Identify the build system** — inspect the workspace root for `CMakeLists.txt`, `Cargo.toml`, `package.json`, `build.gradle`, or `pom.xml` to determine which build tool to invoke. +2. **Install / update dependencies** — run the dependency installation step *before* building: + - CMake: `git submodule update --init --recursive` (if submodules present) + - Node: `npm ci` or `yarn install --frozen-lockfile` + - Rust: `cargo fetch` + - Gradle: `./gradlew dependencies` (optional) +3. **Configure the build** (if required): + - CMake: `cmake -S . -B build [-DCMAKE_BUILD_TYPE=Release]` or `cmake --preset <preset>` + - Gradle: no separate configure step +4. **Compile**: + - CMake: `cmake --build build [--parallel $(nproc)]` or `cmake --build --preset <preset>` + - Node: `npm run build` + - Rust: `cargo build [--release]` + - Gradle: `./gradlew assemble` (compile only, no tests) + - Maven: `mvn package -DskipTests` +5. **Report** — emit a structured summary: overall status (success/failure), number of errors and warnings, and the first 20 lines of compiler output for failures. + +### Error triage heuristics + +- **Linker errors** (`undefined reference`, `unresolved symbol`): check `CMakeLists.txt` for missing `target_link_libraries` entries; for Gradle check `dependencies` block. +- **Missing headers / imports**: confirm that all required packages are declared in the manifest and that dependency installation succeeded in step 2. +- **Type / compilation errors** in generated code: regenerate protobuf, Thrift, or OpenAPI sources before building. 
+- **Out-of-date build cache**: perform a clean build (`rm -rf build && cmake …` or `cargo clean && cargo build`) to rule out stale artifacts. + +### Parallel build flag + +When invoking multi-core builds, pass a parallelism flag to keep wall-clock time low: +- CMake/Ninja: `--parallel $(nproc)` or `-j$(nproc)` +- Maven: `-T 1C` (one thread per CPU core) +- Gradle: `--parallel` diff --git a/src/subagents/planner.ts b/src/subagents/planner.ts index 82c77be..eedac30 100644 --- a/src/subagents/planner.ts +++ b/src/subagents/planner.ts @@ -3,7 +3,7 @@ import type { BaseChatModel } from "@langchain/core/language_models/chat_models" import { logger } from "../logger"; import { ToolRegistry } from "../tools/registry"; import { runSubagent } from "./runner"; -import type { WorkspaceInfo } from "../workspace"; +import type { WorkspaceContext } from "../workspace"; import type { AgentProfileRegistry } from "../agents/registry"; // ───────────────────────────────────────────────────────────────────────────── @@ -75,16 +75,43 @@ const PLANNER_SYSTEM_PROMPT = /** Build the user-facing task string sent to the planner subagent. */ function buildPlannerTask( task: string, - workspaceInfo: WorkspaceInfo, + context: WorkspaceContext, availableTools: string[], availableProfiles?: Array<{ name: string; description: string }> ): string { const toolList = availableTools.length > 0 ? availableTools.join(", ") : "(none)"; + const wi = context.workspaceInfo; let result = `Task: ${task}\n` + - `Workspace: language=${workspaceInfo.language}, framework=${workspaceInfo.framework}, ` + - `packageManager=${workspaceInfo.packageManager}, gitInitialized=${workspaceInfo.gitInitialized}\n` + - `Available tools: ${toolList}`; + `Workspace: language=${wi.language}, framework=${wi.framework}, ` + + `packageManager=${wi.packageManager}, gitInitialized=${wi.gitInitialized}`; + + // Include lifecycle commands so the planner can generate concrete, workspace-specific steps. 
+ // These are typically empty when the context was produced by ProjectExplorer; in that case + // the buildSystems notes below carry the authoritative information. + const lifecycleLines: string[] = []; + if (wi.buildCommand) lifecycleLines.push(`build="${wi.buildCommand}"`); + if (wi.testCommand) lifecycleLines.push(`test="${wi.testCommand}"`); + if (wi.lintCommand) lifecycleLines.push(`lint="${wi.lintCommand}"`); + if (lifecycleLines.length > 0) { + result += `, ${lifecycleLines.join(", ")}`; + } + + // Include richer build system details produced by the ProjectExplorer agent when available. + // The planner uses these notes to derive concrete, workspace-specific commands at runtime. + if (Array.isArray(context.buildSystems) && context.buildSystems.length > 0) { + result += `\nDetected build systems:`; + for (const bs of context.buildSystems as Array<{ name: string; configFile: string; notes: string }>) { + result += `\n - ${bs.name} (${bs.configFile}): ${bs.notes}`; + } + } + + // Free-form notes from the explorer (e.g. "multi-language monorepo") + if (typeof context.explorerNotes === "string" && context.explorerNotes.trim()) { + result += `\nWorkspace notes: ${context.explorerNotes}`; + } + + result += `\nAvailable tools: ${toolList}`; if (availableProfiles && availableProfiles.length > 0) { const profileList = availableProfiles.map((p) => `${p.name}: ${p.description}`).join("; "); result += `\nAvailable agent profiles: ${profileList}`; @@ -180,8 +207,13 @@ export function validatePlan(plan: Plan, registry: ToolRegistry): PlanValidation * * The planner runs without any tools — it only needs to reason and output JSON. * - * @param task Natural-language description of what the agent should accomplish. - * @param workspaceInfo Workspace analysis result (language, framework, etc.). + * @param task Natural-language description of what the agent should accomplish. 
+ * @param context Workspace context produced by `analyzeWorkspace` + `toWorkspaceContext`, + * or by the richer `exploreWorkspace` (ProjectExplorer agent). + * When the context was produced by ProjectExplorer the `buildSystems` and + * `explorerNotes` keys are included in the planner task so the model can + * derive concrete, workspace-specific commands without relying on any + * hardcoded command strings. * @param registry Tool registry used to list available tool names for the prompt. * @param llm Optional LLM instance — created from config when omitted. * @param profileRegistry Optional profile registry; when provided, profile names and descriptions @@ -189,7 +221,7 @@ export function validatePlan(plan: Plan, registry: ToolRegistry): PlanValidation */ export async function generatePlan( task: string, - workspaceInfo: WorkspaceInfo, + context: WorkspaceContext, registry: ToolRegistry, llm?: BaseChatModel, profileRegistry?: AgentProfileRegistry @@ -198,7 +230,7 @@ export async function generatePlan( const availableProfiles = profileRegistry ? profileRegistry.list().map((p) => ({ name: p.name, description: p.description })) : undefined; - const plannerTask = buildPlannerTask(task, workspaceInfo, availableTools, availableProfiles); + const plannerTask = buildPlannerTask(task, context, availableTools, availableProfiles); // Build the system prompt, replacing the {{profileList}} placeholder const profileListStr = @@ -231,7 +263,7 @@ export async function generatePlan( * @param task The original task string. * @param originalPlan The plan that failed validation. * @param feedback Human-readable description of what is wrong. - * @param workspaceInfo Workspace analysis result. + * @param context Workspace context (same object passed to `generatePlan`). * @param registry Tool registry used to supply the up-to-date tool list. * @param llm Optional LLM instance. 
*/ @@ -239,7 +271,7 @@ export async function refinePlan( task: string, originalPlan: Plan, feedback: string, - workspaceInfo: WorkspaceInfo, + context: WorkspaceContext, registry: ToolRegistry, llm?: BaseChatModel ): Promise { diff --git a/src/tools/plan-and-run.ts b/src/tools/plan-and-run.ts index 75acd7b..b7dc0a0 100644 --- a/src/tools/plan-and-run.ts +++ b/src/tools/plan-and-run.ts @@ -5,7 +5,7 @@ import { logger } from "../logger"; import { spinner } from "../spinner"; import { executePlan } from "../orchestrator"; import { generatePlan, refinePlan, validatePlan } from "../subagents/planner"; -import { analyzeWorkspace } from "../workspace"; +import { analyzeWorkspace, toWorkspaceContext } from "../workspace"; import type { ToolDefinition } from "./registry"; import { toolRegistry } from "./registry"; @@ -31,8 +31,9 @@ export const toolDefinition: ToolDefinition = { logger.info({ tool: "plan-and-run", goal }, "generating plan"); const workspaceInfo = await analyzeWorkspace(appConfig.workspaceRoot); + const context = toWorkspaceContext(workspaceInfo); - let plan = await generatePlan(goal, workspaceInfo, toolRegistry); + let plan = await generatePlan(goal, context, toolRegistry); let validation = validatePlan(plan, toolRegistry); if (!validation.valid) { @@ -45,7 +46,7 @@ export const toolDefinition: ToolDefinition = { plan, `These tools are not available: ${validation.invalidTools.join(", ")}. 
` + "Use only tools from the available list.", - workspaceInfo, + context, toolRegistry ); validation = validatePlan(plan, toolRegistry); diff --git a/src/tools/plan.ts b/src/tools/plan.ts index 61331f6..cf904b3 100644 --- a/src/tools/plan.ts +++ b/src/tools/plan.ts @@ -4,7 +4,7 @@ import { appConfig } from "../config"; import { logger } from "../logger"; import { spinner } from "../spinner"; import { generatePlan, refinePlan, validatePlan } from "../subagents/planner"; -import { analyzeWorkspace } from "../workspace"; +import { analyzeWorkspace, toWorkspaceContext } from "../workspace"; import type { ToolDefinition } from "./registry"; import { toolRegistry } from "./registry"; @@ -24,8 +24,9 @@ export const toolDefinition: ToolDefinition = { logger.info({ tool: "plan", goal }, "generating plan"); const workspaceInfo = await analyzeWorkspace(appConfig.workspaceRoot); + const context = toWorkspaceContext(workspaceInfo); - let plan = await generatePlan(goal, workspaceInfo, toolRegistry); + let plan = await generatePlan(goal, context, toolRegistry); let validation = validatePlan(plan, toolRegistry); if (!validation.valid) { @@ -38,7 +39,7 @@ export const toolDefinition: ToolDefinition = { plan, `These tools are not available: ${validation.invalidTools.join(", ")}. ` + "Use only tools from the available list.", - workspaceInfo, + context, toolRegistry ); validation = validatePlan(plan, toolRegistry); diff --git a/src/workspace.ts b/src/workspace.ts index 84b1952..5c2550e 100644 --- a/src/workspace.ts +++ b/src/workspace.ts @@ -3,11 +3,11 @@ import * as path from "path"; /** Structured information about the project workspace. */ export interface WorkspaceInfo { - /** Primary language detected: 'node', 'python', 'go', or 'unknown'. */ + /** Primary language detected: 'node', 'python', 'go', 'rust', 'cmake', or 'unknown'. */ language: string; /** Framework detected from dependencies (e.g. 'react', 'django'), or 'none'. 
/**
 * Generic workspace context map that flows through the planning pipeline.
 *
 * The well-known `workspaceInfo` key holds the flat `WorkspaceInfo` summary.
 * Any other key is typed `unknown`, so consumers must narrow it (for example
 * with `Array.isArray` or `typeof`) before use. Exploration agents use this to
 * attach extra data such as `buildSystems` and `explorerNotes` without changing
 * existing consumers.
 */
export interface WorkspaceContext extends Record<string, unknown> {
  /** Flat summary required by the planner and orchestrator. */
  workspaceInfo: WorkspaceInfo;
}

/**
 * Wrap a `WorkspaceInfo` in a minimal `WorkspaceContext`.
 * Use this when calling `generatePlan()` with the result of `analyzeWorkspace()`.
 *
 * @param info Workspace summary to wrap; stored by reference, not copied.
 * @returns A context whose only key is `workspaceInfo`.
 */
export function toWorkspaceContext(info: WorkspaceInfo): WorkspaceContext {
  return { workspaceInfo: info };
}

/** Lifecycle target names extracted from Makefiles. */
const LIFECYCLE_TARGETS = ["test", "lint", "build", "run", "install"] as const;
+ */ +async function analyzeCargo(rootPath: string): Promise> { + const info: Partial = { + language: "rust", + packageManager: "cargo", + testCommand: "cargo test", + lintCommand: "cargo clippy", + buildCommand: "cargo build", + }; + + // Override defaults with Makefile targets when available + const make = await parseMakefileTargets(rootPath); + if (make["test"]) info.testCommand = make["test"]; + if (make["lint"]) info.lintCommand = make["lint"]; + if (make["build"]) info.buildCommand = make["build"]; + + // Consider tests present if a tests/ directory or any #[cfg(test)] usage exists + info.hasTests = + (await exists(path.join(rootPath, "tests"))) || + (await exists(path.join(rootPath, "src", "tests"))); + + return info; +} + +/** + * Analyse a CMake workspace. + * Reads CMakeLists.txt for basic metadata and suggests cmake preset commands + * when a CMakePresets.json file is present. + */ +async function analyzeCMake(rootPath: string): Promise> { + const info: Partial = { + language: "cmake", + packageManager: "cmake", + testCommand: "ctest --output-on-failure", + lintCommand: "", + buildCommand: "cmake --build build", + }; + + // When CMakePresets.json is present, recommend the preset-based workflow + if (await exists(path.join(rootPath, "CMakePresets.json"))) { + info.buildCommand = "cmake --preset default && cmake --build --preset default"; + info.testCommand = "ctest --preset default"; + } else { + // Classic out-of-source build pattern + info.buildCommand = "cmake -S . 
-B build && cmake --build build"; + } + + // Override with Makefile targets when available (common for CMake super-builds) + const make = await parseMakefileTargets(rootPath); + if (make["test"]) info.testCommand = make["test"]; + if (make["build"]) info.buildCommand = make["build"]; + + // Detect tests by presence of a CTestTestfile, tests/ directory, or test subdirectory + info.hasTests = + (await exists(path.join(rootPath, "CTestTestfile.cmake"))) || + (await exists(path.join(rootPath, "tests"))) || + (await exists(path.join(rootPath, "test"))); + + return info; +} + +/** + * Analyse a Gradle (Java/Kotlin/Android) workspace. + */ +async function analyzeGradle(rootPath: string): Promise> { + // Prefer ./gradlew wrapper when present + const gradleCmd = (await exists(path.join(rootPath, "gradlew"))) ? "./gradlew" : "gradle"; + + const info: Partial = { + language: "java", + packageManager: "gradle", + testCommand: `${gradleCmd} test`, + lintCommand: `${gradleCmd} check`, + buildCommand: `${gradleCmd} build`, + }; + + // Check for Kotlin DSL (build.gradle.kts) to refine the language label + if (await exists(path.join(rootPath, "build.gradle.kts"))) { + info.language = "kotlin"; + } + + info.hasTests = (await exists(path.join(rootPath, "src", "test"))); + + return info; +} + +/** + * Analyse a Maven (Java) workspace. + */ +async function analyzeMaven(rootPath: string): Promise> { + // Prefer ./mvnw wrapper when present + const mvnCmd = (await exists(path.join(rootPath, "mvnw"))) ? "./mvnw" : "mvn"; + + const info: Partial = { + language: "java", + packageManager: "maven", + testCommand: `${mvnCmd} test`, + lintCommand: `${mvnCmd} verify`, + buildCommand: `${mvnCmd} package -DskipTests`, + }; + + info.hasTests = (await exists(path.join(rootPath, "src", "test"))); + + return info; +} + /** * Analyse the workspace rooted at `rootPath` and return a `WorkspaceInfo` * object. 
Language is detected via well-known indicator files; commands are @@ -201,6 +332,17 @@ export async function analyzeWorkspace(rootPath: string): Promise langInfo = await analyzePython(rootPath); } else if (await exists(path.join(rootPath, "go.mod"))) { langInfo = await analyzeGo(rootPath); + } else if (await exists(path.join(rootPath, "Cargo.toml"))) { + langInfo = await analyzeCargo(rootPath); + } else if (await exists(path.join(rootPath, "CMakeLists.txt"))) { + langInfo = await analyzeCMake(rootPath); + } else if ( + (await exists(path.join(rootPath, "build.gradle"))) || + (await exists(path.join(rootPath, "build.gradle.kts"))) + ) { + langInfo = await analyzeGradle(rootPath); + } else if (await exists(path.join(rootPath, "pom.xml"))) { + langInfo = await analyzeMaven(rootPath); } return { ...base, ...langInfo }; diff --git a/tests/e2e/scenarios/plan-feature.e2e.test.ts b/tests/e2e/scenarios/plan-feature.e2e.test.ts index 191ecf7..0fe1fc0 100644 --- a/tests/e2e/scenarios/plan-feature.e2e.test.ts +++ b/tests/e2e/scenarios/plan-feature.e2e.test.ts @@ -14,23 +14,25 @@ import { generatePlan } from "../../../src/subagents/planner"; import { appConfig } from "../../../src/config"; import { createMockLlm } from "../helpers/mock-llm"; import { createWorkspaceFixture } from "../helpers/workspace-fixture"; -import type { WorkspaceInfo } from "../../../src/workspace"; +import type { WorkspaceContext } from "../../../src/workspace"; // When E2E_USE_REAL_LLM=true the test uses undefined (falls back to createLLM). // Note: that requires removing the jest.mock above and setting a real MISTRAL_API_KEY. const USE_REAL_LLM = process.env.E2E_USE_REAL_LLM === "true"; -/** Representative workspace metadata used as planner input. 
*/ -const MOCK_WORKSPACE: WorkspaceInfo = { - language: "node", - framework: "express", - packageManager: "npm", - hasTests: true, - testCommand: "npm test", - lintCommand: "npm run lint", - buildCommand: "npm run build", - entryPoints: ["src/index.ts"], - gitInitialized: false, +/** Representative workspace context used as planner input. */ +const MOCK_WORKSPACE: WorkspaceContext = { + workspaceInfo: { + language: "node", + framework: "express", + packageManager: "npm", + hasTests: true, + testCommand: "npm test", + lintCommand: "npm run lint", + buildCommand: "npm run build", + entryPoints: ["src/index.ts"], + gitInitialized: false, + }, }; describe("E2E: Generate a plan for a feature", () => {