From 07bedbbe29003a9630cdfbc8b8e12b7ae6afb3cb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 30 Mar 2026 12:33:29 +0000 Subject: [PATCH 1/5] Initial plan From f09c2d84dc49e3d3dcd1069a6c53722fc906ad54 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 30 Mar 2026 12:45:51 +0000 Subject: [PATCH 2/5] =?UTF-8?q?feat:=20task=20decomposition=20research=20?= =?UTF-8?q?=E2=80=94=20workspace=20detection,=20planner=20context,=20workf?= =?UTF-8?q?low=20templates?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent-Logs-Url: https://github.com/huberp/agentloop/sessions/3a8fc459-0fd4-4ab9-8e11-a59e4d76960b Co-authored-by: huberp <4027454+huberp@users.noreply.github.com> --- issues/2.md | 208 ++++++++++++++++++ src/__tests__/builtin-agent-profiles.test.ts | 4 +- src/__tests__/builtin-skills.test.ts | 4 +- .../fixtures/workspace-cargo/Cargo.toml | 4 + .../workspace-cmake-presets/CMakeLists.txt | 0 .../workspace-cmake-presets/CMakePresets.json | 1 + .../fixtures/workspace-cmake/CMakeLists.txt | 0 .../workspace-gradle-kotlin/build.gradle.kts | 1 + .../fixtures/workspace-gradle/build.gradle | 1 + .../fixtures/workspace-maven/pom.xml | 1 + src/__tests__/workspace.test.ts | 160 ++++++++++++++ src/agents/builtin/build-verify.agent.json | 12 + src/agents/builtin/test-runner.agent.json | 12 + src/skills/builtin/build-verify.skill.md | 43 ++++ src/skills/builtin/cmake-workflow.skill.md | 82 +++++++ src/subagents/planner.ts | 14 +- src/workspace.ts | 124 ++++++++++- 17 files changed, 664 insertions(+), 7 deletions(-) create mode 100644 issues/2.md create mode 100644 src/__tests__/fixtures/workspace-cargo/Cargo.toml create mode 100644 src/__tests__/fixtures/workspace-cmake-presets/CMakeLists.txt create mode 100644 src/__tests__/fixtures/workspace-cmake-presets/CMakePresets.json create mode 100644 
src/__tests__/fixtures/workspace-cmake/CMakeLists.txt create mode 100644 src/__tests__/fixtures/workspace-gradle-kotlin/build.gradle.kts create mode 100644 src/__tests__/fixtures/workspace-gradle/build.gradle create mode 100644 src/__tests__/fixtures/workspace-maven/pom.xml create mode 100644 src/agents/builtin/build-verify.agent.json create mode 100644 src/agents/builtin/test-runner.agent.json create mode 100644 src/skills/builtin/build-verify.skill.md create mode 100644 src/skills/builtin/cmake-workflow.skill.md diff --git a/issues/2.md b/issues/2.md new file mode 100644 index 00000000..402e5406 --- /dev/null +++ b/issues/2.md @@ -0,0 +1,208 @@ +## Research: Task Planner and Task Decomposition for Coding Agents + +### 1. Problem Statement + +Modern AI coding agents need to tackle tasks that span multiple steps, require diverse tools, and benefit from specialised domain knowledge at each step. The key challenge is **task decomposition**: how should a high-level goal (e.g. "add input validation to all POST handlers") be broken into concrete, executable steps that an agent can carry out reliably? + +A related challenge is **workflow templates**: many coding workflows (build, test, lint, release) have a fixed *shape* but vary in their concrete commands depending on the workspace. Can these patterns be captured once and reused across projects? + +--- + +### 2. 
Baseline: The `plan-and-run` Loop in agentloop + +agentloop already ships a layered planning architecture: + +| Component | Location | Role | +|---|---|---| +| `generatePlan` | `src/subagents/planner.ts` | LLM-powered decomposition of a goal into `PlanStep[]` | +| `refinePlan` | `src/subagents/planner.ts` | Corrects a plan that references unknown tools | +| `validatePlan` | `src/subagents/planner.ts` | Checks that all tool names in the plan are registered | +| `executePlan` | `src/orchestrator.ts` | Runs steps in sequence; supports `retry/skip/abort` on failure and checkpoint/resume | +| `plan` tool | `src/tools/plan.ts` | Exposes plan generation to the agent loop as a callable tool | +| `plan-and-run` tool | `src/tools/plan-and-run.ts` | Combines generation + execution in one tool call | + +The planner runs as a **tool-free subagent**: it receives workspace context and a list of available tools, then returns a JSON plan. The orchestrator dispatches each step to a `runSubagent` call (or `SubagentManager` for complex steps) with an iteration budget derived from `estimatedComplexity`. + +**Key insight already present in agentloop**: each `PlanStep` carries an optional `agentProfile` field. The orchestrator activates the named profile (model, temperature, tool subset) for that step. This enables per-step specialisation without a separate orchestration framework. + +--- + +### 3. What Other Frameworks Do + +#### 3.1 LangGraph (LangChain) +- Models agent behaviour as a **directed graph** of nodes (LLM calls, tool calls) with conditional edges. +- Supports cycles (retry loops), parallel fan-out/fan-in, and human-in-the-loop interrupts. +- Templates are *graph patterns* stored as reusable subgraphs. +- Complexity: full graph authoring required for every new workflow shape. + +#### 3.2 AutoGen (Microsoft) +- Multi-agent conversation: a **Planner** agent, a **Coder** agent, and an **Executor** agent exchange messages until the task is done. 
+- Task decomposition happens in natural language — the Planner emits step descriptions that the Coder implements. +- Workflow templates are **system prompts** for each role, often provided in a configuration YAML. +- Strength: easy to add domain-expert agents. Weakness: conversation history grows rapidly; quality depends on message-passing discipline. + +#### 3.3 CrewAI +- Defines **Crew** (team of agents), **Agents** (role + backstory + tools), and **Tasks** (description + expected output + dependencies). +- Supports sequential and hierarchical execution; tasks can pass their output as context to dependent tasks. +- Workflow templates are *Crew + Task YAML configurations* that can be parameterised and re-instantiated for different inputs. +- Strong alignment with the "workflow template" concept in this issue: a `BuildVerifyCrew` YAML is a reusable template instantiated per workspace. + +#### 3.4 OpenAI Assistants + Structured Outputs +- Persistent thread context allows multi-turn tasks without re-injecting history. +- `run_step` objects provide a built-in audit trail of each tool call and its output. +- Templates are **Assistant instructions** (system prompt) combined with few-shot examples in the thread. +- Limitation: tied to the OpenAI API; no local model support. + +#### 3.5 Copilot Coding Agent (GitHub Copilot) +The example in this issue shows a subtask with: +```json +{ + "name": "build-verify", + "agent_type": "task", + "description": "Build the plugin to verify changes", + "prompt": "...\nSteps:\n1. Run: sudo bash scripts/install-linux-deps.sh\n2. Run: git submodule update --init --recursive\n3. Run: cmake --preset linux-release\n4. Run: cmake --build --preset linux-build -j2\n\nReport whether the build succeeded or failed..." +} +``` + +Key observations: +- The template name (`build-verify`) is **stable and reusable** across tasks. 
+- The concrete steps (cmake preset names, script paths) are **workspace-specific** and were derived from workspace knowledge. +- Instantiation happens once, at workspace-setup time — not re-derived on every task. +- This is equivalent to agentloop's `agentProfile` + `skill` combination, but the steps are baked into the prompt string rather than generated by a planner. + +--- + +### 4. Template Taxonomy for Coding Agents + +Based on the above analysis, coding workflow templates fall into three categories: + +#### Category A — Build Lifecycle Templates +Fixed structure, workspace-specific commands: + +| Template | Shape | Workspace-specific parts | +|---|---|---| +| `build-verify` | configure → compile → report | preset name, script paths, parallelism flag | +| `clean-build` | clean → configure → compile | build directory, preset/profile | +| `release-package` | build → test → package → sign | packaging format, signing key | + +#### Category B — Quality Gate Templates +Fixed checklist, tool-specific commands: + +| Template | Shape | Workspace-specific parts | +|---|---|---| +| `test-and-fix` | run tests → parse failures → locate code → fix → re-run | test runner command, test output format | +| `lint-and-format` | run linter → parse output → apply fixes → re-verify | linter binary, fix flags | +| `security-scan` | run scanner → parse findings → generate report | scanner CLI, severity threshold | + +#### Category C — Development Workflow Templates +Higher-level patterns: + +| Template | Shape | Notes | +|---|---|---| +| `feature-branch` | branch → implement → test → PR | Uses git tools + planner | +| `dependency-update` | audit → update → test → commit | Integrates vulnerability check | +| `hotfix` | branch from tag → apply fix → test → backport | Requires git-log, cherry-pick | + +--- + +### 5. 
How agentloop Can Implement Workflow Templates + +agentloop's existing primitives map cleanly onto the template concept: + +#### 5.1 Templates as Agent Profiles + Skills (recommended) + +A workflow template = **agent profile** (what tools, model, iteration budget) + **skill** (domain knowledge, step sequence, error heuristics). + +Example — `build-verify` profile (`src/agents/builtin/build-verify.agent.json`): +```json +{ + "name": "build-verify", + "description": "Build verification agent — compiles the workspace and reports success or failure", + "temperature": 0.1, + "skills": ["build-verify"], + "tools": ["shell", "file-read", "file-list"], + "maxIterations": 10 +} +``` + +The paired `build-verify` skill (`src/skills/builtin/build-verify.skill.md`) injects: +- Step sequence (identify build system → install deps → configure → compile → report) +- Error triage heuristics (linker errors, missing headers, stale cache) +- Parallelism flags per build tool + +The planner can then annotate a step with `"agentProfile": "build-verify"` and the orchestrator will activate the matching profile for that step — automatically binding the right skill, tool subset, and temperature. + +#### 5.2 Templates as Planner Context (workspace-aware instantiation) + +The planner prompt includes `workspaceInfo` fields including the detected lifecycle commands (`buildCommand`, `testCommand`, `lintCommand`). This allows the planner to produce **concrete, workspace-specific steps** in one shot: + +``` +Workspace: language=cmake, packageManager=cmake, + build="cmake --preset linux-release && cmake --build --preset linux-build", + test="ctest --preset linux-test" +``` + +The planner output then directly embeds the correct commands rather than using a generic placeholder. 
+ +#### 5.3 Template Instantiation: Agent vs Static + +| Approach | When to use | Trade-offs | +|---|---|---| +| **Planner-time instantiation** (current) | Novel tasks, unknown workspaces | Flexible, adapts to workspace; requires LLM call | +| **Profile+skill pre-configuration** (new) | Recurring workflows (CI, build-verify) | Fast, deterministic, version-controlled; less adaptive | +| **Hybrid** (recommended) | Plan overall task, but use pre-defined profiles per step | Best of both worlds | + +The hybrid approach is already supported: the planner annotates `agentProfile` on steps, and the orchestrator activates the profile. Adding skills that encode the step sequence means the profile-activated agent "knows" the right procedure without the planner having to enumerate every sub-step. + +--- + +### 6. Concrete Example: CMake Build-Verify Flow + +**Goal**: "Build the plugin to verify changes compile correctly" + +**Planner output** (with workspace context `build="cmake --preset linux-release && cmake --build --preset linux-build -j2"`): + +```json +{ + "steps": [ + { + "description": "Install Linux build dependencies", + "toolsNeeded": ["shell"], + "estimatedComplexity": "low", + "agentProfile": "devops" + }, + { + "description": "Update git submodules", + "toolsNeeded": ["shell"], + "estimatedComplexity": "low", + "agentProfile": "devops" + }, + { + "description": "Build the project using cmake --preset linux-release && cmake --build --preset linux-build -j2 and report compiler output", + "toolsNeeded": ["shell"], + "estimatedComplexity": "medium", + "agentProfile": "build-verify" + } + ] +} +``` + +The `build-verify` agent profile activates the `build-verify` skill, which provides the step sequence and error triage guidance. The concrete commands come from `workspaceInfo.buildCommand`, injected into the planner prompt. + +--- + +### 7. 
Recommendations and Gaps Addressed + +| Gap | Solution implemented | +|---|---| +| Planner didn't know lifecycle commands | ✅ `buildPlannerTask` now includes `build`, `test`, `lint` commands from `WorkspaceInfo` | +| Only Node/Python/Go workspace detection | ✅ Added CMake, Rust/Cargo, Gradle, Maven analyzers in `workspace.ts` | +| No build-workflow agent profile | ✅ `build-verify.agent.json` and `test-runner.agent.json` added | +| No build-workflow skill | ✅ `build-verify.skill.md` and `cmake-workflow.skill.md` added | + +### 8. Remaining Open Questions + +1. **Template registry**: Should templates be discoverable at runtime (e.g. `list-templates` tool) so the planner can reference them by name? The current profile registry partially serves this role. +2. **Workspace-once vs task-every-time**: For expensive workspace analysis (submodule init, dependency install), should a "workspace setup" template run once at session start and cache results? This aligns with CrewAI's `before_kickoff` hook concept. +3. **Multi-repo / monorepo**: `analyzeWorkspace` currently detects one build system per root. Monorepos with mixed build systems (e.g. a CMake C++ library + a Node.js frontend) need a recursive scan. +4. **Template versioning**: When the workspace changes (new preset, renamed script), how are baked templates kept in sync? A solution is to keep commands in `WorkspaceInfo` (auto-detected) rather than hard-coding them in profile prompts. 
diff --git a/src/__tests__/builtin-agent-profiles.test.ts b/src/__tests__/builtin-agent-profiles.test.ts index ed5fa0d7..1078a492 100644 --- a/src/__tests__/builtin-agent-profiles.test.ts +++ b/src/__tests__/builtin-agent-profiles.test.ts @@ -24,8 +24,8 @@ beforeAll(async () => { }); describe("builtin agent profiles", () => { - it("loads exactly 5 builtin profiles", () => { - expect(registry.list()).toHaveLength(5); + it("loads exactly 7 builtin profiles", () => { + expect(registry.list()).toHaveLength(7); }); it("coder profile has name === 'coder' and model === 'gpt-4o'", () => { diff --git a/src/__tests__/builtin-skills.test.ts b/src/__tests__/builtin-skills.test.ts index 2d6e48d7..b2107d10 100644 --- a/src/__tests__/builtin-skills.test.ts +++ b/src/__tests__/builtin-skills.test.ts @@ -20,9 +20,11 @@ describe("built-in skill library", () => { "test-writer", "git-workflow", "security-auditor", + "build-verify", + "cmake-workflow", ]; - it("loads all 5 built-in skills", () => { + it("loads all 7 built-in skills", () => { const names = registry.list().map((s) => s.name); for (const name of BUILTIN_NAMES) { expect(names).toContain(name); diff --git a/src/__tests__/fixtures/workspace-cargo/Cargo.toml b/src/__tests__/fixtures/workspace-cargo/Cargo.toml new file mode 100644 index 00000000..965b5937 --- /dev/null +++ b/src/__tests__/fixtures/workspace-cargo/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "my-app" +version = "0.1.0" +edition = "2021" diff --git a/src/__tests__/fixtures/workspace-cmake-presets/CMakeLists.txt b/src/__tests__/fixtures/workspace-cmake-presets/CMakeLists.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/__tests__/fixtures/workspace-cmake-presets/CMakePresets.json b/src/__tests__/fixtures/workspace-cmake-presets/CMakePresets.json new file mode 100644 index 00000000..62e6c2f0 --- /dev/null +++ b/src/__tests__/fixtures/workspace-cmake-presets/CMakePresets.json @@ -0,0 +1 @@ 
+{"version":3,"cmakeMinimumRequired":{"major":3,"minor":21},"configurePresets":[{"name":"default","binaryDir":"build"}],"buildPresets":[{"name":"default","configurePreset":"default"}],"testPresets":[{"name":"default","configurePreset":"default"}]} diff --git a/src/__tests__/fixtures/workspace-cmake/CMakeLists.txt b/src/__tests__/fixtures/workspace-cmake/CMakeLists.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/__tests__/fixtures/workspace-gradle-kotlin/build.gradle.kts b/src/__tests__/fixtures/workspace-gradle-kotlin/build.gradle.kts new file mode 100644 index 00000000..5b1dae2a --- /dev/null +++ b/src/__tests__/fixtures/workspace-gradle-kotlin/build.gradle.kts @@ -0,0 +1 @@ +plugins { kotlin("jvm") version "1.9.0" } diff --git a/src/__tests__/fixtures/workspace-gradle/build.gradle b/src/__tests__/fixtures/workspace-gradle/build.gradle new file mode 100644 index 00000000..b95276ac --- /dev/null +++ b/src/__tests__/fixtures/workspace-gradle/build.gradle @@ -0,0 +1 @@ +plugins { id("java") } diff --git a/src/__tests__/fixtures/workspace-maven/pom.xml b/src/__tests__/fixtures/workspace-maven/pom.xml new file mode 100644 index 00000000..12ac61cc --- /dev/null +++ b/src/__tests__/fixtures/workspace-maven/pom.xml @@ -0,0 +1 @@ +4.0.0com.examplemy-app1.0 diff --git a/src/__tests__/workspace.test.ts b/src/__tests__/workspace.test.ts index 718b6169..df50e200 100644 --- a/src/__tests__/workspace.test.ts +++ b/src/__tests__/workspace.test.ts @@ -118,3 +118,163 @@ describe("analyzeWorkspace — git detection", () => { expect(info.gitInitialized).toBe(true); }); }); + +describe("analyzeWorkspace — Rust/Cargo project", () => { + const root = path.join(fixturesDir, "workspace-cargo"); + + let info: WorkspaceInfo; + beforeAll(async () => { + info = await analyzeWorkspace(root); + }); + + it("detects language as 'rust'", () => { + expect(info.language).toBe("rust"); + }); + + it("uses 'cargo' as the package manager", () => { + 
expect(info.packageManager).toBe("cargo"); + }); + + it("defaults the build command to 'cargo build'", () => { + expect(info.buildCommand).toBe("cargo build"); + }); + + it("defaults the test command to 'cargo test'", () => { + expect(info.testCommand).toBe("cargo test"); + }); + + it("defaults the lint command to 'cargo clippy'", () => { + expect(info.lintCommand).toBe("cargo clippy"); + }); + + it("reports hasTests as true when a tests/ directory exists", () => { + expect(info.hasTests).toBe(true); + }); +}); + +describe("analyzeWorkspace — CMake project (no presets)", () => { + const root = path.join(fixturesDir, "workspace-cmake"); + + let info: WorkspaceInfo; + beforeAll(async () => { + info = await analyzeWorkspace(root); + }); + + it("detects language as 'cmake'", () => { + expect(info.language).toBe("cmake"); + }); + + it("uses 'cmake' as the package manager", () => { + expect(info.packageManager).toBe("cmake"); + }); + + it("uses classic out-of-source build command when no presets file is present", () => { + expect(info.buildCommand).toBe("cmake -S . 
-B build && cmake --build build"); + }); + + it("defaults the test command to ctest", () => { + expect(info.testCommand).toBe("ctest --output-on-failure"); + }); + + it("reports hasTests as true when a tests/ directory exists", () => { + expect(info.hasTests).toBe(true); + }); +}); + +describe("analyzeWorkspace — CMake project (with CMakePresets.json)", () => { + const root = path.join(fixturesDir, "workspace-cmake-presets"); + + let info: WorkspaceInfo; + beforeAll(async () => { + info = await analyzeWorkspace(root); + }); + + it("detects language as 'cmake'", () => { + expect(info.language).toBe("cmake"); + }); + + it("uses preset-based build command when CMakePresets.json is present", () => { + expect(info.buildCommand).toBe( + "cmake --preset default && cmake --build --preset default" + ); + }); + + it("uses preset-based test command when CMakePresets.json is present", () => { + expect(info.testCommand).toBe("ctest --preset default"); + }); +}); + +describe("analyzeWorkspace — Gradle (Java) project", () => { + const root = path.join(fixturesDir, "workspace-gradle"); + + let info: WorkspaceInfo; + beforeAll(async () => { + info = await analyzeWorkspace(root); + }); + + it("detects language as 'java'", () => { + expect(info.language).toBe("java"); + }); + + it("uses 'gradle' as the package manager", () => { + expect(info.packageManager).toBe("gradle"); + }); + + it("uses 'gradle build' as the build command (no gradlew wrapper)", () => { + expect(info.buildCommand).toBe("gradle build"); + }); + + it("uses 'gradle test' as the test command", () => { + expect(info.testCommand).toBe("gradle test"); + }); + + it("reports hasTests as true when src/test exists", () => { + expect(info.hasTests).toBe(true); + }); +}); + +describe("analyzeWorkspace — Gradle (Kotlin DSL) project", () => { + const root = path.join(fixturesDir, "workspace-gradle-kotlin"); + + let info: WorkspaceInfo; + beforeAll(async () => { + info = await analyzeWorkspace(root); + }); + + it("detects 
language as 'kotlin'", () => { + expect(info.language).toBe("kotlin"); + }); + + it("uses 'gradle' as the package manager", () => { + expect(info.packageManager).toBe("gradle"); + }); +}); + +describe("analyzeWorkspace — Maven project", () => { + const root = path.join(fixturesDir, "workspace-maven"); + + let info: WorkspaceInfo; + beforeAll(async () => { + info = await analyzeWorkspace(root); + }); + + it("detects language as 'java'", () => { + expect(info.language).toBe("java"); + }); + + it("uses 'maven' as the package manager", () => { + expect(info.packageManager).toBe("maven"); + }); + + it("uses 'mvn package -DskipTests' as the build command (no wrapper)", () => { + expect(info.buildCommand).toBe("mvn package -DskipTests"); + }); + + it("uses 'mvn test' as the test command", () => { + expect(info.testCommand).toBe("mvn test"); + }); + + it("reports hasTests as true when src/test exists", () => { + expect(info.hasTests).toBe(true); + }); +}); diff --git a/src/agents/builtin/build-verify.agent.json b/src/agents/builtin/build-verify.agent.json new file mode 100644 index 00000000..6fec92e5 --- /dev/null +++ b/src/agents/builtin/build-verify.agent.json @@ -0,0 +1,12 @@ +{ + "name": "build-verify", + "description": "Build verification agent — compiles the workspace and reports success or failure with compiler diagnostics", + "version": "1.0.0", + "temperature": 0.1, + "skills": ["build-verify"], + "tools": ["shell", "file-read", "file-list"], + "maxIterations": 10, + "constraints": { + "requireConfirmation": [] + } +} diff --git a/src/agents/builtin/test-runner.agent.json b/src/agents/builtin/test-runner.agent.json new file mode 100644 index 00000000..64a992ea --- /dev/null +++ b/src/agents/builtin/test-runner.agent.json @@ -0,0 +1,12 @@ +{ + "name": "test-runner", + "description": "Test execution agent — runs the project test suite, reports failures, and suggests targeted fixes", + "version": "1.0.0", + "temperature": 0.2, + "skills": ["test-writer"], + "tools": 
["shell", "file-read", "file-write", "file-edit", "file-list", "code-search"], + "maxIterations": 20, + "constraints": { + "requireConfirmation": [] + } +} diff --git a/src/skills/builtin/build-verify.skill.md b/src/skills/builtin/build-verify.skill.md new file mode 100644 index 00000000..699a76f5 --- /dev/null +++ b/src/skills/builtin/build-verify.skill.md @@ -0,0 +1,43 @@ +--- +name: build-verify +description: Workflow guidance for verifying that a project compiles and links correctly +version: 1.0.0 +slot: section +--- + +## Build Verification Workflow + +The goal of this workflow is to confirm the project compiles cleanly and to surface any errors with actionable context. + +### Step sequence + +1. **Identify the build system** — inspect the workspace root for `CMakeLists.txt`, `Cargo.toml`, `package.json`, `build.gradle`, or `pom.xml` to determine which build tool to invoke. +2. **Install / update dependencies** — run the dependency installation step *before* building: + - CMake: `git submodule update --init --recursive` (if submodules present) + - Node: `npm ci` or `yarn install --frozen-lockfile` + - Rust: `cargo fetch` + - Gradle: `./gradlew dependencies` (optional) +3. **Configure the build** (if required): + - CMake: `cmake -S . -B build [-DCMAKE_BUILD_TYPE=Release]` or `cmake --preset <preset>` + - Gradle: no separate configure step +4. **Compile**: + - CMake: `cmake --build build [--parallel $(nproc)]` or `cmake --build --preset <preset>` + - Node: `npm run build` + - Rust: `cargo build [--release]` + - Gradle: `./gradlew assemble` (compile only, no tests) + - Maven: `mvn package -DskipTests` +5. **Report** — emit a structured summary: overall status (success/failure), number of errors and warnings, and the first 20 lines of compiler output for failures. + +### Error triage heuristics + +- **Linker errors** (`undefined reference`, `unresolved symbol`): check `CMakeLists.txt` for missing `target_link_libraries` entries; for Gradle check `dependencies` block.
+- **Missing headers / imports**: confirm that all required packages are declared in the manifest and that dependency installation succeeded in step 2. +- **Type / compilation errors** in generated code: regenerate protobuf, Thrift, or OpenAPI sources before building. +- **Out-of-date build cache**: perform a clean build (`rm -rf build && cmake …` or `cargo clean && cargo build`) to rule out stale artifacts. + +### Parallel build flag + +When invoking multi-core builds, pass a parallelism flag to keep wall-clock time low: +- CMake/Ninja: `--parallel $(nproc)` or `-j$(nproc)` +- Maven: `-T 1C` (one thread per CPU core) +- Gradle: `--parallel` diff --git a/src/skills/builtin/cmake-workflow.skill.md b/src/skills/builtin/cmake-workflow.skill.md new file mode 100644 index 00000000..856f0366 --- /dev/null +++ b/src/skills/builtin/cmake-workflow.skill.md @@ -0,0 +1,82 @@ +--- +name: cmake-workflow +description: CMake-specific build, test, and packaging patterns including preset-based workflows +version: 1.0.0 +slot: section +--- + +## CMake Workflow Guidelines + +### Project layout conventions + +- Source lives in `src/`; headers in `include/`; tests in `tests/` or `test/`. +- Out-of-source builds go in `build/` (excluded from version control via `.gitignore`). +- `CMakeLists.txt` at the repository root is the entry point; each subdirectory may have its own `CMakeLists.txt`. + +### Preset-based workflow (preferred when `CMakePresets.json` exists) + +```bash +# Configure +cmake --preset <preset> # e.g. linux-release, debug, ci + +# Build +cmake --build --preset <preset> [--parallel $(nproc)] + +# Test +ctest --preset <preset> [--output-on-failure] +``` + +List available presets: +```bash +cmake --list-presets # configure presets +cmake --build --list-presets # build presets +ctest --list-presets # test presets +``` + +### Classic out-of-source workflow (no presets) + +```bash +# Configure (Release build, Ninja generator recommended) +cmake -S .
-B build -G Ninja -DCMAKE_BUILD_TYPE=Release + +# Build (parallel) +cmake --build build --parallel $(nproc) + +# Test +cd build && ctest --output-on-failure +``` + +### Dependency management + +- **Submodules**: always run `git submodule update --init --recursive` before configuring. +- **find_package**: ensure system libraries are installed (e.g. `sudo apt install libssl-dev`). +- **FetchContent / CPM.cmake**: dependencies are downloaded during configure; verify internet access or a local cache is available. +- **vcpkg / Conan**: run `vcpkg install` or `conan install .` before `cmake -S . -B build`. + +### Install-step dependencies pattern + +When a project ships a dependency-installation script (e.g. `scripts/install-linux-deps.sh`), run it *before* the CMake configure step: + +```bash +sudo bash scripts/install-linux-deps.sh +git submodule update --init --recursive +cmake --preset <preset> +cmake --build --preset <preset> --parallel $(nproc) +``` + +### Common CMake variables + +| Variable | Purpose | +|---|---| +| `CMAKE_BUILD_TYPE` | `Debug`, `Release`, `RelWithDebInfo`, `MinSizeRel` | +| `CMAKE_INSTALL_PREFIX` | Install destination (default `/usr/local`) | +| `CMAKE_TOOLCHAIN_FILE` | Cross-compile or vcpkg toolchain | +| `BUILD_SHARED_LIBS` | `ON` to build shared libraries by default | +| `CMAKE_EXPORT_COMPILE_COMMANDS` | `ON` to generate `compile_commands.json` for tooling | + +### Diagnosing build failures + +1. Check the **configure step** output first — missing dependencies abort here. +2. Look for the **first** error in compiler output; subsequent errors are often cascading. +3. Enable verbose output to see exact compiler flags: `cmake --build build --verbose` or `VERBOSE=1 make`. +4. Use `--fresh` flag to force a clean reconfigure: `cmake --fresh --preset <preset>`.
diff --git a/src/subagents/planner.ts b/src/subagents/planner.ts index 82c77be1..624c6c56 100644 --- a/src/subagents/planner.ts +++ b/src/subagents/planner.ts @@ -83,8 +83,18 @@ function buildPlannerTask( let result = `Task: ${task}\n` + `Workspace: language=${workspaceInfo.language}, framework=${workspaceInfo.framework}, ` + - `packageManager=${workspaceInfo.packageManager}, gitInitialized=${workspaceInfo.gitInitialized}\n` + - `Available tools: ${toolList}`; + `packageManager=${workspaceInfo.packageManager}, gitInitialized=${workspaceInfo.gitInitialized}`; + + // Include lifecycle commands so the planner can generate concrete, workspace-specific steps + const lifecycleLines: string[] = []; + if (workspaceInfo.buildCommand) lifecycleLines.push(`build="${workspaceInfo.buildCommand}"`); + if (workspaceInfo.testCommand) lifecycleLines.push(`test="${workspaceInfo.testCommand}"`); + if (workspaceInfo.lintCommand) lifecycleLines.push(`lint="${workspaceInfo.lintCommand}"`); + if (lifecycleLines.length > 0) { + result += `, ${lifecycleLines.join(", ")}`; + } + + result += `\nAvailable tools: ${toolList}`; if (availableProfiles && availableProfiles.length > 0) { const profileList = availableProfiles.map((p) => `${p.name}: ${p.description}`).join("; "); result += `\nAvailable agent profiles: ${profileList}`; diff --git a/src/workspace.ts b/src/workspace.ts index 84b19524..9ce29008 100644 --- a/src/workspace.ts +++ b/src/workspace.ts @@ -3,11 +3,11 @@ import * as path from "path"; /** Structured information about the project workspace. */ export interface WorkspaceInfo { - /** Primary language detected: 'node', 'python', 'go', or 'unknown'. */ + /** Primary language detected: 'node', 'python', 'go', 'rust', 'cmake', or 'unknown'. */ language: string; /** Framework detected from dependencies (e.g. 'react', 'django'), or 'none'. */ framework: string; - /** Package manager inferred from lock files or language (e.g. 'npm', 'pip'). 
*/ + /** Package manager inferred from lock files or language (e.g. 'npm', 'pip', 'cargo', 'gradle'). */ packageManager: string; /** True if a test directory or test script was found. */ hasTests: boolean; @@ -168,6 +168,115 @@ async function analyzeGo(rootPath: string): Promise> { return info; } +/** + * Analyse a Rust/Cargo workspace. + * Reads Cargo.toml for basic metadata and checks for a `tests/` directory. + */ +async function analyzeCargo(rootPath: string): Promise> { + const info: Partial = { + language: "rust", + packageManager: "cargo", + testCommand: "cargo test", + lintCommand: "cargo clippy", + buildCommand: "cargo build", + }; + + // Override defaults with Makefile targets when available + const make = await parseMakefileTargets(rootPath); + if (make["test"]) info.testCommand = make["test"]; + if (make["lint"]) info.lintCommand = make["lint"]; + if (make["build"]) info.buildCommand = make["build"]; + + // Consider tests present if a tests/ directory or any #[cfg(test)] usage exists + info.hasTests = + (await exists(path.join(rootPath, "tests"))) || + (await exists(path.join(rootPath, "src", "tests"))); + + return info; +} + +/** + * Analyse a CMake workspace. + * Reads CMakeLists.txt for basic metadata and suggests cmake preset commands + * when a CMakePresets.json file is present. + */ +async function analyzeCMake(rootPath: string): Promise> { + const info: Partial = { + language: "cmake", + packageManager: "cmake", + testCommand: "ctest --output-on-failure", + lintCommand: "", + buildCommand: "cmake --build build", + }; + + // When CMakePresets.json is present, recommend the preset-based workflow + if (await exists(path.join(rootPath, "CMakePresets.json"))) { + info.buildCommand = "cmake --preset default && cmake --build --preset default"; + info.testCommand = "ctest --preset default"; + } else { + // Classic out-of-source build pattern + info.buildCommand = "cmake -S . 
-B build && cmake --build build"; + } + + // Override with Makefile targets when available (common for CMake super-builds) + const make = await parseMakefileTargets(rootPath); + if (make["test"]) info.testCommand = make["test"]; + if (make["build"]) info.buildCommand = make["build"]; + + // Detect tests by presence of a CTestTestfile, tests/ directory, or test subdirectory + info.hasTests = + (await exists(path.join(rootPath, "CTestTestfile.cmake"))) || + (await exists(path.join(rootPath, "tests"))) || + (await exists(path.join(rootPath, "test"))); + + return info; +} + +/** + * Analyse a Gradle (Java/Kotlin/Android) workspace. + */ +async function analyzeGradle(rootPath: string): Promise> { + // Prefer ./gradlew wrapper when present + const gradleCmd = (await exists(path.join(rootPath, "gradlew"))) ? "./gradlew" : "gradle"; + + const info: Partial = { + language: "java", + packageManager: "gradle", + testCommand: `${gradleCmd} test`, + lintCommand: `${gradleCmd} check`, + buildCommand: `${gradleCmd} build`, + }; + + // Check for Kotlin DSL (build.gradle.kts) to refine the language label + if (await exists(path.join(rootPath, "build.gradle.kts"))) { + info.language = "kotlin"; + } + + info.hasTests = (await exists(path.join(rootPath, "src", "test"))); + + return info; +} + +/** + * Analyse a Maven (Java) workspace. + */ +async function analyzeMaven(rootPath: string): Promise> { + // Prefer ./mvnw wrapper when present + const mvnCmd = (await exists(path.join(rootPath, "mvnw"))) ? "./mvnw" : "mvn"; + + const info: Partial = { + language: "java", + packageManager: "maven", + testCommand: `${mvnCmd} test`, + lintCommand: `${mvnCmd} verify`, + buildCommand: `${mvnCmd} package -DskipTests`, + }; + + info.hasTests = (await exists(path.join(rootPath, "src", "test"))); + + return info; +} + /** * Analyse the workspace rooted at `rootPath` and return a `WorkspaceInfo` * object. 
Language is detected via well-known indicator files; commands are @@ -201,6 +310,17 @@ export async function analyzeWorkspace(rootPath: string): Promise langInfo = await analyzePython(rootPath); } else if (await exists(path.join(rootPath, "go.mod"))) { langInfo = await analyzeGo(rootPath); + } else if (await exists(path.join(rootPath, "Cargo.toml"))) { + langInfo = await analyzeCargo(rootPath); + } else if (await exists(path.join(rootPath, "CMakeLists.txt"))) { + langInfo = await analyzeCMake(rootPath); + } else if ( + (await exists(path.join(rootPath, "build.gradle"))) || + (await exists(path.join(rootPath, "build.gradle.kts"))) + ) { + langInfo = await analyzeGradle(rootPath); + } else if (await exists(path.join(rootPath, "pom.xml"))) { + langInfo = await analyzeMaven(rootPath); } return { ...base, ...langInfo }; From 3e615c933263fb61eaf8f8d9917aeede284cd30c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 30 Mar 2026 13:25:37 +0000 Subject: [PATCH 3/5] =?UTF-8?q?refactor:=20rewrite=20research=20doc=20to?= =?UTF-8?q?=20focus=20on=20dynamic=20intent=E2=86=92action=20transformatio?= =?UTF-8?q?n,=20remove=20cmake-specific=20skill?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Agent-Logs-Url: https://github.com/huberp/agentloop/sessions/d37e4f03-c06f-4113-a6da-f608dbd6b3f4 Co-authored-by: huberp <4027454+huberp@users.noreply.github.com> --- issues/2.md | 400 +++++++++++------- src/__tests__/builtin-skills.test.ts | 3 +- .../fixtures/workspace-cargo/tests/.keep | 0 .../fixtures/workspace-cmake/tests/.keep | 0 .../workspace-gradle-kotlin/src/test/.keep | 0 .../fixtures/workspace-gradle/src/test/.keep | 0 .../fixtures/workspace-maven/src/test/.keep | 0 src/skills/builtin/cmake-workflow.skill.md | 82 ---- 8 files changed, 246 insertions(+), 239 deletions(-) create mode 100644 src/__tests__/fixtures/workspace-cargo/tests/.keep create mode 100644 
src/__tests__/fixtures/workspace-cmake/tests/.keep create mode 100644 src/__tests__/fixtures/workspace-gradle-kotlin/src/test/.keep create mode 100644 src/__tests__/fixtures/workspace-gradle/src/test/.keep create mode 100644 src/__tests__/fixtures/workspace-maven/src/test/.keep delete mode 100644 src/skills/builtin/cmake-workflow.skill.md diff --git a/issues/2.md b/issues/2.md index 402e5406..67177cac 100644 --- a/issues/2.md +++ b/issues/2.md @@ -1,208 +1,298 @@ -## Research: Task Planner and Task Decomposition for Coding Agents +## Research: Intent-to-Action Transformation — How a Generic Workflow Step Becomes Concrete -### 1. Problem Statement +### 1. The Core Problem -Modern AI coding agents need to tackle tasks that span multiple steps, require diverse tools, and benefit from specialised domain knowledge at each step. The key challenge is **task decomposition**: how should a high-level goal (e.g. "add input validation to all POST handlers") be broken into concrete, executable steps that an agent can carry out reliably? +A coding agent receives a generic intent such as **"verify-build"**. This is a template name +that means "compile the project and confirm whether the build succeeds or fails". But the +_concrete steps_ vary entirely by workspace: -A related challenge is **workflow templates**: many coding workflows (build, test, lint, release) have a fixed *shape* but vary in their concrete commands depending on the workspace. Can these patterns be captured once and reused across projects? +- For a CMake project with presets: `cmake --preset linux-release && cmake --build --preset linux-build -j2` +- For a Node.js project: `npm ci && npm run build` +- For a Rust project: `cargo build` +- For a Gradle project: `./gradlew assemble` ---- +The question is: **what is the correct point in the machinery to perform this transformation, +and which components are responsible for deriving the concrete steps?** -### 2. 
Baseline: The `plan-and-run` Loop in agentloop +--- -agentloop already ships a layered planning architecture: +### 2. What Must NOT Happen — No Hardcoded Instantiation -| Component | Location | Role | -|---|---|---| -| `generatePlan` | `src/subagents/planner.ts` | LLM-powered decomposition of a goal into `PlanStep[]` | -| `refinePlan` | `src/subagents/planner.ts` | Corrects a plan that references unknown tools | -| `validatePlan` | `src/subagents/planner.ts` | Checks that all tool names in the plan are registered | -| `executePlan` | `src/orchestrator.ts` | Runs steps in sequence; supports `retry/skip/abort` on failure and checkpoint/resume | -| `plan` tool | `src/tools/plan.ts` | Exposes plan generation to the agent loop as a callable tool | -| `plan-and-run` tool | `src/tools/plan-and-run.ts` | Combines generation + execution in one tool call | +The transformation must not be done by pre-wiring cmake commands (or any other build-system +commands) into static configuration files. A hardcoded solution: -The planner runs as a **tool-free subagent**: it receives workspace context and a list of available tools, then returns a JSON plan. The orchestrator dispatches each step to a `runSubagent` call (or `SubagentManager` for complex steps) with an iteration budget derived from `estimatedComplexity`. +- Cannot adapt when a project changes its build system or adds presets. +- Does not scale across projects or workspaces. +- Defeats the purpose of a coding agent that is supposed to _reason_ about its environment. -**Key insight already present in agentloop**: each `PlanStep` carries an optional `agentProfile` field. The orchestrator activates the named profile (model, temperature, tool subset) for that step. This enables per-step specialisation without a separate orchestration framework. +The transformation must be agent-driven and workspace-aware at runtime. --- -### 3. 
What Other Frameworks Do - -#### 3.1 LangGraph (LangChain) -- Models agent behaviour as a **directed graph** of nodes (LLM calls, tool calls) with conditional edges. -- Supports cycles (retry loops), parallel fan-out/fan-in, and human-in-the-loop interrupts. -- Templates are *graph patterns* stored as reusable subgraphs. -- Complexity: full graph authoring required for every new workflow shape. - -#### 3.2 AutoGen (Microsoft) -- Multi-agent conversation: a **Planner** agent, a **Coder** agent, and an **Executor** agent exchange messages until the task is done. -- Task decomposition happens in natural language — the Planner emits step descriptions that the Coder implements. -- Workflow templates are **system prompts** for each role, often provided in a configuration YAML. -- Strength: easy to add domain-expert agents. Weakness: conversation history grows rapidly; quality depends on message-passing discipline. - -#### 3.3 CrewAI -- Defines **Crew** (team of agents), **Agents** (role + backstory + tools), and **Tasks** (description + expected output + dependencies). -- Supports sequential and hierarchical execution; tasks can pass their output as context to dependent tasks. -- Workflow templates are *Crew + Task YAML configurations* that can be parameterised and re-instantiated for different inputs. -- Strong alignment with the "workflow template" concept in this issue: a `BuildVerifyCrew` YAML is a reusable template instantiated per workspace. - -#### 3.4 OpenAI Assistants + Structured Outputs -- Persistent thread context allows multi-turn tasks without re-injecting history. -- `run_step` objects provide a built-in audit trail of each tool call and its output. -- Templates are **Assistant instructions** (system prompt) combined with few-shot examples in the thread. -- Limitation: tied to the OpenAI API; no local model support. 
- -#### 3.5 Copilot Coding Agent (GitHub Copilot) -The example in this issue shows a subtask with: -```json -{ - "name": "build-verify", - "agent_type": "task", - "description": "Build the plugin to verify changes", - "prompt": "...\nSteps:\n1. Run: sudo bash scripts/install-linux-deps.sh\n2. Run: git submodule update --init --recursive\n3. Run: cmake --preset linux-release\n4. Run: cmake --build --preset linux-build -j2\n\nReport whether the build succeeded or failed..." -} +### 3. Current agentloop Machinery and the Transformation Points + +agentloop already has the primitives for this transformation. The pipeline is: + +``` +Generic intent: "verify-build" + │ + ▼ +[1] analyzeWorkspace() ← detects build system, extracts lifecycle commands + │ WorkspaceInfo { language="cmake", buildCommand="cmake --preset…", … } + ▼ +[2] generatePlan() / planner ← LLM reasons: intent + workspace → concrete PlanStep[] + │ PlanStep { description="Run cmake --preset linux-release …", + │ toolsNeeded=["shell"], agentProfile="build-verify" } + ▼ +[3] executePlan() / orchestrator ← activates per-step agent profile, runs subagent + │ build-verify profile → shell tool, low temperature, build-verify skill + ▼ +[4] StepResult { output="…compiler output…", status="success"|"failed" } ``` -Key observations: -- The template name (`build-verify`) is **stable and reusable** across tasks. -- The concrete steps (cmake preset names, script paths) are **workspace-specific** and were derived from workspace knowledge. -- Instantiation happens once, at workspace-setup time — not re-derived on every task. -- This is equivalent to agentloop's `agentProfile` + `skill` combination, but the steps are baked into the prompt string rather than generated by a planner. +#### Step [1] — `analyzeWorkspace()` in `src/workspace.ts` ---- +This is the **workspace probe**. It inspects the repository root for indicator files +(`CMakeLists.txt`, `Cargo.toml`, `package.json`, `build.gradle`, `pom.xml`, etc.) 
and extracts +the concrete lifecycle commands (`buildCommand`, `testCommand`, `lintCommand`). The result is a +`WorkspaceInfo` object — the source of truth for what commands the workspace actually uses. -### 4. Template Taxonomy for Coding Agents +This is the right place for build-system detection: once, per session, before planning. + +#### Step [2] — `generatePlan()` / `buildPlannerTask()` in `src/subagents/planner.ts` + +The planner LLM receives the generic intent plus the `WorkspaceInfo` context (including +detected lifecycle commands) and reasons about what concrete steps to produce. This is the +**intent-to-steps transformation**: + +``` +Task: verify-build +Workspace: language=cmake, packageManager=cmake, + build="cmake --preset linux-release && cmake --build --preset linux-build", + test="ctest --preset default" +Available tools: shell, file-read, file-list +``` -Based on the above analysis, coding workflow templates fall into three categories: +The LLM returns a plan with concrete step descriptions drawn from the workspace context. No +static configuration is needed — the planner derives the steps dynamically from what +`analyzeWorkspace()` found. -#### Category A — Build Lifecycle Templates -Fixed structure, workspace-specific commands: +The planner can also annotate each step with an `agentProfile`, directing the orchestrator to +activate a specialised agent (e.g. `build-verify`) for that step. -| Template | Shape | Workspace-specific parts | -|---|---|---| -| `build-verify` | configure → compile → report | preset name, script paths, parallelism flag | -| `clean-build` | clean → configure → compile | build directory, preset/profile | -| `release-package` | build → test → package → sign | packaging format, signing key | +#### Step [3] — `executePlan()` / `runStep()` in `src/orchestrator.ts` -#### Category B — Quality Gate Templates -Fixed checklist, tool-specific commands: +The orchestrator executes each step as a `runSubagent` call. 
When a step carries an +`agentProfile` annotation, `activateProfile()` loads the profile (tools, model, temperature, +skills). The agent then has both the **concrete step description** (from the planner) and the +**domain guidance** (from its skill) to execute reliably. -| Template | Shape | Workspace-specific parts | -|---|---|---| -| `test-and-fix` | run tests → parse failures → locate code → fix → re-run | test runner command, test output format | -| `lint-and-format` | run linter → parse output → apply fixes → re-verify | linter binary, fix flags | -| `security-scan` | run scanner → parse findings → generate report | scanner CLI, severity threshold | +--- -#### Category C — Development Workflow Templates -Higher-level patterns: +### 4. Which Interaction Patterns from the Baseline Research Are Essential -| Template | Shape | Notes | -|---|---|---| -| `feature-branch` | branch → implement → test → PR | Uses git tools + planner | -| `dependency-update` | audit → update → test → commit | Integrates vulnerability check | -| `hotfix` | branch from tag → apply fix → test → backport | Requires git-log, cherry-pick | +The baseline branch (`copilot/research-agent-fws`) identified eight gaps in agentloop. For +intent-to-action transformation, three are directly essential: --- -### 5. How agentloop Can Implement Workflow Templates +#### 4.1 Plan-Execute-Verify Loop (Baseline Issue 3) — **Critical** -agentloop's existing primitives map cleanly onto the template concept: +**Why it matters for "verify-build"**: The word "verify" in the intent means the agent must +_confirm_ that the build succeeded — not merely that the build process ran without throwing an +exception. Today, `executePlan()` marks a step as `status: "success"` as soon as the subagent +returns without throwing. A build that silently failed (zero exit code but wrong output, a +`make` that skipped targets, a test that passed vacuously) is indistinguishable from a correct +build. 
-#### 5.1 Templates as Agent Profiles + Skills (recommended) +**The missing piece**: A `VerificationAgent` (proposed in Issue 3) runs after each step and +produces a structured `VerificationResult { passed, reasoning, issues[] }`. For a build step, +the verifier checks: "Does the output contain evidence of a successful compilation? Are there +error messages? Is the binary present?" -A workflow template = **agent profile** (what tools, model, iteration budget) + **skill** (domain knowledge, step sequence, error heuristics). +**Dynamic replanning on failure**: When the verifier flags the build as failed, the system +calls `refinePlan()` with the verifier's feedback (e.g., "missing dependency X"). The +orchestrator replaces the remaining steps with a revised plan that installs the dependency and +retries the build. This is the essential self-correction loop for "verify-build". -Example — `build-verify` profile (`src/agents/builtin/build-verify.agent.json`): -```json -{ - "name": "build-verify", - "description": "Build verification agent — compiles the workspace and reports success or failure", - "temperature": 0.1, - "skills": ["build-verify"], - "tools": ["shell", "file-read", "file-list"], - "maxIterations": 10 -} +**Interaction pattern** (from Issue 3): ``` +executePlan() + └─ for each step: + ├─ runStep() ← executes the build command + ├─ verifyStep() ← checks build output for success/failure + │ ├─ pass → next step + │ └─ fail → refinePlan(feedback) → re-execute + └─ checkpoint.save() +``` + +**Without this pattern**, a "verify-build" intent can only execute the build — it cannot +actually verify the outcome. 
-The paired `build-verify` skill (`src/skills/builtin/build-verify.skill.md`) injects: -- Step sequence (identify build system → install deps → configure → compile → report) -- Error triage heuristics (linker errors, missing headers, stale cache) -- Parallelism flags per build tool +--- -The planner can then annotate a step with `"agentProfile": "build-verify"` and the orchestrator will activate the matching profile for that step — automatically binding the right skill, tool subset, and temperature. +#### 4.2 Dynamic Task Decomposition (Baseline Issue 4) — **Important** -#### 5.2 Templates as Planner Context (workspace-aware instantiation) +**Why it matters**: The "verify-build" intent may require sub-steps that cannot be known at +planning time. For example: +- The planner produces a step "run the build" +- During execution, the build fails with "submodules not initialised" +- The agent needs to inject a sub-step "git submodule update --init --recursive" _before_ + retrying the build -The planner prompt includes `workspaceInfo` fields including the detected lifecycle commands (`buildCommand`, `testCommand`, `lintCommand`). This allows the planner to produce **concrete, workspace-specific steps** in one shot: +This is addressed by **Dynamic Task Decomposition** (Issue 4, section 4): a complex step can +call a `decompose_task` tool at runtime to inject new sub-steps immediately after the current +step. The orchestrator's `executePlan()` maintains a mutable steps list and inserts the new +steps in-place. +**Interaction pattern**: ``` -Workspace: language=cmake, packageManager=cmake, - build="cmake --preset linux-release && cmake --build --preset linux-build", - test="ctest --preset linux-test" +executePlan() + ├─ steps = [... mutable list ...] + └─ step i: "Run build" + └─ subagent calls decompose_task({newSteps: [ + { description: "Init submodules", … }, + { description: "Re-run build", … } + ]}) + → steps[i+1..] 
= [init-submodules, re-run-build, ...original-remaining-steps] ``` -The planner output then directly embeds the correct commands rather than using a generic placeholder. +**Without this pattern**, intent-to-action transformation is only as good as the planner's +initial plan. When the environment deviates from expectations (missing deps, wrong tool +version, first-time setup required), the agent has no way to adapt mid-execution. -#### 5.3 Template Instantiation: Agent vs Static +--- -| Approach | When to use | Trade-offs | -|---|---|---| -| **Planner-time instantiation** (current) | Novel tasks, unknown workspaces | Flexible, adapts to workspace; requires LLM call | -| **Profile+skill pre-configuration** (new) | Recurring workflows (CI, build-verify) | Fast, deterministic, version-controlled; less adaptive | -| **Hybrid** (recommended) | Plan overall task, but use pre-defined profiles per step | Best of both worlds | +#### 4.3 Hierarchical Delegation (Baseline Issue 4) — **Architectural** -The hybrid approach is already supported: the planner annotates `agentProfile` on steps, and the orchestrator activates the profile. Adding skills that encode the step sequence means the profile-activated agent "knows" the right procedure without the planner having to enumerate every sub-step. +**Why it matters**: At a higher level of organisation, a _coordinator agent_ can receive the +"verify-build" intent and delegate workspace analysis and step instantiation to a child agent. +This is the **Hierarchical pattern** from Issue 4. 
+ +**Interaction pattern**: +``` +Coordinator receives: "verify-build" + └─ calls delegate_subagent("workspace-analyst") + └─ workspace-analyst: reads workspace, returns WorkspaceInfo + recommended steps + └─ coordinator constructs a plan from the returned recommendations + └─ calls delegate_subagent("build-verify") with concrete step descriptions + └─ build-verify agent: executes build, returns structured result + └─ coordinator synthesises final report +``` + +This pattern separates concerns cleanly: the coordinator holds the intent, the workspace +analyst provides grounding, and the build-verify agent executes. Today's planner partially +plays the coordinator role, but it cannot delegate to a workspace analyst because it is a +tool-free subagent that only outputs JSON. Hierarchical delegation would allow the planner to +_actively probe_ the workspace via tool calls rather than relying on pre-computed +`WorkspaceInfo`. --- -### 6. Concrete Example: CMake Build-Verify Flow - -**Goal**: "Build the plugin to verify changes compile correctly" - -**Planner output** (with workspace context `build="cmake --preset linux-release && cmake --build --preset linux-build -j2"`): - -```json -{ - "steps": [ - { - "description": "Install Linux build dependencies", - "toolsNeeded": ["shell"], - "estimatedComplexity": "low", - "agentProfile": "devops" - }, - { - "description": "Update git submodules", - "toolsNeeded": ["shell"], - "estimatedComplexity": "low", - "agentProfile": "devops" - }, - { - "description": "Build the project using cmake --preset linux-release && cmake --build --preset linux-build -j2 and report compiler output", - "toolsNeeded": ["shell"], - "estimatedComplexity": "medium", - "agentProfile": "build-verify" - } - ] -} -``` +#### 4.4 Toolbox Refiner (Baseline Issue 5) — **Supporting** -The `build-verify` agent profile activates the `build-verify` skill, which provides the step sequence and error triage guidance. 
The concrete commands come from `workspaceInfo.buildCommand`, injected into the planner prompt. +**Why it matters**: The build-verify agent only needs `shell`, `file-read`, and `file-list`. +Exposing all 16+ registered tools dilutes the agent's focus and wastes context budget. The +**Toolbox Refiner** (Issue 5) dynamically narrows the exposed tool set per invocation based on +the step's declared `toolsNeeded` list and the task description. + +This is already partially addressed by the profile-based `tools[]` list in agent profiles. +The Toolbox Refiner would make this dynamic (keyword or embedding matching) rather than +requiring a manually-maintained allowlist per profile. --- -### 7. Recommendations and Gaps Addressed +### 5. The Role of Templates in the Dynamic System + +Templates (agent profiles + skills) play a supporting role — they are **not** the source of +concrete steps. Their actual function is: -| Gap | Solution implemented | +| Template element | Role | |---|---| -| Planner didn't know lifecycle commands | ✅ `buildPlannerTask` now includes `build`, `test`, `lint` commands from `WorkspaceInfo` | -| Only Node/Python/Go workspace detection | ✅ Added CMake, Rust/Cargo, Gradle, Maven analyzers in `workspace.ts` | -| No build-workflow agent profile | ✅ `build-verify.agent.json` and `test-runner.agent.json` added | -| No build-workflow skill | ✅ `build-verify.skill.md` and `cmake-workflow.skill.md` added | +| Agent profile (`tools`, `temperature`, `maxIterations`) | Shapes the execution environment for a step | +| Skill (`promptFragment`) | Provides domain guidance to the agent running the step — what to look for, what errors mean, how to report | + +The **concrete steps** always come from the planner, which derives them from: +1. The generic intent ("verify-build") +2. The workspace context (`WorkspaceInfo` from `analyzeWorkspace()`) +3. 
The available agent profiles (the planner can annotate `agentProfile` per step) + +A `build-verify` profile + skill gives the executing agent the knowledge to: +- Identify which build system is in use (from the workspace `language` field) +- Interpret compiler output (error triage heuristics in the skill) +- Produce a structured success/failure report + +But the specific commands to run come from the workspace analysis, injected into the planner +context at planning time. + +--- + +### 6. Recommended Interaction Pattern: Full "verify-build" Flow -### 8. Remaining Open Questions +Combining the above, the complete agent-driven "verify-build" flow using agentloop components: -1. **Template registry**: Should templates be discoverable at runtime (e.g. `list-templates` tool) so the planner can reference them by name? The current profile registry partially serves this role. -2. **Workspace-once vs task-every-time**: For expensive workspace analysis (submodule init, dependency install), should a "workspace setup" template run once at session start and cache results? This aligns with CrewAI's `before_kickoff` hook concept. -3. **Multi-repo / monorepo**: `analyzeWorkspace` currently detects one build system per root. Monorepos with mixed build systems (e.g. a CMake C++ library + a Node.js frontend) need a recursive scan. -4. **Template versioning**: When the workspace changes (new preset, renamed script), how are baked templates kept in sync? A solution is to keep commands in `WorkspaceInfo` (auto-detected) rather than hard-coding them in profile prompts. 
+``` +User: "verify-build" + │ + ▼ +[A] analyzeWorkspace(rootPath) + → WorkspaceInfo { buildCommand="cmake --preset …", language="cmake", … } + │ + ▼ +[B] generatePlan("verify-build", workspaceInfo, registry, profileRegistry) + → Plan { + steps: [ + { description: "Run: cmake --preset …", + toolsNeeded: ["shell"], agentProfile: "build-verify" }, + { description: "Report build result", + toolsNeeded: [], agentProfile: "build-verify" } + ] + } + │ + ▼ +[C] executePlan(plan, registry, { verificationEnabled: true, task: "verify-build", workspaceInfo }) + │ + ├─ step 0: runStep() → shell("cmake --preset …") → output + │ verifyStep() → VerificationResult { passed, reasoning, issues } + │ └─ fail? → refinePlan(feedback) → re-execute + │ + └─ step 1: runStep() → agent synthesises report from step 0 output + verifyStep() → confirm report contains success/failure conclusion + │ + ▼ +ExecutionResult { stepResults, success, verificationResults } +``` + +The key properties of this flow: +- **Generic intent, concrete execution**: "verify-build" is never mapped to cmake commands in + config — the planner derives them from workspace analysis. +- **Self-correcting**: the PEV loop (Issue 3) catches silent failures and replans. +- **Extensible**: adding support for a new build system requires only updating + `analyzeWorkspace()` — no profile or template changes needed. +- **Composable**: the same flow applies to "run-tests", "lint", or any other lifecycle intent. + +--- + +### 7. Gap Summary Relative to Baseline Research + +| Baseline Issue | Pattern | Essential for "verify-build"? 
| Current status | +|---|---|---|---| +| Issue 3 | Plan-Execute-Verify loop | ✅ Critical — without it, "verify" is just "run" | ❌ Not yet implemented | +| Issue 3 | Dynamic replanning on verification failure | ✅ Critical — enables self-correction | ❌ Not yet implemented | +| Issue 4 | Dynamic task decomposition | ✅ Important — handles mid-execution surprises | ❌ Not yet implemented | +| Issue 4 | Hierarchical delegation | 🔶 Architectural — enables active workspace probing | ❌ Not yet implemented | +| Issue 5 | Toolbox Refiner | 🔶 Supporting — reduces noise in build agent | ❌ Not yet implemented | +| Issue 2 | Persistent memory | 🔶 Optional — cache workspace analysis across sessions | ❌ Not yet implemented | + +### 8. What Has Been Improved in This PR + +| Change | Effect | +|---|---| +| `analyzeWorkspace()` now detects CMake, Cargo, Gradle, Maven | Workspace analysis returns concrete lifecycle commands for more build systems | +| `buildPlannerTask()` includes `buildCommand`/`testCommand`/`lintCommand` | Planner receives concrete command strings → produces workspace-specific plan steps without hardcoding | +| `build-verify` and `test-runner` agent profiles | Execution environment for build/test steps — define which tools and temperature are appropriate | +| `build-verify` skill | Domain guidance injected into the build agent — how to identify the build system, interpret output, triage errors | + +These improvements advance Step [1] (workspace analysis) and Step [2] (planner context) of the +transformation pipeline. Steps [3] and [4] (verification and dynamic replanning) require the +Plan-Execute-Verify implementation from Issue 3 to be complete. 
diff --git a/src/__tests__/builtin-skills.test.ts b/src/__tests__/builtin-skills.test.ts index b2107d10..3c96455d 100644 --- a/src/__tests__/builtin-skills.test.ts +++ b/src/__tests__/builtin-skills.test.ts @@ -21,10 +21,9 @@ describe("built-in skill library", () => { "git-workflow", "security-auditor", "build-verify", - "cmake-workflow", ]; - it("loads all 7 built-in skills", () => { + it("loads all 6 built-in skills", () => { const names = registry.list().map((s) => s.name); for (const name of BUILTIN_NAMES) { expect(names).toContain(name); diff --git a/src/__tests__/fixtures/workspace-cargo/tests/.keep b/src/__tests__/fixtures/workspace-cargo/tests/.keep new file mode 100644 index 00000000..e69de29b diff --git a/src/__tests__/fixtures/workspace-cmake/tests/.keep b/src/__tests__/fixtures/workspace-cmake/tests/.keep new file mode 100644 index 00000000..e69de29b diff --git a/src/__tests__/fixtures/workspace-gradle-kotlin/src/test/.keep b/src/__tests__/fixtures/workspace-gradle-kotlin/src/test/.keep new file mode 100644 index 00000000..e69de29b diff --git a/src/__tests__/fixtures/workspace-gradle/src/test/.keep b/src/__tests__/fixtures/workspace-gradle/src/test/.keep new file mode 100644 index 00000000..e69de29b diff --git a/src/__tests__/fixtures/workspace-maven/src/test/.keep b/src/__tests__/fixtures/workspace-maven/src/test/.keep new file mode 100644 index 00000000..e69de29b diff --git a/src/skills/builtin/cmake-workflow.skill.md b/src/skills/builtin/cmake-workflow.skill.md deleted file mode 100644 index 856f0366..00000000 --- a/src/skills/builtin/cmake-workflow.skill.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -name: cmake-workflow -description: CMake-specific build, test, and packaging patterns including preset-based workflows -version: 1.0.0 -slot: section ---- - -## CMake Workflow Guidelines - -### Project layout conventions - -- Source lives in `src/`; headers in `include/`; tests in `tests/` or `test/`. 
-- Out-of-source builds go in `build/` (excluded from version control via `.gitignore`). -- `CMakeLists.txt` at the repository root is the entry point; each subdirectory may have its own `CMakeLists.txt`. - -### Preset-based workflow (preferred when `CMakePresets.json` exists) - -```bash -# Configure -cmake --preset # e.g. linux-release, debug, ci - -# Build -cmake --build --preset [--parallel $(nproc)] - -# Test -ctest --preset [--output-on-failure] -``` - -List available presets: -```bash -cmake --list-presets # configure presets -cmake --build --list-presets # build presets -ctest --list-presets # test presets -``` - -### Classic out-of-source workflow (no presets) - -```bash -# Configure (Release build, Ninja generator recommended) -cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release - -# Build (parallel) -cmake --build build --parallel $(nproc) - -# Test -cd build && ctest --output-on-failure -``` - -### Dependency management - -- **Submodules**: always run `git submodule update --init --recursive` before configuring. -- **find_package**: ensure system libraries are installed (e.g. `sudo apt install libssl-dev`). -- **FetchContent / CPM.cmake**: dependencies are downloaded during configure; verify internet access or a local cache is available. -- **vcpkg / Conan**: run `vcpkg install` or `conan install .` before `cmake -S . -B build`. - -### Install-step dependencies pattern - -When a project ships a dependency-installation script (e.g. 
`scripts/install-linux-deps.sh`), run it *before* the CMake configure step: - -```bash -sudo bash scripts/install-linux-deps.sh -git submodule update --init --recursive -cmake --preset -cmake --build --preset --parallel $(nproc) -``` - -### Common CMake variables - -| Variable | Purpose | -|---|---| -| `CMAKE_BUILD_TYPE` | `Debug`, `Release`, `RelWithDebInfo`, `MinSizeRel` | -| `CMAKE_INSTALL_PREFIX` | Install destination (default `/usr/local`) | -| `CMAKE_TOOLCHAIN_FILE` | Cross-compile or vcpkg toolchain | -| `BUILD_SHARED_LIBS` | `ON` to build shared libraries by default | -| `CMAKE_EXPORT_COMPILE_COMMANDS` | `ON` to generate `compile_commands.json` for tooling | - -### Diagnosing build failures - -1. Check the **configure step** output first — missing dependencies abort here. -2. Look for the **first** error in compiler output; subsequent errors are often cascading. -3. Enable verbose output to see exact compiler flags: `cmake --build build --verbose` or `VERBOSE=1 make`. -4. Use `--fresh` flag to force a clean reconfigure: `cmake --fresh --preset `. 
From cf9c451c3491b03b5eaaf2f9dcfcece44d2ad8fe Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 05:40:23 +0000 Subject: [PATCH 4/5] feat: ProjectExplorer agent, WorkspaceContext type, and updated planning pipeline - Add `WorkspaceContext` interface and `toWorkspaceContext()` helper to src/workspace.ts - Add `src/agents/project-explorer.ts`: LLM-driven workspace exploration via file-list and file-read tools; produces WorkspaceContext with buildSystems[] and explorerNotes; no hardcoded command strings for any build system - Update generatePlan/refinePlan in src/subagents/planner.ts to accept WorkspaceContext; buildPlannerTask now includes buildSystems notes and explorerNotes in the planner prompt - Update coordinatedExecute in src/agents/coordinator.ts to accept workspaceContext - Update plan.ts and plan-and-run.ts tools to wrap analyzeWorkspace() with toWorkspaceContext() - Add src/__tests__/project-explorer.test.ts (11 tests) - Update src/__tests__/planner.test.ts and e2e test to use WorkspaceContext - Update issues/2.md to document the ProjectExplorer and WorkspaceContext design Agent-Logs-Url: https://github.com/huberp/agentloop/sessions/a68943d2-839b-4e55-ba44-3606340352a0 Co-authored-by: huberp <4027454+huberp@users.noreply.github.com> --- issues/2.md | 154 ++++++---- src/__tests__/planner.test.ts | 115 ++++++- src/__tests__/project-explorer.test.ts | 303 +++++++++++++++++++ src/agents/coordinator.ts | 15 +- src/agents/project-explorer.ts | 202 +++++++++++++ src/subagents/planner.ts | 50 ++- src/tools/plan-and-run.ts | 7 +- src/tools/plan.ts | 7 +- src/workspace.ts | 22 ++ tests/e2e/scenarios/plan-feature.e2e.test.ts | 26 +- 10 files changed, 801 insertions(+), 100 deletions(-) create mode 100644 src/__tests__/project-explorer.test.ts create mode 100644 src/agents/project-explorer.ts diff --git a/issues/2.md b/issues/2.md index 67177cac..8f0bffc7 100644 --- a/issues/2.md +++ b/issues/2.md 
@@ -37,11 +37,18 @@ agentloop already has the primitives for this transformation. The pipeline is: Generic intent: "verify-build" │ ▼ -[1] analyzeWorkspace() ← detects build system, extracts lifecycle commands - │ WorkspaceInfo { language="cmake", buildCommand="cmake --preset…", … } +[1] ProjectExplorer.exploreWorkspace() ← LLM-driven exploration via file-list + file-read + │ WorkspaceContext { + │ workspaceInfo: { language="cmake", packageManager="cmake", ... }, + │ buildSystems: [ + │ { name="cmake", configFile="CMakeLists.txt", + │ notes="CMakePresets.json present. Use preset 'linux-release' for release." } + │ ], + │ explorerNotes: "..." + │ } ▼ -[2] generatePlan() / planner ← LLM reasons: intent + workspace → concrete PlanStep[] - │ PlanStep { description="Run cmake --preset linux-release …", +[2] generatePlan() / planner ← LLM reasons: intent + WorkspaceContext → concrete PlanStep[] + │ PlanStep { description="Configure with cmake --preset linux-release …", │ toolsNeeded=["shell"], agentProfile="build-verify" } ▼ [3] executePlan() / orchestrator ← activates per-step agent profile, runs subagent @@ -50,35 +57,61 @@ Generic intent: "verify-build" [4] StepResult { output="…compiler output…", status="success"|"failed" } ``` -#### Step [1] — `analyzeWorkspace()` in `src/workspace.ts` - -This is the **workspace probe**. It inspects the repository root for indicator files -(`CMakeLists.txt`, `Cargo.toml`, `package.json`, `build.gradle`, `pom.xml`, etc.) and extracts -the concrete lifecycle commands (`buildCommand`, `testCommand`, `lintCommand`). The result is a -`WorkspaceInfo` object — the source of truth for what commands the workspace actually uses. +#### Step [1] — `ProjectExplorer` in `src/agents/project-explorer.ts` + +This is the **workspace probe** — an LLM-driven agent, not a hardcoded heuristic. +`exploreWorkspace({ registry, llm })` runs a `project-explorer` subagent with access to the +`file-list` and `file-read` tools. The agent: + +1. 
Lists the root directory tree with `file-list` to understand project layout. +2. Identifies and reads all key manifest/configuration files (package.json, Cargo.toml, + CMakeLists.txt, CMakePresets.json, build.gradle, pom.xml, go.mod, etc.) with `file-read`. +3. Reasons from the file contents to produce a **`WorkspaceContext`** — a generic map with a + `workspaceInfo: WorkspaceInfo` key (the flat summary the planner needs) plus additional + keys: `buildSystems[]` (one entry per detected build system, with `name`, `configFile`, and + human-readable `notes`) and `explorerNotes` (free-form observations, e.g. monorepo layout). + +**Key properties**: +- **No hardcoded commands**: the agent reads `CMakeLists.txt` and `CMakePresets.json` and + reasons about which preset names to use, rather than assuming `cmake --preset default`. + If CMake is not on PATH (e.g. on Windows with a non-standard installation), the notes will + reflect that the invocation style must be determined at execution time. +- **Multi-build-system support**: a monorepo with a C++ core (CMake) and Rust bindings + (Cargo) produces two `buildSystems` entries, both forwarded to the planner. +- **One project, one call**: exploration happens once before planning; the resulting + `WorkspaceContext` is reused across all planning and re-planning calls. + +`WorkspaceContext` is defined in `src/workspace.ts` as: +```typescript +export interface WorkspaceContext extends Record<string, unknown> { + workspaceInfo: WorkspaceInfo; +} +``` +The `extends Record<string, unknown>` part allows exploration agents to contribute arbitrary +additional keys (e.g. `buildSystems`, `explorerNotes`, `dependencyGraph`) without requiring +changes to the planner or orchestrator interfaces. -This is the right place for build-system detection: once, per session, before planning. 
+For backward compatibility, `toWorkspaceContext(info: WorkspaceInfo): WorkspaceContext` wraps +the result of the existing `analyzeWorkspace()` heuristic into a minimal context when the +LLM-driven explorer is not available. #### Step [2] — `generatePlan()` / `buildPlannerTask()` in `src/subagents/planner.ts` -The planner LLM receives the generic intent plus the `WorkspaceInfo` context (including -detected lifecycle commands) and reasons about what concrete steps to produce. This is the -**intent-to-steps transformation**: +The planner LLM receives the generic intent plus the full `WorkspaceContext` and reasons about +what concrete steps to produce. This is the **intent-to-steps transformation**: ``` Task: verify-build -Workspace: language=cmake, packageManager=cmake, - build="cmake --preset linux-release && cmake --build --preset linux-build", - test="ctest --preset default" +Workspace: language=cmake, packageManager=cmake, gitInitialized=true +Detected build systems: + - cmake (CMakeLists.txt): CMakePresets.json present. Use preset 'linux-release' for + release builds. Preset 'linux-test' for tests via CTest. Available tools: shell, file-read, file-list ``` -The LLM returns a plan with concrete step descriptions drawn from the workspace context. No -static configuration is needed — the planner derives the steps dynamically from what -`analyzeWorkspace()` found. - -The planner can also annotate each step with an `agentProfile`, directing the orchestrator to -activate a specialised agent (e.g. `build-verify`) for that step. +When the context was produced by `ProjectExplorer`, the `buildSystems[].notes` strings give +the planner the information it needs to derive concrete commands (e.g. "use the linux-release +preset") without any commands being hardcoded anywhere in the system. 
#### Step [3] — `executePlan()` / `runStep()` in `src/orchestrator.ts` @@ -163,7 +196,7 @@ version, first-time setup required), the agent has no way to adapt mid-execution --- -#### 4.3 Hierarchical Delegation (Baseline Issue 4) — **Architectural** +#### 4.3 Hierarchical Delegation (Baseline Issue 4) — **Architectural → Partially Implemented** **Why it matters**: At a higher level of organisation, a _coordinator agent_ can receive the "verify-build" intent and delegate workspace analysis and step instantiation to a child agent. @@ -172,20 +205,24 @@ This is the **Hierarchical pattern** from Issue 4. **Interaction pattern**: ``` Coordinator receives: "verify-build" - └─ calls delegate_subagent("workspace-analyst") - └─ workspace-analyst: reads workspace, returns WorkspaceInfo + recommended steps - └─ coordinator constructs a plan from the returned recommendations - └─ calls delegate_subagent("build-verify") with concrete step descriptions - └─ build-verify agent: executes build, returns structured result + └─ calls exploreWorkspace({ registry }) ← ProjectExplorer subagent (NEW) + └─ project-explorer: reads workspace via file-list + file-read + → WorkspaceContext { workspaceInfo, buildSystems, explorerNotes } + └─ calls generatePlan("verify-build", context, registry, profileRegistry) + → Plan with concrete step descriptions derived from buildSystems notes + └─ calls executePlan(plan, registry, { profileRegistry }) + └─ per-step subagents execute build/test/lint commands └─ coordinator synthesises final report ``` -This pattern separates concerns cleanly: the coordinator holds the intent, the workspace -analyst provides grounding, and the build-verify agent executes. Today's planner partially -plays the coordinator role, but it cannot delegate to a workspace analyst because it is a -tool-free subagent that only outputs JSON. 
Hierarchical delegation would allow the planner to -_actively probe_ the workspace via tool calls rather than relying on pre-computed -`WorkspaceInfo`. +**What has been implemented**: `ProjectExplorer` (`src/agents/project-explorer.ts`) is the +workspace-analyst child agent. It uses `file-list` and `file-read` tools — no hardcoded +command strings — and produces a `WorkspaceContext` that `coordinatedExecute()` (updated to +accept `workspaceContext: WorkspaceContext`) forwards directly to the planner. + +**What remains**: The higher-level coordinator that orchestrates `ProjectExplorer` → +`generatePlan` → `executePlan` as a single flow. Today these are called in sequence in user +code; wrapping them in a `coordinatedWorkflowExecute()` function would complete this pattern. --- @@ -235,14 +272,18 @@ Combining the above, the complete agent-driven "verify-build" flow using agentlo User: "verify-build" │ ▼ -[A] analyzeWorkspace(rootPath) - → WorkspaceInfo { buildCommand="cmake --preset …", language="cmake", … } +[A] exploreWorkspace({ registry, llm }) ← ProjectExplorer reads workspace via tools + → WorkspaceContext { + workspaceInfo: { language="cmake", ... }, + buildSystems: [{ name="cmake", configFile="CMakeLists.txt", + notes="CMakePresets.json present. Use preset linux-release." 
}] + } │ ▼ -[B] generatePlan("verify-build", workspaceInfo, registry, profileRegistry) +[B] generatePlan("verify-build", context, registry, profileRegistry) → Plan { steps: [ - { description: "Run: cmake --preset …", + { description: "Configure and build: cmake --preset linux-release …", toolsNeeded: ["shell"], agentProfile: "build-verify" }, { description: "Report build result", toolsNeeded: [], agentProfile: "build-verify" } @@ -250,11 +291,11 @@ User: "verify-build" } │ ▼ -[C] executePlan(plan, registry, { verificationEnabled: true, task: "verify-build", workspaceInfo }) +[C] executePlan(plan, registry, { verificationEnabled: true, task: "verify-build", context }) │ ├─ step 0: runStep() → shell("cmake --preset …") → output │ verifyStep() → VerificationResult { passed, reasoning, issues } - │ └─ fail? → refinePlan(feedback) → re-execute + │ └─ fail? → refinePlan(feedback, context) → re-execute │ └─ step 1: runStep() → agent synthesises report from step 0 output verifyStep() → confirm report contains success/failure conclusion @@ -265,10 +306,17 @@ ExecutionResult { stepResults, success, verificationResults } The key properties of this flow: - **Generic intent, concrete execution**: "verify-build" is never mapped to cmake commands in - config — the planner derives them from workspace analysis. -- **Self-correcting**: the PEV loop (Issue 3) catches silent failures and replans. -- **Extensible**: adding support for a new build system requires only updating - `analyzeWorkspace()` — no profile or template changes needed. + config — the ProjectExplorer reads the project's own files and the planner derives commands + from what the explorer found. +- **Platform-agnostic**: the explorer doesn't assume `cmake` is on PATH; it reads + `CMakePresets.json` and notes which presets are available; the executing agent resolves the + invocation for the target platform. 
+- **Multi-build-system aware**: the explorer reports all detected build systems; the planner + can produce steps for each. +- **Self-correcting**: the PEV loop (Issue 3) catches silent failures and replans using the + same `WorkspaceContext`. +- **Extensible**: adding support for a new build system only requires the explorer's LLM to + recognise a new config file format — no TypeScript code changes needed. - **Composable**: the same flow applies to "run-tests", "lint", or any other lifecycle intent. --- @@ -280,7 +328,7 @@ The key properties of this flow: | Issue 3 | Plan-Execute-Verify loop | ✅ Critical — without it, "verify" is just "run" | ❌ Not yet implemented | | Issue 3 | Dynamic replanning on verification failure | ✅ Critical — enables self-correction | ❌ Not yet implemented | | Issue 4 | Dynamic task decomposition | ✅ Important — handles mid-execution surprises | ❌ Not yet implemented | -| Issue 4 | Hierarchical delegation | 🔶 Architectural — enables active workspace probing | ❌ Not yet implemented | +| Issue 4 | Hierarchical delegation / ProjectExplorer | 🔶 Architectural — enables active workspace probing | ✅ **Implemented** (this PR) | | Issue 5 | Toolbox Refiner | 🔶 Supporting — reduces noise in build agent | ❌ Not yet implemented | | Issue 2 | Persistent memory | 🔶 Optional — cache workspace analysis across sessions | ❌ Not yet implemented | @@ -288,11 +336,15 @@ The key properties of this flow: | Change | Effect | |---|---| -| `analyzeWorkspace()` now detects CMake, Cargo, Gradle, Maven | Workspace analysis returns concrete lifecycle commands for more build systems | -| `buildPlannerTask()` includes `buildCommand`/`testCommand`/`lintCommand` | Planner receives concrete command strings → produces workspace-specific plan steps without hardcoding | +| `ProjectExplorer` agent (`src/agents/project-explorer.ts`) | LLM-driven workspace exploration using `file-list` + `file-read` tools; no hardcoded command strings; supports multi-build-system workspaces | +| 
`WorkspaceContext` type + `toWorkspaceContext()` in `src/workspace.ts` | Generic context map that flows through the planning pipeline; holds `workspaceInfo` plus arbitrary explorer-contributed keys (`buildSystems`, `explorerNotes`) | +| `generatePlan()` / `refinePlan()` accept `WorkspaceContext` (updated in `src/subagents/planner.ts`) | Planner task now includes `buildSystems[].notes` and `explorerNotes` when present → LLM derives concrete commands from actual file content, not hardcoded heuristics | +| `coordinatedExecute()` accepts `workspaceContext: WorkspaceContext` (updated in `src/agents/coordinator.ts`) | High-level entry point is now compatible with the `ProjectExplorer` output | +| `plan` and `plan-and-run` tools wrap `analyzeWorkspace()` with `toWorkspaceContext()` | Backward-compatible fallback for simple workspaces (Node/Python/Go) where LLM-driven exploration isn't needed | | `build-verify` and `test-runner` agent profiles | Execution environment for build/test steps — define which tools and temperature are appropriate | | `build-verify` skill | Domain guidance injected into the build agent — how to identify the build system, interpret output, triage errors | -These improvements advance Step [1] (workspace analysis) and Step [2] (planner context) of the -transformation pipeline. Steps [3] and [4] (verification and dynamic replanning) require the -Plan-Execute-Verify implementation from Issue 3 to be complete. +These improvements implement the Hierarchical Delegation pattern (Step [1] workspace probe) +and advance Step [2] (planner context) of the transformation pipeline. The verification loop +and dynamic replanning (Steps [3] and [4]) require the Plan-Execute-Verify implementation +from Issue 3 to be complete. 
diff --git a/src/__tests__/planner.test.ts b/src/__tests__/planner.test.ts index a2fa8562..236e82f4 100644 --- a/src/__tests__/planner.test.ts +++ b/src/__tests__/planner.test.ts @@ -9,7 +9,7 @@ import type { BaseChatModel } from "@langchain/core/language_models/chat_models" import { ToolRegistry } from "../tools/registry"; import { generatePlan, validatePlan, refinePlan } from "../subagents/planner"; import type { Plan } from "../subagents/planner"; -import type { WorkspaceInfo } from "../workspace"; +import type { WorkspaceContext } from "../workspace"; // ───────────────────────────────────────────────────────────────────────────── // Helpers @@ -36,17 +36,19 @@ function makeRegistry(...names: string[]): ToolRegistry { return registry; } -/** A representative workspace used across tests. */ -const MOCK_WORKSPACE: WorkspaceInfo = { - language: "node", - framework: "express", - packageManager: "npm", - hasTests: true, - testCommand: "npm test", - lintCommand: "npm run lint", - buildCommand: "npm run build", - entryPoints: ["src/index.ts"], - gitInitialized: true, +/** A representative workspace context used across tests. 
*/ +const MOCK_WORKSPACE: WorkspaceContext = { + workspaceInfo: { + language: "node", + framework: "express", + packageManager: "npm", + hasTests: true, + testCommand: "npm test", + lintCommand: "npm run lint", + buildCommand: "npm run build", + entryPoints: ["src/index.ts"], + gitInitialized: true, + }, }; // ───────────────────────────────────────────────────────────────────────────── @@ -461,3 +463,92 @@ describe("generatePlan — agentProfile field", () => { expect(userMsg!.content).toContain("devops"); }); }); + +// ───────────────────────────────────────────────────────────────────────────── +// WorkspaceContext — richer context from ProjectExplorer +// ───────────────────────────────────────────────────────────────────────────── + +describe("generatePlan — WorkspaceContext with buildSystems", () => { + it("includes buildSystems notes in the planner task when context has buildSystems", async () => { + const invoke = jest.fn().mockResolvedValueOnce({ + content: JSON.stringify({ + steps: [{ description: "build the project", toolsNeeded: [], estimatedComplexity: "medium" }], + }), + tool_calls: [], + }); + + const mockLlm = { + bindTools: jest.fn().mockReturnValue({ invoke }), + } as unknown as BaseChatModel; + + const richContext: WorkspaceContext = { + workspaceInfo: { + language: "rust", + framework: "none", + packageManager: "cargo", + hasTests: true, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: true, + }, + buildSystems: [ + { + name: "cargo", + configFile: "Cargo.toml", + notes: "Workspace with members: core, cli. 
Use --workspace flag for full builds.", + }, + ], + }; + + await generatePlan("verify the build", richContext, new ToolRegistry(), mockLlm); + + const messages: Array<{ content: string }> = invoke.mock.calls[0][0]; + const userMsg = messages.find( + (m) => typeof m.content === "string" && m.content.includes("verify the build") + ); + expect(userMsg).toBeDefined(); + // buildSystems notes should appear in the task + expect(userMsg!.content).toContain("cargo"); + expect(userMsg!.content).toContain("Cargo.toml"); + expect(userMsg!.content).toContain("--workspace"); + }); + + it("includes explorerNotes in the planner task when context has explorerNotes", async () => { + const invoke = jest.fn().mockResolvedValueOnce({ + content: JSON.stringify({ + steps: [{ description: "step", toolsNeeded: [], estimatedComplexity: "low" }], + }), + tool_calls: [], + }); + + const mockLlm = { + bindTools: jest.fn().mockReturnValue({ invoke }), + } as unknown as BaseChatModel; + + const contextWithNotes: WorkspaceContext = { + workspaceInfo: { + language: "cpp", + framework: "none", + packageManager: "cmake", + hasTests: false, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: false, + }, + explorerNotes: "Multi-language monorepo: C++ core with Python bindings", + }; + + await generatePlan("verify-the-cpp-bindings", contextWithNotes, new ToolRegistry(), mockLlm); + + const messages: Array<{ content: string }> = invoke.mock.calls[0][0]; + const userMsg = messages.find( + (m) => typeof m.content === "string" && m.content.includes("verify-the-cpp-bindings") + ); + expect(userMsg).toBeDefined(); + expect(userMsg!.content).toContain("Multi-language monorepo"); + }); +}); diff --git a/src/__tests__/project-explorer.test.ts b/src/__tests__/project-explorer.test.ts new file mode 100644 index 00000000..10b3bba5 --- /dev/null +++ b/src/__tests__/project-explorer.test.ts @@ -0,0 +1,303 @@ +// Mock ChatMistralAI before any imports — prevents ESM parse 
errors in Jest +jest.mock("@langchain/mistralai", () => ({ + ChatMistralAI: jest.fn(), +})); + +import { z } from "zod"; +import type { BaseChatModel } from "@langchain/core/language_models/chat_models"; + +import { ToolRegistry } from "../tools/registry"; +import { exploreWorkspace } from "../agents/project-explorer"; +import type { WorkspaceContext } from "../workspace"; + +// ───────────────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────────────── + +function makeMockLlm(invokeFn: jest.Mock): BaseChatModel { + return { + bindTools: jest.fn().mockReturnValue({ invoke: invokeFn }), + } as unknown as BaseChatModel; +} + +/** Minimal registry with file-list and file-read stubs for unit tests. */ +function makeExplorerRegistry(): ToolRegistry { + const registry = new ToolRegistry(); + registry.register({ + name: "file-list", + description: "List directory contents", + schema: z.object({ path: z.string().optional(), recursive: z.boolean().optional() }), + execute: async () => JSON.stringify({ entries: [] }), + }); + registry.register({ + name: "file-read", + description: "Read a file", + schema: z.object({ path: z.string() }), + execute: async () => JSON.stringify({ content: "", encoding: "utf-8", sizeBytes: 0 }), + }); + return registry; +} + +// ───────────────────────────────────────────────────────────────────────────── +// exploreWorkspace +// ───────────────────────────────────────────────────────────────────────────── + +describe("exploreWorkspace", () => { + it("returns a WorkspaceContext with workspaceInfo from the LLM output", async () => { + const explorerOutput = JSON.stringify({ + workspaceInfo: { + language: "rust", + framework: "none", + packageManager: "cargo", + hasTests: true, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: ["src/main.rs"], + gitInitialized: true, + }, + buildSystems: [ + { + name: "cargo", + configFile: 
"Cargo.toml", + notes: "Single-crate project. Use 'cargo build' and 'cargo test'.", + }, + ], + explorerNotes: "Standard Rust project layout.", + }); + + const invoke = jest.fn().mockResolvedValueOnce({ content: explorerOutput, tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + + expect(context.workspaceInfo.language).toBe("rust"); + expect(context.workspaceInfo.packageManager).toBe("cargo"); + expect(context.workspaceInfo.hasTests).toBe(true); + expect(context.workspaceInfo.entryPoints).toContain("src/main.rs"); + expect(context.workspaceInfo.gitInitialized).toBe(true); + }); + + it("captures buildSystems from LLM output in the context map", async () => { + const explorerOutput = JSON.stringify({ + workspaceInfo: { + language: "cpp", + framework: "none", + packageManager: "cmake", + hasTests: false, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: false, + }, + buildSystems: [ + { + name: "cmake", + configFile: "CMakeLists.txt", + notes: "CMakePresets.json present. 
Use preset 'linux-release' for release builds.", + }, + ], + }); + + const invoke = jest.fn().mockResolvedValueOnce({ content: explorerOutput, tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + + expect(context.workspaceInfo.language).toBe("cpp"); + const buildSystems = context.buildSystems as Array<{ name: string; configFile: string; notes: string }>; + expect(Array.isArray(buildSystems)).toBe(true); + expect(buildSystems).toHaveLength(1); + expect(buildSystems[0].name).toBe("cmake"); + expect(buildSystems[0].notes).toContain("linux-release"); + }); + + it("captures explorerNotes in the context map when present", async () => { + const explorerOutput = JSON.stringify({ + workspaceInfo: { + language: "java", + framework: "spring", + packageManager: "gradle", + hasTests: true, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: true, + }, + explorerNotes: "Gradle wrapper (./gradlew) is present. 
Kotlin DSL (build.gradle.kts).", + }); + + const invoke = jest.fn().mockResolvedValueOnce({ content: explorerOutput, tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + + expect(context.explorerNotes).toContain("Gradle wrapper"); + expect(context.explorerNotes).toContain("Kotlin DSL"); + }); + + it("handles multiple build systems in a monorepo", async () => { + const explorerOutput = JSON.stringify({ + workspaceInfo: { + language: "cpp", + framework: "none", + packageManager: "cmake", + hasTests: true, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: true, + }, + buildSystems: [ + { + name: "cmake", + configFile: "CMakeLists.txt", + notes: "C++ core library.", + }, + { + name: "cargo", + configFile: "bindings/rust/Cargo.toml", + notes: "Rust FFI bindings to the C++ core.", + }, + ], + explorerNotes: "Hybrid C++/Rust project.", + }); + + const invoke = jest.fn().mockResolvedValueOnce({ content: explorerOutput, tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + + const buildSystems = context.buildSystems as Array<{ name: string }>; + expect(buildSystems).toHaveLength(2); + expect(buildSystems.map((b) => b.name)).toContain("cmake"); + expect(buildSystems.map((b) => b.name)).toContain("cargo"); + }); + + it("strips markdown code fences from the LLM output", async () => { + const inner = JSON.stringify({ + workspaceInfo: { + language: "go", + framework: "none", + packageManager: "go mod", + hasTests: false, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: false, + }, + }); + const fenced = `\`\`\`json\n${inner}\n\`\`\``; + + const invoke = jest.fn().mockResolvedValueOnce({ content: fenced, tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await 
exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + expect(context.workspaceInfo.language).toBe("go"); + }); + + it("returns a fallback context when the LLM output is not valid JSON", async () => { + const invoke = jest.fn().mockResolvedValueOnce({ content: "I could not explore", tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + + expect(context.workspaceInfo.language).toBe("unknown"); + expect(context.workspaceInfo.framework).toBe("none"); + expect(context.buildSystems).toBeUndefined(); + }); + + it("returns a fallback context when workspaceInfo is missing from JSON", async () => { + const invoke = jest + .fn() + .mockResolvedValueOnce({ content: JSON.stringify({ noInfo: true }), tool_calls: [] }); + const registry = makeExplorerRegistry(); + + const context = await exploreWorkspace({ registry, llm: makeMockLlm(invoke) }); + + expect(context.workspaceInfo.language).toBe("unknown"); + }); + + it("uses the file-list and file-read tools (registered by name) when exploring", async () => { + const explorerOutput = JSON.stringify({ + workspaceInfo: { + language: "node", + framework: "none", + packageManager: "npm", + hasTests: false, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: false, + }, + }); + + const invoke = jest + .fn() + .mockResolvedValueOnce({ content: explorerOutput, tool_calls: [] }); + + const mockLlm = { + bindTools: jest.fn().mockReturnValue({ invoke }), + } as unknown as BaseChatModel; + + const registry = makeExplorerRegistry(); + await exploreWorkspace({ registry, llm: mockLlm }); + + // The LLM should have been bound with the file-list and file-read tools + expect(mockLlm.bindTools).toHaveBeenCalledTimes(1); + const boundTools = (mockLlm.bindTools as jest.Mock).mock.calls[0][0] as Array<{ name: string }>; + const toolNames = boundTools.map((t) => t.name); + expect(toolNames).toContain("file-list"); 
+ expect(toolNames).toContain("file-read"); + }); +}); + +// ───────────────────────────────────────────────────────────────────────────── +// WorkspaceContext type +// ───────────────────────────────────────────────────────────────────────────── + +describe("WorkspaceContext type", () => { + it("workspaceInfo key is required", () => { + const ctx: WorkspaceContext = { + workspaceInfo: { + language: "python", + framework: "django", + packageManager: "poetry", + hasTests: true, + testCommand: "pytest", + lintCommand: "flake8", + buildCommand: "", + entryPoints: [], + gitInitialized: false, + }, + }; + expect(ctx.workspaceInfo.language).toBe("python"); + }); + + it("allows arbitrary additional keys in the context map", () => { + const ctx: WorkspaceContext = { + workspaceInfo: { + language: "go", + framework: "none", + packageManager: "go mod", + hasTests: false, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: true, + }, + buildSystems: [{ name: "go", configFile: "go.mod", notes: "Standard Go module." 
}], + explorerNotes: "Simple CLI tool.", + customKey: { nested: true }, + }; + expect(ctx.buildSystems).toBeDefined(); + expect(ctx.explorerNotes).toBe("Simple CLI tool."); + expect(ctx.customKey).toEqual({ nested: true }); + }); +}); diff --git a/src/agents/coordinator.ts b/src/agents/coordinator.ts index e3e07b24..c4a997cd 100644 --- a/src/agents/coordinator.ts +++ b/src/agents/coordinator.ts @@ -6,7 +6,7 @@ import { runSubagent } from "../subagents/runner"; import { generatePlan } from "../subagents/planner"; import { executePlan } from "../orchestrator"; import type { ExecutionOptions, ExecutionResult } from "../orchestrator"; -import type { WorkspaceInfo } from "../workspace"; +import type { WorkspaceContext } from "../workspace"; import type { AgentProfile } from "./types"; import type { AgentProfileRegistry } from "./registry"; @@ -122,8 +122,13 @@ export interface CoordinatedExecuteOptions { registry: ToolRegistry; /** Profile registry used for routing and per-step profile resolution. */ profileRegistry: AgentProfileRegistry; - /** Workspace analysis result (language, framework, etc.) — required by the planner. */ - workspaceInfo: WorkspaceInfo; + /** + * Workspace context for the planner. Use `toWorkspaceContext(await analyzeWorkspace(root))` + * for a quick heuristic analysis, or pass the result of `exploreWorkspace()` (ProjectExplorer + * agent) for a richer, LLM-derived context that handles multi-build-system workspaces without + * any hardcoded command strings. + */ + workspaceContext: WorkspaceContext; /** Optional LLM instance; created from config when omitted. 
*/ llm?: BaseChatModel; /** @@ -168,7 +173,7 @@ export async function coordinatedExecute( const { registry, profileRegistry, - workspaceInfo, + workspaceContext, llm, executionOptions = {}, invoke, @@ -182,7 +187,7 @@ export async function coordinatedExecute( const profile = await routeRequest(request, profileRegistry, registry, llm); // Step 2: Generate a plan (with profile annotations when a registry is available) - const plan = await generatePlan(request, workspaceInfo, registry, llm, profileRegistry); + const plan = await generatePlan(request, workspaceContext, registry, llm, profileRegistry); // Step 3: Choose execution path based on plan size if (plan.steps.length <= planThreshold) { diff --git a/src/agents/project-explorer.ts b/src/agents/project-explorer.ts new file mode 100644 index 00000000..904280cc --- /dev/null +++ b/src/agents/project-explorer.ts @@ -0,0 +1,202 @@ +import type { BaseChatModel } from "@langchain/core/language_models/chat_models"; + +import { logger } from "../logger"; +import { ToolRegistry } from "../tools/registry"; +import { runSubagent } from "../subagents/runner"; +import type { WorkspaceInfo, WorkspaceContext } from "../workspace"; + +// ───────────────────────────────────────────────────────────────────────────── +// Types +// ───────────────────────────────────────────────────────────────────────────── + +/** + * One detected build system within a workspace. + * A workspace may contain several (e.g. a C++ core + Python bindings). + */ +export interface BuildSystemInfo { + /** Short identifier for the build system (e.g. 'cargo', 'cmake', 'gradle'). */ + name: string; + /** Workspace-root-relative path to the primary config file. */ + configFile: string; + /** Relevant details the planner should use when choosing how to invoke this system. 
*/ + notes: string; +} + +// ───────────────────────────────────────────────────────────────────────────── +// Prompt +// ───────────────────────────────────────────────────────────────────────────── + +const EXPLORER_SYSTEM_PROMPT = + `You are a workspace exploration agent. Your job is to analyse a software project's file system ` + + `using the available tools and produce a structured JSON description of the workspace.\n\n` + + `Exploration steps (use the tools to gather information):\n` + + `1. Call file-list with recursive=true to understand the top-level directory structure.\n` + + `2. Identify key project files (package.json, Cargo.toml, CMakeLists.txt, CMakePresets.json, ` + + `build.gradle, build.gradle.kts, pom.xml, go.mod, pyproject.toml, requirements.txt, Makefile, etc.).\n` + + `3. Call file-read on each identified key file to extract language, framework, build system, ` + + `package manager, and test setup information.\n` + + `4. A project may contain MORE THAN ONE build system — report all of them.\n\n` + + `After exploration, respond ONLY with a valid JSON object matching this exact schema (no prose, ` + + `no markdown fences):\n` + + `{\n` + + ` "workspaceInfo": {\n` + + ` "language": "primary language (node|python|go|rust|java|kotlin|cpp|unknown)",\n` + + ` "framework": "detected framework or 'none'",\n` + + ` "packageManager": "package manager (npm|yarn|pnpm|cargo|gradle|maven|poetry|pip|go mod|unknown)",\n` + + ` "hasTests": true|false,\n` + + ` "testCommand": "empty string — the planner derives the actual command from buildSystems notes",\n` + + ` "lintCommand": "empty string — same rationale",\n` + + ` "buildCommand": "empty string — same rationale",\n` + + ` "entryPoints": ["list of main entry-point files relative to root, or empty"],\n` + + ` "gitInitialized": true|false\n` + + ` },\n` + + ` "buildSystems": [\n` + + ` {\n` + + ` "name": "build system identifier",\n` + + ` "configFile": "root-relative path to primary config file",\n` + + ` 
"notes": "concise details the planner needs: preset names, wrapper scripts, workspace members, etc."\n` + + ` }\n` + + ` ],\n` + + ` "explorerNotes": "optional free-form observations (e.g. multi-language monorepo, unusual layout)"\n` + + `}\n\n` + + `Rules:\n` + + `- Leave testCommand, buildCommand, and lintCommand as empty strings. The planner LLM will derive\n` + + ` the actual commands from the buildSystems[].notes you provide.\n` + + `- Include ALL detected build systems. Do not skip secondary ones.\n` + + `- For CMake: note whether CMakePresets.json exists and list relevant preset names from it.\n` + + `- For Cargo: note whether it is a workspace (multiple members) or a single crate.\n` + + `- For Gradle: note whether ./gradlew wrapper is present and whether Kotlin DSL is used.\n` + + `- For Maven: note whether ./mvnw wrapper is present.\n` + + `- Produce at least the workspaceInfo object even if no build system was found.`; + +// ───────────────────────────────────────────────────────────────────────────── +// Public API +// ───────────────────────────────────────────────────────────────────────────── + +/** Options for `exploreWorkspace()`. */ +export interface ExploreWorkspaceOptions { + /** Tool registry that must contain 'file-list' and 'file-read'. */ + registry: ToolRegistry; + /** Optional LLM — created from config when omitted. */ + llm?: BaseChatModel; + /** + * Maximum LLM iterations for the explorer subagent. + * Each iteration may call tools; defaults to 10. + */ + maxIterations?: number; +} + +/** + * Run the ProjectExplorer agent to produce a rich `WorkspaceContext`. + * + * The agent uses `file-list` and `file-read` tools to examine the workspace + * and derives build system information through LLM reasoning — no hardcoded + * commands are baked in. The resulting `WorkspaceContext` can be passed + * directly to `generatePlan()`. 
+ * + * If `file-list` or `file-read` are not registered in `registry`, the explorer + * still runs but the LLM has no tool access; it will produce a best-effort + * context based on its own knowledge. + * + * @param options Registry, optional LLM, and optional iteration limit. + */ +export async function exploreWorkspace(options: ExploreWorkspaceOptions): Promise<WorkspaceContext> { + const { registry, llm, maxIterations = 10 } = options; + + const task = + `Explore the project workspace using file-list and file-read.\n` + + `Steps:\n` + + `a) Call file-list (recursive=true) to see the full directory tree.\n` + + `b) Identify and read all key project manifest / configuration files.\n` + + `c) Identify all build systems present (there may be more than one).\n` + + `Then produce the final JSON object as described in the system prompt.`; + + const result = await runSubagent( + { + name: "project-explorer", + systemPrompt: EXPLORER_SYSTEM_PROMPT, + tools: ["file-list", "file-read"], + maxIterations, + }, + task, + registry, + llm + ); + + logger.info({ subagent: "project-explorer" }, "Workspace exploration complete"); + + return parseExplorerOutput(result.output); +} + +// ───────────────────────────────────────────────────────────────────────────── +// Internal helpers +// ───────────────────────────────────────────────────────────────────────────── + +function buildFallbackContext(): WorkspaceContext { + const workspaceInfo: WorkspaceInfo = { + language: "unknown", + framework: "none", + packageManager: "unknown", + hasTests: false, + testCommand: "", + lintCommand: "", + buildCommand: "", + entryPoints: [], + gitInitialized: false, + }; + return { workspaceInfo }; +} + +function parseWorkspaceInfo(raw: unknown): WorkspaceInfo { + const base: WorkspaceInfo = buildFallbackContext().workspaceInfo; + if (typeof raw !== "object" || raw === null) return base; + const obj = raw as Record<string, unknown>; + + return { + language: typeof obj.language === "string" ?
obj.language : base.language, + framework: typeof obj.framework === "string" ? obj.framework : base.framework, + packageManager: typeof obj.packageManager === "string" ? obj.packageManager : base.packageManager, + hasTests: typeof obj.hasTests === "boolean" ? obj.hasTests : base.hasTests, + testCommand: typeof obj.testCommand === "string" ? obj.testCommand : base.testCommand, + lintCommand: typeof obj.lintCommand === "string" ? obj.lintCommand : base.lintCommand, + buildCommand: typeof obj.buildCommand === "string" ? obj.buildCommand : base.buildCommand, + entryPoints: Array.isArray(obj.entryPoints) + ? (obj.entryPoints as unknown[]).filter((e): e is string => typeof e === "string") + : base.entryPoints, + gitInitialized: typeof obj.gitInitialized === "boolean" ? obj.gitInitialized : base.gitInitialized, + }; +} + +function parseExplorerOutput(output: string): WorkspaceContext { + // Strip optional markdown code fences + const stripped = output.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/, "").trim(); + + let parsed: unknown; + try { + parsed = JSON.parse(stripped); + } catch { + logger.warn( + { preview: stripped.slice(0, 200) }, + "ProjectExplorer output is not valid JSON; returning fallback context" + ); + return buildFallbackContext(); + } + + if (typeof parsed !== "object" || parsed === null) { + return buildFallbackContext(); + } + + const raw = parsed as Record<string, unknown>; + const workspaceInfo = parseWorkspaceInfo(raw.workspaceInfo); + const context: WorkspaceContext = { workspaceInfo }; + + // Preserve additional discovered data in the context map + if (raw.buildSystems !== undefined) { + context.buildSystems = raw.buildSystems as BuildSystemInfo[]; + } + if (typeof raw.explorerNotes === "string" && raw.explorerNotes.trim() !== "") { + context.explorerNotes = raw.explorerNotes; + } + + return context; +} diff --git a/src/subagents/planner.ts b/src/subagents/planner.ts index 624c6c56..eedac309 100644 --- a/src/subagents/planner.ts +++ b/src/subagents/planner.ts
@@ -3,7 +3,7 @@ import type { BaseChatModel } from "@langchain/core/language_models/chat_models" import { logger } from "../logger"; import { ToolRegistry } from "../tools/registry"; import { runSubagent } from "./runner"; -import type { WorkspaceInfo } from "../workspace"; +import type { WorkspaceContext } from "../workspace"; import type { AgentProfileRegistry } from "../agents/registry"; // ───────────────────────────────────────────────────────────────────────────── @@ -75,25 +75,42 @@ const PLANNER_SYSTEM_PROMPT = /** Build the user-facing task string sent to the planner subagent. */ function buildPlannerTask( task: string, - workspaceInfo: WorkspaceInfo, + context: WorkspaceContext, availableTools: string[], availableProfiles?: Array<{ name: string; description: string }> ): string { const toolList = availableTools.length > 0 ? availableTools.join(", ") : "(none)"; + const wi = context.workspaceInfo; let result = `Task: ${task}\n` + - `Workspace: language=${workspaceInfo.language}, framework=${workspaceInfo.framework}, ` + - `packageManager=${workspaceInfo.packageManager}, gitInitialized=${workspaceInfo.gitInitialized}`; + `Workspace: language=${wi.language}, framework=${wi.framework}, ` + + `packageManager=${wi.packageManager}, gitInitialized=${wi.gitInitialized}`; - // Include lifecycle commands so the planner can generate concrete, workspace-specific steps + // Include lifecycle commands so the planner can generate concrete, workspace-specific steps. + // These are typically empty when the context was produced by ProjectExplorer; in that case + // the buildSystems notes below carry the authoritative information. 
const lifecycleLines: string[] = []; - if (workspaceInfo.buildCommand) lifecycleLines.push(`build="${workspaceInfo.buildCommand}"`); - if (workspaceInfo.testCommand) lifecycleLines.push(`test="${workspaceInfo.testCommand}"`); - if (workspaceInfo.lintCommand) lifecycleLines.push(`lint="${workspaceInfo.lintCommand}"`); + if (wi.buildCommand) lifecycleLines.push(`build="${wi.buildCommand}"`); + if (wi.testCommand) lifecycleLines.push(`test="${wi.testCommand}"`); + if (wi.lintCommand) lifecycleLines.push(`lint="${wi.lintCommand}"`); if (lifecycleLines.length > 0) { result += `, ${lifecycleLines.join(", ")}`; } + // Include richer build system details produced by the ProjectExplorer agent when available. + // The planner uses these notes to derive concrete, workspace-specific commands at runtime. + if (Array.isArray(context.buildSystems) && context.buildSystems.length > 0) { + result += `\nDetected build systems:`; + for (const bs of context.buildSystems as Array<{ name: string; configFile: string; notes: string }>) { + result += `\n - ${bs.name} (${bs.configFile}): ${bs.notes}`; + } + } + + // Free-form notes from the explorer (e.g. "multi-language monorepo") + if (typeof context.explorerNotes === "string" && context.explorerNotes.trim()) { + result += `\nWorkspace notes: ${context.explorerNotes}`; + } + result += `\nAvailable tools: ${toolList}`; if (availableProfiles && availableProfiles.length > 0) { const profileList = availableProfiles.map((p) => `${p.name}: ${p.description}`).join("; "); @@ -190,8 +207,13 @@ export function validatePlan(plan: Plan, registry: ToolRegistry): PlanValidation * * The planner runs without any tools — it only needs to reason and output JSON. * - * @param task Natural-language description of what the agent should accomplish. - * @param workspaceInfo Workspace analysis result (language, framework, etc.). + * @param task Natural-language description of what the agent should accomplish. 
+ * @param context Workspace context produced by `analyzeWorkspace` + `toWorkspaceContext`, + * or by the richer `exploreWorkspace` (ProjectExplorer agent). + * When the context was produced by ProjectExplorer the `buildSystems` and + * `explorerNotes` keys are included in the planner task so the model can + * derive concrete, workspace-specific commands without relying on any + * hardcoded command strings. * @param registry Tool registry used to list available tool names for the prompt. * @param llm Optional LLM instance — created from config when omitted. * @param profileRegistry Optional profile registry; when provided, profile names and descriptions @@ -199,7 +221,7 @@ export function validatePlan(plan: Plan, registry: ToolRegistry): PlanValidation */ export async function generatePlan( task: string, - workspaceInfo: WorkspaceInfo, + context: WorkspaceContext, registry: ToolRegistry, llm?: BaseChatModel, profileRegistry?: AgentProfileRegistry @@ -208,7 +230,7 @@ export async function generatePlan( const availableProfiles = profileRegistry ? profileRegistry.list().map((p) => ({ name: p.name, description: p.description })) : undefined; - const plannerTask = buildPlannerTask(task, workspaceInfo, availableTools, availableProfiles); + const plannerTask = buildPlannerTask(task, context, availableTools, availableProfiles); // Build the system prompt, replacing the {{profileList}} placeholder const profileListStr = @@ -241,7 +263,7 @@ export async function generatePlan( * @param task The original task string. * @param originalPlan The plan that failed validation. * @param feedback Human-readable description of what is wrong. - * @param workspaceInfo Workspace analysis result. + * @param context Workspace context (same object passed to `generatePlan`). * @param registry Tool registry used to supply the up-to-date tool list. * @param llm Optional LLM instance. 
*/ @@ -249,7 +271,7 @@ export async function refinePlan( task: string, originalPlan: Plan, feedback: string, - workspaceInfo: WorkspaceInfo, + context: WorkspaceContext, registry: ToolRegistry, llm?: BaseChatModel ): Promise<Plan> { diff --git a/src/tools/plan-and-run.ts b/src/tools/plan-and-run.ts index 75acd7b4..b7dc0a0d 100644 --- a/src/tools/plan-and-run.ts +++ b/src/tools/plan-and-run.ts @@ -5,7 +5,7 @@ import { logger } from "../logger"; import { spinner } from "../spinner"; import { executePlan } from "../orchestrator"; import { generatePlan, refinePlan, validatePlan } from "../subagents/planner"; -import { analyzeWorkspace } from "../workspace"; +import { analyzeWorkspace, toWorkspaceContext } from "../workspace"; import type { ToolDefinition } from "./registry"; import { toolRegistry } from "./registry"; @@ -31,8 +31,9 @@ export const toolDefinition: ToolDefinition = { logger.info({ tool: "plan-and-run", goal }, "generating plan"); const workspaceInfo = await analyzeWorkspace(appConfig.workspaceRoot); + const context = toWorkspaceContext(workspaceInfo); - let plan = await generatePlan(goal, workspaceInfo, toolRegistry); + let plan = await generatePlan(goal, context, toolRegistry); let validation = validatePlan(plan, toolRegistry); if (!validation.valid) { @@ -45,7 +46,7 @@ export const toolDefinition: ToolDefinition = { plan, `These tools are not available: ${validation.invalidTools.join(", ")}.
` + "Use only tools from the available list.", - workspaceInfo, + context, toolRegistry ); validation = validatePlan(plan, toolRegistry); diff --git a/src/tools/plan.ts b/src/tools/plan.ts index 61331f6f..cf904b3a 100644 --- a/src/tools/plan.ts +++ b/src/tools/plan.ts @@ -4,7 +4,7 @@ import { appConfig } from "../config"; import { logger } from "../logger"; import { spinner } from "../spinner"; import { generatePlan, refinePlan, validatePlan } from "../subagents/planner"; -import { analyzeWorkspace } from "../workspace"; +import { analyzeWorkspace, toWorkspaceContext } from "../workspace"; import type { ToolDefinition } from "./registry"; import { toolRegistry } from "./registry"; @@ -24,8 +24,9 @@ export const toolDefinition: ToolDefinition = { logger.info({ tool: "plan", goal }, "generating plan"); const workspaceInfo = await analyzeWorkspace(appConfig.workspaceRoot); + const context = toWorkspaceContext(workspaceInfo); - let plan = await generatePlan(goal, workspaceInfo, toolRegistry); + let plan = await generatePlan(goal, context, toolRegistry); let validation = validatePlan(plan, toolRegistry); if (!validation.valid) { @@ -38,7 +39,7 @@ export const toolDefinition: ToolDefinition = { plan, `These tools are not available: ${validation.invalidTools.join(", ")}. ` + "Use only tools from the available list.", - workspaceInfo, + context, toolRegistry ); validation = validatePlan(plan, toolRegistry); diff --git a/src/workspace.ts b/src/workspace.ts index 9ce29008..5c2550e9 100644 --- a/src/workspace.ts +++ b/src/workspace.ts @@ -23,6 +23,28 @@ export interface WorkspaceInfo { gitInitialized: boolean; } +/** + * Generic workspace context map that flows through the planning pipeline. + * + * The well-known `workspaceInfo` key holds a flat `WorkspaceInfo` summary that + * is backward-compatible with `generatePlan()` and the rest of the orchestration + * pipeline. 
Additional keys may be contributed by exploration agents — for + * example `buildSystems` and `explorerNotes` from `ProjectExplorer` — without + * requiring changes to existing consumers. + */ +export interface WorkspaceContext extends Record<string, unknown> { + /** Flat summary required by the planner and orchestrator. */ + workspaceInfo: WorkspaceInfo; +} + +/** + * Wrap a `WorkspaceInfo` in a minimal `WorkspaceContext`. + * Use this when calling `generatePlan()` with the result of `analyzeWorkspace()`. + */ +export function toWorkspaceContext(info: WorkspaceInfo): WorkspaceContext { + return { workspaceInfo: info }; +} + /** Lifecycle target names extracted from Makefiles. */ const LIFECYCLE_TARGETS = ["test", "lint", "build", "run", "install"] as const; diff --git a/tests/e2e/scenarios/plan-feature.e2e.test.ts b/tests/e2e/scenarios/plan-feature.e2e.test.ts index 191ecf7c..0fe1fc00 100644 --- a/tests/e2e/scenarios/plan-feature.e2e.test.ts +++ b/tests/e2e/scenarios/plan-feature.e2e.test.ts @@ -14,23 +14,25 @@ import { generatePlan } from "../../../src/subagents/planner"; import { appConfig } from "../../../src/config"; import { createMockLlm } from "../helpers/mock-llm"; import { createWorkspaceFixture } from "../helpers/workspace-fixture"; -import type { WorkspaceInfo } from "../../../src/workspace"; +import type { WorkspaceContext } from "../../../src/workspace"; // When E2E_USE_REAL_LLM=true the test uses undefined (falls back to createLLM). // Note: that requires removing the jest.mock above and setting a real MISTRAL_API_KEY. const USE_REAL_LLM = process.env.E2E_USE_REAL_LLM === "true"; -/** Representative workspace metadata used as planner input.
*/ -const MOCK_WORKSPACE: WorkspaceInfo = { - language: "node", - framework: "express", - packageManager: "npm", - hasTests: true, - testCommand: "npm test", - lintCommand: "npm run lint", - buildCommand: "npm run build", - entryPoints: ["src/index.ts"], - gitInitialized: false, +/** Representative workspace context used as planner input. */ +const MOCK_WORKSPACE: WorkspaceContext = { + workspaceInfo: { + language: "node", + framework: "express", + packageManager: "npm", + hasTests: true, + testCommand: "npm test", + lintCommand: "npm run lint", + buildCommand: "npm run build", + entryPoints: ["src/index.ts"], + gitInitialized: false, + }, }; describe("E2E: Generate a plan for a feature", () => { From 000a8aefb9bc67415ebbb060a5385d645fd84ae5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 10 Apr 2026 05:41:42 +0000 Subject: [PATCH 5/5] fix: correct spelling 'analyse' to 'analyze' in project-explorer.ts Agent-Logs-Url: https://github.com/huberp/agentloop/sessions/a68943d2-839b-4e55-ba44-3606340352a0 Co-authored-by: huberp <4027454+huberp@users.noreply.github.com> --- src/agents/project-explorer.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agents/project-explorer.ts b/src/agents/project-explorer.ts index 904280cc..ffda07a3 100644 --- a/src/agents/project-explorer.ts +++ b/src/agents/project-explorer.ts @@ -27,7 +27,7 @@ export interface BuildSystemInfo { // ───────────────────────────────────────────────────────────────────────────── const EXPLORER_SYSTEM_PROMPT = - `You are a workspace exploration agent. Your job is to analyse a software project's file system ` + + `You are a workspace exploration agent. Your job is to analyze a software project's file system ` + `using the available tools and produce a structured JSON description of the workspace.\n\n` + `Exploration steps (use the tools to gather information):\n` + `1. 
Call file-list with recursive=true to understand the top-level directory structure.\n` +