diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ab2d581c5..1f1809d26 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -182,7 +182,7 @@ jobs: - name: Run cockpit example aimock e2e suites run: | set -e - for proj in cockpit-langgraph-streaming-angular cockpit-chat-tool-calls-angular; do + for proj in cockpit-langgraph-streaming-angular cockpit-chat-tool-calls-angular cockpit-chat-subagents-angular; do echo "::group::nx e2e $proj" npx nx e2e "$proj" --skip-nx-cache echo "::endgroup::" diff --git a/cockpit/chat/subagents/angular/e2e/.gitignore b/cockpit/chat/subagents/angular/e2e/.gitignore new file mode 100644 index 000000000..059a55910 --- /dev/null +++ b/cockpit/chat/subagents/angular/e2e/.gitignore @@ -0,0 +1,3 @@ +test-results/ +playwright-report/ +*.tmp diff --git a/cockpit/chat/subagents/angular/e2e/c-subagents.spec.ts b/cockpit/chat/subagents/angular/e2e/c-subagents.spec.ts new file mode 100644 index 000000000..6cc6375f5 --- /dev/null +++ b/cockpit/chat/subagents/angular/e2e/c-subagents.spec.ts @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: MIT +import { test, expect } from '@playwright/test'; +import { sendPromptAndWait } from '../../../../../libs/internal/aimock-harness/src'; + +const PROMPT = 'Plan a trip from LAX to JFK'; + +test('c-subagents: orchestrator dispatches task subagents, summary surfaces in bubble', async ({ + page, +}) => { + const bubble = await sendPromptAndWait(page, PROMPT); + + // The chat-tool-calls primitive renders a collapsible button labeled + // "Called task N times" for the orchestrator's task dispatches. Asserting + // it's in the DOM proves the orchestrator emitted real task tool_calls. + // + // We don't assert on the subagent card primitive because it only + // renders while a subagent is in a RUNNING state — once all subagents + // complete (which is the state sendPromptAndWait returns at, since the + // agent is idle), the cards are filtered out of the DOM. 
The tool-call + // chip is the durable signal. + const taskChip = page.getByRole('button', { name: /called task|task/i }).first(); + await expect(taskChip).toBeVisible({ timeout: 30_000 }); + + // Final summary text contains an aviation-related phrase from the captured + // continuation. Loose regex so refactors to the subagent prompts (research/ + // booking/itinerary outputs) don't break the test. + const finalText = await bubble.innerText(); + expect(finalText.toLowerCase()).toMatch(/lax|jfk|itinerary|trip|flight/); +}); diff --git a/cockpit/chat/subagents/angular/e2e/fixtures/c-subagents.json b/cockpit/chat/subagents/angular/e2e/fixtures/c-subagents.json new file mode 100644 index 000000000..c55166c14 --- /dev/null +++ b/cockpit/chat/subagents/angular/e2e/fixtures/c-subagents.json @@ -0,0 +1,178 @@ +{ + "fixtures": [ + { + "match": { + "userMessage": "Plan a trip from LAX to JFK", + "model": "gpt-5-mini", + "turnIndex": 0, + "hasToolResult": false + }, + "response": { + "toolCalls": [ + { + "name": "task", + "arguments": "{\"role\":\"research\",\"task_description\":\"Gather info on LAX (Los Angeles International Airport) and JFK (John F. Kennedy International Airport). Include: major terminals used by typical carriers, ground transportation options at each airport, typical weather/temperature and conditions in New York (JFK) year-round and what to expect this time of year (assume current date May 2026), and any travel advisories or common delays for LAX-JFK route.\"}", + "id": "call_ScDzZETqORD2oBYSW1qz7Mk2" + } + ] + }, + "metadata": { + "systemHash": "13e1b238", + "toolsHash": "bca09fd7" + } + }, + { + "match": { + "userMessage": "Gather info on LAX (Los Angeles International Airport) and JFK (John F. Kennedy International Airport). 
Include: major terminals used by typical carriers, ground transportation options at each airport, typical weather/temperature and conditions in New York (JFK) year-round and what to expect this time of year (assume current date May 2026), and any travel advisories or common delays for LAX-JFK route.", + "model": "gpt-5-mini", + "turnIndex": 0, + "hasToolResult": false + }, + "response": { + "toolCalls": [ + { + "name": "get_airport_info", + "arguments": "{\"airport_code\": \"LAX\"}", + "id": "call_jWLEa38IenhEVyK4O8qIcxtr" + }, + { + "name": "get_airport_info", + "arguments": "{\"airport_code\": \"JFK\"}", + "id": "call_ijvxJqRmWdiOECD1wwOFSivZ" + } + ] + }, + "metadata": { + "systemHash": "bfabc0f4", + "toolsHash": "d00ddc0a" + } + }, + { + "match": { + "userMessage": "Gather info on LAX (Los Angeles International Airport) and JFK (John F. Kennedy International Airport). Include: major terminals used by typical carriers, ground transportation options at each airport, typical weather/temperature and conditions in New York (JFK) year-round and what to expect this time of year (assume current date May 2026), and any travel advisories or common delays for LAX-JFK route.", + "model": "gpt-5-mini", + "turnIndex": 1, + "hasToolResult": true + }, + "response": { + "content": "I looked up both airports: LAX (Los Angeles International) \u2014 9 terminals, current conditions about 72\u00b0F and partly cloudy \u2014 and JFK (John F. Kennedy International) \u2014 6 terminals, current conditions about 58\u00b0F and clear. Major carriers (typical terminal groupings) at LAX include American (Terminals 4\u20135), Delta (T2\u2013T3 and Tom Bradley for many intl. connections), United (T7\u2013T8), Southwest (primarily T1) and Tom Bradley/Terminal B handling most international carriers; at JFK, Delta is centered at T4, JetBlue at T5, American at T8, with many international carriers in T1/T7/T4. 
Ground transportation: LAX offers FlyAway buses to central LA, an on\u2011airport People Mover connection to Metro rail and the rental car center, taxis and designated rideshare areas; JFK is served by the AirTrain linking terminals to the LIRR (Jamaica) and NYC Subway (Howard Beach/Jamaica), plus taxis, app-based pick\u2011ups and shuttle/rental services. Typical NYC climate year\u2011round is cold, snowy winters and hot, humid summers with mild/transitional spring and fall; in May expect mild, springlike weather (roughly mid\u201150s to low\u201170s\u00b0F, occasional showers); the LAX\u2013JFK route is very busy and commonly sees ATC/airspace delays (NYC metro congestion), seasonal weather delays (winter snow nor\u2019easters or summer thunderstorms), and occasional long taxi/ground times at either airport \u2014 allow extra time, monitor status, and consider TSA PreCheck or earlier check\u2011in for smoother travel." + }, + "metadata": { + "systemHash": "bfabc0f4", + "toolsHash": "d00ddc0a" + } + }, + { + "match": { + "userMessage": "Plan a trip from LAX to JFK", + "model": "gpt-5-mini", + "turnIndex": 1, + "hasToolResult": true + }, + "response": { + "toolCalls": [ + { + "name": "task", + "arguments": "{\"role\":\"booking\",\"task_description\":\"Find flight options from LAX to JFK for one adult departing May 20, 2026, returning May 24, 2026. Provide 3 flight options: low-cost, balanced (good price and timing), and premium (more legroom or non\u2011stop at convenient times). Include airline, flight times (depart/arrive local), number of stops, total duration, and a rough price estimate in USD.\"}", + "id": "call_Q2EP65ATIaYLGdvQl0Ijj57K" + } + ] + }, + "metadata": { + "systemHash": "13e1b238", + "toolsHash": "bca09fd7" + } + }, + { + "match": { + "userMessage": "Find flight options from LAX to JFK for one adult departing May 20, 2026, returning May 24, 2026. 
Provide 3 flight options: low-cost, balanced (good price and timing), and premium (more legroom or non\u2011stop at convenient times). Include airline, flight times (depart/arrive local), number of stops, total duration, and a rough price estimate in USD.", + "model": "gpt-5-mini", + "turnIndex": 0, + "hasToolResult": false + }, + "response": { + "toolCalls": [ + { + "name": "find_routes", + "arguments": "{\"from_code\": \"LAX\", \"to_code\": \"JFK\", \"date_offset_days\": 0}", + "id": "call_U1N177nViX8Bt3o9jUm6C4aH" + }, + { + "name": "find_routes", + "arguments": "{\"from_code\": \"JFK\", \"to_code\": \"LAX\", \"date_offset_days\": 4}", + "id": "call_NHAd49cqGK50CvOC948tXXgn" + } + ] + }, + "metadata": { + "systemHash": "33b52d2f", + "toolsHash": "b71efcf3" + } + }, + { + "match": { + "userMessage": "Find flight options from LAX to JFK for one adult departing May 20, 2026, returning May 24, 2026. Provide 3 flight options: low-cost, balanced (good price and timing), and premium (more legroom or non\u2011stop at convenient times). Include airline, flight times (depart/arrive local), number of stops, total duration, and a rough price estimate in USD.", + "model": "gpt-5-mini", + "turnIndex": 1, + "hasToolResult": true + }, + "response": { + "content": "Below are three round\u2011trip options I found for LAX \u2192 JFK departing May 20, returning May 24 (one adult). 
Flights come from the schedule data; prices are rough market estimates for economy/premium seats.\n\n1) Low\u2011cost (cheapest, still nonstop)\n- Outbound: UA 123 (United) \u2014 LAX 08:00 \u2192 JFK 16:30 (local) \n - Nonstop, duration 5h30, aircraft: Boeing 787 \n- Return: B6 6601 (JetBlue) \u2014 JFK 07:30 \u2192 LAX 10:55 (local) \n - Nonstop, duration 6h25, aircraft: Airbus A321 \n- Round\u2011trip total flying time: 11h55 (330 + 385 min) \n- Rough price (economy, basic): ~USD 220\u2013280\n\n2) Balanced (good price + single\u2011carrier convenience)\n- Outbound: UA 123 (United) \u2014 LAX 08:00 \u2192 JFK 16:30 (local) \n - Nonstop, duration 5h30, Boeing 787 \n- Return: UA 456 (United) \u2014 JFK 10:00 \u2192 LAX 13:15 (local) \n - Nonstop, duration 6h15, Boeing 757 \u2014 note: schedule currently shows this leg as delayed in the data \n- Round\u2011trip total flying time: 11h45 (330 + 375 min) \n- Rough price (economy, standard): ~USD 320\u2013420\n\n3) Premium (more legroom / more comfortable cabin on convenient nonstop times)\n- Outbound: UA 123 (United) \u2014 LAX 08:00 \u2192 JFK 16:30 (local) \n - Nonstop, 5h30, Boeing 787 (widebody \u2014 better cabin space) \n- Return: UA 456 (United) \u2014 JFK 10:00 \u2192 LAX 13:15 (local) \n - Nonstop, 6h15, Boeing 757 \n- Round\u2011trip total flying time: 11h45 \n- Rough price: \n - Premium Economy / Economy Plus seats: ~USD 700\u20131,000 \n - Business / First class: ~USD 1,600\u20132,400\n\nNotes:\n- All options above are nonstop each leg. The dataset shows only these flights for your dates; if you want cheaper fares I can look for one\u2011stop itineraries or different carriers/dates. \n- UA456 is flagged as \u201cdelayed\u201d in the schedule data \u2014 consider the earlier JetBlue return if tight connections or schedule certainty matter. \n- Want me to pull live fares for any one of these options and check exact seat/price availability or preferences (flexible ticket, extra legroom, specific airline)?" 
+ }, + "metadata": { + "systemHash": "33b52d2f", + "toolsHash": "b71efcf3" + } + }, + { + "match": { + "userMessage": "Plan a trip from LAX to JFK", + "model": "gpt-5-mini", + "turnIndex": 2, + "hasToolResult": true + }, + "response": { + "toolCalls": [ + { + "name": "task", + "arguments": "{\"role\":\"itinerary\",\"task_description\":\"Using the research and booking outputs, synthesize a concise trip plan for one adult traveling LAX\u2192JFK, departing May 20, 2026 and returning May 24, 2026. Include: chosen flight option (pick the balanced option), terminals to use at LAX and JFK for those flights, recommended arrival times at the airport, ground transportation options to/from each airport, packing/weather notes for May in NYC, and simple tips to avoid common delays.\"}", + "id": "call_abvWLlynVR1RaX6VNBh21aGc" + } + ] + }, + "metadata": { + "systemHash": "13e1b238", + "toolsHash": "bca09fd7" + } + }, + { + "match": { + "userMessage": "Using the research and booking outputs, synthesize a concise trip plan for one adult traveling LAX\u2192JFK, departing May 20, 2026 and returning May 24, 2026. Include: chosen flight option (pick the balanced option), terminals to use at LAX and JFK for those flights, recommended arrival times at the airport, ground transportation options to/from each airport, packing/weather notes for May in NYC, and simple tips to avoid common delays.", + "model": "gpt-5-mini", + "turnIndex": 0, + "hasToolResult": false + }, + "response": { + "content": "Pick the balanced nonstop: JetBlue LAX \u2192 JFK outbound May 20 and return May 24 (JetBlue operates at LAX Terminal 5 and arrives/departs JFK Terminal 5). \nPlan to arrive at LAX 2\u20132.5 hours before departure (allow extra time for traffic and checked bags) and at JFK about 2 hours before your return; check in on the airline app and have your mobile boarding pass ready. 
\nGround transport: at LAX use the FlyAway bus to/from downtown, rideshare/taxi pickups on the Lower (Arrivals) curb, or rental cars via the Rental Car Center shuttle; at JFK use the AirTrain (to Jamaica for LIRR or Howard Beach for the A subway), or door\u2011to\u2011door taxi/rideshare (allow 45\u201375 minutes to Manhattan depending on traffic). \nMay in NYC is mild (highs ~65\u201372\u00b0F / 18\u201322\u00b0C, lows ~50\u201355\u00b0F / 10\u201313\u00b0C) so pack layers, a light jacket and a compact umbrella; to avoid delays, pick mid\u2011morning flights when possible, enroll in TSA PreCheck/CLEAR, check the airline app for real\u2011time updates, and leave extra time for LAX traffic and JFK security lines." + }, + "metadata": { + "systemHash": "ec82747b" + } + }, + { + "match": { + "userMessage": "Plan a trip from LAX to JFK", + "model": "gpt-5-mini", + "turnIndex": 3, + "hasToolResult": true + }, + "response": { + "content": "All set \u2014 I gathered research, found flights, and prepared a concise itinerary.\n\nSummary plan (1\u20132 sentences):\n- Balanced option: round\u2011trip nonstop LAX \u2192 JFK departing May 20, returning May 24 using United (nonstop outbound/return) with a convenient schedule and moderate price. Arrive at LAX ~2\u20132.5 hours before departure and at JFK ~2 hours before your return; use LAX ground transportation (FlyAway, rideshare, rental shuttle) and JFK AirTrain/LIRR or rideshare to reach the city." 
+ }, + "metadata": { + "systemHash": "13e1b238", + "toolsHash": "bca09fd7" + } + } + ] +} \ No newline at end of file diff --git a/cockpit/chat/subagents/angular/e2e/global-setup-impl.ts b/cockpit/chat/subagents/angular/e2e/global-setup-impl.ts new file mode 100644 index 000000000..ae254cd33 --- /dev/null +++ b/cockpit/chat/subagents/angular/e2e/global-setup-impl.ts @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: MIT +import { resolve } from 'node:path'; +import { createGlobalSetup } from '../../../../../libs/internal/aimock-harness/src'; + +export default createGlobalSetup({ + langgraphCwd: 'cockpit/langgraph/streaming/python', + // Each cockpit example pins its OWN langgraph port to avoid TIME_WAIT + // collisions when a sequential CI loop runs multiple per-example e2es + // back-to-back. Streaming uses 8123; tool-calls 8124; subagents 8125. + // The Angular proxy.conf.json target must match. + langgraphPort: 8125, + angularProject: 'cockpit-chat-subagents-angular', + angularPort: 4505, + fixturesDir: resolve(__dirname, 'fixtures'), +}); diff --git a/cockpit/chat/subagents/angular/e2e/playwright.config.ts b/cockpit/chat/subagents/angular/e2e/playwright.config.ts new file mode 100644 index 000000000..ae04d7fb7 --- /dev/null +++ b/cockpit/chat/subagents/angular/e2e/playwright.config.ts @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: MIT +import { defineConfig, devices } from '@playwright/test'; + +export default defineConfig({ + testDir: '.', + testMatch: '**/*.spec.ts', + fullyParallel: false, + workers: 1, + retries: process.env.CI ? 2 : 0, + reporter: process.env.CI ? 
[['list'], ['html', { open: 'never' }]] : 'list', + use: { + baseURL: 'http://localhost:4505', + trace: 'retain-on-failure', + }, + projects: [{ name: 'chromium', use: { ...devices['Desktop Chrome'] } }], + globalSetup: './global-setup-impl.ts', + globalTeardown: require.resolve('../../../../../libs/internal/aimock-harness/src/global-teardown'), +}); diff --git a/cockpit/chat/subagents/angular/e2e/scripts/record-c-subagents.sh b/cockpit/chat/subagents/angular/e2e/scripts/record-c-subagents.sh new file mode 100755 index 000000000..5bfd53d1d --- /dev/null +++ b/cockpit/chat/subagents/angular/e2e/scripts/record-c-subagents.sh @@ -0,0 +1,194 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: MIT +# +# Capture a complete aimock fixture for the c-subagents graph by running the +# real langgraph dev server against aimock in --record mode. Captures every +# LLM call (orchestrator + each subagent's nested calls + tool-driven +# sub-rounds) at the HTTP layer. +# +# Why this shape (vs. direct Python LLM invocation): the c-subagents graph's +# `task` tool dispatches to subagent functions that run their own LLM-driven +# agent loops. Direct invocation only captures the orchestrator's calls; +# proxying through aimock captures every LLM call in the full graph. +# +# Run from repo root: +# OPENAI_API_KEY=sk-... bash cockpit/chat/subagents/angular/e2e/scripts/record-c-subagents.sh +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "$0")/../../../../../.." 
&& pwd)" +cd "$REPO_ROOT" + +if [[ -z "${OPENAI_API_KEY:-}" ]]; then + # Try .env (examples first, then streaming as fallback for worktrees) + for env_path in examples/chat/python/.env cockpit/langgraph/streaming/python/.env; do + if [[ -f "$env_path" ]]; then + set -a; source "$env_path"; set +a + break + fi + done +fi +if [[ -z "${OPENAI_API_KEY:-}" ]]; then + echo "OPENAI_API_KEY not set (in env or examples/chat/python/.env)" >&2 + exit 1 +fi + +AIMOCK_PORT=19999 +LANGGRAPH_PORT=8125 +FIXTURE_OUT="cockpit/chat/subagents/angular/e2e/fixtures/c-subagents.json" +# Aimock --record writes per-request files into the recorded/ subdirectory of its --fixtures dir. +# We hand it a dedicated staging dir, then merge all recorded entries into the +# single multi-turn fixture file consumed by the e2e harness. +RECORD_DIR="$(pwd)/cockpit/chat/subagents/angular/e2e/fixtures/.staging" +rm -rf "$RECORD_DIR" +mkdir -p "$RECORD_DIR" +TMP_DIR="$(mktemp -d)" +trap 'rm -rf "$TMP_DIR"' EXIT + +# Copy .env into the cockpit-streaming python project (gitignored). +# Use examples/.env when present; otherwise the streaming/.env already exists +# (worktree case where examples/.env hasn't been propagated). +mkdir -p cockpit/langgraph/streaming/python +if [[ -f "examples/chat/python/.env" ]]; then + cp examples/chat/python/.env cockpit/langgraph/streaming/python/.env +fi + +# 1. Start aimock in record mode +echo "[record] starting aimock --record on :$AIMOCK_PORT" +mkdir -p "$(dirname "$FIXTURE_OUT")" +npx -y -p @copilotkit/aimock llmock \ + --port "$AIMOCK_PORT" \ + --record \ + --provider-openai https://api.openai.com \ + --fixtures "$RECORD_DIR" \ + --chunk-size 4096 \ + > "$TMP_DIR/aimock.log" 2>&1 & +AIMOCK_PID=$! 
+ +# Cleanup on exit +cleanup() { + if [[ -n "${LG_PID:-}" ]]; then + # Kill descendants first (uv → python → langgraph workers), then the parent + pkill -P "$LG_PID" 2>/dev/null || true + kill "$LG_PID" 2>/dev/null || true + fi + kill "$AIMOCK_PID" 2>/dev/null || true + wait 2>/dev/null || true + rm -rf "$TMP_DIR" +} +trap cleanup EXIT + +# Wait for aimock to be ready +for _ in {1..30}; do + if curl -sf "http://127.0.0.1:$AIMOCK_PORT/health" > /dev/null 2>&1; then break; fi + if curl -sf "http://127.0.0.1:$AIMOCK_PORT/" > /dev/null 2>&1; then break; fi + sleep 1 +done +echo "[record] aimock ready" + +# 2. Start langgraph dev pointed at aimock +echo "[record] starting langgraph dev on :$LANGGRAPH_PORT (OPENAI_BASE_URL=http://127.0.0.1:$AIMOCK_PORT/v1)" +# setsid on Linux gives us a new process group for clean teardown; on macOS +# fall back to plain background — `pkill -P` later handles descendants. +if command -v setsid >/dev/null 2>&1; then + RUN_PREFIX="setsid" +else + RUN_PREFIX="" +fi +( + cd cockpit/langgraph/streaming/python + OPENAI_BASE_URL="http://127.0.0.1:$AIMOCK_PORT/v1" OPENAI_API_KEY="test-record" \ + exec $RUN_PREFIX uv run langgraph dev --port "$LANGGRAPH_PORT" --no-browser +) > "$TMP_DIR/langgraph.log" 2>&1 & +LG_PID=$! + +# Wait for langgraph +for i in {1..60}; do + if curl -sf "http://127.0.0.1:$LANGGRAPH_PORT/ok" > /dev/null; then break; fi + sleep 1 +done +if ! curl -sf "http://127.0.0.1:$LANGGRAPH_PORT/ok" > /dev/null; then + echo "[record] langgraph failed to start; tail of log:" >&2 + tail -30 "$TMP_DIR/langgraph.log" >&2 + exit 2 +fi +echo "[record] langgraph ready" + +# 3. 
Submit a run via the LangGraph SDK HTTP API +THREAD=$(curl -sf -X POST "http://127.0.0.1:$LANGGRAPH_PORT/threads" -H 'content-type: application/json' -d '{}' | python3 -c 'import sys,json; print(json.load(sys.stdin)["thread_id"])') +echo "[record] thread: $THREAD" +RUN=$(curl -sf -X POST "http://127.0.0.1:$LANGGRAPH_PORT/threads/$THREAD/runs" \ + -H 'content-type: application/json' \ + -d '{ + "assistant_id": "c-subagents", + "input": {"messages": [{"role": "user", "content": "Plan a trip from LAX to JFK"}]} + }' | python3 -c 'import sys,json; print(json.load(sys.stdin)["run_id"])') +echo "[record] run: $RUN" + +# 4. Poll the run status (pending → running → success/error/timeout/interrupted) +echo "[record] waiting for run to complete (this hits real OpenAI; ~30-180s)..." +RUN_STATUS="" +for i in {1..180}; do + RUN_STATUS=$(curl -sf "http://127.0.0.1:$LANGGRAPH_PORT/threads/$THREAD/runs/$RUN" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("status",""))') + case "$RUN_STATUS" in + success|error|timeout|interrupted) break ;; + esac + sleep 2 +done +echo "[record] final run status: $RUN_STATUS" +if [[ "$RUN_STATUS" != "success" ]]; then + echo "[record] run did not succeed (status=$RUN_STATUS)" >&2 + echo "--- langgraph.log tail ---" >&2 + tail -80 "$TMP_DIR/langgraph.log" >&2 + echo "--- aimock.log tail ---" >&2 + tail -40 "$TMP_DIR/aimock.log" >&2 + exit 3 +fi +MSG_COUNT=$(curl -sf "http://127.0.0.1:$LANGGRAPH_PORT/threads/$THREAD/state" | python3 -c 'import sys,json; s=json.load(sys.stdin); print(len(s["values"].get("messages",[])))') +echo "[record] run complete; ${MSG_COUNT} messages in state" +if [[ "$MSG_COUNT" == "0" ]]; then + echo "[record] run produced 0 messages — surfacing logs for debugging" >&2 + echo "--- langgraph.log tail ---" >&2 + tail -60 "$TMP_DIR/langgraph.log" >&2 + echo "--- aimock.log tail ---" >&2 + tail -60 "$TMP_DIR/aimock.log" >&2 + echo "--- run status ---" >&2 + curl -sf 
"http://127.0.0.1:$LANGGRAPH_PORT/threads/$THREAD/runs/$RUN" >&2 + exit 5 +fi + +# 5. Give aimock a moment to flush per-request fixture files +sleep 2 + +# 6. Merge all recorded-* files in $RECORD_DIR/recorded/ into one fixtures.json +RECORDED_DIR="$RECORD_DIR/recorded" +if [[ ! -d "$RECORDED_DIR" ]]; then + echo "[record] no recorded fixtures dir at $RECORDED_DIR" >&2 + echo "[record] aimock log tail:" >&2 + tail -40 "$TMP_DIR/aimock.log" >&2 + exit 4 +fi +RECORDED_FILES=$(find "$RECORDED_DIR" -name "*.json" | wc -l | tr -d ' ') +echo "[record] $RECORDED_FILES recorded fixture files in $RECORDED_DIR" +python3 - <&2 + exit 4 +fi +echo "[record] fixture written: $FIXTURE_OUT ($(wc -c < "$FIXTURE_OUT") bytes)" +ENTRY_COUNT=$(python3 -c 'import json,sys; d=json.load(open(sys.argv[1])); print(len(d.get("fixtures",[])))' "$FIXTURE_OUT") +echo "[record] $ENTRY_COUNT fixture entries" diff --git a/cockpit/chat/subagents/angular/e2e/tsconfig.json b/cockpit/chat/subagents/angular/e2e/tsconfig.json new file mode 100644 index 000000000..0b5aeecbf --- /dev/null +++ b/cockpit/chat/subagents/angular/e2e/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ES2022", + "moduleResolution": "Bundler", + "esModuleInterop": true, + "strict": true, + "skipLibCheck": true, + "noEmit": true, + "types": ["node"] + }, + "include": ["**/*.ts"], + "exclude": ["node_modules", "test-results", "playwright-report"] +} diff --git a/cockpit/chat/subagents/angular/project.json b/cockpit/chat/subagents/angular/project.json index 139bdd234..41b40529d 100644 --- a/cockpit/chat/subagents/angular/project.json +++ b/cockpit/chat/subagents/angular/project.json @@ -81,6 +81,12 @@ "proxyConfig": "cockpit/chat/subagents/angular/proxy.conf.json" } }, + "e2e": { + "executor": "@nx/playwright:playwright", + "options": { + "config": "cockpit/chat/subagents/angular/e2e/playwright.config.ts" + } + }, "smoke": { "executor": "nx:run-commands", "options": { diff --git 
a/cockpit/chat/subagents/angular/proxy.conf.json b/cockpit/chat/subagents/angular/proxy.conf.json index 8523362d7..bd91684b3 100644 --- a/cockpit/chat/subagents/angular/proxy.conf.json +++ b/cockpit/chat/subagents/angular/proxy.conf.json @@ -1,6 +1,6 @@ { "/api": { - "target": "http://localhost:8123", + "target": "http://localhost:8125", "secure": false, "changeOrigin": true, "pathRewrite": { "^/api": "" }, diff --git a/docs/superpowers/plans/2026-05-16-cockpit-aimock-c-subagents.md b/docs/superpowers/plans/2026-05-16-cockpit-aimock-c-subagents.md new file mode 100644 index 000000000..d613cd8c8 --- /dev/null +++ b/docs/superpowers/plans/2026-05-16-cockpit-aimock-c-subagents.md @@ -0,0 +1,612 @@ +# Cockpit aimock E2E Phase 3 — c-subagents Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development. Steps use checkbox (`- [ ]`) syntax. + +**Goal:** Add an aimock-driven Playwright spec for `c-subagents` (orchestrator with `task` tool dispatching subagents) at `cockpit/chat/subagents/angular/e2e/`, mirroring the Phase 2 c-tool-calls pattern. + +**Architecture:** Per-example dir under the harness library landed in Phase 2 ([#356](https://github.com/cacheplane/angular-agent-framework/pull/356)). LangGraph port 8125 (next after streaming=8123 and tool-calls=8124). Multi-turn fixture (parent task tool_calls + tool results + continuation). No library changes. + +**Tech Stack:** `@copilotkit/aimock`, Playwright, `libs/internal/aimock-harness/`, `uv` for the python langgraph dev server. + +**Spec:** [docs/superpowers/specs/2026-05-16-cockpit-aimock-c-subagents-design.md](../specs/2026-05-16-cockpit-aimock-c-subagents-design.md) + +--- + +## Working environment + +- Worktree: `/tmp/c-subagents` (branch `claude/cockpit-aimock-c-subagents`). +- `node_modules` symlinked from main checkout; `npx`/`nx`/`uv` work directly. +- License header `// SPDX-License-Identifier: MIT` on line 1 of every new TS file. +- One commit per task. 
DO NOT push, amend, or `git add -A`. +- Spec commit already on the branch; this plan adds a second commit, then implementation commits. + +--- + +## Task 1: Scaffold per-example e2e dir (configs + helpers) + +**Files:** +- Create: `cockpit/chat/subagents/angular/e2e/tsconfig.json` +- Create: `cockpit/chat/subagents/angular/e2e/.gitignore` +- Create: `cockpit/chat/subagents/angular/e2e/playwright.config.ts` +- Create: `cockpit/chat/subagents/angular/e2e/global-setup-impl.ts` + +- [ ] **Step 1: Create tsconfig.json** + +Write `cockpit/chat/subagents/angular/e2e/tsconfig.json`: + +```json +{ + "compilerOptions": { + "target": "ES2022", + "module": "ES2022", + "moduleResolution": "Bundler", + "esModuleInterop": true, + "strict": true, + "skipLibCheck": true, + "noEmit": true, + "types": ["node"] + }, + "include": ["**/*.ts"], + "exclude": ["node_modules", "test-results", "playwright-report"] +} +``` + +- [ ] **Step 2: Create .gitignore** + +Write `cockpit/chat/subagents/angular/e2e/.gitignore`: + +``` +test-results/ +playwright-report/ +*.tmp +``` + +- [ ] **Step 3: Create playwright.config.ts** + +Write `cockpit/chat/subagents/angular/e2e/playwright.config.ts`: + +```typescript +// SPDX-License-Identifier: MIT +import { defineConfig, devices } from '@playwright/test'; + +export default defineConfig({ + testDir: '.', + testMatch: '**/*.spec.ts', + fullyParallel: false, + workers: 1, + retries: process.env.CI ? 2 : 0, + reporter: process.env.CI ? 
[['list'], ['html', { open: 'never' }]] : 'list', + use: { + baseURL: 'http://localhost:4505', + trace: 'retain-on-failure', + }, + projects: [{ name: 'chromium', use: { ...devices['Desktop Chrome'] } }], + globalSetup: './global-setup-impl.ts', + globalTeardown: require.resolve('../../../../../libs/internal/aimock-harness/src/global-teardown'), +}); +``` + +- [ ] **Step 4: Create global-setup-impl.ts** + +Write `cockpit/chat/subagents/angular/e2e/global-setup-impl.ts`: + +```typescript +// SPDX-License-Identifier: MIT +import { resolve } from 'node:path'; +import { createGlobalSetup } from '../../../../../libs/internal/aimock-harness/src'; + +export default createGlobalSetup({ + langgraphCwd: 'cockpit/langgraph/streaming/python', + // Each cockpit example pins its OWN langgraph port to avoid TIME_WAIT + // collisions when a sequential CI loop runs multiple per-example e2es + // back-to-back. Streaming uses 8123; tool-calls 8124; subagents 8125. + // The Angular proxy.conf.json target must match. + langgraphPort: 8125, + angularProject: 'cockpit-chat-subagents-angular', + angularPort: 4505, + fixturesDir: resolve(__dirname, 'fixtures'), +}); +``` + +- [ ] **Step 5: Type-check** + +```bash +cd /tmp/c-subagents/cockpit/chat/subagents/angular/e2e +npx tsc --noEmit +``` + +Expected: no errors. + +- [ ] **Step 6: Commit Task 1** + +```bash +cd /tmp/c-subagents +git add cockpit/chat/subagents/angular/e2e/tsconfig.json \ + cockpit/chat/subagents/angular/e2e/.gitignore \ + cockpit/chat/subagents/angular/e2e/playwright.config.ts \ + cockpit/chat/subagents/angular/e2e/global-setup-impl.ts +git commit -m "feat(cockpit-chat-subagents): scaffold aimock e2e dir" +``` + +--- + +## Task 2: Update Angular proxy + project.json + +**Files:** +- Modify: `cockpit/chat/subagents/angular/proxy.conf.json` +- Modify: `cockpit/chat/subagents/angular/project.json` + +- [ ] **Step 1: Update proxy.conf.json target port** + +Open `cockpit/chat/subagents/angular/proxy.conf.json`. 
Change `target` from `"http://localhost:8123"` to `"http://localhost:8125"`: + +```json +{ + "/api": { + "target": "http://localhost:8125", + "secure": false, + "changeOrigin": true, + "pathRewrite": { "^/api": "" }, + "ws": true + } +} +``` + +- [ ] **Step 2: Add e2e target to project.json** + +Open `cockpit/chat/subagents/angular/project.json`. Add to `targets`: + +```json +"e2e": { + "executor": "@nx/playwright:playwright", + "options": { + "config": "cockpit/chat/subagents/angular/e2e/playwright.config.ts" + } +} +``` + +Verify the file is still valid JSON: + +```bash +cd /tmp/c-subagents +python3 -c "import json; json.load(open('cockpit/chat/subagents/angular/project.json'))" && echo "OK" +``` + +Expected: `OK`. + +- [ ] **Step 3: Commit Task 2** + +```bash +cd /tmp/c-subagents +git add cockpit/chat/subagents/angular/proxy.conf.json \ + cockpit/chat/subagents/angular/project.json +git commit -m "feat(cockpit-chat-subagents): wire e2e target + per-example langgraph port 8125" +``` + +--- + +## Task 3: Capture the c-subagents fixture via aimock record mode + +**Files:** +- Create: `cockpit/chat/subagents/angular/e2e/scripts/record-c-subagents.sh` +- Create: `cockpit/chat/subagents/angular/e2e/fixtures/c-subagents.json` (generated by script) + +The first capture attempt (direct LLM invocation in Python) failed because the c-subagents `task` tool dispatches to subagent functions that EACH run their own LLM-driven agent loops. Directly invoking the orchestrator only captures its LLM calls; subagent LLM calls (with role-specific system prompts and tool sub-calls) go uncaptured. + +The correct capture is at the HTTP boundary: run the real langgraph dev server against aimock in `--record` mode. Aimock proxies unmatched LLM requests to real OpenAI and saves every interaction as a fixture entry. This captures the orchestrator's calls, ALL subagent LLM calls, and any nested tool-driven sub-rounds uniformly. 
+ +- [ ] **Step 1: Write the capture script** + +Write `cockpit/chat/subagents/angular/e2e/scripts/record-c-subagents.sh`: + +```bash +#!/usr/bin/env bash +# SPDX-License-Identifier: MIT +# +# Capture a complete aimock fixture for the c-subagents graph by running the +# real langgraph dev server against aimock in --record mode. Captures every +# LLM call (orchestrator + each subagent's nested calls + tool-driven +# sub-rounds) at the HTTP layer. +# +# Why this shape (vs. direct Python LLM invocation): the c-subagents graph's +# `task` tool dispatches to subagent functions that run their own LLM-driven +# agent loops. Direct invocation only captures the orchestrator's calls; +# proxying through aimock captures every LLM call in the full graph. +# +# Run from repo root: +# OPENAI_API_KEY=sk-... bash cockpit/chat/subagents/angular/e2e/scripts/record-c-subagents.sh +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "$0")/../../../../../.." && pwd)" +cd "$REPO_ROOT" + +if [[ -z "${OPENAI_API_KEY:-}" ]]; then + # Try .env + if [[ -f "examples/chat/python/.env" ]]; then + set -a; source examples/chat/python/.env; set +a + fi +fi +if [[ -z "${OPENAI_API_KEY:-}" ]]; then + echo "OPENAI_API_KEY not set (in env or examples/chat/python/.env)" >&2 + exit 1 +fi + +AIMOCK_PORT=19999 +LANGGRAPH_PORT=8125 +FIXTURE_OUT="cockpit/chat/subagents/angular/e2e/fixtures/c-subagents.json" +TMP_DIR="$(mktemp -d)" +trap 'rm -rf "$TMP_DIR"' EXIT + +# Copy .env into the cockpit-streaming python project (gitignored) +mkdir -p cockpit/langgraph/streaming/python +cp examples/chat/python/.env cockpit/langgraph/streaming/python/.env + +# 1. Start aimock in record mode +echo "[record] starting aimock --record on :$AIMOCK_PORT" +mkdir -p "$(dirname "$FIXTURE_OUT")" +npx -y -p @copilotkit/aimock aimock \ + --port "$AIMOCK_PORT" \ + --record \ + --provider-openai https://api.openai.com \ + --fixtures "$FIXTURE_OUT" \ + --chunk-size 4096 \ + > "$TMP_DIR/aimock.log" 2>&1 & +AIMOCK_PID=$! 
+ +# Cleanup on exit +trap 'kill "$AIMOCK_PID" 2>/dev/null || true; [[ -n "${LG_PID:-}" ]] && kill -- "-$LG_PID" 2>/dev/null || true; wait 2>/dev/null || true; rm -rf "$TMP_DIR"' EXIT + +# Wait for aimock to be ready +for _ in {1..30}; do + if curl -sf "http://127.0.0.1:$AIMOCK_PORT/health" > /dev/null 2>&1; then break; fi + if curl -sf "http://127.0.0.1:$AIMOCK_PORT/" > /dev/null 2>&1; then break; fi + sleep 1 +done +echo "[record] aimock ready" + +# 2. Start langgraph dev pointed at aimock +echo "[record] starting langgraph dev on :$LANGGRAPH_PORT (OPENAI_BASE_URL=http://127.0.0.1:$AIMOCK_PORT/v1)" +( + cd cockpit/langgraph/streaming/python + OPENAI_BASE_URL="http://127.0.0.1:$AIMOCK_PORT/v1" OPENAI_API_KEY="test-record" \ + setsid uv run langgraph dev --port "$LANGGRAPH_PORT" --no-browser +) > "$TMP_DIR/langgraph.log" 2>&1 & +LG_PID=$! + +# Wait for langgraph +for i in {1..60}; do + if curl -sf "http://127.0.0.1:$LANGGRAPH_PORT/ok" > /dev/null; then break; fi + sleep 1 +done +if ! curl -sf "http://127.0.0.1:$LANGGRAPH_PORT/ok" > /dev/null; then + echo "[record] langgraph failed to start; tail of log:" >&2 + tail -30 "$TMP_DIR/langgraph.log" >&2 + exit 2 +fi +echo "[record] langgraph ready" + +# 3. Submit a run via the LangGraph SDK HTTP API +THREAD=$(curl -sf -X POST "http://127.0.0.1:$LANGGRAPH_PORT/threads" -H 'content-type: application/json' -d '{}' | python3 -c 'import sys,json; print(json.load(sys.stdin)["thread_id"])') +echo "[record] thread: $THREAD" +RUN=$(curl -sf -X POST "http://127.0.0.1:$LANGGRAPH_PORT/threads/$THREAD/runs" \ + -H 'content-type: application/json' \ + -d '{ + "assistant_id": "c-subagents", + "input": {"messages": [{"role": "user", "content": "Plan a trip from LAX to JFK"}]} + }' | python3 -c 'import sys,json; print(json.load(sys.stdin)["run_id"])') +echo "[record] run: $RUN" + +# 4. Poll until run completes (next: [] AND no in-flight steps) +echo "[record] waiting for run to complete (this hits real OpenAI; ~30-90s)..." 
+for i in {1..120}; do + STATE=$(curl -sf "http://127.0.0.1:$LANGGRAPH_PORT/threads/$THREAD/state" | python3 -c 'import sys,json; s=json.load(sys.stdin); print(len(s.get("next",[])))') + if [[ "$STATE" == "0" ]]; then break; fi + sleep 2 +done + +# Verify completion +if [[ "$STATE" != "0" ]]; then + echo "[record] run did not complete within timeout" >&2 + exit 3 +fi +MSG_COUNT=$(curl -sf "http://127.0.0.1:$LANGGRAPH_PORT/threads/$THREAD/state" | python3 -c 'import sys,json; s=json.load(sys.stdin); print(len(s["values"].get("messages",[])))') +echo "[record] run complete; ${MSG_COUNT} messages in state" + +# 5. Give aimock a moment to flush +sleep 2 + +# 6. Cleanup is handled by trap; aimock writes the fixture file on shutdown +# Verify the fixture got written and is non-empty +if [[ ! -s "$FIXTURE_OUT" ]]; then + echo "[record] fixture file is missing or empty: $FIXTURE_OUT" >&2 + echo "[record] aimock log tail:" >&2 + tail -30 "$TMP_DIR/aimock.log" >&2 + exit 4 +fi +echo "[record] fixture written: $FIXTURE_OUT ($(wc -c < "$FIXTURE_OUT") bytes)" +ENTRY_COUNT=$(python3 -c 'import json,sys; d=json.load(open(sys.argv[1])); print(len(d.get("fixtures",[])))' "$FIXTURE_OUT") +echo "[record] $ENTRY_COUNT fixture entries" +``` + +Make executable: +```bash +chmod +x cockpit/chat/subagents/angular/e2e/scripts/record-c-subagents.sh +``` + +- [ ] **Step 2: Run the capture script** + +```bash +cd /tmp/c-subagents +node libs/licensing/scripts/generate-public-key.mjs 2>&1 | tail -1 +bash cockpit/chat/subagents/angular/e2e/scripts/record-c-subagents.sh +``` + +Expected output (approximate): +``` +[record] starting aimock --record on :19999 +[record] aimock ready +[record] starting langgraph dev on :8125 (...) +[record] langgraph ready +[record] thread: +[record] run: +[record] waiting for run to complete ... 
+[record] run complete; <N> messages in state +[record] fixture written: cockpit/chat/subagents/angular/e2e/fixtures/c-subagents.json (<N> bytes) +[record] <N> fixture entries +``` + +If the script reports `langgraph failed to start` or `run did not complete`, STOP and inspect `$TMP_DIR/langgraph.log`. The script prints only the last 30 lines of that log (and only on a startup failure), and the EXIT trap deletes `$TMP_DIR`, so temporarily remove the `rm -rf "$TMP_DIR"` from the trap if you need the full log. Likely causes: a missing python dep (`uv sync` first) or a stale langgraph process on port 8125 (`lsof -ti :8125 | xargs kill -9`). + +If `fixture file is missing or empty`, aimock might need a SIGTERM (not SIGKILL) to flush — adjust the trap to use `SIGTERM` and add a `sleep 3` after. + +If fixture has only 1-2 entries, the orchestrator hit a recursion limit before completing — increase the script's run-complete timeout or check the run state for errors. + +DO NOT commit `cockpit/langgraph/streaming/python/.env` (gitignored, verify with `git status`). + +- [ ] **Step 3: Inspect the captured fixture** + +```bash +cd /tmp/c-subagents +python3 -c " +import json +d = json.load(open('cockpit/chat/subagents/angular/e2e/fixtures/c-subagents.json')) +print(f'entries: {len(d[\"fixtures\"])}') +for i, fx in enumerate(d['fixtures'][:5]): + match = fx.get('match', {}) + resp = fx.get('response', {}) + summary = ( + f'tool_calls={len(resp.get(\"toolCalls\", []))}' if 'toolCalls' in resp + else f'content_len={len(resp.get(\"content\", \"\"))}' + ) + print(f' [{i}] match={list(match.keys())} → {summary}') +" +``` + +Expected: 5+ entries (orchestrator first + each subagent role's calls + orchestrator continuation). Note a distinctive phrase from the continuation entry's content for Task 4's assertion.
+ +- [ ] **Step 4: Commit Task 3** + +```bash +cd /tmp/c-subagents +git add cockpit/chat/subagents/angular/e2e/scripts/record-c-subagents.sh \ + cockpit/chat/subagents/angular/e2e/fixtures/c-subagents.json +git commit -m "feat(cockpit-chat-subagents): add capture script + fixture" +``` + +--- + +## Task 4: Write the c-subagents spec + +**Files:** +- Create: `cockpit/chat/subagents/angular/e2e/c-subagents.spec.ts` + +- [ ] **Step 1: Write the spec** + +Write `cockpit/chat/subagents/angular/e2e/c-subagents.spec.ts`: + +```typescript +// SPDX-License-Identifier: MIT +import { test, expect } from '@playwright/test'; +import { sendPromptAndWait } from '../../../../../libs/internal/aimock-harness/src'; + +const PROMPT = 'Plan a trip from LAX to JFK'; + +test('c-subagents: orchestrator dispatches task subagents, summary surfaces in bubble', async ({ + page, +}) => { + const bubble = await sendPromptAndWait(page, PROMPT); + + // At least one subagent card rendered in the chat-subagents UI primitive. + // Proves the orchestrator's `task` tool_call routed through chat-subagents' + // default subagentToolNames filter (which is ['task']). + const subagentCard = page.locator('chat-subagent-card').first(); + await expect(subagentCard).toBeAttached({ timeout: 30_000 }); + + // Final summary text contains an aviation-related phrase from the captured + // continuation. Loose regex so refactors to the subagent prompts (research/ + // booking/itinerary outputs) don't break the test. + const finalText = await bubble.innerText(); + expect(finalText.toLowerCase()).toMatch(/lax|jfk|itinerary|trip|flight/); +}); +``` + +- [ ] **Step 2: Run the spec** + +```bash +cd /tmp/c-subagents +npx playwright install --with-deps chromium # idempotent if already installed +npx nx e2e cockpit-chat-subagents-angular --skip-nx-cache +``` + +Expected: 1 test passes within ~60–120s (Angular dev-server cold-start dominates). 
+ +If the spec fails: +- "subagent card not attached" → check the trace at `cockpit/chat/subagents/angular/e2e/test-results/`. The selector `chat-subagent-card` is verified against `libs/chat/src/lib/compositions/chat-subagent-card/chat-subagent-card.component.ts:33`. If a refactor renamed it, update the selector. +- "innerText regex didn't match" → look at the captured fixture's continuation text; pick a phrase that appears verbatim and update the regex. +- Otherwise STOP and report the failure. + +- [ ] **Step 3: Stability check** + +Run 3 times with port cooldown: + +```bash +cd /tmp/c-subagents +for i in 1 2 3; do + echo "=== Run $i ===" + rm -rf cockpit/chat/subagents/angular/e2e/test-results cockpit/chat/subagents/angular/e2e/playwright-report + sleep 8 + npx nx e2e cockpit-chat-subagents-angular --skip-nx-cache +done +``` + +Expected: 3/3 pass. + +- [ ] **Step 4: Commit Task 4** + +```bash +cd /tmp/c-subagents +git add cockpit/chat/subagents/angular/e2e/c-subagents.spec.ts +git commit -m "test(cockpit-chat-subagents): aimock e2e — orchestrator task fanout" +``` + +--- + +## Task 5: Update CI loop + +**Files:** +- Modify: `.github/workflows/ci.yml` + +- [ ] **Step 1: Locate the cockpit-e2e loop** + +Open `.github/workflows/ci.yml`. 
Find the cockpit-e2e job's run step (currently iterates `cockpit-langgraph-streaming-angular cockpit-chat-tool-calls-angular`): + +```yaml + - name: Run cockpit example aimock e2e suites + run: | + set -e + for proj in cockpit-langgraph-streaming-angular cockpit-chat-tool-calls-angular; do + echo "::group::nx e2e $proj" + npx nx e2e "$proj" --skip-nx-cache + echo "::endgroup::" + sleep 5 + done +``` + +- [ ] **Step 2: Append cockpit-chat-subagents-angular to the loop** + +Change the `for proj in ...` line to include the new project: + +```yaml + for proj in cockpit-langgraph-streaming-angular cockpit-chat-tool-calls-angular cockpit-chat-subagents-angular; do +``` + +- [ ] **Step 3: Verify YAML parses** + +```bash +cd /tmp/c-subagents +npx -y js-yaml .github/workflows/ci.yml > /dev/null && echo "OK" +``` + +Expected: `OK`. + +- [ ] **Step 4: Commit Task 5** + +```bash +cd /tmp/c-subagents +git add .github/workflows/ci.yml +git commit -m "ci(cockpit): include cockpit-chat-subagents-angular in e2e loop" +``` + +--- + +## Task 6: Verify, push, open PR + +- [ ] **Step 1: Final local verification — run all three sequentially** + +```bash +cd /tmp/c-subagents +lsof -ti :8123 :8124 :8125 :4300 :4504 :4505 2>/dev/null | xargs kill -9 2>/dev/null +ps aux | grep -E "uv |langgraph dev" | grep -v grep | awk '{print $2}' | xargs kill -9 2>/dev/null +sleep 5 +for proj in cockpit-langgraph-streaming-angular cockpit-chat-tool-calls-angular cockpit-chat-subagents-angular; do + echo "=== $proj ===" + npx nx e2e "$proj" --skip-nx-cache 2>&1 | tail -3 + sleep 5 +done +``` + +Expected: all 3 projects pass. + +If any fail, STOP and report. 
+ +- [ ] **Step 2: Confirm working tree is clean** + +```bash +cd /tmp/c-subagents +rm -rf cockpit/chat/subagents/angular/e2e/test-results cockpit/chat/subagents/angular/e2e/playwright-report +rm -rf cockpit/chat/tool-calls/angular/e2e/test-results cockpit/langgraph/streaming/angular/e2e/test-results +git status --short +``` + +Expected: only the `node_modules` symlink and the `.env` file at `cockpit/langgraph/streaming/python/.env` as untracked. Both are gitignored. + +- [ ] **Step 3: Push branch** + +```bash +cd /tmp/c-subagents +git push -u origin claude/cockpit-aimock-c-subagents +``` + +- [ ] **Step 4: Open PR** + +```bash +gh pr create --title "test(cockpit): aimock e2e — c-subagents (Phase 3)" --body "$(cat <<'EOF' +## Summary + +Adds a per-example aimock e2e for \`c-subagents\` (orchestrator LLM with a \`task\` tool that dispatches subagents). Second per-example spec under the harness library landed in Phase 2 ([#356](https://github.com/cacheplane/angular-agent-framework/pull/356)). + +- **New per-example dir** at \`cockpit/chat/subagents/angular/e2e/\` (configs, fixture, capture script, spec). +- **Per-example langgraph port** 8125 (streaming=8123, tool-calls=8124, subagents=8125). \`proxy.conf.json\` target updated to match. +- **Fixture** captured from real \`gpt-5-mini\` for the prompt \"Plan a trip from LAX to JFK\". One entry per LLM call in the graph: the orchestrator's first call (with \`task\` tool_calls), each subagent's calls, and the orchestrator continuation (with \`hasToolResult: true\`). +- **CI loop** updated to include the new project. + +Sits on Phase 2 ([#356](https://github.com/cacheplane/angular-agent-framework/pull/356)) + the c-* aviation refactor PR 1 ([#347](https://github.com/cacheplane/angular-agent-framework/pull/347)).
+ +## Test plan + +- [x] Pilot spec passes 3/3 stability runs locally +- [x] All three cockpit example e2e suites pass sequentially via the CI loop locally (streaming + tool-calls + subagents) +- [x] No harness library changes (proves the Phase 2 library handles richer scenarios) +- [ ] CI green on this PR + +## Notes for reviewers + +- Spec assertions are loose by design: presence of any \`<chat-subagent-card>\` + an aviation-related phrase in the final summary. Subagent prompts (research/booking/itinerary) can be edited without breaking the test. +- The \`task\` tool execution happens server-side in langgraph (real subagent functions); aimock mocks every LLM call the graph makes (the orchestrator's and each subagent's). + +Spec: \`docs/superpowers/specs/2026-05-16-cockpit-aimock-c-subagents-design.md\` +Plan: \`docs/superpowers/plans/2026-05-16-cockpit-aimock-c-subagents.md\` +EOF +)" +``` + +- [ ] **Step 5: Watch CI** + +```bash +gh pr checks --watch --interval 30 +``` + +When green, merge with `--squash` and clean up worktree. + +--- + +## Self-review checklist + +- [x] Spec coverage: library reuse (Tasks 1+4), per-example layout (Tasks 1+2+3+4), CI loop (Task 5), per-example port (Task 1+2), capture script + fixture (Task 3), spec assertions (Task 4), acceptance criteria (Task 6). +- [x] Placeholder scan: no TBD. `<placeholder>`-style placeholders avoided — the spec assertion uses a fixed loose regex. +- [x] Type consistency: `createGlobalSetup`, `sendPromptAndWait` match the library's exports as committed in PR #356. +- [x] Constraints: `@copilotkit/aimock` referenced in imports/plans only, NOT in commit messages or PR body. + +## Execution handoff + +Plan complete. Recommended: **subagent-driven-development** with one implementer for Tasks 1–5 (sequential, similar shape to Phase 2 c-tool-calls). Task 6 (push + PR + watch CI + merge) handled by the orchestrator.
diff --git a/docs/superpowers/specs/2026-05-16-cockpit-aimock-c-subagents-design.md b/docs/superpowers/specs/2026-05-16-cockpit-aimock-c-subagents-design.md new file mode 100644 index 000000000..82c17bd3c --- /dev/null +++ b/docs/superpowers/specs/2026-05-16-cockpit-aimock-c-subagents-design.md @@ -0,0 +1,149 @@ +# Cockpit aimock E2E — Phase 3: c-subagents + +> **Place in the larger plan.** Second per-example spec under the harness library landed in Phase 2 ([#356](https://github.com/cacheplane/angular-agent-framework/pull/356)). Validates the harness on a richer scenario than the c-tool-calls pilot — the orchestrator emits N task-tool fanouts (subagent dispatches) rather than a single tool call. + +## Goal + +Add an aimock-driven Playwright spec for `c-subagents`: the orchestrator LLM dispatches subagents via the `task` tool, the `<chat-subagents>` UI primitive renders cards, and the orchestrator's final summary surfaces in the conversation. One new per-example e2e dir at `cockpit/chat/subagents/angular/e2e/`. No harness library changes — the library handles this scenario as-is. + +## Architecture + +Same per-example shape as Phase 2's `c-tool-calls/`. Differences: + +- **Angular project:** `cockpit-chat-subagents-angular` on port 4505 (per `apps/cockpit/scripts/capability-registry.ts`). +- **LangGraph port:** `8125`, next in sequence after streaming (8123) and tool-calls (8124). Avoids the TIME_WAIT collision pattern fixed in Phase 2. +- **Python graph:** `c_subagents` from `cockpit/langgraph/streaming/python/src/chat_graphs.py:_build_subagents_graph()` — orchestrator LLM bound with one `task` tool that dispatches to three subagent functions (research / booking / itinerary). +- **Multi-turn fixture shape:** parent's first call emits N `task` tool_calls (one or more, depends on what the captured LLM returns); ToolNode executes each `task(role, description)` server-side; orchestrator continuation emits final summary text.
Same `hasToolResult: true` discriminator on the continuation entry as Phase 2's c-tool-calls. + +## File layout + +``` +cockpit/chat/subagents/angular/e2e/ +├── playwright.config.ts # relative-import ../../../../../libs/internal/aimock-harness/src +├── global-setup-impl.ts # langgraphPort: 8125 +├── tsconfig.json +├── .gitignore +├── fixtures/c-subagents.json # captured: parent task tool_calls + orchestrator continuation +├── scripts/record-c-subagents.sh # dev-only capture recipe +└── c-subagents.spec.ts # asserts subagent card + final summary phrase +``` + +Modified: +- `cockpit/chat/subagents/angular/proxy.conf.json` — `target: "http://localhost:8125"` (was `:8123`) +- `cockpit/chat/subagents/angular/project.json` — add `e2e` target +- `.github/workflows/ci.yml` — add `cockpit-chat-subagents-angular` to the per-example CI loop + +## Components + +### `fixtures/c-subagents.json` + +Captured from real `gpt-5-mini` via the recipe script. The orchestrator's behavior under the `task` tool is what we mock: + +```json +{ + "fixtures": [ + { + "match": { "userMessage": "<PROMPT>", "hasToolResult": true }, + "response": { "content": "<orchestrator final summary>" } + }, + { + "match": { "userMessage": "<PROMPT>" }, + "response": { + "toolCalls": [ + { "name": "task", "arguments": { "role": "research", "task_description": "..." } }, + { "name": "task", "arguments": { "role": "booking", "task_description": "..." } }, + { "name": "task", "arguments": { "role": "itinerary", "task_description": "..." } } + ] + } + } + ] +} +``` + +If the LLM dispatches subagents one-at-a-time across multiple LLM calls (rather than fanning out in a single response), the fixture needs intermediate entries. The capture script discovers the actual shape and writes whatever the LLM did. The spec's assertions stay loose enough to tolerate both shapes.
+ +### `scripts/record-c-subagents.sh` + +Direct-LLM-invocation capture (Phase 2 c-tool-calls pattern) does NOT work for c-subagents: the `task` tool dispatches to subagent functions that EACH run their own LLM-driven agent loop. Direct invocation captures only the orchestrator's calls; subagent LLM calls (with role-specific system prompts and possibly tool-driven sub-rounds) go uncaptured and aimock 404s on them at replay time. + +The right capture is at the HTTP boundary. The script orchestrates three pieces: + +1. **aimock** in `--record` mode (`npx aimock --record --provider-openai https://api.openai.com --fixtures <path>`). Proxies unmatched LLM requests to real OpenAI, saves every interaction as a fixture entry. +2. **langgraph dev** for the cockpit-streaming python project, with `OPENAI_BASE_URL=http://127.0.0.1:<aimock-port>/v1`. +3. **A series of LangGraph HTTP API calls** that create a thread, submit a run with the prompt against the `c-subagents` graph, and poll until the run completes. + +After cleanup, the fixture file has entries for every LLM call in the full graph: orchestrator first call, each subagent role's calls, any sub-rounds within subagent agent loops, and the orchestrator continuation. Aimock's replay engine matches each call by its own discriminators (user/system message, tool result presence, etc.) — no manual entry hand-stitching needed. + +This is the canonical "capture multi-LLM flows" pattern and is reusable for future cockpit examples with similar shape (c-interrupts when refactored, c-generative-ui dashboard, etc.).
+ +### `c-subagents.spec.ts` + +```typescript +import { test, expect } from '@playwright/test'; +import { sendPromptAndWait } from '../../../../../libs/internal/aimock-harness/src'; + +const PROMPT = 'Plan a trip from LAX to JFK'; + +test('c-subagents: orchestrator dispatches task subagents, summary surfaces in bubble', async ({ page }) => { + const bubble = await sendPromptAndWait(page, PROMPT); + + // At least one subagent card rendered in the chat-subagents UI primitive. + // Proves the orchestrator's `task` tool_call routed through the chat-subagents + // primitive's filter (default subagentToolNames = ['task']). + const subagentCard = page.locator('chat-subagent-card').first(); + await expect(subagentCard).toBeAttached({ timeout: 30_000 }); + + // Final summary text contains an aviation-related phrase from the captured + // continuation. Loose match so refactors to the subagent prompts don't + // break the test. + const finalText = await bubble.innerText(); + expect(finalText.toLowerCase()).toMatch(/lax|jfk|itinerary|trip|flight/); +}); +``` + +### `global-setup-impl.ts` + +```typescript +import { resolve } from 'node:path'; +import { createGlobalSetup } from '../../../../../libs/internal/aimock-harness/src'; + +export default createGlobalSetup({ + langgraphCwd: 'cockpit/langgraph/streaming/python', + langgraphPort: 8125, + angularProject: 'cockpit-chat-subagents-angular', + angularPort: 4505, + fixturesDir: resolve(__dirname, 'fixtures'), +}); +``` + +### CI loop update + +In `.github/workflows/ci.yml`: + +```yaml +for proj in cockpit-langgraph-streaming-angular cockpit-chat-tool-calls-angular cockpit-chat-subagents-angular; do +``` + +One-line addition to the existing loop. + +## Risks and unknowns + +- **Orchestration shape varies per LLM call.** The `gpt-5-mini` model might dispatch 3 task tool_calls in one response, or one at a time across 3 LLM rounds, or some hybrid. The capture script handles the actual shape; the spec's assertions stay loose. 
If the LLM happens to NOT dispatch any task calls on the captured run (just emits text), STOP and try a more explicit prompt. +- **Subagent card selector.** Spec asserts `chat-subagent-card`. Verified via `libs/chat/src/lib/compositions/chat-subagent-card/chat-subagent-card.component.ts:33` selector. If a future refactor renames the component, the spec breaks visibly with a clear locator-not-found error, not silently. +- **Port 8125 conflict.** Already used elsewhere in dev tooling? Quick grep verified no current bindings in the repo. Future cockpit examples pick 8126, 8127, etc. +- **No new harness changes.** The library's globalSetup factory already accepts `langgraphPort`. No risk to other examples; Phase 2's streaming and tool-calls keep their existing ports. + +## Acceptance criteria + +Phase 3 merges when: +- `cockpit/chat/subagents/angular/e2e/` exists with all per-example files (config, fixture, spec, capture script, tsconfig, .gitignore). +- `cockpit/chat/subagents/angular/proxy.conf.json` targets `:8125`. +- `cockpit/chat/subagents/angular/project.json` has an `e2e` target pointing at the new playwright config. +- `nx e2e cockpit-chat-subagents-angular` passes locally + 3/3 stability runs. +- The CI loop in `.github/workflows/ci.yml` runs all three examples (streaming + tool-calls + subagents) green sequentially. +- No changes to `libs/internal/aimock-harness/` (proves library design is sound for richer scenarios). + +## What lands next (Phase 4+, NOT this PR) + +- Each remaining cockpit example targeted by aimock — one PR each. +- Eventually: migrate the chat aimock harness (`examples/chat/aimock-e2e/`) onto the same library to eliminate the duplicate `aimock-runner.ts`.