Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion code-rs/core/src/skills/render.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ pub fn render_skills_section(skills: &[SkillMetadata]) -> Option<String> {
3) If `scripts/` exist, prefer running or patching them instead of retyping large code blocks.
4) If `assets/` or templates exist, reuse them instead of recreating from scratch.
- Coordination and sequencing:
- If multiple skills apply, choose the minimal set that covers the request and state the order you'll use them.
- Match skills independently against every part of the request. One relevant skill does not suppress another: for example, regression investigation and durable GitHub planning can require separate skills in the same turn.
- If multiple skills apply, use all relevant mandatory or delegated skills before ordinary work. Do not choose only one when another skill also matches the user's request.
- For non-binding matches, choose the minimal set that covers the request and state the order you'll use them.
- Announce which skill(s) you're using and why (one short line). If you skip an obvious skill, say why.
- Context hygiene:
- Keep context small: summarize long sections instead of pasting them; only load extra files when needed.
Expand Down Expand Up @@ -71,6 +73,9 @@ mod tests {
assert!(rendered.contains("Open its `SKILL.md` before taking other investigative"));
assert!(rendered.contains("If a skill description tells you to use another named skill"));
assert!(rendered.contains("find that delegated skill in the Available Skills list"));
assert!(rendered.contains("Match skills independently against every part of the request"));
assert!(rendered.contains("regression investigation and durable GitHub planning"));
assert!(rendered.contains("use all relevant mandatory or delegated skills"));
assert!(!rendered.contains("After deciding to use a skill"));
}
}
25 changes: 24 additions & 1 deletion tools/code-exec-harness/harness.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ def run_exec(command: list[str], scenario: dict[str, Any], paths: RunPaths, env:
def summarize(events: list[dict[str, Any]], paths: RunPaths, returncode: int, command: list[str]) -> dict[str, Any]:
final_message = None
commands = []
command_starts = []
running_commands: dict[str, str] = {}
file_changes = []
usage = None
Expand All @@ -446,7 +447,9 @@ def summarize(events: list[dict[str, Any]], paths: RunPaths, returncode: int, co
call_id = msg.get("call_id")
if isinstance(call_id, str):
raw_command = msg.get("command", [])
running_commands[call_id] = " ".join(raw_command) if isinstance(raw_command, list) else str(raw_command)
command_text = " ".join(raw_command) if isinstance(raw_command, list) else str(raw_command)
running_commands[call_id] = command_text
command_starts.append(command_text)
elif msg_type == "exec_command_end":
call_id = msg.get("call_id")
commands.append({
Expand Down Expand Up @@ -484,6 +487,7 @@ def summarize(events: list[dict[str, Any]], paths: RunPaths, returncode: int, co
"event_count": len(events),
"final_message": final_message,
"commands": commands,
"command_starts": command_starts,
"file_changes": file_changes,
"errors": errors,
"gh_calls": gh_calls,
Expand All @@ -497,12 +501,31 @@ def assert_expectations(summary: dict[str, Any], scenario: dict[str, Any]) -> li
failures: list[str] = []
expect = scenario.get("expect", {})
final_message = summary.get("final_message") or ""
completed_commands = [str(command.get("command") or "") for command in summary.get("commands", [])]
started_commands = [str(command) for command in summary.get("command_starts", [])]
for needle in expect.get("assistant_contains", []):
if str(needle) not in final_message:
failures.append(f"assistant message did not contain {needle!r}")
for needle in expect.get("command_contains", []):
if not any(str(needle) in str(command.get("command")) for command in summary.get("commands", [])):
failures.append(f"no completed command contained {needle!r}")
prefix_needles = [str(needle) for needle in expect.get("command_prefix_contains", [])]
prefix = started_commands[:len(prefix_needles)]
for index, needle in enumerate(prefix_needles):
if index >= len(prefix):
failures.append(f"launch prefix missing command {index + 1} containing {needle!r}")
continue
if needle not in prefix[index]:
failures.append(f"launch prefix command {index + 1} did not contain {needle!r}")
for needle in expect.get("command_order_contains", []):
position = next((index for index, command in enumerate(started_commands) if str(needle) in command), None)
if position is None:
failures.append(f"no launched command contained {needle!r}")
continue
earlier = "\n".join(started_commands[:position])
for before in expect.get("before_command_contains", {}).get(str(needle), []):
if str(before) not in earlier:
failures.append(f"no earlier command contained {before!r} before {needle!r}")
for needle in expect.get("gh_contains", []):
text = "\n".join(" ".join(call.get("argv", [])) for call in summary.get("gh_calls", []))
if str(needle) not in text:
Expand Down
2 changes: 1 addition & 1 deletion tools/code-exec-harness/scenarios/github-plan-smoke.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
},
"expect": {
"returncode": 0,
"gh_contains": ["issue list"],
"gh_contains": ["issue list --json number,title,state"],
"assistant_contains": ["parent"]
},
"max_seconds": 90,
Expand Down
26 changes: 26 additions & 0 deletions tools/code-exec-harness/scenarios/skill-trigger-binding-smoke.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"name": "skill-trigger-binding-smoke",
"prompt": "Use recent screen context to disambiguate what I was working on, and also make this a durable GitHub-backed recovery plan. Demonstrate the correct skill selection by opening only the relevant skill instructions first, then run `git status --short --branch` and `gh issue list --repo cbusillo/code --state all --limit 5 --json number,title,state`. Give a one-sentence answer. Do not modify files.",
"files": {
"README.md": "# Harness fixture\n"
},
"gh": {
"repo": "cbusillo/code",
"issues": []
},
"expect": {
"returncode": 0,
"command_contains": ["chronicle/SKILL.md", "github-plan/SKILL.md"],
"command_prefix_contains": ["chronicle/SKILL.md", "github-plan/SKILL.md"],
"command_order_contains": ["git status", "gh issue list"],
"before_command_contains": {
"git status": ["chronicle/SKILL.md", "github-plan/SKILL.md"],
"gh issue list": ["github-plan/SKILL.md"]
},
"gh_contains": [
"issue list --repo cbusillo/code --state all --limit 5 --json number,title,state"
]
},
"max_seconds": 120,
"timeout_seconds": 180
}