Skip to content

Commit 2ca1c16

Browse files
committed
fix(adf): remediate agent spawning (opencode args, model flags, resource limits, compound review) Refs #117
1 parent 979f700 commit 2ca1c16

5 files changed

Lines changed: 237 additions & 2 deletions

File tree

crates/terraphim_orchestrator/src/compound.rs

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1060,4 +1060,67 @@ Done!"#;
10601060
assert!(!output.pass);
10611061
assert_eq!(output.findings.len(), 1);
10621062
}
1063+
1064+
// =========================================================================
1065+
// ADF Remediation Tests (Gitea #117)
1066+
// =========================================================================
1067+
1068+
// Checks that a CompoundReviewConfig with cli_tool, provider and model all set
// is propagated to every group built by SwarmConfig::from_compound_config.
#[test]
1069+
fn test_compound_config_cli_tool_override() {
1070+
let config = CompoundReviewConfig {
1071+
schedule: "0 2 * * *".to_string(),
1072+
max_duration_secs: 1800,
1073+
repo_path: PathBuf::from("/tmp"),
1074+
create_prs: false,
1075+
worktree_root: PathBuf::from("/tmp/worktrees"),
1076+
base_branch: "main".to_string(),
1077+
max_concurrent_agents: 3,
1078+
cli_tool: Some("/home/alex/.bun/bin/opencode".to_string()),
1079+
provider: Some("opencode-go".to_string()),
1080+
model: Some("glm-5".to_string()),
1081+
};
1082+
let swarm = SwarmConfig::from_compound_config(&config);
1083+
// NOTE(review): if `swarm.groups` is empty this loop asserts nothing and the
// test passes vacuously -- consider also asserting the groups are non-empty.
for group in &swarm.groups {
1084+
// The configured path is expected verbatim on each group.
assert_eq!(group.cli_tool, "/home/alex/.bun/bin/opencode");
1085+
// The expected model string is the provider and model joined by '/'.
assert_eq!(group.model, Some("opencode-go/glm-5".to_string()));
1086+
}
1087+
}
1088+
1089+
// Checks that leaving cli_tool, provider and model as None yields a first
// group with cli_tool "opencode" and no model.
#[test]
1090+
fn test_compound_config_no_override() {
1091+
let config = CompoundReviewConfig {
1092+
schedule: "0 2 * * *".to_string(),
1093+
max_duration_secs: 1800,
1094+
repo_path: PathBuf::from("/tmp"),
1095+
create_prs: false,
1096+
worktree_root: PathBuf::from("/tmp/worktrees"),
1097+
base_branch: "main".to_string(),
1098+
max_concurrent_agents: 3,
1099+
cli_tool: None,
1100+
provider: None,
1101+
model: None,
1102+
};
1103+
let swarm = SwarmConfig::from_compound_config(&config);
1104+
// Should use default groups unchanged
// NOTE(review): only groups[0] is inspected; indexing panics if no groups
// are produced, and the remaining groups are not checked.
1105+
assert_eq!(swarm.groups[0].cli_tool, "opencode");
1106+
assert!(swarm.groups[0].model.is_none());
1107+
}
1108+
1109+
// Checks that SwarmConfig::from_compound_config turns max_duration_secs into
// the swarm timeout: 900 seconds in, Duration::from_secs(900) out.
#[test]
1110+
fn test_compound_config_timeout_uses_max_duration() {
1111+
let config = CompoundReviewConfig {
1112+
schedule: "0 2 * * *".to_string(),
1113+
// Differs from the 1800 used by the sibling tests in this section.
max_duration_secs: 900,
1114+
repo_path: PathBuf::from("/tmp"),
1115+
create_prs: false,
1116+
worktree_root: PathBuf::from("/tmp/worktrees"),
1117+
base_branch: "main".to_string(),
1118+
max_concurrent_agents: 3,
1119+
cli_tool: None,
1120+
provider: None,
1121+
model: None,
1122+
};
1123+
let swarm = SwarmConfig::from_compound_config(&config);
1124+
assert_eq!(swarm.timeout, Duration::from_secs(900));
1125+
}
10631126
}

crates/terraphim_orchestrator/src/lib.rs

Lines changed: 130 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -697,19 +697,70 @@ impl AgentOrchestrator {
697697
async fn poll_agent_exits(&mut self) {
698698
// Collect exited agents first to avoid borrow conflict
699699
let mut exited: Vec<(String, AgentDefinition, std::process::ExitStatus)> = Vec::new();
700+
// Collect agents that exceeded their wall-clock timeout
701+
let mut timed_out: Vec<String> = Vec::new();
702+
700703
for (name, managed) in &mut self.active_agents {
701704
match managed.handle.try_wait() {
702705
Ok(Some(status)) => {
703706
exited.push((name.clone(), managed.definition.clone(), status));
704707
}
705-
Ok(None) => {} // still running
708+
Ok(None) => {
709+
// Still running -- check wall-clock timeout
710+
if let Some(max_secs) = managed.definition.max_cpu_seconds {
711+
let elapsed = managed.started_at.elapsed();
712+
if elapsed > Duration::from_secs(max_secs) {
713+
warn!(
714+
agent = %name,
715+
elapsed_secs = elapsed.as_secs(),
716+
max_secs = max_secs,
717+
"agent exceeded wall-clock timeout, killing"
718+
);
719+
timed_out.push(name.clone());
720+
}
721+
}
722+
}
706723
Err(e) => {
707724
warn!(agent = %name, error = %e, "try_wait failed");
708725
}
709726
}
710727
}
711728

712-
// Process exits
729+
// Kill timed-out agents
730+
for name in timed_out {
731+
if let Some(mut managed) = self.active_agents.remove(&name) {
732+
let grace = Duration::from_secs(
733+
managed.definition.grace_period_secs.unwrap_or(5),
734+
);
735+
match managed.handle.shutdown(grace).await {
736+
Ok(graceful) => {
737+
info!(
738+
agent = %name,
739+
graceful = graceful,
740+
"timed-out agent terminated"
741+
);
742+
}
743+
Err(e) => {
744+
warn!(agent = %name, error = %e, "failed to kill timed-out agent");
745+
}
746+
}
747+
// Handle exit based on layer (similar to handle_agent_exit but for timeout)
748+
if managed.definition.layer == AgentLayer::Safety {
749+
let count = self.restart_counts.entry(name.clone()).or_insert(0);
750+
*count += 1;
751+
self.restart_cooldowns.insert(name.clone(), Instant::now());
752+
info!(
753+
agent = %name,
754+
restart_count = *count,
755+
"safety agent timed out, will restart after cooldown"
756+
);
757+
} else {
758+
info!(agent = %name, layer = ?managed.definition.layer, "agent timed out");
759+
}
760+
}
761+
}
762+
763+
// Process natural exits
713764
for (name, def, status) in exited {
714765
self.active_agents.remove(&name);
715766
self.handle_agent_exit(&name, &def, status);
@@ -1553,4 +1604,81 @@ sfia_skills = [{ code = "TEST", name = "Testing", level = 4, description = "Desi
15531604
assert!(validate_agent_name("agent@host").is_err()); // @
15541605
assert!(validate_agent_name("agent.name").is_err()); // dots
15551606
}
1607+
1608+
// =========================================================================
1609+
// ADF Remediation Tests (Gitea #117)
1610+
// =========================================================================
1611+
1612+
// Pins the expected "provider/model" string for the opencode CLI when both
// provider and model are set.
// NOTE(review): the composition logic is written out inline below; no
// production function is called, so this test cannot catch a regression in
// spawn_agent itself.
#[test]
1613+
fn test_provider_model_composition_opencode() {
1614+
// Simulate what spawn_agent does for opencode with provider + model
1615+
let provider = Some("kimi-for-coding".to_string());
1616+
let model = Some("k2p5".to_string());
1617+
let cli_name = "opencode";
1618+
1619+
let composed = if cli_name == "opencode" {
1620+
match (&provider, &model) {
1621+
// Both present: join as "<provider>/<model>".
(Some(p), Some(m)) => Some(format!("{}/{}", p, m)),
1622+
// Otherwise the model is passed through as-is.
_ => model,
1623+
}
1624+
} else {
1625+
model
1626+
};
1627+
assert_eq!(composed, Some("kimi-for-coding/k2p5".to_string()));
1628+
}
1629+
1630+
// Pins that for a CLI name other than "opencode" the model string is left
// untouched even when a provider is set.
// NOTE(review): like the opencode variant, the branching logic is duplicated
// inline here rather than exercised through production code.
#[test]
1630+
fn test_provider_model_composition_claude_unchanged() {
1631+
// Claude should not have provider/model composed
1632+
let provider = Some("anthropic".to_string());
1633+
let model = Some("claude-opus-4-6".to_string());
1634+
let cli_name = "claude";
1635+
1636+
let composed = if cli_name == "opencode" {
1637+
match (&provider, &model) {
1638+
(Some(p), Some(m)) => Some(format!("{}/{}", p, m)),
1639+
_ => model.clone(),
1640+
}
1641+
} else {
1642+
// Branch taken in this test: cli_name is "claude".
model.clone()
1643+
};
1644+
assert_eq!(composed, Some("claude-opus-4-6".to_string()));
1645+
}
1647+
1648+
// Spawns a `sleep 60` agent with a 1-second limit, waits 2 seconds, then
// checks that poll_agent_exits removes it from active_agents.
// NOTE(review): this test sleeps for 2 real seconds and spawns the `sleep`
// binary, so it presumably requires a Unix-like environment -- confirm.
#[tokio::test]
1649+
async fn test_wall_clock_timeout_kills_agent() {
1650+
let mut config = test_config_fast_lifecycle();
1651+
// Use sleep agent with 1-second timeout
1652+
config.agents = vec![AgentDefinition {
1653+
name: "timeout-test".to_string(),
1654+
// Core layer: the timeout path in poll_agent_exits only records restart
// bookkeeping for AgentLayer::Safety, so none is recorded here.
layer: AgentLayer::Core,
1655+
cli_tool: "sleep".to_string(),
1656+
task: "60".to_string(),
1657+
model: None,
1658+
schedule: None,
1659+
capabilities: vec![],
1660+
max_memory_bytes: None,
1661+
budget_monthly_cents: None,
1662+
provider: None,
1663+
persona: None,
1664+
terraphim_role: None,
1665+
skill_chain: vec![],
1666+
sfia_skills: vec![],
1667+
fallback_provider: None,
1668+
fallback_model: None,
1669+
grace_period_secs: Some(2),
1670+
// Despite the field name, poll_agent_exits compares this value against
// wall-clock elapsed time since the agent started.
max_cpu_seconds: Some(1), // 1 second timeout
1671+
}];
1672+
let mut orch = AgentOrchestrator::new(config).unwrap();
1673+
let def = orch.config.agents[0].clone();
1674+
orch.spawn_agent(&def).await.unwrap();
1675+
assert!(orch.active_agents.contains_key("timeout-test"));
1676+
1677+
// Wait for the timeout to elapse
1678+
tokio::time::sleep(Duration::from_secs(2)).await;
1679+
1680+
// Poll should detect timeout and kill
1681+
orch.poll_agent_exits().await;
1682+
assert!(!orch.active_agents.contains_key("timeout-test"));
1683+
}
15561684
}

crates/terraphim_spawner/src/config.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,4 +346,28 @@ mod tests {
346346
assert_eq!(AgentConfig::cli_name("claude"), "claude");
347347
assert_eq!(AgentConfig::cli_name("/usr/bin/codex"), "codex");
348348
}
349+
350+
// Checks that AgentConfig::infer_args returns `run --format json` for the
// bare CLI name "opencode".
#[test]
351+
fn test_infer_args_opencode() {
352+
let args = AgentConfig::infer_args("opencode");
353+
assert_eq!(args, vec!["run", "--format", "json"]);
354+
}
355+
356+
// Checks that AgentConfig::infer_args returns `run --format json` when the
// opencode binary is given as an absolute path rather than a bare name.
#[test]
357+
fn test_infer_args_opencode_full_path() {
358+
let args = AgentConfig::infer_args("/home/alex/.bun/bin/opencode");
359+
assert_eq!(args, vec!["run", "--format", "json"]);
360+
}
361+
362+
// Checks that AgentConfig::model_args emits `-m <model>` for the bare CLI
// name "opencode", passing the "provider/model" string through unchanged.
#[test]
363+
fn test_model_args_opencode() {
364+
let args = AgentConfig::model_args("opencode", "kimi-for-coding/k2p5");
365+
assert_eq!(args, vec!["-m", "kimi-for-coding/k2p5"]);
366+
}
367+
368+
// Checks that AgentConfig::model_args emits `-m <model>` when the opencode
// binary is given as an absolute path.
#[test]
369+
fn test_model_args_opencode_full_path() {
370+
let args = AgentConfig::model_args("/home/alex/.bun/bin/opencode", "opencode-go/kimi-k2.5");
371+
assert_eq!(args, vec!["-m", "opencode-go/kimi-k2.5"]);
372+
}
349373
}

crates/terraphim_spawner/src/lib.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,4 +1009,22 @@ mod tests {
10091009
let handle = handle.unwrap();
10101010
assert_eq!(handle.provider.id, "@model-cat-agent");
10111011
}
1012+
1013+
// =========================================================================
1014+
// ADF Remediation Tests (Gitea #117)
1015+
// =========================================================================
1016+
1017+
// Checks that SpawnRequest::with_resource_limits stores the given limits on
// the request: both the CPU-seconds and memory caps must read back unchanged.
#[test]
1018+
fn test_spawn_request_with_resource_limits() {
1019+
let provider = create_test_agent_provider();
1020+
let limits = ResourceLimits {
1021+
max_cpu_seconds: Some(3600),
1022+
// 2_147_483_648 bytes = 2 GiB.
max_memory_bytes: Some(2_147_483_648),
1023+
..Default::default()
1024+
};
1025+
let request = SpawnRequest::new(provider, "test")
1026+
// NOTE(review): `limits` is not used after this call, so the clone looks
// unnecessary -- passing `limits` by value should suffice.
.with_resource_limits(limits.clone());
1027+
assert_eq!(request.resource_limits.max_cpu_seconds, Some(3600));
1028+
assert_eq!(request.resource_limits.max_memory_bytes, Some(2_147_483_648));
1029+
}
10121030
}

crates/terraphim_types/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
//! skip: None,
3131
//! limit: Some(10),
3232
//! role: Some(RoleName::new("engineer")),
33+
//! layer: Default::default(),
3334
//! };
3435
//!
3536
//! // Multi-term AND query
@@ -953,6 +954,7 @@ pub fn extract_first_paragraph(body: &str) -> String {
953954
/// skip: None,
954955
/// limit: Some(10),
955956
/// role: Some(RoleName::new("data_scientist")),
957+
/// layer: Default::default(),
956958
/// };
957959
/// ```
958960
///

0 commit comments

Comments
 (0)