diff --git a/README.md b/README.md index 873f1940..29678333 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,8 @@ AI code governance platform for enterprises. Captures what AI coding agents do in your repos — which files they touch, how many tokens they burn, what tools they call, what percentage of code is AI-generated — then enforces policies and produces tamper-evident audit trails for regulatory compliance. +Supports **Claude Code**, **Codex CLI**, and is extensible to other agents via the AgentAdapter architecture. + Built for financial institutions and regulated industries where AI-generated code needs the same audit rigor as human-written code. [Learn more at VirtusLab](https://virtuslab.com/services/tracevault) @@ -67,7 +69,7 @@ See exactly what AI wrote, line by line. The code browser overlays AI attributio Three Rust crates in a Cargo workspace: - **tracevault-core** — domain types, policy engine (7 condition types), attribution engine (tree-sitter based), secret redactor -- **tracevault-cli** — CLI binary that hooks into Claude Code, captures traces locally, checks policies, pushes to server +- **tracevault-cli** — CLI binary that hooks into Claude Code and Codex CLI, captures traces locally, checks policies, pushes to server - **tracevault-server** — axum HTTP server backed by PostgreSQL with Ed25519 signing, audit logging, RBAC, code browser Plus a SvelteKit web dashboard and a GitHub Action for CI verification. @@ -280,6 +282,19 @@ tracevault init That's it. From this point on, every Claude Code session in this repo is automatically traced — tool calls, file edits, token usage, and model info are captured and streamed to the TraceVault server as they happen. When you `git push`, the pre-push hook evaluates policies and blocks the push if any rule fails. +## Using with Codex CLI + +[Codex CLI](https://github.com/openai/codex) (OpenAI's coding agent) is also supported. 
Initialize with the `--agent codex` flag to install Codex hooks: + +```sh +npm install -g @openai/codex +cd /path/to/your/repo +tracevault login --server-url https://your-tracevault-server.example.com +tracevault init --agent codex +``` + +`--agent` selects exactly which agents to install — passing it replaces the default. `tracevault init --agent codex` installs Codex hooks in `.codex/hooks.json` only; to enable both agents in the same repo, pass each one explicitly: `tracevault init --agent claude-code --agent codex`. Codex sessions are traced including transcript parsing, token usage, and file changes via `apply_patch`. The session detail view shows a Codex badge to distinguish agent types. + ## Keys & Secrets ### Encryption key (`TRACEVAULT_ENCRYPTION_KEY`) @@ -332,10 +347,10 @@ export DATABASE_URL=postgres://user:password@host:5432/tracevault?sslmode=requir | Command | Description | |---------|-------------| -| `tracevault init [--server-url URL]` | Initialize TraceVault in current repo, install pre-push hook and Claude Code hooks | +| `tracevault init [--server-url URL] [--agent ]...` | Initialize TraceVault in current repo, install pre-push hook and agent hooks. With no `--agent` flag, Claude Code hooks are installed by default. Passing `--agent ` (repeatable) selects exactly which agents to install and replaces the default — to enable both, pass `--agent claude-code --agent codex`. | | `tracevault login --server-url URL [--no-browser]` | Authenticate via device auth flow. Prints the URL and opens a browser when possible; `--no-browser` (or a headless env) skips the auto-open. 
| | `tracevault logout` | Clear local credentials | -| `tracevault stream --event ` | Handle a Claude Code hook event (reads JSON from stdin) and stream it to the server | +| `tracevault stream --event [--agent ]` | Handle an agent hook event (reads JSON from stdin) and stream it to the server (`--agent`: `claude-code` (default), `codex`) | | `tracevault sync` | Sync repo metadata with the server | | `tracevault check` | Evaluate policies against server rules, exit non-zero if blocked | | `tracevault stats` | Show local session statistics | diff --git a/crates/tracevault-cli/src/commands/init.rs b/crates/tracevault-cli/src/commands/init.rs index 5bcb21c5..3fa016f7 100644 --- a/crates/tracevault-cli/src/commands/init.rs +++ b/crates/tracevault-cli/src/commands/init.rs @@ -3,6 +3,7 @@ use crate::config::TracevaultConfig; use std::fs; use std::io; use std::path::Path; +use tracevault_core::agent_adapter::{AgentAdapter, AgentAdapterRegistry}; pub fn git_remote_url(project_root: &Path) -> Option { std::process::Command::new("git") @@ -33,7 +34,8 @@ fn parse_github_org(remote_url: &str) -> Option { pub async fn init_in_directory( project_root: &Path, server_url: Option<&str>, -) -> Result<(), io::Error> { + agents: Option<&[String]>, +) -> Result, io::Error> { // Check for git repository if !project_root.join(".git").exists() { return Err(io::Error::new( @@ -48,9 +50,6 @@ pub async fn init_in_directory( fs::create_dir_all(config_dir.join("sessions"))?; fs::create_dir_all(config_dir.join("cache"))?; - // Keep all tracevault files local — update root .gitignore - update_root_gitignore(project_root)?; - // Register repo on server if authenticated, server URL known, and git remote available let remote_url = git_remote_url(project_root); if remote_url.is_none() { @@ -72,8 +71,47 @@ pub async fn init_in_directory( config.to_toml(), )?; - // Install Claude Code hooks into .claude/settings.json - install_claude_hooks(project_root)?; + // Resolve agents up front. 
When --agent is omitted entirely, Claude Code + // is installed as the default. When --agent is provided, only the listed + // agents are installed (Claude is no longer added implicitly). Aliases + // (e.g. "claude" → "claude-code") are resolved by the registry; + // deduplication uses the adapter's canonical `name()`. + let registry = AgentAdapterRegistry::new(); + let requested: Vec<&str> = match agents { + Some(extra) => extra.iter().map(String::as_str).collect(), + None => vec!["claude-code"], + }; + + let mut resolved: Vec<&dyn AgentAdapter> = Vec::new(); + let mut effective: Vec = Vec::new(); + let mut hook_paths: Vec = Vec::new(); + for raw in requested { + match registry.try_get(raw) { + Some(adapter) => { + let id = adapter.name().to_string(); + if !effective.contains(&id) { + let path = adapter.hooks_install_path(); + if !path.is_empty() { + hook_paths.push(path.to_string()); + } + effective.push(id); + resolved.push(adapter); + } + } + None => eprintln!("Warning: unknown agent '{}', skipping hooks", raw), + } + } + + // Keep tracevault and agent hook files local — update root .gitignore + // before installing hook files. Matches main's ordering: even if a + // subsequent `install_hooks` fails, `.gitignore` is already updated so + // any partial files left on disk stay untracked. + update_root_gitignore(project_root, &hook_paths)?; + + // Install agent-specific hooks + for adapter in &resolved { + adapter.install_hooks(project_root)?; + } // Install git hooks install_git_hook(project_root)?; @@ -128,10 +166,10 @@ pub async fn init_in_directory( } } - Ok(()) + Ok(effective) } -fn update_root_gitignore(project_root: &Path) -> Result<(), io::Error> { +fn update_root_gitignore(project_root: &Path, hook_paths: &[String]) -> Result<(), io::Error> { let path = project_root.join(".gitignore"); let existing = if path.exists() { fs::read_to_string(&path)? 
@@ -139,9 +177,11 @@ fn update_root_gitignore(project_root: &Path) -> Result<(), io::Error> { String::new() }; - let needed: Vec<&str> = [".tracevault/", ".claude/settings.json"] - .iter() - .copied() + let mut entries: Vec<&str> = vec![".tracevault/"]; + entries.extend(hook_paths.iter().map(String::as_str)); + + let needed: Vec<&str> = entries + .into_iter() .filter(|entry| !existing.lines().any(|line| line.trim() == *entry)) .collect(); @@ -284,83 +324,6 @@ fn install_post_commit_hook(project_root: &Path) -> Result<(), io::Error> { Ok(()) } -fn install_claude_hooks(project_root: &Path) -> Result<(), io::Error> { - let claude_dir = project_root.join(".claude"); - fs::create_dir_all(&claude_dir)?; - - let settings_path = claude_dir.join("settings.json"); - let mut settings: serde_json::Value = if settings_path.exists() { - let content = fs::read_to_string(&settings_path)?; - serde_json::from_str(&content).map_err(|e| { - io::Error::new( - io::ErrorKind::InvalidData, - format!("Failed to parse .claude/settings.json: {e}"), - ) - })? 
- } else { - serde_json::json!({}) - }; - - let hooks = tracevault_hooks(); - - // Merge hooks into existing settings - let settings_obj = settings.as_object_mut().ok_or_else(|| { - io::Error::new( - io::ErrorKind::InvalidData, - ".claude/settings.json is not a JSON object", - ) - })?; - - settings_obj.insert("hooks".to_string(), hooks); - - let formatted = serde_json::to_string_pretty(&settings) - .map_err(|e| io::Error::other(format!("Failed to serialize settings: {e}")))?; - fs::write(&settings_path, formatted)?; - - Ok(()) -} - -pub fn tracevault_hooks() -> serde_json::Value { - serde_json::json!({ - "PreToolUse": [{ - "matcher": "Write|Edit|Bash", - "hooks": [{ - "type": "command", - "command": "tracevault stream --event pre-tool-use", - "timeout": 10, - "statusMessage": "TraceVault: streaming pre-tool event" - }] - }], - "PostToolUse": [{ - "matcher": "", - "hooks": [{ - "type": "command", - "command": "tracevault stream --event post-tool-use", - "timeout": 10, - "statusMessage": "TraceVault: streaming post-tool event" - }] - }], - "Notification": [{ - "matcher": "", - "hooks": [{ - "type": "command", - "command": "tracevault stream --event notification", - "timeout": 10, - "statusMessage": "TraceVault: streaming notification" - }] - }], - "Stop": [{ - "matcher": "", - "hooks": [{ - "type": "command", - "command": "tracevault stream --event stop", - "timeout": 10, - "statusMessage": "TraceVault: finalizing session" - }] - }] - }) -} - #[cfg(test)] mod tests { use super::*; diff --git a/crates/tracevault-cli/src/commands/stream.rs b/crates/tracevault-cli/src/commands/stream.rs index 809bb057..77f5b857 100644 --- a/crates/tracevault-cli/src/commands/stream.rs +++ b/crates/tracevault-cli/src/commands/stream.rs @@ -2,8 +2,9 @@ use std::fs::{self, OpenOptions}; use std::io::{self, BufRead, Read, Seek, SeekFrom, Write}; use std::path::Path; -use tracevault_core::hooks::{parse_hook_event, HookResponse}; -use tracevault_core::streaming::{StreamEventRequest, 
StreamEventType}; +use tracevault_core::agent_adapter::AgentAdapterRegistry; +use tracevault_core::hooks::parse_hook_event; +use tracevault_core::streaming::StreamEventRequest; pub fn next_event_index(counter_path: &Path) -> Result { let current = if counter_path.exists() { @@ -84,7 +85,11 @@ pub fn drain_pending(pending_path: &Path) -> Result, io::Error> { pub async fn run_stream( project_root: &Path, - event_type: &str, + // Unused: routing is driven by `hook_event.hook_event_name` from stdin + // (see `adapter.map_event_type` below). The `--event` CLI flag is kept + // only because the installed hooks pass it for shell-log readability. + _event_type: &str, + agent: &str, ) -> Result<(), Box> { // 1. Read HookEvent from stdin let mut input = String::new(); @@ -107,16 +112,17 @@ pub async fn run_stream( let offset_path = session_dir.join(".stream_offset"); let (transcript_lines, new_offset) = read_new_transcript_lines(transcript_path, &offset_path)?; - // 5. Build StreamEventRequest - let stream_event_type = match event_type { - "notification" => StreamEventType::SessionStart, - "stop" => StreamEventType::SessionEnd, - _ => StreamEventType::ToolUse, - }; + // 5. Map hook event to stream event type via the agent adapter. + // Resolve the adapter once; it owns the wire protocol version and the + // canonical tool name so user-supplied aliases (e.g. "claude" → "claude-code") + // produce the same wire bytes as the canonical name. + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get(agent); + let stream_event_type = adapter.map_event_type(&hook_event.hook_event_name); let mut req = StreamEventRequest { - protocol_version: 1, - tool: Some("claude-code".to_string()), + protocol_version: adapter.wire_protocol_version(), + tool: Some(adapter.name().to_string()), event_type: stream_event_type, session_id: hook_event.session_id.clone(), timestamp: chrono::Utc::now(), @@ -195,8 +201,8 @@ pub async fn run_stream( } } - // 12. 
Always print HookResponse::allow() to stdout - let response = HookResponse::allow(); + // 12. Always print agent-specific hook response to stdout + let response = adapter.hook_response(); println!("{}", serde_json::to_string(&response)?); Ok(()) diff --git a/crates/tracevault-cli/src/main.rs b/crates/tracevault-cli/src/main.rs index 5962cb45..e5c8e32a 100644 --- a/crates/tracevault-cli/src/main.rs +++ b/crates/tracevault-cli/src/main.rs @@ -1,5 +1,6 @@ use clap::Parser; use std::env; +use tracevault_core::agent_adapter::AgentAdapterRegistry; mod api_client; mod commands; @@ -15,6 +16,9 @@ enum Cli { /// TraceVault server URL for repo registration #[arg(long)] server_url: Option, + /// Additional AI agents to install hooks for (e.g. codex, gemini) + #[arg(long = "agent")] + agents: Vec, }, /// Show current session status Status, @@ -25,6 +29,9 @@ enum Cli { Stream { #[arg(long)] event: String, + /// AI coding agent name (claude-code, codex) + #[arg(long, default_value = "claude-code")] + agent: String, }, /// Check session policies before pushing Check, @@ -66,14 +73,35 @@ enum Cli { async fn main() { let cli = Cli::parse(); match cli { - Cli::Init { server_url } => { + Cli::Init { server_url, agents } => { let cwd = env::current_dir().expect("Cannot determine current directory"); - match commands::init::init_in_directory(&cwd, server_url.as_deref()).await { - Ok(()) => { + match commands::init::init_in_directory( + &cwd, + server_url.as_deref(), + if agents.is_empty() { + None + } else { + Some(&agents) + }, + ) + .await + { + Ok(installed) => { println!("TraceVault initialized in {}", cwd.display()); - println!("Claude Code hooks installed (.claude/settings.json)"); + let registry = AgentAdapterRegistry::new(); + let mut gitignore_paths = vec![".tracevault/".to_string()]; + for agent in &installed { + let adapter = registry.get(agent); + let path = adapter.hooks_install_path(); + if path.is_empty() { + println!("{} hooks installed", adapter.display_name()); + } 
else { + println!("{} hooks installed ({})", adapter.display_name(), path); + gitignore_paths.push(path.to_string()); + } + } println!("Git hooks installed (pre-push, post-commit)"); - println!("Added .tracevault/ and .claude/settings.json to .gitignore"); + println!("Added {} to .gitignore", gitignore_paths.join(", ")); println!( "Nothing needs to be committed — all TraceVault files are local only." ); @@ -91,9 +119,9 @@ async fn main() { std::process::exit(code); } } - Cli::Stream { event } => { + Cli::Stream { event, agent } => { let cwd = env::current_dir().expect("Cannot determine current directory"); - if let Err(e) = commands::stream::run_stream(&cwd, &event).await { + if let Err(e) = commands::stream::run_stream(&cwd, &event, &agent).await { eprintln!("Stream error: {e}"); } } diff --git a/crates/tracevault-cli/tests/init_test.rs b/crates/tracevault-cli/tests/init_test.rs index 0ab3b75b..3d910787 100644 --- a/crates/tracevault-cli/tests/init_test.rs +++ b/crates/tracevault-cli/tests/init_test.rs @@ -10,7 +10,7 @@ fn tmp_git_repo() -> TempDir { #[tokio::test] async fn init_fails_without_git() { let tmp = TempDir::new().unwrap(); - let result = tracevault_cli::commands::init::init_in_directory(tmp.path(), None).await; + let result = tracevault_cli::commands::init::init_in_directory(tmp.path(), None, None).await; assert!(result.is_err()); assert!(result .unwrap_err() @@ -23,7 +23,7 @@ async fn init_creates_tracevault_config() { let tmp = tmp_git_repo(); let config_path = tmp.path().join(".tracevault").join("config.toml"); - tracevault_cli::commands::init::init_in_directory(tmp.path(), None) + tracevault_cli::commands::init::init_in_directory(tmp.path(), None, None) .await .unwrap(); @@ -36,7 +36,7 @@ async fn init_creates_tracevault_config() { async fn init_creates_directory_structure() { let tmp = tmp_git_repo(); - tracevault_cli::commands::init::init_in_directory(tmp.path(), None) + tracevault_cli::commands::init::init_in_directory(tmp.path(), None, None) 
.await .unwrap(); @@ -53,7 +53,7 @@ async fn init_creates_directory_structure() { async fn init_installs_claude_hooks() { let tmp = tmp_git_repo(); - tracevault_cli::commands::init::init_in_directory(tmp.path(), None) + tracevault_cli::commands::init::init_in_directory(tmp.path(), None, None) .await .unwrap(); @@ -66,6 +66,7 @@ async fn init_installs_claude_hooks() { assert!(hooks.get("PreToolUse").is_some()); assert!(hooks.get("PostToolUse").is_some()); assert!(hooks.get("Notification").is_some()); + assert!(hooks.get("Stop").is_some()); } #[tokio::test] @@ -77,7 +78,7 @@ async fn init_merges_into_existing_settings() { fs::create_dir_all(&claude_dir).unwrap(); fs::write(claude_dir.join("settings.json"), r#"{"model": "opus"}"#).unwrap(); - tracevault_cli::commands::init::init_in_directory(tmp.path(), None) + tracevault_cli::commands::init::init_in_directory(tmp.path(), None, None) .await .unwrap(); @@ -90,19 +91,11 @@ async fn init_merges_into_existing_settings() { assert_eq!(settings.get("model").unwrap(), "opus"); } -#[test] -fn tracevault_hooks_has_pre_post_and_notification() { - let hooks = tracevault_cli::commands::init::tracevault_hooks(); - assert!(hooks.get("PreToolUse").is_some()); - assert!(hooks.get("PostToolUse").is_some()); - assert!(hooks.get("Notification").is_some()); -} - #[tokio::test] async fn init_installs_git_pre_push_hook() { let tmp = tmp_git_repo(); - tracevault_cli::commands::init::init_in_directory(tmp.path(), None) + tracevault_cli::commands::init::init_in_directory(tmp.path(), None, None) .await .unwrap(); @@ -130,7 +123,7 @@ async fn init_preserves_existing_pre_push_hook() { ) .unwrap(); - tracevault_cli::commands::init::init_in_directory(tmp.path(), None) + tracevault_cli::commands::init::init_in_directory(tmp.path(), None, None) .await .unwrap(); @@ -147,10 +140,10 @@ async fn init_preserves_existing_pre_push_hook() { async fn init_does_not_duplicate_hook_on_reinit() { let tmp = tmp_git_repo(); - 
tracevault_cli::commands::init::init_in_directory(tmp.path(), None) + tracevault_cli::commands::init::init_in_directory(tmp.path(), None, None) .await .unwrap(); - tracevault_cli::commands::init::init_in_directory(tmp.path(), None) + tracevault_cli::commands::init::init_in_directory(tmp.path(), None, None) .await .unwrap(); @@ -166,7 +159,7 @@ async fn init_does_not_duplicate_hook_on_reinit() { async fn init_installs_post_commit_hook() { let tmp = tmp_git_repo(); - tracevault_cli::commands::init::init_in_directory(tmp.path(), None) + tracevault_cli::commands::init::init_in_directory(tmp.path(), None, None) .await .unwrap(); @@ -183,10 +176,10 @@ async fn init_installs_post_commit_hook() { async fn init_does_not_duplicate_post_commit_hook_on_reinit() { let tmp = tmp_git_repo(); - tracevault_cli::commands::init::init_in_directory(tmp.path(), None) + tracevault_cli::commands::init::init_in_directory(tmp.path(), None, None) .await .unwrap(); - tracevault_cli::commands::init::init_in_directory(tmp.path(), None) + tracevault_cli::commands::init::init_in_directory(tmp.path(), None, None) .await .unwrap(); @@ -202,11 +195,103 @@ async fn init_does_not_duplicate_post_commit_hook_on_reinit() { async fn init_writes_server_url_to_config() { let tmp = tmp_git_repo(); - tracevault_cli::commands::init::init_in_directory(tmp.path(), Some("https://tv.example.com")) - .await - .unwrap(); + tracevault_cli::commands::init::init_in_directory( + tmp.path(), + Some("https://tv.example.com"), + None, + ) + .await + .unwrap(); let config_path = tmp.path().join(".tracevault/config.toml"); let content = fs::read_to_string(&config_path).unwrap(); assert!(content.contains("server_url = \"https://tv.example.com\"")); } + +#[tokio::test] +async fn init_with_codex_agent_does_not_install_claude() { + // --agent codex installs only Codex hooks; Claude is the default only when + // --agent is omitted entirely, not added implicitly when --agent is given. 
+ let tmp = tmp_git_repo(); + let extras = vec!["codex".to_string()]; + + let installed = tracevault_cli::commands::init::init_in_directory( + tmp.path(), + None, + Some(extras.as_slice()), + ) + .await + .unwrap(); + + assert!(!tmp.path().join(".claude/settings.json").exists()); + assert!(tmp.path().join(".codex/hooks.json").exists()); + assert_eq!(installed, vec!["codex".to_string()]); + + let gitignore = fs::read_to_string(tmp.path().join(".gitignore")).unwrap(); + assert!(gitignore.contains(".tracevault/")); + assert!(gitignore.contains(".codex/hooks.json")); + assert!(!gitignore.contains(".claude/settings.json")); +} + +#[tokio::test] +async fn init_installs_codex_session_start_with_match_all_matcher() { + // Codex SessionStart matcher must be empty so the hook fires for all + // source variants Codex passes: startup, resume, clear. + let tmp = tmp_git_repo(); + let extras = vec!["codex".to_string()]; + tracevault_cli::commands::init::init_in_directory(tmp.path(), None, Some(extras.as_slice())) + .await + .unwrap(); + + let content = fs::read_to_string(tmp.path().join(".codex/hooks.json")).unwrap(); + let config: serde_json::Value = serde_json::from_str(&content).unwrap(); + let session_start = &config["hooks"]["SessionStart"][0]; + assert_eq!(session_start["matcher"], ""); +} + +#[tokio::test] +async fn init_installs_codex_stop_with_empty_matcher() { + // Stop is a session-lifecycle hook with no tool to match — its matcher + // must be present and empty, matching SessionStart's shape so Codex + // accepts the entry. 
+ let tmp = tmp_git_repo(); + let extras = vec!["codex".to_string()]; + tracevault_cli::commands::init::init_in_directory(tmp.path(), None, Some(extras.as_slice())) + .await + .unwrap(); + + let content = fs::read_to_string(tmp.path().join(".codex/hooks.json")).unwrap(); + let config: serde_json::Value = serde_json::from_str(&content).unwrap(); + let stop = &config["hooks"]["Stop"][0]; + assert_eq!(stop["matcher"], ""); +} + +#[tokio::test] +async fn init_default_installs_only_claude() { + let tmp = tmp_git_repo(); + + let installed = tracevault_cli::commands::init::init_in_directory(tmp.path(), None, None) + .await + .unwrap(); + + assert!(tmp.path().join(".claude/settings.json").exists()); + assert!(!tmp.path().join(".codex/hooks.json").exists()); + assert_eq!(installed, vec!["claude-code".to_string()]); +} + +#[tokio::test] +async fn init_dedupes_explicit_claude_alias() { + // User passing both `claude` and `claude-code` should not double-install. + let tmp = tmp_git_repo(); + let extras = vec!["claude".to_string(), "claude-code".to_string()]; + + let installed = tracevault_cli::commands::init::init_in_directory( + tmp.path(), + None, + Some(extras.as_slice()), + ) + .await + .unwrap(); + + assert_eq!(installed, vec!["claude-code".to_string()]); +} diff --git a/crates/tracevault-core/src/agent_adapter/claude_code.rs b/crates/tracevault-core/src/agent_adapter/claude_code.rs new file mode 100644 index 00000000..a81ab57c --- /dev/null +++ b/crates/tracevault-core/src/agent_adapter/claude_code.rs @@ -0,0 +1,479 @@ +use chrono::{DateTime, Utc}; +use sha2::{Digest, Sha256}; +use std::fs; +use std::io; +use std::path::Path; + +use crate::hooks::HookResponse; +use crate::streaming::{ExtractedFileChange, StreamEventType}; + +use super::{AgentAdapter, FileChangeRecord, ParsedTranscriptRecord, TokenUsage}; + +pub struct ClaudeCodeAdapter; + +fn hooks_json() -> serde_json::Value { + serde_json::json!({ + "PreToolUse": [{ + "matcher": "Write|Edit|Bash", + "hooks": [{ + 
"type": "command", + "command": "tracevault stream --event pre-tool-use", + "timeout": 10, + "statusMessage": "TraceVault: streaming pre-tool event" + }] + }], + "PostToolUse": [{ + "matcher": "", + "hooks": [{ + "type": "command", + "command": "tracevault stream --event post-tool-use", + "timeout": 10, + "statusMessage": "TraceVault: streaming post-tool event" + }] + }], + "Notification": [{ + "matcher": "", + "hooks": [{ + "type": "command", + "command": "tracevault stream --event notification", + "timeout": 10, + "statusMessage": "TraceVault: streaming notification" + }] + }], + "Stop": [{ + "matcher": "", + "hooks": [{ + "type": "command", + "command": "tracevault stream --event stop", + "timeout": 10, + "statusMessage": "TraceVault: finalizing session" + }] + }] + }) +} + +impl AgentAdapter for ClaudeCodeAdapter { + fn name(&self) -> &str { + "claude-code" + } + + fn display_name(&self) -> &str { + "Claude Code" + } + + fn hooks_install_path(&self) -> &str { + ".claude/settings.json" + } + + fn wire_protocol_version(&self) -> u32 { + 1 + } + + fn map_event_type(&self, hook_event_name: &str) -> StreamEventType { + // Claude Code has no SessionStart hook — Notification is the first + // hook fired and serves as the session-start signal. 
+ match hook_event_name { + "SessionStart" | "Notification" => StreamEventType::SessionStart, + "Stop" => StreamEventType::SessionEnd, + _ => StreamEventType::ToolUse, + } + } + + fn is_file_modifying(&self, tool_name: &str) -> bool { + matches!(tool_name, "Write" | "Edit" | "Bash") + } + + fn file_changes_from_hook( + &self, + tool_name: &str, + tool_input: &serde_json::Value, + timestamp: DateTime, + ) -> Vec { + let change = match tool_name { + "Write" => { + let file_path = match tool_input.get("file_path").and_then(|v| v.as_str()) { + Some(p) => p.to_string(), + None => return Vec::new(), + }; + let content = match tool_input.get("content").and_then(|v| v.as_str()) { + Some(c) => c, + None => return Vec::new(), + }; + let mut hasher = Sha256::new(); + hasher.update(content.as_bytes()); + let hash = hex::encode(hasher.finalize()); + let diff_text = content + .lines() + .map(|line| format!("+{}", line)) + .collect::>() + .join("\n"); + ExtractedFileChange { + file_path, + change_type: "create".to_string(), + diff_text: Some(diff_text), + content_hash: Some(hash), + } + } + "Edit" => { + let file_path = match tool_input.get("file_path").and_then(|v| v.as_str()) { + Some(p) => p.to_string(), + None => return Vec::new(), + }; + let old_string = match tool_input.get("old_string").and_then(|v| v.as_str()) { + Some(s) => s, + None => return Vec::new(), + }; + let new_string = match tool_input.get("new_string").and_then(|v| v.as_str()) { + Some(s) => s, + None => return Vec::new(), + }; + let diff_text = format!("--- {}\n+++ {}", old_string, new_string); + ExtractedFileChange { + file_path, + change_type: "edit".to_string(), + diff_text: Some(diff_text), + content_hash: None, + } + } + _ => return Vec::new(), + }; + vec![FileChangeRecord { + change, + tool_name: tool_name.to_string(), + tool_input: Some(tool_input.clone()), + timestamp, + }] + } + + fn extract_token_usage(&self, chunk: &serde_json::Value) -> Option { + // Match main's pre-adapter behavior: any missing 
field defaults to 0 + // rather than aborting the whole extraction. The presence of `usage` is + // the only gating signal. + let usage = chunk.get("message")?.get("usage")?; + Some(TokenUsage { + input_tokens: usage + .get("input_tokens") + .and_then(|v| v.as_i64()) + .unwrap_or(0), + output_tokens: usage + .get("output_tokens") + .and_then(|v| v.as_i64()) + .unwrap_or(0), + cache_read_tokens: usage + .get("cache_read_input_tokens") + .and_then(|v| v.as_i64()) + .unwrap_or(0), + cache_write_tokens: usage + .get("cache_creation_input_tokens") + .and_then(|v| v.as_i64()) + .unwrap_or(0), + }) + } + + fn extract_model(&self, chunk: &serde_json::Value) -> Option { + chunk + .get("message")? + .get("model")? + .as_str() + .map(|s| s.to_string()) + } + + fn hook_response(&self) -> HookResponse { + HookResponse::allow() + } + + fn install_hooks(&self, project_root: &Path) -> io::Result<()> { + let claude_dir = project_root.join(".claude"); + fs::create_dir_all(&claude_dir)?; + + let settings_path = claude_dir.join("settings.json"); + let mut settings: serde_json::Value = if settings_path.exists() { + let content = fs::read_to_string(&settings_path)?; + serde_json::from_str(&content).map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("Failed to parse .claude/settings.json: {e}"), + ) + })? 
+ } else { + serde_json::json!({}) + }; + + let settings_obj = settings.as_object_mut().ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidData, + ".claude/settings.json is not a JSON object", + ) + })?; + settings_obj.insert("hooks".to_string(), hooks_json()); + + let formatted = serde_json::to_string_pretty(&settings) + .map_err(|e| io::Error::other(format!("Failed to serialize settings: {e}")))?; + fs::write(&settings_path, formatted)?; + Ok(()) + } + + fn parse_transcript_record(&self, chunk: &serde_json::Value) -> Option { + let record_type = chunk.get("type")?.as_str()?.to_string(); + let timestamp = chunk + .get("timestamp") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + + match record_type.as_str() { + "assistant" => self.parse_assistant_record(chunk, record_type, timestamp), + "user" => self.parse_user_record(chunk, record_type, timestamp), + "progress" => self.parse_progress_record(chunk, record_type, timestamp), + "system" => self.parse_system_record(chunk, record_type, timestamp), + _ => Some(ParsedTranscriptRecord { + record_type, + timestamp, + content_types: Vec::new(), + tool_name: None, + text: None, + raw_input_tokens: None, + raw_output_tokens: None, + raw_cache_read_tokens: None, + raw_cache_write_tokens: None, + model: None, + }), + } + } +} + +impl ClaudeCodeAdapter { + fn parse_assistant_record( + &self, + chunk: &serde_json::Value, + record_type: String, + timestamp: Option, + ) -> Option { + let message = match chunk.get("message") { + Some(m) => m, + None => { + return Some(ParsedTranscriptRecord { + record_type, + timestamp, + content_types: Vec::new(), + tool_name: None, + text: None, + raw_input_tokens: None, + raw_output_tokens: None, + raw_cache_read_tokens: None, + raw_cache_write_tokens: None, + model: None, + }); + } + }; + let model = message + .get("model") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + + let mut content_types = Vec::new(); + let mut text_parts = Vec::new(); + let mut first_tool_name: 
Option = None; + // Match main's `arr.iter().find(|b| type==tool_use).and_then(|b| name)`: + // we lock onto the first tool_use block regardless of whether it had a + // `name` field, so a missing name yields `None` (not a name from a + // later block). + let mut seen_tool_use = false; + + if let Some(content) = message.get("content").and_then(|v| v.as_array()) { + for block in content { + if let Some(block_type) = block.get("type").and_then(|v| v.as_str()) { + if !content_types.contains(&block_type.to_string()) { + content_types.push(block_type.to_string()); + } + if block_type == "tool_use" && !seen_tool_use { + seen_tool_use = true; + first_tool_name = block + .get("name") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + } + } + if let Some(t) = block.get("text").and_then(|v| v.as_str()) { + text_parts.push(t.to_string()); + } + if let Some(t) = block.get("thinking").and_then(|v| v.as_str()) { + text_parts.push(format!("[thinking] {}", t)); + } + } + } + + // Match main's parse_record: presence of `usage` field gates the whole + // RecordUsage downstream. Individual missing tokens default to 0. 
+ let usage = message.get("usage"); + let raw_input_tokens = + usage.map(|u| u.get("input_tokens").and_then(|v| v.as_i64()).unwrap_or(0)); + let raw_output_tokens = + usage.map(|u| u.get("output_tokens").and_then(|v| v.as_i64()).unwrap_or(0)); + let raw_cache_read_tokens = usage.map(|u| { + u.get("cache_read_input_tokens") + .and_then(|v| v.as_i64()) + .unwrap_or(0) + }); + let raw_cache_write_tokens = usage.map(|u| { + u.get("cache_creation_input_tokens") + .and_then(|v| v.as_i64()) + .unwrap_or(0) + }); + + let text = if text_parts.is_empty() { + None + } else { + Some(text_parts.join("\n\n")) + }; + + Some(ParsedTranscriptRecord { + record_type, + timestamp, + content_types, + tool_name: first_tool_name, + text, + raw_input_tokens, + raw_output_tokens, + raw_cache_read_tokens, + raw_cache_write_tokens, + model, + }) + } + + fn parse_user_record( + &self, + chunk: &serde_json::Value, + record_type: String, + timestamp: Option, + ) -> Option { + let mut content_types = Vec::new(); + let mut text: Option = None; + let mut tool_name: Option = None; + + // Handle message.content as either a string or an array of blocks. + match chunk.get("message").and_then(|m| m.get("content")) { + Some(serde_json::Value::String(s)) => { + content_types.push("text".to_string()); + text = Some(s.clone()); + } + Some(serde_json::Value::Array(arr)) => { + for block in arr { + if let Some(ct) = block.get("type").and_then(|v| v.as_str()) { + if !content_types.contains(&ct.to_string()) { + content_types.push(ct.to_string()); + } + if ct == "tool_result" { + if let Some(content) = block.get("content").and_then(|v| v.as_str()) { + text = Some(content.to_string()); + } + } else if ct == "text" { + if let Some(t) = block.get("text").and_then(|v| v.as_str()) { + text = Some(t.to_string()); + } + } + } + } + } + _ => {} + } + + // toolUseResult discriminates by which top-level field is present. 
+ if let Some(tur) = chunk.get("toolUseResult") { + if let Some(file) = tur + .get("file") + .and_then(|f| f.get("filePath").and_then(|v| v.as_str())) + { + tool_name = Some(format!("Read: {}", file)); + } else if tur.get("filenames").is_some() { + tool_name = Some("Glob".to_string()); + } else if tur.get("stdout").is_some() { + tool_name = Some("Bash".to_string()); + } + } + + Some(ParsedTranscriptRecord { + record_type, + timestamp, + content_types, + tool_name, + text, + raw_input_tokens: None, + raw_output_tokens: None, + raw_cache_read_tokens: None, + raw_cache_write_tokens: None, + model: None, + }) + } + + fn parse_progress_record( + &self, + chunk: &serde_json::Value, + record_type: String, + timestamp: Option, + ) -> Option { + let data = chunk.get("data"); + let hook_name = data + .and_then(|d| d.get("hookName").and_then(|v| v.as_str())) + .map(|s| s.to_string()); + let hook_event = data.and_then(|d| d.get("hookEvent").and_then(|v| v.as_str())); + let text = hook_event.map(|e| { + if let Some(ref name) = hook_name { + format!("{}: {}", e, name) + } else { + e.to_string() + } + }); + + Some(ParsedTranscriptRecord { + record_type, + timestamp, + content_types: Vec::new(), + tool_name: hook_name, + text, + raw_input_tokens: None, + raw_output_tokens: None, + raw_cache_read_tokens: None, + raw_cache_write_tokens: None, + model: None, + }) + } + + fn parse_system_record( + &self, + chunk: &serde_json::Value, + record_type: String, + timestamp: Option, + ) -> Option { + let subtype = chunk + .get("subtype") + .and_then(|v| v.as_str()) + .unwrap_or("unknown"); + let text = match subtype { + "turn_duration" => { + let ms = chunk + .get("durationMs") + .and_then(|v| v.as_f64()) + .unwrap_or(0.0); + Some(format!("turn_duration: {:.1}s", ms / 1000.0)) + } + "stop_hook_summary" => { + let count = chunk.get("hookCount").and_then(|v| v.as_i64()).unwrap_or(0); + Some(format!("stop_hook_summary: {} hooks", count)) + } + _ => Some(subtype.to_string()), + }; + + 
Some(ParsedTranscriptRecord { + record_type, + timestamp, + content_types: vec![subtype.to_string()], + tool_name: None, + text, + raw_input_tokens: None, + raw_output_tokens: None, + raw_cache_read_tokens: None, + raw_cache_write_tokens: None, + model: None, + }) + } +} diff --git a/crates/tracevault-core/src/agent_adapter/codex.rs b/crates/tracevault-core/src/agent_adapter/codex.rs new file mode 100644 index 00000000..b8005e79 --- /dev/null +++ b/crates/tracevault-core/src/agent_adapter/codex.rs @@ -0,0 +1,536 @@ +use chrono::{DateTime, Utc}; +use sha2::{Digest, Sha256}; +use std::fs; +use std::io; +use std::path::Path; + +use crate::streaming::{ExtractedFileChange, StreamEventType}; + +use super::{AgentAdapter, FileChangeRecord, ParsedTranscriptRecord, TokenUsage}; + +/// Adapter for OpenAI Codex CLI. +/// +/// Codex file modifications come exclusively through transcript chunks +/// (custom_tool_call with apply_patch), NOT through hook ToolUse events. +/// The hook events only carry shell commands like `pwd`, `git status`, etc. 
+pub struct CodexAdapter; + +fn hooks_json() -> serde_json::Value { + serde_json::json!({ + "SessionStart": [{ + "matcher": "", + "hooks": [{ + "type": "command", + "command": "tracevault stream --agent codex --event session-start", + "timeout": 10, + "statusMessage": "TraceVault: streaming session start" + }] + }], + "PreToolUse": [{ + "matcher": "Bash", + "hooks": [{ + "type": "command", + "command": "tracevault stream --agent codex --event pre-tool-use", + "timeout": 10, + "statusMessage": "TraceVault: streaming pre-tool event" + }] + }], + "PostToolUse": [{ + "matcher": "Bash", + "hooks": [{ + "type": "command", + "command": "tracevault stream --agent codex --event post-tool-use", + "timeout": 10, + "statusMessage": "TraceVault: streaming post-tool event" + }] + }], + "Stop": [{ + "matcher": "", + "hooks": [{ + "type": "command", + "command": "tracevault stream --agent codex --event stop", + "timeout": 10, + "statusMessage": "TraceVault: finalizing session" + }] + }] + }) +} + +impl AgentAdapter for CodexAdapter { + fn name(&self) -> &str { + "codex" + } + + fn display_name(&self) -> &str { + "Codex" + } + + fn hooks_install_path(&self) -> &str { + ".codex/hooks.json" + } + + /// Codex transcripts can carry a model name in chunks that have no token + /// usage yet (e.g. a session-start chunk preceding any assistant reply). + /// Persist the model anyway so `sessions.model` is populated promptly. + fn persists_model_without_usage(&self) -> bool { + true + } + + fn map_event_type(&self, hook_event_name: &str) -> StreamEventType { + match hook_event_name { + "SessionStart" => StreamEventType::SessionStart, + "Stop" => StreamEventType::SessionEnd, + _ => StreamEventType::ToolUse, + } + } + + /// Codex hook events never carry file-modifying tool calls. File changes + /// come from transcript chunks via `file_changes_from_transcript`. 
+ fn is_file_modifying(&self, _tool_name: &str) -> bool { + false + } + + fn provides_transcript_file_changes(&self) -> bool { + true + } + + /// Extract file changes from Codex transcript chunks. + /// Handles `response_item` with `payload.type: "custom_tool_call"` and `name: "apply_patch"`. + fn file_changes_from_transcript( + &self, + chunk: &serde_json::Value, + fallback_timestamp: DateTime, + ) -> Vec { + let payload = match chunk.get("payload") { + Some(p) => p, + None => return vec![], + }; + + let payload_type = payload.get("type").and_then(|v| v.as_str()).unwrap_or(""); + if payload_type != "custom_tool_call" { + return vec![]; + } + + let name = payload + .get("name") + .and_then(|v| v.as_str()) + .unwrap_or("unknown"); + if name != "apply_patch" { + return vec![]; + } + + let input = match payload.get("input").and_then(|v| v.as_str()) { + Some(s) => s, + None => return vec![], + }; + + // Codex chunks carry their own RFC 3339 timestamp at the top level — + // use it for precise per-patch ordering instead of the hook delivery + // time (which can lag minutes if patches are batched and the hook + // only fires at Stop). 
+ let timestamp = chunk + .get("timestamp") + .and_then(|v| v.as_str()) + .and_then(|s| DateTime::parse_from_rfc3339(s).ok()) + .map(|dt| dt.with_timezone(&Utc)) + .unwrap_or(fallback_timestamp); + + let tool_input = Some(payload.clone()); + + parse_codex_patch(input) + .into_iter() + .map(|change| FileChangeRecord { + change, + tool_name: name.to_string(), + tool_input: tool_input.clone(), + timestamp, + }) + .collect() + } + + fn extract_token_usage(&self, chunk: &serde_json::Value) -> Option { + let top_type = chunk.get("type")?.as_str()?; + if top_type != "event_msg" { + return None; + } + let payload = chunk.get("payload")?; + let payload_type = payload.get("type")?.as_str()?; + if payload_type != "token_count" { + return None; + } + let usage = payload.get("info")?.get("last_token_usage")?; + Some(TokenUsage { + input_tokens: usage.get("input_tokens")?.as_i64()?, + output_tokens: usage.get("output_tokens")?.as_i64()?, + cache_read_tokens: usage + .get("cached_input_tokens") + .and_then(|v| v.as_i64()) + .unwrap_or(0), + cache_write_tokens: 0, + }) + } + + fn extract_model(&self, chunk: &serde_json::Value) -> Option { + let top_type = chunk.get("type")?.as_str()?; + if top_type != "turn_context" { + return None; + } + chunk + .get("payload")? + .get("model")? + .as_str() + .map(|s| s.to_string()) + } + + fn install_hooks(&self, project_root: &Path) -> io::Result<()> { + let codex_dir = project_root.join(".codex"); + fs::create_dir_all(&codex_dir)?; + + let hooks_path = codex_dir.join("hooks.json"); + let mut config: serde_json::Value = if hooks_path.exists() { + let content = fs::read_to_string(&hooks_path)?; + serde_json::from_str(&content).map_err(|e| { + io::Error::new( + io::ErrorKind::InvalidData, + format!("Failed to parse .codex/hooks.json: {e}"), + ) + })? 
+        } else {
+            serde_json::json!({})
+        };
+
+        let config_obj = config.as_object_mut().ok_or_else(|| {
+            io::Error::new(
+                io::ErrorKind::InvalidData,
+                ".codex/hooks.json is not a JSON object",
+            )
+        })?;
+        config_obj.insert("hooks".to_string(), hooks_json());
+
+        let formatted = serde_json::to_string_pretty(&config)
+            .map_err(|e| io::Error::other(format!("Failed to serialize hooks: {e}")))?;
+        fs::write(&hooks_path, formatted)?;
+        Ok(())
+    }
+
+    fn parse_transcript_record(&self, chunk: &serde_json::Value) -> Option<ParsedTranscriptRecord> {
+        let top_type = chunk.get("type")?.as_str()?;
+        let timestamp = chunk
+            .get("timestamp")
+            .and_then(|v| v.as_str())
+            .map(String::from);
+
+        match top_type {
+            "event_msg" => self.parse_event_msg(chunk, &timestamp),
+            "response_item" => self.parse_response_item(chunk, &timestamp),
+            // turn_context, session_meta — ingestion-only, not for display
+            _ => None,
+        }
+    }
+}
+
+impl CodexAdapter {
+    fn parse_event_msg(
+        &self,
+        chunk: &serde_json::Value,
+        timestamp: &Option<String>,
+    ) -> Option<ParsedTranscriptRecord> {
+        let payload = chunk.get("payload")?;
+        let payload_type = payload.get("type")?.as_str()?;
+
+        match payload_type {
+            "agent_message" => {
+                let content = payload
+                    .get("content")
+                    .and_then(|v| v.as_str())
+                    .map(|s| s.to_string());
+                Some(ParsedTranscriptRecord {
+                    record_type: "assistant".to_string(),
+                    timestamp: timestamp.clone(),
+                    content_types: vec!["text".to_string()],
+                    tool_name: None,
+                    text: content,
+                    raw_input_tokens: None,
+                    raw_output_tokens: None,
+                    raw_cache_read_tokens: None,
+                    raw_cache_write_tokens: None,
+                    model: None,
+                })
+            }
+            "user_message" => {
+                let content = payload
+                    .get("content")
+                    .and_then(|v| v.as_str())
+                    .map(|s| s.to_string());
+                Some(ParsedTranscriptRecord {
+                    record_type: "user".to_string(),
+                    timestamp: timestamp.clone(),
+                    content_types: vec!["text".to_string()],
+                    tool_name: None,
+                    text: content,
+                    raw_input_tokens: None,
+                    raw_output_tokens: None,
+                    raw_cache_read_tokens: None,
+                    raw_cache_write_tokens: None,
+                    model: None,
+
}) + } + // token_count, task_started — ingestion-only + _ => None, + } + } + + fn parse_response_item( + &self, + chunk: &serde_json::Value, + timestamp: &Option, + ) -> Option { + let payload = chunk.get("payload")?; + let payload_type = payload.get("type")?.as_str()?; + + match payload_type { + "local_shell_call" => { + let command = payload + .get("command") + .and_then(|v| v.as_str()) + .unwrap_or(""); + let output = payload.get("output").and_then(|v| v.as_str()).unwrap_or(""); + let text = format!("$ {}\n{}", command, output); + Some(ParsedTranscriptRecord { + record_type: "assistant".to_string(), + timestamp: timestamp.clone(), + content_types: vec!["tool_use".to_string()], + tool_name: Some("Bash".to_string()), + text: Some(text), + raw_input_tokens: None, + raw_output_tokens: None, + raw_cache_read_tokens: None, + raw_cache_write_tokens: None, + model: None, + }) + } + "message" => { + let role = payload.get("role")?.as_str()?; + // Skip system/developer messages (permissions, instructions) + if role == "developer" { + return None; + } + let record_type = if role == "assistant" { + "assistant" + } else { + "user" + }; + let text = payload + .get("content") + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|block| { + let block_type = block.get("type").and_then(|v| v.as_str())?; + if block_type == "input_text" || block_type == "output_text" { + let t = block.get("text").and_then(|v| v.as_str())?; + // Codex injects system context into the user role wrapped + // in known XML tags (see openai/codex protocol.rs). + // Skip only those — a blunt `starts_with('<')` would also + // drop legitimate user questions about HTML/JSX/XML snippets. 
+ if role == "user" && is_codex_system_prompt(t) { + return None; + } + Some(t.to_string()) + } else { + None + } + }) + .collect::>() + .join("\n\n") + }) + .filter(|s| !s.is_empty()); + // Skip if no meaningful text + text.as_ref()?; + Some(ParsedTranscriptRecord { + record_type: record_type.to_string(), + timestamp: timestamp.clone(), + content_types: vec!["text".to_string()], + tool_name: None, + text, + raw_input_tokens: None, + raw_output_tokens: None, + raw_cache_read_tokens: None, + raw_cache_write_tokens: None, + model: None, + }) + } + "custom_tool_call" => { + let name = payload + .get("name") + .and_then(|v| v.as_str()) + .unwrap_or("tool"); + let input = payload.get("input").and_then(|v| v.as_str()).unwrap_or(""); + // Truncate long patches for display (char-safe to avoid UTF-8 panic) + let display_input = if input.len() > 500 { + let truncated: String = input.chars().take(500).collect(); + format!("{}...", truncated) + } else { + input.to_string() + }; + Some(ParsedTranscriptRecord { + record_type: "assistant".to_string(), + timestamp: timestamp.clone(), + content_types: vec!["tool_use".to_string()], + tool_name: Some(name.to_string()), + text: Some(display_input), + raw_input_tokens: None, + raw_output_tokens: None, + raw_cache_read_tokens: None, + raw_cache_write_tokens: None, + model: None, + }) + } + // reasoning — encrypted, skip + _ => None, + } + } +} + +/// Known opening tags Codex uses to inject system context into the user role. +/// Sourced from openai/codex `codex-rs/protocol/src/protocol.rs`. +const CODEX_SYSTEM_PROMPT_TAGS: &[&str] = &[ + "", + "", + "", + "", + "", + "", + "", +]; + +/// Returns true if `text` starts with one of the known Codex system-prompt tags +/// (after trimming leading whitespace). +fn is_codex_system_prompt(text: &str) -> bool { + let trimmed = text.trim_start(); + CODEX_SYSTEM_PROMPT_TAGS + .iter() + .any(|tag| trimmed.starts_with(tag)) +} + +/// Parse Codex's custom apply_patch format into file changes. 
+pub fn parse_codex_patch(patch: &str) -> Vec<ExtractedFileChange> {
+    let mut changes = Vec::new();
+    let mut current_file: Option<String> = None;
+    let mut current_type: Option<String> = None;
+    let mut current_lines: Vec<String> = Vec::new();
+
+    for line in patch.lines() {
+        if line == "*** Begin Patch" || line == "*** End Patch" {
+            flush_pending(
+                &mut changes,
+                &mut current_file,
+                &mut current_type,
+                &mut current_lines,
+            );
+            continue;
+        }
+
+        if let Some(path) = line.strip_prefix("*** Add File: ") {
+            flush_pending(
+                &mut changes,
+                &mut current_file,
+                &mut current_type,
+                &mut current_lines,
+            );
+            current_file = Some(path.to_string());
+            current_type = Some("create".to_string());
+        } else if let Some(path) = line.strip_prefix("*** Update File: ") {
+            flush_pending(
+                &mut changes,
+                &mut current_file,
+                &mut current_type,
+                &mut current_lines,
+            );
+            current_file = Some(path.to_string());
+            current_type = Some("edit".to_string());
+        } else if let Some(path) = line.strip_prefix("*** Delete File: ") {
+            flush_pending(
+                &mut changes,
+                &mut current_file,
+                &mut current_type,
+                &mut current_lines,
+            );
+            current_file = Some(path.to_string());
+            current_type = Some("delete".to_string());
+        } else if current_file.is_some() {
+            current_lines.push(line.to_string());
+        }
+    }
+
+    flush_pending(
+        &mut changes,
+        &mut current_file,
+        &mut current_type,
+        &mut current_lines,
+    );
+    changes
+}
+
+fn flush_pending(
+    changes: &mut Vec<ExtractedFileChange>,
+    file: &mut Option<String>,
+    kind: &mut Option<String>,
+    lines: &mut Vec<String>,
+) {
+    if let (Some(file_path), Some(change_type)) = (file.take(), kind.take()) {
+        changes.push(build_file_change(&file_path, &change_type, lines));
+        lines.clear();
+    }
+}
+
+fn build_file_change(file_path: &str, change_type: &str, lines: &[String]) -> ExtractedFileChange {
+    match change_type {
+        "create" => {
+            let content: String = lines
+                .iter()
+                .map(|l| l.strip_prefix('+').unwrap_or(l))
+                .collect::<Vec<_>>()
+                .join("\n");
+            let mut hasher = Sha256::new();
+            hasher.update(content.as_bytes());
+            let hash =
hex::encode(hasher.finalize()); + let diff_text = lines.join("\n"); + ExtractedFileChange { + file_path: file_path.to_string(), + change_type: "create".to_string(), + diff_text: if diff_text.is_empty() { + None + } else { + Some(diff_text) + }, + content_hash: Some(hash), + } + } + "edit" => { + let diff_text = lines.join("\n"); + ExtractedFileChange { + file_path: file_path.to_string(), + change_type: "edit".to_string(), + diff_text: if diff_text.is_empty() { + None + } else { + Some(diff_text) + }, + content_hash: None, + } + } + "delete" => ExtractedFileChange { + file_path: file_path.to_string(), + change_type: "delete".to_string(), + diff_text: None, + content_hash: None, + }, + _ => ExtractedFileChange { + file_path: file_path.to_string(), + change_type: change_type.to_string(), + diff_text: None, + content_hash: None, + }, + } +} diff --git a/crates/tracevault-core/src/agent_adapter/default.rs b/crates/tracevault-core/src/agent_adapter/default.rs new file mode 100644 index 00000000..85e25316 --- /dev/null +++ b/crates/tracevault-core/src/agent_adapter/default.rs @@ -0,0 +1,38 @@ +use crate::streaming::StreamEventType; + +use super::{AgentAdapter, ParsedTranscriptRecord, TokenUsage}; + +pub struct DefaultAdapter; + +impl AgentAdapter for DefaultAdapter { + fn name(&self) -> &str { + "default" + } + + fn map_event_type(&self, hook_event_name: &str) -> StreamEventType { + match hook_event_name { + "SessionStart" => StreamEventType::SessionStart, + "Stop" | "SessionEnd" => StreamEventType::SessionEnd, + _ => StreamEventType::ToolUse, + } + } + + fn is_file_modifying(&self, _tool_name: &str) -> bool { + false + } + + fn extract_token_usage(&self, _chunk: &serde_json::Value) -> Option { + None + } + + fn extract_model(&self, _chunk: &serde_json::Value) -> Option { + None + } + + fn parse_transcript_record( + &self, + _chunk: &serde_json::Value, + ) -> Option { + None + } +} diff --git a/crates/tracevault-core/src/agent_adapter/mod.rs 
b/crates/tracevault-core/src/agent_adapter/mod.rs
new file mode 100644
index 00000000..ae60aa3b
--- /dev/null
+++ b/crates/tracevault-core/src/agent_adapter/mod.rs
@@ -0,0 +1,158 @@
+pub mod claude_code;
+pub mod codex;
+mod default;
+
+use chrono::{DateTime, Utc};
+use serde::Serialize;
+use std::collections::HashMap;
+use std::path::Path;
+use std::sync::Arc;
+
+use crate::hooks::HookResponse;
+use crate::streaming::{ExtractedFileChange, StreamEventType};
+
+use self::default::DefaultAdapter;
+
+/// File change with all metadata `stream.rs` needs to persist it. Both
+/// hook-sourced and transcript-sourced extractions return this same shape so
+/// the persistence layer doesn't need to know which mechanism produced it.
+#[derive(Debug, Clone)]
+pub struct FileChangeRecord {
+    pub change: ExtractedFileChange,
+    pub tool_name: String,
+    pub tool_input: Option<serde_json::Value>,
+    pub timestamp: DateTime<Utc>,
+}
+
+#[derive(Debug, Clone, Default)]
+pub struct TokenUsage {
+    pub input_tokens: i64,
+    pub output_tokens: i64,
+    pub cache_read_tokens: i64,
+    pub cache_write_tokens: i64,
+}
+
+#[derive(Debug, Clone, Serialize)]
+pub struct ParsedTranscriptRecord {
+    pub record_type: String,
+    pub timestamp: Option<String>,
+    pub content_types: Vec<String>,
+    pub tool_name: Option<String>,
+    pub text: Option<String>,
+    pub raw_input_tokens: Option<i64>,
+    pub raw_output_tokens: Option<i64>,
+    pub raw_cache_read_tokens: Option<i64>,
+    pub raw_cache_write_tokens: Option<i64>,
+    pub model: Option<String>,
+}
+
+pub trait AgentAdapter: Send + Sync {
+    fn name(&self) -> &str;
+    /// Human-readable label shown in CLI/UI (e.g. "Claude Code", "Codex").
+    /// Defaults to `name()` for adapters that don't override.
+    fn display_name(&self) -> &str {
+        self.name()
+    }
+    /// Repo-relative path of the file `install_hooks` writes to
+    /// (e.g. ".claude/settings.json"). Empty for adapters that don't install
+    /// hooks (the default adapter).
+    fn hooks_install_path(&self) -> &str {
+        ""
+    }
+    /// Wire protocol version the CLI should send for this adapter.
+ /// Claude Code stays on v1 to keep its request bytes identical to the + /// pre-multi-agent main; new adapters use v2 (which carries `tool` over + /// the wire instead of the server hardcoding "claude-code"). + fn wire_protocol_version(&self) -> u32 { + 2 + } + /// Capability flag: should the server fire `update_tokens` when a + /// transcript batch contained a model but zero token usage? Defaults to + /// `false` to preserve main's Claude path bit-for-bit (where the gate was + /// solely on token presence). Codex sets this to `true` because its + /// model-only chunks can legitimately precede usage. + fn persists_model_without_usage(&self) -> bool { + false + } + fn map_event_type(&self, hook_event_name: &str) -> StreamEventType; + fn is_file_modifying(&self, tool_name: &str) -> bool; + /// File changes derived from a hook ToolUse event (Claude Write/Edit). + /// Default: none. Override for adapters whose file ops appear in the hook's + /// `tool_input` payload itself. + fn file_changes_from_hook( + &self, + _tool_name: &str, + _tool_input: &serde_json::Value, + _timestamp: DateTime, + ) -> Vec { + vec![] + } + /// Capability flag: does this adapter source file changes from transcript + /// chunks? When `false` (default), `stream.rs` skips + /// `file_changes_from_transcript` entirely — preserving the pre-multi-agent + /// code path for adapters like Claude Code that have no transcript-side + /// file extraction. + fn provides_transcript_file_changes(&self) -> bool { + false + } + /// File changes discovered inside a transcript chunk (Codex apply_patch). + /// Only called when `provides_transcript_file_changes()` returns `true`. + /// `fallback_timestamp` is used when the chunk itself has no parseable + /// timestamp. 
+    fn file_changes_from_transcript(
+        &self,
+        _chunk: &serde_json::Value,
+        _fallback_timestamp: DateTime<Utc>,
+    ) -> Vec<FileChangeRecord> {
+        vec![]
+    }
+    fn extract_token_usage(&self, chunk: &serde_json::Value) -> Option<TokenUsage>;
+    fn extract_model(&self, chunk: &serde_json::Value) -> Option<String>;
+    fn parse_transcript_record(&self, chunk: &serde_json::Value) -> Option<ParsedTranscriptRecord>;
+    /// Install agent-specific hooks into `project_root`. Default: no-op.
+    fn install_hooks(&self, _project_root: &Path) -> std::io::Result<()> {
+        Ok(())
+    }
+    /// Response to print on stdout after the hook stream finishes.
+    /// Default: empty `{}` (e.g. Codex). Claude Code overrides with `suppress_output: true`.
+    fn hook_response(&self) -> HookResponse {
+        HookResponse::empty()
+    }
+}
+
+pub struct AgentAdapterRegistry {
+    adapters: HashMap<String, Arc<dyn AgentAdapter>>,
+    default: Arc<dyn AgentAdapter>,
+}
+
+impl AgentAdapterRegistry {
+    pub fn new() -> Self {
+        let mut adapters: HashMap<String, Arc<dyn AgentAdapter>> = HashMap::new();
+        let claude: Arc<dyn AgentAdapter> = Arc::new(claude_code::ClaudeCodeAdapter);
+        adapters.insert("claude-code".to_string(), Arc::clone(&claude));
+        adapters.insert("claude".to_string(), claude);
+        adapters.insert("codex".to_string(), Arc::new(codex::CodexAdapter));
+        Self {
+            adapters,
+            default: Arc::new(DefaultAdapter),
+        }
+    }
+
+    pub fn get(&self, name: &str) -> &dyn AgentAdapter {
+        self.adapters
+            .get(name)
+            .map(|a| a.as_ref())
+            .unwrap_or(self.default.as_ref())
+    }
+
+    /// Returns Some only for explicitly registered agents — None for unknown.
+ pub fn try_get(&self, name: &str) -> Option<&dyn AgentAdapter> { + self.adapters.get(name).map(|a| a.as_ref()) + } +} + +impl Default for AgentAdapterRegistry { + fn default() -> Self { + Self::new() + } +} diff --git a/crates/tracevault-core/src/hooks.rs b/crates/tracevault-core/src/hooks.rs index 79a8f694..5e47d97b 100644 --- a/crates/tracevault-core/src/hooks.rs +++ b/crates/tracevault-core/src/hooks.rs @@ -34,6 +34,13 @@ impl HookResponse { suppress_output: Some(true), } } + + pub fn empty() -> Self { + Self { + r#continue: None, + suppress_output: None, + } + } } #[derive(Debug, Error)] diff --git a/crates/tracevault-core/src/lib.rs b/crates/tracevault-core/src/lib.rs index 5681ebd1..d4fde9f5 100644 --- a/crates/tracevault-core/src/lib.rs +++ b/crates/tracevault-core/src/lib.rs @@ -1,3 +1,4 @@ +pub mod agent_adapter; pub mod code_nav; pub mod diff; pub mod extensions; diff --git a/crates/tracevault-core/src/streaming.rs b/crates/tracevault-core/src/streaming.rs index 43214814..1b3ae239 100644 --- a/crates/tracevault-core/src/streaming.rs +++ b/crates/tracevault-core/src/streaming.rs @@ -1,6 +1,5 @@ use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; -use sha2::{Digest, Sha256}; #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "snake_case")] @@ -75,10 +74,6 @@ pub struct ExtractedFileChange { pub content_hash: Option, } -pub fn is_file_modifying_tool(tool_name: &str) -> bool { - matches!(tool_name, "Write" | "Edit" | "Bash") -} - impl StreamEventRequest { /// Drop optional fields largest-first until the serialized payload is /// under 512 KB. Prevents 413 errors on both real-time sends and flush. 
@@ -98,42 +93,3 @@ impl StreamEventRequest { self.tool_input = None; } } - -pub fn extract_file_change( - tool_name: &str, - tool_input: &serde_json::Value, -) -> Option { - match tool_name { - "Write" => { - let file_path = tool_input.get("file_path")?.as_str()?.to_string(); - let content = tool_input.get("content")?.as_str()?; - let mut hasher = Sha256::new(); - hasher.update(content.as_bytes()); - let hash = hex::encode(hasher.finalize()); - let diff = content - .lines() - .map(|l| format!("+{l}")) - .collect::>() - .join("\n"); - Some(ExtractedFileChange { - file_path, - change_type: "create".to_string(), - diff_text: Some(diff), - content_hash: Some(hash), - }) - } - "Edit" => { - let file_path = tool_input.get("file_path")?.as_str()?.to_string(); - let old_string = tool_input.get("old_string")?.as_str()?; - let new_string = tool_input.get("new_string")?.as_str()?; - let diff = format!("--- {old_string}\n+++ {new_string}"); - Some(ExtractedFileChange { - file_path, - change_type: "edit".to_string(), - diff_text: Some(diff), - content_hash: None, - }) - } - _ => None, - } -} diff --git a/crates/tracevault-core/tests/agent_adapter_test.rs b/crates/tracevault-core/tests/agent_adapter_test.rs new file mode 100644 index 00000000..92e5d039 --- /dev/null +++ b/crates/tracevault-core/tests/agent_adapter_test.rs @@ -0,0 +1,697 @@ +use chrono::{TimeZone, Utc}; +use serde_json::json; +use tracevault_core::agent_adapter::AgentAdapterRegistry; +use tracevault_core::streaming::StreamEventType; + +fn ts() -> chrono::DateTime { + Utc.with_ymd_and_hms(2026, 4, 29, 10, 0, 0).unwrap() +} + +#[test] +fn registry_unknown_agent_returns_default() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("unknown-agent"); + assert_eq!(adapter.name(), "default"); +} + +#[test] +fn default_adapter_extract_token_usage_returns_none() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("nope"); + let chunk = + serde_json::json!({"type": 
"assistant", "message": {"usage": {"input_tokens": 100}}}); + assert!(adapter.extract_token_usage(&chunk).is_none()); +} + +#[test] +fn registry_dispatches_to_claude_code() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + assert_eq!(adapter.name(), "claude-code"); +} + +#[test] +fn claude_code_map_event_types() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + assert!(matches!( + adapter.map_event_type("SessionStart"), + StreamEventType::SessionStart + )); + assert!(matches!( + adapter.map_event_type("Notification"), + StreamEventType::SessionStart + )); + assert!(matches!( + adapter.map_event_type("Stop"), + StreamEventType::SessionEnd + )); + assert!(matches!( + adapter.map_event_type("PostToolUse"), + StreamEventType::ToolUse + )); +} + +#[test] +fn claude_code_file_changes_from_hook_write() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let input = json!({"file_path": "src/main.rs", "content": "fn main() {}"}); + let records = adapter.file_changes_from_hook("Write", &input, ts()); + assert_eq!(records.len(), 1); + assert_eq!(records[0].change.file_path, "src/main.rs"); + assert_eq!(records[0].change.change_type, "create"); + assert!(records[0].change.content_hash.is_some()); + assert_eq!(records[0].tool_name, "Write"); + assert_eq!(records[0].timestamp, ts()); +} + +#[test] +fn claude_code_file_changes_from_hook_edit() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let input = json!({"file_path": "src/lib.rs", "old_string": "old", "new_string": "new"}); + let records = adapter.file_changes_from_hook("Edit", &input, ts()); + assert_eq!(records.len(), 1); + assert_eq!(records[0].change.change_type, "edit"); + assert_eq!(records[0].tool_name, "Edit"); +} + +#[test] +fn claude_code_read_returns_empty() { + let registry = AgentAdapterRegistry::new(); + let adapter = 
registry.get("claude-code"); + let input = json!({"file_path": "src/lib.rs"}); + assert!(adapter + .file_changes_from_hook("Read", &input, ts()) + .is_empty()); +} + +#[test] +fn claude_code_is_file_modifying() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + assert!(adapter.is_file_modifying("Write")); + assert!(adapter.is_file_modifying("Edit")); + assert!(adapter.is_file_modifying("Bash")); + assert!(!adapter.is_file_modifying("Read")); +} + +#[test] +fn claude_code_extract_token_usage() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({ + "type": "assistant", + "message": { + "usage": { + "input_tokens": 1000, + "output_tokens": 200, + "cache_read_input_tokens": 500, + "cache_creation_input_tokens": 100 + } + } + }); + let usage = adapter.extract_token_usage(&chunk).unwrap(); + assert_eq!(usage.input_tokens, 1000); + assert_eq!(usage.output_tokens, 200); + assert_eq!(usage.cache_read_tokens, 500); + assert_eq!(usage.cache_write_tokens, 100); +} + +#[test] +fn claude_code_extract_model() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({"type": "assistant", "message": {"model": "claude-opus-4-6"}}); + assert_eq!( + adapter.extract_model(&chunk).as_deref(), + Some("claude-opus-4-6") + ); +} + +#[test] +fn claude_code_parse_assistant_record() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({ + "type": "assistant", + "timestamp": "2026-03-23T13:17:16Z", + "message": { + "model": "claude-opus-4-6", + "content": [ + {"type": "text", "text": "Hello world"}, + {"type": "tool_use", "name": "Write", "input": {}} + ], + "usage": { + "input_tokens": 100, "output_tokens": 50, + "cache_read_input_tokens": 0, "cache_creation_input_tokens": 0 + } + } + }); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + 
assert_eq!(record.record_type, "assistant"); + assert_eq!(record.model.as_deref(), Some("claude-opus-4-6")); + assert!(record.text.as_ref().unwrap().contains("Hello world")); + assert!(record.content_types.contains(&"text".to_string())); + assert!(record.content_types.contains(&"tool_use".to_string())); + assert_eq!(record.tool_name.as_deref(), Some("Write")); + assert_eq!(record.raw_input_tokens, Some(100)); +} + +#[test] +fn claude_code_parse_user_record() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({"type": "user", "timestamp": "2026-03-23T13:17:00Z", "message": {"content": "Fix the bug"}}); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.record_type, "user"); + assert_eq!(record.text.as_deref(), Some("Fix the bug")); +} + +#[test] +fn claude_code_parse_user_tool_result_read() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({"type": "user", "toolUseResult": {"file": {"filePath": "src/main.rs"}}}); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.tool_name.as_deref(), Some("Read: src/main.rs")); +} + +#[test] +fn claude_code_parse_user_tool_result_bash_uses_top_level_stdout() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({ + "type": "user", + "toolUseResult": { + "stdout": "ok\n", + "stderr": "", + "interrupted": false + } + }); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.tool_name.as_deref(), Some("Bash")); +} + +#[test] +fn claude_code_parse_user_tool_result_glob_uses_top_level_filenames() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({ + "type": "user", + "toolUseResult": { + "filenames": ["src/main.rs", "src/lib.rs"] + } + }); + let record = 
adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.tool_name.as_deref(), Some("Glob")); +} + +#[test] +fn claude_code_parse_user_tool_result_block_reads_content_field() { + // tool_result blocks store the body under `content`, not `text`. + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({ + "type": "user", + "message": { + "content": [ + {"type": "tool_result", "tool_use_id": "abc", "content": "command output"} + ] + } + }); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.text.as_deref(), Some("command output")); + assert!(record.content_types.contains(&"tool_result".to_string())); +} + +#[test] +fn claude_code_parse_user_text_block() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({ + "type": "user", + "message": { + "content": [{"type": "text", "text": "follow up"}] + } + }); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.text.as_deref(), Some("follow up")); +} + +#[test] +fn claude_code_parse_assistant_thinking_uses_prefix_and_double_newline() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({ + "type": "assistant", + "message": { + "model": "claude-opus-4-6", + "content": [ + {"type": "thinking", "thinking": "let me think"}, + {"type": "text", "text": "the answer"} + ] + } + }); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!( + record.text.as_deref(), + Some("[thinking] let me think\n\nthe answer") + ); +} + +#[test] +fn claude_code_parse_assistant_missing_message_returns_empty_record() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({"type": "assistant", "timestamp": "2026-04-29T10:00:00Z"}); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + 
assert_eq!(record.record_type, "assistant"); + assert!(record.text.is_none()); + assert!(record.content_types.is_empty()); + assert!(record.model.is_none()); +} + +#[test] +fn claude_code_parse_progress_record() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = + json!({"type": "progress", "data": {"hookName": "tracevault", "hookEvent": "PostToolUse"}}); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.record_type, "progress"); + assert_eq!(record.text.as_deref(), Some("PostToolUse: tracevault")); + assert_eq!(record.tool_name.as_deref(), Some("tracevault")); +} + +#[test] +fn claude_code_parse_progress_record_event_only() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({"type": "progress", "data": {"hookEvent": "PostToolUse"}}); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.text.as_deref(), Some("PostToolUse")); + assert!(record.tool_name.is_none()); +} + +#[test] +fn claude_code_parse_progress_record_missing_event_yields_no_text() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({"type": "progress", "data": {"hookName": "tracevault"}}); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert!(record.text.is_none()); + assert_eq!(record.tool_name.as_deref(), Some("tracevault")); +} + +#[test] +fn claude_code_parse_system_record_turn_duration() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({"type": "system", "subtype": "turn_duration", "durationMs": 5000.0}); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.record_type, "system"); + assert_eq!(record.text.as_deref(), Some("turn_duration: 5.0s")); + assert_eq!(record.content_types, vec!["turn_duration".to_string()]); +} + +#[test] +fn 
claude_code_parse_system_record_stop_hook_summary() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({"type": "system", "subtype": "stop_hook_summary", "hookCount": 3}); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.text.as_deref(), Some("stop_hook_summary: 3 hooks")); +} + +#[test] +fn claude_code_parse_system_record_unknown_subtype_keeps_subtype_in_content_types() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({"type": "system", "subtype": "init"}); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.text.as_deref(), Some("init")); + assert_eq!(record.content_types, vec!["init".to_string()]); +} + +#[test] +fn codex_map_event_types() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + assert!(matches!( + adapter.map_event_type("SessionStart"), + StreamEventType::SessionStart + )); + assert!(matches!( + adapter.map_event_type("Stop"), + StreamEventType::SessionEnd + )); + assert!(matches!( + adapter.map_event_type("PostToolUse"), + StreamEventType::ToolUse + )); +} + +#[test] +fn codex_extract_token_usage() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({"type": "event_msg", "payload": {"type": "token_count", "info": {"last_token_usage": {"input_tokens": 2000, "output_tokens": 300, "cached_input_tokens": 1500}}}}); + let usage = adapter.extract_token_usage(&chunk).unwrap(); + assert_eq!(usage.input_tokens, 2000); + assert_eq!(usage.output_tokens, 300); + assert_eq!(usage.cache_read_tokens, 1500); + assert_eq!(usage.cache_write_tokens, 0); +} + +#[test] +fn codex_extract_token_usage_non_token_chunk_returns_none() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({"type": "event_msg", "payload": {"type": 
"agent_message"}}); + assert!(adapter.extract_token_usage(&chunk).is_none()); +} + +#[test] +fn codex_extract_token_usage_token_count_without_info_returns_none() { + // token_count event with no `info` field (e.g. early/empty payload). + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({"type": "event_msg", "payload": {"type": "token_count"}}); + assert!(adapter.extract_token_usage(&chunk).is_none()); +} + +#[test] +fn codex_extract_token_usage_token_count_without_last_token_usage_returns_none() { + // token_count event with `info` but no `last_token_usage` (metadata-only). + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({ + "type": "event_msg", + "payload": {"type": "token_count", "info": {"total_tokens": 0}} + }); + assert!(adapter.extract_token_usage(&chunk).is_none()); +} + +#[test] +fn codex_extract_model() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({"type": "turn_context", "payload": {"model": "codex-mini-latest"}}); + assert_eq!( + adapter.extract_model(&chunk).as_deref(), + Some("codex-mini-latest") + ); +} + +#[test] +fn codex_extract_model_non_turn_context_returns_none() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({"type": "event_msg", "payload": {"type": "agent_message"}}); + assert!(adapter.extract_model(&chunk).is_none()); +} + +#[test] +fn codex_parse_agent_message() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({"type": "event_msg", "payload": {"type": "agent_message", "content": "I'll fix that bug now."}}); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.record_type, "assistant"); + assert_eq!(record.text.as_deref(), Some("I'll fix that bug now.")); +} + +#[test] +fn codex_parse_user_message() { + let registry = 
AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({"type": "event_msg", "payload": {"type": "user_message", "content": "Fix the login bug"}}); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.record_type, "user"); + assert_eq!(record.text.as_deref(), Some("Fix the login bug")); +} + +#[test] +fn codex_user_message_with_html_snippet_is_kept() { + // Legitimate user questions starting with `<` (HTML/JSX/XML) must not be + // dropped by the system-prompt filter — only known Codex prompt tags are. + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({ + "type": "response_item", + "payload": { + "type": "message", + "role": "user", + "content": [ + {"type": "input_text", "text": "
fix this rendering
"} + ] + } + }); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!( + record.text.as_deref(), + Some("
fix this rendering
") + ); +} + +#[test] +fn codex_user_message_with_system_prompt_tag_is_dropped() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + for tag in [ + "", + "", + "", + "", + "", + "", + "", + ] { + let body = format!("{tag}some system context\n"); + let chunk = json!({ + "type": "response_item", + "payload": { + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": body}] + } + }); + assert!( + adapter.parse_transcript_record(&chunk).is_none(), + "tag {tag} should be filtered out" + ); + } +} + +#[test] +fn codex_user_message_with_leading_whitespace_then_system_tag_is_dropped() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({ + "type": "response_item", + "payload": { + "type": "message", + "role": "user", + "content": [ + {"type": "input_text", "text": " \ncwd: /tmp"} + ] + } + }); + assert!(adapter.parse_transcript_record(&chunk).is_none()); +} + +#[test] +fn codex_assistant_message_with_html_snippet_is_kept_regardless_of_prefix() { + // The system-prompt filter only applies to the user role. + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({ + "type": "response_item", + "payload": { + "type": "message", + "role": "assistant", + "content": [ + {"type": "output_text", "text": "example"} + ] + } + }); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.record_type, "assistant"); + assert!(record.text.is_some()); +} + +#[test] +fn codex_parse_shell_call() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({"type": "response_item", "payload": {"type": "local_shell_call", "command": "cargo test", "output": "test result: ok. 
5 passed"}}); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.record_type, "assistant"); + assert_eq!(record.tool_name.as_deref(), Some("Bash")); + assert!(record.text.as_ref().unwrap().contains("cargo test")); +} + +#[test] +fn codex_parse_token_count_returns_none_for_display() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({"type": "event_msg", "payload": {"type": "token_count", "info": {"last_token_usage": {"input_tokens": 100, "output_tokens": 50}}}}); + assert!(adapter.parse_transcript_record(&chunk).is_none()); +} + +// Codex file changes are extracted from transcript, not hook events. +// These tests use parse_codex_patch directly. + +#[test] +fn codex_patch_parse_add_file() { + let changes = tracevault_core::agent_adapter::codex::parse_codex_patch( + "*** Begin Patch\n*** Add File: src/new.rs\n+fn main() {}\n*** End Patch\n", + ); + assert_eq!(changes.len(), 1); + assert_eq!(changes[0].file_path, "src/new.rs"); + assert_eq!(changes[0].change_type, "create"); + assert!(changes[0].content_hash.is_some()); +} + +#[test] +fn codex_patch_parse_update_file() { + let changes = tracevault_core::agent_adapter::codex::parse_codex_patch( + "*** Begin Patch\n*** Update File: src/lib.rs\n@@ fn old()\n-fn old()\n+fn new_func()\n*** End Patch\n", + ); + assert_eq!(changes.len(), 1); + assert_eq!(changes[0].file_path, "src/lib.rs"); + assert_eq!(changes[0].change_type, "edit"); + assert!(changes[0].diff_text.is_some()); +} + +#[test] +fn codex_patch_parse_delete_file() { + let changes = tracevault_core::agent_adapter::codex::parse_codex_patch( + "*** Begin Patch\n*** Delete File: src/old.rs\n*** End Patch\n", + ); + assert_eq!(changes.len(), 1); + assert_eq!(changes[0].file_path, "src/old.rs"); + assert_eq!(changes[0].change_type, "delete"); +} + +#[test] +fn codex_file_changes_from_hook_returns_empty() { + // Codex hook events don't carry file modifications. 
+ let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let input = json!({"command": "cargo build"}); + assert!(adapter + .file_changes_from_hook("Bash", &input, ts()) + .is_empty()); +} + +#[test] +fn codex_is_file_modifying_always_false() { + // Codex file changes come from transcript, not hook events. + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + assert!(!adapter.is_file_modifying("Bash")); + assert!(!adapter.is_file_modifying("Read")); + assert!(!adapter.is_file_modifying("apply_patch")); +} + +#[test] +fn codex_file_changes_from_transcript_apply_patch() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({ + "type": "response_item", + "timestamp": "2026-04-29T11:30:00Z", + "payload": { + "type": "custom_tool_call", + "name": "apply_patch", + "input": "*** Begin Patch\n*** Update File: src/main.rs\n@@ fn old()\n-fn old()\n+fn new_func()\n*** End Patch\n" + } + }); + let records = adapter.file_changes_from_transcript(&chunk, ts()); + assert_eq!(records.len(), 1); + assert_eq!(records[0].change.file_path, "src/main.rs"); + assert_eq!(records[0].change.change_type, "edit"); + assert_eq!(records[0].tool_name, "apply_patch"); + assert!(records[0].tool_input.is_some()); + // chunk timestamp wins over fallback. 
+ assert_ne!(records[0].timestamp, ts()); +} + +#[test] +fn codex_file_changes_from_transcript_falls_back_when_chunk_has_no_timestamp() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({ + "type": "response_item", + "payload": { + "type": "custom_tool_call", + "name": "apply_patch", + "input": "*** Begin Patch\n*** Add File: x.rs\n+x\n*** End Patch\n" + } + }); + let records = adapter.file_changes_from_transcript(&chunk, ts()); + assert_eq!(records.len(), 1); + assert_eq!(records[0].timestamp, ts()); +} + +#[test] +fn codex_file_changes_from_transcript_non_patch_returns_empty() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({ + "type": "response_item", + "payload": {"type": "message", "role": "assistant", "content": []} + }); + assert!(adapter + .file_changes_from_transcript(&chunk, ts()) + .is_empty()); +} + +#[test] +fn codex_reasoning_record_returns_none() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({ + "type": "response_item", + "payload": { + "type": "reasoning", + "content": null, + "summary": [], + "encrypted_content": "gAAAAA..." 
+ } + }); + assert!(adapter.parse_transcript_record(&chunk).is_none()); +} + +#[test] +fn codex_custom_tool_call_display() { + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("codex"); + let chunk = json!({ + "type": "response_item", + "timestamp": "2026-04-03T17:52:42Z", + "payload": { + "type": "custom_tool_call", + "name": "apply_patch", + "input": "*** Begin Patch\n*** Update File: README.md\n@@\n old line\n+new line\n*** End Patch" + } + }); + let record = adapter.parse_transcript_record(&chunk).unwrap(); + assert_eq!(record.record_type, "assistant"); + assert_eq!(record.tool_name.as_deref(), Some("apply_patch")); + assert!(record.text.as_ref().unwrap().contains("Update File")); +} + +#[test] +fn claude_code_file_changes_from_transcript_returns_empty() { + // Claude Code file changes come from hook events, not transcript. + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let chunk = json!({"type": "assistant", "message": {"content": []}}); + assert!(adapter + .file_changes_from_transcript(&chunk, ts()) + .is_empty()); +} diff --git a/crates/tracevault-core/tests/streaming_test.rs b/crates/tracevault-core/tests/streaming_test.rs index 19f3f2b2..677a2b25 100644 --- a/crates/tracevault-core/tests/streaming_test.rs +++ b/crates/tracevault-core/tests/streaming_test.rs @@ -28,50 +28,6 @@ fn test_stream_event_request_serialization() { assert_eq!(parsed.event_index, Some(42)); } -#[test] -fn test_extract_file_change_from_edit() { - let tool_input = json!({ - "file_path": "/repo/src/lib.rs", - "old_string": "fn old() {}", - "new_string": "fn new_func() {}" - }); - let change = extract_file_change("Edit", &tool_input); - assert!(change.is_some()); - let c = change.unwrap(); - assert_eq!(c.file_path, "/repo/src/lib.rs"); - assert_eq!(c.change_type, "edit"); - assert!(c.diff_text.is_some()); -} - -#[test] -fn test_extract_file_change_from_write() { - let tool_input = json!({ - "file_path": 
"/repo/src/new_file.rs", - "content": "fn main() {}" - }); - let change = extract_file_change("Write", &tool_input); - assert!(change.is_some()); - let c = change.unwrap(); - assert_eq!(c.file_path, "/repo/src/new_file.rs"); - assert_eq!(c.change_type, "create"); - assert!(c.content_hash.is_some()); -} - -#[test] -fn test_extract_file_change_from_read_returns_none() { - let tool_input = json!({"file_path": "/repo/src/lib.rs"}); - assert!(extract_file_change("Read", &tool_input).is_none()); -} - -#[test] -fn test_is_file_modifying_tool() { - assert!(is_file_modifying_tool("Write")); - assert!(is_file_modifying_tool("Edit")); - assert!(is_file_modifying_tool("Bash")); - assert!(!is_file_modifying_tool("Read")); - assert!(!is_file_modifying_tool("Grep")); -} - #[test] fn test_commit_push_request_serialization() { let req = CommitPushRequest { @@ -86,21 +42,3 @@ fn test_commit_push_request_serialization() { let parsed: CommitPushRequest = serde_json::from_str(&json_str).unwrap(); assert_eq!(parsed.commit_sha, "abc123"); } - -#[test] -fn extract_file_change_write_missing_content() { - let input = json!({"file_path": "/tmp/test.rs"}); - assert!(extract_file_change("Write", &input).is_none()); -} - -#[test] -fn extract_file_change_edit_missing_old_string() { - let input = json!({"file_path": "/tmp/test.rs", "new_string": "new"}); - assert!(extract_file_change("Edit", &input).is_none()); -} - -#[test] -fn extract_file_change_write_missing_file_path() { - let input = json!({"content": "hello"}); - assert!(extract_file_change("Write", &input).is_none()); -} diff --git a/crates/tracevault-server/src/api/session_detail.rs b/crates/tracevault-server/src/api/session_detail.rs index 57994ed4..f06b2d6d 100644 --- a/crates/tracevault-server/src/api/session_detail.rs +++ b/crates/tracevault-server/src/api/session_detail.rs @@ -4,6 +4,8 @@ use chrono::{DateTime, Utc}; use serde::Serialize; use uuid::Uuid; +use tracevault_core::agent_adapter::AgentAdapter; + use 
crate::error::AppError; use crate::extractors::OrgAuth; use crate::pricing::{self, ModelPricing}; @@ -91,230 +93,10 @@ pub struct TranscriptRecord { pub model: Option, } -fn parse_record(record: &serde_json::Value, pricing: &ModelPricing) -> Option { - let record_type = record.get("type")?.as_str()?.to_string(); - let timestamp = record - .get("timestamp") - .and_then(|v| v.as_str()) - .map(String::from); - - match record_type.as_str() { - "assistant" => { - let msg = match record.get("message") { - Some(m) => m, - None => { - return Some(TranscriptRecord { - record_type, - timestamp, - content_types: vec![], - tool_name: None, - text: None, - usage: None, - model: None, - }); - } - }; - let model = msg.get("model").and_then(|v| v.as_str()).map(String::from); - - let mut content_types = Vec::new(); - let mut texts = Vec::new(); - if let Some(content) = msg.get("content").and_then(|v| v.as_array()) { - for block in content { - if let Some(ct) = block.get("type").and_then(|v| v.as_str()) { - if !content_types.contains(&ct.to_string()) { - content_types.push(ct.to_string()); - } - } - if let Some(text) = block.get("text").and_then(|v| v.as_str()) { - texts.push(text.to_string()); - } - if let Some(thinking) = block.get("thinking").and_then(|v| v.as_str()) { - texts.push(format!("[thinking] {}", thinking)); - } - } - } - - let usage = msg.get("usage").map(|u| { - let total_input = u.get("input_tokens").and_then(|v| v.as_i64()).unwrap_or(0); - let output = u.get("output_tokens").and_then(|v| v.as_i64()).unwrap_or(0); - let cache_read = u - .get("cache_read_input_tokens") - .and_then(|v| v.as_i64()) - .unwrap_or(0); - let cache_write = u - .get("cache_creation_input_tokens") - .and_then(|v| v.as_i64()) - .unwrap_or(0); - // input_tokens from the API includes cache_read and cache_write tokens, - // so subtract them to get fresh (non-cached) input tokens only - let fresh_input = (total_input - cache_read - cache_write).max(0); - let cost = 
pricing::estimate_cost_with_pricing( - pricing, - fresh_input, - output, - cache_read, - cache_write, - ); - RecordUsage { - input_tokens: fresh_input, - output_tokens: output, - cache_read_tokens: cache_read, - cache_write_tokens: cache_write, - cost_usd: cost, - } - }); - - let tool_name = msg - .get("content") - .and_then(|v| v.as_array()) - .and_then(|arr| { - arr.iter() - .find(|b| b.get("type").and_then(|v| v.as_str()) == Some("tool_use")) - }) - .and_then(|b| b.get("name").and_then(|v| v.as_str()).map(String::from)); - - Some(TranscriptRecord { - record_type, - timestamp, - content_types, - tool_name, - text: if texts.is_empty() { - None - } else { - Some(texts.join("\n\n")) - }, - usage, - model, - }) - } - "user" => { - let mut content_types = Vec::new(); - let mut text = None; - let mut tool_name = None; - - let msg = record.get("message"); - match msg.and_then(|m| m.get("content")) { - Some(serde_json::Value::String(s)) => { - content_types.push("text".to_string()); - text = Some(s.clone()); - } - Some(serde_json::Value::Array(arr)) => { - for block in arr { - if let Some(ct) = block.get("type").and_then(|v| v.as_str()) { - if !content_types.contains(&ct.to_string()) { - content_types.push(ct.to_string()); - } - if ct == "tool_result" { - if let Some(content) = block.get("content").and_then(|v| v.as_str()) - { - text = Some(content.to_string()); - } - } else if ct == "text" { - if let Some(t) = block.get("text").and_then(|v| v.as_str()) { - text = Some(t.to_string()); - } - } - } - } - } - _ => {} - } - - if let Some(tur) = record.get("toolUseResult") { - if let Some(file) = tur - .get("file") - .and_then(|f| f.get("filePath").and_then(|v| v.as_str())) - { - tool_name = Some(format!("Read: {}", file)); - } else if tur.get("filenames").is_some() { - tool_name = Some("Glob".to_string()); - } else if tur.get("stdout").is_some() { - tool_name = Some("Bash".to_string()); - } - } - - Some(TranscriptRecord { - record_type, - timestamp, - content_types, - 
tool_name, - text, - usage: None, - model: None, - }) - } - "progress" => { - let data = record.get("data"); - let hook_name = data - .and_then(|d| d.get("hookName").and_then(|v| v.as_str())) - .map(String::from); - let hook_event = data.and_then(|d| d.get("hookEvent").and_then(|v| v.as_str())); - let text = hook_event.map(|e| { - if let Some(ref name) = hook_name { - format!("{}: {}", e, name) - } else { - e.to_string() - } - }); - - Some(TranscriptRecord { - record_type, - timestamp, - content_types: vec![], - tool_name: hook_name, - text, - usage: None, - model: None, - }) - } - "system" => { - let subtype = record - .get("subtype") - .and_then(|v| v.as_str()) - .unwrap_or("unknown"); - let text = match subtype { - "turn_duration" => { - let ms = record - .get("durationMs") - .and_then(|v| v.as_f64()) - .unwrap_or(0.0); - Some(format!("turn_duration: {:.1}s", ms / 1000.0)) - } - "stop_hook_summary" => { - let count = record - .get("hookCount") - .and_then(|v| v.as_i64()) - .unwrap_or(0); - Some(format!("stop_hook_summary: {} hooks", count)) - } - _ => Some(subtype.to_string()), - }; - - Some(TranscriptRecord { - record_type, - timestamp, - content_types: vec![subtype.to_string()], - tool_name: None, - text, - usage: None, - model: None, - }) - } - _ => Some(TranscriptRecord { - record_type, - timestamp, - content_types: vec![], - tool_name: None, - text: None, - usage: None, - model: None, - }), - } -} - pub fn parse_transcript( transcript: &serde_json::Value, pricing: &ModelPricing, + adapter: &dyn AgentAdapter, ) -> ( Vec, Vec, @@ -335,7 +117,41 @@ pub fn parse_transcript( let mut total_cache_write: i64 = 0; for record in records { - if let Some(tr) = parse_record(record, pricing) { + if let Some(parsed) = adapter.parse_transcript_record(record) { + let usage = if parsed.raw_input_tokens.is_some() { + let total_input_raw = parsed.raw_input_tokens.unwrap_or(0); + let output = parsed.raw_output_tokens.unwrap_or(0); + let cache_read = 
parsed.raw_cache_read_tokens.unwrap_or(0); + let cache_write = parsed.raw_cache_write_tokens.unwrap_or(0); + let fresh_input = (total_input_raw - cache_read - cache_write).max(0); + let cost = pricing::estimate_cost_with_pricing( + pricing, + fresh_input, + output, + cache_read, + cache_write, + ); + Some(RecordUsage { + input_tokens: fresh_input, + output_tokens: output, + cache_read_tokens: cache_read, + cache_write_tokens: cache_write, + cost_usd: cost, + }) + } else { + None + }; + + let tr = TranscriptRecord { + record_type: parsed.record_type.clone(), + timestamp: parsed.timestamp, + content_types: parsed.content_types, + tool_name: parsed.tool_name, + text: parsed.text, + usage, + model: parsed.model, + }; + if tr.record_type == "assistant" { if let Some(ref usage) = tr.usage { let model = tr.model.as_deref().unwrap_or("unknown"); @@ -414,6 +230,7 @@ pub fn parse_transcript( struct SessionRow { session_id: String, model: Option, + tool: Option, started_at: Option>, ended_at: Option>, duration_ms: Option, @@ -436,7 +253,7 @@ pub async fn get_session_detail( let org_id = auth.org_id; let row = sqlx::query_as::<_, SessionRow>( - "SELECT s.session_id, s.model, s.started_at, s.ended_at, s.duration_ms, + "SELECT s.session_id, s.model, s.tool, s.started_at, s.ended_at, s.duration_ms, s.total_tokens, s.input_tokens, s.output_tokens, s.cache_read_tokens, s.cache_write_tokens, s.estimated_cost_usd, @@ -471,8 +288,11 @@ pub async fn get_session_detail( let transcript_array: Vec = chunks.into_iter().map(|(d,)| d).collect(); let transcript_val = serde_json::Value::Array(transcript_array); + let adapter = state + .agent_registry + .get(row.tool.as_deref().unwrap_or("claude-code")); let (per_call, transcript_records, token_distribution, cost_breakdown, cache_savings) = - parse_transcript(&transcript_val, &pricing); + parse_transcript(&transcript_val, &pricing, adapter); // Count API calls from per_call data since api_calls column doesn't exist on sessions let api_calls = 
per_call.len() as i32; @@ -505,6 +325,7 @@ pub async fn get_session_detail( #[cfg(test)] mod tests { use super::*; + use tracevault_core::agent_adapter::AgentAdapterRegistry; fn test_pricing() -> ModelPricing { ModelPricing { @@ -518,8 +339,10 @@ mod tests { #[test] fn test_parse_empty_transcript() { let transcript = serde_json::json!([]); + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); let (per_call, records, dist, cost, savings) = - parse_transcript(&transcript, &test_pricing()); + parse_transcript(&transcript, &test_pricing(), adapter); assert!(per_call.is_empty()); assert!(records.is_empty()); assert_eq!(dist.input_tokens, 0); @@ -545,8 +368,10 @@ mod tests { } } ]); + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); let (per_call, records, dist, _cost, _savings) = - parse_transcript(&transcript, &test_pricing()); + parse_transcript(&transcript, &test_pricing(), adapter); assert_eq!(per_call.len(), 1); assert_eq!(per_call[0].index, 1); assert_eq!(per_call[0].cache_read_tokens, 1000); @@ -567,8 +392,10 @@ mod tests { } } ]); + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); let (per_call, records, _dist, _cost, _savings) = - parse_transcript(&transcript, &test_pricing()); + parse_transcript(&transcript, &test_pricing(), adapter); assert!(per_call.is_empty()); assert_eq!(records.len(), 1); } @@ -592,7 +419,10 @@ mod tests { } ]); let pricing = test_pricing(); - let (_per_call, _records, _dist, _cost, savings) = parse_transcript(&transcript, &pricing); + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let (_per_call, _records, _dist, _cost, savings) = + parse_transcript(&transcript, &pricing, adapter); assert!((savings.gross_savings_usd - 13.5).abs() < 0.001); assert!((savings.cache_write_overhead_usd - 0.375).abs() < 0.001); assert!((savings.net_savings_usd - 13.125).abs() < 0.001); @@ -620,7 +450,9 
@@ mod tests { } } ]); - let (per_call, _, _, _, _) = parse_transcript(&transcript, &test_pricing()); + let registry = AgentAdapterRegistry::new(); + let adapter = registry.get("claude-code"); + let (per_call, _, _, _, _) = parse_transcript(&transcript, &test_pricing(), adapter); assert_eq!(per_call.len(), 2); assert!((per_call[0].cumulative_cost_usd - 15.0).abs() < 0.001); assert!((per_call[1].cumulative_cost_usd - 30.0).abs() < 0.001); diff --git a/crates/tracevault-server/src/api/traces_ui.rs b/crates/tracevault-server/src/api/traces_ui.rs index 9ec4742f..9331ed76 100644 --- a/crates/tracevault-server/src/api/traces_ui.rs +++ b/crates/tracevault-server/src/api/traces_ui.rs @@ -438,17 +438,14 @@ pub async fn get_session_transcript( ) -> Result, AppError> { verify_session_access(&state.pool, session_id, auth.org_id).await?; - let session_model: Option = - sqlx::query_scalar("SELECT model FROM sessions WHERE id = $1") - .bind(session_id) - .fetch_one(&state.pool) - .await?; - - let session_started_at: Option> = - sqlx::query_scalar("SELECT started_at FROM sessions WHERE id = $1") - .bind(session_id) - .fetch_one(&state.pool) - .await?; + let (session_model, session_tool, session_started_at): ( + Option, + Option, + Option>, + ) = sqlx::query_as("SELECT model, tool, started_at FROM sessions WHERE id = $1") + .bind(session_id) + .fetch_one(&state.pool) + .await?; let transcript_chunks = sqlx::query_as::<_, TranscriptChunkRow>( "SELECT chunk_index, data @@ -467,10 +464,13 @@ pub async fn get_session_transcript( ) .await; + let adapter = state + .agent_registry + .get(session_tool.as_deref().unwrap_or("claude-code")); let transcript_array: Vec = transcript_chunks.iter().map(|c| c.data.clone()).collect(); let transcript_val = serde_json::Value::Array(transcript_array); - let (_, transcript_records, _, _, _) = parse_transcript(&transcript_val, &pricing); + let (_, transcript_records, _, _, _) = parse_transcript(&transcript_val, &pricing, adapter); 
Ok(Json(TranscriptResponse { transcript_chunks, diff --git a/crates/tracevault-server/src/lib.rs b/crates/tracevault-server/src/lib.rs index 7bc67b03..b31760cb 100644 --- a/crates/tracevault-server/src/lib.rs +++ b/crates/tracevault-server/src/lib.rs @@ -22,6 +22,9 @@ pub mod story; pub use error::AppError; +use std::sync::Arc; +use tracevault_core::agent_adapter::AgentAdapterRegistry; + /// Stable replacement for `str::floor_char_boundary` (nightly-only). /// Returns the largest byte index `<= index` that is a char boundary. pub fn floor_char_boundary(s: &str, index: usize) -> usize { @@ -47,4 +50,5 @@ pub struct AppState { pub invite_expiry_minutes: u64, pub embedding_service: Option>, + pub agent_registry: Arc, } diff --git a/crates/tracevault-server/src/main.rs b/crates/tracevault-server/src/main.rs index 92159d70..0be93790 100644 --- a/crates/tracevault-server/src/main.rs +++ b/crates/tracevault-server/src/main.rs @@ -4,10 +4,12 @@ use axum::{ }; use http::Method; use std::net::SocketAddr; +use std::sync::Arc; use tower_governor::{governor::GovernorConfigBuilder, GovernorLayer}; use tower_http::cors::CorsLayer; use tower_http::trace::TraceLayer; +use tracevault_core::agent_adapter::AgentAdapterRegistry; use tracevault_server::{api, config, db, extensions, pricing_sync, repo_manager, AppState}; #[tokio::main] @@ -576,6 +578,7 @@ async fn main() { cors_origin: cfg.cors_origin.clone(), invite_expiry_minutes: cfg.invite_expiry_minutes, embedding_service, + agent_registry: Arc::new(AgentAdapterRegistry::new()), }); let listener = tokio::net::TcpListener::bind(&bind_addr).await.unwrap(); diff --git a/crates/tracevault-server/src/service/stream.rs b/crates/tracevault-server/src/service/stream.rs index 7bc2dac9..63fa8fee 100644 --- a/crates/tracevault-server/src/service/stream.rs +++ b/crates/tracevault-server/src/service/stream.rs @@ -1,8 +1,5 @@ use tracevault_core::software::extract_software; -use tracevault_core::streaming::{ - extract_file_change, 
is_file_modifying_tool, StreamEventRequest, StreamEventResponse, - StreamEventType, -}; +use tracevault_core::streaming::{StreamEventRequest, StreamEventResponse, StreamEventType}; use uuid::Uuid; use crate::error::AppError; @@ -37,6 +34,9 @@ impl StreamService { Some("claude-code".to_string()) }; + let agent_name = tool.as_deref().unwrap_or("claude-code"); + let adapter = state.agent_registry.get(agent_name); + // 3. Upsert session let session_db_id = SessionRepo::upsert( &state.pool, @@ -83,39 +83,67 @@ impl StreamService { continue; } - // Extract token usage from assistant messages - if let Some(msg) = line.get("message") { - if let Some(usage) = msg.get("usage") { - batch_input += usage - .get("input_tokens") - .and_then(|v| v.as_i64()) - .unwrap_or(0); - batch_output += usage - .get("output_tokens") - .and_then(|v| v.as_i64()) - .unwrap_or(0); - batch_cache_read += usage - .get("cache_read_input_tokens") - .and_then(|v| v.as_i64()) - .unwrap_or(0); - batch_cache_write += usage - .get("cache_creation_input_tokens") - .and_then(|v| v.as_i64()) - .unwrap_or(0); - } - if detected_model.is_none() { - detected_model = - msg.get("model").and_then(|v| v.as_str()).map(String::from); + // Transcript-sourced file changes (Codex apply_patch) come + // with their own synthetic tool_event. The capability flag + // gates the call entirely so adapters without this feature + // (Claude Code) don't even invoke the method — keeping the + // pre-multi-agent code path for them bit-for-bit. 
+ if adapter.provides_transcript_file_changes() { + for record in adapter.file_changes_from_transcript(line, req.timestamp) { + let event_id = EventRepo::insert_tool_event( + &state.pool, + &crate::repo::events::InsertToolEvent { + session_id: session_db_id, + event_index: chunk_index, + tool_name: Some(record.tool_name), + tool_input: record.tool_input, + tool_response: None, + timestamp: Some(record.timestamp), + }, + ) + .await?; + if let Some(eid) = event_id { + EventRepo::insert_file_change( + &state.pool, + &InsertFileChange { + session_id: session_db_id, + event_id: eid, + file_path: record.change.file_path, + change_type: record.change.change_type, + diff_text: record.change.diff_text, + content_hash: record.change.content_hash, + timestamp: Some(record.timestamp), + }, + ) + .await?; + } } } + + // Extract token usage via adapter + if let Some(usage) = adapter.extract_token_usage(line) { + batch_input += usage.input_tokens; + batch_output += usage.output_tokens; + batch_cache_read += usage.cache_read_tokens; + batch_cache_write += usage.cache_write_tokens; + } + if detected_model.is_none() { + detected_model = adapter.extract_model(line); + } } - // Update session token counts and cost if we found usage data + // Update session token counts and cost if we found usage data. + // The `persists_model_without_usage` capability lets adapters + // (Codex) extend the gate to also fire on a model-only batch; + // Claude leaves it `false` so the gate stays bit-identical to + // pre-multi-agent main (token presence only). 
let has_tokens = batch_input > 0 || batch_output > 0 || batch_cache_read > 0 || batch_cache_write > 0; - if has_tokens { + let persist_model_only = + adapter.persists_model_without_usage() && detected_model.is_some(); + if has_tokens || persist_model_only { let model_name = detected_model.as_deref().unwrap_or("unknown"); // input_tokens from the API includes cache_read and cache_write, // subtract to get fresh (non-cached) input only @@ -156,7 +184,7 @@ impl StreamService { })?; let tool_name = req.tool_name.as_deref().unwrap_or(""); - let store_response = is_file_modifying_tool(tool_name); + let store_response = adapter.is_file_modifying(tool_name); let inserted_id = EventRepo::insert_tool_event( &state.pool, @@ -178,20 +206,26 @@ impl StreamService { if let Some(eid) = inserted_id { event_db_id = Some(eid); - // Extract file changes for file-modifying tools - if is_file_modifying_tool(tool_name) { + // Hook-sourced file changes attach to the tool_event that + // was just inserted. Gated by `is_file_modifying` so the + // Claude path matches main exactly: Read/Glob/etc. skip + // the call, only Write/Edit/Bash enter (Bash returns empty + // because there's no file_path/content to extract). 
+ if store_response { if let Some(ref tool_input) = req.tool_input { - if let Some(change) = extract_file_change(tool_name, tool_input) { + for record in + adapter.file_changes_from_hook(tool_name, tool_input, req.timestamp) + { EventRepo::insert_file_change( &state.pool, &InsertFileChange { session_id: session_db_id, event_id: eid, - file_path: change.file_path, - change_type: change.change_type, - diff_text: change.diff_text, - content_hash: change.content_hash, - timestamp: Some(req.timestamp), + file_path: record.change.file_path, + change_type: record.change.change_type, + diff_text: record.change.diff_text, + content_hash: record.change.content_hash, + timestamp: Some(record.timestamp), }, ) .await?; diff --git a/web/src/lib/components/AgentBadge.svelte b/web/src/lib/components/AgentBadge.svelte new file mode 100644 index 00000000..e283fcaa --- /dev/null +++ b/web/src/lib/components/AgentBadge.svelte @@ -0,0 +1,69 @@ + + +{#if agent} + + {#if tool === 'claude-code'} + + + + + {:else if tool === 'codex'} + + + + + + {:else if tool === 'gemini'} + + + + + {:else if tool === 'cursor'} + + + + + {:else} + + + + + + + + {/if} + {agent.label} + +{/if} diff --git a/web/src/routes/orgs/[slug]/traces/sessions/+page.svelte b/web/src/routes/orgs/[slug]/traces/sessions/+page.svelte index 3ff5daae..bfe0cb23 100644 --- a/web/src/routes/orgs/[slug]/traces/sessions/+page.svelte +++ b/web/src/routes/orgs/[slug]/traces/sessions/+page.svelte @@ -6,6 +6,7 @@ import type { SessionItem } from '$lib/types'; import DataTable from '$lib/components/DataTable.svelte'; import StatusBadge from '$lib/components/StatusBadge.svelte'; + import AgentBadge from '$lib/components/AgentBadge.svelte'; import LoadingState from '$lib/components/LoadingState.svelte'; import ErrorState from '$lib/components/ErrorState.svelte'; import EmptyState from '$lib/components/EmptyState.svelte'; @@ -87,7 +88,10 @@ > {#snippet children({ row, col })} {#if col.key === '_status'} - +
+ + +
{:else if col.key === 'session_id'} {String(row.session_id).slice(0, 8)} diff --git a/web/src/routes/orgs/[slug]/traces/sessions/[id]/+page.svelte b/web/src/routes/orgs/[slug]/traces/sessions/[id]/+page.svelte index b4ed381f..39f854f3 100644 --- a/web/src/routes/orgs/[slug]/traces/sessions/[id]/+page.svelte +++ b/web/src/routes/orgs/[slug]/traces/sessions/[id]/+page.svelte @@ -15,6 +15,7 @@ import { formatDateTime } from '$lib/utils/date'; import * as Table from '$lib/components/ui/table/index.js'; import StatusBadge from '$lib/components/StatusBadge.svelte'; + import AgentBadge from '$lib/components/AgentBadge.svelte'; import LoadingState from '$lib/components/LoadingState.svelte'; import ErrorState from '$lib/components/ErrorState.svelte'; import SessionTranscript from '$lib/components/session-detail/SessionTranscript.svelte'; @@ -197,6 +198,7 @@ / {session.session_id.slice(0, 8)} +