Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
352 changes: 352 additions & 0 deletions crates/terraphim_agent/src/learnings/capture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,223 @@ impl ScoredEntry {
}
}

/// JSONL transcript entry types for auto-extraction.
#[derive(Debug, Clone, Deserialize)]
pub struct TranscriptEntry {
#[serde(default)]
pub r#type: Option<String>,
#[serde(default)]
pub content: Option<String>,
#[serde(default)]
pub tool_name: Option<String>,
#[serde(default)]
pub tool_input: Option<serde_json::Value>,
#[serde(default)]
pub tool_result: Option<serde_json::Value>,
#[serde(default)]
pub exit_code: Option<i32>,
#[serde(default)]
pub error: Option<String>,
}

/// Check if content contains explicit correction phrases.
fn contains_correction_phrase(content: &str) -> Option<(String, String)> {
let lower = content.to_lowercase();

// Pattern: "instead use X" or "use X instead"
if let Some(idx) = lower.find("instead use") {
let after = &content[idx + 11..];
return Some((content.to_string(), after.trim().to_string()));
}
if let Some(idx) = lower.find("use ") {
let rest = &lower[idx + 4..];
if rest.contains("instead") {
let end = rest.find("instead").unwrap_or(rest.len());
let tool = &content[idx + 4..idx + 4 + end].trim();
return Some((content.to_string(), tool.to_string()));
}
}

// Pattern: "should be"
if let Some(idx) = lower.find("should be") {
let after = &content[idx + 9..];
return Some((content.to_string(), after.trim().to_string()));
}

// Pattern: "correct way"
if let Some(idx) = lower.find("correct way") {
let after = &content[idx + 11..];
// Look for "is to" or "to"
if after.contains("is to") {
let start = after.find("is to").unwrap_or(0) + 5;
return Some((content.to_string(), after[start..].trim().to_string()));
}
return Some((content.to_string(), after.trim().to_string()));
}

// Pattern: "use X not Y" or "use X, not Y"
if let Some(idx) = lower.find("use ") {
let rest = &content[idx + 4..];
let lower_rest = rest.to_lowercase();
if let Some(not_idx) = lower_rest.find(" not ") {
let tool = rest[..not_idx].trim();
// Find the end of the old tool (rest of string or next word boundary)
let old_tool_rest = &rest[not_idx + 5..];
let old_tool = old_tool_rest
.split_whitespace()
.next()
.unwrap_or(old_tool_rest)
.trim();
return Some((old_tool.to_string(), tool.to_string()));
}
}

None
}

/// Extract command from Bash tool input.
fn extract_command_from_input(input: &serde_json::Value) -> Option<String> {
input
.get("command")
.or_else(|| input.get("cmd"))
.and_then(|v| v.as_str())
.map(|s| s.to_string())
}

/// Auto-extract corrections from a JSONL session transcript.
///
/// Scans the transcript line by line and identifies:
/// 1. Failed Bash commands (exit code != 0) followed by successful variants
/// 2. Explicit correction phrases like "instead use", "should be", etc.
///
/// # Arguments
///
/// * `transcript_path` - Path to the JSONL transcript file
///
/// # Returns
///
/// Vector of extracted CorrectionEvent objects.
pub fn auto_extract_corrections(
transcript_path: &std::path::Path,
) -> Result<Vec<CorrectionEvent>, LearningError> {
use std::io::BufRead;

let file = fs::File::open(transcript_path)?;
let reader = std::io::BufReader::new(file);

let mut corrections = Vec::new();
let mut last_failed_command: Option<(String, i32, String)> = None; // (command, exit_code, error)

for line in reader.lines() {
let line = line?;
if line.trim().is_empty() {
continue;
}

let entry: TranscriptEntry = match serde_json::from_str(&line) {
Ok(e) => e,
Err(_) => continue, // Skip malformed lines
};

// Check for Bash tool results with exit codes
if entry.tool_name.as_deref() == Some("Bash")
|| entry.r#type.as_deref() == Some("tool_result")
{
// Check if this is a failed Bash command
if let Some(exit_code) = entry.exit_code {
if exit_code != 0 {
// Extract the command from tool_input in previous context or from error
if let Some(ref tool_input) = entry.tool_input {
if let Some(cmd) = extract_command_from_input(tool_input) {
let error = entry
.error
.clone()
.or_else(|| entry.content.clone())
.unwrap_or_default();
last_failed_command = Some((cmd, exit_code, error));
}
}
} else if exit_code == 0 {
// Successful command - check if we had a previous failure
if let Some((failed_cmd, failed_exit, failed_error)) =
last_failed_command.take()
{
// Extract the successful command
if let Some(ref tool_input) = entry.tool_input {
if let Some(success_cmd) = extract_command_from_input(tool_input) {
// Only create correction if commands are different
if failed_cmd != success_cmd {
let context = format!(
"Auto-extracted from session transcript. Failed with exit {}: {}",
failed_exit, failed_error
);
let correction = CorrectionEvent::new(
CorrectionType::ToolPreference,
failed_cmd,
success_cmd,
context,
LearningSource::Project,
)
.with_tags(vec![
"auto-extracted".to_string(),
"transcript".to_string(),
]);
corrections.push(correction);
}
}
}
}
}
}
}

// Check for explicit correction phrases in content
if let Some(ref content) = entry.content {
if let Some((original, corrected)) = contains_correction_phrase(content) {
let context = format!(
"Auto-extracted from session transcript content: {}",
content.chars().take(100).collect::<String>()
);
let correction = CorrectionEvent::new(
CorrectionType::Other("phrase-detected".to_string()),
original,
corrected,
context,
LearningSource::Project,
)
.with_tags(vec!["auto-extracted".to_string(), "phrase".to_string()]);
corrections.push(correction);
}
}

// Also check in tool_result if it's a string
if let Some(ref tool_result) = entry.tool_result {
if let Some(content) = tool_result.as_str() {
if let Some((original, corrected)) = contains_correction_phrase(content) {
let context = format!(
"Auto-extracted from tool result: {}",
content.chars().take(100).collect::<String>()
);
let correction = CorrectionEvent::new(
CorrectionType::Other("phrase-detected".to_string()),
original,
corrected,
context,
LearningSource::Project,
)
.with_tags(vec![
"auto-extracted".to_string(),
"tool-result".to_string(),
]);
corrections.push(correction);
}
}
}
}

Ok(corrections)
}

/// Suggest learnings based on context relevance.
///
/// Takes a context string (e.g., current working directory or task description),
Expand Down Expand Up @@ -1439,4 +1656,139 @@ mod tests {
assert!(correction_entry.summary().contains("npm"));
assert!(correction_entry.summary().contains("bun"));
}

#[test]
fn test_contains_correction_phrase_instead_use() {
let content = "You should instead use cargo build";
let result = contains_correction_phrase(content);
assert!(result.is_some());
let (original, _corrected) = result.unwrap();
assert!(original.contains("You should"));
}

#[test]
fn test_contains_correction_phrase_use_instead() {
let content = "Use bun instead of npm for faster installs";
let result = contains_correction_phrase(content);
assert!(result.is_some());
let (original, _corrected) = result.unwrap();
assert!(original.contains("Use bun"));
}

#[test]
fn test_contains_correction_phrase_should_be() {
let content = "The variable name should be user_count";
let result = contains_correction_phrase(content);
assert!(result.is_some());
let (original, _corrected) = result.unwrap();
assert!(original.contains("variable name"));
}

#[test]
fn test_contains_correction_phrase_correct_way() {
let content = "The correct way is to use cargo check first";
let result = contains_correction_phrase(content);
assert!(result.is_some());
let (original, _corrected) = result.unwrap();
assert!(original.contains("The correct way"));
}

#[test]
fn test_contains_correction_phrase_use_not() {
let content = "Use yarn not npm for this project";
let result = contains_correction_phrase(content);
assert!(result.is_some());
let (original, corrected) = result.unwrap();
assert_eq!(original, "npm");
assert_eq!(corrected, "yarn");
}

#[test]
fn test_contains_correction_phrase_no_match() {
let content = "This is just a normal sentence without corrections";
let result = contains_correction_phrase(content);
assert!(result.is_none());
}

#[test]
fn test_auto_extract_corrections_from_transcript() {
use std::io::Write;

let temp_dir = TempDir::new().unwrap();
let storage = temp_dir.path().join("learnings");
fs::create_dir(&storage).unwrap();

// Create a mock transcript with failed then successful commands
let transcript_path = temp_dir.path().join("session.jsonl");
let transcript_content = r#"
{"type": "tool_use", "tool_name": "Bash", "tool_input": {"command": "git push -f"}}
{"type": "tool_result", "tool_name": "Bash", "exit_code": 1, "error": "remote: rejected", "tool_input": {"command": "git push -f"}}
{"type": "tool_use", "tool_name": "Bash", "tool_input": {"command": "git push origin main"}}
{"type": "tool_result", "tool_name": "Bash", "exit_code": 0, "tool_input": {"command": "git push origin main"}}
{"content": "You should instead use cargo check before building"}
"#;
let mut file = fs::File::create(&transcript_path).unwrap();
file.write_all(transcript_content.as_bytes()).unwrap();

let corrections = auto_extract_corrections(&transcript_path).unwrap();

// Should find at least 2 corrections: the command fix + the phrase
assert!(
corrections.len() >= 2,
"Expected at least 2 corrections, got {}",
corrections.len()
);

// Check for the command correction
let cmd_correction = corrections
.iter()
.find(|c| c.original == "git push -f" && c.corrected == "git push origin main");
assert!(
cmd_correction.is_some(),
"Should find command correction: git push -f -> git push origin main"
);

// Check for the phrase correction
let phrase_correction = corrections
.iter()
.find(|c| c.corrected.contains("cargo check"));
assert!(
phrase_correction.is_some(),
"Should find phrase correction containing 'cargo check'"
);
}

#[test]
fn test_auto_extract_corrections_empty_transcript() {
let temp_dir = TempDir::new().unwrap();

// Create an empty transcript
let transcript_path = temp_dir.path().join("empty.jsonl");
fs::write(&transcript_path, "").unwrap();

let corrections = auto_extract_corrections(&transcript_path).unwrap();
assert!(corrections.is_empty());
}

#[test]
fn test_auto_extract_corrections_no_failures() {
use std::io::Write;

let temp_dir = TempDir::new().unwrap();

// Create a transcript with only successful commands
let transcript_path = temp_dir.path().join("success.jsonl");
let transcript_content = r#"
{"type": "tool_use", "tool_name": "Bash", "tool_input": {"command": "git status"}}
{"type": "tool_result", "tool_name": "Bash", "exit_code": 0, "tool_input": {"command": "git status"}}
{"type": "tool_use", "tool_name": "Bash", "tool_input": {"command": "git log"}}
{"type": "tool_result", "tool_name": "Bash", "exit_code": 0, "tool_input": {"command": "git log"}}
"#;
let mut file = fs::File::create(&transcript_path).unwrap();
file.write_all(transcript_content.as_bytes()).unwrap();

let corrections = auto_extract_corrections(&transcript_path).unwrap();
// No corrections since all commands succeeded
assert!(corrections.is_empty());
}
}
6 changes: 3 additions & 3 deletions crates/terraphim_agent/src/learnings/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ mod redaction;

#[allow(unused_imports)]
pub use capture::{
CorrectionType, LearningEntry, LearningSource, ScoredEntry, capture_correction,
capture_failed_command, correct_learning, list_all_entries, query_all_entries,
suggest_learnings,
CorrectionType, LearningEntry, LearningSource, ScoredEntry, auto_extract_corrections,
capture_correction, capture_failed_command, correct_learning, list_all_entries,
query_all_entries, suggest_learnings,
};

// Re-export for testing - not used by CLI yet
Expand Down
Loading
Loading