From 05ba039b47a7295577cb15517f3aaed163b14580 Mon Sep 17 00:00:00 2001 From: "Heo, Sung" Date: Sat, 9 May 2026 13:11:23 +0900 Subject: [PATCH 1/2] feat: truncate oversized git diff in system prompt Large staged/unstaged diffs can blow up the system prompt context, wasting tokens and potentially exceeding model limits. Cap the combined git diff at 50 000 characters with a clear truncation notice. The cut point respects UTF-8 character boundaries to avoid producing invalid strings. Adds MAX_GIT_DIFF_CHARS constant alongside the existing MAX_INSTRUCTION_FILE_CHARS and MAX_TOTAL_INSTRUCTION_CHARS. Co-Authored-By: Claude Opus 4.6 --- rust/crates/runtime/src/prompt.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/rust/crates/runtime/src/prompt.rs b/rust/crates/runtime/src/prompt.rs index 1e6c4eda85..70ce1b7951 100644 --- a/rust/crates/runtime/src/prompt.rs +++ b/rust/crates/runtime/src/prompt.rs @@ -42,6 +42,7 @@ pub const SYSTEM_PROMPT_DYNAMIC_BOUNDARY: &str = "__SYSTEM_PROMPT_DYNAMIC_BOUNDA pub const FRONTIER_MODEL_NAME: &str = "Claude Opus 4.6"; const MAX_INSTRUCTION_FILE_CHARS: usize = 4_000; const MAX_TOTAL_INSTRUCTION_CHARS: usize = 12_000; +const MAX_GIT_DIFF_CHARS: usize = 50_000; /// Neutral identity for the model family line in generated prompts. #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] @@ -295,7 +296,17 @@ fn read_git_diff(cwd: &Path) -> Option { if sections.is_empty() { None } else { - Some(sections.join("\n\n")) + let mut combined = sections.join("\n\n"); + if combined.len() > MAX_GIT_DIFF_CHARS { + // Truncate to a valid UTF-8 char boundary + let mut end = MAX_GIT_DIFF_CHARS; + while !combined.is_char_boundary(end) { + end -= 1; + } + combined.truncate(end); + combined.push_str("\n\n... [diff truncated — too large for system prompt]"); + } + Some(combined) } } From cec026b0db516f53165e574bf8865126d655b2ec Mon Sep 17 00:00:00 2001 From: "Heo, Sung" Date: Sun, 10 May 2026 19:00:36 +0900 Subject: [PATCH 2/2] test: add unit tests for git diff truncation Extract `truncate_diff` helper from `read_git_diff` so it can be tested independently. Add three tests: - preserves content under the limit - caps oversized content with a truncation marker - respects UTF-8 character boundaries (multi-byte emoji) Co-Authored-By: Claude Opus 4.6 --- rust/crates/runtime/src/prompt.rs | 70 +++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 13 deletions(-) diff --git a/rust/crates/runtime/src/prompt.rs b/rust/crates/runtime/src/prompt.rs index 70ce1b7951..a41078d49f 100644 --- a/rust/crates/runtime/src/prompt.rs +++ b/rust/crates/runtime/src/prompt.rs @@ -296,18 +296,20 @@ fn read_git_diff(cwd: &Path) -> Option { if sections.is_empty() { None } else { - let mut combined = sections.join("\n\n"); - if combined.len() > MAX_GIT_DIFF_CHARS { - // Truncate to a valid UTF-8 char boundary - let mut end = MAX_GIT_DIFF_CHARS; - while !combined.is_char_boundary(end) { - end -= 1; - } - combined.truncate(end); - combined.push_str("\n\n... [diff truncated — too large for system prompt]"); + Some(truncate_diff(sections.join("\n\n"))) + } +} + +fn truncate_diff(mut diff: String) -> String { + if diff.len() > MAX_GIT_DIFF_CHARS { + let mut end = MAX_GIT_DIFF_CHARS; + while !diff.is_char_boundary(end) { + end -= 1; } - Some(combined) + diff.truncate(end); + diff.push_str("\n\n... [diff truncated — too large for system prompt]"); } + diff } fn read_git_output(cwd: &Path, args: &[&str]) -> Option { @@ -560,9 +562,9 @@ fn get_actions_section() -> String { mod tests { use super::{ collapse_blank_lines, display_context_path, normalize_instruction_content, - render_instruction_content, render_instruction_files, truncate_instruction_content, - ContextFile, ModelFamilyIdentity, ProjectContext, SystemPromptBuilder, - SYSTEM_PROMPT_DYNAMIC_BOUNDARY, + render_instruction_content, render_instruction_files, truncate_diff, + truncate_instruction_content, ContextFile, ModelFamilyIdentity, ProjectContext, + SystemPromptBuilder, MAX_GIT_DIFF_CHARS, SYSTEM_PROMPT_DYNAMIC_BOUNDARY, }; use crate::config::ConfigLoader; use std::fs; @@ -992,4 +994,46 @@ mod tests { assert!(rendered.contains("scope: /tmp/project")); assert!(rendered.contains("Project rules")); } + + #[test] + fn truncate_diff_preserves_short_content() { + let short = "a".repeat(1_000); + let result = truncate_diff(short.clone()); + assert_eq!(result, short); + assert!(!result.contains("[diff truncated")); + } + + #[test] + fn truncate_diff_caps_oversized_content() { + let large = "x".repeat(MAX_GIT_DIFF_CHARS + 5_000); + let result = truncate_diff(large); + assert!(result.contains("... [diff truncated — too large for system prompt]")); + // The body before the marker must be at most MAX_GIT_DIFF_CHARS bytes + let marker = "\n\n... [diff truncated — too large for system prompt]"; + let body_len = result.len() - marker.len(); + assert!(body_len <= MAX_GIT_DIFF_CHARS); + } + + #[test] + fn truncate_diff_respects_utf8_char_boundaries() { + // Build a string where MAX_GIT_DIFF_CHARS falls in the middle of a + // multi-byte character (U+1F600 = 4 bytes in UTF-8). + let prefix_len = MAX_GIT_DIFF_CHARS - 2; + let mut input = "a".repeat(prefix_len); + // Append a 4-byte emoji so bytes [prefix_len..prefix_len+4] are the + // emoji. MAX_GIT_DIFF_CHARS lands at prefix_len+2, inside the emoji. + input.push('\u{1F600}'); + input.push_str(&"b".repeat(10_000)); + + let result = truncate_diff(input); + // Must be valid UTF-8 (the fact that we have a String proves this, but + // let's also verify the truncation marker is present). + assert!(result.contains("[diff truncated")); + // The body (before marker) should end before the emoji since cutting + // inside it would be invalid UTF-8. + let marker = "\n\n... [diff truncated — too large for system prompt]"; + let body = &result[..result.len() - marker.len()]; + assert!(body.len() <= MAX_GIT_DIFF_CHARS); + assert!(body.is_char_boundary(body.len())); + } }