From 9d11a9db6dd0ac6b0cbb5863cf710fd5d28f76e0 Mon Sep 17 00:00:00 2001 From: Martin Pool Date: Sun, 7 Dec 2025 11:10:34 -0800 Subject: [PATCH 1/2] Better error messages when parse test fails --- tests/parse_samples.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/parse_samples.rs b/tests/parse_samples.rs index c062dea..36395cf 100644 --- a/tests/parse_samples.rs +++ b/tests/parse_samples.rs @@ -24,16 +24,14 @@ where // patch file. fn verify_patch_roundtrip(data: &str, path: &PathBuf) { let patches = Patch::from_multiple(data) - .unwrap_or_else(|err| panic!("failed to parse {:?}, error: {}", path, err)); + .unwrap_or_else(|err| panic!("failed to parse {path:?}, error: {err:?}")); - #[allow(clippy::format_collect)] // Display::fmt is the only way to resolve Patch->str + #[allow(clippy::format_collect)] // it's not performance-sensitive let patch_file: String = patches.iter().map(|patch| format!("{}\n", patch)).collect(); - println!("{}", patch_file); + println!("Emitting parsed file:\n{}", patch_file); + let patches2 = Patch::from_multiple(&patch_file).unwrap_or_else(|err| { - panic!( - "failed to re-parse {:?} after formatting, error: {}", - path, err - ) + panic!("failed to re-parse {path:?} after formatting, error: {err:?}",) }); assert_eq!(patches, patches2); } From 3aad6a28a6452e2fcffe52ee6a1cfbd915c64491 Mon Sep 17 00:00:00 2001 From: Martin Pool Date: Sun, 7 Dec 2025 11:10:34 -0800 Subject: [PATCH 2/2] Better handling of "Binary files differ" lines, git index lines --- src/ast.rs | 24 +++++++---- src/parser.rs | 55 +++++++++++++++++++++---- tests/regressions.rs | 1 + tests/wild-samples/gitbinary.patch | 3 ++ tests/wild-samples/gitsmallbinary.patch | 3 ++ 5 files changed, 71 insertions(+), 15 deletions(-) create mode 100644 tests/wild-samples/gitbinary.patch create mode 100644 tests/wild-samples/gitsmallbinary.patch diff --git a/src/ast.rs b/src/ast.rs index 332b856..ec3afb9 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -14,6 +14,10 @@ pub struct Patch<'a> { pub new: File<'a>, /// hunks of differences; each hunk shows one area where the files differ pub hunks: Vec>, + /// True if this patch is between two binary files. + /// + /// For binary files, there are no hunks and `end_newline` is meaningless. + pub binary: bool, /// true if the last line of the file ends in a newline character /// /// This will only be false if at the end of the patch we encounter the text: @@ -25,14 +29,18 @@ impl fmt::Display for Patch<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { // Display implementations typically hold up the invariant that there is no trailing // newline. This isn't enforced, but it allows them to work well with `println!` - - write!(f, "--- {}", self.old)?; - write!(f, "\n+++ {}", self.new)?; - for hunk in &self.hunks { - write!(f, "\n{}", hunk)?; - } - if !self.end_newline { - write!(f, "\n\\ No newline at end of file")?; + if self.binary { + assert!(self.hunks.is_empty()); + writeln!(f, "Binary files {} and {} differ", self.old, self.new)?; + } else { + write!(f, "--- {}", self.old)?; + write!(f, "\n+++ {}", self.new)?; + for hunk in &self.hunks { + write!(f, "\n{}", hunk)?; + } + if !self.end_newline { + write!(f, "\n\\ No newline at end of file")?; + } } Ok(()) } diff --git a/src/parser.rs b/src/parser.rs index dd1b2c9..be1d355 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,16 +2,19 @@ use std::borrow::Cow; use std::error::Error; use chrono::DateTime; +use nom::character::complete::{hex_digit1, space1}; +use nom::sequence::separated_pair; use nom::*; use nom::{ branch::alt, bytes::complete::{is_not, tag, take_until}, character::complete::{char, digit1, line_ending, none_of, not_line_ending, one_of}, - combinator::{all_consuming, map, map_opt, not, opt, verify}, + combinator::{all_consuming, map, map_opt, not, opt, recognize, verify}, error::context, multi::{many0, many1}, - sequence::{delimited, preceded, terminated, tuple}, + sequence::{delimited, pair, preceded, terminated, tuple}, }; +use nom_locate::LocatedSpan; use crate::ast::*; @@ -50,8 +53,8 @@ impl std::fmt::Display for ParseError<'_> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!( f, - "Line {}: Error while parsing: {}", - self.line, self.fragment + "Line {}: Error while parsing: {:?}: {:?}", + self.line, self.fragment, self.kind ) } } @@ -93,10 +96,12 @@ fn multiple_patches(input: Input) -> IResult> { } fn patch(input: Input) -> IResult { - if let Ok(patch) = binary_files_differ(input) { + if let Ok(patch) = file_rename_only(input) { return Ok(patch); } - if let Ok(patch) = file_rename_only(input) { + let (input, _diff_command) = diff_command(input)?; + let (input, _git_index) = git_index_line(input)?; + if let Ok(patch) = binary_files_differ(input) { return Ok(patch); } let (input, files) = headers(input)?; @@ -113,6 +118,7 @@ fn patch(input: Input) -> IResult { new, hunks, end_newline: !no_newline_indicator, + binary: false, }, )) } @@ -131,7 +137,7 @@ fn binary_files_differ(input: Input) -> IResult { .strip_suffix(" differ") .and_then(|s| s.split_once(" and ")) }), - line_ending, + many1(line_ending), ), )(input)?; Ok(( @@ -146,6 +152,7 @@ fn binary_files_differ(input: Input) -> IResult { meta: None, }, hunks: Vec::new(), + binary: true, end_newline: false, }, )) @@ -176,11 +183,44 @@ fn file_rename_only(input: Input<'_>) -> IResult, Patch<'_>> { meta: None, }, hunks: Vec::new(), + binary: false, end_newline: false, }, )) } +/// Parse a diff command line from the input, if one is present. +/// +/// git inserts e.g. +/// `diff --git a/file1 b/file2` +fn diff_command(input: Input<'_>) -> IResult, Option>> { + context( + "diff command line ", + opt(recognize(pair( + tag("diff "), + terminated(not_line_ending, opt(line_ending)), + ))), + )(input) +} + +/// Parse a git "index" line from the input, if one is present. +/// +/// e.g. `index 4805bf6..e807873 100644` +fn git_index_line(input: Input<'_>) -> IResult, Option>> { + // TODO: We could return the hashes, and maybe more usefully the unix file + // mode into the AST. + context( + "git index line", + opt(recognize(tuple(( + tag("index "), + separated_pair(hex_digit1, tag(".."), hex_digit1), + space1, + digit1, + opt(line_ending), + )))), + )(input) +} + // Header lines fn headers(input: Input) -> IResult { // Ignore any preamble lines in produced diffs @@ -667,6 +707,7 @@ mod tests { }, ], end_newline: true, + binary: false, }; test_parser!(patch(sample) -> expected); diff --git a/tests/regressions.rs b/tests/regressions.rs index 04cd59f..c7afe27 100644 --- a/tests/regressions.rs +++ b/tests/regressions.rs @@ -41,6 +41,7 @@ fn crlf_breaks_stuff_17() -> Result<(), ParseError<'static>> { range_hint: "", lines: vec![Line::Context("x")], }], + binary: false, end_newline: true, } ); diff --git a/tests/wild-samples/gitbinary.patch b/tests/wild-samples/gitbinary.patch new file mode 100644 index 0000000..0afaf5f --- /dev/null +++ b/tests/wild-samples/gitbinary.patch @@ -0,0 +1,3 @@ +diff --git a/test-renderers/expected/renderers/fog-None-wgpu.png b/test-renderers/expected/renderers/fog-None-wgpu.png +index 616d6ea8..afd1b043 100644 +Binary files a/test-renderers/expected/renderers/fog-None-wgpu.png and b/test-renderers/expected/renderers/fog-None-wgpu.png differ diff --git a/tests/wild-samples/gitsmallbinary.patch b/tests/wild-samples/gitsmallbinary.patch new file mode 100644 index 0000000..975477b --- /dev/null +++ b/tests/wild-samples/gitsmallbinary.patch @@ -0,0 +1,3 @@ +diff --git a/bin b/bin +index 4805bf6..e807873 100644 +Binary files a/bin and b/bin differ