From a21b779ee857da835b4969aee65f8f68323d223a Mon Sep 17 00:00:00 2001 From: Martin Pool Date: Sun, 7 Dec 2025 11:10:34 -0800 Subject: [PATCH 1/3] Better error messages when parse test fails --- tests/parse_samples.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/parse_samples.rs b/tests/parse_samples.rs index c062dea..36395cf 100644 --- a/tests/parse_samples.rs +++ b/tests/parse_samples.rs @@ -24,16 +24,14 @@ where // patch file. fn verify_patch_roundtrip(data: &str, path: &PathBuf) { let patches = Patch::from_multiple(data) - .unwrap_or_else(|err| panic!("failed to parse {:?}, error: {}", path, err)); + .unwrap_or_else(|err| panic!("failed to parse {path:?}, error: {err:?}")); - #[allow(clippy::format_collect)] // Display::fmt is the only way to resolve Patch->str + #[allow(clippy::format_collect)] // it's not performance-sensitive let patch_file: String = patches.iter().map(|patch| format!("{}\n", patch)).collect(); - println!("{}", patch_file); + println!("Emitting parsed file:\n{}", patch_file); + let patches2 = Patch::from_multiple(&patch_file).unwrap_or_else(|err| { - panic!( - "failed to re-parse {:?} after formatting, error: {}", - path, err - ) + panic!("failed to re-parse {path:?} after formatting, error: {err:?}",) }); assert_eq!(patches, patches2); } From 71af6400e9f6fdae0500f2e6c7d952453317d070 Mon Sep 17 00:00:00 2001 From: Martin Pool Date: Sun, 7 Dec 2025 11:10:34 -0800 Subject: [PATCH 2/3] Better handling of "Binary files differ" lines, git index lines --- CHANGELOG.md | 2 + src/ast.rs | 28 +++++++++---- src/parser.rs | 55 +++++++++++++++++++++---- tests/regressions.rs | 3 +- tests/wild-samples/gitbinary.patch | 3 ++ tests/wild-samples/gitsmallbinary.patch | 3 ++ 6 files changed, 77 insertions(+), 17 deletions(-) create mode 100644 tests/wild-samples/gitbinary.patch create mode 100644 tests/wild-samples/gitsmallbinary.patch diff --git a/CHANGELOG.md b/CHANGELOG.md index 79cb37e..9ce1cbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ - `Patch::from_multiple` no longer returns an error on an input that contains no patches, including an empty string. It instead returns an empty vector. +- `Patch` now has an additional `binary` field indicating a "binary files differ" message. + ### Fixed - Issue #4: Fixed parsing of “No newline at end of file” markers so they are recognized even when not the final line of a hunk. diff --git a/src/ast.rs b/src/ast.rs index ae036c2..09ed367 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -14,25 +14,35 @@ pub struct Patch<'a> { pub new: File<'a>, /// hunks of differences; each hunk shows one area where the files differ pub hunks: Vec>, + /// If there was a `No newline at end of file` indicator after the last line of the old version of the file pub old_missing_newline: bool, /// If there was a `No newline at end of file` indicator after the last line of the new version of the file pub new_missing_newline: bool, + + /// True if this patch is between two binary files. + /// + /// For binary files, `hunks` is empty and the newline indicators are false. + pub binary: bool, } impl fmt::Display for Patch<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { // Display implementations typically hold up the invariant that there is no trailing // newline. This isn't enforced, but it allows them to work well with `println!` - - write!(f, "--- {}", self.old)?; - write!(f, "\n+++ {}", self.new)?; - for (i, hunk) in self.hunks.iter().enumerate() { - writeln!(f)?; - if i == self.hunks.len() - 1 { - hunk.fmt(f, self.old_missing_newline, self.new_missing_newline)?; - } else { - hunk.fmt(f, false, false)?; + if self.binary { + assert!(self.hunks.is_empty()); + write!(f, "Binary files {} and {} differ", self.old, self.new)?; + } else { + writeln!(f, "--- {}", self.old)?; + write!(f, "+++ {}", self.new)?; + for (i, hunk) in self.hunks.iter().enumerate() { + writeln!(f)?; + if i == self.hunks.len() - 1 { + hunk.fmt(f, self.old_missing_newline, self.new_missing_newline)?; + } else { + hunk.fmt(f, false, false)?; + } } } Ok(()) diff --git a/src/parser.rs b/src/parser.rs index 2ca42c7..22ca43f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,16 +2,19 @@ use std::borrow::Cow; use std::error::Error; use chrono::DateTime; +use nom::character::complete::{hex_digit1, space1}; +use nom::sequence::separated_pair; use nom::*; use nom::{ branch::alt, bytes::complete::{is_not, tag, take_until}, character::complete::{char, digit1, line_ending, none_of, not_line_ending, one_of}, - combinator::{all_consuming, map, map_opt, not, opt, verify}, + combinator::{all_consuming, map, map_opt, not, opt, recognize, verify}, error::context, multi::{many0, many1}, - sequence::{delimited, preceded, terminated, tuple}, + sequence::{delimited, pair, preceded, terminated, tuple}, }; +use nom_locate::LocatedSpan; use crate::ast::*; @@ -50,8 +53,8 @@ impl std::fmt::Display for ParseError<'_> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!( f, - "Line {}: Error while parsing: {}", - self.line, self.fragment + "Line {}: Error while parsing: {:?}: {:?}", + self.line, self.fragment, self.kind ) } } @@ -99,10 +102,12 @@ fn multiple_patches(input: Input) -> IResult> { } fn patch(input: Input) -> IResult { - if let Ok(patch) = binary_files_differ(input) { + if let Ok(patch) = file_rename_only(input) { return Ok(patch); } - if let Ok(patch) = file_rename_only(input) { + let (input, _diff_command) = diff_command(input)?; + let (input, _git_index) = git_index_line(input)?; + if let Ok(patch) = binary_files_differ(input) { return Ok(patch); } let (input, files) = headers(input)?; @@ -126,6 +131,7 @@ fn patch(input: Input) -> IResult { hunks, old_missing_newline, new_missing_newline, + binary: false, }, )) } @@ -144,7 +150,7 @@ fn binary_files_differ(input: Input) -> IResult { .strip_suffix(" differ") .and_then(|s| s.split_once(" and ")) }), - line_ending, + many1(line_ending), ), )(input)?; Ok(( @@ -161,6 +167,7 @@ fn binary_files_differ(input: Input) -> IResult { hunks: Vec::new(), old_missing_newline: false, new_missing_newline: false, + binary: true, }, )) } @@ -192,10 +199,43 @@ fn file_rename_only(input: Input<'_>) -> IResult, Patch<'_>> { hunks: Vec::new(), old_missing_newline: false, new_missing_newline: false, + binary: false, }, )) } +/// Parse a diff command line from the input, if one is present. +/// +/// git inserts e.g. +/// `diff --git a/file1 b/file2` +fn diff_command(input: Input<'_>) -> IResult, Option>> { + context( + "diff command line", + opt(recognize(pair( + tag("diff "), + terminated(not_line_ending, opt(line_ending)), + ))), + )(input) +} + +/// Parse a git "index" line from the input, if one is present. +/// +/// e.g. `index 4805bf6..e807873 100644` +fn git_index_line(input: Input<'_>) -> IResult, Option>> { + // TODO: We could return the hashes, and maybe more usefully the unix file + // mode into the AST. + context( + "git index line", + opt(recognize(tuple(( + tag("index "), + separated_pair(hex_digit1, tag(".."), hex_digit1), + space1, + digit1, + opt(line_ending), + )))), + )(input) +} + // Header lines fn headers(input: Input) -> IResult { // Ignore any preamble lines in produced diffs @@ -766,6 +806,7 @@ mod tests { ], old_missing_newline: false, new_missing_newline: false, + binary: false, }; test_parser!(patch(sample) -> expected); diff --git a/tests/regressions.rs b/tests/regressions.rs index f02afb7..f80d140 100644 --- a/tests/regressions.rs +++ b/tests/regressions.rs @@ -42,7 +42,8 @@ fn crlf_breaks_stuff_17() -> Result<(), ParseError<'static>> { lines: vec![Line::Context("x")], }], old_missing_newline: false, - new_missing_newline: false + new_missing_newline: false, + binary: false, } ); Ok(()) diff --git a/tests/wild-samples/gitbinary.patch b/tests/wild-samples/gitbinary.patch new file mode 100644 index 0000000..0afaf5f --- /dev/null +++ b/tests/wild-samples/gitbinary.patch @@ -0,0 +1,3 @@ +diff --git a/test-renderers/expected/renderers/fog-None-wgpu.png b/test-renderers/expected/renderers/fog-None-wgpu.png +index 616d6ea8..afd1b043 100644 +Binary files a/test-renderers/expected/renderers/fog-None-wgpu.png and b/test-renderers/expected/renderers/fog-None-wgpu.png differ diff --git a/tests/wild-samples/gitsmallbinary.patch b/tests/wild-samples/gitsmallbinary.patch new file mode 100644 index 0000000..975477b --- /dev/null +++ b/tests/wild-samples/gitsmallbinary.patch @@ -0,0 +1,3 @@ +diff --git a/bin b/bin +index 4805bf6..e807873 100644 +Binary files a/bin and b/bin differ From 1de718bb55aa6efc4447d290069a4f42a24d3563 Mon Sep 17 00:00:00 2001 From: Martin Pool Date: Sun, 11 Jan 2026 14:00:02 -0800 Subject: [PATCH 3/3] Handle git diffs that only rename files Fixes #20 --- CHANGELOG.md | 3 ++ src/ast.rs | 17 +++++++-- src/parser.rs | 38 ++++++++++++++++--- .../git-multiple-file-moves.patch | 20 ++++++++++ tests/wild-samples/git-only-file-moves.patch | 4 ++ 5 files changed, 72 insertions(+), 10 deletions(-) create mode 100644 tests/wild-samples/git-multiple-file-moves.patch create mode 100644 tests/wild-samples/git-only-file-moves.patch diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ce1cbf..54b3538 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,11 @@ - `Patch` now has an additional `binary` field indicating a "binary files differ" message. ### Fixed + - Issue #4: Fixed parsing of “No newline at end of file” markers so they are recognized even when not the final line of a hunk. +- Issue #20: Handle patches containing git file renames with no changes. + ### Changed ## [v0.7] diff --git a/src/ast.rs b/src/ast.rs index 09ed367..a43641b 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -30,12 +30,21 @@ impl fmt::Display for Patch<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { // Display implementations typically hold up the invariant that there is no trailing // newline. This isn't enforced, but it allows them to work well with `println!` + let Patch { old, new, .. } = &self; if self.binary { - assert!(self.hunks.is_empty()); - write!(f, "Binary files {} and {} differ", self.old, self.new)?; + assert!( + self.hunks.is_empty(), + "binary diff is not expected to have hunks" + ); + write!(f, "Binary files {old} and {new} differ")?; + } else if self.hunks.is_empty() && old != new { + writeln!(f, "diff --git a/{old} b/{new}")?; // TODO: Perhaps should be emitted in every case? + writeln!(f, "similarity index 100%")?; + writeln!(f, "rename from {old}")?; + write!(f, "rename to {new}")?; } else { - writeln!(f, "--- {}", self.old)?; - write!(f, "+++ {}", self.new)?; + writeln!(f, "--- {old}")?; + write!(f, "+++ {new}")?; for (i, hunk) in self.hunks.iter().enumerate() { writeln!(f)?; if i == self.hunks.len() - 1 { diff --git a/src/parser.rs b/src/parser.rs index 22ca43f..fb95643 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -102,10 +102,10 @@ fn multiple_patches(input: Input) -> IResult> { } fn patch(input: Input) -> IResult { + let (input, _diff_command) = diff_command(input)?; if let Ok(patch) = file_rename_only(input) { return Ok(patch); } - let (input, _diff_command) = diff_command(input)?; let (input, _git_index) = git_index_line(input)?; if let Ok(patch) = binary_files_differ(input) { return Ok(patch); @@ -172,14 +172,13 @@ fn binary_files_differ(input: Input) -> IResult { )) } -/// Parse patches with "similarity index 100%", i.e., patches where a file is renamed without any -/// other change in its diff. +/// Attempt to match patches with "similarity index 100%", i.e., patches where a file is renamed +/// without any other change in its diff. /// -/// The `parse` function should handle rename diffs with similary index less than 100%, at least as per the test +/// The `parse` function should handle rename diffs with similarity index less than 100%, at least as per the test /// `parses_file_renames_with_some_diff`. fn file_rename_only(input: Input<'_>) -> IResult, Patch<'_>> { - let (rest, _parsed) = take_until("\nsimilarity index 100%\n")(input)?; - let (rest, _parsed) = tag("\nsimilarity index 100%\n")(rest)?; + let (rest, _parsed) = tag("similarity index 100%\n")(input)?; let (rest, old_name) = delimited(tag("rename from "), take_until("\n"), line_ending)(rest)?; @@ -815,4 +814,31 @@ mod tests { Ok(()) } + + #[test] + fn test_git_file_rename_without_changes() -> ParseResult<'static, ()> { + let sample = "\ +diff --git a/tests/test-utils/Cargo.toml b/test-utils/Cargo.toml +similarity index 100% +rename from tests/test-utils/Cargo.toml +rename to test-utils/Cargo.toml +"; + let expected = Patch { + old: File { + path: "tests/test-utils/Cargo.toml".into(), + meta: None, + }, + new: File { + path: "test-utils/Cargo.toml".into(), + meta: None, + }, + hunks: Vec::new(), + old_missing_newline: false, + new_missing_newline: false, + binary: false, + }; + test_parser!(patch(sample) -> expected); + assert_eq!(format!("{expected}\n"), sample); + Ok(()) + } } diff --git a/tests/wild-samples/git-multiple-file-moves.patch b/tests/wild-samples/git-multiple-file-moves.patch new file mode 100644 index 0000000..115e73c --- /dev/null +++ b/tests/wild-samples/git-multiple-file-moves.patch @@ -0,0 +1,20 @@ +diff --git a/tests/test-utils/Cargo.toml b/test-utils/Cargo.toml +similarity index 100% +rename from tests/test-utils/Cargo.toml +rename to test-utils/Cargo.toml +diff --git a/tests/test-utils/src/io.rs b/test-utils/src/io.rs +similarity index 100% +rename from tests/test-utils/src/io.rs +rename to test-utils/src/io.rs +diff --git a/tests/test-utils/src/lib.rs b/test-utils/src/lib.rs +similarity index 100% +rename from tests/test-utils/src/lib.rs +rename to test-utils/src/lib.rs +diff --git a/tests/test-utils/src/read.rs b/test-utils/src/read.rs +similarity index 100% +rename from tests/test-utils/src/read.rs +rename to test-utils/src/read.rs +diff --git a/tests/test-utils/src/write.rs b/test-utils/src/write.rs +similarity index 100% +rename from tests/test-utils/src/write.rs +rename to test-utils/src/write.rs diff --git a/tests/wild-samples/git-only-file-moves.patch b/tests/wild-samples/git-only-file-moves.patch new file mode 100644 index 0000000..f65331f --- /dev/null +++ b/tests/wild-samples/git-only-file-moves.patch @@ -0,0 +1,4 @@ +diff --git a/tests/test-utils/Cargo.toml b/test-utils/Cargo.toml +similarity index 100% +rename from tests/test-utils/Cargo.toml +rename to test-utils/Cargo.toml