diff --git a/CHANGELOG.md b/CHANGELOG.md index a43dde0..79cb37e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ - `Patch::from_multiple` no longer returns an error on an input that contains no patches, including an empty string. It instead returns an empty vector. +### Fixed +- Issue #4: Fixed parsing of “No newline at end of file” markers so they are recognized even when not the final line of a hunk. + ### Changed ## [v0.7] diff --git a/examples/apply.rs b/examples/apply.rs index 150365e..d3811e6 100644 --- a/examples/apply.rs +++ b/examples/apply.rs @@ -14,11 +14,16 @@ fn apply(diff: Patch, old: &str) -> String { old_line += hunk.old_range.count; for line in hunk.lines { match line { - Line::Add(s) | Line::Context(s) => out.push(s), + Line::Add(s) | Line::Context(s) => { + out.push(s); + } Line::Remove(_) => {} } } } + if !diff.new_missing_newline { + out.push(""); + } out.join("\n") } diff --git a/src/ast.rs b/src/ast.rs index 332b856..ae036c2 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -14,11 +14,10 @@ pub struct Patch<'a> { pub new: File<'a>, /// hunks of differences; each hunk shows one area where the files differ pub hunks: Vec>, - /// true if the last line of the file ends in a newline character - /// - /// This will only be false if at the end of the patch we encounter the text: - /// `\ No newline at end of file` - pub end_newline: bool, + /// If there was a `No newline at end of file` indicator after the last line of the old version of the file + pub old_missing_newline: bool, + /// If there was a `No newline at end of file` indicator after the last line of the new version of the file + pub new_missing_newline: bool, } impl fmt::Display for Patch<'_> { @@ -28,11 +27,13 @@ impl fmt::Display for Patch<'_> { write!(f, "--- {}", self.old)?; write!(f, "\n+++ {}", self.new)?; - for hunk in &self.hunks { - write!(f, "\n{}", hunk)?; - } - if !self.end_newline { - write!(f, "\n\\ No newline at end of file")?; + for (i, hunk) in self.hunks.iter().enumerate() { + writeln!(f)?; + if i == self.hunks.len() - 1 { + hunk.fmt(f, self.old_missing_newline, self.new_missing_newline)?; + } else { + hunk.fmt(f, false, false)?; + } } Ok(()) } @@ -72,7 +73,8 @@ impl<'a> Patch<'a> { /// let patch = Patch::from_single(sample)?; /// assert_eq!(&patch.old.path, "lao"); /// assert_eq!(&patch.new.path, "tzu"); - /// assert_eq!(patch.end_newline, false); + /// assert_eq!(patch.old_missing_newline, false); + /// assert_eq!(patch.new_missing_newline, true); /// # Ok(()) /// # } /// ``` @@ -234,18 +236,41 @@ impl Hunk<'_> { Some(h) } } -} -impl fmt::Display for Hunk<'_> { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fn fmt( + &self, + f: &mut fmt::Formatter, + old_missing_newline: bool, + new_missing_newline: bool, + ) -> fmt::Result { write!( f, "@@ -{} +{} @@{}", self.old_range, self.new_range, self.range_hint )?; - for line in &self.lines { + // compute line indices to put "No newline at end of file" indicator after + let last_old_idx = old_missing_newline + .then(|| { + self.lines + .iter() + .rposition(|l| matches!(l, Line::Remove(_) | Line::Context(_))) + }) + .flatten(); + let last_new_idx = new_missing_newline + .then(|| { + self.lines + .iter() + .rposition(|l| matches!(l, Line::Add(_) | Line::Context(_))) + }) + .flatten(); + + for (i, line) in self.lines.iter().enumerate() { write!(f, "\n{}", line)?; + if Some(i) == last_old_idx || Some(i) == last_new_idx { + writeln!(f)?; + write!(f, "\\ No newline at end of file")?; + } } Ok(()) diff --git a/src/parser.rs b/src/parser.rs index dd1b2c9..2ca42c7 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -62,9 +62,15 @@ impl Error for ParseError<'_> { } } -fn consume_content_line(input: Input<'_>) -> IResult, &str> { +fn consume_content_line(input: Input<'_>) -> IResult, (&str, bool)> { let (input, raw) = terminated(not_line_ending, line_ending)(input)?; - Ok((input, raw.fragment())) + + let (input, missing_newline) = opt(terminated( + tag("\\ No newline at end of file"), + opt(line_ending), + ))(input)?; + + Ok((input, (raw.fragment(), missing_newline.is_some()))) } pub(crate) fn parse_single_patch(s: &str) -> Result, ParseError<'_>> { @@ -100,8 +106,14 @@ fn patch(input: Input) -> IResult { return Ok(patch); } let (input, files) = headers(input)?; - let (input, hunks) = chunks(input)?; - let (input, no_newline_indicator) = no_newline_indicator(input)?; + let ( + input, + ParsedHunks { + hunks, + old_missing_newline, + new_missing_newline, + }, + ) = chunks(input)?; // Ignore trailing empty lines produced by some diff programs let (input, _) = many0(line_ending)(input)?; @@ -112,7 +124,8 @@ fn patch(input: Input) -> IResult { old, new, hunks, - end_newline: !no_newline_indicator, + old_missing_newline, + new_missing_newline, }, )) } @@ -146,7 +159,8 @@ fn binary_files_differ(input: Input) -> IResult { meta: None, }, hunks: Vec::new(), - end_newline: false, + old_missing_newline: false, + new_missing_newline: false, }, )) } @@ -176,7 +190,8 @@ fn file_rename_only(input: Input<'_>) -> IResult, Patch<'_>> { meta: None, }, hunks: Vec::new(), - end_newline: false, + old_missing_newline: false, + new_missing_newline: false, }, )) } @@ -216,9 +231,29 @@ fn header_line_content(input: Input) -> IResult { )) } +struct ParsedHunks<'a> { + hunks: Vec>, + old_missing_newline: bool, + new_missing_newline: bool, +} + // Hunks of the file differences -fn chunks(input: Input) -> IResult> { - many1(chunk)(input) +fn chunks(input: Input) -> IResult { + let (span, hunks) = many1(chunk)(input)?; + + let (old_missing_newline, new_missing_newline) = hunks + .last() + .map(|hunk| (hunk.old_missing_newline, hunk.new_missing_newline)) + .unwrap_or_default(); + let hunks = hunks.into_iter().map(|hunk| hunk.hunk).collect(); + Ok(( + span, + ParsedHunks { + hunks, + old_missing_newline, + new_missing_newline, + }, + )) } fn is_next_header(input: Input<'_>) -> bool { @@ -229,6 +264,48 @@ fn is_next_header(input: Input<'_>) -> bool { || input.starts_with("@@ ") } +#[derive(Debug, PartialEq)] +struct ParsedHunk<'a> { + hunk: Hunk<'a>, + old_missing_newline: bool, + new_missing_newline: bool, +} + +enum LineOrEmptyLine<'a> { + Line(Line<'a>, bool), + EmptyLine, +} + +impl<'a> LineOrEmptyLine<'a> { + fn is_new(&self) -> bool { + matches!( + self, + Self::EmptyLine | Self::Line(Line::Context(_), _) | Self::Line(Line::Add(_), _) + ) + } + + fn is_old(&self) -> bool { + matches!( + self, + Self::EmptyLine | Self::Line(Line::Context(_), _) | Self::Line(Line::Remove(_), _) + ) + } + + fn missing_new_line(&self) -> bool { + match self { + Self::Line(_, missing_new_line) => *missing_new_line, + Self::EmptyLine => false, + } + } + + fn into_line(self) -> Line<'a> { + match self { + Self::Line(line, _) => line, + Self::EmptyLine => Line::Context(""), + } + } +} + /// Looks for lines starting with + or - or space, but not +++ or ---. Not a foolproof check. /// /// For example, if someone deletes a line that was using the pre-decrement (--) operator or adds a @@ -257,39 +334,67 @@ fn is_next_header(input: Input<'_>) -> bool { ///FIXME: Use the ranges in the chunk header to figure out how many chunk lines to parse. Will need /// to figure out how to count in nom more robustly than many1!(). Maybe using switch!()? ///FIXME: The test_parse_triple_plus_minus_hack test will no longer panic when this is fixed. -fn chunk(input: Input) -> IResult { +fn chunk(input: Input) -> IResult { let (input, ranges) = chunk_header(input)?; // Parse chunk lines, using the range information to guide parsing - let (input, lines) = many0(verify( + let (input, mut lines) = many0(verify( alt(( // Detect added lines map( preceded(tuple((char('+'), not(tag("++ ")))), consume_content_line), - Line::Add, + |(line, missing_new_line)| LineOrEmptyLine::Line(Line::Add(line), missing_new_line), ), // Detect removed lines map( preceded(tuple((char('-'), not(tag("-- ")))), consume_content_line), - Line::Remove, + |(line, missing_new_line)| { + LineOrEmptyLine::Line(Line::Remove(line), missing_new_line) + }, ), // Detect context lines - map(preceded(char(' '), consume_content_line), Line::Context), + map( + preceded(char(' '), consume_content_line), + |(line, missing_new_line)| { + LineOrEmptyLine::Line(Line::Context(line), missing_new_line) + }, + ), // Handle empty lines within the chunk - map(tag("\n"), |_| Line::Context("")), + map(tag("\n"), |_| LineOrEmptyLine::EmptyLine), )), // Stop parsing when we detect the next header or have parsed the expected number of lines |_| !is_next_header(input), ))(input)?; + // remove trailing empty lines + while matches!(lines.last(), Some(LineOrEmptyLine::EmptyLine)) { + lines.pop(); + } + + // was there "missing new line" indicator for any of the lines? + let old_missing_newline = lines + .iter() + .filter(|line| line.is_old()) + .any(|line| line.missing_new_line()); + let new_missing_newline = lines + .iter() + .filter(|line| line.is_new()) + .any(|line| line.missing_new_line()); + + let lines = lines.into_iter().map(LineOrEmptyLine::into_line).collect(); + let (old_range, new_range, range_hint) = ranges; Ok(( input, - Hunk { - old_range, - new_range, - range_hint, - lines, + ParsedHunk { + hunk: Hunk { + old_range, + new_range, + range_hint, + lines, + }, + old_missing_newline, + new_missing_newline, }, )) } @@ -320,17 +425,6 @@ fn u64_digit(input: Input<'_>) -> IResult, u64> { Ok((input, num)) } -// Trailing newline indicator -fn no_newline_indicator(input: Input<'_>) -> IResult, bool> { - map( - opt(terminated( - tag("\\ No newline at end of file"), - opt(line_ending), - )), - |matched| matched.is_some(), - )(input) -} - fn filename(input: Input) -> IResult> { alt((quoted, bare))(input) } @@ -578,21 +672,25 @@ mod tests { Therefore let there always be non-being, so we may see their subtlety, And let there always be being,\n"; - let expected = Hunk { - old_range: Range { start: 1, count: 7 }, - new_range: Range { start: 1, count: 6 }, - range_hint: "", - lines: vec![ - Line::Remove("The Way that can be told of is not the eternal Way;"), - Line::Remove("The name that can be named is not the eternal name."), - Line::Context("The Nameless is the origin of Heaven and Earth;"), - Line::Remove("The Named is the mother of all things."), - Line::Add("The named is the mother of all things."), - Line::Add(""), - Line::Context("Therefore let there always be non-being,"), - Line::Context(" so we may see their subtlety,"), - Line::Context("And let there always be being,"), - ], + let expected = ParsedHunk { + hunk: Hunk { + old_range: Range { start: 1, count: 7 }, + new_range: Range { start: 1, count: 6 }, + range_hint: "", + lines: vec![ + Line::Remove("The Way that can be told of is not the eternal Way;"), + Line::Remove("The name that can be named is not the eternal name."), + Line::Context("The Nameless is the origin of Heaven and Earth;"), + Line::Remove("The Named is the mother of all things."), + Line::Add("The named is the mother of all things."), + Line::Add(""), + Line::Context("Therefore let there always be non-being,"), + Line::Context(" so we may see their subtlety,"), + Line::Context("And let there always be being,"), + ], + }, + old_missing_newline: false, + new_missing_newline: false, }; test_parser!(chunk(sample) -> expected); Ok(()) @@ -666,7 +764,8 @@ mod tests { ], }, ], - end_newline: true, + old_missing_newline: false, + new_missing_newline: false, }; test_parser!(patch(sample) -> expected); diff --git a/tests/parse_patch.rs b/tests/parse_patch.rs index da25ada..d10362d 100644 --- a/tests/parse_patch.rs +++ b/tests/parse_patch.rs @@ -31,7 +31,7 @@ fn test_parse() -> Result<(), ParseError<'static>> { meta: None } ); - assert!(patch.end_newline); + assert!(!patch.old_missing_newline && !patch.new_missing_newline); assert_eq!(format!("{}\n", patch), sample); @@ -41,35 +41,78 @@ fn test_parse() -> Result<(), ParseError<'static>> { #[test] fn test_parse_no_newline_indicator() -> Result<(), ParseError<'static>> { let sample = "\ ---- before.py -+++ after.py -@@ -1,4 +1,4 @@ --bacon --eggs --ham -+python -+eggy -+hamster - guido +--- a/bar.txt ++++ b/bar.txt +@@ -1,3 +1,3 @@ + bar + Bar +-BAR ++BAR +\\ No newline at end of file +--- a/baz.txt ++++ b/baz.txt +@@ -1,3 +1,3 @@ + baz + Baz +-BAZ +\\ No newline at end of file ++ZAB +\\ No newline at end of file +--- a/foo.txt ++++ b/foo.txt +@@ -1,3 +1,3 @@ + foo + Foo +-FOO +\\ No newline at end of file ++FOO +--- a/foobar.txt ++++ b/foobar.txt +@@ -1,3 +1,3 @@ + foobar +-FooBar ++BarFoo + FOOBAR \\ No newline at end of file\n"; - let patch = Patch::from_single(sample)?; - assert_eq!( - patch.old, - File { - path: "before.py".into(), - meta: None - } - ); + let patches = Patch::from_multiple(sample)?; + + assert_eq!(patches.len(), 4); + + assert_eq!(patches[0].old.path, "a/bar.txt"); + assert_eq!(patches[0].new.path, "b/bar.txt"); + assert_eq!(patches[0].hunks.len(), 1); + assert_eq!(patches[0].hunks[0].lines.len(), 4); + assert_eq!(patches[0].old_missing_newline, false); + assert_eq!(patches[0].new_missing_newline, true); + + assert_eq!(patches[1].old.path, "a/baz.txt"); + assert_eq!(patches[1].new.path, "b/baz.txt"); + assert_eq!(patches[1].hunks.len(), 1); + assert_eq!(patches[1].hunks[0].lines.len(), 4); + assert_eq!(patches[1].old_missing_newline, true); + assert_eq!(patches[1].new_missing_newline, true); + + assert_eq!(patches[2].old.path, "a/foo.txt"); + assert_eq!(patches[2].new.path, "b/foo.txt"); + assert_eq!(patches[2].hunks.len(), 1); + assert_eq!(patches[2].hunks[0].lines.len(), 4); + assert_eq!(patches[2].old_missing_newline, true); + assert_eq!(patches[2].new_missing_newline, false); + + assert_eq!(patches[3].old.path, "a/foobar.txt"); + assert_eq!(patches[3].new.path, "b/foobar.txt"); + assert_eq!(patches[3].hunks.len(), 1); + assert_eq!(patches[3].hunks[0].lines.len(), 4); + assert_eq!(patches[3].old_missing_newline, true); + assert_eq!(patches[3].new_missing_newline, true); + assert_eq!( - patch.new, - File { - path: "after.py".into(), - meta: None - } + patches + .iter() + .map(|patch| format!("{}\n", patch)) + .collect::(), + sample ); - assert!(!patch.end_newline); - - assert_eq!(format!("{}\n", patch), sample); Ok(()) } @@ -107,7 +150,7 @@ fn test_parse_timestamps() -> Result<(), ParseError<'static>> { )), } ); - assert!(!patch.end_newline); + assert!(patch.old_missing_newline && patch.new_missing_newline); // to_string() uses Display but adds no trailing newline assert_eq!(patch.to_string(), sample); @@ -147,7 +190,7 @@ fn test_parse_other() -> Result<(), ParseError<'static>> { )), } ); - assert!(patch.end_newline); + assert!(!patch.old_missing_newline && !patch.new_missing_newline); assert_eq!(format!("{}\n", patch), sample); @@ -184,7 +227,7 @@ fn test_parse_escaped() -> Result<(), ParseError<'static>> { )), } ); - assert!(patch.end_newline); + assert!(!patch.old_missing_newline && !patch.new_missing_newline); assert_eq!(format!("{}\n", patch), sample); @@ -226,7 +269,7 @@ fn test_parse_triple_plus_minus() -> Result<(), ParseError<'static>> { meta: None } ); - assert!(patch.end_newline); + assert!(!patch.old_missing_newline && !patch.new_missing_newline); assert_eq!(patch.hunks.len(), 1); assert_eq!(patch.hunks[0].lines.len(), 8); @@ -278,7 +321,7 @@ fn test_parse_triple_plus_minus_hack() { meta: None } ); - assert!(patch.end_newline); + assert!(!patch.old_missing_newline && !patch.new_missing_newline); assert_eq!(patch.hunks.len(), 1); assert_eq!(patch.hunks[0].lines.len(), 8); diff --git a/tests/regressions.rs b/tests/regressions.rs index 04cd59f..f02afb7 100644 --- a/tests/regressions.rs +++ b/tests/regressions.rs @@ -41,7 +41,8 @@ fn crlf_breaks_stuff_17() -> Result<(), ParseError<'static>> { range_hint: "", lines: vec![Line::Context("x")], }], - end_newline: true, + old_missing_newline: false, + new_missing_newline: false } ); Ok(())