gitpatch-rs · vrmiguel · Apr 30, 2025 · Apr 16, 2025
diff --git a/src/parser.rs b/src/parser.rs
@@ -2,6 +2,7 @@ use std::borrow::Cow;
 use std::error::Error;
 
 use chrono::DateTime;
+use nom::combinator::verify;
 use nom::*;
 use nom::{
     branch::alt,
@@ -82,12 +83,14 @@ pub(crate) fn parse_single_patch(s: &str) -> Result<Patch, ParseError<'_>> {
 pub(crate) fn parse_multiple_patches(s: &str) -> Result<Vec<Patch>, ParseError<'_>> {
     let (remaining_input, patches) = multiple_patches(Input::new(s))?;
     // Parser should return an error instead of producing remaining input
-    assert!(
-        remaining_input.fragment().is_empty(),
-        "bug: failed to parse entire input. \
-        Remaining: '{}'",
-        remaining_input.fragment()
-    );
+    if !remaining_input.fragment().is_empty() {
+        return Err(ParseError {
+            line: remaining_input.location_line(),
+            offset: remaining_input.location_offset(),
+            fragment: remaining_input.fragment(),
+            kind: nom::error::ErrorKind::Eof,
+        });
+    }
     Ok(patches)
 }
 
@@ -224,9 +227,67 @@ fn chunks(input: Input<'_>) -> IResult<Input<'_>, Vec<Hunk>> {
     many1(chunk)(input)
 }
 
+fn is_next_header(input: Input<'_>) -> bool {
+    // Check for diff file header or chunk header
+    input.starts_with("diff ")
+        || input.starts_with("--- ")
+        || input.starts_with("+++ ")
+        || input.starts_with("@@ ")
+}
+
+
+/// Looks for lines starting with + or - or space, but not +++ or ---. Not a foolproof check.
+///
+/// For example, if someone deletes a line that was using the pre-decrement (--) operator or adds a
+/// line that was using the pre-increment (++) operator, this will fail.
+///
+/// Example where this doesn't work:
+///
+/// --- main.c
+/// +++ main.c
+/// @@ -1,4 +1,7 @@
+/// +#include<stdio.h>
+/// +
+///  int main() {
+///  double a;
+/// --- a;
+/// +++ a;
+/// +printf("%d\n", a);
+///  }
+///
+/// We will fail to parse this entire diff.
+///
+/// By checking for `+++ ` instead of just `+++`, we add at least a little more robustness because
+/// we know that people typically write `++a`, not `++ a`. That being said, this is still not enough
+/// to guarantee correctness in all cases.
+///
+///FIXME: Use the ranges in the chunk header to figure out how many chunk lines to parse. Will need
+/// to figure out how to count in nom more robustly than many1!(). Maybe using switch!()?
+///FIXME: The test_parse_triple_plus_minus_hack test will no longer panic when this is fixed.
 fn chunk(input: Input<'_>) -> IResult<Input<'_>, Hunk> {
     let (input, ranges) = chunk_header(input)?;
-    let (input, lines) = many1(chunk_line)(input)?;
+
+    // Parse chunk lines, using the range information to guide parsing
+    let (input, lines) = many0(verify(
+        alt((
+            // Detect added lines
+            map(
+                preceded(tuple((char('+'), not(tag("++ ")))), consume_content_line),
+                Line::Add,
+            ),
+            // Detect removed lines
+            map(
+                preceded(tuple((char('-'), not(tag("-- ")))), consume_content_line),
+                Line::Remove,
+            ),
+            // Detect context lines
+            map(preceded(char(' '), consume_content_line), Line::Context),
+            // Handle empty lines within the chunk
+            map(tag("\n"), |_| Line::Context("")),
+        )),
+        // Stop parsing when we detect the next header or have parsed the expected number of lines
+        |_| !is_next_header(input),
+    ))(input)?;
 
     let (old_range, new_range, range_hint) = ranges;
     Ok((
@@ -266,48 +327,6 @@ fn u64_digit(input: Input<'_>) -> IResult<Input<'_>, u64> {
     Ok((input, num))
 }
 
-// Looks for lines starting with + or - or space, but not +++ or ---. Not a foolproof check.
-//
-// For example, if someone deletes a line that was using the pre-decrement (--) operator or adds a
-// line that was using the pre-increment (++) operator, this will fail.
-//
-// Example where this doesn't work:
-//
-// --- main.c
-// +++ main.c
-// @@ -1,4 +1,7 @@
-// +#include<stdio.h>
-// +
-//  int main() {
-//  double a;
-// --- a;
-// +++ a;
-// +printf("%d\n", a);
-//  }
-//
-// We will fail to parse this entire diff.
-//
-// By checking for `+++ ` instead of just `+++`, we add at least a little more robustness because
-// we know that people typically write `++a`, not `++ a`. That being said, this is still not enough
-// to guarantee correctness in all cases.
-//
-//FIXME: Use the ranges in the chunk header to figure out how many chunk lines to parse. Will need
-// to figure out how to count in nom more robustly than many1!(). Maybe using switch!()?
-//FIXME: The test_parse_triple_plus_minus_hack test will no longer panic when this is fixed.
-fn chunk_line(input: Input<'_>) -> IResult<Input<'_>, Line> {
-    alt((
-        map(
-            preceded(tuple((char('+'), not(tag("++ ")))), consume_content_line),
-            Line::Add,
-        ),
-        map(
-            preceded(tuple((char('-'), not(tag("-- ")))), consume_content_line),
-            Line::Remove,
-        ),
-        map(preceded(char(' '), consume_content_line), Line::Context),
-    ))(input)
-}
-
 // Trailing newline indicator
 fn no_newline_indicator(input: Input<'_>) -> IResult<Input<'_>, bool> {
     map(

diff --git a/tests/parse_samples.rs b/tests/parse_samples.rs
@@ -31,3 +31,36 @@ fn parse_samples() {
         assert_eq!(patches, patches2);
     }
 }
+
+#[test]
+fn parse_wild_samples() {
+    let samples_path = PathBuf::from(file!())
+        .parent()
+        .unwrap()
+        .join("wild-samples");
+    for file in fs::read_dir(samples_path).unwrap() {
+        let path = file.unwrap().path();
+        if path.extension().unwrap_or_default() != "patch" {
+            continue;
+        }
+
+        let data = fs::read_to_string(dbg!(&path)).unwrap();
+        let patches = Patch::from_multiple(&data)
+            .unwrap_or_else(|err| panic!("failed to parse {:?}, error: {}", path, err));
+
+        // Make sure that the patch file we produce parses to the same information as the original
+        // patch file.
+        let patch_file: String = patches
+            .iter()
+            .map(|patch| format!("{}\n", patch))
+            .collect();
+
+        let patches2 = Patch::from_multiple(&patch_file).unwrap_or_else(|err| {
+            panic!(
+                "failed to re-parse {:?} after formatting, error: {}",
+                path, err
+            )
+        });
+        assert_eq!(patches, patches2);
+    }
+}
diff --git a/tests/wild-samples/0001-cross.patch b/tests/wild-samples/0001-cross.patch
@@ -0,0 +1,52 @@
+diff --git a/Makefile b/Makefile
+index 9754ddf..b5512de 100644
+--- a/Makefile
++++ b/Makefile
+@@ -15,16 +15,16 @@
+ SHELL=/bin/sh
+
+ # To assist in cross-compiling
+-CC=gcc
+-AR=ar
+-RANLIB=ranlib
+-LDFLAGS=
++CC?=gcc
++AR?=ar
++RANLIB?=ranlib
+
+ BIGFILES=-D_FILE_OFFSET_BITS=64
+-CFLAGS=-Wall -Winline -O2 -g $(BIGFILES)
++CFLAGS?=-Wall -Winline -O2 -g
++CFLAGS=$(CFLAGS) $(BIGFILES)
+
+ # Where you want it installed when you do 'make install'
+-PREFIX=/usr/local
++PREFIX=$PREFIX
+
+
+ OBJS= blocksort.o  \
+diff --git a/Makefile-libbz2_so b/Makefile-libbz2_so
+index e58791b..f4b9fa2 100644
+--- a/Makefile-libbz2_so
++++ b/Makefile-libbz2_so
+@@ -22,9 +22,18 @@
+
+
+ SHELL=/bin/sh
+-CC=gcc
++
++# To assist in cross-compiling
++CC?=gcc
++AR?=ar
++RANLIB?=ranlib
++
+ BIGFILES=-D_FILE_OFFSET_BITS=64
+-CFLAGS=-fpic -fPIC -Wall -Winline -O2 -g $(BIGFILES)
++CFLAGS?=-Wall -Winline -O2 -g
++CFLAGS=$(CFLAGS) $(BIGFILES)
++
++# Where you want it installed when you do 'make install'
++PREFIX=$PREFIX
+
+ OBJS= blocksort.o  \
+       huffman.o    \
diff --git a/tests/wild-samples/CVE-2019-12211-13.patch b/tests/wild-samples/CVE-2019-12211-13.patch
@@ -0,0 +1,162 @@
+Index: freeimage/Source/FreeImage/PluginTIFF.cpp
+===================================================================
+--- freeimage.orig/Source/FreeImage/PluginTIFF.cpp
++++ freeimage/Source/FreeImage/PluginTIFF.cpp
+@@ -122,9 +122,14 @@ static void ReadThumbnail(FreeImageIO *i
+ static int s_format_id;
+
+ typedef struct {
++    //! FreeImage IO functions
+     FreeImageIO *io;
++    //! FreeImage handle
+ 	fi_handle handle;
++    //! LibTIFF handle
+ 	TIFF *tif;
++    //! Count the number of thumbnails already read (used to avoid recursion on loading)
++    unsigned thumbnailCount;
+ } fi_TIFFIO;
+
+ // ----------------------------------------------------------
+@@ -184,10 +189,8 @@ Open a TIFF file descriptor for reading
+ */
+ TIFF *
+ TIFFFdOpen(thandle_t handle, const char *name, const char *mode) {
+-	TIFF *tif;
+-	
+ 	// Open the file; the callback will set everything up
+-	tif = TIFFClientOpen(name, mode, handle,
++	TIFF *tif = TIFFClientOpen(name, mode, handle,
+ 	    _tiffReadProc, _tiffWriteProc, _tiffSeekProc, _tiffCloseProc,
+ 	    _tiffSizeProc, _tiffMapProc, _tiffUnmapProc);
+
+@@ -460,9 +463,9 @@ CreateImageType(BOOL header_only, FREE_I
+ 			}
+
+ 		}
+-		else {
++		else if (bpp <= 32) {
+
+-			dib = FreeImage_AllocateHeader(header_only, width, height, MIN(bpp, 32), FI_RGBA_RED_MASK, FI_RGBA_GREEN_MASK, FI_RGBA_BLUE_MASK);
++			dib = FreeImage_AllocateHeader(header_only, width, height, bpp, FI_RGBA_RED_MASK, FI_RGBA_GREEN_MASK, FI_RGBA_BLUE_MASK);
+ 		}
+
+
+@@ -1053,6 +1056,7 @@ Open(FreeImageIO *io, fi_handle handle,
+ 	if(!fio) return NULL;
+ 	fio->io = io;
+ 	fio->handle = handle;
++        fio->thumbnailCount = 0;
+
+ 	if (read) {
+ 		fio->tif = TIFFFdOpen((thandle_t)fio, "", "r");
+@@ -1108,6 +1112,27 @@ check for uncommon bitspersample values
+ */
+ static BOOL 
+ IsValidBitsPerSample(uint16 photometric, uint16 bitspersample, uint16 samplesperpixel) {
++        // get the pixel depth in bits
++        const uint16 pixel_depth = bitspersample * samplesperpixel;
++
++        // check for a supported pixel depth
++        switch (pixel_depth) {
++            case 1:
++            case 4:
++            case 8:
++            case 16:
++            case 24:
++            case 32:
++            case 48:
++            case 64:
++            case 96:
++            case 128:
++                // OK, go on
++                break;
++            default:
++                // unsupported pixel depth
++                return FALSE;
++        }
+
+ 	switch(bitspersample) {
+ 		case 1:
+@@ -1148,6 +1173,8 @@ IsValidBitsPerSample(uint16 photometric,
+ 		default:
+ 			return FALSE;
+ 	}
++
++        return FALSE;
+ }
+
+ static TIFFLoadMethod  
+@@ -1237,16 +1264,32 @@ Read embedded thumbnail
+ static void 
+ ReadThumbnail(FreeImageIO *io, fi_handle handle, void *data, TIFF *tiff, FIBITMAP *dib) {
+ 	FIBITMAP* thumbnail = NULL;
++
++        fi_TIFFIO *fio = (fi_TIFFIO*)data;
++
++        /*
++        Thumbnail loading can cause recursions because of the way 
++        functions TIFFLastDirectory and TIFFSetSubDirectory are working.
++        We use here a hack to count the number of times the ReadThumbnail function was called. 
++        We only allow one call, check for this
++        */
++        if (fio->thumbnailCount > 0) {
++            return;
++        }
++        else {
++            // update the thumbnail count (used to avoid recursion)
++            fio->thumbnailCount++;
++        }
+
+ 	// read exif thumbnail (IFD 1) ...
+
+-	/*
+-	// this code can cause unwanted recursion causing an overflow, it is thus disabled until we have a better solution
+-	// do we really need to read a thumbnail from the Exif segment ? knowing that TIFF store the thumbnail in the subIFD ...
+-	// 
+ 	toff_t exif_offset = 0;
+ 	if(TIFFGetField(tiff, TIFFTAG_EXIFIFD, &exif_offset)) {
+
++                // this code can cause unwanted recursion causing an overflow,
++                // because of the way TIFFLastDirectory work => this is checked
++                // using 
++
+ 		if(!TIFFLastDirectory(tiff)) {
+ 			// save current position
+ 			const long tell_pos = io->tell_proc(handle);
+@@ -1264,7 +1307,6 @@ ReadThumbnail(FreeImageIO *io, fi_handle
+ 			TIFFSetDirectory(tiff, cur_dir);
+ 		}
+ 	}
+-	*/
+
+ 	// ... or read the first subIFD
+
+@@ -1281,6 +1323,10 @@ ReadThumbnail(FreeImageIO *io, fi_handle
+ 				const long tell_pos = io->tell_proc(handle);
+ 				const uint16 cur_dir = TIFFCurrentDirectory(tiff);
+
++                                // this code can cause unwanted recursion
++                                // causing an overflow, because of the way
++                                // TIFFSetSubDirectory work
++
+ 				if(TIFFSetSubDirectory(tiff, subIFD_offsets[0])) {
+ 					// load the thumbnail
+ 					int page = -1; 
+@@ -2041,7 +2087,7 @@ Load(FreeImageIO *io, fi_handle handle,
+ 				}
+
+ 				// calculate src line and dst pitch
+-				int dst_pitch = FreeImage_GetPitch(dib);
++				unsigned dst_pitch = FreeImage_GetPitch(dib);
+ 				uint32 tileRowSize = (uint32)TIFFTileRowSize(tif);
+ 				uint32 imageRowSize = (uint32)TIFFScanlineSize(tif);
+
+@@ -2071,7 +2117,7 @@ Load(FreeImageIO *io, fi_handle handle,
+ 						BYTE *src_bits = tileBuffer;
+ 						BYTE *dst_bits = bits + rowSize;
+ 						for(int k = 0; k < nrows; k++) {
+-							memcpy(dst_bits, src_bits, src_line);
++							memcpy(dst_bits, src_bits, MIN(dst_pitch, src_line));
+ 							src_bits += tileRowSize;
+ 							dst_bits -= dst_pitch;
+ 						}