diff --git a/src/parser.rs b/src/parser.rs index e2b7118..8c7b00b 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,6 +2,7 @@ use std::borrow::Cow; use std::error::Error; use chrono::DateTime; +use nom::combinator::verify; use nom::*; use nom::{ branch::alt, @@ -82,12 +83,14 @@ pub(crate) fn parse_single_patch(s: &str) -> Result> { pub(crate) fn parse_multiple_patches(s: &str) -> Result, ParseError<'_>> { let (remaining_input, patches) = multiple_patches(Input::new(s))?; // Parser should return an error instead of producing remaining input - assert!( - remaining_input.fragment().is_empty(), - "bug: failed to parse entire input. \ - Remaining: '{}'", - remaining_input.fragment() - ); + if !remaining_input.fragment().is_empty() { + return Err(ParseError { + line: remaining_input.location_line(), + offset: remaining_input.location_offset(), + fragment: remaining_input.fragment(), + kind: nom::error::ErrorKind::Eof, + }); + } Ok(patches) } @@ -224,9 +227,67 @@ fn chunks(input: Input<'_>) -> IResult, Vec> { many1(chunk)(input) } +fn is_next_header(input: Input<'_>) -> bool { + // Check for diff file header or chunk header + input.starts_with("diff ") + || input.starts_with("--- ") + || input.starts_with("+++ ") + || input.starts_with("@@ ") +} + + +/// Looks for lines starting with + or - or space, but not +++ or ---. Not a foolproof check. +/// +/// For example, if someone deletes a line that was using the pre-decrement (--) operator or adds a +/// line that was using the pre-increment (++) operator, this will fail. +/// +/// Example where this doesn't work: +/// +/// --- main.c +/// +++ main.c +/// @@ -1,4 +1,7 @@ +/// +#include +/// + +/// int main() { +/// double a; +/// --- a; +/// +++ a; +/// +printf("%d\n", a); +/// } +/// +/// We will fail to parse this entire diff. +/// +/// By checking for `+++ ` instead of just `+++`, we add at least a little more robustness because +/// we know that people typically write `++a`, not `++ a`. That being said, this is still not enough +/// to guarantee correctness in all cases. +/// +///FIXME: Use the ranges in the chunk header to figure out how many chunk lines to parse. Will need +/// to figure out how to count in nom more robustly than many1!(). Maybe using switch!()? +///FIXME: The test_parse_triple_plus_minus_hack test will no longer panic when this is fixed. fn chunk(input: Input<'_>) -> IResult, Hunk> { let (input, ranges) = chunk_header(input)?; - let (input, lines) = many1(chunk_line)(input)?; + + // Parse chunk lines, using the range information to guide parsing + let (input, lines) = many0(verify( + alt(( + // Detect added lines + map( + preceded(tuple((char('+'), not(tag("++ ")))), consume_content_line), + Line::Add, + ), + // Detect removed lines + map( + preceded(tuple((char('-'), not(tag("-- ")))), consume_content_line), + Line::Remove, + ), + // Detect context lines + map(preceded(char(' '), consume_content_line), Line::Context), + // Handle empty lines within the chunk + map(tag("\n"), |_| Line::Context("")), + )), + // Stop parsing when we detect the next header or have parsed the expected number of lines + |_| !is_next_header(input), + ))(input)?; let (old_range, new_range, range_hint) = ranges; Ok(( @@ -266,48 +327,6 @@ fn u64_digit(input: Input<'_>) -> IResult, u64> { Ok((input, num)) } -// Looks for lines starting with + or - or space, but not +++ or ---. Not a foolproof check. -// -// For example, if someone deletes a line that was using the pre-decrement (--) operator or adds a -// line that was using the pre-increment (++) operator, this will fail. -// -// Example where this doesn't work: -// -// --- main.c -// +++ main.c -// @@ -1,4 +1,7 @@ -// +#include -// + -// int main() { -// double a; -// --- a; -// +++ a; -// +printf("%d\n", a); -// } -// -// We will fail to parse this entire diff. -// -// By checking for `+++ ` instead of just `+++`, we add at least a little more robustness because -// we know that people typically write `++a`, not `++ a`. That being said, this is still not enough -// to guarantee correctness in all cases. -// -//FIXME: Use the ranges in the chunk header to figure out how many chunk lines to parse. Will need -// to figure out how to count in nom more robustly than many1!(). Maybe using switch!()? -//FIXME: The test_parse_triple_plus_minus_hack test will no longer panic when this is fixed. -fn chunk_line(input: Input<'_>) -> IResult, Line> { - alt(( - map( - preceded(tuple((char('+'), not(tag("++ ")))), consume_content_line), - Line::Add, - ), - map( - preceded(tuple((char('-'), not(tag("-- ")))), consume_content_line), - Line::Remove, - ), - map(preceded(char(' '), consume_content_line), Line::Context), - ))(input) -} - // Trailing newline indicator fn no_newline_indicator(input: Input<'_>) -> IResult, bool> { map( diff --git a/tests/parse_samples.rs b/tests/parse_samples.rs index bd34f14..aac580d 100644 --- a/tests/parse_samples.rs +++ b/tests/parse_samples.rs @@ -31,3 +31,36 @@ fn parse_samples() { assert_eq!(patches, patches2); } } + +#[test] +fn parse_wild_samples() { + let samples_path = PathBuf::from(file!()) + .parent() + .unwrap() + .join("wild-samples"); + for file in fs::read_dir(samples_path).unwrap() { + let path = file.unwrap().path(); + if path.extension().unwrap_or_default() != "patch" { + continue; + } + + let data = fs::read_to_string(dbg!(&path)).unwrap(); + let patches = Patch::from_multiple(&data) + .unwrap_or_else(|err| panic!("failed to parse {:?}, error: {}", path, err)); + + // Make sure that the patch file we produce parses to the same information as the original + // patch file. + let patch_file: String = patches + .iter() + .map(|patch| format!("{}\n", patch)) + .collect(); + + let patches2 = Patch::from_multiple(&patch_file).unwrap_or_else(|err| { + panic!( + "failed to re-parse {:?} after formatting, error: {}", + path, err + ) + }); + assert_eq!(patches, patches2); + } +} diff --git a/tests/wild-samples/0001-cross.patch b/tests/wild-samples/0001-cross.patch new file mode 100644 index 0000000..5398fb2 --- /dev/null +++ b/tests/wild-samples/0001-cross.patch @@ -0,0 +1,52 @@ +diff --git a/Makefile b/Makefile +index 9754ddf..b5512de 100644 +--- a/Makefile ++++ b/Makefile +@@ -15,16 +15,16 @@ + SHELL=/bin/sh + + # To assist in cross-compiling +-CC=gcc +-AR=ar +-RANLIB=ranlib +-LDFLAGS= ++CC?=gcc ++AR?=ar ++RANLIB?=ranlib + + BIGFILES=-D_FILE_OFFSET_BITS=64 +-CFLAGS=-Wall -Winline -O2 -g $(BIGFILES) ++CFLAGS?=-Wall -Winline -O2 -g ++CFLAGS=$(CFLAGS) $(BIGFILES) + + # Where you want it installed when you do 'make install' +-PREFIX=/usr/local ++PREFIX=$PREFIX + + + OBJS= blocksort.o \ +diff --git a/Makefile-libbz2_so b/Makefile-libbz2_so +index e58791b..f4b9fa2 100644 +--- a/Makefile-libbz2_so ++++ b/Makefile-libbz2_so +@@ -22,9 +22,18 @@ + + + SHELL=/bin/sh +-CC=gcc ++ ++# To assist in cross-compiling ++CC?=gcc ++AR?=ar ++RANLIB?=ranlib ++ + BIGFILES=-D_FILE_OFFSET_BITS=64 +-CFLAGS=-fpic -fPIC -Wall -Winline -O2 -g $(BIGFILES) ++CFLAGS?=-Wall -Winline -O2 -g ++CFLAGS=$(CFLAGS) $(BIGFILES) ++ ++# Where you want it installed when you do 'make install' ++PREFIX=$PREFIX + + OBJS= blocksort.o \ + huffman.o \ diff --git a/tests/wild-samples/CVE-2019-12211-13.patch b/tests/wild-samples/CVE-2019-12211-13.patch new file mode 100644 index 0000000..1260786 --- /dev/null +++ b/tests/wild-samples/CVE-2019-12211-13.patch @@ -0,0 +1,162 @@ +Index: freeimage/Source/FreeImage/PluginTIFF.cpp +=================================================================== +--- freeimage.orig/Source/FreeImage/PluginTIFF.cpp ++++ freeimage/Source/FreeImage/PluginTIFF.cpp +@@ -122,9 +122,14 @@ static void ReadThumbnail(FreeImageIO *i + static int s_format_id; + + typedef struct { ++ //! FreeImage IO functions + FreeImageIO *io; ++ //! FreeImage handle + fi_handle handle; ++ //! LibTIFF handle + TIFF *tif; ++ //! Count the number of thumbnails already read (used to avoid recursion on loading) ++ unsigned thumbnailCount; + } fi_TIFFIO; + + // ---------------------------------------------------------- +@@ -184,10 +189,8 @@ Open a TIFF file descriptor for reading + */ + TIFF * + TIFFFdOpen(thandle_t handle, const char *name, const char *mode) { +- TIFF *tif; +- + // Open the file; the callback will set everything up +- tif = TIFFClientOpen(name, mode, handle, ++ TIFF *tif = TIFFClientOpen(name, mode, handle, + _tiffReadProc, _tiffWriteProc, _tiffSeekProc, _tiffCloseProc, + _tiffSizeProc, _tiffMapProc, _tiffUnmapProc); + +@@ -460,9 +463,9 @@ CreateImageType(BOOL header_only, FREE_I + } + + } +- else { ++ else if (bpp <= 32) { + +- dib = FreeImage_AllocateHeader(header_only, width, height, MIN(bpp, 32), FI_RGBA_RED_MASK, FI_RGBA_GREEN_MASK, FI_RGBA_BLUE_MASK); ++ dib = FreeImage_AllocateHeader(header_only, width, height, bpp, FI_RGBA_RED_MASK, FI_RGBA_GREEN_MASK, FI_RGBA_BLUE_MASK); + } + + +@@ -1053,6 +1056,7 @@ Open(FreeImageIO *io, fi_handle handle, + if(!fio) return NULL; + fio->io = io; + fio->handle = handle; ++ fio->thumbnailCount = 0; + + if (read) { + fio->tif = TIFFFdOpen((thandle_t)fio, "", "r"); +@@ -1108,6 +1112,27 @@ check for uncommon bitspersample values + */ + static BOOL + IsValidBitsPerSample(uint16 photometric, uint16 bitspersample, uint16 samplesperpixel) { ++ // get the pixel depth in bits ++ const uint16 pixel_depth = bitspersample * samplesperpixel; ++ ++ // check for a supported pixel depth ++ switch (pixel_depth) { ++ case 1: ++ case 4: ++ case 8: ++ case 16: ++ case 24: ++ case 32: ++ case 48: ++ case 64: ++ case 96: ++ case 128: ++ // OK, go on ++ break; ++ default: ++ // unsupported pixel depth ++ return FALSE; ++ } + + switch(bitspersample) { + case 1: +@@ -1148,6 +1173,8 @@ IsValidBitsPerSample(uint16 photometric, + default: + return FALSE; + } ++ ++ return FALSE; + } + + static TIFFLoadMethod +@@ -1237,16 +1264,32 @@ Read embedded thumbnail + static void + ReadThumbnail(FreeImageIO *io, fi_handle handle, void *data, TIFF *tiff, FIBITMAP *dib) { + FIBITMAP* thumbnail = NULL; ++ ++ fi_TIFFIO *fio = (fi_TIFFIO*)data; ++ ++ /* ++ Thumbnail loading can cause recursions because of the way ++ functions TIFFLastDirectory and TIFFSetSubDirectory are working. ++ We use here a hack to count the number of times the ReadThumbnail function was called. ++ We only allow one call, check for this ++ */ ++ if (fio->thumbnailCount > 0) { ++ return; ++ } ++ else { ++ // update the thumbnail count (used to avoid recursion) ++ fio->thumbnailCount++; ++ } + + // read exif thumbnail (IFD 1) ... + +- /* +- // this code can cause unwanted recursion causing an overflow, it is thus disabled until we have a better solution +- // do we really need to read a thumbnail from the Exif segment ? knowing that TIFF store the thumbnail in the subIFD ... +- // + toff_t exif_offset = 0; + if(TIFFGetField(tiff, TIFFTAG_EXIFIFD, &exif_offset)) { + ++ // this code can cause unwanted recursion causing an overflow, ++ // because of the way TIFFLastDirectory work => this is checked ++ // using ++ + if(!TIFFLastDirectory(tiff)) { + // save current position + const long tell_pos = io->tell_proc(handle); +@@ -1264,7 +1307,6 @@ ReadThumbnail(FreeImageIO *io, fi_handle + TIFFSetDirectory(tiff, cur_dir); + } + } +- */ + + // ... or read the first subIFD + +@@ -1281,6 +1323,10 @@ ReadThumbnail(FreeImageIO *io, fi_handle + const long tell_pos = io->tell_proc(handle); + const uint16 cur_dir = TIFFCurrentDirectory(tiff); + ++ // this code can cause unwanted recursion ++ // causing an overflow, because of the way ++ // TIFFSetSubDirectory work ++ + if(TIFFSetSubDirectory(tiff, subIFD_offsets[0])) { + // load the thumbnail + int page = -1; +@@ -2041,7 +2087,7 @@ Load(FreeImageIO *io, fi_handle handle, + } + + // calculate src line and dst pitch +- int dst_pitch = FreeImage_GetPitch(dib); ++ unsigned dst_pitch = FreeImage_GetPitch(dib); + uint32 tileRowSize = (uint32)TIFFTileRowSize(tif); + uint32 imageRowSize = (uint32)TIFFScanlineSize(tif); + +@@ -2071,7 +2117,7 @@ Load(FreeImageIO *io, fi_handle handle, + BYTE *src_bits = tileBuffer; + BYTE *dst_bits = bits + rowSize; + for(int k = 0; k < nrows; k++) { +- memcpy(dst_bits, src_bits, src_line); ++ memcpy(dst_bits, src_bits, MIN(dst_pitch, src_line)); + src_bits += tileRowSize; + dst_bits -= dst_pitch; + } diff --git a/tests/wild-samples/define_byteswap.patch b/tests/wild-samples/define_byteswap.patch new file mode 100644 index 0000000..e3c0480 --- /dev/null +++ b/tests/wild-samples/define_byteswap.patch @@ -0,0 +1,27 @@ +diff --git a/image/decode/segdec.c b/image/decode/segdec.c +index fb83f2b..1eb9ae4 100644 +--- a/image/decode/segdec.c ++++ b/image/decode/segdec.c +@@ -52,6 +52,9 @@ static Int DecodeSignificantAbsLevel (struct CAdaptiveHuffman *pAHexpt, BitIOInf + //================================================================ + // Memory access functions + //================================================================ ++ ++U32 _byteswap_ulong(U32 bits); ++ + static U32 _FORCEINLINE _load4(void* pv) + { + #ifdef _BIG__ENDIAN_ +diff --git a/jxrgluelib/JXRGlueJxr.c b/jxrgluelib/JXRGlueJxr.c +index 0fde9bb..e6c54e4 100644 +--- a/jxrgluelib/JXRGlueJxr.c ++++ b/jxrgluelib/JXRGlueJxr.c +@@ -28,7 +28,7 @@ + //*@@@---@@@@****************************************************************** + #include + #include +- ++#include + + static const char szHDPhotoFormat[] = "image/vnd.ms-photo"; + const U32 IFDEntryTypeSizes[] = { 0, 1, 1, 2, 4, 8, 1, 1, 2, 4, 8, 4, 8 }; diff --git a/tests/wild-samples/sample5.patch b/tests/wild-samples/sample5.patch new file mode 100644 index 0000000..ec0eeb8 --- /dev/null +++ b/tests/wild-samples/sample5.patch @@ -0,0 +1,29 @@ +=== modified file 'modified_file1' +--- modified_file1 2013-10-13 23:53:13 +0000 ++++ modified_file1 2013-10-13 23:53:26 +0000 +@@ -1,5 +1,7 @@ + This is the original content. + +-This should be updated. ++This is now updated. ++ ++This is a new line. + + This will stay. +\ No newline at end of file + +=== modified file 'modified_file2' +--- modified_file2 2013-10-13 23:53:13 +0000 ++++ modified_file2 2013-10-13 23:53:26 +0000 +@@ -1,5 +1,7 @@ + This is the original content. + +-This should be updated. ++This is now updated. ++ ++This is a new line. + + This will stay. +\ No newline at end of file + + diff --git a/tests/wild-samples/usecmake.patch b/tests/wild-samples/usecmake.patch new file mode 100644 index 0000000..139e0ba --- /dev/null +++ b/tests/wild-samples/usecmake.patch @@ -0,0 +1,143 @@ +Description: Prefer a cmake based build system +Author: Mathieu Malaterre +Forwarded: https://jxrlib.codeplex.com/discussions/440294 + +Index: CMakeLists.txt +=================================================================== +--- CMakeLists.txt 1970-01-01 00:00:00.000000000 +0000 ++++ CMakeLists.txt 2014-03-26 17:05:04.429637801 +0100 +@@ -0,0 +1,134 @@ ++# Copyright Mathieu Malaterre ++# BSD (Same as jxrlib) ++cmake_minimum_required(VERSION 2.8) ++project(jxrlib C) ++ ++# Need shared libs for ABI ++set(BUILD_SHARED_LIBS ON) ++ ++# helper macro to preserve original Makefile convention ++macro(JXR_MAKE_OBJ SET_NAME) ++ foreach(src ${SRC_${SET_NAME}}) ++ list(APPEND OBJ_${SET_NAME} ${DIR_${SET_NAME}}/${src}) ++ endforeach() ++endmacro() ++ ++include(TestBigEndian) ++test_big_endian(ISBIGENDIAN) ++if(ISBIGENDIAN) ++ set(DEF_ENDIAN _BIG__ENDIAN_) ++endif() ++ ++set(DIR_SYS image/sys) ++set(DIR_DEC image/decode) ++set(DIR_ENC image/encode) ++ ++set(DIR_GLUE jxrgluelib) ++set(DIR_TEST jxrtestlib) ++set(DIR_EXEC jxrencoderdecoder) ++ ++if(NOT JXRLIB_INSTALL_BIN_DIR) ++ set(JXRLIB_INSTALL_BIN_DIR "bin") ++endif() ++ ++if(NOT JXRLIB_INSTALL_LIB_DIR) ++ set(JXRLIB_INSTALL_LIB_DIR "lib") ++endif() ++ ++if(NOT JXRLIB_INSTALL_INCLUDE_DIR) ++ set(JXRLIB_INSTALL_INCLUDE_DIR "include/jxrlib") ++endif() ++ ++include_directories( ++ common/include ++ ${DIR_SYS} ++ ${DIR_GLUE} ++ ${DIR_TEST} ++) ++ ++# where is strlcpy ? ++include(CheckSymbolExists) ++check_symbol_exists(strlcpy "string.h" HAVE_STRLCPY) ++#set(CMAKE_REQUIRED_LIBRARIES bsd) ++#CHECK_SYMBOL_EXISTS(strlcpy "string.h" HAVE_STRLCPY4) ++# on linux, strlcpy is in -lbsd: ++#if(NOT HAVE_STRLCPY) ++# include(CheckLibraryExists) ++# find_library(BSD_LIBRARY bsd) ++# check_library_exists(bsd "strlcpy" ${BSD_LIBRARY} HAVE_STRLCPY_BSD) ++#endif() ++ ++# JPEG-XR ++set(SRC_SYS adapthuff.c image.c strcodec.c strPredQuant.c strTransform.c perfTimerANSI.c) ++JXR_MAKE_OBJ(SYS) ++set(SRC_DEC decode.c postprocess.c segdec.c strdec.c strInvTransform.c strPredQuantDec.c JXRTranscode.c) ++JXR_MAKE_OBJ(DEC) ++set(SRC_ENC encode.c segenc.c strenc.c strFwdTransform.c strPredQuantEnc.c) ++JXR_MAKE_OBJ(ENC) ++ ++add_library(jpegxr ${OBJ_ENC} ${OBJ_DEC} ${OBJ_SYS}) ++set_property(TARGET jpegxr ++ PROPERTY COMPILE_DEFINITIONS __ANSI__ DISABLE_PERF_MEASUREMENT ${DEF_ENDIAN} ++) ++set_property(TARGET jpegxr PROPERTY LINK_INTERFACE_LIBRARIES "") ++set_property(TARGET jpegxr PROPERTY COMPILE_FLAGS -w) ++# VERSION/SOVERSION ++set_property(TARGET jpegxr PROPERTY VERSION 1.1) ++set_property(TARGET jpegxr PROPERTY SOVERSION 0) ++install(TARGETS jpegxr ++ EXPORT JXRLibTargets ++ RUNTIME DESTINATION ${JXRLIB_INSTALL_BIN_DIR} COMPONENT Applications ++ LIBRARY DESTINATION ${JXRLIB_INSTALL_LIB_DIR} COMPONENT Libraries ++) ++ ++# JXR-GLUE ++set(SRC_GLUE JXRGlue.c JXRMeta.c JXRGluePFC.c JXRGlueJxr.c) ++JXR_MAKE_OBJ(GLUE) ++set(SRC_TEST JXRTest.c JXRTestBmp.c JXRTestHdr.c JXRTestPnm.c JXRTestTif.c JXRTestYUV.c) ++JXR_MAKE_OBJ(TEST) ++ ++add_library(jxrglue ${OBJ_GLUE} ${OBJ_TEST}) ++set_property(TARGET jxrglue ++ PROPERTY COMPILE_DEFINITIONS __ANSI__ DISABLE_PERF_MEASUREMENT ${DEF_ENDIAN} ++) ++set_property(TARGET jxrglue PROPERTY LINK_INTERFACE_LIBRARIES "") ++set_property(TARGET jxrglue PROPERTY COMPILE_FLAGS -w) ++# VERSION/SOVERSION ++set_property(TARGET jxrglue PROPERTY VERSION 1.1) ++set_property(TARGET jxrglue PROPERTY SOVERSION 0) ++install(TARGETS jxrglue ++ EXPORT JXRLibTargets ++ RUNTIME DESTINATION ${JXRLIB_INSTALL_BIN_DIR} COMPONENT Applications ++ LIBRARY DESTINATION ${JXRLIB_INSTALL_LIB_DIR} COMPONENT Libraries ++) ++#if(HAVE_STRLCPY_BSD) ++# target_link_libraries(jxrglue ${BSD_LIBRARY}) ++#endif() ++target_link_libraries(jxrglue jpegxr m) ++# Enc app files ++set(ENCAPP JxrEncApp) ++add_executable(${ENCAPP} ${DIR_EXEC}/${ENCAPP}.c) ++set_property(TARGET ${ENCAPP} ++ PROPERTY COMPILE_DEFINITIONS __ANSI__ DISABLE_PERF_MEASUREMENT ${DEF_ENDIAN} ++) ++set_property(TARGET ${ENCAPP} PROPERTY COMPILE_FLAGS -w) ++target_link_libraries(${ENCAPP} jxrglue) # jpegxr) ++install(TARGETS ${ENCAPP} RUNTIME DESTINATION ${JXRLIB_INSTALL_BIN_DIR}) ++# Dec app files ++set(DECAPP JxrDecApp) ++add_executable(${DECAPP} ${DIR_EXEC}/${DECAPP}.c) ++set_property(TARGET ${DECAPP} ++ PROPERTY COMPILE_DEFINITIONS __ANSI__ DISABLE_PERF_MEASUREMENT ${DEF_ENDIAN} ++) ++set_property(TARGET ${DECAPP} PROPERTY COMPILE_FLAGS -w) ++target_link_libraries(${DECAPP} jxrglue) # jpegxr) ++install(TARGETS ${DECAPP} RUNTIME DESTINATION ${JXRLIB_INSTALL_BIN_DIR}) ++ ++# install rules ++install(FILES jxrgluelib/JXRGlue.h jxrgluelib/JXRMeta.h jxrtestlib/JXRTest.h ++ image/sys/windowsmediaphoto.h ++ DESTINATION ${JXRLIB_INSTALL_INCLUDE_DIR} COMPONENT Headers ++) ++install(DIRECTORY common/include/ DESTINATION ${JXRLIB_INSTALL_INCLUDE_DIR} ++ FILES_MATCHING PATTERN "*.h" ++)