Skip to content

Commit cbad97c

Browse files
authored
Merge pull request #1 from wolfv/hunks
feat: add more tests and parse hunks a little differently
2 parents 44135e6 + 1facc6b commit cbad97c

File tree

7 files changed

+514
-49
lines changed

7 files changed

+514
-49
lines changed

src/parser.rs

Lines changed: 68 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::borrow::Cow;
22
use std::error::Error;
33

44
use chrono::DateTime;
5+
use nom::combinator::verify;
56
use nom::*;
67
use nom::{
78
branch::alt,
@@ -82,12 +83,14 @@ pub(crate) fn parse_single_patch(s: &str) -> Result<Patch, ParseError<'_>> {
8283
pub(crate) fn parse_multiple_patches(s: &str) -> Result<Vec<Patch>, ParseError<'_>> {
8384
let (remaining_input, patches) = multiple_patches(Input::new(s))?;
8485
// Parser should return an error instead of producing remaining input
85-
assert!(
86-
remaining_input.fragment().is_empty(),
87-
"bug: failed to parse entire input. \
88-
Remaining: '{}'",
89-
remaining_input.fragment()
90-
);
86+
if !remaining_input.fragment().is_empty() {
87+
return Err(ParseError {
88+
line: remaining_input.location_line(),
89+
offset: remaining_input.location_offset(),
90+
fragment: remaining_input.fragment(),
91+
kind: nom::error::ErrorKind::Eof,
92+
});
93+
}
9194
Ok(patches)
9295
}
9396

@@ -224,9 +227,67 @@ fn chunks(input: Input<'_>) -> IResult<Input<'_>, Vec<Hunk>> {
224227
many1(chunk)(input)
225228
}
226229

230+
fn is_next_header(input: Input<'_>) -> bool {
231+
// Check for diff file header or chunk header
232+
input.starts_with("diff ")
233+
|| input.starts_with("--- ")
234+
|| input.starts_with("+++ ")
235+
|| input.starts_with("@@ ")
236+
}
237+
238+
239+
/// Looks for lines starting with + or - or space, but not +++ or ---. Not a foolproof check.
240+
///
241+
/// For example, if someone deletes a line that was using the pre-decrement (--) operator or adds a
242+
/// line that was using the pre-increment (++) operator, this will fail.
243+
///
244+
/// Example where this doesn't work:
245+
///
246+
/// --- main.c
247+
/// +++ main.c
248+
/// @@ -1,4 +1,7 @@
249+
/// +#include<stdio.h>
250+
/// +
251+
/// int main() {
252+
/// double a;
253+
/// --- a;
254+
/// +++ a;
255+
/// +printf("%d\n", a);
256+
/// }
257+
///
258+
/// We will fail to parse this entire diff.
259+
///
260+
/// By checking for `+++ ` instead of just `+++`, we add at least a little more robustness because
261+
/// we know that people typically write `++a`, not `++ a`. That being said, this is still not enough
262+
/// to guarantee correctness in all cases.
263+
///
264+
///FIXME: Use the ranges in the chunk header to figure out how many chunk lines to parse. Will need
265+
/// to figure out how to count in nom more robustly than many1!(). Maybe using switch!()?
266+
///FIXME: The test_parse_triple_plus_minus_hack test will no longer panic when this is fixed.
227267
fn chunk(input: Input<'_>) -> IResult<Input<'_>, Hunk> {
228268
let (input, ranges) = chunk_header(input)?;
229-
let (input, lines) = many1(chunk_line)(input)?;
269+
270+
// Parse chunk lines, using the range information to guide parsing
271+
let (input, lines) = many0(verify(
272+
alt((
273+
// Detect added lines
274+
map(
275+
preceded(tuple((char('+'), not(tag("++ ")))), consume_content_line),
276+
Line::Add,
277+
),
278+
// Detect removed lines
279+
map(
280+
preceded(tuple((char('-'), not(tag("-- ")))), consume_content_line),
281+
Line::Remove,
282+
),
283+
// Detect context lines
284+
map(preceded(char(' '), consume_content_line), Line::Context),
285+
// Handle empty lines within the chunk
286+
map(tag("\n"), |_| Line::Context("")),
287+
)),
288+
// Stop parsing when we detect the next header or have parsed the expected number of lines
289+
|_| !is_next_header(input),
290+
))(input)?;
230291

231292
let (old_range, new_range, range_hint) = ranges;
232293
Ok((
@@ -266,48 +327,6 @@ fn u64_digit(input: Input<'_>) -> IResult<Input<'_>, u64> {
266327
Ok((input, num))
267328
}
268329

269-
// Looks for lines starting with + or - or space, but not +++ or ---. Not a foolproof check.
270-
//
271-
// For example, if someone deletes a line that was using the pre-decrement (--) operator or adds a
272-
// line that was using the pre-increment (++) operator, this will fail.
273-
//
274-
// Example where this doesn't work:
275-
//
276-
// --- main.c
277-
// +++ main.c
278-
// @@ -1,4 +1,7 @@
279-
// +#include<stdio.h>
280-
// +
281-
// int main() {
282-
// double a;
283-
// --- a;
284-
// +++ a;
285-
// +printf("%d\n", a);
286-
// }
287-
//
288-
// We will fail to parse this entire diff.
289-
//
290-
// By checking for `+++ ` instead of just `+++`, we add at least a little more robustness because
291-
// we know that people typically write `++a`, not `++ a`. That being said, this is still not enough
292-
// to guarantee correctness in all cases.
293-
//
294-
//FIXME: Use the ranges in the chunk header to figure out how many chunk lines to parse. Will need
295-
// to figure out how to count in nom more robustly than many1!(). Maybe using switch!()?
296-
//FIXME: The test_parse_triple_plus_minus_hack test will no longer panic when this is fixed.
297-
fn chunk_line(input: Input<'_>) -> IResult<Input<'_>, Line> {
298-
alt((
299-
map(
300-
preceded(tuple((char('+'), not(tag("++ ")))), consume_content_line),
301-
Line::Add,
302-
),
303-
map(
304-
preceded(tuple((char('-'), not(tag("-- ")))), consume_content_line),
305-
Line::Remove,
306-
),
307-
map(preceded(char(' '), consume_content_line), Line::Context),
308-
))(input)
309-
}
310-
311330
// Trailing newline indicator
312331
fn no_newline_indicator(input: Input<'_>) -> IResult<Input<'_>, bool> {
313332
map(

tests/parse_samples.rs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,36 @@ fn parse_samples() {
3131
assert_eq!(patches, patches2);
3232
}
3333
}
34+
35+
#[test]
36+
fn parse_wild_samples() {
37+
let samples_path = PathBuf::from(file!())
38+
.parent()
39+
.unwrap()
40+
.join("wild-samples");
41+
for file in fs::read_dir(samples_path).unwrap() {
42+
let path = file.unwrap().path();
43+
if path.extension().unwrap_or_default() != "patch" {
44+
continue;
45+
}
46+
47+
let data = fs::read_to_string(dbg!(&path)).unwrap();
48+
let patches = Patch::from_multiple(&data)
49+
.unwrap_or_else(|err| panic!("failed to parse {:?}, error: {}", path, err));
50+
51+
// Make sure that the patch file we produce parses to the same information as the original
52+
// patch file.
53+
let patch_file: String = patches
54+
.iter()
55+
.map(|patch| format!("{}\n", patch))
56+
.collect();
57+
58+
let patches2 = Patch::from_multiple(&patch_file).unwrap_or_else(|err| {
59+
panic!(
60+
"failed to re-parse {:?} after formatting, error: {}",
61+
path, err
62+
)
63+
});
64+
assert_eq!(patches, patches2);
65+
}
66+
}
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
diff --git a/Makefile b/Makefile
2+
index 9754ddf..b5512de 100644
3+
--- a/Makefile
4+
+++ b/Makefile
5+
@@ -15,16 +15,16 @@
6+
SHELL=/bin/sh
7+
8+
# To assist in cross-compiling
9+
-CC=gcc
10+
-AR=ar
11+
-RANLIB=ranlib
12+
-LDFLAGS=
13+
+CC?=gcc
14+
+AR?=ar
15+
+RANLIB?=ranlib
16+
17+
BIGFILES=-D_FILE_OFFSET_BITS=64
18+
-CFLAGS=-Wall -Winline -O2 -g $(BIGFILES)
19+
+CFLAGS?=-Wall -Winline -O2 -g
20+
+CFLAGS=$(CFLAGS) $(BIGFILES)
21+
22+
# Where you want it installed when you do 'make install'
23+
-PREFIX=/usr/local
24+
+PREFIX=$PREFIX
25+
26+
27+
OBJS= blocksort.o \
28+
diff --git a/Makefile-libbz2_so b/Makefile-libbz2_so
29+
index e58791b..f4b9fa2 100644
30+
--- a/Makefile-libbz2_so
31+
+++ b/Makefile-libbz2_so
32+
@@ -22,9 +22,18 @@
33+
34+
35+
SHELL=/bin/sh
36+
-CC=gcc
37+
+
38+
+# To assist in cross-compiling
39+
+CC?=gcc
40+
+AR?=ar
41+
+RANLIB?=ranlib
42+
+
43+
BIGFILES=-D_FILE_OFFSET_BITS=64
44+
-CFLAGS=-fpic -fPIC -Wall -Winline -O2 -g $(BIGFILES)
45+
+CFLAGS?=-Wall -Winline -O2 -g
46+
+CFLAGS=$(CFLAGS) $(BIGFILES)
47+
+
48+
+# Where you want it installed when you do 'make install'
49+
+PREFIX=$PREFIX
50+
51+
OBJS= blocksort.o \
52+
huffman.o \
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
Index: freeimage/Source/FreeImage/PluginTIFF.cpp
2+
===================================================================
3+
--- freeimage.orig/Source/FreeImage/PluginTIFF.cpp
4+
+++ freeimage/Source/FreeImage/PluginTIFF.cpp
5+
@@ -122,9 +122,14 @@ static void ReadThumbnail(FreeImageIO *i
6+
static int s_format_id;
7+
8+
typedef struct {
9+
+ //! FreeImage IO functions
10+
FreeImageIO *io;
11+
+ //! FreeImage handle
12+
fi_handle handle;
13+
+ //! LibTIFF handle
14+
TIFF *tif;
15+
+ //! Count the number of thumbnails already read (used to avoid recursion on loading)
16+
+ unsigned thumbnailCount;
17+
} fi_TIFFIO;
18+
19+
// ----------------------------------------------------------
20+
@@ -184,10 +189,8 @@ Open a TIFF file descriptor for reading
21+
*/
22+
TIFF *
23+
TIFFFdOpen(thandle_t handle, const char *name, const char *mode) {
24+
- TIFF *tif;
25+
-
26+
// Open the file; the callback will set everything up
27+
- tif = TIFFClientOpen(name, mode, handle,
28+
+ TIFF *tif = TIFFClientOpen(name, mode, handle,
29+
_tiffReadProc, _tiffWriteProc, _tiffSeekProc, _tiffCloseProc,
30+
_tiffSizeProc, _tiffMapProc, _tiffUnmapProc);
31+
32+
@@ -460,9 +463,9 @@ CreateImageType(BOOL header_only, FREE_I
33+
}
34+
35+
}
36+
- else {
37+
+ else if (bpp <= 32) {
38+
39+
- dib = FreeImage_AllocateHeader(header_only, width, height, MIN(bpp, 32), FI_RGBA_RED_MASK, FI_RGBA_GREEN_MASK, FI_RGBA_BLUE_MASK);
40+
+ dib = FreeImage_AllocateHeader(header_only, width, height, bpp, FI_RGBA_RED_MASK, FI_RGBA_GREEN_MASK, FI_RGBA_BLUE_MASK);
41+
}
42+
43+
44+
@@ -1053,6 +1056,7 @@ Open(FreeImageIO *io, fi_handle handle,
45+
if(!fio) return NULL;
46+
fio->io = io;
47+
fio->handle = handle;
48+
+ fio->thumbnailCount = 0;
49+
50+
if (read) {
51+
fio->tif = TIFFFdOpen((thandle_t)fio, "", "r");
52+
@@ -1108,6 +1112,27 @@ check for uncommon bitspersample values
53+
*/
54+
static BOOL
55+
IsValidBitsPerSample(uint16 photometric, uint16 bitspersample, uint16 samplesperpixel) {
56+
+ // get the pixel depth in bits
57+
+ const uint16 pixel_depth = bitspersample * samplesperpixel;
58+
+
59+
+ // check for a supported pixel depth
60+
+ switch (pixel_depth) {
61+
+ case 1:
62+
+ case 4:
63+
+ case 8:
64+
+ case 16:
65+
+ case 24:
66+
+ case 32:
67+
+ case 48:
68+
+ case 64:
69+
+ case 96:
70+
+ case 128:
71+
+ // OK, go on
72+
+ break;
73+
+ default:
74+
+ // unsupported pixel depth
75+
+ return FALSE;
76+
+ }
77+
78+
switch(bitspersample) {
79+
case 1:
80+
@@ -1148,6 +1173,8 @@ IsValidBitsPerSample(uint16 photometric,
81+
default:
82+
return FALSE;
83+
}
84+
+
85+
+ return FALSE;
86+
}
87+
88+
static TIFFLoadMethod
89+
@@ -1237,16 +1264,32 @@ Read embedded thumbnail
90+
static void
91+
ReadThumbnail(FreeImageIO *io, fi_handle handle, void *data, TIFF *tiff, FIBITMAP *dib) {
92+
FIBITMAP* thumbnail = NULL;
93+
+
94+
+ fi_TIFFIO *fio = (fi_TIFFIO*)data;
95+
+
96+
+ /*
97+
+ Thumbnail loading can cause recursions because of the way
98+
+ functions TIFFLastDirectory and TIFFSetSubDirectory are working.
99+
+ We use here a hack to count the number of times the ReadThumbnail function was called.
100+
+ We only allow one call, check for this
101+
+ */
102+
+ if (fio->thumbnailCount > 0) {
103+
+ return;
104+
+ }
105+
+ else {
106+
+ // update the thumbnail count (used to avoid recursion)
107+
+ fio->thumbnailCount++;
108+
+ }
109+
110+
// read exif thumbnail (IFD 1) ...
111+
112+
- /*
113+
- // this code can cause unwanted recursion causing an overflow, it is thus disabled until we have a better solution
114+
- // do we really need to read a thumbnail from the Exif segment ? knowing that TIFF store the thumbnail in the subIFD ...
115+
- //
116+
toff_t exif_offset = 0;
117+
if(TIFFGetField(tiff, TIFFTAG_EXIFIFD, &exif_offset)) {
118+
119+
+ // this code can cause unwanted recursion causing an overflow,
120+
+ // because of the way TIFFLastDirectory work => this is checked
121+
+ // using
122+
+
123+
if(!TIFFLastDirectory(tiff)) {
124+
// save current position
125+
const long tell_pos = io->tell_proc(handle);
126+
@@ -1264,7 +1307,6 @@ ReadThumbnail(FreeImageIO *io, fi_handle
127+
TIFFSetDirectory(tiff, cur_dir);
128+
}
129+
}
130+
- */
131+
132+
// ... or read the first subIFD
133+
134+
@@ -1281,6 +1323,10 @@ ReadThumbnail(FreeImageIO *io, fi_handle
135+
const long tell_pos = io->tell_proc(handle);
136+
const uint16 cur_dir = TIFFCurrentDirectory(tiff);
137+
138+
+ // this code can cause unwanted recursion
139+
+ // causing an overflow, because of the way
140+
+ // TIFFSetSubDirectory work
141+
+
142+
if(TIFFSetSubDirectory(tiff, subIFD_offsets[0])) {
143+
// load the thumbnail
144+
int page = -1;
145+
@@ -2041,7 +2087,7 @@ Load(FreeImageIO *io, fi_handle handle,
146+
}
147+
148+
// calculate src line and dst pitch
149+
- int dst_pitch = FreeImage_GetPitch(dib);
150+
+ unsigned dst_pitch = FreeImage_GetPitch(dib);
151+
uint32 tileRowSize = (uint32)TIFFTileRowSize(tif);
152+
uint32 imageRowSize = (uint32)TIFFScanlineSize(tif);
153+
154+
@@ -2071,7 +2117,7 @@ Load(FreeImageIO *io, fi_handle handle,
155+
BYTE *src_bits = tileBuffer;
156+
BYTE *dst_bits = bits + rowSize;
157+
for(int k = 0; k < nrows; k++) {
158+
- memcpy(dst_bits, src_bits, src_line);
159+
+ memcpy(dst_bits, src_bits, MIN(dst_pitch, src_line));
160+
src_bits += tileRowSize;
161+
dst_bits -= dst_pitch;
162+
}

0 commit comments

Comments
 (0)