From 0c86892faab007ff5d4526dce94c127fbf87fe93 Mon Sep 17 00:00:00 2001 From: Justus Rijjke Date: Sun, 1 Mar 2026 21:28:56 +0100 Subject: [PATCH] Add --skip-compare-text option to bypass text comparison stage --- README.md | 1 + src/diffpdf/__init__.py | 10 +++++++--- src/diffpdf/cli.py | 10 +++++++++- tests/test_api.py | 18 ++++++++++++++++++ tests/test_cli.py | 13 +++++++++++++ 5 files changed, 48 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1b3c667..f258b67 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,7 @@ Options: --dpi INTEGER Render resolution --output-dir DIRECTORY Diff output directory (saves text diffs and visual diff images on failure) -v, --verbose Increase verbosity + --skip-compare-text Skip text content comparison --version Show the version and exit. --help Show this message and exit. ``` diff --git a/src/diffpdf/__init__.py b/src/diffpdf/__init__.py index 401ddf5..4f2cd67 100644 --- a/src/diffpdf/__init__.py +++ b/src/diffpdf/__init__.py @@ -17,6 +17,7 @@ def diffpdf( dpi: int = 96, output_dir: str | Path | None = None, verbose: bool = False, + skip_compare_text: bool = False, ) -> bool: ref_path = Path(reference) if isinstance(reference, str) else reference actual_path = Path(actual) if isinstance(actual, str) else actual @@ -34,9 +35,12 @@ def diffpdf( if not check_page_counts(ref_path, actual_path): return False - logger.info("[3/4] Checking text content...") - if not check_text_content(ref_path, actual_path, out_path): - return False + if skip_compare_text: + logger.info("[3/4] Skipping text content check") + else: + logger.info("[3/4] Checking text content...") + if not check_text_content(ref_path, actual_path, out_path): + return False logger.info("[4/4] Checking visual content...") if not check_visual_content(ref_path, actual_path, threshold, dpi, out_path): diff --git a/src/diffpdf/cli.py b/src/diffpdf/cli.py index f0627d6..b373adb 100644 --- a/src/diffpdf/cli.py +++ b/src/diffpdf/cli.py @@ -28,6 +28,11 @@ is_flag=True, help="Increase verbosity", ) +@click.option( + "--skip-compare-text", + is_flag=True, + help="Skip text content comparison (useful when different render engines produce identical-looking PDFs with different text extraction order)", +) @click.version_option(package_name="diffpdf") def cli( reference: Path, @@ -36,10 +41,13 @@ def cli( dpi: int, output_dir: Path | None, verbose: bool, + skip_compare_text: bool, ) -> None: """Compare two PDF files for structural, textual, and visual differences.""" try: - if diffpdf(reference, actual, threshold, dpi, output_dir, verbose): + if diffpdf( + reference, actual, threshold, dpi, output_dir, verbose, skip_compare_text + ): sys.exit(0) else: sys.exit(1) diff --git a/tests/test_api.py b/tests/test_api.py index be67a08..fc9bf1c 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -31,6 +31,24 @@ def test_api(ref_pdf_rel, actual_pdf_rel, should_pass): assert result == should_pass +def test_skip_compare_text(): + ref_pdf = TEST_ASSETS_DIR / "fail/1-letter-diff-A.pdf" + actual_pdf = TEST_ASSETS_DIR / "fail/1-letter-diff-B.pdf" + + # Without skip: fails at text stage + assert diffpdf(ref_pdf, actual_pdf) is False + # With skip: text stage bypassed, fails at visual stage (letter difference is visible) + assert diffpdf(ref_pdf, actual_pdf, skip_compare_text=True) is False + + +def test_skip_compare_text_multiplatform(): + ref_pdf = TEST_ASSETS_DIR / "pass/multiplatform-diff-A.pdf" + actual_pdf = TEST_ASSETS_DIR / "pass/multiplatform-diff-B.pdf" + + # Multiplatform PDFs already pass with text comparison + assert diffpdf(ref_pdf, actual_pdf, skip_compare_text=True) is True + + def test_text_diff_output(tmp_path): ref_pdf = TEST_ASSETS_DIR / "fail/1-letter-diff-A.pdf" actual_pdf = TEST_ASSETS_DIR / "fail/1-letter-diff-B.pdf" diff --git a/tests/test_cli.py b/tests/test_cli.py index 6a9b3ac..fcd241c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -20,6 +20,19 @@ def test_cli_with_output_dir(): assert Path("./diff").exists() +def test_skip_compare_text_flag(): + runner = CliRunner() + result = runner.invoke( + cli, + [ + str(TEST_ASSETS_DIR / "pass/multiplatform-diff-A.pdf"), + str(TEST_ASSETS_DIR / "pass/multiplatform-diff-B.pdf"), + "--skip-compare-text", + ], + ) + assert result.exit_code == 0 + + def test_verbose_flag(): runner = CliRunner() result = runner.invoke(