|
3 | 3 | These tests require a valid API key configured in integration_config.py. |
4 | 4 | """ |
5 | 5 |
|
import os
from typing import Union

import pytest

from nutrient_dws import NutrientClient
|
19 | 21 | TIMEOUT = 60 |
20 | 22 |
|
21 | 23 |
|
def assert_is_pdf(file_path_or_bytes: Union[str, bytes, os.PathLike]) -> None:
    """Assert that a file or in-memory buffer starts with the PDF magic number.

    Only the leading bytes are inspected; the rest of the document is not
    parsed or validated.

    Args:
        file_path_or_bytes: Path to a file (``str`` or any ``os.PathLike``
            such as ``pathlib.Path``) or raw ``bytes`` content to check.

    Raises:
        AssertionError: If the content does not start with ``%PDF-``.
        ValueError: If the input is neither a path nor ``bytes``.
        OSError: If a path is given and the file cannot be opened.
    """
    if isinstance(file_path_or_bytes, bytes):
        content = file_path_or_bytes[:8]
    elif isinstance(file_path_or_bytes, (str, os.PathLike)):
        # open() accepts path-like objects directly, so no str() conversion
        # is needed for pathlib.Path inputs.
        with open(file_path_or_bytes, "rb") as f:
            content = f.read(8)
    else:
        raise ValueError("Input must be file path string or bytes")

    # Eight bytes are read so a failure message shows the full "%PDF-x.y"
    # header, but only the five-byte magic number is required to match.
    assert content.startswith(b"%PDF-"), (
        f"File does not start with PDF magic number, got: {content!r}"
    )
| 43 | + |
| 44 | + |
22 | 45 | @pytest.mark.skipif(not API_KEY, reason="No API key configured in integration_config.py") |
23 | 46 | class TestLiveAPI: |
24 | 47 | """Integration tests against live API.""" |
@@ -76,3 +99,63 @@ def test_builder_api_basic(self, client, sample_pdf_path): |
76 | 99 | # builder.add_step("example-tool", {}) |
77 | 100 |
|
78 | 101 | assert builder is not None |
| 102 | + |
def test_split_pdf_integration(self, client, sample_pdf_path, tmp_path):
    """Split the sample PDF into two ranges and validate the returned bytes.

    Calls ``client.split_pdf`` without output paths and expects a list of
    two non-empty ``bytes`` objects, each starting with the PDF header.
    Assumes the sample PDF has more than one page.
    """
    first_range = {"start": 0, "end": 1}  # intended to select the first page
    rest_range = {"start": 1}  # intended to select the remaining pages

    parts = client.split_pdf(sample_pdf_path, page_ranges=[first_range, rest_range])

    assert isinstance(parts, list)
    assert len(parts) == 2  # one part per requested range

    # Separate passes keep the failure order: all type checks first,
    # then all emptiness checks, then the PDF header checks.
    for part in parts:
        assert isinstance(part, bytes)
    for part in parts:
        assert len(part) > 0
    for part in parts:
        assert_is_pdf(part)
| 122 | + |
def test_split_pdf_with_output_files(self, client, sample_pdf_path, tmp_path):
    """Split the sample PDF to files on disk and validate each written file.

    Passes ``output_paths`` to ``client.split_pdf`` and expects an empty
    list back, with both target files created, non-empty, and starting with
    the PDF header. Assumes the sample PDF has more than one page.
    """
    first_file = tmp_path / "page1.pdf"
    rest_file = tmp_path / "remaining.pdf"

    returned = client.split_pdf(
        sample_pdf_path,
        page_ranges=[
            {"start": 0, "end": 1},  # intended to select the first page
            {"start": 1},  # intended to select the remaining pages
        ],
        output_paths=[str(first_file), str(rest_file)],
    )

    # Nothing is returned in memory when results are written to files.
    assert returned == []

    # Check the first-page file, then the remainder file, in that order.
    for written in (first_file, rest_file):
        assert written.exists()
        assert written.stat().st_size > 0
        assert_is_pdf(str(written))
| 149 | + |
def test_split_pdf_single_page_default(self, client, sample_pdf_path):
    """Call split_pdf with no page ranges and validate the single result.

    Expects a one-element list whose only item is non-empty ``bytes``
    starting with the PDF header.
    """
    # No page_ranges given: the test expects the default to yield one part
    # (presumably the first page — confirm against the client implementation).
    parts = client.split_pdf(sample_pdf_path)

    assert isinstance(parts, list)
    assert len(parts) == 1

    only_part = parts[0]
    assert isinstance(only_part, bytes)
    assert len(only_part) > 0

    assert_is_pdf(only_part)
0 commit comments