Skip to content

Commit 5f26233

Browse files
committed
Fix flake8 errors in image_service.py
1 parent 15d7f16 commit 5f26233

File tree

2 files changed

+48
-33
lines changed

2 files changed

+48
-33
lines changed

datafog/processing/image_processing/donut_processor.py

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -73,13 +73,17 @@ async def extract_text_from_image(self, image: Image.Image) -> str:
7373

7474
# If we're in a test environment and PYTEST_DONUT is not enabled, return a mock response
7575
if IN_TEST_ENV and not DONUT_TESTING_ENABLED:
76-
logging.info("Running in test environment without PYTEST_DONUT=yes, returning mock OCR result")
76+
logging.info(
77+
"Running in test environment without PYTEST_DONUT=yes, returning mock OCR result"
78+
)
7779
mock_result = {"text": "Mock OCR text for testing"}
7880
return json.dumps(mock_result)
79-
81+
8082
# If PYTEST_DONUT is enabled, log that we're running real OCR in test mode
8183
if IN_TEST_ENV and DONUT_TESTING_ENABLED:
82-
logging.info("PYTEST_DONUT=yes is set, running actual OCR in test environment")
84+
logging.info(
85+
"PYTEST_DONUT=yes is set, running actual OCR in test environment"
86+
)
8387

8488
# Only import torch and transformers when actually needed and not in test environment
8589
try:

tests/test_ocr_integration.py

Lines changed: 41 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -5,16 +5,16 @@
55
Otherwise, they will use a mock implementation.
66
"""
77

8-
import os
8+
import io
99
import json
10-
import pytest
10+
import os
1111
from unittest.mock import patch
12+
13+
import pytest
1214
from PIL import Image
13-
import io
1415

15-
from datafog.services.image_service import ImageService
1616
from datafog.processing.image_processing.donut_processor import DonutProcessor
17-
17+
from datafog.services.image_service import ImageService
1818

1919
# Mark all tests in this file as integration tests
2020
pytestmark = pytest.mark.integration
@@ -24,7 +24,7 @@
2424
def sample_image():
2525
"""Create a simple test image."""
2626
# Create a small white image with some black text
27-
img = Image.new('RGB', (200, 100), color='white')
27+
img = Image.new("RGB", (200, 100), color="white")
2828
return img
2929

3030

@@ -42,21 +42,24 @@ def image_service_donut():
4242

4343
def test_ocr_with_tesseract(image_service_tesseract, sample_image):
4444
"""Test OCR extraction using Tesseract.
45-
45+
4646
This test should always run regardless of the PYTEST_DONUT flag.
4747
"""
4848
# Save the image to a bytes buffer
4949
img_buffer = io.BytesIO()
50-
sample_image.save(img_buffer, format='PNG')
50+
sample_image.save(img_buffer, format="PNG")
5151
img_buffer.seek(0)
52-
52+
5353
# Create a temporary file-like object that PIL can open
54-
with patch('PIL.Image.open', return_value=sample_image):
55-
with patch('os.path.isfile', return_value=True):
54+
with patch("PIL.Image.open", return_value=sample_image):
55+
with patch("os.path.isfile", return_value=True):
5656
# Run the OCR extraction
5757
import asyncio
58-
result = asyncio.run(image_service_tesseract.ocr_extract(['dummy_path.png']))
59-
58+
59+
result = asyncio.run(
60+
image_service_tesseract.ocr_extract(["dummy_path.png"])
61+
)
62+
6063
# Verify that we got some result (even if empty for a blank image)
6164
assert result is not None
6265
assert isinstance(result, list)
@@ -65,54 +68,62 @@ def test_ocr_with_tesseract(image_service_tesseract, sample_image):
6568

6669
def test_ocr_with_donut(sample_image):
6770
"""Test OCR extraction using Donut.
68-
71+
6972
This test will use a mock implementation if PYTEST_DONUT is not set to 'yes'.
7073
It will use the actual implementation if PYTEST_DONUT=yes.
7174
"""
7275
# Save the image to a bytes buffer
7376
img_buffer = io.BytesIO()
74-
sample_image.save(img_buffer, format='PNG')
77+
sample_image.save(img_buffer, format="PNG")
7578
img_buffer.seek(0)
76-
79+
7780
# Force the test environment flag to be recognized
78-
with patch('datafog.processing.image_processing.donut_processor.IN_TEST_ENV', True):
79-
with patch('datafog.processing.image_processing.donut_processor.DONUT_TESTING_ENABLED', False):
81+
with patch("datafog.processing.image_processing.donut_processor.IN_TEST_ENV", True):
82+
with patch(
83+
"datafog.processing.image_processing.donut_processor.DONUT_TESTING_ENABLED",
84+
False,
85+
):
8086
# Create a new image service with Donut enabled
8187
image_service = ImageService(use_donut=True, use_tesseract=False)
82-
88+
8389
# Create a temporary file-like object that PIL can open
84-
with patch('PIL.Image.open', return_value=sample_image):
85-
with patch('os.path.isfile', return_value=True):
90+
with patch("PIL.Image.open", return_value=sample_image):
91+
with patch("os.path.isfile", return_value=True):
8692
# Run the OCR extraction
8793
import asyncio
88-
result = asyncio.run(image_service.ocr_extract(['dummy_path.png']))
89-
94+
95+
result = asyncio.run(image_service.ocr_extract(["dummy_path.png"]))
96+
9097
# Verify that we got some result
9198
assert result is not None
9299
assert isinstance(result, list)
93100
assert len(result) == 1
94-
101+
95102
# We should get the mock result since PYTEST_DONUT is not set
96103
assert "Mock OCR text for testing" in result[0]
97104

98105

99106
def test_donut_processor_directly(sample_image):
100107
"""Test the DonutProcessor directly.
101-
108+
102109
This test will use a mock implementation if PYTEST_DONUT is not set to 'yes'.
103110
It will use the actual implementation if PYTEST_DONUT=yes.
104111
"""
105112
# Force the test environment flag to be recognized
106-
with patch('datafog.processing.image_processing.donut_processor.IN_TEST_ENV', True):
107-
with patch('datafog.processing.image_processing.donut_processor.DONUT_TESTING_ENABLED', False):
113+
with patch("datafog.processing.image_processing.donut_processor.IN_TEST_ENV", True):
114+
with patch(
115+
"datafog.processing.image_processing.donut_processor.DONUT_TESTING_ENABLED",
116+
False,
117+
):
108118
processor = DonutProcessor()
109-
119+
110120
# Run the OCR extraction
111121
import asyncio
122+
112123
result = asyncio.run(processor.extract_text_from_image(sample_image))
113-
124+
114125
# Verify that we got some result
115126
assert result is not None
116-
127+
117128
# If PYTEST_DONUT is not set, we should get the mock result
118129
assert "Mock OCR text for testing" in result

0 commit comments

Comments
 (0)