55Otherwise, they will use a mock implementation.
66"""
77
8- import os
8+ import io
99import json
10- import pytest
10+ import os
1111from unittest .mock import patch
12+
13+ import pytest
1214from PIL import Image
13- import io
1415
15- from datafog .services .image_service import ImageService
1616from datafog .processing .image_processing .donut_processor import DonutProcessor
17-
17+ from datafog . services . image_service import ImageService
1818
1919# Mark all tests in this file as integration tests
2020pytestmark = pytest .mark .integration
2424def sample_image ():
2525 """Create a simple test image."""
2626 # Create a small, blank white image (no text is drawn on it)
27- img = Image .new (' RGB' , (200 , 100 ), color = ' white' )
27+ img = Image .new (" RGB" , (200 , 100 ), color = " white" )
2828 return img
2929
3030
@@ -42,21 +42,24 @@ def image_service_donut():
4242
4343def test_ocr_with_tesseract (image_service_tesseract , sample_image ):
4444 """Test OCR extraction using Tesseract.
45-
45+
4646 This test should always run regardless of the PYTEST_DONUT flag.
4747 """
4848 # Save the image to a bytes buffer
4949 img_buffer = io .BytesIO ()
50- sample_image .save (img_buffer , format = ' PNG' )
50+ sample_image .save (img_buffer , format = " PNG" )
5151 img_buffer .seek (0 )
52-
52+
5353 # Patch PIL.Image.open and os.path.isfile so the dummy path resolves to sample_image
54- with patch (' PIL.Image.open' , return_value = sample_image ):
55- with patch (' os.path.isfile' , return_value = True ):
54+ with patch (" PIL.Image.open" , return_value = sample_image ):
55+ with patch (" os.path.isfile" , return_value = True ):
5656 # Run the OCR extraction
5757 import asyncio
58- result = asyncio .run (image_service_tesseract .ocr_extract (['dummy_path.png' ]))
59-
58+
59+ result = asyncio .run (
60+ image_service_tesseract .ocr_extract (["dummy_path.png" ])
61+ )
62+
6063 # Verify that we got some result (even if empty for a blank image)
6164 assert result is not None
6265 assert isinstance (result , list )
@@ -65,54 +68,62 @@ def test_ocr_with_tesseract(image_service_tesseract, sample_image):
6568
6669def test_ocr_with_donut (sample_image ):
6770 """Test OCR extraction using Donut.
68-
71+
6972 This test will use a mock implementation if PYTEST_DONUT is not set to 'yes'.
7073 It will use the actual implementation if PYTEST_DONUT=yes.
7174 """
7275 # Save the image to a bytes buffer
7376 img_buffer = io .BytesIO ()
74- sample_image .save (img_buffer , format = ' PNG' )
77+ sample_image .save (img_buffer , format = " PNG" )
7578 img_buffer .seek (0 )
76-
79+
7780 # Force the test environment flag to be recognized
78- with patch ('datafog.processing.image_processing.donut_processor.IN_TEST_ENV' , True ):
79- with patch ('datafog.processing.image_processing.donut_processor.DONUT_TESTING_ENABLED' , False ):
81+ with patch ("datafog.processing.image_processing.donut_processor.IN_TEST_ENV" , True ):
82+ with patch (
83+ "datafog.processing.image_processing.donut_processor.DONUT_TESTING_ENABLED" ,
84+ False ,
85+ ):
8086 # Create a new image service with Donut enabled
8187 image_service = ImageService (use_donut = True , use_tesseract = False )
82-
88+
8389 # Patch PIL.Image.open and os.path.isfile so the dummy path resolves to sample_image
84- with patch (' PIL.Image.open' , return_value = sample_image ):
85- with patch (' os.path.isfile' , return_value = True ):
90+ with patch (" PIL.Image.open" , return_value = sample_image ):
91+ with patch (" os.path.isfile" , return_value = True ):
8692 # Run the OCR extraction
8793 import asyncio
88- result = asyncio .run (image_service .ocr_extract (['dummy_path.png' ]))
89-
94+
95+ result = asyncio .run (image_service .ocr_extract (["dummy_path.png" ]))
96+
9097 # Verify that we got some result
9198 assert result is not None
9299 assert isinstance (result , list )
93100 assert len (result ) == 1
94-
101+
95102 # We should get the mock result because DONUT_TESTING_ENABLED is patched to False in this test
96103 assert "Mock OCR text for testing" in result [0 ]
97104
98105
99106def test_donut_processor_directly (sample_image ):
100107 """Test the DonutProcessor directly.
101-
108+
102109 This test will use a mock implementation if PYTEST_DONUT is not set to 'yes'.
103110 It will use the actual implementation if PYTEST_DONUT=yes.
104111 """
105112 # Force the test environment flag to be recognized
106- with patch ('datafog.processing.image_processing.donut_processor.IN_TEST_ENV' , True ):
107- with patch ('datafog.processing.image_processing.donut_processor.DONUT_TESTING_ENABLED' , False ):
113+ with patch ("datafog.processing.image_processing.donut_processor.IN_TEST_ENV" , True ):
114+ with patch (
115+ "datafog.processing.image_processing.donut_processor.DONUT_TESTING_ENABLED" ,
116+ False ,
117+ ):
108118 processor = DonutProcessor ()
109-
119+
110120 # Run the OCR extraction
111121 import asyncio
122+
112123 result = asyncio .run (processor .extract_text_from_image (sample_image ))
113-
124+
114125 # Verify that we got some result
115126 assert result is not None
116-
127+
117128 # If PYTEST_DONUT is not set, we should get the mock result
118129 assert "Mock OCR text for testing" in result
0 commit comments