Skip to content

Commit 5ead6cc

Browse files
Fix numba JIT configuration failing on airflow runner (#1117)
fix: prevent numba JIT errors during test collection on runner envs

Two issues caused collection-time failures when running tests in runner environments (e.g. the ome_sdc conda env where Airflow is used):

1. os.environ.setdefault("NUMBA_DISABLE_JIT", "1") is a no-op if the environment already exports NUMBA_DISABLE_JIT=0. Switched to a force-set assignment, and added a sys.modules patch for the case where a pytest plugin (fast-array-utils, npe2, napari-plugin-engine) has already imported numba before conftest.py runs — numba reads its config once at import time, so the env var alone is too late.

2. Module-level blobs() calls in four test files executed during pytest collection, before conftest.py had taken effect. Replaced each with a @pytest.fixture(scope="module"), which fires during test execution. No performance change — blobs() is still called once per file.

refactor: consolidate numba JIT disable into root conftest only

The sub-conftest files in core/operations/ and dataloader/ duplicated the same NUMBA_DISABLE_JIT logic already present in the root tests/conftest.py. Removed the duplicates.

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 6ee1013 commit 5ead6cc

8 files changed

Lines changed: 50 additions & 49 deletions

File tree

docs/tutorials/notebooks

Submodule notebooks updated 160 files

tests/conftest.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,14 @@
11
from __future__ import annotations
22

33
import os
4+
import sys
45

56
# Disable numba JIT for the test suite (the test data is small so initializing the JIT is slower than using plain
6-
# Python)
7-
os.environ.setdefault("NUMBA_DISABLE_JIT", "1")
7+
# Python). Force-set (not setdefault) so the runner environment cannot accidentally override with "0".
8+
os.environ["NUMBA_DISABLE_JIT"] = "1"
9+
# If a pytest plugin already imported numba before this conftest ran, patch the cached config value too.
10+
if "numba.core.config" in sys.modules:
11+
sys.modules["numba.core.config"].NUMBA_DISABLE_JIT = 1
812

913
import copy as _copy
1014
from collections.abc import Callable, Sequence

tests/core/operations/conftest.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1 @@
11
from __future__ import annotations
2-
3-
import os
4-
5-
# Disable numba JIT compilation for rasterize tests. Datashader (used by rasterize) triggers
6-
# numba JIT on first call, costing ~1.4s per worker. Python-mode gives identical results for
7-
# the small test data here — unlike real data, there is no throughput advantage from JIT.
8-
os.environ.setdefault("NUMBA_DISABLE_JIT", "1")

tests/core/operations/test_vectorize.py

Lines changed: 16 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -18,13 +18,16 @@
1818
from spatialdata.models.models import ShapesModel
1919
from spatialdata.testing import assert_elements_are_identical
2020

21+
2122
# each of the tests operates on different elements, hence we can initialize the data once without conflicts
22-
sdata = blobs(length=128)
23+
@pytest.fixture(scope="module")
24+
def sdata():
25+
return blobs(length=128)
2326

2427

2528
# conversion from labels
2629
@pytest.mark.parametrize("is_multiscale", [False, True])
27-
def test_labels_2d_to_circles(is_multiscale: bool) -> None:
30+
def test_labels_2d_to_circles(sdata, is_multiscale: bool) -> None:
2831
key = "blobs" + ("_multiscale" if is_multiscale else "") + "_labels"
2932
element = sdata[key]
3033
new_circles = to_circles(element)
@@ -36,7 +39,7 @@ def test_labels_2d_to_circles(is_multiscale: bool) -> None:
3639

3740

3841
@pytest.mark.parametrize("is_multiscale", [False, True])
39-
def test_labels_2d_to_polygons(is_multiscale: bool) -> None:
42+
def test_labels_2d_to_polygons(sdata, is_multiscale: bool) -> None:
4043
key = "blobs" + ("_multiscale" if is_multiscale else "") + "_labels"
4144
element = sdata[key]
4245
new_polygons = to_polygons(element)
@@ -49,7 +52,7 @@ def test_labels_2d_to_polygons(is_multiscale: bool) -> None:
4952
assert ((new_polygons.area - new_polygons.pixel_count) / new_polygons.pixel_count < 0.01).all()
5053

5154

52-
def test_chunked_labels_2d_to_polygons() -> None:
55+
def test_chunked_labels_2d_to_polygons(sdata) -> None:
5356
no_chunks_polygons = to_polygons(sdata["blobs_labels"])
5457

5558
sdata["blobs_labels_chunked"] = sdata["blobs_labels"].copy()
@@ -63,21 +66,21 @@ def test_chunked_labels_2d_to_polygons() -> None:
6366

6467

6568
# conversion from circles
66-
def test_circles_to_circles() -> None:
69+
def test_circles_to_circles(sdata) -> None:
6770
element = sdata["blobs_circles"]
6871
new_circles = to_circles(element)
6972
assert_elements_are_identical(element, new_circles)
7073

7174

72-
def test_circles_to_polygons() -> None:
75+
def test_circles_to_polygons(sdata) -> None:
7376
element = sdata["blobs_circles"]
7477
polygons = to_polygons(element, buffer_resolution=1000)
7578
areas = element.radius**2 * math.pi
7679
assert np.allclose(polygons.area, areas)
7780

7881

7982
# conversion from polygons/multipolygons
80-
def test_polygons_to_circles() -> None:
83+
def test_polygons_to_circles(sdata) -> None:
8184
element = sdata["blobs_polygons"].iloc[:2]
8285
new_circles = to_circles(element)
8386

@@ -93,7 +96,7 @@ def test_polygons_to_circles() -> None:
9396
assert_elements_are_identical(new_circles, expected)
9497

9598

96-
def test_multipolygons_to_circles() -> None:
99+
def test_multipolygons_to_circles(sdata) -> None:
97100
element = sdata["blobs_multipolygons"]
98101
new_circles = to_circles(element)
99102

@@ -108,13 +111,13 @@ def test_multipolygons_to_circles() -> None:
108111
assert_elements_are_identical(new_circles, expected)
109112

110113

111-
def test_polygons_multipolygons_to_polygons() -> None:
114+
def test_polygons_multipolygons_to_polygons(sdata) -> None:
112115
polygons = sdata["blobs_multipolygons"]
113116
assert polygons is to_polygons(polygons)
114117

115118

116119
# conversion from points
117-
def test_points_to_circles() -> None:
120+
def test_points_to_circles(sdata) -> None:
118121
element = sdata["blobs_points"]
119122
with pytest.raises(RuntimeError, match="`radius` must either be provided, either be a column"):
120123
to_circles(element)
@@ -126,18 +129,18 @@ def test_points_to_circles() -> None:
126129
assert np.array_equal(np.ones_like(x), circles["radius"])
127130

128131

129-
def test_points_to_polygons() -> None:
132+
def test_points_to_polygons(sdata) -> None:
130133
with pytest.raises(RuntimeError, match="Cannot convert points to polygons"):
131134
to_polygons(sdata["blobs_points"])
132135

133136

134137
# conversion from images (invalid)
135-
def test_images_to_circles() -> None:
138+
def test_images_to_circles(sdata) -> None:
136139
with pytest.raises(RuntimeError, match=r"Cannot apply to_circles\(\) to images"):
137140
to_circles(sdata["blobs_image"])
138141

139142

140-
def test_images_to_polygons() -> None:
143+
def test_images_to_polygons(sdata) -> None:
141144
with pytest.raises(RuntimeError, match=r"Cannot apply to_polygons\(\) to images"):
142145
to_polygons(sdata["blobs_image"])
143146

tests/core/query/test_relational_query_match_sdata_to_table.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -14,12 +14,13 @@ def _make_test_data() -> SpatialData:
1414
return sdata
1515

1616

17-
# constructing the example data; let's use a global variable as we can reuse the same object on most tests
18-
# without having to recreate it
19-
sdata = _make_test_data()
17+
# constructing the example data; reuse the same object on most tests without having to recreate it
18+
@pytest.fixture(scope="module")
19+
def sdata():
20+
return _make_test_data()
2021

2122

22-
def test_match_sdata_to_table_filter_specific_instances():
23+
def test_match_sdata_to_table_filter_specific_instances(sdata):
2324
"""
2425
Filter to keep only specific instances. Note that it works even when the table annotates multiple elements.
2526
"""
@@ -33,7 +34,7 @@ def test_match_sdata_to_table_filter_specific_instances():
3334
assert "blobs_polygons-sdata2" in matched
3435

3536

36-
def test_match_sdata_to_table_filter_specific_instances_element():
37+
def test_match_sdata_to_table_filter_specific_instances_element(sdata):
3738
"""
3839
Filter to keep only specific instances, in a specific element.
3940
"""
@@ -49,7 +50,7 @@ def test_match_sdata_to_table_filter_specific_instances_element():
4950
assert "blobs_polygons-sdata2" not in matched
5051

5152

52-
def test_match_sdata_to_table_filter_by_threshold():
53+
def test_match_sdata_to_table_filter_by_threshold(sdata):
5354
"""
5455
Filter by a threshold on a value column, in a specific element.
5556
"""
@@ -63,7 +64,7 @@ def test_match_sdata_to_table_filter_by_threshold():
6364
assert "blobs_polygons-sdata2" not in matched
6465

6566

66-
def test_match_sdata_to_table_subset_certain_obs():
67+
def test_match_sdata_to_table_subset_certain_obs(sdata):
6768
"""
6869
Subset to certain obs (we could also subset to certain var or layer).
6970
"""
@@ -135,7 +136,7 @@ def test_match_sdata_to_table_match_labels_error():
135136
assert "blobs_points-sdata1" not in matched
136137

137138

138-
def test_match_sdata_to_table_no_table_argument():
139+
def test_match_sdata_to_table_no_table_argument(sdata):
139140
"""
140141
If no table argument is passed, the table_name argument will be used to match the table.
141142
"""

tests/core/test_data_extent.py

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -15,11 +15,15 @@
1515
from spatialdata.models import Image2DModel, PointsModel, ShapesModel
1616
from spatialdata.transformations import Affine, Translation, remove_transformation, set_transformation
1717

18-
# for faster tests; we will pay attention not to modify the original data
19-
sdata = blobs()
2018
RNG = default_rng(seed=0)
2119

2220

21+
# for faster tests; we will pay attention not to modify the original data
22+
@pytest.fixture(scope="module")
23+
def sdata():
24+
return blobs()
25+
26+
2327
def check_test_results0(extent, min_coordinates, max_coordinates, axes):
2428
for i, ax in enumerate(axes):
2529
assert np.isclose(extent[ax][0], min_coordinates[i])
@@ -36,7 +40,7 @@ def check_test_results1(extent0, extent1):
3640

3741

3842
@pytest.mark.parametrize("shape_type", ["circles", "polygons", "multipolygons"])
39-
def test_get_extent_shapes(shape_type):
43+
def test_get_extent_shapes(sdata, shape_type):
4044
extent = get_extent(sdata[f"blobs_{shape_type}"])
4145
if shape_type == "circles":
4246
min_coordinates = np.array([98.92618679, 137.62348969])
@@ -58,7 +62,7 @@ def test_get_extent_shapes(shape_type):
5862

5963

6064
@pytest.mark.parametrize("exact", [True, False])
61-
def test_get_extent_points(exact: bool):
65+
def test_get_extent_points(sdata, exact: bool):
6266
# 2d case
6367
extent = get_extent(sdata["blobs_points"], exact=exact)
6468
check_test_results0(
@@ -83,7 +87,7 @@ def test_get_extent_points(exact: bool):
8387

8488
@pytest.mark.parametrize("raster_type", ["image", "labels"])
8589
@pytest.mark.parametrize("multiscale", [False, True])
86-
def test_get_extent_raster(raster_type, multiscale):
90+
def test_get_extent_raster(sdata, raster_type, multiscale):
8791
raster = sdata[f"blobs_multiscale_{raster_type}"] if multiscale else sdata[f"blobs_{raster_type}"]
8892

8993
extent = get_extent(raster)
@@ -95,7 +99,7 @@ def test_get_extent_raster(raster_type, multiscale):
9599
)
96100

97101

98-
def test_get_extent_spatialdata():
102+
def test_get_extent_spatialdata(sdata):
99103
sdata2 = SpatialData(shapes={"circles": sdata["blobs_circles"], "polygons": sdata["blobs_polygons"]})
100104
extent = get_extent(sdata2)
101105
check_test_results0(
@@ -106,7 +110,7 @@ def test_get_extent_spatialdata():
106110
)
107111

108112

109-
def test_get_extent_invalid_coordinate_system():
113+
def test_get_extent_invalid_coordinate_system(sdata):
110114
# element without the coordinate system
111115
with pytest.raises(ValueError):
112116
_ = get_extent(sdata["blobs_circles"], coordinate_system="invalid")
@@ -231,7 +235,7 @@ def test_rotate_vector_data(exact):
231235
check_test_results1(extent, expected)
232236

233237

234-
def test_get_extent_affine_circles():
238+
def test_get_extent_affine_circles(sdata):
235239
"""
236240
Verify that the extent of the transformed circles, computed with exact = False, gives the same result as
237241
transforming the bounding box of the original circles
@@ -304,7 +308,7 @@ def test_get_extent_affine_points3d():
304308
assert np.allclose(transformed_extent_3d["z"], extent_3d["z"])
305309

306310

307-
def test_get_extent_affine_sdata():
311+
def test_get_extent_affine_sdata(sdata):
308312
# let's make a copy since we don't want to modify the original data
309313
sdata2 = SpatialData(
310314
shapes={

tests/dataloader/conftest.py

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1 @@
11
from __future__ import annotations
2-
3-
import os
4-
5-
# Disable numba JIT compilation for dataloader tests. Datashader (used by rasterize) triggers
6-
# numba JIT on first call, costing ~1.4s per worker. Python-mode gives identical results for
7-
# the small test data here — unlike real data, there is no throughput advantage from JIT.
8-
os.environ.setdefault("NUMBA_DISABLE_JIT", "1")

tests/io/test_readwrite.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -957,7 +957,9 @@ def test_incremental_io_attrs(points: SpatialData, sdata_container_format: Spati
957957
assert sdata2.attrs["c"] == 3
958958

959959

960-
cached_sdata_blobs = blobs()
960+
@pytest.fixture(scope="module")
961+
def _cached_sdata_blobs():
962+
return blobs()
961963

962964

963965
@pytest.mark.filterwarnings("ignore:SpatialData is not stored in the most current format:UserWarning")
@@ -1020,6 +1022,7 @@ def test_delete_element_from_disk(
10201022
@pytest.mark.parametrize("sdata_container_format", SDATA_FORMATS)
10211023
def test_element_already_on_disk_different_type(
10221024
full_sdata,
1025+
_cached_sdata_blobs,
10231026
element_name: str,
10241027
sdata_container_format: SpatialDataContainerFormatType,
10251028
) -> None:
@@ -1037,7 +1040,7 @@ def test_element_already_on_disk_different_type(
10371040
wrong_group = "images" if element_type == "tables" else "tables"
10381041
del getattr(full_sdata, element_type)[element_name]
10391042
getattr(full_sdata, wrong_group)[element_name] = (
1040-
getattr(cached_sdata_blobs, wrong_group).values().__iter__().__next__()
1043+
getattr(_cached_sdata_blobs, wrong_group).values().__iter__().__next__()
10411044
)
10421045
ERROR_MSG = "The in-memory object should have a different name."
10431046

0 commit comments

Comments
 (0)