Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def _read_geo_info(source, *, overview_level: int | None = None):
"""
from ._dtypes import tiff_dtype_to_numpy
from ._geotags import extract_geo_info
from ._header import parse_all_ifds, parse_header
from ._header import parse_all_ifds, parse_header, select_overview_ifd
from ._reader import _coerce_path, _is_file_like

source = _coerce_path(source)
Expand Down Expand Up @@ -226,10 +226,9 @@ def _read_geo_info(source, *, overview_level: int | None = None):
try:
header = parse_header(data)
ifds = parse_all_ifds(data, header)
ifd_idx = 0
if overview_level is not None:
ifd_idx = min(overview_level, len(ifds) - 1)
ifd = ifds[ifd_idx]
if not ifds:
raise ValueError("No IFDs found in TIFF file")
ifd = select_overview_ifd(ifds, overview_level)
geo_info = extract_geo_info(ifd, data, header.byte_order)
bps = ifd.bits_per_sample
if isinstance(bps, tuple):
Expand Down Expand Up @@ -1444,7 +1443,9 @@ def read_geotiff_gpu(source: str, *,
from ._reader import (
_FileSource, _check_dimensions, MAX_PIXELS_DEFAULT, _coerce_path,
)
from ._header import parse_header, parse_all_ifds, validate_tile_layout
from ._header import (
parse_header, parse_all_ifds, select_overview_ifd, validate_tile_layout,
)
from ._dtypes import tiff_dtype_to_numpy
from ._geotags import extract_geo_info
from ._gpu_decode import gpu_decode_tiles
Expand All @@ -1465,10 +1466,8 @@ def read_geotiff_gpu(source: str, *,
if len(ifds) == 0:
raise ValueError("No IFDs found in TIFF file")

ifd_idx = 0
if overview_level is not None:
ifd_idx = min(overview_level, len(ifds) - 1)
ifd = ifds[ifd_idx]
# Skip mask IFDs (NewSubfileType bit 2)
ifd = select_overview_ifd(ifds, overview_level)

bps = ifd.bits_per_sample
if isinstance(bps, tuple):
Expand Down
75 changes: 75 additions & 0 deletions xrspatial/geotiff/_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,25 @@ def get_values(self, tag: int) -> tuple | None:
return (v,)

# Convenience properties
@property
def subfile_type(self) -> int:
"""NewSubfileType (tag 254) bit flags. 0 if absent.

Bit flags (TIFF 6.0 spec):
bit 0 (& 1) - reduced-resolution overview
bit 1 (& 2) - page of multi-page document
bit 2 (& 4) - transparency mask
"""
v = self.get_value(TAG_NEW_SUBFILE_TYPE, 0)
if isinstance(v, tuple):
v = v[0] if v else 0
return int(v)

@property
def is_mask(self) -> bool:
"""True if this IFD's NewSubfileType marks it as a transparency mask."""
return bool(self.subfile_type & 4)

@property
def width(self) -> int:
return self.get_value(TAG_IMAGE_WIDTH, 0)
Expand Down Expand Up @@ -426,6 +445,62 @@ def parse_ifd(data: bytes | memoryview, offset: int,
return IFD(entries=entries, next_ifd_offset=next_ifd)


def select_overview_ifd(ifds: list[IFD], overview_level: int | None) -> IFD:
"""Pick the IFD for a requested overview level, skipping mask IFDs.

Some COG variants (notably GDAL with internal masks) interleave
transparency-mask IFDs (NewSubfileType bit 2 set) with overview IFDs.
Indexing the raw IFD list by ``overview_level`` then returns a binary
mask instead of a reduced-resolution overview. This helper builds a
filtered list of full-resolution + overview IFDs (mask-bit clear) and
indexes into that.

``overview_level=0`` (or ``None``) returns the full-resolution IFD;
``overview_level=1`` returns the first overview, and so on.

Parameters
----------
ifds : list[IFD]
All IFDs as parsed from the file.
overview_level : int or None
Which overview to return. ``None`` is treated as ``0``.

Returns
-------
IFD

Raises
------
ValueError
If ``ifds`` is empty, or if ``overview_level`` exceeds the number
of non-mask IFDs in the file.
"""
if not ifds:
raise ValueError("No IFDs found in TIFF file")

filtered = [ifd for ifd in ifds if not ifd.is_mask]
if not filtered:
raise ValueError(
"TIFF file contains no full-resolution or overview IFDs "
"(all IFDs are transparency masks)")

level = 0 if overview_level is None else overview_level
if level < 0:
raise ValueError(f"overview_level must be >= 0, got {level}")
if level >= len(filtered):
n_overviews = len(filtered) - 1
n_masks = len(ifds) - len(filtered)
raise ValueError(
f"overview_level={level} is out of range: TIFF has "
f"{len(filtered)} non-mask IFDs (1 full-resolution + "
f"{n_overviews} overview{'s' if n_overviews != 1 else ''}"
f"{f', plus {n_masks} mask IFD' if n_masks else ''}"
f"{'s' if n_masks > 1 else ''}). Valid overview_level values "
f"are 0..{len(filtered) - 1}.")

return filtered[level]


def parse_all_ifds(data: bytes | memoryview,
header: TIFFHeader) -> list[IFD]:
"""Parse all IFDs in a TIFF file.
Expand Down
23 changes: 12 additions & 11 deletions xrspatial/geotiff/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,14 @@
)
from ._dtypes import SUB_BYTE_BPS, tiff_dtype_to_numpy
from ._geotags import GeoInfo, GeoTransform, extract_geo_info
from ._header import IFD, TIFFHeader, parse_all_ifds, parse_header, validate_tile_layout
from ._header import (
IFD,
TIFFHeader,
parse_all_ifds,
parse_header,
select_overview_ifd,
validate_tile_layout,
)

# ---------------------------------------------------------------------------
# Allocation guard: reject TIFF dimensions that would exhaust memory
Expand Down Expand Up @@ -972,11 +979,8 @@ def _read_cog_http(url: str, overview_level: int | None = None,
if len(ifds) == 0:
raise ValueError("No IFDs found in COG")

# Select IFD based on overview level
ifd_idx = 0
if overview_level is not None:
ifd_idx = min(overview_level, len(ifds) - 1)
ifd = ifds[ifd_idx]
# Select IFD based on overview level, skipping any mask IFDs
ifd = select_overview_ifd(ifds, overview_level)

bps = ifd.bits_per_sample
if isinstance(bps, tuple):
Expand Down Expand Up @@ -1131,11 +1135,8 @@ def read_to_array(source, *, window=None, overview_level: int | None = None,
if len(ifds) == 0:
raise ValueError("No IFDs found in TIFF file")

# Select IFD
ifd_idx = 0
if overview_level is not None:
ifd_idx = min(overview_level, len(ifds) - 1)
ifd = ifds[ifd_idx]
# Select IFD, skipping any mask IFDs
ifd = select_overview_ifd(ifds, overview_level)

bps = ifd.bits_per_sample
if isinstance(bps, tuple):
Expand Down
200 changes: 200 additions & 0 deletions xrspatial/geotiff/tests/test_overview_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
"""Regression tests for issue #1504: overview_level must skip mask IFDs.

GDAL COG variants can interleave NewSubfileType=4 (transparency mask) IFDs
with the overview pyramid. Indexing the raw IFD list by overview_level then
returns a 1-bit mask instead of a reduced-resolution overview. The reader
should filter out mask IFDs before resolving overview_level, and raise a
clear ValueError when the requested level is out of range.
"""
from __future__ import annotations

import numpy as np
import pytest
import tifffile

from xrspatial.geotiff import open_geotiff
from xrspatial.geotiff._header import (
IFD,
parse_all_ifds,
parse_header,
select_overview_ifd,
)


def _write_tiff_with_mask(path, full_res, mask, overview):
"""Write a 3-IFD TIFF: full-res, mask (subfiletype=4), overview.

All IFDs are tiled so that the xrspatial reader exercises its tiled-COG
path. Tiles are 16x16 to keep the test files small.
"""
with tifffile.TiffWriter(str(path)) as tw:
# IFD 0: full resolution (subfiletype=0 implicit).
tw.write(full_res, tile=(16, 16), photometric='minisblack')
# IFD 1: transparency mask (subfiletype=4).
tw.write(
mask,
tile=(16, 16),
photometric='minisblack',
subfiletype=4,
)
# IFD 2: reduced-resolution overview (subfiletype=1).
tw.write(
overview,
tile=(16, 16),
photometric='minisblack',
subfiletype=1,
)


def _write_normal_cog(path, full_res, overviews):
"""Write a typical COG: full-res then a chain of overviews (subfiletype=1)."""
with tifffile.TiffWriter(str(path)) as tw:
tw.write(full_res, tile=(16, 16), photometric='minisblack')
for ov in overviews:
tw.write(
ov,
tile=(16, 16),
photometric='minisblack',
subfiletype=1,
)


# ---------------------------------------------------------------------------
# select_overview_ifd unit tests (operate on parsed IFD lists directly)
# ---------------------------------------------------------------------------

class TestSelectOverviewIFD:
def _ifds_for(self, path):
with open(path, 'rb') as f:
data = f.read()
return parse_all_ifds(data, parse_header(data))

def test_skips_mask_ifd(self, tmp_path):
path = tmp_path / 'with_mask.tif'
full = (np.arange(64 * 64, dtype=np.uint16).reshape(64, 64))
mask = np.zeros((64, 64), dtype=bool)
ov = (np.arange(32 * 32, dtype=np.uint16).reshape(32, 32))
_write_tiff_with_mask(path, full, mask, ov)

ifds = self._ifds_for(path)
assert len(ifds) == 3
# Sanity: middle IFD really is the mask.
assert ifds[1].subfile_type & 4 == 4
assert ifds[1].is_mask

# Level 0 is full-res (NOT the mask).
sel0 = select_overview_ifd(ifds, 0)
assert sel0.width == 64 and sel0.height == 64
assert not sel0.is_mask

# Level 1 is the overview, jumping over the mask IFD.
sel1 = select_overview_ifd(ifds, 1)
assert sel1.width == 32 and sel1.height == 32
assert not sel1.is_mask

def test_none_returns_full_res(self, tmp_path):
path = tmp_path / 'plain.tif'
full = np.zeros((32, 32), dtype=np.uint8)
_write_normal_cog(path, full, [])
ifds = self._ifds_for(path)
assert select_overview_ifd(ifds, None).width == 32

def test_out_of_range_raises(self, tmp_path):
path = tmp_path / 'with_mask.tif'
full = np.zeros((64, 64), dtype=np.uint16)
mask = np.zeros((64, 64), dtype=bool)
ov = np.zeros((32, 32), dtype=np.uint16)
_write_tiff_with_mask(path, full, mask, ov)
ifds = self._ifds_for(path)

with pytest.raises(ValueError) as excinfo:
select_overview_ifd(ifds, 99)
msg = str(excinfo.value)
assert 'overview_level=99' in msg
# Useful diagnostic: tells the user how many real IFDs there are.
assert '2 non-mask IFDs' in msg
assert 'mask' in msg.lower()

def test_negative_raises(self, tmp_path):
path = tmp_path / 'plain.tif'
full = np.zeros((32, 32), dtype=np.uint8)
_write_normal_cog(path, full, [])
ifds = self._ifds_for(path)
with pytest.raises(ValueError, match='must be >= 0'):
select_overview_ifd(ifds, -1)

def test_normal_cog_works(self, tmp_path):
path = tmp_path / 'normal_cog.tif'
full = np.full((128, 128), 42, dtype=np.uint16)
ovs = [
np.full((64, 64), 43, dtype=np.uint16),
np.full((32, 32), 44, dtype=np.uint16),
np.full((16, 16), 45, dtype=np.uint16),
]
_write_normal_cog(path, full, ovs)
ifds = self._ifds_for(path)
assert len(ifds) == 4

for level, expected_w in [(0, 128), (1, 64), (2, 32), (3, 16)]:
sel = select_overview_ifd(ifds, level)
assert sel.width == expected_w


# ---------------------------------------------------------------------------
# End-to-end: open_geotiff(overview_level=...) on a file with a mask IFD
# ---------------------------------------------------------------------------

class TestOpenGeotiffSkipsMask:
def test_overview_level_1_returns_overview_not_mask(self, tmp_path):
path = tmp_path / 'gdal_style_cog.tif'
# Distinct fill values per IFD so the test cannot be fooled by shape.
full = np.full((64, 64), 100, dtype=np.uint16)
mask = np.zeros((64, 64), dtype=bool)
overview = np.full((32, 32), 200, dtype=np.uint16)
_write_tiff_with_mask(path, full, mask, overview)

# Sanity: full-res still works.
da_full = open_geotiff(str(path), overview_level=0)
assert da_full.shape == (64, 64)
assert int(da_full.values[0, 0]) == 100
assert da_full.dtype == np.uint16

# The bug: overview_level=1 used to land on the mask IFD.
da_ov = open_geotiff(str(path), overview_level=1)
assert da_ov.shape == (32, 32), (
'overview_level=1 returned wrong shape; likely picked the mask IFD')
assert int(da_ov.values[0, 0]) == 200
assert da_ov.dtype == np.uint16

def test_out_of_range_raises_value_error(self, tmp_path):
path = tmp_path / 'gdal_style_cog.tif'
full = np.zeros((64, 64), dtype=np.uint16)
mask = np.zeros((64, 64), dtype=bool)
overview = np.zeros((32, 32), dtype=np.uint16)
_write_tiff_with_mask(path, full, mask, overview)

with pytest.raises(ValueError) as excinfo:
open_geotiff(str(path), overview_level=99)
msg = str(excinfo.value)
assert 'overview_level=99' in msg
assert '2 non-mask IFDs' in msg

def test_normal_cog_unchanged(self, tmp_path):
path = tmp_path / 'normal_cog.tif'
full = np.full((128, 128), 1, dtype=np.uint16)
ovs = [
np.full((64, 64), 2, dtype=np.uint16),
np.full((32, 32), 3, dtype=np.uint16),
np.full((16, 16), 4, dtype=np.uint16),
]
_write_normal_cog(path, full, ovs)

for level, expected_shape, expected_val in [
(0, (128, 128), 1),
(1, (64, 64), 2),
(2, (32, 32), 3),
(3, (16, 16), 4),
]:
da = open_geotiff(str(path), overview_level=level)
assert da.shape == expected_shape
assert int(da.values[0, 0]) == expected_val
Loading