From 4a1416e3b181e8acc8e5d43bd0e1221b8c78afe8 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 8 May 2026 13:45:41 -0700 Subject: [PATCH 1/2] Apply LERC valid-mask on decode so masked pixels become nodata lerc.decode returns (rc, data, valid_mask, ...) but the wrapper only forwarded data.tobytes(). GDAL writes LERC TIFFs whose masked pixels are zero-filled in the data array, so downstream code that masks by nodata silently sees those zeros as real measurements. What changed: _compression.py: new lerc_decompress_with_mask(data) -> (bytes, valid_mask_or_None). lerc_decompress now calls it and drops the mask, preserving its existing signature. An all-True mask collapses to None so callers skip the fill pass. _reader.py: _decode_strip_or_tile takes a separate path for LERC, calls the new wrapper, and writes nodata into masked positions after reshape. A small _resolve_masked_fill helper reads ifd.nodata_str and falls back to NaN for float dtypes or 0 for integer dtypes when no GDAL_NODATA is set. Each call site (strip reader, tile reader, COG-over-HTTP reader) passes a precomputed masked_fill. tests/test_lerc_valid_mask.py: 8 new tests. Wrapper tests cover the no-mask path, the all-True-mask collapse, and a partial mask. TIFF round-trip tests cover float32 with NaN nodata, float32 with -9999, uint16 with 65535, and a regression that an unmasked LERC file still round-trips bit-exact. The round-trip tests monkeypatch lerc_compress to inject a per-tile mask through a predicate, since the writer hard-codes hasMask=False. Option A was chosen (plumb nodata into the decode path). Option B would have required changing decompress()'s return shape, which has callers in tests and in _gpu_decode.py. Out of scope: the GPU LERC path in _gpu_decode.py still drops the mask. The constraints excluded changes there. --- xrspatial/geotiff/_compression.py | 32 ++- xrspatial/geotiff/_reader.py | 80 +++++- .../geotiff/tests/test_lerc_valid_mask.py | 239 ++++++++++++++++++ 3 files changed, 339 insertions(+), 12 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_lerc_valid_mask.py diff --git a/xrspatial/geotiff/_compression.py b/xrspatial/geotiff/_compression.py index a9558b5a..007c78bd 100644 --- a/xrspatial/geotiff/_compression.py +++ b/xrspatial/geotiff/_compression.py @@ -1046,7 +1046,29 @@ def jpeg2000_compress(data: bytes, width: int, height: int, def lerc_decompress(data: bytes, width: int = 0, height: int = 0, samples: int = 1) -> bytes: - """Decompress LERC data. Requires the ``lerc`` package.""" + """Decompress LERC data. Requires the ``lerc`` package. + + Returns the raw decoded pixel bytes. Any LERC valid-mask is dropped + here; masked pixels are returned as LERC's zero fill (the wire + format's default). Callers that need to honour the file's nodata + value should use :func:`lerc_decompress_with_mask` instead and apply + nodata at the array level once dtype is known. + """ + decoded_bytes, _mask = lerc_decompress_with_mask(data) + return decoded_bytes + + +def lerc_decompress_with_mask(data: bytes): + """Decompress LERC data and return ``(bytes, valid_mask_or_None)``. + + ``valid_mask`` is ``None`` when LERC reports the block is fully + valid (no encoded mask, or a mask that is True everywhere); + otherwise it is a numpy array whose shape matches the decoded data + (height, width) or (height, width, samples), with ``0`` marking + pixels the encoder flagged as invalid. LERC zero fills masked + positions in the data array, so the returned mask is the only + signal that lets a reader restore the file's nodata sentinel. + """ if not LERC_AVAILABLE: raise ImportError( "lerc is required to read LERC-compressed TIFFs. " @@ -1056,7 +1078,13 @@ def lerc_decompress(data: bytes, width: int = 0, height: int = 0, if result[0] != 0: raise RuntimeError(f"LERC decode failed with error code {result[0]}") arr = result[1] - return arr.tobytes() + valid_mask = result[2] if len(result) > 2 else None + if valid_mask is None: + return arr.tobytes(), None + valid_mask = np.asarray(valid_mask) + if valid_mask.size == 0 or bool(valid_mask.all()): + return arr.tobytes(), None + return arr.tobytes(), valid_mask def lerc_compress(data: bytes, width: int, height: int, diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index a99f1e63..3395e02d 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -12,9 +12,11 @@ import numpy as np from ._compression import ( + COMPRESSION_LERC, COMPRESSION_NONE, decompress, fp_predictor_decode, + lerc_decompress_with_mask, predictor_decode, unpack_bits, ) @@ -525,9 +527,33 @@ def _packed_byte_count(pixel_count: int, bps: int) -> int: return (pixel_count * bps + 7) // 8 +def _resolve_masked_fill(nodata_str: str | None, dtype: np.dtype): + """Resolve the value to use when restoring LERC-masked pixels. + + Mirrors :func:`_sparse_fill_value` but defaults to NaN for floating + dtypes when the file does not declare a nodata sentinel. Float + rasters with no GDAL_NODATA tag still benefit from NaN propagation + because LERC's zero fill would silently masquerade as a real + measurement at z == 0. + """ + if nodata_str is not None: + try: + v = float(nodata_str) + if dtype.kind == 'f': + return dtype.type(v) + if not math.isnan(v) and not math.isinf(v): + return dtype.type(int(v)) + except (TypeError, ValueError): + pass + if dtype.kind == 'f': + return dtype.type(np.nan) + return dtype.type(0) + + def _decode_strip_or_tile(data_slice, compression, width, height, samples, bps, bytes_per_sample, is_sub_byte, dtype, pred, - byte_order='<', jpeg_tables=None): + byte_order='<', jpeg_tables=None, + masked_fill=None): """Decompress, apply predictor, unpack sub-byte, and reshape a strip/tile. Parameters @@ -551,9 +577,18 @@ def _decode_strip_or_tile(data_slice, compression, width, height, samples, else: expected = pixel_count * bytes_per_sample - chunk = decompress(data_slice, compression, expected, - width=width, height=height, samples=samples, - jpeg_tables=jpeg_tables) + lerc_mask = None + if compression == COMPRESSION_LERC: + # LERC needs special handling: lerc.decode also returns a + # valid-mask which the generic decompress() dispatcher discards. + # We capture it here so masked pixels can be restored to nodata + # below, instead of leaking LERC's zero fill into the output. + decoded_bytes, lerc_mask = lerc_decompress_with_mask(data_slice) + chunk = np.frombuffer(decoded_bytes, dtype=np.uint8) + else: + chunk = decompress(data_slice, compression, expected, + width=width, height=height, samples=samples, + jpeg_tables=jpeg_tables) # Validate the decompressed byte count. A truncated deflate stream or a # buggy compressor can produce fewer or more bytes than expected. Without @@ -587,8 +622,23 @@ def _decode_strip_or_tile(data_slice, compression, width, height, samples, pixels = pixels.astype(dtype) if samples > 1: - return pixels.reshape(height, width, samples) - return pixels.reshape(height, width) + out = pixels.reshape(height, width, samples) + else: + out = pixels.reshape(height, width) + + # Restore nodata in positions LERC flagged as invalid. LERC + # zero-fills masked pixels in the data array, which would otherwise + # be indistinguishable from real zero readings downstream. + if lerc_mask is not None and masked_fill is not None: + mask_arr = np.asarray(lerc_mask) + if mask_arr.ndim == 2 and out.ndim == 3: + mask_arr = mask_arr[..., None] + invalid = np.broadcast_to(mask_arr == 0, out.shape) + if invalid.any(): + if not out.flags.writeable: + out = out.copy() + np.putmask(out, invalid, masked_fill) + return out import sys as _sys @@ -667,6 +717,8 @@ def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader, bytes_per_sample = bps // 8 is_sub_byte = bps in SUB_BYTE_BPS jpeg_tables = ifd.jpeg_tables + masked_fill = (_resolve_masked_fill(ifd.nodata_str, dtype) + if compression == COMPRESSION_LERC else None) if offsets is None or byte_counts is None: raise ValueError("Missing strip offsets or byte counts") @@ -727,7 +779,8 @@ def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader, strip_data, compression, width, strip_rows, 1, bps, bytes_per_sample, is_sub_byte, dtype, pred, byte_order=header.byte_order, - jpeg_tables=jpeg_tables) + jpeg_tables=jpeg_tables, + masked_fill=masked_fill) src_r0 = max(r0 - strip_row, 0) src_r1 = min(r1 - strip_row, strip_rows) @@ -753,7 +806,8 @@ def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader, strip_data, compression, width, strip_rows, samples, bps, bytes_per_sample, is_sub_byte, dtype, pred, byte_order=header.byte_order, - jpeg_tables=jpeg_tables) + jpeg_tables=jpeg_tables, + masked_fill=masked_fill) src_r0 = max(r0 - strip_row, 0) src_r1 = min(r1 - strip_row, strip_rows) @@ -804,6 +858,8 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader, bytes_per_sample = bps // 8 is_sub_byte = bps in SUB_BYTE_BPS jpeg_tables = ifd.jpeg_tables + masked_fill = (_resolve_masked_fill(ifd.nodata_str, dtype) + if compression == COMPRESSION_LERC else None) offsets = ifd.tile_offsets byte_counts = ifd.tile_byte_counts @@ -900,7 +956,8 @@ def _decode_one(job): tile_data, compression, tw, th, tile_samples, bps, bytes_per_sample, is_sub_byte, dtype, pred, byte_order=header.byte_order, - jpeg_tables=jpeg_tables) + jpeg_tables=jpeg_tables, + masked_fill=masked_fill) if use_parallel: from concurrent.futures import ThreadPoolExecutor @@ -1012,6 +1069,8 @@ def _read_cog_http(url: str, overview_level: int | None = None, bytes_per_sample = bps // 8 is_sub_byte = bps in SUB_BYTE_BPS jpeg_tables = ifd.jpeg_tables + masked_fill = (_resolve_masked_fill(ifd.nodata_str, dtype) + if compression == COMPRESSION_LERC else None) offsets = ifd.tile_offsets byte_counts = ifd.tile_byte_counts @@ -1079,7 +1138,8 @@ def _read_cog_http(url: str, overview_level: int | None = None, tile_data, compression, tw, th, samples, bps, bytes_per_sample, is_sub_byte, dtype, pred, byte_order=header.byte_order, - jpeg_tables=jpeg_tables) + jpeg_tables=jpeg_tables, + masked_fill=masked_fill) y0 = tr * th x0 = tc * tw diff --git a/xrspatial/geotiff/tests/test_lerc_valid_mask.py b/xrspatial/geotiff/tests/test_lerc_valid_mask.py new file mode 100644 index 00000000..0aa5aa93 --- /dev/null +++ b/xrspatial/geotiff/tests/test_lerc_valid_mask.py @@ -0,0 +1,239 @@ +"""Tests for LERC valid-mask handling on decode (issue #A4). + +LERC's ``decode`` returns ``(rc, data, valid_mask, ...)``. When a file +was encoded with a NoData mask, masked pixels in ``data`` are zero +filled. Without consulting ``valid_mask`` the reader cannot tell those +zeros from real measurements, so any nodata-based downstream masking +silently misses them. + +These tests cover: + +* The low-level wrapper ``lerc_decompress_with_mask`` returns the mask + and ``lerc_decompress`` still returns raw bytes for backwards compat. +* TIFF round-trip with a NaN nodata float file: NaN comes back at every + masked position. +* TIFF round-trip with an explicit float sentinel (-9999): masked + positions land on the sentinel. +* TIFF round-trip with an integer sentinel (uint16): masked positions + land on the sentinel. +* Files with no mask round-trip bit-exactly (regression guard). +""" +from __future__ import annotations + +import numpy as np +import pytest + +lerc = pytest.importorskip("lerc") + +from xrspatial.geotiff._compression import ( # noqa: E402 + LERC_AVAILABLE, + lerc_decompress, + lerc_decompress_with_mask, +) + +pytestmark = pytest.mark.skipif( + not LERC_AVAILABLE, + reason="lerc not installed", +) + + +def _encode_with_mask(arr: np.ndarray, mask: np.ndarray | None, + max_z_error: float = 0.0) -> bytes: + """Encode an array with LERC, optionally with a per-pixel valid mask.""" + has_mask = mask is not None + result = lerc.encode(arr, 1, has_mask, mask, max_z_error, 1) + assert result[0] == 0, f"lerc.encode returned error {result[0]}" + return bytes(result[2]) + + +# --------------------------------------------------------------------------- +# Wrapper-level tests (no TIFF writer involvement) +# --------------------------------------------------------------------------- + +class TestLercDecompressWithMask: + """Direct tests of the new ``lerc_decompress_with_mask`` wrapper.""" + + def test_no_mask_returns_none(self): + arr = np.arange(12, dtype=np.float32).reshape(3, 4) + blob = _encode_with_mask(arr, None) + decoded_bytes, mask = lerc_decompress_with_mask(blob) + out = np.frombuffer(decoded_bytes, dtype=np.float32).reshape(3, 4) + np.testing.assert_array_equal(out, arr) + assert mask is None + + def test_all_valid_mask_collapses_to_none(self): + arr = np.arange(12, dtype=np.float32).reshape(3, 4) + valid = np.ones((3, 4), dtype=np.uint8) + blob = _encode_with_mask(arr, valid) + decoded_bytes, mask = lerc_decompress_with_mask(blob) + out = np.frombuffer(decoded_bytes, dtype=np.float32).reshape(3, 4) + np.testing.assert_array_equal(out, arr) + # All-True mask carries no information, treated as no mask. + assert mask is None + + def test_partial_mask_returns_array(self): + arr = np.array([[1.0, 2.0, 3.0]], dtype=np.float32) + valid = np.array([[1, 0, 1]], dtype=np.uint8) + blob = _encode_with_mask(arr, valid) + decoded_bytes, mask = lerc_decompress_with_mask(blob) + out = np.frombuffer(decoded_bytes, dtype=np.float32).reshape(1, 3) + # LERC zero-fills masked positions in the data array. + np.testing.assert_array_equal(out, np.array([[1.0, 0.0, 3.0]], + dtype=np.float32)) + assert mask is not None + np.testing.assert_array_equal(np.asarray(mask).reshape(1, 3), + np.array([[1, 0, 1]], dtype=np.uint8)) + + def test_legacy_decompress_drops_mask(self): + """``lerc_decompress`` still returns just raw bytes.""" + arr = np.array([[1.0, 2.0, 3.0]], dtype=np.float32) + valid = np.array([[1, 0, 1]], dtype=np.uint8) + blob = _encode_with_mask(arr, valid) + out_bytes = lerc_decompress(blob) + assert isinstance(out_bytes, bytes) + out = np.frombuffer(out_bytes, dtype=np.float32).reshape(1, 3) + np.testing.assert_array_equal(out, np.array([[1.0, 0.0, 3.0]], + dtype=np.float32)) + + +# --------------------------------------------------------------------------- +# Reader-level (TIFF round-trip) tests +# --------------------------------------------------------------------------- + +@pytest.fixture +def lerc_writer_with_mask(monkeypatch): + """Patch ``lerc_compress`` to embed a valid-mask the writer can't pass. + + The writer hard-codes ``hasMask=False`` in its call to ``lerc.encode`` + because there's no plumbing for per-pixel mask data. The fixture + swaps in an encoder that derives the mask from a holder ``invalid`` + callable: given the (height, width) tile shape and the decoded + pixel array it returns a uint8 valid mask (1=valid, 0=invalid) the + same shape. This lets the test specify masking by predicate so the + mask matches whatever tile padding the writer emits. + """ + holder = {"invalid": None} + + def _patched(data, width, height, samples=1, + dtype=np.dtype('float32'), max_z_error=0.0): + if samples == 1: + arr = np.frombuffer(data, dtype=dtype).reshape(height, width) + else: + arr = np.frombuffer(data, dtype=dtype).reshape( + height, width, samples) + invalid_pred = holder["invalid"] + if invalid_pred is None: + mask = None + has_mask = False + else: + invalid = invalid_pred(arr) + mask = np.where(invalid, np.uint8(0), np.uint8(1)) + has_mask = True + result = lerc.encode(arr, samples, has_mask, mask, max_z_error, 1) + if result[0] != 0: + raise RuntimeError( + f"LERC encode failed with error code {result[0]}") + return bytes(result[2]) + + monkeypatch.setattr( + "xrspatial.geotiff._compression.lerc_compress", _patched, + ) + return holder + + +class TestLercTiffRoundTripWithMask: + """End-to-end TIFF round-trips that exercise the reader's mask merge.""" + + def test_float32_nan_nodata(self, tmp_path, lerc_writer_with_mask): + from xrspatial.geotiff._writer import write + from xrspatial.geotiff._reader import read_to_array + + arr = np.arange(1, 65, dtype=np.float32).reshape(8, 8) + # Mask positions (0, 1) and (5, 4); the rest stays valid. + invalid_positions = {(0, 1), (5, 4)} + + def invalid_pred(a): + m = np.zeros(a.shape[:2], dtype=bool) + for r, c in invalid_positions: + m[r, c] = True + return m + lerc_writer_with_mask["invalid"] = invalid_pred + + path = str(tmp_path / "lerc_mask_nan.tif") + write(arr, path, compression="lerc", tiled=True, tile_size=8, + nodata=float("nan")) + + out, _geo = read_to_array(path) + for r in range(8): + for c in range(8): + if (r, c) in invalid_positions: + assert np.isnan(out[r, c]), \ + f"expected NaN at ({r},{c}), got {out[r, c]}" + else: + assert out[r, c] == arr[r, c], \ + f"value mismatch at ({r},{c})" + + def test_float32_sentinel_nodata(self, tmp_path, lerc_writer_with_mask): + from xrspatial.geotiff._writer import write + from xrspatial.geotiff._reader import read_to_array + + arr = np.arange(1, 65, dtype=np.float32).reshape(8, 8) + invalid_positions = {(0, 1), (3, 3), (7, 7)} + + def invalid_pred(a): + m = np.zeros(a.shape[:2], dtype=bool) + for r, c in invalid_positions: + m[r, c] = True + return m + lerc_writer_with_mask["invalid"] = invalid_pred + + path = str(tmp_path / "lerc_mask_sentinel_f32.tif") + write(arr, path, compression="lerc", tiled=True, tile_size=8, + nodata=-9999.0) + + out, _geo = read_to_array(path) + for r in range(8): + for c in range(8): + if (r, c) in invalid_positions: + assert out[r, c] == np.float32(-9999.0), \ + f"expected sentinel at ({r},{c}), got {out[r, c]}" + else: + assert out[r, c] == arr[r, c] + + def test_uint16_sentinel_nodata(self, tmp_path, lerc_writer_with_mask): + from xrspatial.geotiff._writer import write + from xrspatial.geotiff._reader import read_to_array + + arr = (np.arange(1, 65, dtype=np.uint16) * 100).reshape(8, 8) + invalid_positions = {(0, 1), (4, 4)} + + def invalid_pred(a): + m = np.zeros(a.shape[:2], dtype=bool) + for r, c in invalid_positions: + m[r, c] = True + return m + lerc_writer_with_mask["invalid"] = invalid_pred + + path = str(tmp_path / "lerc_mask_uint16.tif") + write(arr, path, compression="lerc", tiled=True, tile_size=8, + nodata=65535) + + out, _geo = read_to_array(path) + for r in range(8): + for c in range(8): + if (r, c) in invalid_positions: + assert out[r, c] == np.uint16(65535) + else: + assert out[r, c] == arr[r, c] + + def test_no_mask_roundtrip_bitexact(self, tmp_path): + """Sanity: an all-valid LERC file (no mask) still round-trips.""" + from xrspatial.geotiff._writer import write + from xrspatial.geotiff._reader import read_to_array + + arr = np.arange(64, dtype=np.float32).reshape(8, 8) + path = str(tmp_path / "lerc_no_mask.tif") + write(arr, path, compression="lerc", tiled=True, tile_size=8) + + out, _geo = read_to_array(path) + np.testing.assert_array_equal(out, arr) From 0ee0f5aa920539b63f1f68f6e519a3ad30ec2ea0 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Fri, 8 May 2026 13:57:50 -0700 Subject: [PATCH 2/2] Apply LERC valid-mask in GPU decode path (depends on #1529) The CPU LERC reader from #1529 honours the LERC valid-mask and writes the file's nodata sentinel into masked pixels. The GPU LERC tile-decode path was still dropping the mask, so masked pixels read back as 0 on GPU but as NaN or the sentinel on CPU. Same bug, GPU side. Changes: _gpu_decode.py: the LERC branch now calls lerc_decompress_with_mask per tile and keeps any returned valid-mask. After predictor decode and tile assembly, _apply_lerc_mask_fill builds an invalid mask on host (matching the GPU assembly kernel's tile-grid layout), copies it to GPU once, and overwrites masked positions with the resolved fill value. Tiles LERC reports as fully valid skip the host work, so the no-mask path stays zero-copy. gpu_decode_tiles and gpu_decode_tiles_from_file get a masked_fill keyword that is forwarded through. read_geotiff_gpu computes it via _resolve_masked_fill(ifd.nodata_str, file_dtype) for LERC sources. tests/test_lerc_valid_mask_gpu.py: 4 tests covering float32+NaN, float32+sentinel, uint16+sentinel, and the no-mask regression. Each compares read_geotiff_gpu output to read_to_array output for the same file. Skipped unless cupy + CUDA + lerc are available. Out of scope: the encode side. The xrspatial writer still hard-codes hasMask=False; the tests reuse the lerc_compress monkeypatch fixture from the CPU PR to inject a valid-mask through lerc.encode directly. --- xrspatial/geotiff/__init__.py | 10 + xrspatial/geotiff/_gpu_decode.py | 105 +++++++++- .../geotiff/tests/test_lerc_valid_mask_gpu.py | 181 ++++++++++++++++++ 3 files changed, 291 insertions(+), 5 deletions(-) create mode 100644 xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index 11e740d0..a1b714fc 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -1469,7 +1469,9 @@ def read_geotiff_gpu(source: str, *, from ._reader import ( _FileSource, _check_dimensions, MAX_PIXELS_DEFAULT, _coerce_path, + _resolve_masked_fill, ) + from ._compression import COMPRESSION_LERC from ._header import ( parse_header, parse_all_ifds, select_overview_ifd, validate_tile_layout, ) @@ -1552,6 +1554,12 @@ def read_geotiff_gpu(source: str, *, # Sparse tiles (byte_count == 0) are unsupported on the GPU pipeline; # the CPU reader fills them with nodata and copies onto the GPU. has_sparse_tile = any(bc == 0 for bc in byte_counts) + # LERC tiles can carry a per-pixel valid mask that GDAL writes + # zero-filled in the data array. Compute the nodata fill the same + # way the CPU reader does so the GPU decode path can restore it + # post-assembly (mirrors PR #1529 for the CPU path). + masked_fill = (_resolve_masked_fill(ifd.nodata_str, file_dtype) + if compression == COMPRESSION_LERC else None) if has_sparse_tile: arr_cpu, _ = read_to_array(source, overview_level=overview_level) arr_gpu = cupy.asarray(arr_cpu) @@ -1565,6 +1573,7 @@ def read_geotiff_gpu(source: str, *, tw, th, width, height, compression, predictor, file_dtype, samples, byte_order=header.byte_order, + masked_fill=masked_fill, ) except Exception as e: if gpu == 'strict': @@ -1596,6 +1605,7 @@ def read_geotiff_gpu(source: str, *, tw, th, width, height, compression, predictor, file_dtype, samples, byte_order=header.byte_order, + masked_fill=masked_fill, ) except Exception as e: if gpu == 'strict': diff --git a/xrspatial/geotiff/_gpu_decode.py b/xrspatial/geotiff/_gpu_decode.py index d18b5cfd..078a05d2 100644 --- a/xrspatial/geotiff/_gpu_decode.py +++ b/xrspatial/geotiff/_gpu_decode.py @@ -1406,6 +1406,7 @@ def gpu_decode_tiles_from_file( dtype: np.dtype, samples: int = 1, byte_order: str = '<', + masked_fill=None, ): """Decode tiles from a file, using GDS if available. @@ -1450,7 +1451,7 @@ def gpu_decode_tiles_from_file( return gpu_decode_tiles( compressed_tiles, tile_width, tile_height, image_width, image_height, compression, predictor, dtype, samples, - byte_order=byte_order) + byte_order=byte_order, masked_fill=masked_fill) def _try_nvcomp_from_device_bufs(d_tiles, tile_bytes, compression): @@ -1665,6 +1666,7 @@ def gpu_decode_tiles( dtype: np.dtype, samples: int = 1, byte_order: str = '<', + masked_fill=None, ): """Decode and assemble TIFF tiles entirely on GPU. @@ -1684,6 +1686,12 @@ def gpu_decode_tiles( Output pixel dtype. samples : int Samples per pixel. + masked_fill : scalar or None + Value to write into pixels that LERC reports as invalid. Only + consulted for LERC-compressed inputs (tag 34887). ``None`` + leaves any masked pixels at LERC's zero fill. Use + ``_resolve_masked_fill(ifd.nodata_str, dtype)`` from + ``_reader.py`` to mirror the CPU reader's nodata semantics. Returns ------- @@ -1696,6 +1704,12 @@ def gpu_decode_tiles( bytes_per_pixel = dtype.itemsize * samples tile_bytes = tile_width * tile_height * bytes_per_pixel + # Per-tile LERC valid masks; populated only by the LERC branch + # below. Kept as a flat local so the post-assembly fill block at + # the end of this function can find it regardless of which decode + # branch ran. + _lerc_masks: list[np.ndarray | None] | None = None + # Try nvCOMP batch decompression first (much faster if available) nvcomp_result = _try_nvcomp_batch_decompress( compressed_tiles, tile_bytes, compression) @@ -1823,19 +1837,34 @@ def gpu_decode_tiles( d_decomp_offsets = cupy.asarray(decomp_offsets) elif compression == 34887: # LERC - from ._compression import lerc_decompress + from ._compression import lerc_decompress_with_mask raw_host = np.empty(n_tiles * tile_bytes, dtype=np.uint8) + # Per-tile valid masks captured from LERC. None entries mean + # "all valid" (LERC returned no mask, or an all-True mask). + # The host-side mask buffer is materialised lazily: it stays + # None until at least one tile reports a real partial mask. + per_tile_masks: list[np.ndarray | None] = [None] * n_tiles + any_lerc_mask = False for i, tile in enumerate(compressed_tiles): start = i * tile_bytes - chunk = np.frombuffer( - lerc_decompress(tile, tile_width, tile_height, samples), - dtype=np.uint8) + decoded_bytes, valid_mask = lerc_decompress_with_mask(tile) + chunk = np.frombuffer(decoded_bytes, dtype=np.uint8) raw_host[start:start + min(len(chunk), tile_bytes)] = \ chunk[:tile_bytes] if len(chunk) >= tile_bytes else \ np.pad(chunk, (0, tile_bytes - len(chunk))) + if valid_mask is not None: + per_tile_masks[i] = np.asarray(valid_mask) + any_lerc_mask = True d_decomp = cupy.asarray(raw_host) decomp_offsets = np.arange(n_tiles, dtype=np.int64) * tile_bytes d_decomp_offsets = cupy.asarray(decomp_offsets) + if any_lerc_mask: + # Stash per-tile masks for the post-assembly fill pass below. + # Stored in a ``_lerc_masks`` local that the LERC fill block + # picks up after predictor decode and tile assembly. + _lerc_masks = per_tile_masks + else: + _lerc_masks = None elif compression == 1: # Uncompressed raw_host = np.empty(n_tiles * tile_bytes, dtype=np.uint8) @@ -1919,6 +1948,72 @@ def gpu_decode_tiles( if byte_order == '>' and dtype.itemsize > 1 and predictor != 2: # cupy.ndarray has no .byteswap(), so use the dtype-view helper. out = _xp_byteswap(out) + + # LERC valid-mask fill: GDAL writes LERC TIFFs with masked pixels + # zero-filled in the data array, so without restoring nodata here a + # masked pixel reads back as a real zero measurement. Mirrors the + # CPU path in ``_decode_strip_or_tile`` (PR #1529). + if _lerc_masks is not None and masked_fill is not None: + out = _apply_lerc_mask_fill( + out, _lerc_masks, tile_width, tile_height, + image_width, image_height, samples, masked_fill) + return out + + +def _apply_lerc_mask_fill(out, per_tile_masks, tile_width, tile_height, + image_width, image_height, samples, masked_fill): + """Write *masked_fill* into LERC-invalid positions of an assembled image. + + Each tile contributes either an ``(h, w)``/``(h, w, samples)`` valid + mask (1=valid, 0=invalid) or ``None`` for "all valid". The host + builds a single assembled boolean invalid-mask sized to the output + image, transfers it to the GPU once, and uses it to overwrite + masked positions on device. + """ + import cupy + + n_tiles = len(per_tile_masks) + tiles_across = math.ceil(image_width / tile_width) + tiles_down = math.ceil(image_height / tile_height) + if n_tiles != tiles_across * tiles_down: + # Defensive: tile grid mismatch means we cannot place masks + # safely. Skip the fill rather than corrupt the output. + return out + + invalid = np.zeros((image_height, image_width), dtype=bool) + for idx, mask in enumerate(per_tile_masks): + if mask is None: + continue + tr = idx // tiles_across + tc = idx % tiles_across + y0 = tr * tile_height + x0 = tc * tile_width + # Trim the tile mask to the visible portion of the image so + # right- and bottom-edge tile padding does not leak into the + # assembled mask. + h = min(tile_height, image_height - y0) + w = min(tile_width, image_width - x0) + if h <= 0 or w <= 0: + continue + m = np.asarray(mask) + # LERC may report the mask as (h, w) or (h, w, samples); for the + # invalid-fill we collapse multi-sample masks via "any sample + # invalid". + if m.ndim == 3: + m2 = (m == 0).any(axis=2) + else: + m2 = (m == 0) + invalid[y0:y0 + h, x0:x0 + w] = m2[:h, :w] + + if not invalid.any(): + return out + + d_invalid = cupy.asarray(invalid) + if out.ndim == 3: + # Broadcast (H, W) mask across the sample axis. + out[d_invalid, ...] = out.dtype.type(masked_fill) + else: + out[d_invalid] = out.dtype.type(masked_fill) return out diff --git a/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py b/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py new file mode 100644 index 00000000..0f39697b --- /dev/null +++ b/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py @@ -0,0 +1,181 @@ +"""GPU follow-up to PR #1529 (LERC valid-mask on decode). + +The CPU LERC reader honours the LERC valid-mask and writes the file's +nodata sentinel into masked pixels. The GPU LERC tile-decode path used +to discard the mask, so masked pixels read back as LERC's zero fill +(real-looking measurements at z == 0) on GPU but as NaN/sentinel on +CPU. These tests confirm the GPU path now matches the CPU path for +representative LERC mask combinations. + +Mirrors the structure of ``test_lerc_valid_mask.py`` but compares +``read_geotiff_gpu`` output to ``read_to_array`` output for each case. +""" +from __future__ import annotations + +import importlib.util + +import numpy as np +import pytest + +lerc = pytest.importorskip("lerc") + +from xrspatial.geotiff._compression import LERC_AVAILABLE # noqa: E402 + + +def _gpu_available() -> bool: + """True if cupy is importable and CUDA is initialised.""" + if importlib.util.find_spec("cupy") is None: + return False + try: + import cupy + return bool(cupy.cuda.is_available()) + except Exception: + return False + + +_HAS_GPU = _gpu_available() +_gpu_only = pytest.mark.skipif( + not (_HAS_GPU and LERC_AVAILABLE), + reason="cupy + CUDA + lerc required", +) + + +@pytest.fixture +def lerc_writer_with_mask(monkeypatch): + """Patch ``lerc_compress`` to embed a valid-mask the writer can't pass. + + The xrspatial writer hard-codes ``hasMask=False`` in its call to + ``lerc.encode``. Tests inject a per-tile mask through this holder's + ``invalid`` predicate so the masked pixels survive the encode and + show up at decode time. Same pattern as the CPU test fixture in + ``test_lerc_valid_mask.py``. + """ + holder = {"invalid": None} + + def _patched(data, width, height, samples=1, + dtype=np.dtype('float32'), max_z_error=0.0): + if samples == 1: + arr = np.frombuffer(data, dtype=dtype).reshape(height, width) + else: + arr = np.frombuffer(data, dtype=dtype).reshape( + height, width, samples) + invalid_pred = holder["invalid"] + if invalid_pred is None: + mask = None + has_mask = False + else: + invalid = invalid_pred(arr) + mask = np.where(invalid, np.uint8(0), np.uint8(1)) + has_mask = True + result = lerc.encode(arr, samples, has_mask, mask, max_z_error, 1) + if result[0] != 0: + raise RuntimeError( + f"LERC encode failed with error code {result[0]}") + return bytes(result[2]) + + monkeypatch.setattr( + "xrspatial.geotiff._compression.lerc_compress", _patched, + ) + return holder + + +def _read_cpu_gpu(path): + """Read *path* with both readers and return ``(cpu_array, gpu_host_array)``.""" + from xrspatial.geotiff import read_geotiff_gpu + from xrspatial.geotiff._reader import read_to_array + + cpu, _geo = read_to_array(path) + gpu_da = read_geotiff_gpu(path, gpu='strict') + gpu_host = gpu_da.data.get() + return cpu, gpu_host + + +@_gpu_only +class TestGpuLercValidMask: + """End-to-end TIFF round-trips comparing GPU vs CPU output.""" + + def test_float32_nan_nodata(self, tmp_path, lerc_writer_with_mask): + """Float32 LERC + NaN nodata: GPU output matches CPU output.""" + from xrspatial.geotiff._writer import write + + arr = np.arange(1, 65, dtype=np.float32).reshape(8, 8) + invalid_positions = {(0, 1), (5, 4)} + + def invalid_pred(a): + m = np.zeros(a.shape[:2], dtype=bool) + for r, c in invalid_positions: + m[r, c] = True + return m + lerc_writer_with_mask["invalid"] = invalid_pred + + path = str(tmp_path / "lerc_mask_nan_gpu.tif") + write(arr, path, compression="lerc", tiled=True, tile_size=8, + nodata=float("nan")) + + cpu, gpu = _read_cpu_gpu(path) + # NaN positions + for (r, c) in invalid_positions: + assert np.isnan(cpu[r, c]) + assert np.isnan(gpu[r, c]) + # Valid positions agree exactly + cpu_valid = np.where(np.isnan(cpu), 0.0, cpu) + gpu_valid = np.where(np.isnan(gpu), 0.0, gpu) + np.testing.assert_array_equal(cpu_valid, gpu_valid) + + def test_float32_sentinel_nodata(self, tmp_path, lerc_writer_with_mask): + """Float32 LERC + sentinel nodata (-9999): GPU matches CPU.""" + from xrspatial.geotiff._writer import write + + arr = np.arange(1, 65, dtype=np.float32).reshape(8, 8) + invalid_positions = {(0, 1), (3, 3), (7, 7)} + + def invalid_pred(a): + m = np.zeros(a.shape[:2], dtype=bool) + for r, c in invalid_positions: + m[r, c] = True + return m + lerc_writer_with_mask["invalid"] = invalid_pred + + path = str(tmp_path / "lerc_mask_sentinel_f32_gpu.tif") + write(arr, path, compression="lerc", tiled=True, tile_size=8, + nodata=-9999.0) + + cpu, gpu = _read_cpu_gpu(path) + np.testing.assert_array_equal(cpu, gpu) + for (r, c) in invalid_positions: + assert gpu[r, c] == np.float32(-9999.0) + + def test_uint16_sentinel_nodata(self, tmp_path, lerc_writer_with_mask): + """Uint16 LERC + sentinel nodata (65535): GPU matches CPU.""" + from xrspatial.geotiff._writer import write + + arr = (np.arange(1, 65, dtype=np.uint16) * 100).reshape(8, 8) + invalid_positions = {(0, 1), (4, 4)} + + def invalid_pred(a): + m = np.zeros(a.shape[:2], dtype=bool) + for r, c in invalid_positions: + m[r, c] = True + return m + lerc_writer_with_mask["invalid"] = invalid_pred + + path = str(tmp_path / "lerc_mask_uint16_gpu.tif") + write(arr, path, compression="lerc", tiled=True, tile_size=8, + nodata=65535) + + cpu, gpu = _read_cpu_gpu(path) + np.testing.assert_array_equal(cpu, gpu) + for (r, c) in invalid_positions: + assert gpu[r, c] == np.uint16(65535) + + def test_no_mask_roundtrip_bitexact(self, tmp_path): + """All-valid LERC (no encoded mask): GPU and CPU agree bit-exact.""" + from xrspatial.geotiff._writer import write + + arr = np.arange(64, dtype=np.float32).reshape(8, 8) + path = str(tmp_path / "lerc_no_mask_gpu.tif") + write(arr, path, compression="lerc", tiled=True, tile_size=8) + + cpu, gpu = _read_cpu_gpu(path) + np.testing.assert_array_equal(cpu, arr) + np.testing.assert_array_equal(gpu, arr)