xarray-contrib · brendancol · May 8, 2026 · May 8, 2026
diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py
@@ -1469,7 +1469,9 @@ def read_geotiff_gpu(source: str, *,
 
     from ._reader import (
         _FileSource, _check_dimensions, MAX_PIXELS_DEFAULT, _coerce_path,
+        _resolve_masked_fill,
     )
+    from ._compression import COMPRESSION_LERC
     from ._header import (
         parse_header, parse_all_ifds, select_overview_ifd, validate_tile_layout,
     )
@@ -1552,6 +1554,12 @@ def read_geotiff_gpu(source: str, *,
     # Sparse tiles (byte_count == 0) are unsupported on the GPU pipeline;
     # the CPU reader fills them with nodata and copies onto the GPU.
     has_sparse_tile = any(bc == 0 for bc in byte_counts)
+    # LERC tiles can carry a per-pixel valid mask that GDAL writes
+    # zero-filled in the data array.  Compute the nodata fill the same
+    # way the CPU reader does so the GPU decode path can restore it
+    # post-assembly (mirrors PR #1529 for the CPU path).
+    masked_fill = (_resolve_masked_fill(ifd.nodata_str, file_dtype)
+                   if compression == COMPRESSION_LERC else None)
     if has_sparse_tile:
         arr_cpu, _ = read_to_array(source, overview_level=overview_level)
         arr_gpu = cupy.asarray(arr_cpu)
@@ -1565,6 +1573,7 @@ def read_geotiff_gpu(source: str, *,
                 tw, th, width, height,
                 compression, predictor, file_dtype, samples,
                 byte_order=header.byte_order,
+                masked_fill=masked_fill,
             )
         except Exception as e:
             if gpu == 'strict':
@@ -1596,6 +1605,7 @@ def read_geotiff_gpu(source: str, *,
                 tw, th, width, height,
                 compression, predictor, file_dtype, samples,
                 byte_order=header.byte_order,
+                masked_fill=masked_fill,
             )
         except Exception as e:
             if gpu == 'strict':

diff --git a/xrspatial/geotiff/_compression.py b/xrspatial/geotiff/_compression.py
@@ -1046,7 +1046,29 @@ def jpeg2000_compress(data: bytes, width: int, height: int,
 
 def lerc_decompress(data: bytes, width: int = 0, height: int = 0,
                     samples: int = 1) -> bytes:
-    """Decompress LERC data. Requires the ``lerc`` package."""
+    """Decompress LERC data. Requires the ``lerc`` package.
+
+    Returns the raw decoded pixel bytes.  Any LERC valid-mask is dropped
+    here; masked pixels are returned as LERC's zero fill (the wire
+    format's default).  Callers that need to honour the file's nodata
+    value should use :func:`lerc_decompress_with_mask` instead and apply
+    nodata at the array level once dtype is known.
+    """
+    decoded_bytes, _mask = lerc_decompress_with_mask(data)
+    return decoded_bytes
+
+
+def lerc_decompress_with_mask(data: bytes):
+    """Decompress LERC data and return ``(bytes, valid_mask_or_None)``.
+
+    ``valid_mask`` is ``None`` when LERC reports the block is fully
+    valid (no encoded mask, or a mask that is True everywhere);
+    otherwise it is a numpy array whose shape matches the decoded data
+    (height, width) or (height, width, samples), with ``0`` marking
+    pixels the encoder flagged as invalid.  LERC zero fills masked
+    positions in the data array, so the returned mask is the only
+    signal that lets a reader restore the file's nodata sentinel.
+    """
     if not LERC_AVAILABLE:
         raise ImportError(
             "lerc is required to read LERC-compressed TIFFs. "
@@ -1056,7 +1078,13 @@ def lerc_decompress(data: bytes, width: int = 0, height: int = 0,
     if result[0] != 0:
         raise RuntimeError(f"LERC decode failed with error code {result[0]}")
     arr = result[1]
-    return arr.tobytes()
+    valid_mask = result[2] if len(result) > 2 else None
+    if valid_mask is None:
+        return arr.tobytes(), None
+    valid_mask = np.asarray(valid_mask)
+    if valid_mask.size == 0 or bool(valid_mask.all()):
+        return arr.tobytes(), None
+    return arr.tobytes(), valid_mask
 
 
 def lerc_compress(data: bytes, width: int, height: int,

diff --git a/xrspatial/geotiff/_gpu_decode.py b/xrspatial/geotiff/_gpu_decode.py
@@ -1406,6 +1406,7 @@ def gpu_decode_tiles_from_file(
     dtype: np.dtype,
     samples: int = 1,
     byte_order: str = '<',
+    masked_fill=None,
 ):
     """Decode tiles from a file, using GDS if available.
 
@@ -1450,7 +1451,7 @@ def gpu_decode_tiles_from_file(
     return gpu_decode_tiles(
         compressed_tiles, tile_width, tile_height,
         image_width, image_height, compression, predictor, dtype, samples,
-        byte_order=byte_order)
+        byte_order=byte_order, masked_fill=masked_fill)
 
 
 def _try_nvcomp_from_device_bufs(d_tiles, tile_bytes, compression):
@@ -1665,6 +1666,7 @@ def gpu_decode_tiles(
     dtype: np.dtype,
     samples: int = 1,
     byte_order: str = '<',
+    masked_fill=None,
 ):
     """Decode and assemble TIFF tiles entirely on GPU.
 
@@ -1684,6 +1686,12 @@ def gpu_decode_tiles(
         Output pixel dtype.
     samples : int
         Samples per pixel.
+    masked_fill : scalar or None
+        Value to write into pixels that LERC reports as invalid.  Only
+        consulted for LERC-compressed inputs (tag 34887).  ``None``
+        leaves any masked pixels at LERC's zero fill.  Use
+        ``_resolve_masked_fill(ifd.nodata_str, dtype)`` from
+        ``_reader.py`` to mirror the CPU reader's nodata semantics.
 
     Returns
     -------
@@ -1696,6 +1704,12 @@ def gpu_decode_tiles(
     bytes_per_pixel = dtype.itemsize * samples
     tile_bytes = tile_width * tile_height * bytes_per_pixel
 
+    # Per-tile LERC valid masks; populated only by the LERC branch
+    # below.  Kept as a flat local so the post-assembly fill block at
+    # the end of this function can find it regardless of which decode
+    # branch ran.
+    _lerc_masks: list[np.ndarray | None] | None = None
+
     # Try nvCOMP batch decompression first (much faster if available)
     nvcomp_result = _try_nvcomp_batch_decompress(
         compressed_tiles, tile_bytes, compression)
@@ -1823,19 +1837,34 @@ def gpu_decode_tiles(
             d_decomp_offsets = cupy.asarray(decomp_offsets)
 
     elif compression == 34887:  # LERC
-        from ._compression import lerc_decompress
+        from ._compression import lerc_decompress_with_mask
         raw_host = np.empty(n_tiles * tile_bytes, dtype=np.uint8)
+        # Per-tile valid masks captured from LERC.  None entries mean
+        # "all valid" (LERC returned no mask, or an all-True mask).
+        # The host-side mask buffer is materialised lazily: it stays
+        # None until at least one tile reports a real partial mask.
+        per_tile_masks: list[np.ndarray | None] = [None] * n_tiles
+        any_lerc_mask = False
         for i, tile in enumerate(compressed_tiles):
             start = i * tile_bytes
-            chunk = np.frombuffer(
-                lerc_decompress(tile, tile_width, tile_height, samples),
-                dtype=np.uint8)
+            decoded_bytes, valid_mask = lerc_decompress_with_mask(tile)
+            chunk = np.frombuffer(decoded_bytes, dtype=np.uint8)
             raw_host[start:start + min(len(chunk), tile_bytes)] = \
                 chunk[:tile_bytes] if len(chunk) >= tile_bytes else \
                 np.pad(chunk, (0, tile_bytes - len(chunk)))
+            if valid_mask is not None:
+                per_tile_masks[i] = np.asarray(valid_mask)
+                any_lerc_mask = True
         d_decomp = cupy.asarray(raw_host)
         decomp_offsets = np.arange(n_tiles, dtype=np.int64) * tile_bytes
         d_decomp_offsets = cupy.asarray(decomp_offsets)
+        if any_lerc_mask:
+            # Stash per-tile masks for the post-assembly fill pass below.
+            # Stored in a ``_lerc_masks`` local that the LERC fill block
+            # picks up after predictor decode and tile assembly.
+            _lerc_masks = per_tile_masks
+        else:
+            _lerc_masks = None
 
     elif compression == 1:  # Uncompressed
         raw_host = np.empty(n_tiles * tile_bytes, dtype=np.uint8)
@@ -1919,6 +1948,72 @@ def gpu_decode_tiles(
     if byte_order == '>' and dtype.itemsize > 1 and predictor != 2:
         # cupy.ndarray has no .byteswap(), so use the dtype-view helper.
         out = _xp_byteswap(out)
+
+    # LERC valid-mask fill: GDAL writes LERC TIFFs with masked pixels
+    # zero-filled in the data array, so without restoring nodata here a
+    # masked pixel reads back as a real zero measurement.  Mirrors the
+    # CPU path in ``_decode_strip_or_tile`` (PR #1529).
+    if _lerc_masks is not None and masked_fill is not None:
+        out = _apply_lerc_mask_fill(
+            out, _lerc_masks, tile_width, tile_height,
+            image_width, image_height, samples, masked_fill)
+    return out
+
+
+def _apply_lerc_mask_fill(out, per_tile_masks, tile_width, tile_height,
+                          image_width, image_height, samples, masked_fill):
+    """Write *masked_fill* into LERC-invalid positions of an assembled image.
+
+    Each tile contributes either an ``(h, w)``/``(h, w, samples)`` valid
+    mask (1=valid, 0=invalid) or ``None`` for "all valid".  The host
+    builds a single assembled boolean invalid-mask sized to the output
+    image, transfers it to the GPU once, and uses it to overwrite
+    masked positions on device.
+    """
+    import cupy
+
+    n_tiles = len(per_tile_masks)
+    tiles_across = math.ceil(image_width / tile_width)
+    tiles_down = math.ceil(image_height / tile_height)
+    if n_tiles != tiles_across * tiles_down:
+        # Defensive: tile grid mismatch means we cannot place masks
+        # safely.  Skip the fill rather than corrupt the output.
+        return out
+
+    invalid = np.zeros((image_height, image_width), dtype=bool)
+    for idx, mask in enumerate(per_tile_masks):
+        if mask is None:
+            continue
+        tr = idx // tiles_across
+        tc = idx % tiles_across
+        y0 = tr * tile_height
+        x0 = tc * tile_width
+        # Trim the tile mask to the visible portion of the image so
+        # right- and bottom-edge tile padding does not leak into the
+        # assembled mask.
+        h = min(tile_height, image_height - y0)
+        w = min(tile_width, image_width - x0)
+        if h <= 0 or w <= 0:
+            continue
+        m = np.asarray(mask)
+        # LERC may report the mask as (h, w) or (h, w, samples); for the
+        # invalid-fill we collapse multi-sample masks via "any sample
+        # invalid".
+        if m.ndim == 3:
+            m2 = (m == 0).any(axis=2)
+        else:
+            m2 = (m == 0)
+        invalid[y0:y0 + h, x0:x0 + w] = m2[:h, :w]
+
+    if not invalid.any():
+        return out
+
+    d_invalid = cupy.asarray(invalid)
+    if out.ndim == 3:
+        # Broadcast (H, W) mask across the sample axis.
+        out[d_invalid, ...] = out.dtype.type(masked_fill)
+    else:
+        out[d_invalid] = out.dtype.type(masked_fill)
     return out