From 4a1416e3b181e8acc8e5d43bd0e1221b8c78afe8 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Fri, 8 May 2026 13:45:41 -0700
Subject: [PATCH 1/2] Apply LERC valid-mask on decode so masked pixels become
 nodata

lerc.decode returns (rc, data, valid_mask, ...) but the wrapper only
forwarded data.tobytes(). GDAL writes LERC TIFFs whose masked pixels
are zero-filled in the data array, so downstream code that masks by
nodata silently sees those zeros as real measurements.

What changed:

_compression.py: new lerc_decompress_with_mask(data) ->
(bytes, valid_mask_or_None). lerc_decompress now calls it and drops
the mask, preserving its existing signature. An all-True mask
collapses to None so callers skip the fill pass.

_reader.py: _decode_strip_or_tile takes a separate path for LERC,
calls the new wrapper, and writes nodata into masked positions after
reshape. A small _resolve_masked_fill helper reads ifd.nodata_str and
falls back to NaN for float dtypes or 0 for integer dtypes when no
GDAL_NODATA is set. Each call site (strip reader, tile reader,
COG-over-HTTP reader) passes a precomputed masked_fill.

tests/test_lerc_valid_mask.py: 8 new tests. Wrapper tests cover the
no-mask path, the all-True-mask collapse, and a partial mask. TIFF
round-trip tests cover float32 with NaN nodata, float32 with -9999,
uint16 with 65535, and a regression that an unmasked LERC file still
round-trips bit-exact. The round-trip tests monkeypatch lerc_compress
to inject a per-tile mask through a predicate, since the writer
hard-codes hasMask=False.

Option A was chosen (plumb nodata into the decode path). Option B
would have required changing decompress()'s return shape, which has
callers in tests and in _gpu_decode.py.

Out of scope: the GPU LERC path in _gpu_decode.py still drops the
mask. The constraints excluded changes there.
---
 xrspatial/geotiff/_compression.py             |  32 ++-
 xrspatial/geotiff/_reader.py                  |  80 +++++-
 .../geotiff/tests/test_lerc_valid_mask.py     | 239 ++++++++++++++++++
 3 files changed, 339 insertions(+), 12 deletions(-)
 create mode 100644 xrspatial/geotiff/tests/test_lerc_valid_mask.py

diff --git a/xrspatial/geotiff/_compression.py b/xrspatial/geotiff/_compression.py
index a9558b5a..007c78bd 100644
--- a/xrspatial/geotiff/_compression.py
+++ b/xrspatial/geotiff/_compression.py
@@ -1046,7 +1046,29 @@ def jpeg2000_compress(data: bytes, width: int, height: int,
 
 def lerc_decompress(data: bytes, width: int = 0, height: int = 0,
                     samples: int = 1) -> bytes:
-    """Decompress LERC data. Requires the ``lerc`` package."""
+    """Decompress LERC data. Requires the ``lerc`` package.
+
+    Returns the raw decoded pixel bytes.  Any LERC valid-mask is dropped
+    here; masked pixels are returned as LERC's zero fill (the wire
+    format's default).  Callers that need to honour the file's nodata
+    value should use :func:`lerc_decompress_with_mask` instead and apply
+    nodata at the array level once dtype is known.
+    """
+    decoded_bytes, _mask = lerc_decompress_with_mask(data)
+    return decoded_bytes
+
+
+def lerc_decompress_with_mask(data: bytes):
+    """Decompress LERC data and return ``(bytes, valid_mask_or_None)``.
+
+    ``valid_mask`` is ``None`` when LERC reports the block is fully
+    valid (no encoded mask, or a mask that is True everywhere);
+    otherwise it is a numpy array whose shape matches the decoded data
+    (height, width) or (height, width, samples), with ``0`` marking
+    pixels the encoder flagged as invalid.  LERC zero fills masked
+    positions in the data array, so the returned mask is the only
+    signal that lets a reader restore the file's nodata sentinel.
+    """
     if not LERC_AVAILABLE:
         raise ImportError(
             "lerc is required to read LERC-compressed TIFFs. "
@@ -1056,7 +1078,13 @@ def lerc_decompress(data: bytes, width: int = 0, height: int = 0,
     if result[0] != 0:
         raise RuntimeError(f"LERC decode failed with error code {result[0]}")
     arr = result[1]
-    return arr.tobytes()
+    valid_mask = result[2] if len(result) > 2 else None
+    if valid_mask is None:
+        return arr.tobytes(), None
+    valid_mask = np.asarray(valid_mask)
+    if valid_mask.size == 0 or bool(valid_mask.all()):
+        return arr.tobytes(), None
+    return arr.tobytes(), valid_mask
 
 
 def lerc_compress(data: bytes, width: int, height: int,
diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py
index a99f1e63..3395e02d 100644
--- a/xrspatial/geotiff/_reader.py
+++ b/xrspatial/geotiff/_reader.py
@@ -12,9 +12,11 @@
 import numpy as np
 
 from ._compression import (
+    COMPRESSION_LERC,
     COMPRESSION_NONE,
     decompress,
     fp_predictor_decode,
+    lerc_decompress_with_mask,
     predictor_decode,
     unpack_bits,
 )
@@ -525,9 +527,33 @@ def _packed_byte_count(pixel_count: int, bps: int) -> int:
     return (pixel_count * bps + 7) // 8
 
 
+def _resolve_masked_fill(nodata_str: str | None, dtype: np.dtype):
+    """Resolve the value to use when restoring LERC-masked pixels.
+
+    Mirrors :func:`_sparse_fill_value` but defaults to NaN for floating
+    dtypes when the file does not declare a nodata sentinel.  Float
+    rasters with no GDAL_NODATA tag still benefit from NaN propagation
+    because LERC's zero fill would silently masquerade as a real
+    measurement at z == 0.
+    """
+    if nodata_str is not None:
+        try:
+            v = float(nodata_str)
+            if dtype.kind == 'f':
+                return dtype.type(v)
+            if not math.isnan(v) and not math.isinf(v):
+                return dtype.type(int(v))
+        except (TypeError, ValueError):
+            pass
+    if dtype.kind == 'f':
+        return dtype.type(np.nan)
+    return dtype.type(0)
+
+
 def _decode_strip_or_tile(data_slice, compression, width, height, samples,
                           bps, bytes_per_sample, is_sub_byte, dtype, pred,
-                          byte_order='<', jpeg_tables=None):
+                          byte_order='<', jpeg_tables=None,
+                          masked_fill=None):
     """Decompress, apply predictor, unpack sub-byte, and reshape a strip/tile.
 
     Parameters
@@ -551,9 +577,18 @@ def _decode_strip_or_tile(data_slice, compression, width, height, samples,
     else:
         expected = pixel_count * bytes_per_sample
 
-    chunk = decompress(data_slice, compression, expected,
-                       width=width, height=height, samples=samples,
-                       jpeg_tables=jpeg_tables)
+    lerc_mask = None
+    if compression == COMPRESSION_LERC:
+        # LERC needs special handling: lerc.decode also returns a
+        # valid-mask which the generic decompress() dispatcher discards.
+        # We capture it here so masked pixels can be restored to nodata
+        # below, instead of leaking LERC's zero fill into the output.
+        decoded_bytes, lerc_mask = lerc_decompress_with_mask(data_slice)
+        chunk = np.frombuffer(decoded_bytes, dtype=np.uint8)
+    else:
+        chunk = decompress(data_slice, compression, expected,
+                           width=width, height=height, samples=samples,
+                           jpeg_tables=jpeg_tables)
 
     # Validate the decompressed byte count.  A truncated deflate stream or a
     # buggy compressor can produce fewer or more bytes than expected.  Without
@@ -587,8 +622,23 @@ def _decode_strip_or_tile(data_slice, compression, width, height, samples,
             pixels = pixels.astype(dtype)
 
     if samples > 1:
-        return pixels.reshape(height, width, samples)
-    return pixels.reshape(height, width)
+        out = pixels.reshape(height, width, samples)
+    else:
+        out = pixels.reshape(height, width)
+
+    # Restore nodata in positions LERC flagged as invalid.  LERC
+    # zero-fills masked pixels in the data array, which would otherwise
+    # be indistinguishable from real zero readings downstream.
+    if lerc_mask is not None and masked_fill is not None:
+        mask_arr = np.asarray(lerc_mask)
+        if mask_arr.ndim == 2 and out.ndim == 3:
+            mask_arr = mask_arr[..., None]
+        invalid = np.broadcast_to(mask_arr == 0, out.shape)
+        if invalid.any():
+            if not out.flags.writeable:
+                out = out.copy()
+            np.putmask(out, invalid, masked_fill)
+    return out
 
 
 import sys as _sys
@@ -667,6 +717,8 @@ def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader,
     bytes_per_sample = bps // 8
     is_sub_byte = bps in SUB_BYTE_BPS
     jpeg_tables = ifd.jpeg_tables
+    masked_fill = (_resolve_masked_fill(ifd.nodata_str, dtype)
+                   if compression == COMPRESSION_LERC else None)
 
     if offsets is None or byte_counts is None:
         raise ValueError("Missing strip offsets or byte counts")
@@ -727,7 +779,8 @@ def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader,
                     strip_data, compression, width, strip_rows, 1,
                     bps, bytes_per_sample, is_sub_byte, dtype, pred,
                     byte_order=header.byte_order,
-                    jpeg_tables=jpeg_tables)
+                    jpeg_tables=jpeg_tables,
+                    masked_fill=masked_fill)
 
                 src_r0 = max(r0 - strip_row, 0)
                 src_r1 = min(r1 - strip_row, strip_rows)
@@ -753,7 +806,8 @@ def _read_strips(data: bytes, ifd: IFD, header: TIFFHeader,
                 strip_data, compression, width, strip_rows, samples,
                 bps, bytes_per_sample, is_sub_byte, dtype, pred,
                 byte_order=header.byte_order,
-                jpeg_tables=jpeg_tables)
+                jpeg_tables=jpeg_tables,
+                masked_fill=masked_fill)
 
             src_r0 = max(r0 - strip_row, 0)
             src_r1 = min(r1 - strip_row, strip_rows)
@@ -804,6 +858,8 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,
     bytes_per_sample = bps // 8
     is_sub_byte = bps in SUB_BYTE_BPS
     jpeg_tables = ifd.jpeg_tables
+    masked_fill = (_resolve_masked_fill(ifd.nodata_str, dtype)
+                   if compression == COMPRESSION_LERC else None)
 
     offsets = ifd.tile_offsets
     byte_counts = ifd.tile_byte_counts
@@ -900,7 +956,8 @@ def _decode_one(job):
             tile_data, compression, tw, th, tile_samples,
             bps, bytes_per_sample, is_sub_byte, dtype, pred,
             byte_order=header.byte_order,
-            jpeg_tables=jpeg_tables)
+            jpeg_tables=jpeg_tables,
+            masked_fill=masked_fill)
 
     if use_parallel:
         from concurrent.futures import ThreadPoolExecutor
@@ -1012,6 +1069,8 @@ def _read_cog_http(url: str, overview_level: int | None = None,
     bytes_per_sample = bps // 8
     is_sub_byte = bps in SUB_BYTE_BPS
     jpeg_tables = ifd.jpeg_tables
+    masked_fill = (_resolve_masked_fill(ifd.nodata_str, dtype)
+                   if compression == COMPRESSION_LERC else None)
 
     offsets = ifd.tile_offsets
     byte_counts = ifd.tile_byte_counts
@@ -1079,7 +1138,8 @@ def _read_cog_http(url: str, overview_level: int | None = None,
             tile_data, compression, tw, th, samples,
             bps, bytes_per_sample, is_sub_byte, dtype, pred,
             byte_order=header.byte_order,
-            jpeg_tables=jpeg_tables)
+            jpeg_tables=jpeg_tables,
+            masked_fill=masked_fill)
 
         y0 = tr * th
         x0 = tc * tw
diff --git a/xrspatial/geotiff/tests/test_lerc_valid_mask.py b/xrspatial/geotiff/tests/test_lerc_valid_mask.py
new file mode 100644
index 00000000..0aa5aa93
--- /dev/null
+++ b/xrspatial/geotiff/tests/test_lerc_valid_mask.py
@@ -0,0 +1,239 @@
+"""Tests for LERC valid-mask handling on decode (issue #A4).
+
+LERC's ``decode`` returns ``(rc, data, valid_mask, ...)``.  When a file
+was encoded with a NoData mask, masked pixels in ``data`` are zero
+filled.  Without consulting ``valid_mask`` the reader cannot tell those
+zeros from real measurements, so any nodata-based downstream masking
+silently misses them.
+
+These tests cover:
+
+* The low-level wrapper ``lerc_decompress_with_mask`` returns the mask
+  and ``lerc_decompress`` still returns raw bytes for backwards compat.
+* TIFF round-trip with a NaN nodata float file: NaN comes back at every
+  masked position.
+* TIFF round-trip with an explicit float sentinel (-9999): masked
+  positions land on the sentinel.
+* TIFF round-trip with an integer sentinel (uint16): masked positions
+  land on the sentinel.
+* Files with no mask round-trip bit-exactly (regression guard).
+"""
+from __future__ import annotations
+
+import numpy as np
+import pytest
+
+lerc = pytest.importorskip("lerc")
+
+from xrspatial.geotiff._compression import (  # noqa: E402
+    LERC_AVAILABLE,
+    lerc_decompress,
+    lerc_decompress_with_mask,
+)
+
+pytestmark = pytest.mark.skipif(
+    not LERC_AVAILABLE,
+    reason="lerc not installed",
+)
+
+
+def _encode_with_mask(arr: np.ndarray, mask: np.ndarray | None,
+                      max_z_error: float = 0.0) -> bytes:
+    """Encode an array with LERC, optionally with a per-pixel valid mask."""
+    has_mask = mask is not None
+    result = lerc.encode(arr, 1, has_mask, mask, max_z_error, 1)
+    assert result[0] == 0, f"lerc.encode returned error {result[0]}"
+    return bytes(result[2])
+
+
+# ---------------------------------------------------------------------------
+# Wrapper-level tests (no TIFF writer involvement)
+# ---------------------------------------------------------------------------
+
+class TestLercDecompressWithMask:
+    """Direct tests of the new ``lerc_decompress_with_mask`` wrapper."""
+
+    def test_no_mask_returns_none(self):
+        arr = np.arange(12, dtype=np.float32).reshape(3, 4)
+        blob = _encode_with_mask(arr, None)
+        decoded_bytes, mask = lerc_decompress_with_mask(blob)
+        out = np.frombuffer(decoded_bytes, dtype=np.float32).reshape(3, 4)
+        np.testing.assert_array_equal(out, arr)
+        assert mask is None
+
+    def test_all_valid_mask_collapses_to_none(self):
+        arr = np.arange(12, dtype=np.float32).reshape(3, 4)
+        valid = np.ones((3, 4), dtype=np.uint8)
+        blob = _encode_with_mask(arr, valid)
+        decoded_bytes, mask = lerc_decompress_with_mask(blob)
+        out = np.frombuffer(decoded_bytes, dtype=np.float32).reshape(3, 4)
+        np.testing.assert_array_equal(out, arr)
+        # All-True mask carries no information, treated as no mask.
+        assert mask is None
+
+    def test_partial_mask_returns_array(self):
+        arr = np.array([[1.0, 2.0, 3.0]], dtype=np.float32)
+        valid = np.array([[1, 0, 1]], dtype=np.uint8)
+        blob = _encode_with_mask(arr, valid)
+        decoded_bytes, mask = lerc_decompress_with_mask(blob)
+        out = np.frombuffer(decoded_bytes, dtype=np.float32).reshape(1, 3)
+        # LERC zero-fills masked positions in the data array.
+        np.testing.assert_array_equal(out, np.array([[1.0, 0.0, 3.0]],
+                                                    dtype=np.float32))
+        assert mask is not None
+        np.testing.assert_array_equal(np.asarray(mask).reshape(1, 3),
+                                      np.array([[1, 0, 1]], dtype=np.uint8))
+
+    def test_legacy_decompress_drops_mask(self):
+        """``lerc_decompress`` still returns just raw bytes."""
+        arr = np.array([[1.0, 2.0, 3.0]], dtype=np.float32)
+        valid = np.array([[1, 0, 1]], dtype=np.uint8)
+        blob = _encode_with_mask(arr, valid)
+        out_bytes = lerc_decompress(blob)
+        assert isinstance(out_bytes, bytes)
+        out = np.frombuffer(out_bytes, dtype=np.float32).reshape(1, 3)
+        np.testing.assert_array_equal(out, np.array([[1.0, 0.0, 3.0]],
+                                                    dtype=np.float32))
+
+
+# ---------------------------------------------------------------------------
+# Reader-level (TIFF round-trip) tests
+# ---------------------------------------------------------------------------
+
+@pytest.fixture
+def lerc_writer_with_mask(monkeypatch):
+    """Patch ``lerc_compress`` to embed a valid-mask the writer can't pass.
+
+    The writer hard-codes ``hasMask=False`` in its call to ``lerc.encode``
+    because there's no plumbing for per-pixel mask data.  The fixture
+    swaps in an encoder that derives the mask from a holder ``invalid``
+    callable: given the (height, width) tile shape and the decoded
+    pixel array it returns a uint8 valid mask (1=valid, 0=invalid) the
+    same shape.  This lets the test specify masking by predicate so the
+    mask matches whatever tile padding the writer emits.
+    """
+    holder = {"invalid": None}
+
+    def _patched(data, width, height, samples=1,
+                 dtype=np.dtype('float32'), max_z_error=0.0):
+        if samples == 1:
+            arr = np.frombuffer(data, dtype=dtype).reshape(height, width)
+        else:
+            arr = np.frombuffer(data, dtype=dtype).reshape(
+                height, width, samples)
+        invalid_pred = holder["invalid"]
+        if invalid_pred is None:
+            mask = None
+            has_mask = False
+        else:
+            invalid = invalid_pred(arr)
+            mask = np.where(invalid, np.uint8(0), np.uint8(1))
+            has_mask = True
+        result = lerc.encode(arr, samples, has_mask, mask, max_z_error, 1)
+        if result[0] != 0:
+            raise RuntimeError(
+                f"LERC encode failed with error code {result[0]}")
+        return bytes(result[2])
+
+    monkeypatch.setattr(
+        "xrspatial.geotiff._compression.lerc_compress", _patched,
+    )
+    return holder
+
+
+class TestLercTiffRoundTripWithMask:
+    """End-to-end TIFF round-trips that exercise the reader's mask merge."""
+
+    def test_float32_nan_nodata(self, tmp_path, lerc_writer_with_mask):
+        from xrspatial.geotiff._writer import write
+        from xrspatial.geotiff._reader import read_to_array
+
+        arr = np.arange(1, 65, dtype=np.float32).reshape(8, 8)
+        # Mask positions (0, 1) and (5, 4); the rest stays valid.
+        invalid_positions = {(0, 1), (5, 4)}
+
+        def invalid_pred(a):
+            m = np.zeros(a.shape[:2], dtype=bool)
+            for r, c in invalid_positions:
+                m[r, c] = True
+            return m
+        lerc_writer_with_mask["invalid"] = invalid_pred
+
+        path = str(tmp_path / "lerc_mask_nan.tif")
+        write(arr, path, compression="lerc", tiled=True, tile_size=8,
+              nodata=float("nan"))
+
+        out, _geo = read_to_array(path)
+        for r in range(8):
+            for c in range(8):
+                if (r, c) in invalid_positions:
+                    assert np.isnan(out[r, c]), \
+                        f"expected NaN at ({r},{c}), got {out[r, c]}"
+                else:
+                    assert out[r, c] == arr[r, c], \
+                        f"value mismatch at ({r},{c})"
+
+    def test_float32_sentinel_nodata(self, tmp_path, lerc_writer_with_mask):
+        from xrspatial.geotiff._writer import write
+        from xrspatial.geotiff._reader import read_to_array
+
+        arr = np.arange(1, 65, dtype=np.float32).reshape(8, 8)
+        invalid_positions = {(0, 1), (3, 3), (7, 7)}
+
+        def invalid_pred(a):
+            m = np.zeros(a.shape[:2], dtype=bool)
+            for r, c in invalid_positions:
+                m[r, c] = True
+            return m
+        lerc_writer_with_mask["invalid"] = invalid_pred
+
+        path = str(tmp_path / "lerc_mask_sentinel_f32.tif")
+        write(arr, path, compression="lerc", tiled=True, tile_size=8,
+              nodata=-9999.0)
+
+        out, _geo = read_to_array(path)
+        for r in range(8):
+            for c in range(8):
+                if (r, c) in invalid_positions:
+                    assert out[r, c] == np.float32(-9999.0), \
+                        f"expected sentinel at ({r},{c}), got {out[r, c]}"
+                else:
+                    assert out[r, c] == arr[r, c]
+
+    def test_uint16_sentinel_nodata(self, tmp_path, lerc_writer_with_mask):
+        from xrspatial.geotiff._writer import write
+        from xrspatial.geotiff._reader import read_to_array
+
+        arr = (np.arange(1, 65, dtype=np.uint16) * 100).reshape(8, 8)
+        invalid_positions = {(0, 1), (4, 4)}
+
+        def invalid_pred(a):
+            m = np.zeros(a.shape[:2], dtype=bool)
+            for r, c in invalid_positions:
+                m[r, c] = True
+            return m
+        lerc_writer_with_mask["invalid"] = invalid_pred
+
+        path = str(tmp_path / "lerc_mask_uint16.tif")
+        write(arr, path, compression="lerc", tiled=True, tile_size=8,
+              nodata=65535)
+
+        out, _geo = read_to_array(path)
+        for r in range(8):
+            for c in range(8):
+                if (r, c) in invalid_positions:
+                    assert out[r, c] == np.uint16(65535)
+                else:
+                    assert out[r, c] == arr[r, c]
+
+    def test_no_mask_roundtrip_bitexact(self, tmp_path):
+        """Sanity: an all-valid LERC file (no mask) still round-trips."""
+        from xrspatial.geotiff._writer import write
+        from xrspatial.geotiff._reader import read_to_array
+
+        arr = np.arange(64, dtype=np.float32).reshape(8, 8)
+        path = str(tmp_path / "lerc_no_mask.tif")
+        write(arr, path, compression="lerc", tiled=True, tile_size=8)
+
+        out, _geo = read_to_array(path)
+        np.testing.assert_array_equal(out, arr)

From 0ee0f5aa920539b63f1f68f6e519a3ad30ec2ea0 Mon Sep 17 00:00:00 2001
From: Brendan Collins <brendancol@gmail.com>
Date: Fri, 8 May 2026 13:57:50 -0700
Subject: [PATCH 2/2] Apply LERC valid-mask in GPU decode path (depends on
 #1529)

The CPU LERC reader from #1529 honours the LERC valid-mask and writes
the file's nodata sentinel into masked pixels. The GPU LERC tile-decode
path was still dropping the mask, so masked pixels read back as 0 on
GPU but as NaN or the sentinel on CPU. Same bug, GPU side.

Changes:

_gpu_decode.py: the LERC branch now calls lerc_decompress_with_mask
per tile and keeps any returned valid-mask. After predictor decode and
tile assembly, _apply_lerc_mask_fill builds an invalid mask on host
(matching the GPU assembly kernel's tile-grid layout), copies it to
GPU once, and overwrites masked positions with the resolved fill
value. Tiles LERC reports as fully valid skip the host work, so the
no-mask path stays zero-copy.

gpu_decode_tiles and gpu_decode_tiles_from_file get a masked_fill
keyword that is forwarded through. read_geotiff_gpu computes it via
_resolve_masked_fill(ifd.nodata_str, file_dtype) for LERC sources.

tests/test_lerc_valid_mask_gpu.py: 4 tests covering float32+NaN,
float32+sentinel, uint16+sentinel, and the no-mask regression. Each
compares read_geotiff_gpu output to read_to_array output for the same
file. Skipped unless cupy + CUDA + lerc are available.

Out of scope: the encode side. The xrspatial writer still hard-codes
hasMask=False; the tests reuse the lerc_compress monkeypatch fixture
from the CPU PR to inject a valid-mask through lerc.encode directly.
---
 xrspatial/geotiff/__init__.py                 |  10 +
 xrspatial/geotiff/_gpu_decode.py              | 105 +++++++++-
 .../geotiff/tests/test_lerc_valid_mask_gpu.py | 181 ++++++++++++++++++
 3 files changed, 291 insertions(+), 5 deletions(-)
 create mode 100644 xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py

diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py
index 11e740d0..a1b714fc 100644
--- a/xrspatial/geotiff/__init__.py
+++ b/xrspatial/geotiff/__init__.py
@@ -1469,7 +1469,9 @@ def read_geotiff_gpu(source: str, *,
 
     from ._reader import (
         _FileSource, _check_dimensions, MAX_PIXELS_DEFAULT, _coerce_path,
+        _resolve_masked_fill,
     )
+    from ._compression import COMPRESSION_LERC
     from ._header import (
         parse_header, parse_all_ifds, select_overview_ifd, validate_tile_layout,
     )
@@ -1552,6 +1554,12 @@ def read_geotiff_gpu(source: str, *,
     # Sparse tiles (byte_count == 0) are unsupported on the GPU pipeline;
     # the CPU reader fills them with nodata and copies onto the GPU.
     has_sparse_tile = any(bc == 0 for bc in byte_counts)
+    # LERC tiles can carry a per-pixel valid mask that GDAL writes
+    # zero-filled in the data array.  Compute the nodata fill the same
+    # way the CPU reader does so the GPU decode path can restore it
+    # post-assembly (mirrors PR #1529 for the CPU path).
+    masked_fill = (_resolve_masked_fill(ifd.nodata_str, file_dtype)
+                   if compression == COMPRESSION_LERC else None)
     if has_sparse_tile:
         arr_cpu, _ = read_to_array(source, overview_level=overview_level)
         arr_gpu = cupy.asarray(arr_cpu)
@@ -1565,6 +1573,7 @@ def read_geotiff_gpu(source: str, *,
                 tw, th, width, height,
                 compression, predictor, file_dtype, samples,
                 byte_order=header.byte_order,
+                masked_fill=masked_fill,
             )
         except Exception as e:
             if gpu == 'strict':
@@ -1596,6 +1605,7 @@ def read_geotiff_gpu(source: str, *,
                 tw, th, width, height,
                 compression, predictor, file_dtype, samples,
                 byte_order=header.byte_order,
+                masked_fill=masked_fill,
             )
         except Exception as e:
             if gpu == 'strict':
diff --git a/xrspatial/geotiff/_gpu_decode.py b/xrspatial/geotiff/_gpu_decode.py
index d18b5cfd..078a05d2 100644
--- a/xrspatial/geotiff/_gpu_decode.py
+++ b/xrspatial/geotiff/_gpu_decode.py
@@ -1406,6 +1406,7 @@ def gpu_decode_tiles_from_file(
     dtype: np.dtype,
     samples: int = 1,
     byte_order: str = '<',
+    masked_fill=None,
 ):
     """Decode tiles from a file, using GDS if available.
 
@@ -1450,7 +1451,7 @@ def gpu_decode_tiles_from_file(
     return gpu_decode_tiles(
         compressed_tiles, tile_width, tile_height,
         image_width, image_height, compression, predictor, dtype, samples,
-        byte_order=byte_order)
+        byte_order=byte_order, masked_fill=masked_fill)
 
 
 def _try_nvcomp_from_device_bufs(d_tiles, tile_bytes, compression):
@@ -1665,6 +1666,7 @@ def gpu_decode_tiles(
     dtype: np.dtype,
     samples: int = 1,
     byte_order: str = '<',
+    masked_fill=None,
 ):
     """Decode and assemble TIFF tiles entirely on GPU.
 
@@ -1684,6 +1686,12 @@ def gpu_decode_tiles(
         Output pixel dtype.
     samples : int
         Samples per pixel.
+    masked_fill : scalar or None
+        Value to write into pixels that LERC reports as invalid.  Only
+        consulted for LERC-compressed inputs (tag 34887).  ``None``
+        leaves any masked pixels at LERC's zero fill.  Use
+        ``_resolve_masked_fill(ifd.nodata_str, dtype)`` from
+        ``_reader.py`` to mirror the CPU reader's nodata semantics.
 
     Returns
     -------
@@ -1696,6 +1704,12 @@ def gpu_decode_tiles(
     bytes_per_pixel = dtype.itemsize * samples
     tile_bytes = tile_width * tile_height * bytes_per_pixel
 
+    # Per-tile LERC valid masks; populated only by the LERC branch
+    # below.  Kept as a flat local so the post-assembly fill block at
+    # the end of this function can find it regardless of which decode
+    # branch ran.
+    _lerc_masks: list[np.ndarray | None] | None = None
+
     # Try nvCOMP batch decompression first (much faster if available)
     nvcomp_result = _try_nvcomp_batch_decompress(
         compressed_tiles, tile_bytes, compression)
@@ -1823,19 +1837,34 @@ def gpu_decode_tiles(
             d_decomp_offsets = cupy.asarray(decomp_offsets)
 
     elif compression == 34887:  # LERC
-        from ._compression import lerc_decompress
+        from ._compression import lerc_decompress_with_mask
         raw_host = np.empty(n_tiles * tile_bytes, dtype=np.uint8)
+        # Per-tile valid masks captured from LERC.  None entries mean
+        # "all valid" (LERC returned no mask, or an all-True mask).
+        # The host-side mask buffer is materialised lazily: it stays
+        # None until at least one tile reports a real partial mask.
+        per_tile_masks: list[np.ndarray | None] = [None] * n_tiles
+        any_lerc_mask = False
         for i, tile in enumerate(compressed_tiles):
             start = i * tile_bytes
-            chunk = np.frombuffer(
-                lerc_decompress(tile, tile_width, tile_height, samples),
-                dtype=np.uint8)
+            decoded_bytes, valid_mask = lerc_decompress_with_mask(tile)
+            chunk = np.frombuffer(decoded_bytes, dtype=np.uint8)
             raw_host[start:start + min(len(chunk), tile_bytes)] = \
                 chunk[:tile_bytes] if len(chunk) >= tile_bytes else \
                 np.pad(chunk, (0, tile_bytes - len(chunk)))
+            if valid_mask is not None:
+                per_tile_masks[i] = np.asarray(valid_mask)
+                any_lerc_mask = True
         d_decomp = cupy.asarray(raw_host)
         decomp_offsets = np.arange(n_tiles, dtype=np.int64) * tile_bytes
         d_decomp_offsets = cupy.asarray(decomp_offsets)
+        if any_lerc_mask:
+            # Stash per-tile masks for the post-assembly fill pass below.
+            # Stored in a ``_lerc_masks`` local that the LERC fill block
+            # picks up after predictor decode and tile assembly.
+            _lerc_masks = per_tile_masks
+        else:
+            _lerc_masks = None
 
     elif compression == 1:  # Uncompressed
         raw_host = np.empty(n_tiles * tile_bytes, dtype=np.uint8)
@@ -1919,6 +1948,72 @@ def gpu_decode_tiles(
     if byte_order == '>' and dtype.itemsize > 1 and predictor != 2:
         # cupy.ndarray has no .byteswap(), so use the dtype-view helper.
         out = _xp_byteswap(out)
+
+    # LERC valid-mask fill: GDAL writes LERC TIFFs with masked pixels
+    # zero-filled in the data array, so without restoring nodata here a
+    # masked pixel reads back as a real zero measurement.  Mirrors the
+    # CPU path in ``_decode_strip_or_tile`` (PR #1529).
+    if _lerc_masks is not None and masked_fill is not None:
+        out = _apply_lerc_mask_fill(
+            out, _lerc_masks, tile_width, tile_height,
+            image_width, image_height, samples, masked_fill)
+    return out
+
+
+def _apply_lerc_mask_fill(out, per_tile_masks, tile_width, tile_height,
+                          image_width, image_height, samples, masked_fill):
+    """Write *masked_fill* into LERC-invalid positions of an assembled image.
+
+    Each tile contributes either an ``(h, w)``/``(h, w, samples)`` valid
+    mask (1=valid, 0=invalid) or ``None`` for "all valid".  The host
+    builds a single assembled boolean invalid-mask sized to the output
+    image, transfers it to the GPU once, and uses it to overwrite
+    masked positions on device.
+    """
+    import cupy
+
+    n_tiles = len(per_tile_masks)
+    tiles_across = math.ceil(image_width / tile_width)
+    tiles_down = math.ceil(image_height / tile_height)
+    if n_tiles != tiles_across * tiles_down:
+        # Defensive: tile grid mismatch means we cannot place masks
+        # safely.  Skip the fill rather than corrupt the output.
+        return out
+
+    invalid = np.zeros((image_height, image_width), dtype=bool)
+    for idx, mask in enumerate(per_tile_masks):
+        if mask is None:
+            continue
+        tr = idx // tiles_across
+        tc = idx % tiles_across
+        y0 = tr * tile_height
+        x0 = tc * tile_width
+        # Trim the tile mask to the visible portion of the image so
+        # right- and bottom-edge tile padding does not leak into the
+        # assembled mask.
+        h = min(tile_height, image_height - y0)
+        w = min(tile_width, image_width - x0)
+        if h <= 0 or w <= 0:
+            continue
+        m = np.asarray(mask)
+        # LERC may report the mask as (h, w) or (h, w, samples); for the
+        # invalid-fill we collapse multi-sample masks via "any sample
+        # invalid".
+        if m.ndim == 3:
+            m2 = (m == 0).any(axis=2)
+        else:
+            m2 = (m == 0)
+        invalid[y0:y0 + h, x0:x0 + w] = m2[:h, :w]
+
+    if not invalid.any():
+        return out
+
+    d_invalid = cupy.asarray(invalid)
+    if out.ndim == 3:
+        # Broadcast (H, W) mask across the sample axis.
+        out[d_invalid, ...] = out.dtype.type(masked_fill)
+    else:
+        out[d_invalid] = out.dtype.type(masked_fill)
     return out
 
 
diff --git a/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py b/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py
new file mode 100644
index 00000000..0f39697b
--- /dev/null
+++ b/xrspatial/geotiff/tests/test_lerc_valid_mask_gpu.py
@@ -0,0 +1,181 @@
+"""GPU follow-up to PR #1529 (LERC valid-mask on decode).
+
+The CPU LERC reader honours the LERC valid-mask and writes the file's
+nodata sentinel into masked pixels.  The GPU LERC tile-decode path used
+to discard the mask, so masked pixels read back as LERC's zero fill
+(real-looking measurements at z == 0) on GPU but as NaN/sentinel on
+CPU.  These tests confirm the GPU path now matches the CPU path for
+representative LERC mask combinations.
+
+Mirrors the structure of ``test_lerc_valid_mask.py`` but compares
+``read_geotiff_gpu`` output to ``read_to_array`` output for each case.
+"""
+from __future__ import annotations
+
+import importlib.util
+
+import numpy as np
+import pytest
+
+lerc = pytest.importorskip("lerc")
+
+from xrspatial.geotiff._compression import LERC_AVAILABLE  # noqa: E402
+
+
+def _gpu_available() -> bool:
+    """True if cupy is importable and CUDA is initialised."""
+    if importlib.util.find_spec("cupy") is None:
+        return False
+    try:
+        import cupy
+        return bool(cupy.cuda.is_available())
+    except Exception:
+        return False
+
+
+_HAS_GPU = _gpu_available()
+_gpu_only = pytest.mark.skipif(
+    not (_HAS_GPU and LERC_AVAILABLE),
+    reason="cupy + CUDA + lerc required",
+)
+
+
+@pytest.fixture
+def lerc_writer_with_mask(monkeypatch):
+    """Patch ``lerc_compress`` to embed a valid-mask the writer can't pass.
+
+    The xrspatial writer hard-codes ``hasMask=False`` in its call to
+    ``lerc.encode``.  Tests inject a per-tile mask through this holder's
+    ``invalid`` predicate so the masked pixels survive the encode and
+    show up at decode time.  Same pattern as the CPU test fixture in
+    ``test_lerc_valid_mask.py``.
+    """
+    holder = {"invalid": None}
+
+    def _patched(data, width, height, samples=1,
+                 dtype=np.dtype('float32'), max_z_error=0.0):
+        if samples == 1:
+            arr = np.frombuffer(data, dtype=dtype).reshape(height, width)
+        else:
+            arr = np.frombuffer(data, dtype=dtype).reshape(
+                height, width, samples)
+        invalid_pred = holder["invalid"]
+        if invalid_pred is None:
+            mask = None
+            has_mask = False
+        else:
+            invalid = invalid_pred(arr)
+            mask = np.where(invalid, np.uint8(0), np.uint8(1))
+            has_mask = True
+        result = lerc.encode(arr, samples, has_mask, mask, max_z_error, 1)
+        if result[0] != 0:
+            raise RuntimeError(
+                f"LERC encode failed with error code {result[0]}")
+        return bytes(result[2])
+
+    monkeypatch.setattr(
+        "xrspatial.geotiff._compression.lerc_compress", _patched,
+    )
+    return holder
+
+
+def _read_cpu_gpu(path):
+    """Read *path* with both readers and return ``(cpu_array, gpu_host_array)``."""
+    from xrspatial.geotiff import read_geotiff_gpu
+    from xrspatial.geotiff._reader import read_to_array
+
+    cpu, _geo = read_to_array(path)
+    gpu_da = read_geotiff_gpu(path, gpu='strict')
+    gpu_host = gpu_da.data.get()
+    return cpu, gpu_host
+
+
+@_gpu_only
+class TestGpuLercValidMask:
+    """End-to-end TIFF round-trips comparing GPU vs CPU output."""
+
+    def test_float32_nan_nodata(self, tmp_path, lerc_writer_with_mask):
+        """Float32 LERC + NaN nodata: GPU output matches CPU output."""
+        from xrspatial.geotiff._writer import write
+
+        arr = np.arange(1, 65, dtype=np.float32).reshape(8, 8)
+        invalid_positions = {(0, 1), (5, 4)}
+
+        def invalid_pred(a):
+            m = np.zeros(a.shape[:2], dtype=bool)
+            for r, c in invalid_positions:
+                m[r, c] = True
+            return m
+        lerc_writer_with_mask["invalid"] = invalid_pred
+
+        path = str(tmp_path / "lerc_mask_nan_gpu.tif")
+        write(arr, path, compression="lerc", tiled=True, tile_size=8,
+              nodata=float("nan"))
+
+        cpu, gpu = _read_cpu_gpu(path)
+        # NaN positions
+        for (r, c) in invalid_positions:
+            assert np.isnan(cpu[r, c])
+            assert np.isnan(gpu[r, c])
+        # Valid positions agree exactly
+        cpu_valid = np.where(np.isnan(cpu), 0.0, cpu)
+        gpu_valid = np.where(np.isnan(gpu), 0.0, gpu)
+        np.testing.assert_array_equal(cpu_valid, gpu_valid)
+
+    def test_float32_sentinel_nodata(self, tmp_path, lerc_writer_with_mask):
+        """Float32 LERC + sentinel nodata (-9999): GPU matches CPU."""
+        from xrspatial.geotiff._writer import write
+
+        arr = np.arange(1, 65, dtype=np.float32).reshape(8, 8)
+        invalid_positions = {(0, 1), (3, 3), (7, 7)}
+
+        def invalid_pred(a):
+            m = np.zeros(a.shape[:2], dtype=bool)
+            for r, c in invalid_positions:
+                m[r, c] = True
+            return m
+        lerc_writer_with_mask["invalid"] = invalid_pred
+
+        path = str(tmp_path / "lerc_mask_sentinel_f32_gpu.tif")
+        write(arr, path, compression="lerc", tiled=True, tile_size=8,
+              nodata=-9999.0)
+
+        cpu, gpu = _read_cpu_gpu(path)
+        np.testing.assert_array_equal(cpu, gpu)
+        for (r, c) in invalid_positions:
+            assert gpu[r, c] == np.float32(-9999.0)
+
+    def test_uint16_sentinel_nodata(self, tmp_path, lerc_writer_with_mask):
+        """Uint16 LERC + sentinel nodata (65535): GPU matches CPU."""
+        from xrspatial.geotiff._writer import write
+
+        arr = (np.arange(1, 65, dtype=np.uint16) * 100).reshape(8, 8)
+        invalid_positions = {(0, 1), (4, 4)}
+
+        def invalid_pred(a):
+            m = np.zeros(a.shape[:2], dtype=bool)
+            for r, c in invalid_positions:
+                m[r, c] = True
+            return m
+        lerc_writer_with_mask["invalid"] = invalid_pred
+
+        path = str(tmp_path / "lerc_mask_uint16_gpu.tif")
+        write(arr, path, compression="lerc", tiled=True, tile_size=8,
+              nodata=65535)
+
+        cpu, gpu = _read_cpu_gpu(path)
+        np.testing.assert_array_equal(cpu, gpu)
+        for (r, c) in invalid_positions:
+            assert gpu[r, c] == np.uint16(65535)
+
+    def test_no_mask_roundtrip_bitexact(self, tmp_path):
+        """All-valid LERC (no encoded mask): GPU and CPU agree bit-exact."""
+        from xrspatial.geotiff._writer import write
+
+        arr = np.arange(64, dtype=np.float32).reshape(8, 8)
+        path = str(tmp_path / "lerc_no_mask_gpu.tif")
+        write(arr, path, compression="lerc", tiled=True, tile_size=8)
+
+        cpu, gpu = _read_cpu_gpu(path)
+        np.testing.assert_array_equal(cpu, arr)
+        np.testing.assert_array_equal(gpu, arr)