Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1469,7 +1469,9 @@ def read_geotiff_gpu(source: str, *,

from ._reader import (
_FileSource, _check_dimensions, MAX_PIXELS_DEFAULT, _coerce_path,
_resolve_masked_fill,
)
from ._compression import COMPRESSION_LERC
from ._header import (
parse_header, parse_all_ifds, select_overview_ifd, validate_tile_layout,
)
Expand Down Expand Up @@ -1552,6 +1554,12 @@ def read_geotiff_gpu(source: str, *,
# Sparse tiles (byte_count == 0) are unsupported on the GPU pipeline;
# the CPU reader fills them with nodata and copies onto the GPU.
has_sparse_tile = any(bc == 0 for bc in byte_counts)
# LERC tiles can carry a per-pixel valid mask that GDAL writes
# zero-filled in the data array. Compute the nodata fill the same
# way the CPU reader does so the GPU decode path can restore it
# post-assembly (mirrors PR #1529 for the CPU path).
masked_fill = (_resolve_masked_fill(ifd.nodata_str, file_dtype)
if compression == COMPRESSION_LERC else None)
if has_sparse_tile:
arr_cpu, _ = read_to_array(source, overview_level=overview_level)
arr_gpu = cupy.asarray(arr_cpu)
Expand All @@ -1565,6 +1573,7 @@ def read_geotiff_gpu(source: str, *,
tw, th, width, height,
compression, predictor, file_dtype, samples,
byte_order=header.byte_order,
masked_fill=masked_fill,
)
except Exception as e:
if gpu == 'strict':
Expand Down Expand Up @@ -1596,6 +1605,7 @@ def read_geotiff_gpu(source: str, *,
tw, th, width, height,
compression, predictor, file_dtype, samples,
byte_order=header.byte_order,
masked_fill=masked_fill,
)
except Exception as e:
if gpu == 'strict':
Expand Down
32 changes: 30 additions & 2 deletions xrspatial/geotiff/_compression.py
Original file line number Diff line number Diff line change
Expand Up @@ -1046,7 +1046,29 @@ def jpeg2000_compress(data: bytes, width: int, height: int,

def lerc_decompress(data: bytes, width: int = 0, height: int = 0,
samples: int = 1) -> bytes:
"""Decompress LERC data. Requires the ``lerc`` package."""
"""Decompress LERC data. Requires the ``lerc`` package.

Returns the raw decoded pixel bytes. Any LERC valid-mask is dropped
here; masked pixels are returned as LERC's zero fill (the wire
format's default). Callers that need to honour the file's nodata
value should use :func:`lerc_decompress_with_mask` instead and apply
nodata at the array level once dtype is known.
"""
decoded_bytes, _mask = lerc_decompress_with_mask(data)
return decoded_bytes


def lerc_decompress_with_mask(data: bytes):
"""Decompress LERC data and return ``(bytes, valid_mask_or_None)``.

``valid_mask`` is ``None`` when LERC reports the block is fully
valid (no encoded mask, or a mask that is True everywhere);
otherwise it is a numpy array whose shape matches the decoded data
(height, width) or (height, width, samples), with ``0`` marking
pixels the encoder flagged as invalid. LERC zero fills masked
positions in the data array, so the returned mask is the only
signal that lets a reader restore the file's nodata sentinel.
"""
if not LERC_AVAILABLE:
raise ImportError(
"lerc is required to read LERC-compressed TIFFs. "
Expand All @@ -1056,7 +1078,13 @@ def lerc_decompress(data: bytes, width: int = 0, height: int = 0,
if result[0] != 0:
raise RuntimeError(f"LERC decode failed with error code {result[0]}")
arr = result[1]
return arr.tobytes()
valid_mask = result[2] if len(result) > 2 else None
if valid_mask is None:
return arr.tobytes(), None
valid_mask = np.asarray(valid_mask)
if valid_mask.size == 0 or bool(valid_mask.all()):
return arr.tobytes(), None
return arr.tobytes(), valid_mask


def lerc_compress(data: bytes, width: int, height: int,
Expand Down
105 changes: 100 additions & 5 deletions xrspatial/geotiff/_gpu_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -1406,6 +1406,7 @@ def gpu_decode_tiles_from_file(
dtype: np.dtype,
samples: int = 1,
byte_order: str = '<',
masked_fill=None,
):
"""Decode tiles from a file, using GDS if available.

Expand Down Expand Up @@ -1450,7 +1451,7 @@ def gpu_decode_tiles_from_file(
return gpu_decode_tiles(
compressed_tiles, tile_width, tile_height,
image_width, image_height, compression, predictor, dtype, samples,
byte_order=byte_order)
byte_order=byte_order, masked_fill=masked_fill)


def _try_nvcomp_from_device_bufs(d_tiles, tile_bytes, compression):
Expand Down Expand Up @@ -1665,6 +1666,7 @@ def gpu_decode_tiles(
dtype: np.dtype,
samples: int = 1,
byte_order: str = '<',
masked_fill=None,
):
"""Decode and assemble TIFF tiles entirely on GPU.

Expand All @@ -1684,6 +1686,12 @@ def gpu_decode_tiles(
Output pixel dtype.
samples : int
Samples per pixel.
masked_fill : scalar or None
Value to write into pixels that LERC reports as invalid. Only
consulted for LERC-compressed inputs (tag 34887). ``None``
leaves any masked pixels at LERC's zero fill. Use
``_resolve_masked_fill(ifd.nodata_str, dtype)`` from
``_reader.py`` to mirror the CPU reader's nodata semantics.

Returns
-------
Expand All @@ -1696,6 +1704,12 @@ def gpu_decode_tiles(
bytes_per_pixel = dtype.itemsize * samples
tile_bytes = tile_width * tile_height * bytes_per_pixel

# Per-tile LERC valid masks; populated only by the LERC branch
# below. Kept as a flat local so the post-assembly fill block at
# the end of this function can find it regardless of which decode
# branch ran.
_lerc_masks: list[np.ndarray | None] | None = None

# Try nvCOMP batch decompression first (much faster if available)
nvcomp_result = _try_nvcomp_batch_decompress(
compressed_tiles, tile_bytes, compression)
Expand Down Expand Up @@ -1823,19 +1837,34 @@ def gpu_decode_tiles(
d_decomp_offsets = cupy.asarray(decomp_offsets)

elif compression == 34887: # LERC
from ._compression import lerc_decompress
from ._compression import lerc_decompress_with_mask
raw_host = np.empty(n_tiles * tile_bytes, dtype=np.uint8)
# Per-tile valid masks captured from LERC. None entries mean
# "all valid" (LERC returned no mask, or an all-True mask).
# The host-side mask buffer is materialised lazily: it stays
# None until at least one tile reports a real partial mask.
per_tile_masks: list[np.ndarray | None] = [None] * n_tiles
any_lerc_mask = False
for i, tile in enumerate(compressed_tiles):
start = i * tile_bytes
chunk = np.frombuffer(
lerc_decompress(tile, tile_width, tile_height, samples),
dtype=np.uint8)
decoded_bytes, valid_mask = lerc_decompress_with_mask(tile)
chunk = np.frombuffer(decoded_bytes, dtype=np.uint8)
raw_host[start:start + min(len(chunk), tile_bytes)] = \
chunk[:tile_bytes] if len(chunk) >= tile_bytes else \
np.pad(chunk, (0, tile_bytes - len(chunk)))
if valid_mask is not None:
per_tile_masks[i] = np.asarray(valid_mask)
any_lerc_mask = True
d_decomp = cupy.asarray(raw_host)
decomp_offsets = np.arange(n_tiles, dtype=np.int64) * tile_bytes
d_decomp_offsets = cupy.asarray(decomp_offsets)
if any_lerc_mask:
# Stash per-tile masks for the post-assembly fill pass below.
# Stored in a ``_lerc_masks`` local that the LERC fill block
# picks up after predictor decode and tile assembly.
_lerc_masks = per_tile_masks
else:
_lerc_masks = None

elif compression == 1: # Uncompressed
raw_host = np.empty(n_tiles * tile_bytes, dtype=np.uint8)
Expand Down Expand Up @@ -1919,6 +1948,72 @@ def gpu_decode_tiles(
if byte_order == '>' and dtype.itemsize > 1 and predictor != 2:
# cupy.ndarray has no .byteswap(), so use the dtype-view helper.
out = _xp_byteswap(out)

# LERC valid-mask fill: GDAL writes LERC TIFFs with masked pixels
# zero-filled in the data array, so without restoring nodata here a
# masked pixel reads back as a real zero measurement. Mirrors the
# CPU path in ``_decode_strip_or_tile`` (PR #1529).
if _lerc_masks is not None and masked_fill is not None:
out = _apply_lerc_mask_fill(
out, _lerc_masks, tile_width, tile_height,
image_width, image_height, samples, masked_fill)
return out


def _apply_lerc_mask_fill(out, per_tile_masks, tile_width, tile_height,
image_width, image_height, samples, masked_fill):
"""Write *masked_fill* into LERC-invalid positions of an assembled image.

Each tile contributes either an ``(h, w)``/``(h, w, samples)`` valid
mask (1=valid, 0=invalid) or ``None`` for "all valid". The host
builds a single assembled boolean invalid-mask sized to the output
image, transfers it to the GPU once, and uses it to overwrite
masked positions on device.
"""
import cupy

n_tiles = len(per_tile_masks)
tiles_across = math.ceil(image_width / tile_width)
tiles_down = math.ceil(image_height / tile_height)
if n_tiles != tiles_across * tiles_down:
# Defensive: tile grid mismatch means we cannot place masks
# safely. Skip the fill rather than corrupt the output.
return out

invalid = np.zeros((image_height, image_width), dtype=bool)
for idx, mask in enumerate(per_tile_masks):
if mask is None:
continue
tr = idx // tiles_across
tc = idx % tiles_across
y0 = tr * tile_height
x0 = tc * tile_width
# Trim the tile mask to the visible portion of the image so
# right- and bottom-edge tile padding does not leak into the
# assembled mask.
h = min(tile_height, image_height - y0)
w = min(tile_width, image_width - x0)
if h <= 0 or w <= 0:
continue
m = np.asarray(mask)
# LERC may report the mask as (h, w) or (h, w, samples); for the
# invalid-fill we collapse multi-sample masks via "any sample
# invalid".
if m.ndim == 3:
m2 = (m == 0).any(axis=2)
else:
m2 = (m == 0)
invalid[y0:y0 + h, x0:x0 + w] = m2[:h, :w]

if not invalid.any():
return out

d_invalid = cupy.asarray(invalid)
if out.ndim == 3:
# Broadcast (H, W) mask across the sample axis.
out[d_invalid, ...] = out.dtype.type(masked_fill)
else:
out[d_invalid] = out.dtype.type(masked_fill)
return out


Expand Down
Loading
Loading