From 2bafef9e78ca0d6d453d97f4d6890c4a5f1fbe5d Mon Sep 17 00:00:00 2001 From: Trevor Hillebrand Date: Mon, 4 May 2026 14:41:05 -0700 Subject: [PATCH 1/8] Mask scrip file before ESMF_RegridWeightGen Mask scrip file to only use cells overlapping with MALI mesh before ESMF_RegridWeightGen. This can lead to a considerable decrease in computational cost when interpolating to large meshes. --- compass/landice/mesh.py | 181 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 180 insertions(+), 1 deletion(-) diff --git a/compass/landice/mesh.py b/compass/landice/mesh.py index 0d7455bcc4..5df8893bcc 100644 --- a/compass/landice/mesh.py +++ b/compass/landice/mesh.py @@ -11,6 +11,7 @@ import numpy as np import xarray from geometric_features import FeatureCollection, GeometricFeatures +from matplotlib.path import Path from mpas_tools.io import write_netcdf from mpas_tools.logging import check_call from mpas_tools.mesh.conversion import convert, cull @@ -18,9 +19,11 @@ from mpas_tools.mesh.creation.sort_mesh import sort_mesh from mpas_tools.scrip.from_mpas import scrip_from_mpas from netCDF4 import Dataset +from pyproj import Transformer from scipy import ndimage from scipy.interpolate import interpn from scipy.ndimage import distance_transform_edt +from scipy.spatial import ConvexHull def mpas_flood_fill(seed_mask, grow_mask, cellsOnCell, nEdgesOnCell, @@ -1130,6 +1133,170 @@ def _nearest_fill_from_valid(field2d, valid_mask): return preprocessed_gridded_dataset +def add_grid_imask_from_dst_scrip_hull(source_scrip, dest_scrip, + masked_source_scrip, domain, + buffer_m=50e3, + source_crs='EPSG:4326', + mesh_crs=None, + logger=None): + """ + Create a new source SCRIP file with grid_imask set to 1 only for cells + that fall within the convex hull (plus buffer) of the destination SCRIP + footprint. This dramatically reduces the work ESMF_RegridWeightGen must do + when the source dataset is much larger than the destination mesh. + + Parameters + ---------- + source_scrip : str + Input source SCRIP file + + dest_scrip : str + Destination SCRIP file used by ESMF_RegridWeightGen + + masked_source_scrip : str + Output source SCRIP file with updated grid_imask + + domain : str + Projection domain. Recognised convenience keys: 'greenland', + 'gis-gimp', 'antarctica', 'ais-bedmap2'. Any other value requires + mesh_crs to be supplied explicitly. + + buffer_m : float, optional + Approximate outward hull expansion in meters + + source_crs : str, optional + CRS of lon/lat variables in SCRIP files (default 'EPSG:4326') + + mesh_crs : str, optional + Proj4/EPSG string for planar coordinates. Derived from domain if None. + + logger : logging.Logger, optional + Logger for status messages; falls back to print if None. + """ + + def _log(msg): + if logger is not None: + logger.info(msg) + else: + print(msg) + + projections = { + 'greenland': ( + '+proj=stere +lat_ts=70.0 +lat_0=90 +lon_0=315.0 +k_0=1.0 ' + '+x_0=0.0 +y_0=0.0 +ellps=WGS84' + ), + 'antarctica': ( + '+proj=stere +lat_ts=-71.0 +lat_0=-90 +lon_0=0.0 +k_0=1.0 ' + '+x_0=0.0 +y_0=0.0 +ellps=WGS84' + ), + } + projections['gis-gimp'] = projections['greenland'] + projections['ais-bedmap2'] = projections['antarctica'] + + if mesh_crs is None: + if domain not in projections: + raise ValueError( + f"Unknown domain '{domain}'. Expected one of " + f"{list(projections.keys())}, or supply mesh_crs explicitly." + ) + mesh_crs = projections[domain] + + transformer = Transformer.from_crs(source_crs, mesh_crs, always_xy=True) + + def _maybe_deg(lon, lat): + if (np.nanmax(np.abs(lon)) <= 2.0 * np.pi + 1.0e-6 and + np.nanmax(np.abs(lat)) <= 0.5 * np.pi + 1.0e-6): + lon = np.rad2deg(lon) + lat = np.rad2deg(lat) + return lon, lat + + def _projected_centers(ds): + if ('grid_center_x' in ds.variables and + 'grid_center_y' in ds.variables): + xc = ds['grid_center_x'].values + yc = ds['grid_center_y'].values + _log('Using existing projected source centers.') + else: + lon = ds['grid_center_lon'].values + lat = ds['grid_center_lat'].values + lon, lat = _maybe_deg(lon, lat) + xc, yc = transformer.transform(lon, lat) + _log('Projected source centers from lon/lat.') + return np.asarray(xc), np.asarray(yc) + + def _projected_dest_points(ds): + if ('grid_corner_x' in ds.variables and + 'grid_corner_y' in ds.variables): + x = ds['grid_corner_x'].values + y = ds['grid_corner_y'].values + _log('Using existing projected destination corners.') + elif ('grid_corner_lon' in ds.variables and + 'grid_corner_lat' in ds.variables): + lon = ds['grid_corner_lon'].values + lat = ds['grid_corner_lat'].values + lon, lat = _maybe_deg(lon, lat) + x, y = transformer.transform(lon, lat) + _log('Projected destination corners from lon/lat.') + elif ('grid_center_x' in ds.variables and + 'grid_center_y' in ds.variables): + x = ds['grid_center_x'].values + y = ds['grid_center_y'].values + _log('Using existing projected destination centers.') + else: + lon = ds['grid_center_lon'].values + lat = ds['grid_center_lat'].values + lon, lat = _maybe_deg(lon, lat) + x, y = transformer.transform(lon, lat) + _log('Projected destination centers from lon/lat.') + pts = np.column_stack([np.ravel(x), np.ravel(y)]) + pts = pts[np.all(np.isfinite(pts), axis=1)] + pts = np.unique(pts, axis=0) + return pts + + with xarray.open_dataset(dest_scrip) as ds_dst: + dst_points = _projected_dest_points(ds_dst) + + if dst_points.shape[0] < 3: + raise ValueError( + 'Not enough destination points to build a convex hull.') + + hull = ConvexHull(dst_points) + hull_points = dst_points[hull.vertices] + + if buffer_m > 0.0: + centroid = hull_points.mean(axis=0) + vec = hull_points - centroid + radius = np.sqrt((vec ** 2).sum(axis=1)) + nonzero = radius > 0.0 + vec[nonzero] = vec[nonzero] / radius[nonzero, np.newaxis] + hull_points = hull_points + buffer_m * vec + + hull_path = Path(hull_points, closed=True) + + with xarray.open_dataset(source_scrip) as ds_src: + xc, yc = _projected_centers(ds_src) + + _log(f'destination hull x extent: {hull_points[:, 0].min():.0f} to ' + f'{hull_points[:, 0].max():.0f}') + _log(f'destination hull y extent: {hull_points[:, 1].min():.0f} to ' + f'{hull_points[:, 1].max():.0f}') + _log(f'source x extent: {np.nanmin(xc):.0f} to {np.nanmax(xc):.0f}') + _log(f'source y extent: {np.nanmin(yc):.0f} to {np.nanmax(yc):.0f}') + + src_pts = np.column_stack([xc, yc]) + inside = hull_path.contains_points(src_pts).astype(np.int32) + _log(f'active source cells after masking: ' + f'{inside.sum()} / {inside.size}') + + ds_out = ds_src.copy() + ds_out['grid_imask'] = xarray.DataArray( + inside, + dims=('grid_size',), + attrs={'long_name': '0/1 mask for active source cells'} + ) + ds_out.to_netcdf(masked_source_scrip, mode='w') + + def interp_gridded2mali(self, source_file, mali_scrip, parallel_executable, nProcs, dest_file, proj, variables="all"): """ @@ -1194,10 +1361,22 @@ def __guess_scrip_name(filename): '-r', '2'] check_call(args, logger=logger) + # Mask source SCRIP to the footprint of the destination mesh so that + # ESMF_RegridWeightGen only processes the cells that overlap the target. + stem = os.path.splitext(source_scrip)[0] # strips .nc + masked_source_scrip = f'{stem}_masked.nc' + logger.info('masking source SCRIP to destination mesh footprint') + add_grid_imask_from_dst_scrip_hull( + source_scrip=source_scrip, + dest_scrip=mali_scrip, + masked_source_scrip=masked_source_scrip, + domain=proj, + logger=logger) + # Generate remapping weights logger.info('generating gridded dataset -> MPAS weights') args = [parallel_executable, '-n', nProcs, 'ESMF_RegridWeightGen', - '--source', source_scrip, + '--source', masked_source_scrip, '--destination', mali_scrip, '--weight', weights_filename, '--method', 'conserve', From 6051374f2eab3199b0215ec8094554252ad9a5ca Mon Sep 17 00:00:00 2001 From: Trevor Hillebrand Date: Mon, 4 May 2026 20:04:32 -0700 Subject: [PATCH 2/8] Update docs to describe masking SCRIP file before ESMF_RegridWeightGen --- docs/developers_guide/landice/api.rst | 1 + docs/users_guide/landice/test_groups/antarctica.rst | 7 +++++++ docs/users_guide/landice/test_groups/greenland.rst | 7 +++++++ 3 files changed, 15 insertions(+) diff --git a/docs/developers_guide/landice/api.rst b/docs/developers_guide/landice/api.rst index 55378a3f6c..4ffb7c4bb4 100644 --- a/docs/developers_guide/landice/api.rst +++ b/docs/developers_guide/landice/api.rst @@ -589,6 +589,7 @@ Landice Framework iceshelf_melt.calc_mean_TF mesh.add_bedmachine_thk_to_ais_gridded_data + mesh.add_grid_imask_from_dst_scrip_hull mesh.clean_up_after_interp mesh.clip_mesh_to_bounding_box mesh.get_mesh_config_bounding_box diff --git a/docs/users_guide/landice/test_groups/antarctica.rst b/docs/users_guide/landice/test_groups/antarctica.rst index 115d7ade26..e38b1340e8 100644 --- a/docs/users_guide/landice/test_groups/antarctica.rst +++ b/docs/users_guide/landice/test_groups/antarctica.rst @@ -124,4 +124,11 @@ datasets, so interpolation is enabled by default. The base-mesh projection used in ``build_mali_mesh()`` is fixed for this test case. +Before running ``ESMF_RegridWeightGen``, the interpolation step automatically +masks the source SCRIP file so that only cells overlapping the destination +mesh footprint (plus a 50 km buffer) are active. This avoids unnecessary +weight computation for the large portions of the BedMachine Antarctica domain +that lie outside the target mesh and substantially reduces ESMF +weight-generation time. + There is no model integration step. diff --git a/docs/users_guide/landice/test_groups/greenland.rst b/docs/users_guide/landice/test_groups/greenland.rst index f20f7dab1d..6448fc73ab 100644 --- a/docs/users_guide/landice/test_groups/greenland.rst +++ b/docs/users_guide/landice/test_groups/greenland.rst @@ -173,3 +173,10 @@ dataset interpolation step is skipped. The default config values include both datasets, so interpolation is enabled by default. The base-mesh projection used in ``build_mali_mesh()`` is fixed for this test case. + +Before running ``ESMF_RegridWeightGen``, the interpolation step automatically +masks the source SCRIP file so that only cells overlapping the destination +mesh footprint (plus a 50 km buffer) are active. This avoids unnecessary +weight computation for the large portions of the BedMachine v6 domain +(150 m native resolution, ~187 million cells) that lie outside the target +mesh and substantially reduces ESMF weight-generation time and memory footprint. From 011e29241f1aa63243800a122f2ff68e2eaa7a91 Mon Sep 17 00:00:00 2001 From: Trevor Hillebrand Date: Mon, 4 May 2026 20:07:07 -0700 Subject: [PATCH 3/8] Change default nProcs from 2048 to 1024 for 1-10km greenland mesh Change default nProcs from 2048 to 1024 for 1-10km greenland mesh, which is now possible thanks to masked scrip file that reduces the memory footprint of ESMF_RegridWeightGen. --- compass/landice/tests/greenland/mesh_gen/mesh_gen_1to10km.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compass/landice/tests/greenland/mesh_gen/mesh_gen_1to10km.cfg b/compass/landice/tests/greenland/mesh_gen/mesh_gen_1to10km.cfg index 0c06f6e393..a1ec03e711 100644 --- a/compass/landice/tests/greenland/mesh_gen/mesh_gen_1to10km.cfg +++ b/compass/landice/tests/greenland/mesh_gen/mesh_gen_1to10km.cfg @@ -70,4 +70,4 @@ measures_filename = greenland_vel_mosaic500_extrap.nc src_proj = gis-gimp # number of processors to use for ESMF_RegridWeightGen -nProcs = 2048 +nProcs = 1024 From b04bc64b35a9dab76aa4fc56f100440a1943b240 Mon Sep 17 00:00:00 2001 From: Trevor Hillebrand Date: Mon, 4 May 2026 20:15:32 -0700 Subject: [PATCH 4/8] Add comment detailing potential optimization Add comment detailing potential optimization, in which the convex hull of the MALI mesh is cached and reused by multiple source datasets when calculating masks before ESMF_RegridWeightGen. --- compass/landice/mesh.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/compass/landice/mesh.py b/compass/landice/mesh.py index 5df8893bcc..7a790ed571 100644 --- a/compass/landice/mesh.py +++ b/compass/landice/mesh.py @@ -1669,6 +1669,14 @@ def run_optional_interpolation( parallel_executable, nProcs, mesh_filename, src_proj, variables='all') + # Note: interp_gridded2mali independently masks the source SCRIP for + # each dataset call below. The destination hull is recomputed for each + # call, which is redundant since dst_scrip_file is the same for both. + # The redundant cost (reading dst_scrip_file and building the convex + # hull) scales with the destination MALI mesh size, so it becomes + # non-trivial for large meshes. If this becomes a bottleneck, it would + # be worth refactoring to compute the destination hull once and pass it + # to each interp_gridded2mali call. if measures_dataset is not None: measures_vars = ['observedSurfaceVelocityX', 'observedSurfaceVelocityY', From 9064a8c963f20aa2e12c7dca0abcd2f3304f2e4a Mon Sep 17 00:00:00 2001 From: Trevor Hillebrand Date: Mon, 4 May 2026 20:22:39 -0700 Subject: [PATCH 5/8] Reuse convex hull of MALI mesh for multiple source data sets. Reuse convex hull of MALI mesh to avoid redundant computation when interpolating from multiple source data sets (e.g., BedMachine and MEaSUREs). --- compass/landice/mesh.py | 209 +++++++++++++++----- docs/developers_guide/landice/api.rst | 1 + docs/developers_guide/landice/framework.rst | 38 +++- 3 files changed, 196 insertions(+), 52 deletions(-) diff --git a/compass/landice/mesh.py b/compass/landice/mesh.py index 7a790ed571..0ba5d66fe0 100644 --- a/compass/landice/mesh.py +++ b/compass/landice/mesh.py @@ -1133,28 +1133,23 @@ def _nearest_fill_from_valid(field2d, valid_mask): return preprocessed_gridded_dataset -def add_grid_imask_from_dst_scrip_hull(source_scrip, dest_scrip, - masked_source_scrip, domain, - buffer_m=50e3, - source_crs='EPSG:4326', - mesh_crs=None, - logger=None): +def build_dst_scrip_hull(dest_scrip, domain, buffer_m=50e3, + source_crs='EPSG:4326', mesh_crs=None, + logger=None): """ - Create a new source SCRIP file with grid_imask set to 1 only for cells - that fall within the convex hull (plus buffer) of the destination SCRIP - footprint. This dramatically reduces the work ESMF_RegridWeightGen must do - when the source dataset is much larger than the destination mesh. + Build a buffered convex hull from the footprint of a destination SCRIP + file and return it as a :py:class:`matplotlib.path.Path`. + + This is factored out of + :py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull` so + that the hull can be computed once and reused across multiple source + datasets that share the same destination mesh, avoiding redundant I/O + and computation. Parameters ---------- - source_scrip : str - Input source SCRIP file - dest_scrip : str - Destination SCRIP file used by ESMF_RegridWeightGen - - masked_source_scrip : str - Output source SCRIP file with updated grid_imask + Destination SCRIP file domain : str Projection domain. Recognised convenience keys: 'greenland', @@ -1162,16 +1157,21 @@ def add_grid_imask_from_dst_scrip_hull(source_scrip, dest_scrip, mesh_crs to be supplied explicitly. buffer_m : float, optional - Approximate outward hull expansion in meters + Approximate outward hull expansion in meters (default 50 km) source_crs : str, optional - CRS of lon/lat variables in SCRIP files (default 'EPSG:4326') + CRS of lon/lat variables in the SCRIP file (default 'EPSG:4326') mesh_crs : str, optional Proj4/EPSG string for planar coordinates. Derived from domain if None. logger : logging.Logger, optional Logger for status messages; falls back to print if None. + + Returns + ------- + hull_path : matplotlib.path.Path + Buffered convex hull of the destination mesh footprint. """ def _log(msg): @@ -1210,20 +1210,6 @@ def _maybe_deg(lon, lat): lat = np.rad2deg(lat) return lon, lat - def _projected_centers(ds): - if ('grid_center_x' in ds.variables and - 'grid_center_y' in ds.variables): - xc = ds['grid_center_x'].values - yc = ds['grid_center_y'].values - _log('Using existing projected source centers.') - else: - lon = ds['grid_center_lon'].values - lat = ds['grid_center_lat'].values - lon, lat = _maybe_deg(lon, lat) - xc, yc = transformer.transform(lon, lat) - _log('Projected source centers from lon/lat.') - return np.asarray(xc), np.asarray(yc) - def _projected_dest_points(ds): if ('grid_corner_x' in ds.variables and 'grid_corner_y' in ds.variables): @@ -1271,15 +1257,127 @@ def _projected_dest_points(ds): vec[nonzero] = vec[nonzero] / radius[nonzero, np.newaxis] hull_points = hull_points + buffer_m * vec - hull_path = Path(hull_points, closed=True) + _log(f'destination hull x extent: {hull_points[:, 0].min():.0f} to ' + f'{hull_points[:, 0].max():.0f}') + _log(f'destination hull y extent: {hull_points[:, 1].min():.0f} to ' + f'{hull_points[:, 1].max():.0f}') + + return Path(hull_points, closed=True) + + +def add_grid_imask_from_dst_scrip_hull(source_scrip, dest_scrip, + masked_source_scrip, domain, + buffer_m=50e3, + source_crs='EPSG:4326', + mesh_crs=None, + hull_path=None, + logger=None): + """ + Create a new source SCRIP file with grid_imask set to 1 only for cells + that fall within the convex hull (plus buffer) of the destination SCRIP + footprint. This dramatically reduces the work ESMF_RegridWeightGen must do + when the source dataset is much larger than the destination mesh. + + Parameters + ---------- + source_scrip : str + Input source SCRIP file + + dest_scrip : str + Destination SCRIP file used by ESMF_RegridWeightGen. Ignored if + hull_path is provided. + + masked_source_scrip : str + Output source SCRIP file with updated grid_imask + + domain : str + Projection domain. Recognised convenience keys: 'greenland', + 'gis-gimp', 'antarctica', 'ais-bedmap2'. Any other value requires + mesh_crs to be supplied explicitly. + + buffer_m : float, optional + Approximate outward hull expansion in meters. Ignored if hull_path + is provided. + + source_crs : str, optional + CRS of lon/lat variables in SCRIP files (default 'EPSG:4326') + + mesh_crs : str, optional + Proj4/EPSG string for planar coordinates. Derived from domain if None. + + hull_path : matplotlib.path.Path or None, optional + Pre-built buffered convex hull of the destination mesh footprint, as + returned by :py:func:`compass.landice.mesh.build_dst_scrip_hull`. + When provided, dest_scrip is not read and the hull is not recomputed, + which is useful when masking multiple source datasets against the same + destination mesh. + + logger : logging.Logger, optional + Logger for status messages; falls back to print if None. + """ + + def _log(msg): + if logger is not None: + logger.info(msg) + else: + print(msg) + + projections = { + 'greenland': ( + '+proj=stere +lat_ts=70.0 +lat_0=90 +lon_0=315.0 +k_0=1.0 ' + '+x_0=0.0 +y_0=0.0 +ellps=WGS84' + ), + 'antarctica': ( + '+proj=stere +lat_ts=-71.0 +lat_0=-90 +lon_0=0.0 +k_0=1.0 ' + '+x_0=0.0 +y_0=0.0 +ellps=WGS84' + ), + } + projections['gis-gimp'] = projections['greenland'] + projections['ais-bedmap2'] = projections['antarctica'] + + if mesh_crs is None: + if domain not in projections: + raise ValueError( + f"Unknown domain '{domain}'. Expected one of " + f"{list(projections.keys())}, or supply mesh_crs explicitly." + ) + mesh_crs = projections[domain] + + transformer = Transformer.from_crs(source_crs, mesh_crs, always_xy=True) + + def _maybe_deg(lon, lat): + if (np.nanmax(np.abs(lon)) <= 2.0 * np.pi + 1.0e-6 and + np.nanmax(np.abs(lat)) <= 0.5 * np.pi + 1.0e-6): + lon = np.rad2deg(lon) + lat = np.rad2deg(lat) + return lon, lat + + def _projected_centers(ds): + if ('grid_center_x' in ds.variables and + 'grid_center_y' in ds.variables): + xc = ds['grid_center_x'].values + yc = ds['grid_center_y'].values + _log('Using existing projected source centers.') + else: + lon = ds['grid_center_lon'].values + lat = ds['grid_center_lat'].values + lon, lat = _maybe_deg(lon, lat) + xc, yc = transformer.transform(lon, lat) + _log('Projected source centers from lon/lat.') + return np.asarray(xc), np.asarray(yc) + + if hull_path is None: + hull_path = build_dst_scrip_hull( + dest_scrip=dest_scrip, + domain=domain, + buffer_m=buffer_m, + source_crs=source_crs, + mesh_crs=mesh_crs, + logger=logger) with xarray.open_dataset(source_scrip) as ds_src: xc, yc = _projected_centers(ds_src) - _log(f'destination hull x extent: {hull_points[:, 0].min():.0f} to ' - f'{hull_points[:, 0].max():.0f}') - _log(f'destination hull y extent: {hull_points[:, 1].min():.0f} to ' - f'{hull_points[:, 1].max():.0f}') _log(f'source x extent: {np.nanmin(xc):.0f} to {np.nanmax(xc):.0f}') _log(f'source y extent: {np.nanmin(yc):.0f} to {np.nanmax(yc):.0f}') @@ -1298,7 +1396,8 @@ def _projected_dest_points(ds): def interp_gridded2mali(self, source_file, mali_scrip, parallel_executable, - nProcs, dest_file, proj, variables="all"): + nProcs, dest_file, proj, variables="all", + hull_path=None): """ Interpolate gridded dataset (e.g. MEASURES, BedMachine) onto a MALI mesh @@ -1324,6 +1423,13 @@ def interp_gridded2mali(self, source_file, mali_scrip, parallel_executable, variables: "all" or list of strings either the string "all" or a list of strings + + hull_path : matplotlib.path.Path or None, optional + Pre-built buffered convex hull of the destination mesh footprint, as + returned by :py:func:`compass.landice.mesh.build_dst_scrip_hull`. + When provided, the hull is not recomputed from mali_scrip, which + avoids redundant I/O and computation when this function is called + multiple times with the same destination mesh. """ def __guess_scrip_name(filename): @@ -1371,6 +1477,7 @@ def __guess_scrip_name(filename): dest_scrip=mali_scrip, masked_source_scrip=masked_source_scrip, domain=proj, + hull_path=hull_path, logger=logger) # Generate remapping weights @@ -1664,19 +1771,20 @@ def run_optional_interpolation( dst_scrip_file = f'{mesh_base}_scrip.nc' scrip_from_mpas(mesh_filename, dst_scrip_file) + # Build the destination hull once so it can be reused for both + # BedMachine and MEaSUREs without re-reading dst_scrip_file. + logger.info('building destination mesh convex hull for source masking') + hull_path = build_dst_scrip_hull( + dest_scrip=dst_scrip_file, + domain=src_proj, + logger=logger) + if bedmachine_dataset is not None: interp_gridded2mali(self, bedmachine_dataset, dst_scrip_file, parallel_executable, nProcs, - mesh_filename, src_proj, variables='all') - - # Note: interp_gridded2mali independently masks the source SCRIP for - # each dataset call below. The destination hull is recomputed for each - # call, which is redundant since dst_scrip_file is the same for both. - # The redundant cost (reading dst_scrip_file and building the convex - # hull) scales with the destination MALI mesh size, so it becomes - # non-trivial for large meshes. If this becomes a bottleneck, it would - # be worth refactoring to compute the destination hull once and pass it - # to each interp_gridded2mali call. + mesh_filename, src_proj, variables='all', + hull_path=hull_path) + if measures_dataset is not None: measures_vars = ['observedSurfaceVelocityX', 'observedSurfaceVelocityY', @@ -1684,7 +1792,8 @@ def run_optional_interpolation( interp_gridded2mali(self, measures_dataset, dst_scrip_file, parallel_executable, nProcs, mesh_filename, src_proj, - variables=measures_vars) + variables=measures_vars, + hull_path=hull_path) clean_up_after_interp(mesh_filename) finally: diff --git a/docs/developers_guide/landice/api.rst b/docs/developers_guide/landice/api.rst index 4ffb7c4bb4..307c094d27 100644 --- a/docs/developers_guide/landice/api.rst +++ b/docs/developers_guide/landice/api.rst @@ -590,6 +590,7 @@ Landice Framework mesh.add_bedmachine_thk_to_ais_gridded_data mesh.add_grid_imask_from_dst_scrip_hull + mesh.build_dst_scrip_hull mesh.clean_up_after_interp mesh.clip_mesh_to_bounding_box mesh.get_mesh_config_bounding_box diff --git a/docs/developers_guide/landice/framework.rst b/docs/developers_guide/landice/framework.rst index 463187675a..0b96a0d5ee 100644 --- a/docs/developers_guide/landice/framework.rst +++ b/docs/developers_guide/landice/framework.rst @@ -66,7 +66,36 @@ to the gridded dataset in order to separate the ice sheet from peripheral ice. :py:func:`compass.landice.mesh.interp_gridded2mali()` interpolates gridded data (e.g. BedMachine thickness or MEaSUREs ice velocity) to a MALI mesh, accounting for masking of the ice extent to avoid interpolation ramps. This functions works -for both Antarctica and Greenland. +for both Antarctica and Greenland. Before calling ``ESMF_RegridWeightGen``, it +calls :py:func:`compass.landice.mesh.build_dst_scrip_hull()` (unless a pre-built +``hull_path`` is passed) and then +:py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull()` to restrict +the source SCRIP to only the cells that overlap the destination mesh footprint. +This can substantially reduce the weight-generation time for high-resolution +source datasets (e.g. 150 m BedMachine v6) that are much larger than the target +mesh domain. When interpolating multiple datasets to the same destination mesh, +pass a pre-built ``hull_path`` (from +:py:func:`compass.landice.mesh.build_dst_scrip_hull()`) to avoid recomputing the +hull for each dataset. + +:py:func:`compass.landice.mesh.build_dst_scrip_hull()` builds a buffered +convex hull from a destination SCRIP file and returns it as a +``matplotlib.path.Path``. It is factored out of +:py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull()` so the +hull can be computed once and passed to multiple +:py:func:`compass.landice.mesh.interp_gridded2mali()` calls that share the +same destination mesh, avoiding redundant I/O whose cost scales with +destination mesh size. + +:py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull()` creates a +new source SCRIP file with ``grid_imask`` set to 1 only for source cells that +fall within the convex hull of the destination SCRIP footprint (plus a +configurable buffer, default 50 km). The function projects both SCRIP files +into a planar stereographic coordinate system (determined by the ``domain`` +argument, e.g. ``'gis-gimp'`` or ``'ais-bedmap2'``) and uses a +``matplotlib.path.Path`` point-in-polygon test for efficiency. The resulting +masked SCRIP file is passed to ``ESMF_RegridWeightGen`` in place of the +full-domain source SCRIP. :py:func:`compass.landice.mesh.preprocess_ais_data()` performs adjustments to gridded AIS datasets needed for rest of compass workflow to utilize them. @@ -219,4 +248,9 @@ For mesh prototyping, it is a good idea to use `interpolate_data = False`, as the interpolation step can be slow and require many more resources than the rest of the mesh generation process. For instance, a 1–10km Greenland mesh can be created on a single Perlmutter CPU node, but data interpolation may -require up to 16 nodes. +require several nodes. The source SCRIP masking performed by +:py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull()` before +calling ``ESMF_RegridWeightGen`` reduces the effective source-grid size to +only the cells overlapping the destination mesh footprint, which significantly +reduces the weight-generation time compared to running on the full global or +continental source SCRIP. From 05d685041ff61cf1b209f971f836e8b4d120d810 Mon Sep 17 00:00:00 2001 From: Trevor Hillebrand Date: Tue, 5 May 2026 10:10:33 -0700 Subject: [PATCH 6/8] Plot convex hull of MALI mesh Plot the convex hull fo the MALI mesh as well as the active cells in the SCRIP file after masking and the source data set bounds. Save plot to png. --- compass/landice/mesh.py | 201 ++++++++++++++++++++++++++ docs/developers_guide/landice/api.rst | 1 + 2 files changed, 202 insertions(+) diff --git a/compass/landice/mesh.py b/compass/landice/mesh.py index 0ba5d66fe0..12df70844b 100644 --- a/compass/landice/mesh.py +++ b/compass/landice/mesh.py @@ -1395,6 +1395,127 @@ def _projected_centers(ds): ds_out.to_netcdf(masked_source_scrip, mode='w') +def plot_hull_diagnostic(hull_path, dest_scrip, source_bbox_files, + transformer, _maybe_deg, active_xc, active_yc, + logger=None): + """ + Save a diagnostic PNG (``convex_hull.png``) showing the masking geometry. + + Parameters + ---------- + hull_path : matplotlib.path.Path + Buffered convex hull of the destination mesh. + + dest_scrip : str + Destination SCRIP file (used to scatter MALI domain extent). + + source_bbox_files : list of tuple + Each entry is ``(filepath, label, edgecolor)`` for a source dataset + whose bounding box should be drawn. + + transformer : pyproj.Transformer + Transformer from lon/lat to planar coordinates. + + _maybe_deg : callable + Helper that converts radian angles to degrees when needed. + + active_xc : numpy.ndarray + Projected x-coordinates of all active (masked-in) source cells. + + active_yc : numpy.ndarray + Projected y-coordinates of all active (masked-in) source cells. + + logger : logging.Logger, optional + Logger for status messages; falls back to print if None. + """ + def _log(msg): + if logger is not None: + logger.info(msg) + else: + print(msg) + + try: + from matplotlib.patches import Polygon as MplPolygon + from matplotlib.patches import Rectangle + + # --- destination SCRIP centers (MALI domain extent) --- + with xarray.open_dataset(dest_scrip) as ds_dst: + if ('grid_center_x' in ds_dst.variables and + 'grid_center_y' in ds_dst.variables): + xd = np.asarray(ds_dst['grid_center_x'].values) + yd = np.asarray(ds_dst['grid_center_y'].values) + else: + lon = ds_dst['grid_center_lon'].values + lat = ds_dst['grid_center_lat'].values + lon, lat = _maybe_deg(lon, lat) + xd, yd = transformer.transform(lon, lat) + xd, yd = np.asarray(xd), np.asarray(yd) + + rng = np.random.default_rng(seed=0) + + def _subsample(x, y, n=50000): + idx = np.where(np.isfinite(x) & np.isfinite(y))[0] + if len(idx) > n: + idx = rng.choice(idx, size=n, replace=False) + return x[idx], y[idx] + + def _src_extent(filepath): + """Return (x_min, x_max, y_min, y_max) from a gridded dataset.""" + with xarray.open_dataset(filepath) as ds: + if 'x1' in ds and 'y1' in ds: + x, y = ds['x1'].values, ds['y1'].values + elif 'x' in ds and 'y' in ds: + x, y = ds['x'].values, ds['y'].values + else: + return None + return float(x.min()), float(x.max()), \ + float(y.min()), float(y.max()) + + fig, ax = plt.subplots(figsize=(10, 10)) + + # active source cells — ice-sheet extent (tab:blue) + xs, ys = _subsample(active_xc, active_yc) + ax.scatter(xs, ys, s=0.5, color='tab:blue', alpha=0.4, + label='active source cells', rasterized=True) + + # MALI domain extent (tab:pink) + xs, ys = _subsample(xd, yd) + ax.scatter(xs, ys, s=1.5, color='tab:pink', alpha=0.6, + label='MALI domain extent', rasterized=True) + + # source bounding boxes + for filepath, label, color in source_bbox_files: + ext = _src_extent(filepath) + if ext is None: + continue + x0, x1, y0, y1 = ext + bbox = Rectangle( + (x0, y0), x1 - x0, y1 - y0, + linewidth=1.5, edgecolor=color, + facecolor='none', label=label) + ax.add_patch(bbox) + + # convex hull (tab:green) + hull_verts = hull_path.vertices + hull_patch = MplPolygon( + hull_verts, closed=True, + linewidth=2, edgecolor='tab:green', + facecolor='none', label='convex hull (buffered)') + ax.add_patch(hull_patch) + + ax.set_aspect('equal') + ax.set_xlabel('x (m)') + ax.set_ylabel('y (m)') + ax.legend(loc='best', markerscale=6) + ax.set_title('Source SCRIP masking: hull vs. domain') + fig.savefig('convex_hull.png', dpi=150, + bbox_inches='tight', facecolor=fig.get_facecolor()) + plt.close(fig) + _log('Saved masking diagnostic plot to convex_hull.png') + except Exception as exc: + _log(f'Warning: could not save convex hull plot: {exc}') + + def interp_gridded2mali(self, source_file, mali_scrip, parallel_executable, nProcs, dest_file, proj, variables="all", hull_path=None): @@ -1795,6 +1916,86 @@ def run_optional_interpolation( variables=measures_vars, hull_path=hull_path) + # Diagnostic plot: show hull, MALI domain, and bounding boxes for + # all source datasets that were interpolated. + if bedmachine_dataset is not None or measures_dataset is not None: + projections = { + 'greenland': ( + '+proj=stere +lat_ts=70.0 +lat_0=90 +lon_0=315.0' + ' +k_0=1.0 +x_0=0.0 +y_0=0.0 +ellps=WGS84' + ), + 'antarctica': ( + '+proj=stere +lat_ts=-71.0 +lat_0=-90 +lon_0=0.0' + ' +k_0=1.0 +x_0=0.0 +y_0=0.0 +ellps=WGS84' + ), + } + projections['gis-gimp'] = projections['greenland'] + projections['ais-bedmap2'] = projections['antarctica'] + mesh_crs = projections.get(src_proj, src_proj) + transformer = Transformer.from_crs( + 'EPSG:4326', mesh_crs, always_xy=True) + + def _maybe_deg_plot(lon, lat): + if (np.nanmax(np.abs(lon)) <= 2.0 * np.pi + 1.0e-6 and + np.nanmax(np.abs(lat)) <= 0.5 * np.pi + 1.0e-6): + lon = np.rad2deg(lon) + lat = np.rad2deg(lat) + return lon, lat + + # Collect active source cells from BedMachine (first dataset) + # as a representative ice-sheet extent for the plot. + active_xc = np.array([]) + active_yc = np.array([]) + first_src = bedmachine_dataset or measures_dataset + bm_stem = os.path.splitext( + os.path.basename(first_src))[0] + # Try to find the masked SCRIP that was written to workdir + import re as _re + match = _re.search(r'(^.*[_-]v\d*[_-])+', bm_stem) + if match: + scrip_stem = bm_stem[:match.end() - 1] + else: + scrip_stem = bm_stem + masked_scrip = f'{scrip_stem}.scrip_masked.nc' + if os.path.exists(masked_scrip): + with xarray.open_dataset(masked_scrip) as ds_m: + imask = ds_m['grid_imask'].values.astype(bool) + if ('grid_center_x' in ds_m.variables and + 'grid_center_y' in ds_m.variables): + active_xc = ds_m['grid_center_x'].values[imask] + active_yc = ds_m['grid_center_y'].values[imask] + elif ('grid_center_lon' in ds_m.variables and + 'grid_center_lat' in ds_m.variables): + lon = ds_m['grid_center_lon'].values[imask] + lat = ds_m['grid_center_lat'].values[imask] + lon, lat = _maybe_deg_plot(lon, lat) + active_xc, active_yc = transformer.transform( + lon, lat) + active_xc = np.asarray(active_xc) + active_yc = np.asarray(active_yc) + + bbox_files = [] + if bedmachine_dataset is not None: + bbox_files.append(( + bedmachine_dataset, + 'BedMachine bounding box', + 'black')) + if measures_dataset is not None: + bbox_files.append(( + measures_dataset, + 'MEaSUREs bounding box', + 'grey')) + + plot_hull_diagnostic( + hull_path=hull_path, + dest_scrip=dst_scrip_file, + source_bbox_files=bbox_files, + transformer=transformer, + _maybe_deg=_maybe_deg_plot, + active_xc=active_xc, + active_yc=active_yc, + logger=logger) + clean_up_after_interp(mesh_filename) finally: for subset_file in subset_files: diff --git a/docs/developers_guide/landice/api.rst b/docs/developers_guide/landice/api.rst index 307c094d27..baac89ca63 100644 --- a/docs/developers_guide/landice/api.rst +++ b/docs/developers_guide/landice/api.rst @@ -593,6 +593,7 @@ Landice Framework mesh.build_dst_scrip_hull mesh.clean_up_after_interp mesh.clip_mesh_to_bounding_box + mesh.plot_hull_diagnostic mesh.get_mesh_config_bounding_box mesh.get_optional_interp_datasets mesh.gridded_flood_fill From 8c1dc31656bfdd9a49615c022b7478ff35bf3780 Mon Sep 17 00:00:00 2001 From: Trevor Hillebrand Date: Tue, 5 May 2026 10:41:52 -0700 Subject: [PATCH 7/8] Remove scatterplot of source grid cells Remove scatterplot of source grid cells, which is redundant with the convex hull contour. --- compass/landice/mesh.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/compass/landice/mesh.py b/compass/landice/mesh.py index 12df70844b..a40e70f8c9 100644 --- a/compass/landice/mesh.py +++ b/compass/landice/mesh.py @@ -1475,13 +1475,11 @@ def _src_extent(filepath): # active source cells — ice-sheet extent (tab:blue) xs, ys = _subsample(active_xc, active_yc) - ax.scatter(xs, ys, s=0.5, color='tab:blue', alpha=0.4, - label='active source cells', rasterized=True) # MALI domain extent (tab:pink) xs, ys = _subsample(xd, yd) ax.scatter(xs, ys, s=1.5, color='tab:pink', alpha=0.6, - label='MALI domain extent', rasterized=True) + label='MALI cells (subsampled)', rasterized=True) # source bounding boxes for filepath, label, color in source_bbox_files: @@ -1495,7 +1493,7 @@ def _src_extent(filepath): facecolor='none', label=label) ax.add_patch(bbox) - # convex hull (tab:green) + # convex hull hull_verts = hull_path.vertices hull_patch = MplPolygon( hull_verts, closed=True, From 69d8b8e3d6ec1de26af1a9e417a8e0a89bac3131 Mon Sep 17 00:00:00 2001 From: Trevor Hillebrand Date: Tue, 5 May 2026 12:00:24 -0700 Subject: [PATCH 8/8] Use rasterize-dilate-contour approach rather than convex hull Use rasterize-dilate-contour approach rather than convex hull to mask SCRIP files for source data sets. This results in a much tighter-fitting mask around the MALI domain and masks out many more cells from the source files, which will significantly further decrease the memory footprint of ESMF_RegridWeightGen. --- compass/landice/mesh.py | 133 ++++++++++++------ docs/developers_guide/landice/framework.rst | 13 +- .../landice/test_groups/antarctica.rst | 11 +- .../landice/test_groups/greenland.rst | 12 +- 4 files changed, 112 insertions(+), 57 deletions(-) diff --git a/compass/landice/mesh.py b/compass/landice/mesh.py index a40e70f8c9..bfe1bb3734 100644 --- a/compass/landice/mesh.py +++ b/compass/landice/mesh.py @@ -22,8 +22,7 @@ from pyproj import Transformer from scipy import ndimage from scipy.interpolate import interpn -from scipy.ndimage import distance_transform_edt -from scipy.spatial import ConvexHull +from scipy.ndimage import binary_dilation, distance_transform_edt def mpas_flood_fill(seed_mask, grow_mask, cellsOnCell, nEdgesOnCell, @@ -1137,12 +1136,18 @@ def build_dst_scrip_hull(dest_scrip, domain, buffer_m=50e3, source_crs='EPSG:4326', mesh_crs=None, logger=None): """ - Build a buffered convex hull from the footprint of a destination SCRIP - file and return it as a :py:class:`matplotlib.path.Path`. + Build a buffered concave boundary from the footprint of a destination + SCRIP file and return it as a :py:class:`matplotlib.path.Path`. + + The boundary is constructed by rasterising the destination mesh points + onto a coarse grid (10 km cell size), dilating by ``buffer_m``, and + extracting the outer contour. This produces a tight, concave mask that + follows the actual domain shape (including bays and fjords) while + remaining computationally inexpensive (< 1 s for any realistic mesh). This is factored out of :py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull` so - that the hull can be computed once and reused across multiple source + that the boundary can be computed once and reused across multiple source datasets that share the same destination mesh, avoiding redundant I/O and computation. @@ -1157,7 +1162,7 @@ def build_dst_scrip_hull(dest_scrip, domain, buffer_m=50e3, mesh_crs to be supplied explicitly. buffer_m : float, optional - Approximate outward hull expansion in meters (default 50 km) + Outward buffer distance in meters (default 50 km) source_crs : str, optional CRS of lon/lat variables in the SCRIP file (default 'EPSG:4326') @@ -1171,7 +1176,7 @@ def build_dst_scrip_hull(dest_scrip, domain, buffer_m=50e3, Returns ------- hull_path : matplotlib.path.Path - Buffered convex hull of the destination mesh footprint. + Buffered concave boundary of the destination mesh footprint. """ def _log(msg): @@ -1236,7 +1241,6 @@ def _projected_dest_points(ds): _log('Projected destination centers from lon/lat.') pts = np.column_stack([np.ravel(x), np.ravel(y)]) pts = pts[np.all(np.isfinite(pts), axis=1)] - pts = np.unique(pts, axis=0) return pts with xarray.open_dataset(dest_scrip) as ds_dst: @@ -1244,25 +1248,68 @@ def _projected_dest_points(ds): if dst_points.shape[0] < 3: raise ValueError( - 'Not enough destination points to build a convex hull.') - - hull = ConvexHull(dst_points) - hull_points = dst_points[hull.vertices] + 'Not enough destination points to build a boundary.') + + # --- Rasterize-dilate-contour approach --- + grid_spacing = 10e3 # 10 km cell size for the coarse raster + + x_min = dst_points[:, 0].min() - buffer_m + x_max = dst_points[:, 0].max() + buffer_m + y_min = dst_points[:, 1].min() - buffer_m + y_max = dst_points[:, 1].max() + buffer_m + + nx = int(np.ceil((x_max - x_min) / grid_spacing)) + 1 + ny = int(np.ceil((y_max - y_min) / grid_spacing)) + 1 + + # Bin destination points onto the coarse grid + ix = ((dst_points[:, 0] - x_min) / grid_spacing).astype(int) + iy = ((dst_points[:, 1] - y_min) / grid_spacing).astype(int) + ix = np.clip(ix, 0, nx - 1) + iy = np.clip(iy, 0, ny - 1) + occupancy = np.zeros((ny, nx), dtype=bool) + occupancy[iy, ix] = True + + # Build a disk structuring element for dilation + radius_px = int(np.ceil(buffer_m / grid_spacing)) + yy, xx = np.ogrid[-radius_px:radius_px + 1, + -radius_px:radius_px + 1] + disk = (xx ** 2 + yy ** 2) <= radius_px ** 2 + + # Dilate the occupancy mask + dilated = binary_dilation(occupancy, structure=disk) + + # Pad with False so the contour never exits the grid boundary + # (prevents straight-line artifacts from open contour paths) + dilated_padded = np.pad(dilated, pad_width=1, + constant_values=False) + + # Extract the outer contour at the 0.5 level + fig_tmp, ax_tmp = plt.subplots() + cs = ax_tmp.contour(dilated_padded.astype(float), levels=[0.5]) + plt.close(fig_tmp) + + # Find the longest contour path (outer boundary) + all_paths = cs.allsegs[0] + if not all_paths: + raise ValueError( + 'Could not extract a contour from the dilated mask.') + longest = max(all_paths, key=lambda p: len(p)) - if buffer_m > 0.0: - centroid = hull_points.mean(axis=0) - vec = hull_points - centroid - radius = np.sqrt((vec ** 2).sum(axis=1)) - nonzero = radius > 0.0 - vec[nonzero] = vec[nonzero] / radius[nonzero, np.newaxis] - hull_points = hull_points + buffer_m * vec + # Convert pixel coordinates back to projected coordinates + # (subtract 1 to account for the padding pixel) + poly_x = x_min + (longest[:, 0] - 1) * grid_spacing + poly_y = y_min + (longest[:, 1] - 1) * grid_spacing + boundary_pts = np.column_stack([poly_x, poly_y]) - _log(f'destination hull x extent: {hull_points[:, 0].min():.0f} to ' - f'{hull_points[:, 0].max():.0f}') - _log(f'destination hull y extent: {hull_points[:, 1].min():.0f} to ' - f'{hull_points[:, 1].max():.0f}') + _log(f'destination boundary x extent: ' + f'{boundary_pts[:, 0].min():.0f} to ' + f'{boundary_pts[:, 0].max():.0f}') + _log(f'destination boundary y extent: ' + f'{boundary_pts[:, 1].min():.0f} to ' + f'{boundary_pts[:, 1].max():.0f}') + _log(f'boundary polygon vertices: {len(boundary_pts)}') - return Path(hull_points, closed=True) + return Path(boundary_pts, closed=True) def add_grid_imask_from_dst_scrip_hull(source_scrip, dest_scrip, @@ -1274,7 +1321,7 @@ def add_grid_imask_from_dst_scrip_hull(source_scrip, dest_scrip, logger=None): """ Create a new source SCRIP file with grid_imask set to 1 only for cells - that fall within the convex hull (plus buffer) of the destination SCRIP + that fall within the boundary (plus buffer) of the destination SCRIP footprint. This dramatically reduces the work ESMF_RegridWeightGen must do when the source dataset is much larger than the destination mesh. @@ -1306,11 +1353,11 @@ def add_grid_imask_from_dst_scrip_hull(source_scrip, dest_scrip, Proj4/EPSG string for planar coordinates. Derived from domain if None. hull_path : matplotlib.path.Path or None, optional - Pre-built buffered convex hull of the destination mesh footprint, as - returned by :py:func:`compass.landice.mesh.build_dst_scrip_hull`. - When provided, dest_scrip is not read and the hull is not recomputed, - which is useful when masking multiple source datasets against the same - destination mesh. + Pre-built buffered concave boundary of the destination mesh footprint, + as returned by :py:func:`compass.landice.mesh.build_dst_scrip_hull`. + When provided, dest_scrip is not read and the boundary is not + recomputed, which is useful when masking multiple source datasets + against the same destination mesh. logger : logging.Logger, optional Logger for status messages; falls back to print if None. @@ -1399,12 +1446,12 @@ def plot_hull_diagnostic(hull_path, dest_scrip, source_bbox_files, transformer, _maybe_deg, active_xc, active_yc, logger=None): """ - Save a diagnostic PNG (``convex_hull.png``) showing the masking geometry. + Save a diagnostic PNG (``mesh_boundary.png``) showing the masking geometry. Parameters ---------- hull_path : matplotlib.path.Path - Buffered convex hull of the destination mesh. + Buffered concave boundary of the destination mesh. dest_scrip : str Destination SCRIP file (used to scatter MALI domain extent). @@ -1493,25 +1540,25 @@ def _src_extent(filepath): facecolor='none', label=label) ax.add_patch(bbox) - # convex hull + # mesh boundary (concave, buffered) hull_verts = hull_path.vertices hull_patch = MplPolygon( hull_verts, closed=True, linewidth=2, edgecolor='tab:green', - facecolor='none', label='convex hull (buffered)') + facecolor='none', label='mesh boundary (buffered)') ax.add_patch(hull_patch) ax.set_aspect('equal') ax.set_xlabel('x (m)') ax.set_ylabel('y (m)') ax.legend(loc='best', markerscale=6) - ax.set_title('Source SCRIP masking: hull vs. domain') - fig.savefig('convex_hull.png', dpi=150, + ax.set_title('Source SCRIP masking: boundary vs. domain') + fig.savefig('mesh_boundary.png', dpi=150, bbox_inches='tight', facecolor=fig.get_facecolor()) plt.close(fig) - _log('Saved masking diagnostic plot to convex_hull.png') + _log('Saved masking diagnostic plot to mesh_boundary.png') except Exception as exc: - _log(f'Warning: could not save convex hull plot: {exc}') + _log(f'Warning: could not save mesh boundary plot: {exc}') def interp_gridded2mali(self, source_file, mali_scrip, parallel_executable, @@ -1544,9 +1591,9 @@ def interp_gridded2mali(self, source_file, mali_scrip, parallel_executable, either the string "all" or a list of strings hull_path : matplotlib.path.Path or None, optional - Pre-built buffered convex hull of the destination mesh footprint, as - returned by :py:func:`compass.landice.mesh.build_dst_scrip_hull`. - When provided, the hull is not recomputed from mali_scrip, which + Pre-built buffered concave boundary of the destination mesh footprint, + as returned by :py:func:`compass.landice.mesh.build_dst_scrip_hull`. + When provided, the boundary is not recomputed from mali_scrip, which avoids redundant I/O and computation when this function is called multiple times with the same destination mesh. """ @@ -1890,9 +1937,9 @@ def run_optional_interpolation( dst_scrip_file = f'{mesh_base}_scrip.nc' scrip_from_mpas(mesh_filename, dst_scrip_file) - # Build the destination hull once so it can be reused for both + # Build the destination boundary once so it can be reused for both # BedMachine and MEaSUREs without re-reading dst_scrip_file. - logger.info('building destination mesh convex hull for source masking') + logger.info('building destination mesh boundary for source masking') hull_path = build_dst_scrip_hull( dest_scrip=dst_scrip_file, domain=src_proj, diff --git a/docs/developers_guide/landice/framework.rst b/docs/developers_guide/landice/framework.rst index 0b96a0d5ee..712eed33ba 100644 --- a/docs/developers_guide/landice/framework.rst +++ b/docs/developers_guide/landice/framework.rst @@ -79,17 +79,22 @@ pass a pre-built ``hull_path`` (from hull for each dataset. :py:func:`compass.landice.mesh.build_dst_scrip_hull()` builds a buffered -convex hull from a destination SCRIP file and returns it as a -``matplotlib.path.Path``. It is factored out of +concave boundary from a destination SCRIP file and returns it as a +``matplotlib.path.Path``. The boundary is constructed by rasterising +destination mesh points onto a coarse grid (10 km cell size), dilating +by the buffer distance (default 50 km), and extracting the outer contour. +This produces a tight boundary that follows the actual domain shape +(including bays and fjords) while remaining computationally inexpensive. +It is factored out of :py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull()` so the -hull can be computed once and passed to multiple +boundary can be computed once and passed to multiple :py:func:`compass.landice.mesh.interp_gridded2mali()` calls that share the same destination mesh, avoiding redundant I/O whose cost scales with destination mesh size. :py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull()` creates a new source SCRIP file with ``grid_imask`` set to 1 only for source cells that -fall within the convex hull of the destination SCRIP footprint (plus a +fall within the concave boundary of the destination SCRIP footprint (plus a configurable buffer, default 50 km). The function projects both SCRIP files into a planar stereographic coordinate system (determined by the ``domain`` argument, e.g. ``'gis-gimp'`` or ``'ais-bedmap2'``) and uses a diff --git a/docs/users_guide/landice/test_groups/antarctica.rst b/docs/users_guide/landice/test_groups/antarctica.rst index e38b1340e8..a9fabb8c01 100644 --- a/docs/users_guide/landice/test_groups/antarctica.rst +++ b/docs/users_guide/landice/test_groups/antarctica.rst @@ -125,10 +125,11 @@ The base-mesh projection used in ``build_mali_mesh()`` is fixed for this test case. Before running ``ESMF_RegridWeightGen``, the interpolation step automatically -masks the source SCRIP file so that only cells overlapping the destination -mesh footprint (plus a 50 km buffer) are active. This avoids unnecessary -weight computation for the large portions of the BedMachine Antarctica domain -that lie outside the target mesh and substantially reduces ESMF -weight-generation time. +masks the source SCRIP file so that only cells overlapping a tight concave +boundary around the destination mesh (plus a 50 km buffer) are active. The +boundary follows the actual domain shape rather than a simple convex hull. +This avoids unnecessary weight computation for the large portions of the +BedMachine Antarctica domain that lie outside the target mesh and substantially +reduces ESMF weight-generation time. There is no model integration step. diff --git a/docs/users_guide/landice/test_groups/greenland.rst b/docs/users_guide/landice/test_groups/greenland.rst index 6448fc73ab..cf544f7516 100644 --- a/docs/users_guide/landice/test_groups/greenland.rst +++ b/docs/users_guide/landice/test_groups/greenland.rst @@ -175,8 +175,10 @@ The base-mesh projection used in ``build_mali_mesh()`` is fixed for this test case. Before running ``ESMF_RegridWeightGen``, the interpolation step automatically -masks the source SCRIP file so that only cells overlapping the destination -mesh footprint (plus a 50 km buffer) are active. This avoids unnecessary -weight computation for the large portions of the BedMachine v6 domain -(150 m native resolution, ~187 million cells) that lie outside the target -mesh and substantially reduces ESMF weight-generation time and memory footprint. +masks the source SCRIP file so that only cells overlapping a tight concave +boundary around the destination mesh (plus a 50 km buffer) are active. The +boundary follows the actual domain shape — including bays and fjords — rather +than a simple convex hull. This avoids unnecessary weight computation for the +large portions of the BedMachine v6 domain (150 m native resolution, +~187 million cells) that lie outside the target mesh and substantially reduces +ESMF weight-generation time and memory footprint.