diff --git a/compass/landice/mesh.py b/compass/landice/mesh.py index 0d7455bcc4..bfe1bb3734 100644 --- a/compass/landice/mesh.py +++ b/compass/landice/mesh.py @@ -11,6 +11,7 @@ import numpy as np import xarray from geometric_features import FeatureCollection, GeometricFeatures +from matplotlib.path import Path from mpas_tools.io import write_netcdf from mpas_tools.logging import check_call from mpas_tools.mesh.conversion import convert, cull @@ -18,9 +19,10 @@ from mpas_tools.mesh.creation.sort_mesh import sort_mesh from mpas_tools.scrip.from_mpas import scrip_from_mpas from netCDF4 import Dataset +from pyproj import Transformer from scipy import ndimage from scipy.interpolate import interpn -from scipy.ndimage import distance_transform_edt +from scipy.ndimage import binary_dilation, distance_transform_edt def mpas_flood_fill(seed_mask, grow_mask, cellsOnCell, nEdgesOnCell, @@ -1130,8 +1132,438 @@ def _nearest_fill_from_valid(field2d, valid_mask): return preprocessed_gridded_dataset +def build_dst_scrip_hull(dest_scrip, domain, buffer_m=50e3, + source_crs='EPSG:4326', mesh_crs=None, + logger=None): + """ + Build a buffered concave boundary from the footprint of a destination + SCRIP file and return it as a :py:class:`matplotlib.path.Path`. + + The boundary is constructed by rasterising the destination mesh points + onto a coarse grid (10 km cell size), dilating by ``buffer_m``, and + extracting the outer contour. This produces a tight, concave mask that + follows the actual domain shape (including bays and fjords) while + remaining computationally inexpensive (< 1 s for any realistic mesh). + + This is factored out of + :py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull` so + that the boundary can be computed once and reused across multiple source + datasets that share the same destination mesh, avoiding redundant I/O + and computation. + + Parameters + ---------- + dest_scrip : str + Destination SCRIP file + + domain : str + Projection domain. Recognised convenience keys: 'greenland', + 'gis-gimp', 'antarctica', 'ais-bedmap2'. Any other value requires + mesh_crs to be supplied explicitly. + + buffer_m : float, optional + Outward buffer distance in meters (default 50 km) + + source_crs : str, optional + CRS of lon/lat variables in the SCRIP file (default 'EPSG:4326') + + mesh_crs : str, optional + Proj4/EPSG string for planar coordinates. Derived from domain if None. + + logger : logging.Logger, optional + Logger for status messages; falls back to print if None. + + Returns + ------- + hull_path : matplotlib.path.Path + Buffered concave boundary of the destination mesh footprint. + """ + + def _log(msg): + if logger is not None: + logger.info(msg) + else: + print(msg) + + projections = { + 'greenland': ( + '+proj=stere +lat_ts=70.0 +lat_0=90 +lon_0=315.0 +k_0=1.0 ' + '+x_0=0.0 +y_0=0.0 +ellps=WGS84' + ), + 'antarctica': ( + '+proj=stere +lat_ts=-71.0 +lat_0=-90 +lon_0=0.0 +k_0=1.0 ' + '+x_0=0.0 +y_0=0.0 +ellps=WGS84' + ), + } + projections['gis-gimp'] = projections['greenland'] + projections['ais-bedmap2'] = projections['antarctica'] + + if mesh_crs is None: + if domain not in projections: + raise ValueError( + f"Unknown domain '{domain}'. Expected one of " + f"{list(projections.keys())}, or supply mesh_crs explicitly." + ) + mesh_crs = projections[domain] + + transformer = Transformer.from_crs(source_crs, mesh_crs, always_xy=True) + + def _maybe_deg(lon, lat): + if (np.nanmax(np.abs(lon)) <= 2.0 * np.pi + 1.0e-6 and + np.nanmax(np.abs(lat)) <= 0.5 * np.pi + 1.0e-6): + lon = np.rad2deg(lon) + lat = np.rad2deg(lat) + return lon, lat + + def _projected_dest_points(ds): + if ('grid_corner_x' in ds.variables and + 'grid_corner_y' in ds.variables): + x = ds['grid_corner_x'].values + y = ds['grid_corner_y'].values + _log('Using existing projected destination corners.') + elif ('grid_corner_lon' in ds.variables and + 'grid_corner_lat' in ds.variables): + lon = ds['grid_corner_lon'].values + lat = ds['grid_corner_lat'].values + lon, lat = _maybe_deg(lon, lat) + x, y = transformer.transform(lon, lat) + _log('Projected destination corners from lon/lat.') + elif ('grid_center_x' in ds.variables and + 'grid_center_y' in ds.variables): + x = ds['grid_center_x'].values + y = ds['grid_center_y'].values + _log('Using existing projected destination centers.') + else: + lon = ds['grid_center_lon'].values + lat = ds['grid_center_lat'].values + lon, lat = _maybe_deg(lon, lat) + x, y = transformer.transform(lon, lat) + _log('Projected destination centers from lon/lat.') + pts = np.column_stack([np.ravel(x), np.ravel(y)]) + pts = pts[np.all(np.isfinite(pts), axis=1)] + return pts + + with xarray.open_dataset(dest_scrip) as ds_dst: + dst_points = _projected_dest_points(ds_dst) + + if dst_points.shape[0] < 3: + raise ValueError( + 'Not enough destination points to build a boundary.') + + # --- Rasterize-dilate-contour approach --- + grid_spacing = 10e3 # 10 km cell size for the coarse raster + + x_min = dst_points[:, 0].min() - buffer_m + x_max = dst_points[:, 0].max() + buffer_m + y_min = dst_points[:, 1].min() - buffer_m + y_max = dst_points[:, 1].max() + buffer_m + + nx = int(np.ceil((x_max - x_min) / grid_spacing)) + 1 + ny = int(np.ceil((y_max - y_min) / grid_spacing)) + 1 + + # Bin destination points onto the coarse grid + ix = ((dst_points[:, 0] - x_min) / grid_spacing).astype(int) + iy = ((dst_points[:, 1] - y_min) / grid_spacing).astype(int) + ix = np.clip(ix, 0, nx - 1) + iy = np.clip(iy, 0, ny - 1) + occupancy = np.zeros((ny, nx), dtype=bool) + occupancy[iy, ix] = True + + # Build a disk structuring element for dilation + radius_px = int(np.ceil(buffer_m / grid_spacing)) + yy, xx = np.ogrid[-radius_px:radius_px + 1, + -radius_px:radius_px + 1] + disk = (xx ** 2 + yy ** 2) <= radius_px ** 2 + + # Dilate the occupancy mask + dilated = binary_dilation(occupancy, structure=disk) + + # Pad with False so the contour never exits the grid boundary + # (prevents straight-line artifacts from open contour paths) + dilated_padded = np.pad(dilated, pad_width=1, + constant_values=False) + + # Extract the outer contour at the 0.5 level + fig_tmp, ax_tmp = plt.subplots() + cs = ax_tmp.contour(dilated_padded.astype(float), levels=[0.5]) + plt.close(fig_tmp) + + # Find the longest contour path (outer boundary) + all_paths = cs.allsegs[0] + if not all_paths: + raise ValueError( + 'Could not extract a contour from the dilated mask.') + longest = max(all_paths, key=lambda p: len(p)) + + # Convert pixel coordinates back to projected coordinates + # (subtract 1 to account for the padding pixel) + poly_x = x_min + (longest[:, 0] - 1) * grid_spacing + poly_y = y_min + (longest[:, 1] - 1) * grid_spacing + boundary_pts = np.column_stack([poly_x, poly_y]) + + _log(f'destination boundary x extent: ' + f'{boundary_pts[:, 0].min():.0f} to ' + f'{boundary_pts[:, 0].max():.0f}') + _log(f'destination boundary y extent: ' + f'{boundary_pts[:, 1].min():.0f} to ' + f'{boundary_pts[:, 1].max():.0f}') + _log(f'boundary polygon vertices: {len(boundary_pts)}') + + return Path(boundary_pts, closed=True) + + +def add_grid_imask_from_dst_scrip_hull(source_scrip, dest_scrip, + masked_source_scrip, domain, + buffer_m=50e3, + source_crs='EPSG:4326', + mesh_crs=None, + hull_path=None, + logger=None): + """ + Create a new source SCRIP file with grid_imask set to 1 only for cells + that fall within the boundary (plus buffer) of the destination SCRIP + footprint. This dramatically reduces the work ESMF_RegridWeightGen must do + when the source dataset is much larger than the destination mesh. + + Parameters + ---------- + source_scrip : str + Input source SCRIP file + + dest_scrip : str + Destination SCRIP file used by ESMF_RegridWeightGen. Ignored if + hull_path is provided. + + masked_source_scrip : str + Output source SCRIP file with updated grid_imask + + domain : str + Projection domain. Recognised convenience keys: 'greenland', + 'gis-gimp', 'antarctica', 'ais-bedmap2'. Any other value requires + mesh_crs to be supplied explicitly. + + buffer_m : float, optional + Approximate outward hull expansion in meters. Ignored if hull_path + is provided. + + source_crs : str, optional + CRS of lon/lat variables in SCRIP files (default 'EPSG:4326') + + mesh_crs : str, optional + Proj4/EPSG string for planar coordinates. Derived from domain if None. + + hull_path : matplotlib.path.Path or None, optional + Pre-built buffered concave boundary of the destination mesh footprint, + as returned by :py:func:`compass.landice.mesh.build_dst_scrip_hull`. + When provided, dest_scrip is not read and the boundary is not + recomputed, which is useful when masking multiple source datasets + against the same destination mesh. + + logger : logging.Logger, optional + Logger for status messages; falls back to print if None. + """ + + def _log(msg): + if logger is not None: + logger.info(msg) + else: + print(msg) + + projections = { + 'greenland': ( + '+proj=stere +lat_ts=70.0 +lat_0=90 +lon_0=315.0 +k_0=1.0 ' + '+x_0=0.0 +y_0=0.0 +ellps=WGS84' + ), + 'antarctica': ( + '+proj=stere +lat_ts=-71.0 +lat_0=-90 +lon_0=0.0 +k_0=1.0 ' + '+x_0=0.0 +y_0=0.0 +ellps=WGS84' + ), + } + projections['gis-gimp'] = projections['greenland'] + projections['ais-bedmap2'] = projections['antarctica'] + + if mesh_crs is None: + if domain not in projections: + raise ValueError( + f"Unknown domain '{domain}'. Expected one of " + f"{list(projections.keys())}, or supply mesh_crs explicitly." + ) + mesh_crs = projections[domain] + + transformer = Transformer.from_crs(source_crs, mesh_crs, always_xy=True) + + def _maybe_deg(lon, lat): + if (np.nanmax(np.abs(lon)) <= 2.0 * np.pi + 1.0e-6 and + np.nanmax(np.abs(lat)) <= 0.5 * np.pi + 1.0e-6): + lon = np.rad2deg(lon) + lat = np.rad2deg(lat) + return lon, lat + + def _projected_centers(ds): + if ('grid_center_x' in ds.variables and + 'grid_center_y' in ds.variables): + xc = ds['grid_center_x'].values + yc = ds['grid_center_y'].values + _log('Using existing projected source centers.') + else: + lon = ds['grid_center_lon'].values + lat = ds['grid_center_lat'].values + lon, lat = _maybe_deg(lon, lat) + xc, yc = transformer.transform(lon, lat) + _log('Projected source centers from lon/lat.') + return np.asarray(xc), np.asarray(yc) + + if hull_path is None: + hull_path = build_dst_scrip_hull( + dest_scrip=dest_scrip, + domain=domain, + buffer_m=buffer_m, + source_crs=source_crs, + mesh_crs=mesh_crs, + logger=logger) + + with xarray.open_dataset(source_scrip) as ds_src: + xc, yc = _projected_centers(ds_src) + + _log(f'source x extent: {np.nanmin(xc):.0f} to {np.nanmax(xc):.0f}') + _log(f'source y extent: {np.nanmin(yc):.0f} to {np.nanmax(yc):.0f}') + + src_pts = np.column_stack([xc, yc]) + inside = hull_path.contains_points(src_pts).astype(np.int32) + _log(f'active source cells after masking: ' + f'{inside.sum()} / {inside.size}') + + ds_out = ds_src.copy() + ds_out['grid_imask'] = xarray.DataArray( + inside, + dims=('grid_size',), + attrs={'long_name': '0/1 mask for active source cells'} + ) + ds_out.to_netcdf(masked_source_scrip, mode='w') + + +def plot_hull_diagnostic(hull_path, dest_scrip, source_bbox_files, + transformer, _maybe_deg, active_xc, active_yc, + logger=None): + """ + Save a diagnostic PNG (``mesh_boundary.png``) showing the masking geometry. + + Parameters + ---------- + hull_path : matplotlib.path.Path + Buffered concave boundary of the destination mesh. + + dest_scrip : str + Destination SCRIP file (used to scatter MALI domain extent). + + source_bbox_files : list of tuple + Each entry is ``(filepath, label, edgecolor)`` for a source dataset + whose bounding box should be drawn. + + transformer : pyproj.Transformer + Transformer from lon/lat to planar coordinates. + + _maybe_deg : callable + Helper that converts radian angles to degrees when needed. + + active_xc : numpy.ndarray + Projected x-coordinates of all active (masked-in) source cells. + + active_yc : numpy.ndarray + Projected y-coordinates of all active (masked-in) source cells. + + logger : logging.Logger, optional + Logger for status messages; falls back to print if None. + """ + def _log(msg): + if logger is not None: + logger.info(msg) + else: + print(msg) + + try: + from matplotlib.patches import Polygon as MplPolygon + from matplotlib.patches import Rectangle + + # --- destination SCRIP centers (MALI domain extent) --- + with xarray.open_dataset(dest_scrip) as ds_dst: + if ('grid_center_x' in ds_dst.variables and + 'grid_center_y' in ds_dst.variables): + xd = np.asarray(ds_dst['grid_center_x'].values) + yd = np.asarray(ds_dst['grid_center_y'].values) + else: + lon = ds_dst['grid_center_lon'].values + lat = ds_dst['grid_center_lat'].values + lon, lat = _maybe_deg(lon, lat) + xd, yd = transformer.transform(lon, lat) + xd, yd = np.asarray(xd), np.asarray(yd) + + rng = np.random.default_rng(seed=0) + + def _subsample(x, y, n=50000): + idx = np.where(np.isfinite(x) & np.isfinite(y))[0] + if len(idx) > n: + idx = rng.choice(idx, size=n, replace=False) + return x[idx], y[idx] + + def _src_extent(filepath): + """Return (x_min, x_max, y_min, y_max) from a gridded dataset.""" + with xarray.open_dataset(filepath) as ds: + if 'x1' in ds and 'y1' in ds: + x, y = ds['x1'].values, ds['y1'].values + elif 'x' in ds and 'y' in ds: + x, y = ds['x'].values, ds['y'].values + else: + return None + return float(x.min()), float(x.max()), \ + float(y.min()), float(y.max()) + + fig, ax = plt.subplots(figsize=(10, 10)) + + # active source cells — ice-sheet extent (tab:blue) + xs, ys = _subsample(active_xc, active_yc) + + # MALI domain extent (tab:pink) + xs, ys = _subsample(xd, yd) + ax.scatter(xs, ys, s=1.5, color='tab:pink', alpha=0.6, + label='MALI cells (subsampled)', rasterized=True) + + # source bounding boxes + for filepath, label, color in source_bbox_files: + ext = _src_extent(filepath) + if ext is None: + continue + x0, x1, y0, y1 = ext + bbox = Rectangle( + (x0, y0), x1 - x0, y1 - y0, + linewidth=1.5, edgecolor=color, + facecolor='none', label=label) + ax.add_patch(bbox) + + # mesh boundary (concave, buffered) + hull_verts = hull_path.vertices + hull_patch = MplPolygon( + hull_verts, closed=True, + linewidth=2, edgecolor='tab:green', + facecolor='none', label='mesh boundary (buffered)') + ax.add_patch(hull_patch) + + ax.set_aspect('equal') + ax.set_xlabel('x (m)') + ax.set_ylabel('y (m)') + ax.legend(loc='best', markerscale=6) + ax.set_title('Source SCRIP masking: boundary vs. domain') + fig.savefig('mesh_boundary.png', dpi=150, + bbox_inches='tight', facecolor=fig.get_facecolor()) + plt.close(fig) + _log('Saved masking diagnostic plot to mesh_boundary.png') + except Exception as exc: + _log(f'Warning: could not save mesh boundary plot: {exc}') + + def interp_gridded2mali(self, source_file, mali_scrip, parallel_executable, - nProcs, dest_file, proj, variables="all"): + nProcs, dest_file, proj, variables="all", + hull_path=None): """ Interpolate gridded dataset (e.g. MEASURES, BedMachine) onto a MALI mesh @@ -1157,6 +1589,13 @@ def interp_gridded2mali(self, source_file, mali_scrip, parallel_executable, variables: "all" or list of strings either the string "all" or a list of strings + + hull_path : matplotlib.path.Path or None, optional + Pre-built buffered concave boundary of the destination mesh footprint, + as returned by :py:func:`compass.landice.mesh.build_dst_scrip_hull`. + When provided, the boundary is not recomputed from mali_scrip, which + avoids redundant I/O and computation when this function is called + multiple times with the same destination mesh. """ def __guess_scrip_name(filename): @@ -1194,10 +1633,23 @@ def __guess_scrip_name(filename): '-r', '2'] check_call(args, logger=logger) + # Mask source SCRIP to the footprint of the destination mesh so that + # ESMF_RegridWeightGen only processes the cells that overlap the target. + stem = os.path.splitext(source_scrip)[0] # strips .nc + masked_source_scrip = f'{stem}_masked.nc' + logger.info('masking source SCRIP to destination mesh footprint') + add_grid_imask_from_dst_scrip_hull( + source_scrip=source_scrip, + dest_scrip=mali_scrip, + masked_source_scrip=masked_source_scrip, + domain=proj, + hull_path=hull_path, + logger=logger) + # Generate remapping weights logger.info('generating gridded dataset -> MPAS weights') args = [parallel_executable, '-n', nProcs, 'ESMF_RegridWeightGen', - '--source', source_scrip, + '--source', masked_source_scrip, '--destination', mali_scrip, '--weight', weights_filename, '--method', 'conserve', @@ -1485,10 +1937,19 @@ def run_optional_interpolation( dst_scrip_file = f'{mesh_base}_scrip.nc' scrip_from_mpas(mesh_filename, dst_scrip_file) + # Build the destination boundary once so it can be reused for both + # BedMachine and MEaSUREs without re-reading dst_scrip_file. + logger.info('building destination mesh boundary for source masking') + hull_path = build_dst_scrip_hull( + dest_scrip=dst_scrip_file, + domain=src_proj, + logger=logger) + if bedmachine_dataset is not None: interp_gridded2mali(self, bedmachine_dataset, dst_scrip_file, parallel_executable, nProcs, - mesh_filename, src_proj, variables='all') + mesh_filename, src_proj, variables='all', + hull_path=hull_path) if measures_dataset is not None: measures_vars = ['observedSurfaceVelocityX', @@ -1497,7 +1958,88 @@ def run_optional_interpolation( interp_gridded2mali(self, measures_dataset, dst_scrip_file, parallel_executable, nProcs, mesh_filename, src_proj, - variables=measures_vars) + variables=measures_vars, + hull_path=hull_path) + + # Diagnostic plot: show hull, MALI domain, and bounding boxes for + # all source datasets that were interpolated. + if bedmachine_dataset is not None or measures_dataset is not None: + projections = { + 'greenland': ( + '+proj=stere +lat_ts=70.0 +lat_0=90 +lon_0=315.0' + ' +k_0=1.0 +x_0=0.0 +y_0=0.0 +ellps=WGS84' + ), + 'antarctica': ( + '+proj=stere +lat_ts=-71.0 +lat_0=-90 +lon_0=0.0' + ' +k_0=1.0 +x_0=0.0 +y_0=0.0 +ellps=WGS84' + ), + } + projections['gis-gimp'] = projections['greenland'] + projections['ais-bedmap2'] = projections['antarctica'] + mesh_crs = projections.get(src_proj, src_proj) + transformer = Transformer.from_crs( + 'EPSG:4326', mesh_crs, always_xy=True) + + def _maybe_deg_plot(lon, lat): + if (np.nanmax(np.abs(lon)) <= 2.0 * np.pi + 1.0e-6 and + np.nanmax(np.abs(lat)) <= 0.5 * np.pi + 1.0e-6): + lon = np.rad2deg(lon) + lat = np.rad2deg(lat) + return lon, lat + + # Collect active source cells from BedMachine (first dataset) + # as a representative ice-sheet extent for the plot. + active_xc = np.array([]) + active_yc = np.array([]) + first_src = bedmachine_dataset or measures_dataset + bm_stem = os.path.splitext( + os.path.basename(first_src))[0] + # Try to find the masked SCRIP that was written to workdir + import re as _re + match = _re.search(r'(^.*[_-]v\d*[_-])+', bm_stem) + if match: + scrip_stem = bm_stem[:match.end() - 1] + else: + scrip_stem = bm_stem + masked_scrip = f'{scrip_stem}.scrip_masked.nc' + if os.path.exists(masked_scrip): + with xarray.open_dataset(masked_scrip) as ds_m: + imask = ds_m['grid_imask'].values.astype(bool) + if ('grid_center_x' in ds_m.variables and + 'grid_center_y' in ds_m.variables): + active_xc = ds_m['grid_center_x'].values[imask] + active_yc = ds_m['grid_center_y'].values[imask] + elif ('grid_center_lon' in ds_m.variables and + 'grid_center_lat' in ds_m.variables): + lon = ds_m['grid_center_lon'].values[imask] + lat = ds_m['grid_center_lat'].values[imask] + lon, lat = _maybe_deg_plot(lon, lat) + active_xc, active_yc = transformer.transform( + lon, lat) + active_xc = np.asarray(active_xc) + active_yc = np.asarray(active_yc) + + bbox_files = [] + if bedmachine_dataset is not None: + bbox_files.append(( + bedmachine_dataset, + 'BedMachine bounding box', + 'black')) + if measures_dataset is not None: + bbox_files.append(( + measures_dataset, + 'MEaSUREs bounding box', + 'grey')) + + plot_hull_diagnostic( + hull_path=hull_path, + dest_scrip=dst_scrip_file, + source_bbox_files=bbox_files, + transformer=transformer, + _maybe_deg=_maybe_deg_plot, + active_xc=active_xc, + active_yc=active_yc, + logger=logger) clean_up_after_interp(mesh_filename) finally: diff --git a/compass/landice/tests/greenland/mesh_gen/mesh_gen_1to10km.cfg b/compass/landice/tests/greenland/mesh_gen/mesh_gen_1to10km.cfg index 0c06f6e393..a1ec03e711 100644 --- a/compass/landice/tests/greenland/mesh_gen/mesh_gen_1to10km.cfg +++ b/compass/landice/tests/greenland/mesh_gen/mesh_gen_1to10km.cfg @@ -70,4 +70,4 @@ measures_filename = greenland_vel_mosaic500_extrap.nc src_proj = gis-gimp # number of processors to use for ESMF_RegridWeightGen -nProcs = 2048 +nProcs = 1024 diff --git a/docs/developers_guide/landice/api.rst b/docs/developers_guide/landice/api.rst index 55378a3f6c..baac89ca63 100644 --- a/docs/developers_guide/landice/api.rst +++ b/docs/developers_guide/landice/api.rst @@ -589,8 +589,11 @@ Landice Framework iceshelf_melt.calc_mean_TF mesh.add_bedmachine_thk_to_ais_gridded_data + mesh.add_grid_imask_from_dst_scrip_hull + mesh.build_dst_scrip_hull mesh.clean_up_after_interp mesh.clip_mesh_to_bounding_box + mesh.plot_hull_diagnostic mesh.get_mesh_config_bounding_box mesh.get_optional_interp_datasets mesh.gridded_flood_fill diff --git a/docs/developers_guide/landice/framework.rst b/docs/developers_guide/landice/framework.rst index 463187675a..712eed33ba 100644 --- a/docs/developers_guide/landice/framework.rst +++ b/docs/developers_guide/landice/framework.rst @@ -66,7 +66,41 @@ to the gridded dataset in order to separate the ice sheet from peripheral ice. :py:func:`compass.landice.mesh.interp_gridded2mali()` interpolates gridded data (e.g. BedMachine thickness or MEaSUREs ice velocity) to a MALI mesh, accounting for masking of the ice extent to avoid interpolation ramps. This functions works -for both Antarctica and Greenland. +for both Antarctica and Greenland. Before calling ``ESMF_RegridWeightGen``, it +calls :py:func:`compass.landice.mesh.build_dst_scrip_hull()` (unless a pre-built +``hull_path`` is passed) and then +:py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull()` to restrict +the source SCRIP to only the cells that overlap the destination mesh footprint. +This can substantially reduce the weight-generation time for high-resolution +source datasets (e.g. 150 m BedMachine v6) that are much larger than the target +mesh domain. When interpolating multiple datasets to the same destination mesh, +pass a pre-built ``hull_path`` (from +:py:func:`compass.landice.mesh.build_dst_scrip_hull()`) to avoid recomputing the +hull for each dataset. + +:py:func:`compass.landice.mesh.build_dst_scrip_hull()` builds a buffered +concave boundary from a destination SCRIP file and returns it as a +``matplotlib.path.Path``. The boundary is constructed by rasterising +destination mesh points onto a coarse grid (10 km cell size), dilating +by the buffer distance (default 50 km), and extracting the outer contour. +This produces a tight boundary that follows the actual domain shape +(including bays and fjords) while remaining computationally inexpensive. +It is factored out of +:py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull()` so the +boundary can be computed once and passed to multiple +:py:func:`compass.landice.mesh.interp_gridded2mali()` calls that share the +same destination mesh, avoiding redundant I/O whose cost scales with +destination mesh size. + +:py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull()` creates a +new source SCRIP file with ``grid_imask`` set to 1 only for source cells that +fall within the concave boundary of the destination SCRIP footprint (plus a +configurable buffer, default 50 km). The function projects both SCRIP files +into a planar stereographic coordinate system (determined by the ``domain`` +argument, e.g. ``'gis-gimp'`` or ``'ais-bedmap2'``) and uses a +``matplotlib.path.Path`` point-in-polygon test for efficiency. The resulting +masked SCRIP file is passed to ``ESMF_RegridWeightGen`` in place of the +full-domain source SCRIP. :py:func:`compass.landice.mesh.preprocess_ais_data()` performs adjustments to gridded AIS datasets needed for rest of compass workflow to utilize them. @@ -219,4 +253,9 @@ For mesh prototyping, it is a good idea to use `interpolate_data = False`, as the interpolation step can be slow and require many more resources than the rest of the mesh generation process. For instance, a 1–10km Greenland mesh can be created on a single Perlmutter CPU node, but data interpolation may -require up to 16 nodes. +require several nodes. The source SCRIP masking performed by +:py:func:`compass.landice.mesh.add_grid_imask_from_dst_scrip_hull()` before +calling ``ESMF_RegridWeightGen`` reduces the effective source-grid size to +only the cells overlapping the destination mesh footprint, which significantly +reduces the weight-generation time compared to running on the full global or +continental source SCRIP. diff --git a/docs/users_guide/landice/test_groups/antarctica.rst b/docs/users_guide/landice/test_groups/antarctica.rst index 115d7ade26..a9fabb8c01 100644 --- a/docs/users_guide/landice/test_groups/antarctica.rst +++ b/docs/users_guide/landice/test_groups/antarctica.rst @@ -124,4 +124,12 @@ datasets, so interpolation is enabled by default. The base-mesh projection used in ``build_mali_mesh()`` is fixed for this test case. +Before running ``ESMF_RegridWeightGen``, the interpolation step automatically +masks the source SCRIP file so that only cells overlapping a tight concave +boundary around the destination mesh (plus a 50 km buffer) are active. The +boundary follows the actual domain shape rather than a simple convex hull. +This avoids unnecessary weight computation for the large portions of the +BedMachine Antarctica domain that lie outside the target mesh and substantially +reduces ESMF weight-generation time. + There is no model integration step. diff --git a/docs/users_guide/landice/test_groups/greenland.rst b/docs/users_guide/landice/test_groups/greenland.rst index f20f7dab1d..cf544f7516 100644 --- a/docs/users_guide/landice/test_groups/greenland.rst +++ b/docs/users_guide/landice/test_groups/greenland.rst @@ -173,3 +173,12 @@ dataset interpolation step is skipped. The default config values include both datasets, so interpolation is enabled by default. The base-mesh projection used in ``build_mali_mesh()`` is fixed for this test case. + +Before running ``ESMF_RegridWeightGen``, the interpolation step automatically +masks the source SCRIP file so that only cells overlapping a tight concave +boundary around the destination mesh (plus a 50 km buffer) are active. The +boundary follows the actual domain shape — including bays and fjords — rather +than a simple convex hull. This avoids unnecessary weight computation for the +large portions of the BedMachine v6 domain (150 m native resolution, +~187 million cells) that lie outside the target mesh and substantially reduces +ESMF weight-generation time and memory footprint.