From 6705405b1a8b1d6ee3330f2ac6d87e719ca65e7d Mon Sep 17 00:00:00 2001 From: "J. Michelle Hu" <43484205+jmichellehu@users.noreply.github.com> Date: Tue, 26 May 2026 18:59:37 -0600 Subject: [PATCH 01/13] feat(topo): add standalone basin topo.nc generation pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add four python scripts to construct topo.nc for SMRF/iSnobal using an input basin name, HUC ID, or input polygon file. No dependency on USDA-ARS basin_setup repository. Contains: - generate_topo.py: full pipeline manager (fetch_basin → fetch_dem → build_topo_nc) - fetch_basin.py: queries USGS WBD for HUC watershed boundaries - fetch_dem.py: streams or downloads USGS 3DEP 1/3 arc-second DEM tiles - build_topo_nc.py: combines DEM, basin mask, and vegetation into topo.nc [conda/basin_setup.yaml] update to match the new pipeline. Remove legacy version pins and unused packages (colorama, coloredlogs, inicheck, spatialnc); add requests; relax python and numpy constraints. --- conda/basin_setup.yaml | 14 +- scripts/topo/README.md | 161 ++++++++++++++++ scripts/topo/build_topo_nc.py | 349 ++++++++++++++++++++++++++++++++++ scripts/topo/fetch_basin.py | 163 ++++++++++++++++ scripts/topo/fetch_dem.py | 188 ++++++++++++++++++ scripts/topo/generate_topo.py | 157 +++++++++++++++ 6 files changed, 1023 insertions(+), 9 deletions(-) create mode 100644 scripts/topo/README.md create mode 100755 scripts/topo/build_topo_nc.py create mode 100755 scripts/topo/fetch_basin.py create mode 100755 scripts/topo/fetch_dem.py create mode 100755 scripts/topo/generate_topo.py diff --git a/conda/basin_setup.yaml b/conda/basin_setup.yaml index 047ab94..a7ce021 100644 --- a/conda/basin_setup.yaml +++ b/conda/basin_setup.yaml @@ -2,17 +2,13 @@ name: basin_setup channels: - conda-forge dependencies: - - colorama - - coloredlogs - - gdal<3.9 + - python>=3.9 + - gdal - geopandas - netcdf4 - - numpy<1.25 + - numpy - pandas - - pip - - python=3.11 + - requests - rioxarray - xarray - - pip: - - inicheck - - spatialnc + - dask diff --git a/scripts/topo/README.md b/scripts/topo/README.md new file mode 100644 index 0000000..8970d62 --- /dev/null +++ b/scripts/topo/README.md @@ -0,0 +1,161 @@ +# Generate basin topo.nc for SMRF/iSnobal + +Generates a `topo.nc` file containing DEM, basin mask, and vegetation layers +(type, height, tau, k) required to run SMRF/iSnobal. Vegetation defaults to +LANDFIRE 1.4.0; user-supplied rasters can also be substituted via `--veg-dir`. + +No dependency on the USDA-ARS `basin_setup` repository. + +## Requirements + +- `basin_setup` conda environment (see `model_setup/conda/basin_setup.yaml`) +- Vegetation data — either LANDFIRE 1.4.0 or user-supplied input (see below) + +## Vegetation data + +Two options are supported: + +### Option A — LANDFIRE 1.4.0 (default) + +Download the **LF 1.4.0 EVT** and **LF 1.4.0 EVH** products for CONUS from +a *to-be-staged* repository and organize as follows: + +``` +/ + US_140EVT_20180618/ + Grid/us_140evt/hdr.adf + CSV_Data/LF_140EVH_05092014.csv + US_140EVH_20180618/ + Grid/us_140evh/hdr.adf +``` + +A veg params CSV mapping LANDFIRE EVT class IDs to SMRF canopy parameters (tau, k) +is also required. Pass its path with `--veg-params-csv`. In this approach, all +vegetation parameters are categorical or discrete variables + +> **SnowHydRO group (UU CHPC):** all required datasets are staged at +> `/uufs/chpc.utah.edu/common/home/skiles-group3/LANDFIRE/`. + +### Option B — user-supplied rasters (`--veg-dir`) + +*** ***EXPERIMENTAL, TESTING REQUIRED*** *** +If you have derived parameters, place the four +required GeoTIFFs in a directory and pass in with `--veg-dir`: + +``` +/ + veg_type.tif # vegetation type class (categorical) + veg_height.tif # canopy height in metres (continuous) + veg_tau.tif # canopy transmissivity tau (continuous) + veg_k.tif # canopy extinction coefficient k (continuous) +``` + +All four files must cover the basin extent and be in a CRS that GDAL can reproject. +LANDFIRE is not required when using this option. Circumventing type and height is +also possible (needed only in default generation of tau and k), but not yet tested. + +## Quick start + +```bash +$ cd model_setup +$ conda env create -f conda/basin_setup.yaml +$ conda activate basin_setup + +$ cd scripts/topo + +# View script options +$ python generate_topo.py -h +``` + +## Common invocation options +```bash +# Generate topo.nc using basin name, use quotes to handle spaces +$ python generate_topo.py -n "new fork" -o /path/to/output + +# NB: multiple matches will print candidate basins in a table +# Select the correct hucid (or add specificity to basin name) and re-run +$ python generate_topo.py -n "gunnison" -o /path/to/output # this will yield 3 matches + huc8 name areasqkm states +14020002 Upper Gunnison 6245.14 CO +14020004 North Fork Gunnison 2509.66 CO +14020005 Lower Gunnison 4306.26 CO + +# Re-run with more specificity +$ python generate_topo.py -n "lower gunnison" -o /path/to/output + +# By HUC ID, defaults to HUC 8 +$ python generate_topo.py -huc 14020005 -o /path/to/output + +# From an existing UTM polygon file (GeoPackage) +$ python generate_topo.py -s /path/to/basin.gpkg -o /path/to/output + +# With user-supplied vegetation rasters instead of LANDFIRE +$ python generate_topo.py -huc 14020005 -o /path/to/output --veg-dir /path/to/veg/ +``` + +**Output:** `/path/to/output/output_m/topo.nc` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `generate_topo.py` | Orchestrates all three scripts | +| `fetch_basin.py` | Fetches HUC boundary from USGS WBD → UTM GeoPackage | +| `fetch_dem.py` | Streams or downloads 3DEP DEM tiles → warped GeoTIFF mosaic | +| `build_topo_nc.py` | Assembles polygon + DEM + vegetation → topo.nc | + +Each script is independently runnable, provided that requisite data layers exist. + +## Step-by-Step Examples + +```bash +$ conda activate basin_setup + +# Step 1: generate polygon and basin.env file by basin name +$ python fetch_basin.py -n "new fork" -o ./newfork_scripts + +# [alternatively] fetch basin boundary by HUC ID +$ python fetch_basin.py -huc 14040102 -o ./newfork_scripts + +# Step 2: mosaic DEM — streams via vsicurl by default (reads basin.env from step 1) +$ python fetch_dem.py -o ./newfork_scripts + +# if tile download to disk is preferred, add flag, then warp +$ python fetch_dem.py -o ./newfork_scripts --download-tiles + +# Step 3: build topo.nc using dem and vegetation (reads basin.env for paths) +# (LANDFIRE default) +$ python build_topo_nc.py -o ./newfork_scripts + +# (user-supplied veg): bypass LANDFIRE with pre-derived rasters +$ python build_topo_nc.py -o ./newfork_scripts --veg-dir /path/to/veg/ +``` + +Key output `basin.env` file is written by step 1 and appended in step 2, and contains: + +``` +BASIN_POLYGON="./newfork_scripts/huc14040102_basin.gpkg" +BASIN_EPSG="32612" +BASIN_BBOX="-110.0593,42.5219,-109.2101,43.2069" +BASIN_NAME="New Fork" +BASIN_DEM="./newfork_scripts/dem_epsg_32612_100m.tif" +``` + +## Options + +``` +generate_topo.py flags: + -huc HUC_ID HUC ID (e.g. 14050001) + -n NAME Basin name keyword (e.g. "Yampa") + -s POLY Existing UTM polygon file (shapefile or GeoPackage) + -o DIR Output directory (required) + -res METERS Resolution in meters (default: 100) + -level N HUC level for -n search (default: 8) + -e EPSG Override auto-detected UTM EPSG + --landfire-dir DIR LANDFIRE 1.4.0 directory + --veg-params-csv CSV Veg params lookup CSV (used with --landfire-dir) + --veg-dir DIR User-supplied vegetation rasters, overrides --landfire-dir. + Must contain: veg_type.tif, veg_height.tif, veg_tau.tif, veg_k.tif + --download-dem-tiles Download DEM tiles to disk before warping (use for SLURM/offline) + --skip-dem-download Reuse existing tiles in /dem_tiles/ (requires --download-dem-tiles) +``` diff --git a/scripts/topo/build_topo_nc.py b/scripts/topo/build_topo_nc.py new file mode 100755 index 0000000..2673e9e --- /dev/null +++ b/scripts/topo/build_topo_nc.py @@ -0,0 +1,349 @@ +#!/usr/bin/env python +""" +Build a topo.nc file for SMRF/iSnobal from a basin polygon file, a warped DEM +GeoTIFF, and vegetation data (LANDFIRE 1.4.0 or user-supplied via --veg-dir). + +Writes output/topo.nc and validates its CRS against the expected EPSG. + +Usage: + python build_topo_nc.py -o ./newfork_scripts # reads basin.env + python build_topo_nc.py -s custom_basin.gpkg -d custom_dem.tif -o /path/to/outputs/ + python build_topo_nc.py -o ./newfork_scripts --veg-dir /path/to/veg/ +""" + +import argparse +import math +import re +import sys +from datetime import datetime +from pathlib import Path + +import geopandas as gpd +import numpy as np +import pandas as pd +import rioxarray +import xarray as xr +from osgeo import gdal +from pyproj import CRS +from rasterio.features import rasterize + +LANDFIRE_DIR_DEFAULT = Path("/uufs/chpc.utah.edu/common/home/skiles-group3/LANDFIRE") +VEG_PARAMS_CSV_DEFAULT = LANDFIRE_DIR_DEFAULT / "landfire_veg_params_assigned.csv" + +# Relative paths within the LANDFIRE 1.4.0 directory +_EVT_PATH = "US_140EVT_20180618/Grid/us_140evt/hdr.adf" +_EVH_PATH = "US_140EVH_20180618/Grid/us_140evh/hdr.adf" +_EVH_CSV = "US_140EVH_20180618/CSV_Data/LF_140EVH_05092014.csv" + +# Contract for user-supplied vegetation rasters via --veg-dir. +# veg_type is categorical so uses nearest-neighbour resampling; others are continuous. +VEG_DIR_FILES = { + "veg_type": ("veg_type.tif", "near"), + "veg_height": ("veg_height.tif", "bilinear"), + "veg_tau": ("veg_tau.tif", "bilinear"), + "veg_k": ("veg_k.tif", "bilinear"), +} + +# Matches numeric height values in CLASSNAMES strings, excluding asterisk-delimited NoData markers +_HEIGHT_RE = re.compile(r"(? {dst}") + result.FlushCache() + del result + + +def _read_raster(path): + if not Path(path).exists(): + raise FileNotFoundError(f"Raster not found: {path}") + with rioxarray.open_rasterio(path) as src: + da = src.squeeze(dim="band", drop=True) + return da + + +def _basin_mask(polygon, dem_da): + """Rasterize basin polygon onto the DEM grid. Returns uint8 array.""" + gdf = gpd.read_file(polygon) + ny, nx = dem_da.shape + return rasterize( + gdf.geometry, + out_shape=(ny, nx), + transform=dem_da.rio.transform(), + fill=0, + default_value=1, + dtype="uint8", + ) + + +def _veg_tau_k(evt_arr, veg_params_csv): + """Map EVT pixel values → tau and k arrays using the veg params CSV.""" + df = pd.read_csv(veg_params_csv) + idx_col = "landfire140" if "landfire140" in df.columns else df.columns[0] + df = df.set_index(idx_col) + df = df[~df.index.duplicated(keep="first")] + + missing = set(np.unique(evt_arr)) - set(df.index) + if missing: + # tau/k have no safe fallback — missing class would silently corrupt radiation + raise ValueError(f"EVT classes not in {veg_params_csv}: {missing}") + + flat = pd.Series(evt_arr.ravel()) + tau = flat.map(df["tau"]).values.reshape(evt_arr.shape) + k = flat.map(df["k"]).values.reshape(evt_arr.shape) + return tau, k + + +def _veg_height(evh_arr, evh_csv): + """Parse mean canopy heights from the LANDFIRE EVH CLASSNAMES CSV; defaults to 0 m.""" + df = pd.read_csv(evh_csv).set_index("VALUE") + def _parse_height(s): + matches = _HEIGHT_RE.findall(str(s)) + return float(np.mean([float(x) for x in matches])) if matches else 0.0 + + df["height"] = df["CLASSNAMES"].apply(_parse_height) + mapped = pd.Series(evh_arr.ravel()).map(df["height"]).fillna(0.0) + return mapped.values.reshape(evh_arr.shape) + + +def _projection_var(crs_str): + """ + Build a CF grid_mapping scalar DataArray from a CRS string. WKT is in version WKT1 for compatibility with WindNinja and older GDAL versions. The WKT is included in both crs_wkt and spatial_ref attributes for maximum compatibility. + """ + crs = CRS.from_string(crs_str) + wkt = crs.to_wkt(version="WKT1_GDAL") + attrs = crs.to_cf() + attrs["crs_wkt"] = wkt + attrs["spatial_ref"] = wkt + return xr.DataArray(0, attrs=attrs) + + +def validate(topo_nc, expected_epsg): + with xr.open_dataset(topo_nc) as ds: + if "projection" not in ds: + raise RuntimeError("topo.nc missing 'projection' variable") + proj_attrs = ds["projection"].attrs + wkt = proj_attrs.get("crs_wkt") or proj_attrs.get("spatial_ref") + if not wkt: + raise RuntimeError("topo.nc 'projection' has no crs_wkt or spatial_ref attribute") + actual = CRS.from_wkt(wkt).to_epsg() + if actual != expected_epsg: + raise ValueError(f"topo.nc CRS mismatch: EPSG:{actual} != expected EPSG:{expected_epsg}") + print(f" Projection OK, EPSG:{actual}") + + +def build_topo(polygon, dem_file, landfire_dir, veg_params_csv, + output_dir, cell_size, basin_name, veg_dir=None): + """Construct topo.nc from input files and write to output_dir. Returns topo.nc path.""" + if veg_dir is not None: + veg_dir = Path(veg_dir) + missing = [fname for fname, _ in VEG_DIR_FILES.values() if not (veg_dir / fname).exists()] + if missing: + expected = "\n".join(f" {fname}" for fname, _ in VEG_DIR_FILES.values()) + raise FileNotFoundError( + f"Missing files in {veg_dir}:\n" + + "\n".join(f" {f}" for f in missing) + + f"\nExpected layout:\n{expected}" + ) + + landfire_dir = Path(landfire_dir) + topo_output = Path(output_dir) / f"output_{int(cell_size)}m" + temp_dir = topo_output / "temp" + topo_output.mkdir(parents=True, exist_ok=True) + temp_dir.mkdir(exist_ok=True) + + print("Computing extents...") + extents, crs = _padded_extents(polygon, cell_size) + + print("Clipping DEM...") + dem_clip = str(temp_dir / "clipped_dem.tif") + _clip_raster(str(dem_file), dem_clip, crs, extents, cell_size, "bilinear") + dem_da = _read_raster(dem_clip) + coords = {"y": dem_da.y, "x": dem_da.x} + + mask_arr = _basin_mask(polygon, dem_da) + + if veg_dir is not None: + print("Clipping user-provided vegetation rasters to basin extents...") + clips = {} + for var, (fname, resample) in VEG_DIR_FILES.items(): + clip_path = str(temp_dir / f"clipped_{var}.tif") + _clip_raster(str(veg_dir / fname), clip_path, crs, extents, cell_size, resample) + clips[var] = _read_raster(clip_path).values + type_arr = clips["veg_type"] + height_arr = clips["veg_height"] + tau_arr = clips["veg_tau"] + k_arr = clips["veg_k"] + else: + print("Clipping LANDFIRE vegetation to basin extents...") + evt_clip = str(temp_dir / "clipped_veg_type.tif") + evh_clip = str(temp_dir / "clipped_veg_height.tif") + _clip_raster(str(landfire_dir / _EVT_PATH), evt_clip, crs, extents, cell_size, "near") + _clip_raster(str(landfire_dir / _EVH_PATH), evh_clip, crs, extents, cell_size, "near") + evt_da = _read_raster(evt_clip) + evh_da = _read_raster(evh_clip) + type_arr = evt_da.values + tau_arr, k_arr = _veg_tau_k(type_arr, veg_params_csv) + height_arr = _veg_height(evh_da.values, str(landfire_dir / _EVH_CSV)) + + print("Assembling dataset...") + ds = xr.Dataset({ + "dem": xr.DataArray( + dem_da.values, dims=["y", "x"], coords=coords, + attrs={"long_name": "dem"}), + "mask": xr.DataArray( + mask_arr, dims=["y", "x"], coords=coords, + attrs={"long_name": basin_name}), + "veg_type": xr.DataArray( + type_arr, dims=["y", "x"], coords=coords, + attrs={"long_name": "vegetation type"}), + "veg_height": xr.DataArray( + height_arr, dims=["y", "x"], coords=coords, + attrs={"long_name": "vegetation height"}), + "veg_tau": xr.DataArray( + tau_arr, dims=["y", "x"], coords=coords, + attrs={"long_name": "vegetation tau"}), + "veg_k": xr.DataArray( + k_arr, dims=["y", "x"], coords=coords, + attrs={"long_name": "vegetation k"}), + "projection": _projection_var(crs), + }) + + # Strip any grid_mapping set by rioxarray to avoid xarray encoding conflict + for var in list(ds.data_vars) + list(ds.coords): + ds[var].attrs.pop("grid_mapping", None) + ds[var].encoding.pop("grid_mapping", None) + + ds.x.attrs = {"units": "meters", "long_name": "x coordinate", + "standard_name": "projection_x_coordinate"} + ds.y.attrs = {"units": "meters", "long_name": "y coordinate", + "standard_name": "projection_y_coordinate"} + + now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ds.attrs = { + "Conventions": "CF-1.6", + "dateCreated": now, + "Title": "Topographic Images for SMRF/AWSM", + "history": f"[{now}] Created by build_topo_nc.py", + } + + topo_nc = topo_output / "topo.nc" + print(f"Writing {topo_nc}...") + ds.to_netcdf( + topo_nc, + format="NETCDF4", + encoding={ + "x": {"dtype": "f4"}, + "y": {"dtype": "f4"}, + "dem": {"dtype": "f4", "grid_mapping": "projection"}, + "mask": {"grid_mapping": "projection"}, + "veg_type": {"dtype": "u2", "grid_mapping": "projection"}, + "veg_height": {"dtype": "f4", "grid_mapping": "projection"}, + "veg_tau": {"dtype": "f4", "grid_mapping": "projection"}, + "veg_k": {"dtype": "f4", "grid_mapping": "projection"}, + }, + ) + return topo_nc + + +def read_basin_env(output_dir): + """Read key value pairs from basin.env into dict.""" + env_path = Path(output_dir) / "basin.env" + if not env_path.exists(): + return {} + env = {} + for line in env_path.read_text().splitlines(): + line = line.strip() + if "=" in line and not line.startswith("#"): + k, _, v = line.partition("=") + env[k.strip()] = v.strip().strip('"\'') + return env + + +def main(): + parser = argparse.ArgumentParser( + description="Build topo.nc from polygon + DEM + LANDFIRE", + formatter_class=argparse.RawDescriptionHelpFormatter, + usage="\n build_topo_nc.py -s POLY -d DEM -o DIR [-res METERS] [--name NAME]\n" + " build_topo_nc.py -o DIR # reads basin.env", + epilog=__doc__, + ) + parser.add_argument("-s", "--polygon", metavar="POLY", + help="Basin polygon file (shapefile or GeoPackage); reads BASIN_POLYGON from basin.env if omitted.") + parser.add_argument("-d", "--dem-file", metavar="DEM", + help="Warped DEM GeoTIFF; reads BASIN_DEM from basin.env if omitted.") + parser.add_argument("-o", "--output-dir", required=True, metavar="DIR") + parser.add_argument("-res", "--cell-size", type=float, default=100.0, metavar="METERS") + parser.add_argument("-e", "--epsg", type=int, default=None, metavar="EPSG", + help="UTM EPSG. Reads BASIN_EPSG from basin.env if omitted.") + parser.add_argument("--name", default=None, metavar="NAME", + help="Basin name for mask; reads BASIN_NAME from basin.env if None.") + parser.add_argument("--landfire-dir", default=str(LANDFIRE_DIR_DEFAULT), metavar="DIR") + parser.add_argument("--veg-params-csv", default=str(VEG_PARAMS_CSV_DEFAULT), metavar="CSV") + expected = ", ".join(fname for fname, _ in VEG_DIR_FILES.values()) + parser.add_argument("--veg-dir", default=None, metavar="DIR", + help=f"Directory of user-derived vegetation rasters, overrides " + f"--landfire-dir. Must contain: {expected}") + args = parser.parse_args() + + output_dir = Path(args.output_dir).resolve() + env = read_basin_env(output_dir) + + polygon = Path(args.polygon) if args.polygon else Path(env.get("BASIN_POLYGON", "")) + dem_file = Path(args.dem_file) if args.dem_file else Path(env.get("BASIN_DEM", "")) + epsg = args.epsg or int(env.get("BASIN_EPSG", 0)) + basin_name = args.name or env.get("BASIN_NAME", "Full Basin") + + if not polygon or not polygon.exists(): + sys.exit(f"Polygon file not found: {polygon}. Pass -s or run fetch_basin.py first.") + if not dem_file or not dem_file.exists(): + sys.exit(f"DEM file not found: {dem_file}. Pass -d or run fetch_dem.py first.") + if not epsg: + sys.exit("EPSG not found. Pass -e or run fetch_basin.py first.") + + landfire_dir = Path(args.landfire_dir) + veg_params_csv = Path(args.veg_params_csv) + if not args.veg_dir: + if not landfire_dir.exists(): + sys.exit(f"LANDFIRE directory not found: {landfire_dir}") + if not veg_params_csv.exists(): + sys.exit(f"veg_params_csv not found: {veg_params_csv}") + + topo_nc = build_topo( + polygon, dem_file, landfire_dir, veg_params_csv, + output_dir, args.cell_size, basin_name, + veg_dir=args.veg_dir, + ) + + print("Validating projection...") + validate(topo_nc, epsg) + print(f"\nDone! topo.nc at {topo_nc}") + + +if __name__ == "__main__": + main() diff --git a/scripts/topo/fetch_basin.py b/scripts/topo/fetch_basin.py new file mode 100755 index 0000000..ede7761 --- /dev/null +++ b/scripts/topo/fetch_basin.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python +""" +Fetch a HUC watershed boundary from the USGS Water Boundary Dataset (WBD) +and save as a UTM-projected GeoPackage. + +Writes /basin.env with BASIN_POLYGON, BASIN_EPSG, BASIN_BBOX, +and BASIN_NAME for use by fetch_dem.py and build_topo_nc.py. + +Example usage: + python fetch_basin.py -huc 14040102 -o ./newfork_scripts + python fetch_basin.py -n "new fork" -o ./newfork_scripts [-level 8] + python fetch_basin.py -s custom.gpkg -o ./newfork_scripts +""" + +import argparse +import sys +from pathlib import Path + +import geopandas as gpd +import requests + +WBD_BASE = "https://hydro.nationalmap.gov/arcgis/rest/services/wbd/MapServer" +_WBD_LAYER = {2: 1, 4: 2, 6: 3, 8: 4, 10: 5, 12: 6} +WBD_FIELD = {2: "huc2", 4: "huc4", 6: "huc6", 8: "huc8", 10: "huc10", 12: "huc12"} + + +def _wbd_query(layer, where, out_fields): + resp = requests.get( + f"{WBD_BASE}/{layer}/query", + params={"where": where, "outFields": out_fields, "f": "geojson"}, + timeout=30, + ) + resp.raise_for_status() + data = resp.json() + if "error" in data: + raise RuntimeError(f"WBD API error: {data['error'].get('message', data['error'])}") + features = data.get("features", []) + if not features: + return gpd.GeoDataFrame() + return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + + +def utm_epsg_from_lonlat(lon, lat): + zone = int((lon + 180) / 6) + 1 + return 32600 + zone if lat >= 0 else 32700 + zone + + +def fetch_huc_polygon(huc_id, output_dir): + """Query WBD for a HUC boundary, reproject to UTM, and save as GeoPackage.""" + huc_level = len(huc_id) + if huc_level not in _WBD_LAYER: + raise ValueError(f"HUC ID length {huc_level} not supported. Use HUC2/4/6/8/10/12.") + field = WBD_FIELD[huc_level] + gdf = _wbd_query(_WBD_LAYER[huc_level], f"{field}='{huc_id}'", "*") + if gdf.empty: + raise ValueError(f"No WBD features found for HUC ID '{huc_id}'") + + xmin, ymin, xmax, ymax = gdf.total_bounds + utm_epsg = utm_epsg_from_lonlat((xmin + xmax) / 2, (ymin + ymax) / 2) + bbox_wgs84 = (xmin, ymin, xmax, ymax) + basin_name = gdf.iloc[0].get("name", f"HUC{huc_id}") + + gpkg_path = output_dir / f"huc{huc_id}_basin.gpkg" + gdf[["geometry"]].to_crs(f"EPSG:{utm_epsg}").to_file(gpkg_path, driver="GPKG") + return gpkg_path, utm_epsg, bbox_wgs84, basin_name + + +def search_huc_by_name(name, huc_level=8): + """Search WBD for HUCs with a name matching the input string (case-insensitive).""" + if huc_level not in _WBD_LAYER: + raise ValueError(f"huc_level {huc_level} not supported.") + field = WBD_FIELD[huc_level] + return _wbd_query( + _WBD_LAYER[huc_level], + f"LOWER(name) LIKE LOWER('%{name}%')", + f"{field},name,areasqkm,states", + ) + + +def write_env(output_dir, polygon, epsg, bbox_wgs84, basin_name): + """Write shell-sourceable key=value file for downstream scripts.""" + xmin, ymin, xmax, ymax = bbox_wgs84 + env_path = output_dir / "basin.env" + env_path.write_text( + f'BASIN_POLYGON="{polygon}"\n' + f'BASIN_EPSG="{epsg}"\n' + f'BASIN_BBOX="{xmin},{ymin},{xmax},{ymax}"\n' + f'BASIN_NAME="{basin_name}"\n', + encoding="utf-8", + ) + return env_path + + +def main(): + parser = argparse.ArgumentParser( + description="Fetch HUC watershed boundary → UTM polygon", + formatter_class=argparse.RawDescriptionHelpFormatter, + usage="\n fetch_basin.py (-huc HUC_ID | -n NAME | -s POLY) -o DIR [-level N] [-e EPSG]", + epilog=__doc__, + ) + src = parser.add_mutually_exclusive_group(required=True) + src.add_argument("-huc", "--huc-id", metavar="HUC_ID", + help="HUC ID (e.g. 14050001)") + src.add_argument("-n", "--basin-name", metavar="NAME", + help="Basin name keyword to search (e.g. 'Yampa')") + src.add_argument("-s", "--polygon", metavar="POLY", + help="Path to existing basin polygon file (shapefile or GeoPackage, must be in UTM)") + parser.add_argument("-o", "--output-dir", required=True, metavar="DIR") + parser.add_argument("-level", "--huc-level", type=int, default=8, + choices=[2, 4, 6, 8, 10, 12], + help="HUC level for -n search (default: 8)") + parser.add_argument("-e", "--epsg", type=int, default=None, + help="Override UTM EPSG (auto-detected from centroid if not given)") + args = parser.parse_args() + + output_dir = Path(args.output_dir).resolve() + output_dir.mkdir(parents=True, exist_ok=True) + + if args.huc_id: + print(f"Fetching HUC {args.huc_id} from USGS WBD...") + polygon, utm_epsg, bbox_wgs84, basin_name = fetch_huc_polygon( + args.huc_id, output_dir) + + elif args.basin_name: + print(f"Searching USGS WBD HUC{args.huc_level} for '{args.basin_name}'...") + matches = search_huc_by_name(args.basin_name, args.huc_level) + huc_field = WBD_FIELD[args.huc_level] + if matches.empty: + sys.exit(f"No HUC{args.huc_level} features found matching '{args.basin_name}'.\n" + "Try a different spelling or -level.") + if len(matches) > 1: + cols = [c for c in [huc_field, "name", "areasqkm", "states"] if c in matches.columns] + print(f"\n{len(matches)} matches for '{args.basin_name}':\n") + print(matches[cols].to_string(index=False)) + sys.exit("\nRerun with the correct -huc value from the table above.") + huc_id = matches[huc_field].iloc[0] + basin_name = matches["name"].iloc[0] + print(f"Found: {basin_name} ({huc_id})") + polygon, utm_epsg, bbox_wgs84, _ = fetch_huc_polygon(huc_id, output_dir) + + else: + polygon = Path(args.polygon).resolve() + if not polygon.exists(): + sys.exit(f"Polygon file not found: {polygon}") + gdf_wgs84 = gpd.read_file(polygon).to_crs("EPSG:4326") + xmin, ymin, xmax, ymax = gdf_wgs84.total_bounds + lon_center = (xmin + xmax) / 2 + lat_center = (ymin + ymax) / 2 + utm_epsg = utm_epsg_from_lonlat(lon_center, lat_center) + bbox_wgs84 = (xmin, ymin, xmax, ymax) + basin_name = polygon.stem + + if args.epsg: + utm_epsg = args.epsg + + env_path = write_env(output_dir, polygon, utm_epsg, bbox_wgs84, basin_name) + print(f"Basin file: {polygon}") + print(f"EPSG : {utm_epsg}") + print(f"Env file : {env_path}") + + +if __name__ == "__main__": + main() diff --git a/scripts/topo/fetch_dem.py b/scripts/topo/fetch_dem.py new file mode 100755 index 0000000..6d4a75c --- /dev/null +++ b/scripts/topo/fetch_dem.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python +""" +Build a UTM-projected DEM mosaic from USGS 3DEP 1/3 arc-second tiles. + +By default, tiles are streamed via GDAL vsicurl without writing to disk. +Use --download-tiles for SLURM jobs where compute nodes lack internet access. + +Reads basin.env from the output directory (written by fetch_basin.py) for +EPSG and bounding box if not provided explicitly. Appends BASIN_DEM to basin.env. + +Usage: + python fetch_dem.py -o ./newfork_scripts + python fetch_dem.py -o ./newfork_scripts --download-tiles + python fetch_dem.py -o ./newfork_scripts --download-tiles --skip-download +""" + +import argparse +import sys +from pathlib import Path + +import requests +from osgeo import gdal + + +TNM_DATASET = "National Elevation Dataset (NED) 1/3 arc-second" +TNM_API = "https://tnmaccess.nationalmap.gov/api/v1/products" + + +def read_basin_env(output_dir): + """Parse key=value pairs from basin.env into a dict.""" + env_path = Path(output_dir) / "basin.env" + if not env_path.exists(): + return {} + env = {} + for line in env_path.read_text().splitlines(): + line = line.strip() + if "=" in line and not line.startswith("#"): + k, _, v = line.partition("=") + env[k.strip()] = v.strip().strip('"\'') + return env + + +def _query_tile_urls(bbox_wgs84): + """Query TNM API, deduplicate by tile cell, and return .tif download URLs.""" + xmin, ymin, xmax, ymax = bbox_wgs84 + resp = requests.get(TNM_API, params={ + "datasets": TNM_DATASET, + "bbox": f"{xmin},{ymin},{xmax},{ymax}", + "max": 1000, + "outputFormat": "JSON", + }, timeout=60) + resp.raise_for_status() + + items = resp.json().get("items", []) + if not items: + raise RuntimeError(f"No TNM DEM tiles found for bbox {bbox_wgs84}.") + print(f" Found {len(items)} DEM tile(s)") + + # Keep only the newest version of each tile cell + # Title format: "USGS 1/3 Arc Second n40w107 20210312" → cell is second-to-last word + best = {} + for item in items: + tile = item.get("title", "").split()[-2] + pub_date = item.get("publicationDate", "") + is_newer = tile in best and pub_date > best[tile].get("publicationDate", "") + if tile not in best or is_newer: + best[tile] = item + deduped = list(best.values()) + print(f" {len(deduped)} unique tile(s) after deduplication") + + # Prefer .tif — GDAL reads it directly; other formats may require format conversion + tif_items = [i for i in deduped if i.get("downloadURL", "").endswith(".tif")] + return [i["downloadURL"] for i in (tif_items or deduped)] + + +def stream_dem_tiles(bbox_wgs84): + """Return vsicurl paths for GDAL to stream tiles without downloading.""" + urls = _query_tile_urls(bbox_wgs84) + return [f"/vsicurl/{url}" for url in urls] + + +def download_dem_tiles(bbox_wgs84, output_dir): + """Download tiles to disk and return local file paths.""" + urls = _query_tile_urls(bbox_wgs84) + tile_dir = output_dir / "dem_tiles" + tile_dir.mkdir(exist_ok=True) + tile_files = [] + for url in urls: + dest = tile_dir / Path(url).name + if not dest.exists(): + print(f" Downloading {dest.name} ...") + with requests.get(url, stream=True, timeout=120) as r: + r.raise_for_status() + with open(dest, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + else: + print(f" {dest.name} already exists, skipping") + tile_files.append(str(dest)) + return tile_files + + +def build_dem(tile_files, epsg, cell_size, output_dir): + """Mosaic tiles and warp to target UTM/resolution. Returns output GeoTIFF path.""" + vrt_path = str(output_dir / "dem_mosaic.vrt") + dem_path = output_dir / f"dem_epsg_{epsg}_{int(cell_size)}m.tif" + + print(f" Building VRT from {len(tile_files)} tile(s)...") + vrt = gdal.BuildVRT(vrt_path, tile_files) + if vrt is None: + raise RuntimeError("gdal.BuildVRT failed") + vrt.FlushCache() + del vrt + + print(f" Warping to EPSG:{epsg} at {int(cell_size)}m...") + result = gdal.Warp( + str(dem_path), vrt_path, + options=gdal.WarpOptions( + dstSRS=f"EPSG:{epsg}", + xRes=cell_size, yRes=cell_size, + resampleAlg="cubicspline", + targetAlignedPixels=True, + creationOptions=["TILED=YES", "COMPRESS=LZW", "BIGTIFF=IF_SAFER"], + ), + ) + if result is None: + raise RuntimeError("gdal.Warp failed") + result.FlushCache() + del result + return dem_path + + +def main(): + parser = argparse.ArgumentParser( + description="Build a UTM-projected DEM mosaic from USGS 3DEP tiles", + formatter_class=argparse.RawDescriptionHelpFormatter, + usage=("\n fetch_dem.py -o DIR [-res METERS] [-e EPSG]" + " [--download-tiles] [--skip-download]"), + epilog=__doc__, + ) + parser.add_argument("-o", "--output-dir", required=True, metavar="DIR", + help="Output directory (reads basin.env from here)") + parser.add_argument("-res", "--cell-size", type=float, default=100.0, metavar="METERS", + help="Resolution in meters (default: 100)") + parser.add_argument("-e", "--epsg", type=int, default=None, metavar="EPSG", + help="UTM EPSG — reads BASIN_EPSG from basin.env if not given") + parser.add_argument("--download-tiles", action="store_true", + help="Download tiles to disk before warping (use for SLURM/offline)") + parser.add_argument("--skip-download", action="store_true", + help="Reuse existing tiles in /dem_tiles/ " + "(requires --download-tiles)") + args = parser.parse_args() + + output_dir = Path(args.output_dir).resolve() + env = read_basin_env(output_dir) + + epsg = args.epsg or int(env.get("BASIN_EPSG", 0)) + if not epsg: + sys.exit("EPSG not provided and BASIN_EPSG not in basin.env. " + "Run fetch_basin.py first or pass -e EPSG.") + + bbox_str = env.get("BASIN_BBOX", "") + if not bbox_str: + sys.exit("BASIN_BBOX not in basin.env. Run fetch_basin.py first.") + bbox_wgs84 = tuple(float(x) for x in bbox_str.split(",")) + + if args.skip_download: + tile_files = [str(p) for p in (output_dir / "dem_tiles").glob("*.tif")] + print(f"Reusing {len(tile_files)} existing tile(s)") + if not tile_files: + sys.exit(f"No .tif tiles in {output_dir / 'dem_tiles'}. Remove --skip-download.") + elif args.download_tiles: + print("Downloading 3DEP 1/3 arc-second DEM tiles...") + tile_files = download_dem_tiles(bbox_wgs84, output_dir) + else: + print("Streaming 3DEP 1/3 arc-second DEM tiles via vsicurl...") + tile_files = stream_dem_tiles(bbox_wgs84) + + print(f"Building DEM mosaic (EPSG:{epsg}, {int(args.cell_size)}m)...") + dem_path = build_dem(tile_files, epsg, args.cell_size, output_dir) + print(f"DEM: {dem_path}") + + with open(output_dir / "basin.env", "a", encoding="utf-8") as f: + f.write(f'BASIN_DEM="{dem_path}"\n') + + +if __name__ == "__main__": + main() diff --git a/scripts/topo/generate_topo.py b/scripts/topo/generate_topo.py new file mode 100755 index 0000000..571c6a9 --- /dev/null +++ b/scripts/topo/generate_topo.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python +""" +Generate topo.nc for SMRF/iSnobal from a HUC ID, basin name, or polygon. + +Chains fetch_basin → fetch_dem → build_topo_nc, passing values directly +between steps. Writes basin.env to the output directory for reference. + +Example usage: + python generate_topo.py -n "new fork" -o ./newfork_scripts + python generate_topo.py -huc 14040102 -o ./newfork_scripts + python generate_topo.py -s custom.gpkg -o ./newfork_scripts [-res 50] +""" + +import argparse +import os +import sys +from pathlib import Path + +if os.environ.get("CONDA_DEFAULT_ENV") != "basin_setup": + sys.exit( + "Error: must run inside 'basin_setup' conda env.\n" + f" Active env: {os.environ.get('CONDA_DEFAULT_ENV', '(none)')}\n" + " Run: conda activate basin_setup" + ) + +# Allow sibling scripts to be imported regardless of working directory +sys.path.insert(0, str(Path(__file__).parent)) + +import geopandas as gpd + +import build_topo_nc as btopo +import fetch_basin as fb +import fetch_dem as fd + + +def main(): + parser = argparse.ArgumentParser( + description="Generate topo.nc for SMRF/iSnobal", + formatter_class=argparse.RawDescriptionHelpFormatter, + usage=( + "\n" + " generate_topo.py (-huc HUC_ID | -n NAME | -s POLY) -o DIR\n" + " [-res METERS] [-level {2,4,6,8,10,12}] [-e EPSG]\n" + " [--landfire-dir DIR] [--veg-params-csv CSV | --veg-dir DIR]\n" + " [--download-dem-tiles] [--skip-dem-download]" + ), + epilog=__doc__, + ) + src = parser.add_mutually_exclusive_group(required=True) + src.add_argument("-huc", "--huc-id", metavar="HUC_ID", help="HUC ID (e.g. 14050001)") + src.add_argument("-n", "--basin-name", metavar="NAME", help="Basin keyword to search") + src.add_argument("-s", "--polygon", metavar="POLY", + help="Pre-existing basin polygon file (shapefile or GeoPackage) in UTM") + parser.add_argument("-o", "--output-dir", required=True, metavar="DIR") + parser.add_argument("-res", "--cell-size", type=float, default=100.0, metavar="METERS") + parser.add_argument("-level", "--huc-level", type=int, default=8, + choices=[2, 4, 6, 8, 10, 12], + help="HUC level for -n search (default: 8)") + parser.add_argument("-e", "--epsg", type=int, default=None, metavar="EPSG", + help="Override auto-detected UTM EPSG") + parser.add_argument("--landfire-dir", default=str(btopo.LANDFIRE_DIR_DEFAULT), metavar="DIR") + parser.add_argument("--veg-params-csv", default=str(btopo.VEG_PARAMS_CSV_DEFAULT), metavar="CSV") + parser.add_argument("--download-dem-tiles", action="store_true", + help="Download DEM tiles to disk before warping (use for SLURM/offline)") + parser.add_argument("--skip-dem-download", action="store_true", + help="Reuse existing tiles in /dem_tiles/ (requires --download-dem-tiles)") + expected = ", ".join(fname for fname, _ in btopo.VEG_DIR_FILES.values()) + parser.add_argument("--veg-dir", default=None, metavar="DIR", + help=f"Directory of user-derived vegetation rasters, overrides " + f"--landfire-dir. Must contain: {expected}") + args = parser.parse_args() + + output_dir = Path(args.output_dir).resolve() + output_dir.mkdir(parents=True, exist_ok=True) + landfire_dir = Path(args.landfire_dir) + veg_params_csv = Path(args.veg_params_csv) + + if not args.veg_dir: + if not landfire_dir.exists(): + sys.exit(f"LANDFIRE directory not found: {landfire_dir}") + if not veg_params_csv.exists(): + sys.exit(f"veg_params_csv not found: {veg_params_csv}") + + print("\n[1/3] Fetching basin boundary...") + if args.huc_id: + polygon, utm_epsg, bbox_wgs84, basin_name = fb.fetch_huc_polygon( + args.huc_id, output_dir) + + elif args.basin_name: + matches = fb.search_huc_by_name(args.basin_name, args.huc_level) + huc_field = fb.WBD_FIELD[args.huc_level] + if matches.empty: + sys.exit(f"No HUC{args.huc_level} features found matching '{args.basin_name}'.") + if len(matches) > 1: + cols = [c for c in [huc_field, "name", "areasqkm", "states"] + if c in matches.columns] + print(f"\n {len(matches)} matches for '{args.basin_name}':\n") + print(matches[cols].to_string(index=False)) + sys.exit("\nRerun with the correct -huc value from the table above.") + huc_id = matches[huc_field].iloc[0] + basin_name = matches["name"].iloc[0] + print(f" Found: {basin_name} ({huc_id})") + polygon, utm_epsg, bbox_wgs84, _ = fb.fetch_huc_polygon(huc_id, output_dir) + + else: + polygon = Path(args.polygon).resolve() + if not polygon.exists(): + sys.exit(f"Basin file not found: {polygon}") + gdf_wgs84 = gpd.read_file(polygon).to_crs("EPSG:4326") + xmin, ymin, xmax, ymax = gdf_wgs84.total_bounds + lon_center = (xmin + xmax) / 2 + lat_center = (ymin + ymax) / 2 + utm_epsg = fb.utm_epsg_from_lonlat(lon_center, lat_center) + bbox_wgs84 = (xmin, ymin, xmax, ymax) + basin_name = polygon.stem + + if args.epsg: + utm_epsg = args.epsg + + fb.write_env(output_dir, polygon, utm_epsg, bbox_wgs84, basin_name) + print(f" Basin file: {polygon}") + print(f" EPSG : {utm_epsg}") + + print("\n[2/3] Building DEM...") + if args.skip_dem_download: + tile_files = [str(p) for p in (output_dir / "dem_tiles").glob("*.tif")] + print(f" Reusing {len(tile_files)} existing tile(s)") + if not tile_files: + sys.exit(f"No tiles in {output_dir / 'dem_tiles'}. Remove --skip-dem-download.") + elif args.download_dem_tiles: + tile_files = fd.download_dem_tiles(bbox_wgs84, output_dir) + else: + tile_files = fd.stream_dem_tiles(bbox_wgs84) + + dem_file = fd.build_dem(tile_files, utm_epsg, args.cell_size, output_dir) + print(f" DEM: {dem_file}") + + with open(output_dir / "basin.env", "a", encoding="utf-8") as f: + f.write(f'BASIN_DEM="{dem_file}"\n') + + print("\n[3/3] Building topo.nc...") + topo_nc = btopo.build_topo( + polygon, dem_file, landfire_dir, veg_params_csv, + output_dir, args.cell_size, basin_name, + veg_dir=args.veg_dir, + ) + + print("\nValidating projection...") + btopo.validate(topo_nc, utm_epsg) + print(f"\nDone! topo.nc at {topo_nc}") + + # TODO: add --cleanup flag to remove intermediate files (dem_mosaic.vrt, warped DEM .tif, + # dem_tiles/, output_m/temp/) after successful topo.nc generation + + +if __name__ == "__main__": + main() From b92eeeeb211ead013884f4e7ed7815aaf31b9105 Mon Sep 17 00:00:00 2001 From: "J. Michelle Hu" <43484205+jmichellehu@users.noreply.github.com> Date: Tue, 26 May 2026 19:38:07 -0600 Subject: [PATCH 02/13] conda - basin_setup - explicitly add pyproj and rasterio --- conda/basin_setup.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/conda/basin_setup.yaml b/conda/basin_setup.yaml index a7ce021..ecb5f2f 100644 --- a/conda/basin_setup.yaml +++ b/conda/basin_setup.yaml @@ -8,6 +8,8 @@ dependencies: - netcdf4 - numpy - pandas + - pyproj + - rasterio - requests - rioxarray - xarray From 8da35e331ff4b6ebb1cb505e3bfc890d87c2ac9e Mon Sep 17 00:00:00 2001 From: "J. Michelle Hu" <43484205+jmichellehu@users.noreply.github.com> Date: Tue, 26 May 2026 19:38:54 -0600 Subject: [PATCH 03/13] scripts/topo - update minor README typos --- scripts/topo/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/topo/README.md b/scripts/topo/README.md index 8970d62..1ca9479 100644 --- a/scripts/topo/README.md +++ b/scripts/topo/README.md @@ -24,16 +24,16 @@ a *to-be-staged* repository and organize as follows: / US_140EVT_20180618/ Grid/us_140evt/hdr.adf - CSV_Data/LF_140EVH_05092014.csv US_140EVH_20180618/ Grid/us_140evh/hdr.adf + CSV_Data/LF_140EVH_05092014.csv ``` A veg params CSV mapping LANDFIRE EVT class IDs to SMRF canopy parameters (tau, k) is also required. Pass its path with `--veg-params-csv`. In this approach, all -vegetation parameters are categorical or discrete variables +vegetation parameters are categorical or discrete variables. -> **SnowHydRO group (UU CHPC):** all required datasets are staged at +> **SnowHydRO group (UU CHPC):** all required datasets are currently staged at > `/uufs/chpc.utah.edu/common/home/skiles-group3/LANDFIRE/`. ### Option B — user-supplied rasters (`--veg-dir`) From 7870a4b16d28dd1037b8824eb34dd2b5c9a3b073 Mon Sep 17 00:00:00 2001 From: "J. Michelle Hu" <43484205+jmichellehu@users.noreply.github.com> Date: Tue, 26 May 2026 19:40:17 -0600 Subject: [PATCH 04/13] scripts/topo - remove conda env check, modify help info in generate_topo --- scripts/topo/generate_topo.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/scripts/topo/generate_topo.py b/scripts/topo/generate_topo.py index 571c6a9..c9dac6b 100755 --- a/scripts/topo/generate_topo.py +++ b/scripts/topo/generate_topo.py @@ -16,13 +16,6 @@ import sys from pathlib import Path -if os.environ.get("CONDA_DEFAULT_ENV") != "basin_setup": - sys.exit( - "Error: must run inside 'basin_setup' conda env.\n" - f" Active env: {os.environ.get('CONDA_DEFAULT_ENV', '(none)')}\n" - " Run: conda activate basin_setup" - ) - # Allow sibling scripts to be imported regardless of working directory sys.path.insert(0, str(Path(__file__).parent)) @@ -63,7 +56,7 @@ def main(): parser.add_argument("--download-dem-tiles", action="store_true", help="Download DEM tiles to disk before warping (use for SLURM/offline)") parser.add_argument("--skip-dem-download", action="store_true", - help="Reuse existing tiles in /dem_tiles/ (requires --download-dem-tiles)") + help="Reuse existing tiles in /dem_tiles/ (from previous run w/ --download-dem-tiles)") expected = ", ".join(fname for fname, _ in btopo.VEG_DIR_FILES.values()) parser.add_argument("--veg-dir", default=None, metavar="DIR", help=f"Directory of user-derived vegetation rasters, overrides " From be458e538c8aae8f21c9354c3bf06bb4e06dcfd7 Mon Sep 17 00:00:00 2001 From: "J. Michelle Hu" <43484205+jmichellehu@users.noreply.github.com> Date: Tue, 26 May 2026 19:40:58 -0600 Subject: [PATCH 05/13] scripts/topo - build_topo_nc.py - update coord dtypes --- scripts/topo/build_topo_nc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/topo/build_topo_nc.py b/scripts/topo/build_topo_nc.py index 2673e9e..cfb83d0 100755 --- a/scripts/topo/build_topo_nc.py +++ b/scripts/topo/build_topo_nc.py @@ -258,8 +258,8 @@ def build_topo(polygon, dem_file, landfire_dir, veg_params_csv, topo_nc, format="NETCDF4", encoding={ - "x": {"dtype": "f4"}, - "y": {"dtype": "f4"}, + "x": {"dtype": "f8"}, + "y": {"dtype": "f8"}, "dem": {"dtype": "f4", "grid_mapping": "projection"}, "mask": {"grid_mapping": "projection"}, "veg_type": {"dtype": "u2", "grid_mapping": "projection"}, From 4d50dc1cb88ffc6ab0bd8fdccc0592cd0bda7eec Mon Sep 17 00:00:00 2001 From: "J. Michelle Hu" <43484205+jmichellehu@users.noreply.github.com> Date: Tue, 26 May 2026 19:59:52 -0600 Subject: [PATCH 06/13] scripts/topo - README.md - update HUC_ID help --- scripts/topo/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/topo/README.md b/scripts/topo/README.md index 1ca9479..98b5886 100644 --- a/scripts/topo/README.md +++ b/scripts/topo/README.md @@ -115,6 +115,8 @@ $ conda activate basin_setup $ python fetch_basin.py -n "new fork" -o ./newfork_scripts # [alternatively] fetch basin boundary by HUC ID +# HUC IDs must match the target HUC level length (2, 4, 6, 8, 10, or 12 digits) +# Leading zeros are not automatically added (e.g., HUC 01). $ python fetch_basin.py -huc 14040102 -o ./newfork_scripts # Step 2: mosaic DEM — streams via vsicurl by default (reads basin.env from step 1) @@ -145,7 +147,7 @@ BASIN_DEM="./newfork_scripts/dem_epsg_32612_100m.tif" ``` generate_topo.py flags: - -huc HUC_ID HUC ID (e.g. 14050001) + -huc HUC_ID HUC ID with exact supported length (2, 4, 6, 8, 10, or 12 digits) -n NAME Basin name keyword (e.g. "Yampa") -s POLY Existing UTM polygon file (shapefile or GeoPackage) -o DIR Output directory (required) From 94cc39f70059542860bba3884b0181ba6415cafa Mon Sep 17 00:00:00 2001 From: "J. Michelle Hu" <43484205+jmichellehu@users.noreply.github.com> Date: Tue, 26 May 2026 20:03:14 -0600 Subject: [PATCH 07/13] scripts/topo - fetch_basin.py - add poly epsg validation --- scripts/topo/fetch_basin.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/scripts/topo/fetch_basin.py b/scripts/topo/fetch_basin.py index ede7761..8f83503 100755 --- a/scripts/topo/fetch_basin.py +++ b/scripts/topo/fetch_basin.py @@ -142,11 +142,31 @@ def main(): polygon = Path(args.polygon).resolve() if not polygon.exists(): sys.exit(f"Polygon file not found: {polygon}") + gdf = gpd.read_file(polygon) + if gdf.crs is None: + sys.exit( + f"Polygon file must have a defined projected CRS: {polygon}" + ) + if not gdf.crs.is_projected: + sys.exit( + f"Polygon CRS must be projected/UTM, not geographic: {gdf.crs}" + ) + polygon_epsg = gdf.crs.to_epsg() + if polygon_epsg is None: + sys.exit( + f"Polygon CRS must resolve to an EPSG code so it can be validated: {gdf.crs}" + ) gdf_wgs84 = gpd.read_file(polygon).to_crs("EPSG:4326") xmin, ymin, xmax, ymax = gdf_wgs84.total_bounds lon_center = (xmin + xmax) / 2 lat_center = (ymin + ymax) / 2 - utm_epsg = utm_epsg_from_lonlat(lon_center, lat_center) + expected_epsg = args.epsg or utm_epsg_from_lonlat(lon_center, lat_center) + if polygon_epsg != expected_epsg: + sys.exit( + "Polygon CRS EPSG does not match the target UTM EPSG " + f"(polygon: EPSG:{polygon_epsg}, expected: EPSG:{expected_epsg})" + ) + utm_epsg = expected_epsg bbox_wgs84 = (xmin, ymin, xmax, ymax) basin_name = polygon.stem From 8567abfe90c676807221e05b1c84d301dc7c11e5 Mon Sep 17 00:00:00 2001 From: "J. Michelle Hu" <43484205+jmichellehu@users.noreply.github.com> Date: Tue, 26 May 2026 20:04:06 -0600 Subject: [PATCH 08/13] scripts/topo - fetch_basin.py - add strict huc id checks and help info --- scripts/topo/fetch_basin.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/scripts/topo/fetch_basin.py b/scripts/topo/fetch_basin.py index 8f83503..8d223ec 100755 --- a/scripts/topo/fetch_basin.py +++ b/scripts/topo/fetch_basin.py @@ -22,6 +22,7 @@ WBD_BASE = "https://hydro.nationalmap.gov/arcgis/rest/services/wbd/MapServer" _WBD_LAYER = {2: 1, 4: 2, 6: 3, 8: 4, 10: 5, 12: 6} WBD_FIELD = {2: "huc2", 4: "huc4", 6: "huc6", 8: "huc8", 10: "huc10", 12: "huc12"} +SUPPORTED_HUC_LEVELS = tuple(sorted(_WBD_LAYER)) def _wbd_query(layer, where, out_fields): @@ -45,11 +46,25 @@ def utm_epsg_from_lonlat(lon, lat): return 32600 + zone if lat >= 0 else 32700 + zone -def fetch_huc_polygon(huc_id, output_dir): - """Query WBD for a HUC boundary, reproject to UTM, and save as GeoPackage.""" +def validate_huc_id(huc_id): + """Validate that the input HUC ID is supported.""" + huc_id = str(huc_id).strip() + if not huc_id.isdigit(): + raise ValueError(f"HUC ID must contain only digits, got '{huc_id}'") + huc_level = len(huc_id) if huc_level not in _WBD_LAYER: - raise ValueError(f"HUC ID length {huc_level} not supported. Use HUC2/4/6/8/10/12.") + levels = ", ".join(str(level) for level in SUPPORTED_HUC_LEVELS) + raise ValueError( + f"HUC ID '{huc_id}' must be exactly one of these lengths: {levels} digits. " + f"Got {huc_level} digits." + ) + return huc_id, huc_level + + +def fetch_huc_polygon(huc_id, output_dir): + """Query WBD for a HUC boundary, reproject to UTM, and save as GeoPackage.""" + huc_id, huc_level = validate_huc_id(huc_id) field = WBD_FIELD[huc_level] gdf = _wbd_query(_WBD_LAYER[huc_level], f"{field}='{huc_id}'", "*") if gdf.empty: @@ -99,8 +114,8 @@ def main(): epilog=__doc__, ) src = parser.add_mutually_exclusive_group(required=True) - src.add_argument("-huc", "--huc-id", metavar="HUC_ID", - help="HUC ID (e.g. 14050001)") + src.add_argument("-huc", "--huc-id", type=str, metavar="HUC_ID", + help="HUC ID with exact supported length (2, 4, 6, 8, 10, or 12 digits; e.g. 14050001)") src.add_argument("-n", "--basin-name", metavar="NAME", help="Basin name keyword to search (e.g. 'Yampa')") src.add_argument("-s", "--polygon", metavar="POLY", From fedb170a7c5bab7c8d4f359fb47cd2272b8d7d1e Mon Sep 17 00:00:00 2001 From: "J. Michelle Hu" <43484205+jmichellehu@users.noreply.github.com> Date: Tue, 26 May 2026 20:46:53 -0600 Subject: [PATCH 09/13] scripts/topo - remove --skip-downloads for clarity Default uses vsicurl, keeping the flag creates more confusion --- scripts/topo/README.md | 1 - scripts/topo/fetch_dem.py | 12 ++---------- scripts/topo/generate_topo.py | 12 ++---------- 3 files changed, 4 insertions(+), 21 deletions(-) diff --git a/scripts/topo/README.md b/scripts/topo/README.md index 98b5886..723c693 100644 --- a/scripts/topo/README.md +++ b/scripts/topo/README.md @@ -159,5 +159,4 @@ generate_topo.py flags: --veg-dir DIR User-supplied vegetation rasters, overrides --landfire-dir. Must contain: veg_type.tif, veg_height.tif, veg_tau.tif, veg_k.tif --download-dem-tiles Download DEM tiles to disk before warping (use for SLURM/offline) - --skip-dem-download Reuse existing tiles in /dem_tiles/ (requires --download-dem-tiles) ``` diff --git a/scripts/topo/fetch_dem.py b/scripts/topo/fetch_dem.py index 6d4a75c..50d1b6a 100755 --- a/scripts/topo/fetch_dem.py +++ b/scripts/topo/fetch_dem.py @@ -135,7 +135,7 @@ def main(): description="Build a UTM-projected DEM mosaic from USGS 3DEP tiles", formatter_class=argparse.RawDescriptionHelpFormatter, usage=("\n fetch_dem.py -o DIR [-res METERS] [-e EPSG]" - " [--download-tiles] [--skip-download]"), + " [--download-tiles]"), epilog=__doc__, ) parser.add_argument("-o", "--output-dir", required=True, metavar="DIR", @@ -146,9 +146,6 @@ def main(): help="UTM EPSG — reads BASIN_EPSG from basin.env if not given") parser.add_argument("--download-tiles", action="store_true", help="Download tiles to disk before warping (use for SLURM/offline)") - parser.add_argument("--skip-download", action="store_true", - help="Reuse existing tiles in /dem_tiles/ " - "(requires --download-tiles)") args = parser.parse_args() output_dir = Path(args.output_dir).resolve() @@ -164,12 +161,7 @@ def main(): sys.exit("BASIN_BBOX not in basin.env. Run fetch_basin.py first.") bbox_wgs84 = tuple(float(x) for x in bbox_str.split(",")) - if args.skip_download: - tile_files = [str(p) for p in (output_dir / "dem_tiles").glob("*.tif")] - print(f"Reusing {len(tile_files)} existing tile(s)") - if not tile_files: - sys.exit(f"No .tif tiles in {output_dir / 'dem_tiles'}. Remove --skip-download.") - elif args.download_tiles: + if args.download_tiles: print("Downloading 3DEP 1/3 arc-second DEM tiles...") tile_files = download_dem_tiles(bbox_wgs84, output_dir) else: diff --git a/scripts/topo/generate_topo.py b/scripts/topo/generate_topo.py index c9dac6b..645dd10 100755 --- a/scripts/topo/generate_topo.py +++ b/scripts/topo/generate_topo.py @@ -12,7 +12,6 @@ """ import argparse -import os import sys from pathlib import Path @@ -35,7 +34,7 @@ def main(): " generate_topo.py (-huc HUC_ID | -n NAME | -s POLY) -o DIR\n" " [-res METERS] [-level {2,4,6,8,10,12}] [-e EPSG]\n" " [--landfire-dir DIR] [--veg-params-csv CSV | --veg-dir DIR]\n" - " [--download-dem-tiles] [--skip-dem-download]" + " [--download-dem-tiles]" ), epilog=__doc__, ) @@ -55,8 +54,6 @@ def main(): parser.add_argument("--veg-params-csv", default=str(btopo.VEG_PARAMS_CSV_DEFAULT), metavar="CSV") parser.add_argument("--download-dem-tiles", action="store_true", help="Download DEM tiles to disk before warping (use for SLURM/offline)") - parser.add_argument("--skip-dem-download", action="store_true", - help="Reuse existing tiles in /dem_tiles/ (from previous run w/ --download-dem-tiles)") expected = ", ".join(fname for fname, _ in btopo.VEG_DIR_FILES.values()) parser.add_argument("--veg-dir", default=None, metavar="DIR", help=f"Directory of user-derived vegetation rasters, overrides " @@ -115,12 +112,7 @@ def main(): print(f" EPSG : {utm_epsg}") print("\n[2/3] Building DEM...") - if args.skip_dem_download: - tile_files = [str(p) for p in (output_dir / "dem_tiles").glob("*.tif")] - print(f" Reusing {len(tile_files)} existing tile(s)") - if not tile_files: - sys.exit(f"No tiles in {output_dir / 'dem_tiles'}. Remove --skip-dem-download.") - elif args.download_dem_tiles: + if args.download_dem_tiles: tile_files = fd.download_dem_tiles(bbox_wgs84, output_dir) else: tile_files = fd.stream_dem_tiles(bbox_wgs84) From 453ed0adc0c465bb1bbb585751deb9dafda011a8 Mon Sep 17 00:00:00 2001 From: "J. Michelle Hu" <43484205+jmichellehu@users.noreply.github.com> Date: Tue, 26 May 2026 20:48:57 -0600 Subject: [PATCH 10/13] scripts/topo - fetch_basin.py - move epsg override into huc/name branch --- scripts/topo/fetch_basin.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/topo/fetch_basin.py b/scripts/topo/fetch_basin.py index 8d223ec..8e72d3a 100755 --- a/scripts/topo/fetch_basin.py +++ b/scripts/topo/fetch_basin.py @@ -135,6 +135,8 @@ def main(): print(f"Fetching HUC {args.huc_id} from USGS WBD...") polygon, utm_epsg, bbox_wgs84, basin_name = fetch_huc_polygon( args.huc_id, output_dir) + if args.epsg: + utm_epsg = args.epsg elif args.basin_name: print(f"Searching USGS WBD HUC{args.huc_level} for '{args.basin_name}'...") @@ -152,6 +154,8 @@ def main(): basin_name = matches["name"].iloc[0] print(f"Found: {basin_name} ({huc_id})") polygon, utm_epsg, bbox_wgs84, _ = fetch_huc_polygon(huc_id, output_dir) + if args.epsg: + utm_epsg = args.epsg else: polygon = Path(args.polygon).resolve() @@ -171,7 +175,7 @@ def main(): sys.exit( f"Polygon CRS must resolve to an EPSG code so it can be validated: {gdf.crs}" ) - gdf_wgs84 = gpd.read_file(polygon).to_crs("EPSG:4326") + gdf_wgs84 = gdf.to_crs("EPSG:4326") xmin, ymin, xmax, ymax = gdf_wgs84.total_bounds lon_center = (xmin + xmax) / 2 lat_center = (ymin + ymax) / 2 @@ -185,9 +189,6 @@ def main(): bbox_wgs84 = (xmin, ymin, xmax, ymax) basin_name = polygon.stem - if args.epsg: - utm_epsg = args.epsg - env_path = write_env(output_dir, polygon, utm_epsg, bbox_wgs84, basin_name) print(f"Basin file: {polygon}") print(f"EPSG : {utm_epsg}") From 85a51320d0bf7df77f463a7324310027e1797b56 Mon Sep 17 00:00:00 2001 From: "J. Michelle Hu" <43484205+jmichellehu@users.noreply.github.com> Date: Tue, 26 May 2026 20:53:44 -0600 Subject: [PATCH 11/13] scripts/topo - fetch_dem.py - fall back to a unique downloadURL Apply Copilot suggestion to guard against issues with malformed tile titles. Co-authored by Claude Sonnet 4.6 --- scripts/topo/fetch_dem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/topo/fetch_dem.py b/scripts/topo/fetch_dem.py index 50d1b6a..3621b4f 100755 --- a/scripts/topo/fetch_dem.py +++ b/scripts/topo/fetch_dem.py @@ -60,7 +60,8 @@ def _query_tile_urls(bbox_wgs84): # Title format: "USGS 1/3 Arc Second n40w107 20210312" → cell is second-to-last word best = {} for item in items: - tile = item.get("title", "").split()[-2] + parts = item.get("title", "").split() + tile = parts[-2] if len(parts) >= 2 else item.get("downloadURL", item.get("title", "")) pub_date = item.get("publicationDate", "") is_newer = tile in best and pub_date > best[tile].get("publicationDate", "") if tile not in best or is_newer: From 004c272e98bf9260c0718ba37016525065649f96 Mon Sep 17 00:00:00 2001 From: "J. Michelle Hu" <43484205+jmichellehu@users.noreply.github.com> Date: Tue, 26 May 2026 21:20:01 -0600 Subject: [PATCH 12/13] script/topo - build_topo_nc.py - add utm attribute SMRF's load_topo.py expects the UTM zone to be stored in projection info. Add handling for hemispheres to extract 2 digit zone. --- scripts/topo/build_topo_nc.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/topo/build_topo_nc.py b/scripts/topo/build_topo_nc.py index cfb83d0..3f2b32c 100755 --- a/scripts/topo/build_topo_nc.py +++ b/scripts/topo/build_topo_nc.py @@ -140,6 +140,12 @@ def _projection_var(crs_str): attrs = crs.to_cf() attrs["crs_wkt"] = wkt attrs["spatial_ref"] = wkt + # smrf/data/load_topo.py requires utm_zone_number as an integer attribute + epsg = crs.to_epsg() + if epsg and 32601 <= epsg <= 32660: + attrs["utm_zone_number"] = epsg - 32600 + elif epsg and 32701 <= epsg <= 32760: + attrs["utm_zone_number"] = epsg - 32700 return xr.DataArray(0, attrs=attrs) From 0c22245820b5a500c12377437542e781d0715ba8 Mon Sep 17 00:00:00 2001 From: "J. Michelle Hu" <43484205+jmichellehu@users.noreply.github.com> Date: Tue, 26 May 2026 22:28:25 -0600 Subject: [PATCH 13/13] scripts/topo - harden CRS/EPSG handling and EVT NoData across pipeline - Fix -e EPSG override so crs in polygon and basin.env agree - Validate EPSG is a UTM zone across all scripts - Enforce crs checks for input POLY - Mask EVT NoData pixels to deal with extent padding, fill with open (tau=1,k=0) - Change gdf.crs.srs to f'EPSG:{gdf.crs.to_epsg()}' in _padded_extents - Add check for zero-sized tiles in fetch_dem - Fix minor typos in README.md and add Claude acknowledgement --- scripts/topo/README.md | 6 ++- scripts/topo/build_topo_nc.py | 29 +++++++++++--- scripts/topo/fetch_basin.py | 72 ++++++++++++++++++----------------- scripts/topo/fetch_dem.py | 8 +++- scripts/topo/generate_topo.py | 20 +++------- 5 files changed, 78 insertions(+), 57 deletions(-) diff --git a/scripts/topo/README.md b/scripts/topo/README.md index 723c693..79b520c 100644 --- a/scripts/topo/README.md +++ b/scripts/topo/README.md @@ -29,9 +29,9 @@ a *to-be-staged* repository and organize as follows: CSV_Data/LF_140EVH_05092014.csv ``` -A veg params CSV mapping LANDFIRE EVT class IDs to SMRF canopy parameters (tau, k) +A veg params CSV mapping LANDFIRE EVH class IDs to SMRF canopy parameters (tau, k) is also required. Pass its path with `--veg-params-csv`. In this approach, all -vegetation parameters are categorical or discrete variables. +vegetation parameters are categorical or discrete values. > **SnowHydRO group (UU CHPC):** all required datasets are currently staged at > `/uufs/chpc.utah.edu/common/home/skiles-group3/LANDFIRE/`. @@ -160,3 +160,5 @@ generate_topo.py flags: Must contain: veg_type.tif, veg_height.tif, veg_tau.tif, veg_k.tif --download-dem-tiles Download DEM tiles to disk before warping (use for SLURM/offline) ``` +## Acknowledgements +Co-creation with Claude Sonnet 4.6 \ No newline at end of file diff --git a/scripts/topo/build_topo_nc.py b/scripts/topo/build_topo_nc.py index 3f2b32c..bf13e57 100755 --- a/scripts/topo/build_topo_nc.py +++ b/scripts/topo/build_topo_nc.py @@ -58,7 +58,7 @@ def _padded_extents(polygon, cell_size, pad_cells=5): ymin = math.floor((ymin - pad) / cell_size) * cell_size xmax = math.ceil((xmax + pad) / cell_size) * cell_size ymax = math.ceil((ymax + pad) / cell_size) * cell_size - return [xmin, ymin, xmax, ymax], gdf.crs.srs + return [xmin, ymin, xmax, ymax], f"EPSG:{gdf.crs.to_epsg()}" def _clip_raster(src, dst, crs, extents, cell_size, resample="near"): @@ -101,14 +101,24 @@ def _basin_mask(polygon, dem_da): ) -def _veg_tau_k(evt_arr, veg_params_csv): - """Map EVT pixel values → tau and k arrays using the veg params CSV.""" +def _veg_tau_k(evt_da, veg_params_csv): + """Map EVT pixel values → tau and k arrays using the veg params CSV. + + NoData pixels (from padded extents outside valid LANDFIRE coverage) are + mapped to open/no-canopy values (tau=1, k=0) rather than raising. + """ df = pd.read_csv(veg_params_csv) idx_col = "landfire140" if "landfire140" in df.columns else df.columns[0] df = df.set_index(idx_col) df = df[~df.index.duplicated(keep="first")] - missing = set(np.unique(evt_arr)) - set(df.index) + evt_arr = evt_da.values + nodata = evt_da.rio.nodata + nodata_mask = np.zeros(evt_arr.shape, dtype=bool) if nodata is None else (evt_arr == nodata) + if np.any(nodata_mask): + print(f" Warning: {nodata_mask.sum()} NoData EVT pixels mapped to open/no-canopy (tau=1, k=0)") + + missing = set(np.unique(evt_arr[~nodata_mask])) - set(df.index) if missing: # tau/k have no safe fallback — missing class would silently corrupt radiation raise ValueError(f"EVT classes not in {veg_params_csv}: {missing}") @@ -116,6 +126,8 @@ def _veg_tau_k(evt_arr, veg_params_csv): flat = pd.Series(evt_arr.ravel()) tau = flat.map(df["tau"]).values.reshape(evt_arr.shape) k = flat.map(df["k"]).values.reshape(evt_arr.shape) + tau[nodata_mask] = 1.0 + k[nodata_mask] = 0.0 return tau, k @@ -158,6 +170,8 @@ def validate(topo_nc, expected_epsg): if not wkt: raise RuntimeError("topo.nc 'projection' has no crs_wkt or spatial_ref attribute") actual = CRS.from_wkt(wkt).to_epsg() + if actual is None: + raise RuntimeError("topo.nc CRS does not resolve to an EPSG code") if actual != expected_epsg: raise ValueError(f"topo.nc CRS mismatch: EPSG:{actual} != expected EPSG:{expected_epsg}") print(f" Projection OK, EPSG:{actual}") @@ -214,7 +228,7 @@ def build_topo(polygon, dem_file, landfire_dir, veg_params_csv, evt_da = _read_raster(evt_clip) evh_da = _read_raster(evh_clip) type_arr = evt_da.values - tau_arr, k_arr = _veg_tau_k(type_arr, veg_params_csv) + tau_arr, k_arr = _veg_tau_k(evt_da, veg_params_csv) height_arr = _veg_height(evh_da.values, str(landfire_dir / _EVH_CSV)) print("Assembling dataset...") @@ -331,6 +345,11 @@ def main(): sys.exit(f"DEM file not found: {dem_file}. Pass -d or run fetch_dem.py first.") if not epsg: sys.exit("EPSG not found. Pass -e or run fetch_basin.py first.") + if not (32601 <= epsg <= 32660 or 32701 <= epsg <= 32760): + sys.exit( + f"EPSG:{epsg} is not a valid UTM zone. " + "Expected 32601-32660 (northern) or 32701-32760 (southern)." + ) landfire_dir = Path(args.landfire_dir) veg_params_csv = Path(args.veg_params_csv) diff --git a/scripts/topo/fetch_basin.py b/scripts/topo/fetch_basin.py index 8e72d3a..2c6b72c 100755 --- a/scripts/topo/fetch_basin.py +++ b/scripts/topo/fetch_basin.py @@ -46,6 +46,14 @@ def utm_epsg_from_lonlat(lon, lat): return 32600 + zone if lat >= 0 else 32700 + zone +def validate_utm_epsg(epsg): + if not (32601 <= epsg <= 32660 or 32701 <= epsg <= 32760): + sys.exit( + f"EPSG:{epsg} is not a valid UTM zone. " + "Expected 32601-32660 (northern) or 32701-32760 (southern)." + ) + + def validate_huc_id(huc_id): """Validate that the input HUC ID is supported.""" huc_id = str(huc_id).strip() @@ -62,7 +70,28 @@ def validate_huc_id(huc_id): return huc_id, huc_level -def fetch_huc_polygon(huc_id, output_dir): +def validate_existing_polygon(polygon_path, epsg_override=None): + """Validate CRS of a user-supplied polygon. Returns (utm_epsg, bbox_wgs84, basin_name).""" + gdf = gpd.read_file(polygon_path) + if gdf.crs is None: + sys.exit(f"Polygon file must have a defined projected CRS: {polygon_path}") + if not gdf.crs.is_projected: + sys.exit(f"Polygon CRS must be projected/UTM, not geographic: {gdf.crs}") + polygon_epsg = gdf.crs.to_epsg() + if polygon_epsg is None: + sys.exit(f"Polygon CRS must resolve to an EPSG code so it can be validated: {gdf.crs}") + gdf_wgs84 = gdf.to_crs("EPSG:4326") + xmin, ymin, xmax, ymax = gdf_wgs84.total_bounds + expected_epsg = epsg_override or utm_epsg_from_lonlat((xmin + xmax) / 2, (ymin + ymax) / 2) + if polygon_epsg != expected_epsg: + sys.exit( + "Polygon CRS EPSG does not match the target UTM EPSG " + f"(polygon: EPSG:{polygon_epsg}, expected: EPSG:{expected_epsg})" + ) + return expected_epsg, (xmin, ymin, xmax, ymax), polygon_path.stem + + +def fetch_huc_polygon(huc_id, output_dir, epsg_override=None): """Query WBD for a HUC boundary, reproject to UTM, and save as GeoPackage.""" huc_id, huc_level = validate_huc_id(huc_id) field = WBD_FIELD[huc_level] @@ -71,7 +100,7 @@ def fetch_huc_polygon(huc_id, output_dir): raise ValueError(f"No WBD features found for HUC ID '{huc_id}'") xmin, ymin, xmax, ymax = gdf.total_bounds - utm_epsg = utm_epsg_from_lonlat((xmin + xmax) / 2, (ymin + ymax) / 2) + utm_epsg = epsg_override or utm_epsg_from_lonlat((xmin + xmax) / 2, (ymin + ymax) / 2) bbox_wgs84 = (xmin, ymin, xmax, ymax) basin_name = gdf.iloc[0].get("name", f"HUC{huc_id}") @@ -128,15 +157,16 @@ def main(): help="Override UTM EPSG (auto-detected from centroid if not given)") args = parser.parse_args() + if args.epsg: + validate_utm_epsg(args.epsg) + output_dir = Path(args.output_dir).resolve() output_dir.mkdir(parents=True, exist_ok=True) if args.huc_id: print(f"Fetching HUC {args.huc_id} from USGS WBD...") polygon, utm_epsg, bbox_wgs84, basin_name = fetch_huc_polygon( - args.huc_id, output_dir) - if args.epsg: - utm_epsg = args.epsg + args.huc_id, output_dir, epsg_override=args.epsg) elif args.basin_name: print(f"Searching USGS WBD HUC{args.huc_level} for '{args.basin_name}'...") @@ -153,41 +183,13 @@ def main(): huc_id = matches[huc_field].iloc[0] basin_name = matches["name"].iloc[0] print(f"Found: {basin_name} ({huc_id})") - polygon, utm_epsg, bbox_wgs84, _ = fetch_huc_polygon(huc_id, output_dir) - if args.epsg: - utm_epsg = args.epsg + polygon, utm_epsg, bbox_wgs84, _ = fetch_huc_polygon(huc_id, output_dir, epsg_override=args.epsg) else: polygon = Path(args.polygon).resolve() if not polygon.exists(): sys.exit(f"Polygon file not found: {polygon}") - gdf = gpd.read_file(polygon) - if gdf.crs is None: - sys.exit( - f"Polygon file must have a defined projected CRS: {polygon}" - ) - if not gdf.crs.is_projected: - sys.exit( - f"Polygon CRS must be projected/UTM, not geographic: {gdf.crs}" - ) - polygon_epsg = gdf.crs.to_epsg() - if polygon_epsg is None: - sys.exit( - f"Polygon CRS must resolve to an EPSG code so it can be validated: {gdf.crs}" - ) - gdf_wgs84 = gdf.to_crs("EPSG:4326") - xmin, ymin, xmax, ymax = gdf_wgs84.total_bounds - lon_center = (xmin + xmax) / 2 - lat_center = (ymin + ymax) / 2 - expected_epsg = args.epsg or utm_epsg_from_lonlat(lon_center, lat_center) - if polygon_epsg != expected_epsg: - sys.exit( - "Polygon CRS EPSG does not match the target UTM EPSG " - f"(polygon: EPSG:{polygon_epsg}, expected: EPSG:{expected_epsg})" - ) - utm_epsg = expected_epsg - bbox_wgs84 = (xmin, ymin, xmax, ymax) - basin_name = polygon.stem + utm_epsg, bbox_wgs84, basin_name = validate_existing_polygon(polygon, args.epsg) env_path = write_env(output_dir, polygon, utm_epsg, bbox_wgs84, basin_name) print(f"Basin file: {polygon}") diff --git a/scripts/topo/fetch_dem.py b/scripts/topo/fetch_dem.py index 3621b4f..2c81db9 100755 --- a/scripts/topo/fetch_dem.py +++ b/scripts/topo/fetch_dem.py @@ -11,7 +11,6 @@ Usage: python fetch_dem.py -o ./newfork_scripts python fetch_dem.py -o ./newfork_scripts --download-tiles - python fetch_dem.py -o ./newfork_scripts --download-tiles --skip-download """ import argparse @@ -95,6 +94,8 @@ def download_dem_tiles(bbox_wgs84, output_dir): with open(dest, "wb") as f: for chunk in r.iter_content(chunk_size=8192): f.write(chunk) + if dest.stat().st_size == 0: + raise RuntimeError(f"Downloaded tile is empty (truncated download?): {dest}") else: print(f" {dest.name} already exists, skipping") tile_files.append(str(dest)) @@ -156,6 +157,11 @@ def main(): if not epsg: sys.exit("EPSG not provided and BASIN_EPSG not in basin.env. " "Run fetch_basin.py first or pass -e EPSG.") + if not (32601 <= epsg <= 32660 or 32701 <= epsg <= 32760): + sys.exit( + f"EPSG:{epsg} is not a valid UTM zone. " + "Expected 32601-32660 (northern) or 32701-32760 (southern)." + ) bbox_str = env.get("BASIN_BBOX", "") if not bbox_str: diff --git a/scripts/topo/generate_topo.py b/scripts/topo/generate_topo.py index 645dd10..525f6e6 100755 --- a/scripts/topo/generate_topo.py +++ b/scripts/topo/generate_topo.py @@ -18,8 +18,6 @@ # Allow sibling scripts to be imported regardless of working directory sys.path.insert(0, str(Path(__file__).parent)) -import geopandas as gpd - import build_topo_nc as btopo import fetch_basin as fb import fetch_dem as fd @@ -60,6 +58,9 @@ def main(): f"--landfire-dir. Must contain: {expected}") args = parser.parse_args() + if args.epsg: + fb.validate_utm_epsg(args.epsg) + output_dir = Path(args.output_dir).resolve() output_dir.mkdir(parents=True, exist_ok=True) landfire_dir = Path(args.landfire_dir) @@ -74,7 +75,7 @@ def main(): print("\n[1/3] Fetching basin boundary...") if args.huc_id: polygon, utm_epsg, bbox_wgs84, basin_name = fb.fetch_huc_polygon( - args.huc_id, output_dir) + args.huc_id, output_dir, epsg_override=args.epsg) elif args.basin_name: matches = fb.search_huc_by_name(args.basin_name, args.huc_level) @@ -90,22 +91,13 @@ def main(): huc_id = matches[huc_field].iloc[0] basin_name = matches["name"].iloc[0] print(f" Found: {basin_name} ({huc_id})") - polygon, utm_epsg, bbox_wgs84, _ = fb.fetch_huc_polygon(huc_id, output_dir) + polygon, utm_epsg, bbox_wgs84, _ = fb.fetch_huc_polygon(huc_id, output_dir, epsg_override=args.epsg) else: polygon = Path(args.polygon).resolve() if not polygon.exists(): sys.exit(f"Basin file not found: {polygon}") - gdf_wgs84 = gpd.read_file(polygon).to_crs("EPSG:4326") - xmin, ymin, xmax, ymax = gdf_wgs84.total_bounds - lon_center = (xmin + xmax) / 2 - lat_center = (ymin + ymax) / 2 - utm_epsg = fb.utm_epsg_from_lonlat(lon_center, lat_center) - bbox_wgs84 = (xmin, ymin, xmax, ymax) - basin_name = polygon.stem - - if args.epsg: - utm_epsg = args.epsg + utm_epsg, bbox_wgs84, basin_name = fb.validate_existing_polygon(polygon, args.epsg) fb.write_env(output_dir, polygon, utm_epsg, bbox_wgs84, basin_name) print(f" Basin file: {polygon}")