diff --git a/conda/basin_setup.yaml b/conda/basin_setup.yaml index 047ab94..ecb5f2f 100644 --- a/conda/basin_setup.yaml +++ b/conda/basin_setup.yaml @@ -2,17 +2,15 @@ name: basin_setup channels: - conda-forge dependencies: - - colorama - - coloredlogs - - gdal<3.9 + - python>=3.9 + - gdal - geopandas - netcdf4 - - numpy<1.25 + - numpy - pandas - - pip - - python=3.11 + - pyproj + - rasterio + - requests - rioxarray - xarray - - pip: - - inicheck - - spatialnc + - dask diff --git a/scripts/topo/README.md b/scripts/topo/README.md new file mode 100644 index 0000000..79b520c --- /dev/null +++ b/scripts/topo/README.md @@ -0,0 +1,164 @@ +# Generate basin topo.nc for SMRF/iSnobal + +Generates a `topo.nc` file containing DEM, basin mask, and vegetation layers +(type, height, tau, k) required to run SMRF/iSnobal. Vegetation defaults to +LANDFIRE 1.4.0; user-supplied rasters can also be substituted via `--veg-dir`. + +No dependency on the USDA-ARS `basin_setup` repository. + +## Requirements + +- `basin_setup` conda environment (see `model_setup/conda/basin_setup.yaml`) +- Vegetation data — either LANDFIRE 1.4.0 or user-supplied input (see below) + +## Vegetation data + +Two options are supported: + +### Option A — LANDFIRE 1.4.0 (default) + +Download the **LF 1.4.0 EVT** and **LF 1.4.0 EVH** products for CONUS from +a *to-be-staged* repository and organize as follows: + +``` +/ + US_140EVT_20180618/ + Grid/us_140evt/hdr.adf + US_140EVH_20180618/ + Grid/us_140evh/hdr.adf + CSV_Data/LF_140EVH_05092014.csv +``` + +A veg params CSV mapping LANDFIRE EVH class IDs to SMRF canopy parameters (tau, k) +is also required. Pass its path with `--veg-params-csv`. In this approach, all +vegetation parameters are categorical or discrete values. + +> **SnowHydRO group (UU CHPC):** all required datasets are currently staged at +> `/uufs/chpc.utah.edu/common/home/skiles-group3/LANDFIRE/`. + +### Option B — user-supplied rasters (`--veg-dir`) + +*** ***EXPERIMENTAL, TESTING REQUIRED*** *** +If you have derived parameters, place the four +required GeoTIFFs in a directory and pass in with `--veg-dir`: + +``` +/ + veg_type.tif # vegetation type class (categorical) + veg_height.tif # canopy height in metres (continuous) + veg_tau.tif # canopy transmissivity tau (continuous) + veg_k.tif # canopy extinction coefficient k (continuous) +``` + +All four files must cover the basin extent and be in a CRS that GDAL can reproject. +LANDFIRE is not required when using this option. Circumventing type and height is +also possible (needed only in default generation of tau and k), but not yet tested. + +## Quick start + +```bash +$ cd model_setup +$ conda env create -f conda/basin_setup.yaml +$ conda activate basin_setup + +$ cd scripts/topo + +# View script options +$ python generate_topo.py -h +``` + +## Common invocation options +```bash +# Generate topo.nc using basin name, use quotes to handle spaces +$ python generate_topo.py -n "new fork" -o /path/to/output + +# NB: multiple matches will print candidate basins in a table +# Select the correct hucid (or add specificity to basin name) and re-run +$ python generate_topo.py -n "gunnison" -o /path/to/output # this will yield 3 matches + huc8 name areasqkm states +14020002 Upper Gunnison 6245.14 CO +14020004 North Fork Gunnison 2509.66 CO +14020005 Lower Gunnison 4306.26 CO + +# Re-run with more specificity +$ python generate_topo.py -n "lower gunnison" -o /path/to/output + +# By HUC ID, defaults to HUC 8 +$ python generate_topo.py -huc 14020005 -o /path/to/output + +# From an existing UTM polygon file (GeoPackage) +$ python generate_topo.py -s /path/to/basin.gpkg -o /path/to/output + +# With user-supplied vegetation rasters instead of LANDFIRE +$ python generate_topo.py -huc 14020005 -o /path/to/output --veg-dir /path/to/veg/ +``` + +**Output:** `/path/to/output/output_m/topo.nc` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `generate_topo.py` | Orchestrates all three scripts | +| `fetch_basin.py` | Fetches HUC boundary from USGS WBD → UTM GeoPackage | +| `fetch_dem.py` | Streams or downloads 3DEP DEM tiles → warped GeoTIFF mosaic | +| `build_topo_nc.py` | Assembles polygon + DEM + vegetation → topo.nc | + +Each script is independently runnable, provided that requisite data layers exist. + +## Step-by-Step Examples + +```bash +$ conda activate basin_setup + +# Step 1: generate polygon and basin.env file by basin name +$ python fetch_basin.py -n "new fork" -o ./newfork_scripts + +# [alternatively] fetch basin boundary by HUC ID +# HUC IDs must match the target HUC level length (2, 4, 6, 8, 10, or 12 digits) +# Leading zeros are not automatically added (e.g., HUC 01). +$ python fetch_basin.py -huc 14040102 -o ./newfork_scripts + +# Step 2: mosaic DEM — streams via vsicurl by default (reads basin.env from step 1) +$ python fetch_dem.py -o ./newfork_scripts + +# if tile download to disk is preferred, add flag, then warp +$ python fetch_dem.py -o ./newfork_scripts --download-tiles + +# Step 3: build topo.nc using dem and vegetation (reads basin.env for paths) +# (LANDFIRE default) +$ python build_topo_nc.py -o ./newfork_scripts + +# (user-supplied veg): bypass LANDFIRE with pre-derived rasters +$ python build_topo_nc.py -o ./newfork_scripts --veg-dir /path/to/veg/ +``` + +Key output `basin.env` file is written by step 1 and appended in step 2, and contains: + +``` +BASIN_POLYGON="./newfork_scripts/huc14040102_basin.gpkg" +BASIN_EPSG="32612" +BASIN_BBOX="-110.0593,42.5219,-109.2101,43.2069" +BASIN_NAME="New Fork" +BASIN_DEM="./newfork_scripts/dem_epsg_32612_100m.tif" +``` + +## Options + +``` +generate_topo.py flags: + -huc HUC_ID HUC ID with exact supported length (2, 4, 6, 8, 10, or 12 digits) + -n NAME Basin name keyword (e.g. "Yampa") + -s POLY Existing UTM polygon file (shapefile or GeoPackage) + -o DIR Output directory (required) + -res METERS Resolution in meters (default: 100) + -level N HUC level for -n search (default: 8) + -e EPSG Override auto-detected UTM EPSG + --landfire-dir DIR LANDFIRE 1.4.0 directory + --veg-params-csv CSV Veg params lookup CSV (used with --landfire-dir) + --veg-dir DIR User-supplied vegetation rasters, overrides --landfire-dir. + Must contain: veg_type.tif, veg_height.tif, veg_tau.tif, veg_k.tif + --download-dem-tiles Download DEM tiles to disk before warping (use for SLURM/offline) +``` +## Acknowledgements +Co-creation with Claude Sonnet 4.6 \ No newline at end of file diff --git a/scripts/topo/build_topo_nc.py b/scripts/topo/build_topo_nc.py new file mode 100755 index 0000000..bf13e57 --- /dev/null +++ b/scripts/topo/build_topo_nc.py @@ -0,0 +1,374 @@ +#!/usr/bin/env python +""" +Build a topo.nc file for SMRF/iSnobal from a basin polygon file, a warped DEM +GeoTIFF, and vegetation data (LANDFIRE 1.4.0 or user-supplied via --veg-dir). + +Writes output/topo.nc and validates its CRS against the expected EPSG. + +Usage: + python build_topo_nc.py -o ./newfork_scripts # reads basin.env + python build_topo_nc.py -s custom_basin.gpkg -d custom_dem.tif -o /path/to/outputs/ + python build_topo_nc.py -o ./newfork_scripts --veg-dir /path/to/veg/ +""" + +import argparse +import math +import re +import sys +from datetime import datetime +from pathlib import Path + +import geopandas as gpd +import numpy as np +import pandas as pd +import rioxarray +import xarray as xr +from osgeo import gdal +from pyproj import CRS +from rasterio.features import rasterize + +LANDFIRE_DIR_DEFAULT = Path("/uufs/chpc.utah.edu/common/home/skiles-group3/LANDFIRE") +VEG_PARAMS_CSV_DEFAULT = LANDFIRE_DIR_DEFAULT / "landfire_veg_params_assigned.csv" + +# Relative paths within the LANDFIRE 1.4.0 directory +_EVT_PATH = "US_140EVT_20180618/Grid/us_140evt/hdr.adf" +_EVH_PATH = "US_140EVH_20180618/Grid/us_140evh/hdr.adf" +_EVH_CSV = "US_140EVH_20180618/CSV_Data/LF_140EVH_05092014.csv" + +# Contract for user-supplied vegetation rasters via --veg-dir. +# veg_type is categorical so uses nearest-neighbour resampling; others are continuous. +VEG_DIR_FILES = { + "veg_type": ("veg_type.tif", "near"), + "veg_height": ("veg_height.tif", "bilinear"), + "veg_tau": ("veg_tau.tif", "bilinear"), + "veg_k": ("veg_k.tif", "bilinear"), +} + +# Matches numeric height values in CLASSNAMES strings, excluding asterisk-delimited NoData markers +_HEIGHT_RE = re.compile(r"(? {dst}") + result.FlushCache() + del result + + +def _read_raster(path): + if not Path(path).exists(): + raise FileNotFoundError(f"Raster not found: {path}") + with rioxarray.open_rasterio(path) as src: + da = src.squeeze(dim="band", drop=True) + return da + + +def _basin_mask(polygon, dem_da): + """Rasterize basin polygon onto the DEM grid. Returns uint8 array.""" + gdf = gpd.read_file(polygon) + ny, nx = dem_da.shape + return rasterize( + gdf.geometry, + out_shape=(ny, nx), + transform=dem_da.rio.transform(), + fill=0, + default_value=1, + dtype="uint8", + ) + + +def _veg_tau_k(evt_da, veg_params_csv): + """Map EVT pixel values → tau and k arrays using the veg params CSV. + + NoData pixels (from padded extents outside valid LANDFIRE coverage) are + mapped to open/no-canopy values (tau=1, k=0) rather than raising. + """ + df = pd.read_csv(veg_params_csv) + idx_col = "landfire140" if "landfire140" in df.columns else df.columns[0] + df = df.set_index(idx_col) + df = df[~df.index.duplicated(keep="first")] + + evt_arr = evt_da.values + nodata = evt_da.rio.nodata + nodata_mask = np.zeros(evt_arr.shape, dtype=bool) if nodata is None else (evt_arr == nodata) + if np.any(nodata_mask): + print(f" Warning: {nodata_mask.sum()} NoData EVT pixels mapped to open/no-canopy (tau=1, k=0)") + + missing = set(np.unique(evt_arr[~nodata_mask])) - set(df.index) + if missing: + # tau/k have no safe fallback — missing class would silently corrupt radiation + raise ValueError(f"EVT classes not in {veg_params_csv}: {missing}") + + flat = pd.Series(evt_arr.ravel()) + tau = flat.map(df["tau"]).values.reshape(evt_arr.shape) + k = flat.map(df["k"]).values.reshape(evt_arr.shape) + tau[nodata_mask] = 1.0 + k[nodata_mask] = 0.0 + return tau, k + + +def _veg_height(evh_arr, evh_csv): + """Parse mean canopy heights from the LANDFIRE EVH CLASSNAMES CSV; defaults to 0 m.""" + df = pd.read_csv(evh_csv).set_index("VALUE") + def _parse_height(s): + matches = _HEIGHT_RE.findall(str(s)) + return float(np.mean([float(x) for x in matches])) if matches else 0.0 + + df["height"] = df["CLASSNAMES"].apply(_parse_height) + mapped = pd.Series(evh_arr.ravel()).map(df["height"]).fillna(0.0) + return mapped.values.reshape(evh_arr.shape) + + +def _projection_var(crs_str): + """ + Build a CF grid_mapping scalar DataArray from a CRS string. WKT is in version WKT1 for compatibility with WindNinja and older GDAL versions. The WKT is included in both crs_wkt and spatial_ref attributes for maximum compatibility. + """ + crs = CRS.from_string(crs_str) + wkt = crs.to_wkt(version="WKT1_GDAL") + attrs = crs.to_cf() + attrs["crs_wkt"] = wkt + attrs["spatial_ref"] = wkt + # smrf/data/load_topo.py requires utm_zone_number as an integer attribute + epsg = crs.to_epsg() + if epsg and 32601 <= epsg <= 32660: + attrs["utm_zone_number"] = epsg - 32600 + elif epsg and 32701 <= epsg <= 32760: + attrs["utm_zone_number"] = epsg - 32700 + return xr.DataArray(0, attrs=attrs) + + +def validate(topo_nc, expected_epsg): + with xr.open_dataset(topo_nc) as ds: + if "projection" not in ds: + raise RuntimeError("topo.nc missing 'projection' variable") + proj_attrs = ds["projection"].attrs + wkt = proj_attrs.get("crs_wkt") or proj_attrs.get("spatial_ref") + if not wkt: + raise RuntimeError("topo.nc 'projection' has no crs_wkt or spatial_ref attribute") + actual = CRS.from_wkt(wkt).to_epsg() + if actual is None: + raise RuntimeError("topo.nc CRS does not resolve to an EPSG code") + if actual != expected_epsg: + raise ValueError(f"topo.nc CRS mismatch: EPSG:{actual} != expected EPSG:{expected_epsg}") + print(f" Projection OK, EPSG:{actual}") + + +def build_topo(polygon, dem_file, landfire_dir, veg_params_csv, + output_dir, cell_size, basin_name, veg_dir=None): + """Construct topo.nc from input files and write to output_dir. Returns topo.nc path.""" + if veg_dir is not None: + veg_dir = Path(veg_dir) + missing = [fname for fname, _ in VEG_DIR_FILES.values() if not (veg_dir / fname).exists()] + if missing: + expected = "\n".join(f" {fname}" for fname, _ in VEG_DIR_FILES.values()) + raise FileNotFoundError( + f"Missing files in {veg_dir}:\n" + + "\n".join(f" {f}" for f in missing) + + f"\nExpected layout:\n{expected}" + ) + + landfire_dir = Path(landfire_dir) + topo_output = Path(output_dir) / f"output_{int(cell_size)}m" + temp_dir = topo_output / "temp" + topo_output.mkdir(parents=True, exist_ok=True) + temp_dir.mkdir(exist_ok=True) + + print("Computing extents...") + extents, crs = _padded_extents(polygon, cell_size) + + print("Clipping DEM...") + dem_clip = str(temp_dir / "clipped_dem.tif") + _clip_raster(str(dem_file), dem_clip, crs, extents, cell_size, "bilinear") + dem_da = _read_raster(dem_clip) + coords = {"y": dem_da.y, "x": dem_da.x} + + mask_arr = _basin_mask(polygon, dem_da) + + if veg_dir is not None: + print("Clipping user-provided vegetation rasters to basin extents...") + clips = {} + for var, (fname, resample) in VEG_DIR_FILES.items(): + clip_path = str(temp_dir / f"clipped_{var}.tif") + _clip_raster(str(veg_dir / fname), clip_path, crs, extents, cell_size, resample) + clips[var] = _read_raster(clip_path).values + type_arr = clips["veg_type"] + height_arr = clips["veg_height"] + tau_arr = clips["veg_tau"] + k_arr = clips["veg_k"] + else: + print("Clipping LANDFIRE vegetation to basin extents...") + evt_clip = str(temp_dir / "clipped_veg_type.tif") + evh_clip = str(temp_dir / "clipped_veg_height.tif") + _clip_raster(str(landfire_dir / _EVT_PATH), evt_clip, crs, extents, cell_size, "near") + _clip_raster(str(landfire_dir / _EVH_PATH), evh_clip, crs, extents, cell_size, "near") + evt_da = _read_raster(evt_clip) + evh_da = _read_raster(evh_clip) + type_arr = evt_da.values + tau_arr, k_arr = _veg_tau_k(evt_da, veg_params_csv) + height_arr = _veg_height(evh_da.values, str(landfire_dir / _EVH_CSV)) + + print("Assembling dataset...") + ds = xr.Dataset({ + "dem": xr.DataArray( + dem_da.values, dims=["y", "x"], coords=coords, + attrs={"long_name": "dem"}), + "mask": xr.DataArray( + mask_arr, dims=["y", "x"], coords=coords, + attrs={"long_name": basin_name}), + "veg_type": xr.DataArray( + type_arr, dims=["y", "x"], coords=coords, + attrs={"long_name": "vegetation type"}), + "veg_height": xr.DataArray( + height_arr, dims=["y", "x"], coords=coords, + attrs={"long_name": "vegetation height"}), + "veg_tau": xr.DataArray( + tau_arr, dims=["y", "x"], coords=coords, + attrs={"long_name": "vegetation tau"}), + "veg_k": xr.DataArray( + k_arr, dims=["y", "x"], coords=coords, + attrs={"long_name": "vegetation k"}), + "projection": _projection_var(crs), + }) + + # Strip any grid_mapping set by rioxarray to avoid xarray encoding conflict + for var in list(ds.data_vars) + list(ds.coords): + ds[var].attrs.pop("grid_mapping", None) + ds[var].encoding.pop("grid_mapping", None) + + ds.x.attrs = {"units": "meters", "long_name": "x coordinate", + "standard_name": "projection_x_coordinate"} + ds.y.attrs = {"units": "meters", "long_name": "y coordinate", + "standard_name": "projection_y_coordinate"} + + now = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + ds.attrs = { + "Conventions": "CF-1.6", + "dateCreated": now, + "Title": "Topographic Images for SMRF/AWSM", + "history": f"[{now}] Created by build_topo_nc.py", + } + + topo_nc = topo_output / "topo.nc" + print(f"Writing {topo_nc}...") + ds.to_netcdf( + topo_nc, + format="NETCDF4", + encoding={ + "x": {"dtype": "f8"}, + "y": {"dtype": "f8"}, + "dem": {"dtype": "f4", "grid_mapping": "projection"}, + "mask": {"grid_mapping": "projection"}, + "veg_type": {"dtype": "u2", "grid_mapping": "projection"}, + "veg_height": {"dtype": "f4", "grid_mapping": "projection"}, + "veg_tau": {"dtype": "f4", "grid_mapping": "projection"}, + "veg_k": {"dtype": "f4", "grid_mapping": "projection"}, + }, + ) + return topo_nc + + +def read_basin_env(output_dir): + """Read key value pairs from basin.env into dict.""" + env_path = Path(output_dir) / "basin.env" + if not env_path.exists(): + return {} + env = {} + for line in env_path.read_text().splitlines(): + line = line.strip() + if "=" in line and not line.startswith("#"): + k, _, v = line.partition("=") + env[k.strip()] = v.strip().strip('"\'') + return env + + +def main(): + parser = argparse.ArgumentParser( + description="Build topo.nc from polygon + DEM + LANDFIRE", + formatter_class=argparse.RawDescriptionHelpFormatter, + usage="\n build_topo_nc.py -s POLY -d DEM -o DIR [-res METERS] [--name NAME]\n" + " build_topo_nc.py -o DIR # reads basin.env", + epilog=__doc__, + ) + parser.add_argument("-s", "--polygon", metavar="POLY", + help="Basin polygon file (shapefile or GeoPackage); reads BASIN_POLYGON from basin.env if omitted.") + parser.add_argument("-d", "--dem-file", metavar="DEM", + help="Warped DEM GeoTIFF; reads BASIN_DEM from basin.env if omitted.") + parser.add_argument("-o", "--output-dir", required=True, metavar="DIR") + parser.add_argument("-res", "--cell-size", type=float, default=100.0, metavar="METERS") + parser.add_argument("-e", "--epsg", type=int, default=None, metavar="EPSG", + help="UTM EPSG. Reads BASIN_EPSG from basin.env if omitted.") + parser.add_argument("--name", default=None, metavar="NAME", + help="Basin name for mask; reads BASIN_NAME from basin.env if None.") + parser.add_argument("--landfire-dir", default=str(LANDFIRE_DIR_DEFAULT), metavar="DIR") + parser.add_argument("--veg-params-csv", default=str(VEG_PARAMS_CSV_DEFAULT), metavar="CSV") + expected = ", ".join(fname for fname, _ in VEG_DIR_FILES.values()) + parser.add_argument("--veg-dir", default=None, metavar="DIR", + help=f"Directory of user-derived vegetation rasters, overrides " + f"--landfire-dir. Must contain: {expected}") + args = parser.parse_args() + + output_dir = Path(args.output_dir).resolve() + env = read_basin_env(output_dir) + + polygon = Path(args.polygon) if args.polygon else Path(env.get("BASIN_POLYGON", "")) + dem_file = Path(args.dem_file) if args.dem_file else Path(env.get("BASIN_DEM", "")) + epsg = args.epsg or int(env.get("BASIN_EPSG", 0)) + basin_name = args.name or env.get("BASIN_NAME", "Full Basin") + + if not polygon or not polygon.exists(): + sys.exit(f"Polygon file not found: {polygon}. Pass -s or run fetch_basin.py first.") + if not dem_file or not dem_file.exists(): + sys.exit(f"DEM file not found: {dem_file}. Pass -d or run fetch_dem.py first.") + if not epsg: + sys.exit("EPSG not found. Pass -e or run fetch_basin.py first.") + if not (32601 <= epsg <= 32660 or 32701 <= epsg <= 32760): + sys.exit( + f"EPSG:{epsg} is not a valid UTM zone. " + "Expected 32601-32660 (northern) or 32701-32760 (southern)." + ) + + landfire_dir = Path(args.landfire_dir) + veg_params_csv = Path(args.veg_params_csv) + if not args.veg_dir: + if not landfire_dir.exists(): + sys.exit(f"LANDFIRE directory not found: {landfire_dir}") + if not veg_params_csv.exists(): + sys.exit(f"veg_params_csv not found: {veg_params_csv}") + + topo_nc = build_topo( + polygon, dem_file, landfire_dir, veg_params_csv, + output_dir, args.cell_size, basin_name, + veg_dir=args.veg_dir, + ) + + print("Validating projection...") + validate(topo_nc, epsg) + print(f"\nDone! topo.nc at {topo_nc}") + + +if __name__ == "__main__": + main() diff --git a/scripts/topo/fetch_basin.py b/scripts/topo/fetch_basin.py new file mode 100755 index 0000000..2c6b72c --- /dev/null +++ b/scripts/topo/fetch_basin.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python +""" +Fetch a HUC watershed boundary from the USGS Water Boundary Dataset (WBD) +and save as a UTM-projected GeoPackage. + +Writes /basin.env with BASIN_POLYGON, BASIN_EPSG, BASIN_BBOX, +and BASIN_NAME for use by fetch_dem.py and build_topo_nc.py. + +Example usage: + python fetch_basin.py -huc 14040102 -o ./newfork_scripts + python fetch_basin.py -n "new fork" -o ./newfork_scripts [-level 8] + python fetch_basin.py -s custom.gpkg -o ./newfork_scripts +""" + +import argparse +import sys +from pathlib import Path + +import geopandas as gpd +import requests + +WBD_BASE = "https://hydro.nationalmap.gov/arcgis/rest/services/wbd/MapServer" +_WBD_LAYER = {2: 1, 4: 2, 6: 3, 8: 4, 10: 5, 12: 6} +WBD_FIELD = {2: "huc2", 4: "huc4", 6: "huc6", 8: "huc8", 10: "huc10", 12: "huc12"} +SUPPORTED_HUC_LEVELS = tuple(sorted(_WBD_LAYER)) + + +def _wbd_query(layer, where, out_fields): + resp = requests.get( + f"{WBD_BASE}/{layer}/query", + params={"where": where, "outFields": out_fields, "f": "geojson"}, + timeout=30, + ) + resp.raise_for_status() + data = resp.json() + if "error" in data: + raise RuntimeError(f"WBD API error: {data['error'].get('message', data['error'])}") + features = data.get("features", []) + if not features: + return gpd.GeoDataFrame() + return gpd.GeoDataFrame.from_features(features, crs="EPSG:4326") + + +def utm_epsg_from_lonlat(lon, lat): + zone = int((lon + 180) / 6) + 1 + return 32600 + zone if lat >= 0 else 32700 + zone + + +def validate_utm_epsg(epsg): + if not (32601 <= epsg <= 32660 or 32701 <= epsg <= 32760): + sys.exit( + f"EPSG:{epsg} is not a valid UTM zone. " + "Expected 32601-32660 (northern) or 32701-32760 (southern)." + ) + + +def validate_huc_id(huc_id): + """Validate that the input HUC ID is supported.""" + huc_id = str(huc_id).strip() + if not huc_id.isdigit(): + raise ValueError(f"HUC ID must contain only digits, got '{huc_id}'") + + huc_level = len(huc_id) + if huc_level not in _WBD_LAYER: + levels = ", ".join(str(level) for level in SUPPORTED_HUC_LEVELS) + raise ValueError( + f"HUC ID '{huc_id}' must be exactly one of these lengths: {levels} digits. " + f"Got {huc_level} digits." + ) + return huc_id, huc_level + + +def validate_existing_polygon(polygon_path, epsg_override=None): + """Validate CRS of a user-supplied polygon. Returns (utm_epsg, bbox_wgs84, basin_name).""" + gdf = gpd.read_file(polygon_path) + if gdf.crs is None: + sys.exit(f"Polygon file must have a defined projected CRS: {polygon_path}") + if not gdf.crs.is_projected: + sys.exit(f"Polygon CRS must be projected/UTM, not geographic: {gdf.crs}") + polygon_epsg = gdf.crs.to_epsg() + if polygon_epsg is None: + sys.exit(f"Polygon CRS must resolve to an EPSG code so it can be validated: {gdf.crs}") + gdf_wgs84 = gdf.to_crs("EPSG:4326") + xmin, ymin, xmax, ymax = gdf_wgs84.total_bounds + expected_epsg = epsg_override or utm_epsg_from_lonlat((xmin + xmax) / 2, (ymin + ymax) / 2) + if polygon_epsg != expected_epsg: + sys.exit( + "Polygon CRS EPSG does not match the target UTM EPSG " + f"(polygon: EPSG:{polygon_epsg}, expected: EPSG:{expected_epsg})" + ) + return expected_epsg, (xmin, ymin, xmax, ymax), polygon_path.stem + + +def fetch_huc_polygon(huc_id, output_dir, epsg_override=None): + """Query WBD for a HUC boundary, reproject to UTM, and save as GeoPackage.""" + huc_id, huc_level = validate_huc_id(huc_id) + field = WBD_FIELD[huc_level] + gdf = _wbd_query(_WBD_LAYER[huc_level], f"{field}='{huc_id}'", "*") + if gdf.empty: + raise ValueError(f"No WBD features found for HUC ID '{huc_id}'") + + xmin, ymin, xmax, ymax = gdf.total_bounds + utm_epsg = epsg_override or utm_epsg_from_lonlat((xmin + xmax) / 2, (ymin + ymax) / 2) + bbox_wgs84 = (xmin, ymin, xmax, ymax) + basin_name = gdf.iloc[0].get("name", f"HUC{huc_id}") + + gpkg_path = output_dir / f"huc{huc_id}_basin.gpkg" + gdf[["geometry"]].to_crs(f"EPSG:{utm_epsg}").to_file(gpkg_path, driver="GPKG") + return gpkg_path, utm_epsg, bbox_wgs84, basin_name + + +def search_huc_by_name(name, huc_level=8): + """Search WBD for HUCs with a name matching the input string (case-insensitive).""" + if huc_level not in _WBD_LAYER: + raise ValueError(f"huc_level {huc_level} not supported.") + field = WBD_FIELD[huc_level] + return _wbd_query( + _WBD_LAYER[huc_level], + f"LOWER(name) LIKE LOWER('%{name}%')", + f"{field},name,areasqkm,states", + ) + + +def write_env(output_dir, polygon, epsg, bbox_wgs84, basin_name): + """Write shell-sourceable key=value file for downstream scripts.""" + xmin, ymin, xmax, ymax = bbox_wgs84 + env_path = output_dir / "basin.env" + env_path.write_text( + f'BASIN_POLYGON="{polygon}"\n' + f'BASIN_EPSG="{epsg}"\n' + f'BASIN_BBOX="{xmin},{ymin},{xmax},{ymax}"\n' + f'BASIN_NAME="{basin_name}"\n', + encoding="utf-8", + ) + return env_path + + +def main(): + parser = argparse.ArgumentParser( + description="Fetch HUC watershed boundary → UTM polygon", + formatter_class=argparse.RawDescriptionHelpFormatter, + usage="\n fetch_basin.py (-huc HUC_ID | -n NAME | -s POLY) -o DIR [-level N] [-e EPSG]", + epilog=__doc__, + ) + src = parser.add_mutually_exclusive_group(required=True) + src.add_argument("-huc", "--huc-id", type=str, metavar="HUC_ID", + help="HUC ID with exact supported length (2, 4, 6, 8, 10, or 12 digits; e.g. 14050001)") + src.add_argument("-n", "--basin-name", metavar="NAME", + help="Basin name keyword to search (e.g. 'Yampa')") + src.add_argument("-s", "--polygon", metavar="POLY", + help="Path to existing basin polygon file (shapefile or GeoPackage, must be in UTM)") + parser.add_argument("-o", "--output-dir", required=True, metavar="DIR") + parser.add_argument("-level", "--huc-level", type=int, default=8, + choices=[2, 4, 6, 8, 10, 12], + help="HUC level for -n search (default: 8)") + parser.add_argument("-e", "--epsg", type=int, default=None, + help="Override UTM EPSG (auto-detected from centroid if not given)") + args = parser.parse_args() + + if args.epsg: + validate_utm_epsg(args.epsg) + + output_dir = Path(args.output_dir).resolve() + output_dir.mkdir(parents=True, exist_ok=True) + + if args.huc_id: + print(f"Fetching HUC {args.huc_id} from USGS WBD...") + polygon, utm_epsg, bbox_wgs84, basin_name = fetch_huc_polygon( + args.huc_id, output_dir, epsg_override=args.epsg) + + elif args.basin_name: + print(f"Searching USGS WBD HUC{args.huc_level} for '{args.basin_name}'...") + matches = search_huc_by_name(args.basin_name, args.huc_level) + huc_field = WBD_FIELD[args.huc_level] + if matches.empty: + sys.exit(f"No HUC{args.huc_level} features found matching '{args.basin_name}'.\n" + "Try a different spelling or -level.") + if len(matches) > 1: + cols = [c for c in [huc_field, "name", "areasqkm", "states"] if c in matches.columns] + print(f"\n{len(matches)} matches for '{args.basin_name}':\n") + print(matches[cols].to_string(index=False)) + sys.exit("\nRerun with the correct -huc value from the table above.") + huc_id = matches[huc_field].iloc[0] + basin_name = matches["name"].iloc[0] + print(f"Found: {basin_name} ({huc_id})") + polygon, utm_epsg, bbox_wgs84, _ = fetch_huc_polygon(huc_id, output_dir, epsg_override=args.epsg) + + else: + polygon = Path(args.polygon).resolve() + if not polygon.exists(): + sys.exit(f"Polygon file not found: {polygon}") + utm_epsg, bbox_wgs84, basin_name = validate_existing_polygon(polygon, args.epsg) + + env_path = write_env(output_dir, polygon, utm_epsg, bbox_wgs84, basin_name) + print(f"Basin file: {polygon}") + print(f"EPSG : {utm_epsg}") + print(f"Env file : {env_path}") + + +if __name__ == "__main__": + main() diff --git a/scripts/topo/fetch_dem.py b/scripts/topo/fetch_dem.py new file mode 100755 index 0000000..2c81db9 --- /dev/null +++ b/scripts/topo/fetch_dem.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python +""" +Build a UTM-projected DEM mosaic from USGS 3DEP 1/3 arc-second tiles. + +By default, tiles are streamed via GDAL vsicurl without writing to disk. +Use --download-tiles for SLURM jobs where compute nodes lack internet access. + +Reads basin.env from the output directory (written by fetch_basin.py) for +EPSG and bounding box if not provided explicitly. Appends BASIN_DEM to basin.env. + +Usage: + python fetch_dem.py -o ./newfork_scripts + python fetch_dem.py -o ./newfork_scripts --download-tiles +""" + +import argparse +import sys +from pathlib import Path + +import requests +from osgeo import gdal + + +TNM_DATASET = "National Elevation Dataset (NED) 1/3 arc-second" +TNM_API = "https://tnmaccess.nationalmap.gov/api/v1/products" + + +def read_basin_env(output_dir): + """Parse key=value pairs from basin.env into a dict.""" + env_path = Path(output_dir) / "basin.env" + if not env_path.exists(): + return {} + env = {} + for line in env_path.read_text().splitlines(): + line = line.strip() + if "=" in line and not line.startswith("#"): + k, _, v = line.partition("=") + env[k.strip()] = v.strip().strip('"\'') + return env + + +def _query_tile_urls(bbox_wgs84): + """Query TNM API, deduplicate by tile cell, and return .tif download URLs.""" + xmin, ymin, xmax, ymax = bbox_wgs84 + resp = requests.get(TNM_API, params={ + "datasets": TNM_DATASET, + "bbox": f"{xmin},{ymin},{xmax},{ymax}", + "max": 1000, + "outputFormat": "JSON", + }, timeout=60) + resp.raise_for_status() + + items = resp.json().get("items", []) + if not items: + raise RuntimeError(f"No TNM DEM tiles found for bbox {bbox_wgs84}.") + print(f" Found {len(items)} DEM tile(s)") + + # Keep only the newest version of each tile cell + # Title format: "USGS 1/3 Arc Second n40w107 20210312" → cell is second-to-last word + best = {} + for item in items: + parts = item.get("title", "").split() + tile = parts[-2] if len(parts) >= 2 else item.get("downloadURL", item.get("title", "")) + pub_date = item.get("publicationDate", "") + is_newer = tile in best and pub_date > best[tile].get("publicationDate", "") + if tile not in best or is_newer: + best[tile] = item + deduped = list(best.values()) + print(f" {len(deduped)} unique tile(s) after deduplication") + + # Prefer .tif — GDAL reads it directly; other formats may require format conversion + tif_items = [i for i in deduped if i.get("downloadURL", "").endswith(".tif")] + return [i["downloadURL"] for i in (tif_items or deduped)] + + +def stream_dem_tiles(bbox_wgs84): + """Return vsicurl paths for GDAL to stream tiles without downloading.""" + urls = _query_tile_urls(bbox_wgs84) + return [f"/vsicurl/{url}" for url in urls] + + +def download_dem_tiles(bbox_wgs84, output_dir): + """Download tiles to disk and return local file paths.""" + urls = _query_tile_urls(bbox_wgs84) + tile_dir = output_dir / "dem_tiles" + tile_dir.mkdir(exist_ok=True) + tile_files = [] + for url in urls: + dest = tile_dir / Path(url).name + if not dest.exists(): + print(f" Downloading {dest.name} ...") + with requests.get(url, stream=True, timeout=120) as r: + r.raise_for_status() + with open(dest, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + if dest.stat().st_size == 0: + raise RuntimeError(f"Downloaded tile is empty (truncated download?): {dest}") + else: + print(f" {dest.name} already exists, skipping") + tile_files.append(str(dest)) + return tile_files + + +def build_dem(tile_files, epsg, cell_size, output_dir): + """Mosaic tiles and warp to target UTM/resolution. Returns output GeoTIFF path.""" + vrt_path = str(output_dir / "dem_mosaic.vrt") + dem_path = output_dir / f"dem_epsg_{epsg}_{int(cell_size)}m.tif" + + print(f" Building VRT from {len(tile_files)} tile(s)...") + vrt = gdal.BuildVRT(vrt_path, tile_files) + if vrt is None: + raise RuntimeError("gdal.BuildVRT failed") + vrt.FlushCache() + del vrt + + print(f" Warping to EPSG:{epsg} at {int(cell_size)}m...") + result = gdal.Warp( + str(dem_path), vrt_path, + options=gdal.WarpOptions( + dstSRS=f"EPSG:{epsg}", + xRes=cell_size, yRes=cell_size, + resampleAlg="cubicspline", + targetAlignedPixels=True, + creationOptions=["TILED=YES", "COMPRESS=LZW", "BIGTIFF=IF_SAFER"], + ), + ) + if result is None: + raise RuntimeError("gdal.Warp failed") + result.FlushCache() + del result + return dem_path + + +def main(): + parser = argparse.ArgumentParser( + description="Build a UTM-projected DEM mosaic from USGS 3DEP tiles", + formatter_class=argparse.RawDescriptionHelpFormatter, + usage=("\n fetch_dem.py -o DIR [-res METERS] [-e EPSG]" + " [--download-tiles]"), + epilog=__doc__, + ) + parser.add_argument("-o", "--output-dir", required=True, metavar="DIR", + help="Output directory (reads basin.env from here)") + parser.add_argument("-res", "--cell-size", type=float, default=100.0, metavar="METERS", + help="Resolution in meters (default: 100)") + parser.add_argument("-e", "--epsg", type=int, default=None, metavar="EPSG", + help="UTM EPSG — reads BASIN_EPSG from basin.env if not given") + parser.add_argument("--download-tiles", action="store_true", + help="Download tiles to disk before warping (use for SLURM/offline)") + args = parser.parse_args() + + output_dir = Path(args.output_dir).resolve() + env = read_basin_env(output_dir) + + epsg = args.epsg or int(env.get("BASIN_EPSG", 0)) + if not epsg: + sys.exit("EPSG not provided and BASIN_EPSG not in basin.env. " + "Run fetch_basin.py first or pass -e EPSG.") + if not (32601 <= epsg <= 32660 or 32701 <= epsg <= 32760): + sys.exit( + f"EPSG:{epsg} is not a valid UTM zone. " + "Expected 32601-32660 (northern) or 32701-32760 (southern)." + ) + + bbox_str = env.get("BASIN_BBOX", "") + if not bbox_str: + sys.exit("BASIN_BBOX not in basin.env. Run fetch_basin.py first.") + bbox_wgs84 = tuple(float(x) for x in bbox_str.split(",")) + + if args.download_tiles: + print("Downloading 3DEP 1/3 arc-second DEM tiles...") + tile_files = download_dem_tiles(bbox_wgs84, output_dir) + else: + print("Streaming 3DEP 1/3 arc-second DEM tiles via vsicurl...") + tile_files = stream_dem_tiles(bbox_wgs84) + + print(f"Building DEM mosaic (EPSG:{epsg}, {int(args.cell_size)}m)...") + dem_path = build_dem(tile_files, epsg, args.cell_size, output_dir) + print(f"DEM: {dem_path}") + + with open(output_dir / "basin.env", "a", encoding="utf-8") as f: + f.write(f'BASIN_DEM="{dem_path}"\n') + + +if __name__ == "__main__": + main() diff --git a/scripts/topo/generate_topo.py b/scripts/topo/generate_topo.py new file mode 100755 index 0000000..525f6e6 --- /dev/null +++ b/scripts/topo/generate_topo.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python +""" +Generate topo.nc for SMRF/iSnobal from a HUC ID, basin name, or polygon. + +Chains fetch_basin → fetch_dem → build_topo_nc, passing values directly +between steps. Writes basin.env to the output directory for reference. + +Example usage: + python generate_topo.py -n "new fork" -o ./newfork_scripts + python generate_topo.py -huc 14040102 -o ./newfork_scripts + python generate_topo.py -s custom.gpkg -o ./newfork_scripts [-res 50] +""" + +import argparse +import sys +from pathlib import Path + +# Allow sibling scripts to be imported regardless of working directory +sys.path.insert(0, str(Path(__file__).parent)) + +import build_topo_nc as btopo +import fetch_basin as fb +import fetch_dem as fd + + +def main(): + parser = argparse.ArgumentParser( + description="Generate topo.nc for SMRF/iSnobal", + formatter_class=argparse.RawDescriptionHelpFormatter, + usage=( + "\n" + " generate_topo.py (-huc HUC_ID | -n NAME | -s POLY) -o DIR\n" + " [-res METERS] [-level {2,4,6,8,10,12}] [-e EPSG]\n" + " [--landfire-dir DIR] [--veg-params-csv CSV | --veg-dir DIR]\n" + " [--download-dem-tiles]" + ), + epilog=__doc__, + ) + src = parser.add_mutually_exclusive_group(required=True) + src.add_argument("-huc", "--huc-id", metavar="HUC_ID", help="HUC ID (e.g. 14050001)") + src.add_argument("-n", "--basin-name", metavar="NAME", help="Basin keyword to search") + src.add_argument("-s", "--polygon", metavar="POLY", + help="Pre-existing basin polygon file (shapefile or GeoPackage) in UTM") + parser.add_argument("-o", "--output-dir", required=True, metavar="DIR") + parser.add_argument("-res", "--cell-size", type=float, default=100.0, metavar="METERS") + parser.add_argument("-level", "--huc-level", type=int, default=8, + choices=[2, 4, 6, 8, 10, 12], + help="HUC level for -n search (default: 8)") + parser.add_argument("-e", "--epsg", type=int, default=None, metavar="EPSG", + help="Override auto-detected UTM EPSG") + parser.add_argument("--landfire-dir", default=str(btopo.LANDFIRE_DIR_DEFAULT), metavar="DIR") + parser.add_argument("--veg-params-csv", default=str(btopo.VEG_PARAMS_CSV_DEFAULT), metavar="CSV") + parser.add_argument("--download-dem-tiles", action="store_true", + help="Download DEM tiles to disk before warping (use for SLURM/offline)") + expected = ", ".join(fname for fname, _ in btopo.VEG_DIR_FILES.values()) + parser.add_argument("--veg-dir", default=None, metavar="DIR", + help=f"Directory of user-derived vegetation rasters, overrides " + f"--landfire-dir. Must contain: {expected}") + args = parser.parse_args() + + if args.epsg: + fb.validate_utm_epsg(args.epsg) + + output_dir = Path(args.output_dir).resolve() + output_dir.mkdir(parents=True, exist_ok=True) + landfire_dir = Path(args.landfire_dir) + veg_params_csv = Path(args.veg_params_csv) + + if not args.veg_dir: + if not landfire_dir.exists(): + sys.exit(f"LANDFIRE directory not found: {landfire_dir}") + if not veg_params_csv.exists(): + sys.exit(f"veg_params_csv not found: {veg_params_csv}") + + print("\n[1/3] Fetching basin boundary...") + if args.huc_id: + polygon, utm_epsg, bbox_wgs84, basin_name = fb.fetch_huc_polygon( + args.huc_id, output_dir, epsg_override=args.epsg) + + elif args.basin_name: + matches = fb.search_huc_by_name(args.basin_name, args.huc_level) + huc_field = fb.WBD_FIELD[args.huc_level] + if matches.empty: + sys.exit(f"No HUC{args.huc_level} features found matching '{args.basin_name}'.") + if len(matches) > 1: + cols = [c for c in [huc_field, "name", "areasqkm", "states"] + if c in matches.columns] + print(f"\n {len(matches)} matches for '{args.basin_name}':\n") + print(matches[cols].to_string(index=False)) + sys.exit("\nRerun with the correct -huc value from the table above.") + huc_id = matches[huc_field].iloc[0] + basin_name = matches["name"].iloc[0] + print(f" Found: {basin_name} ({huc_id})") + polygon, utm_epsg, bbox_wgs84, _ = fb.fetch_huc_polygon(huc_id, output_dir, epsg_override=args.epsg) + + else: + polygon = Path(args.polygon).resolve() + if not polygon.exists(): + sys.exit(f"Basin file not found: {polygon}") + utm_epsg, bbox_wgs84, basin_name = fb.validate_existing_polygon(polygon, args.epsg) + + fb.write_env(output_dir, polygon, utm_epsg, bbox_wgs84, basin_name) + print(f" Basin file: {polygon}") + print(f" EPSG : {utm_epsg}") + + print("\n[2/3] Building DEM...") + if args.download_dem_tiles: + tile_files = fd.download_dem_tiles(bbox_wgs84, output_dir) + else: + tile_files = fd.stream_dem_tiles(bbox_wgs84) + + dem_file = fd.build_dem(tile_files, utm_epsg, args.cell_size, output_dir) + print(f" DEM: {dem_file}") + + with open(output_dir / "basin.env", "a", encoding="utf-8") as f: + f.write(f'BASIN_DEM="{dem_file}"\n') + + print("\n[3/3] Building topo.nc...") + topo_nc = btopo.build_topo( + polygon, dem_file, landfire_dir, veg_params_csv, + output_dir, args.cell_size, basin_name, + veg_dir=args.veg_dir, + ) + + print("\nValidating projection...") + btopo.validate(topo_nc, utm_epsg) + print(f"\nDone! topo.nc at {topo_nc}") + + # TODO: add --cleanup flag to remove intermediate files (dem_mosaic.vrt, warped DEM .tif, + # dem_tiles/, output_m/temp/) after successful topo.nc generation + + +if __name__ == "__main__": + main()