diff --git a/.gitignore b/.gitignore index a973aff..a1da880 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,5 @@ .DS_Store __pycache__/ -src/eo_api/datasets/cache .venv/ .env eo_api.egg-info/ diff --git a/__init__.py b/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/pyproject.toml b/pyproject.toml index 9f7854e..d319dfd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,9 +24,6 @@ dependencies = [ [tool.ruff] target-version = "py313" line-length = 120 -exclude = [ - "src/eo_api/datasets" -] [tool.ruff.lint] fixable = ["ALL"] @@ -50,6 +47,7 @@ docstring-code-line-length = "dynamic" [tool.mypy] python_version = "3.13" +plugins = ["pydantic.mypy"] warn_return_any = true warn_unused_configs = true disallow_untyped_defs = true @@ -58,16 +56,13 @@ no_implicit_optional = true warn_unused_ignores = true strict_equality = true mypy_path = ["src"] -exclude = [ - "src/eo_api/datasets" -] [[tool.mypy.overrides]] module = "tests.*" disallow_untyped_defs = false [[tool.mypy.overrides]] -module = ["dhis2eo.*", "dhis2_client.*", "pygeoapi.*", "titiler.*", "rasterio.*", "pygeofilter.*", "prefect.*", "requests.*"] +module = ["dhis2eo.*", "dhis2_client.*", "pygeoapi.*", "titiler.*", "rasterio.*", "pygeofilter.*", "prefect.*", "requests.*", "geopandas.*", "earthkit.*", "metpy.*", "matplotlib.*", "yaml"] ignore_missing_imports = true [tool.pyright] diff --git a/src/eo_api/datasets/api.py b/src/eo_api/datasets/api.py index 7c25fe0..5273335 100644 --- a/src/eo_api/datasets/api.py +++ b/src/eo_api/datasets/api.py @@ -1,4 +1,8 @@ +"""FastAPI router exposing dataset endpoints.""" +from typing import Any + +import xarray as xr from fastapi import APIRouter, BackgroundTasks, HTTPException, Response from fastapi.responses import FileResponse from starlette.background import BackgroundTask @@ -7,146 +11,164 @@ router = APIRouter() + @router.get("/") -def list_datasets(): - """Returned list of available datasets from registry. 
- """ - datasets = registry.list_datasets() - return datasets +def list_datasets() -> list[dict[str, Any]]: + """Return list of available datasets from registry.""" + return registry.list_datasets() + -def get_dataset_or_404(dataset_id: str): +def _get_dataset_or_404(dataset_id: str) -> dict[str, Any]: + """Look up a dataset by ID or raise 404.""" dataset = registry.get_dataset(dataset_id) if not dataset: raise HTTPException(status_code=404, detail=f"Dataset '{dataset_id}' not found") return dataset + @router.get("/{dataset_id}", response_model=dict) -def get_dataset(dataset_id: str): - """Get a single dataset by ID. - """ - dataset = get_dataset_or_404(dataset_id) +def get_dataset(dataset_id: str) -> dict[str, Any]: + """Get a single dataset by ID.""" + dataset = _get_dataset_or_404(dataset_id) cache_info = cache.get_cache_info(dataset) dataset.update(cache_info) return dataset + @router.get("/{dataset_id}/build_cache", response_model=dict) -def build_dataset_cache(dataset_id: str, start: str, end: str | None = None, overwrite: bool = False, background_tasks: BackgroundTasks = None): - """Download and cache dataset as local netcdf files direct from the source. 
- """ - dataset = get_dataset_or_404(dataset_id) +def build_dataset_cache( + dataset_id: str, + start: str, + end: str | None = None, + overwrite: bool = False, + background_tasks: BackgroundTasks | None = None, +) -> dict[str, str]: + """Download and cache dataset as local netcdf files direct from the source.""" + dataset = _get_dataset_or_404(dataset_id) cache.build_dataset_cache(dataset, start=start, end=end, overwrite=overwrite, background_tasks=background_tasks) - return {'status': 'Dataset caching request submitted for processing'} + return {"status": "Dataset caching request submitted for processing"} + @router.get("/{dataset_id}/optimize_cache", response_model=dict) -def optimize_dataset_cache(dataset_id: str, background_tasks: BackgroundTasks = None): - """Optimize dataset cache by collecting all cache files to a single zarr archive. - """ - dataset = get_dataset_or_404(dataset_id) - background_tasks.add_task(cache.optimize_dataset_cache, dataset) - return {'status': 'Dataset cache optimization submitted for processing'} - -def get_dataset_period_type(dataset, period_type, start, end, temporal_aggregation): +def optimize_dataset_cache( + dataset_id: str, + background_tasks: BackgroundTasks | None = None, +) -> dict[str, str]: + """Optimize dataset cache by collecting all cache files to a single zarr archive.""" + dataset = _get_dataset_or_404(dataset_id) + if background_tasks is not None: + background_tasks.add_task(cache.optimize_dataset_cache, dataset) + return {"status": "Dataset cache optimization submitted for processing"} + + +def _get_dataset_period_type( + dataset: dict[str, Any], + period_type: str, + start: str, + end: str, + temporal_aggregation: str, +) -> xr.Dataset: + """Load and temporally aggregate a dataset.""" # TODO: maybe move this and similar somewhere better like a pipelines.py file? - # ... 
- - # get raster data ds = raster.get_data(dataset, start, end) - - # aggregate to period type ds = raster.to_timeperiod(ds, dataset, period_type, statistic=temporal_aggregation) - - # return return ds -@router.get("/{dataset_id}/{period_type}/orgunits", response_model=list) -def get_dataset_period_type_org_units(dataset_id: str, period_type: str, start: str, end: str, temporal_aggregation: str, spatial_aggregation: str): - """Get a dataset dynamically aggregated to a given period type and org units and return json values. - """ - # get dataset metadata - dataset = get_dataset_or_404(dataset_id) - # get dataset for period type and start/end period - ds = get_dataset_period_type(dataset, period_type, start, end, temporal_aggregation) +@router.get("/{dataset_id}/{period_type}/orgunits", response_model=list) +def get_dataset_period_type_org_units( + dataset_id: str, + period_type: str, + start: str, + end: str, + temporal_aggregation: str, + spatial_aggregation: str, +) -> list[dict[str, Any]]: + """Get a dataset aggregated to a given period type and org units as JSON values.""" + dataset = _get_dataset_or_404(dataset_id) + ds = _get_dataset_period_type(dataset, period_type, start, end, temporal_aggregation) - # aggregate to geojson features df = raster.to_features(ds, dataset, features=constants.ORG_UNITS_GEOJSON, statistic=spatial_aggregation) # convert units if needed (inplace) - # NOTE: here we do it after agggregation to dataframe to speedup computation + # NOTE: here we do it after aggregation to dataframe to speedup computation units.convert_pandas_units(df, dataset) - # serialize to json - data = serialize.dataframe_to_json_data(df, dataset, period_type) - return data + return serialize.dataframe_to_json_data(df, dataset, period_type) + @router.get("/{dataset_id}/{period_type}/orgunits/preview", response_model=list) -def get_dataset_period_type_org_units_preview(dataset_id: str, period_type: str, period: str, temporal_aggregation: str, spatial_aggregation: 
str): - """Preview a PNG map image of a dataset dynamically aggregated to a given period and org units. - """ - # get dataset metadata - dataset = get_dataset_or_404(dataset_id) +def get_dataset_period_type_org_units_preview( + dataset_id: str, + period_type: str, + period: str, + temporal_aggregation: str, + spatial_aggregation: str, +) -> Response: + """Preview a PNG map image of a dataset aggregated to a given period and org units.""" + dataset = _get_dataset_or_404(dataset_id) - # get dataset for period type and a single period start = end = period - ds = get_dataset_period_type(dataset, period_type, start, end, temporal_aggregation) + ds = _get_dataset_period_type(dataset, period_type, start, end, temporal_aggregation) - # aggregate to geojson features df = raster.to_features(ds, dataset, features=constants.ORG_UNITS_GEOJSON, statistic=spatial_aggregation) # convert units if needed (inplace) - # NOTE: here we do it after agggregation to dataframe to speedup computation + # NOTE: here we do it after aggregation to dataframe to speedup computation units.convert_pandas_units(df, dataset) - # serialize to image image_data = serialize.dataframe_to_preview(df, dataset, period_type) - - # return as image return Response(content=image_data, media_type="image/png") -@router.get("/{dataset_id}/{period_type}/raster") -def get_dataset_period_type_raster(dataset_id: str, period_type: str, start: str, end: str, temporal_aggregation: str): - """Get a dataset dynamically aggregated to a given period type and return as downloadable raster file. 
- """ - # get dataset metadata - dataset = get_dataset_or_404(dataset_id) - # get dataset for period type and start/end period - ds = get_dataset_period_type(dataset, period_type, start, end, temporal_aggregation) +@router.get("/{dataset_id}/{period_type}/raster") +def get_dataset_period_type_raster( + dataset_id: str, + period_type: str, + start: str, + end: str, + temporal_aggregation: str, +) -> FileResponse: + """Get a dataset aggregated to a given period type as a downloadable raster file.""" + dataset = _get_dataset_or_404(dataset_id) + ds = _get_dataset_period_type(dataset, period_type, start, end, temporal_aggregation) - # convert units if needed (inplace) units.convert_xarray_units(ds, dataset) - # serialize to temporary netcdf file_path = serialize.xarray_to_temporary_netcdf(ds) - - # return as streaming file and delete after completion return FileResponse( file_path, media_type="application/x-netcdf", - filename='eo-api-raster-download.nc', - background=BackgroundTask(serialize.cleanup_file, file_path) + filename="eo-api-raster-download.nc", + background=BackgroundTask(serialize.cleanup_file, file_path), ) + @router.get("/{dataset_id}/{period_type}/raster/preview") -def get_dataset_period_type_raster_preview(dataset_id: str, period_type: str, period: str, temporal_aggregation: str): - """Preview a PNG map image of a dataset dynamically aggregated to a given period. 
- """ - # get dataset metadata - dataset = get_dataset_or_404(dataset_id) +def get_dataset_period_type_raster_preview( + dataset_id: str, + period_type: str, + period: str, + temporal_aggregation: str, +) -> Response: + """Preview a PNG map image of a dataset aggregated to a given period.""" + dataset = _get_dataset_or_404(dataset_id) - # get dataset for period type and a single period start = end = period - ds = get_dataset_period_type(dataset, period_type, start, end, temporal_aggregation) + ds = _get_dataset_period_type(dataset, period_type, start, end, temporal_aggregation) - # convert units if needed (inplace) units.convert_xarray_units(ds, dataset) - # serialize to image image_data = serialize.xarray_to_preview(ds, dataset, period_type) - - # return as image return Response(content=image_data, media_type="image/png") + @router.get("/{dataset_id}/{period_type}/tiles") -def get_dataset_period_type_tiles(dataset_id: str, period_type: str, start: str, end: str, temporal_aggregation: str): - pass +def get_dataset_period_type_tiles( + dataset_id: str, + period_type: str, + start: str, + end: str, + temporal_aggregation: str, +) -> None: + """Placeholder for future tile-based dataset access.""" diff --git a/src/eo_api/datasets/cache.py b/src/eo_api/datasets/cache.py index e8d7cbd..2597985 100644 --- a/src/eo_api/datasets/cache.py +++ b/src/eo_api/datasets/cache.py @@ -1,179 +1,172 @@ +"""Dataset cache: download, store, and optimize raster data as local files.""" + import datetime import importlib import inspect import logging +from collections.abc import Callable from pathlib import Path +from typing import Any import xarray as xr +from fastapi import BackgroundTasks from .constants import BBOX, CACHE_OVERRIDE, COUNTRY_CODE from .utils import get_lon_lat_dims, get_time_dim, numpy_period_string -# logger logger = logging.getLogger(__name__) -# paths SCRIPT_DIR = Path(__file__).parent.resolve() -CACHE_DIR = SCRIPT_DIR / 'cache' +_cache_dir = SCRIPT_DIR / "cache" if 
CACHE_OVERRIDE: - CACHE_DIR = Path(CACHE_OVERRIDE) - -def build_dataset_cache(dataset, start, end, overwrite, background_tasks): - # get download function - cache_info = dataset['cacheInfo'] - eo_download_func_path = cache_info['eoFunction'] - eo_download_func = get_dynamic_function(eo_download_func_path) - #logger.info(eo_download_func_path, eo_download_func) - - # construct standard params - params = cache_info['defaultParams'] - params.update({ - 'start': start, - 'end': end or datetime.date.today().isoformat(), # todays date if empty - 'dirname': CACHE_DIR, - 'prefix': get_cache_prefix(dataset), - 'overwrite': overwrite, - }) - - # add in varying spatial args + _cache_dir = Path(CACHE_OVERRIDE) +CACHE_DIR: Path = _cache_dir + + +def build_dataset_cache( + dataset: dict[str, Any], + start: str, + end: str | None, + overwrite: bool, + background_tasks: BackgroundTasks | None, +) -> None: + """Download dataset from source and store as local NetCDF cache files.""" + cache_info = dataset["cacheInfo"] + eo_download_func_path = cache_info["eoFunction"] + eo_download_func = _get_dynamic_function(eo_download_func_path) + + params: dict[str, Any] = dict(cache_info["defaultParams"]) + params.update( + { + "start": start, + "end": end or datetime.date.today().isoformat(), + "dirname": CACHE_DIR, + "prefix": _get_cache_prefix(dataset), + "overwrite": overwrite, + } + ) + sig = inspect.signature(eo_download_func) - if 'bbox' in sig.parameters.keys(): - params['bbox'] = BBOX - elif 'country_code' in sig.parameters.keys(): - params['country_code'] = COUNTRY_CODE + if "bbox" in sig.parameters: + params["bbox"] = BBOX + elif "country_code" in sig.parameters: + params["country_code"] = COUNTRY_CODE + + if background_tasks is not None: + background_tasks.add_task(eo_download_func, **params) - # execute the download - background_tasks.add_task(eo_download_func, **params) -def optimize_dataset_cache(dataset): - logger.info(f'Optimizing cache for dataset {dataset["id"]}') +def 
optimize_dataset_cache(dataset: dict[str, Any]) -> None: + """Collect all cache files into a single optimized zarr archive.""" + logger.info(f"Optimizing cache for dataset {dataset['id']}") - # open all cache files as xarray files = get_cache_files(dataset) - logger.info(f'Opening {len(files)} files from cache') - # for fil in files: - # d = xr.open_dataset(fil) - # print(d) - # fdsfs + logger.info(f"Opening {len(files)} files from cache") ds = xr.open_mfdataset(files) # trim to only minimal vars and coords - logger.info('Trimming unnecessary variables and coordinates') - varname = dataset['variable'] + logger.info("Trimming unnecessary variables and coordinates") + varname = dataset["variable"] ds = ds[[varname]] keep_coords = [get_time_dim(ds)] + list(get_lon_lat_dims(ds)) - drop_coords = [ - c for c in ds.coords - if c not in keep_coords - ] + drop_coords = [c for c in ds.coords if c not in keep_coords] ds = ds.drop_vars(drop_coords) # determine optimal chunk sizes - logger.info('Determining optimal chunk size for zarr archive') - ds_autochunk = ds.chunk('auto').unify_chunks() - # extract the first chunk size for each dimension to force uniformity - uniform_chunks = {dim: ds_autochunk.chunks[dim][0] for dim in ds_autochunk.dims} - # override with time space chunks - time_space_chunks = compute_time_space_chunks(ds, dataset) - uniform_chunks.update( time_space_chunks ) - logging.info(f'--> {uniform_chunks}') + logger.info("Determining optimal chunk size for zarr archive") + ds_autochunk = ds.chunk("auto").unify_chunks() + uniform_chunks: dict[str, Any] = {str(dim): ds_autochunk.chunks[dim][0] for dim in ds_autochunk.dims} + time_space_chunks = _compute_time_space_chunks(ds, dataset) + uniform_chunks.update(time_space_chunks) + logger.info(f"--> {uniform_chunks}") # save as zarr - logger.info('Saving to optimized zarr file') - zarr_path = CACHE_DIR / 
f"{_get_cache_prefix(dataset)}.zarr" ds_chunked = ds.chunk(uniform_chunks) - ds_chunked.to_zarr(zarr_path, mode='w') + ds_chunked.to_zarr(zarr_path, mode="w") ds_chunked.close() - logger.info('Finished cache optimization') + logger.info("Finished cache optimization") -def compute_time_space_chunks(ds, dataset, max_spatial_chunk=256): - chunks = {} - # time - # set to common access patterns depending on original dataset period - # TODO: could potentially allow this to be customized in the dataset yaml file +def _compute_time_space_chunks( + ds: xr.Dataset, + dataset: dict[str, Any], + max_spatial_chunk: int = 256, +) -> dict[str, int]: + """Compute chunk sizes tuned for common temporal access patterns.""" + chunks: dict[str, int] = {} + dim = get_time_dim(ds) - period_type = dataset['periodType'] - if period_type == 'hourly': + period_type = dataset["periodType"] + if period_type == "hourly": chunks[dim] = 24 * 7 - elif period_type == 'daily': + elif period_type == "daily": chunks[dim] = 30 - elif period_type == 'monthly': + elif period_type == "monthly": chunks[dim] = 12 - elif period_type == 'yearly': + elif period_type == "yearly": chunks[dim] = 1 - # space - lon_dim,lat_dim = get_lon_lat_dims(ds) + lon_dim, lat_dim = get_lon_lat_dims(ds) chunks[lon_dim] = min(ds.sizes[lon_dim], max_spatial_chunk) chunks[lat_dim] = min(ds.sizes[lat_dim], max_spatial_chunk) return chunks -def get_cache_info(dataset): - # find all files with cache prefix + +def get_cache_info(dataset: dict[str, Any]) -> dict[str, Any]: + """Return temporal and spatial coverage metadata for the cached dataset.""" files = get_cache_files(dataset) if not files: - cache_info = dict( - temporal_coverage = None, - spatial_coverage = None, - ) - return cache_info + return {"temporal_coverage": None, "spatial_coverage": None} - # open first of sorted filenames, should be sufficient to get earliest time period ds = xr.open_dataset(sorted(files)[0]) - # get dim names time_dim = get_time_dim(ds) lon_dim, 
lat_dim = get_lon_lat_dims(ds) - # get start time - start = numpy_period_string(ds[time_dim].min().values, dataset['periodType']) + start = numpy_period_string(ds[time_dim].min().values, dataset["periodType"]) # type: ignore[arg-type] - # get space scope - xmin,xmax = ds[lon_dim].min().item(), ds[lon_dim].max().item() - ymin,ymax = ds[lat_dim].min().item(), ds[lat_dim].max().item() + xmin, xmax = ds[lon_dim].min().item(), ds[lon_dim].max().item() + ymin, ymax = ds[lat_dim].min().item(), ds[lat_dim].max().item() - # open last of sorted filenames, should be sufficient to get latest time period ds = xr.open_dataset(sorted(files)[-1]) + end = numpy_period_string(ds[time_dim].max().values, dataset["periodType"]) # type: ignore[arg-type] - # get end time - end = numpy_period_string(ds[time_dim].max().values, dataset['periodType']) + return { + "coverage": { + "temporal": {"start": start, "end": end}, + "spatial": {"xmin": xmin, "ymin": ymin, "xmax": xmax, "ymax": ymax}, + } + } - # cache info - cache_info = dict( - coverage=dict( - temporal = {'start': start, 'end': end}, - spatial = {'xmin': xmin, 'ymin': ymin, 'xmax': xmax, 'ymax': ymax}, - ) - ) - return cache_info - -def get_cache_prefix(dataset): - prefix = dataset['id'] - return prefix - -def get_cache_files(dataset): - # TODO: this is not bulletproof, eg 2m_temperature might also get another dataset named 2m_temperature_modified - # ...probably need a delimeter to specify end of dataset name... - prefix = get_cache_prefix(dataset) - files = list(CACHE_DIR.glob(f'{prefix}*.nc')) - return files - -def get_zarr_path(dataset): - prefix = get_cache_prefix(dataset) - optimized = CACHE_DIR / f'{prefix}.zarr' + +def _get_cache_prefix(dataset: dict[str, Any]) -> str: + return str(dataset["id"]) + + +def get_cache_files(dataset: dict[str, Any]) -> list[Path]: + """Return all NetCDF cache files matching this dataset's prefix.""" + # TODO: not bulletproof -- e.g. 
2m_temperature matches 2m_temperature_modified + prefix = _get_cache_prefix(dataset) + return list(CACHE_DIR.glob(f"{prefix}*.nc")) + + +def get_zarr_path(dataset: dict[str, Any]) -> Path | None: + """Return the optimised zarr archive path if it exists.""" + prefix = _get_cache_prefix(dataset) + optimized = CACHE_DIR / f"{prefix}.zarr" if optimized.exists(): return optimized + return None + -def get_dynamic_function(full_path): - # Split the path into: 'dhis2eo.data.cds.era5_land.hourly' and 'function' - parts = full_path.split('.') +def _get_dynamic_function(full_path: str) -> Callable[..., Any]: + """Import and return a function given its dotted module path.""" + parts = full_path.split(".") module_path = ".".join(parts[:-1]) function_name = parts[-1] - - # This handles all the intermediate sub-package imports automatically module = importlib.import_module(module_path) - - return getattr(module, function_name) + return getattr(module, function_name) # type: ignore[no-any-return] diff --git a/src/eo_api/datasets/constants.py b/src/eo_api/datasets/constants.py index e4cc206..b3b51a2 100644 --- a/src/eo_api/datasets/constants.py +++ b/src/eo_api/datasets/constants.py @@ -1,3 +1,5 @@ +"""Module-level constants loaded at import time (DHIS2 org units, bbox, env config).""" + import json import os @@ -13,5 +15,5 @@ # env variables we need from .env # TODO: should probably centralize to shared config module -COUNTRY_CODE = os.getenv('COUNTRY_CODE') -CACHE_OVERRIDE = os.getenv('CACHE_OVERRIDE') +COUNTRY_CODE = os.getenv("COUNTRY_CODE") +CACHE_OVERRIDE = os.getenv("CACHE_OVERRIDE") diff --git a/src/eo_api/datasets/preprocess.py b/src/eo_api/datasets/preprocess.py index 62ef101..eacbe55 100644 --- a/src/eo_api/datasets/preprocess.py +++ b/src/eo_api/datasets/preprocess.py @@ -1,26 +1,26 @@ +"""Preprocessing functions applied to raster datasets before aggregation.""" + import logging import xarray as xr -# logger logger = logging.getLogger(__name__) -def 
deaccumulate_era5(ds_cumul): - """Convert ERA5 cumulative hourly data to incremental hourly data""" - logger.info('Deaccumulating ERA5 dataset') +def deaccumulate_era5(ds_cumul: xr.Dataset) -> xr.Dataset: + """Convert ERA5 cumulative hourly data to incremental hourly data.""" + logger.info("Deaccumulating ERA5 dataset") # NOTE: this is hardcoded to era5 specific cumulative patterns and varnames # shift all values to previous hour, so the values don't spill over to the next day ds_cumul = ds_cumul.shift(valid_time=-1) # convert cumulative to diffs - ds_diffs = ds_cumul.diff(dim='valid_time') + ds_diffs = ds_cumul.diff(dim="valid_time") ds_diffs = ds_diffs.reindex(valid_time=ds_cumul.valid_time) # use cumul values where accumulation resets (00:00) and diff everywhere else - is_reset = ds_cumul['valid_time'].dt.hour == 0 + is_reset = ds_cumul["valid_time"].dt.hour == 0 ds_hourly = xr.where(is_reset, ds_cumul, ds_diffs) - # return - return ds_hourly + return ds_hourly # type: ignore[no-any-return] diff --git a/src/eo_api/datasets/raster.py b/src/eo_api/datasets/raster.py index 86afd31..8dce903 100644 --- a/src/eo_api/datasets/raster.py +++ b/src/eo_api/datasets/raster.py @@ -1,140 +1,127 @@ +"""Raster data loading, temporal aggregation, and spatial feature extraction.""" + import json import logging +from typing import Any import geopandas as gpd +import pandas as pd import xarray as xr from earthkit import transforms from . 
import cache, preprocess from .utils import get_time_dim -# logger logger = logging.getLogger(__name__) -def get_data(dataset, start, end): - """Get xarray raster dataset for given time range""" - # load xarray from cache - logger.info('Opening dataset') - # first check for optimized zarr archive +def get_data(dataset: dict[str, Any], start: str, end: str) -> xr.Dataset: + """Load an xarray raster dataset for the given time range.""" + logger.info("Opening dataset") zarr_path = cache.get_zarr_path(dataset) if zarr_path: - ds = xr.open_zarr(zarr_path, consolidated=True) # consolidated means caching metadatafa - # fallback to reading raw cache files (slower) + ds = xr.open_zarr(zarr_path, consolidated=True) else: - logger.warning(f'Could not find optimized zarr file for dataset {dataset["id"]}, using slower netcdf files instead.') + logger.warning( + f"Could not find optimized zarr file for dataset {dataset['id']}, using slower netcdf files instead." + ) files = cache.get_cache_files(dataset) ds = xr.open_mfdataset( files, data_vars="minimal", - coords="minimal", - compat="override" + coords="minimal", # pyright: ignore[reportArgumentType] + compat="override", ) - # subset time dim - logger.info(f'Subsetting time to {start} and {end}') + logger.info(f"Subsetting time to {start} and {end}") time_dim = get_time_dim(ds) - ds = ds.sel(**{time_dim: slice(start, end)}) + ds = ds.sel(**{time_dim: slice(start, end)}) # pyright: ignore[reportArgumentType] - # apply any preprocessing functions - for prep_name in dataset.get('preProcess', []): + for prep_name in dataset.get("preProcess", []): prep_func = getattr(preprocess, prep_name) ds = prep_func(ds) - # return - return ds + return ds # type: ignore[no-any-return] -def to_timeperiod(ds, dataset, period_type, statistic, timezone_offset=0): - """Aggregate given xarray dataset to another period type""" - # validate period types - valid_period_types = ['hourly', 'daily', 'monthly', 'yearly'] +def to_timeperiod( + ds: xr.Dataset, 
+ dataset: dict[str, Any], + period_type: str, + statistic: str, + timezone_offset: int = 0, +) -> xr.Dataset: + """Aggregate an xarray dataset to another period type.""" + valid_period_types = ["hourly", "daily", "monthly", "yearly"] if period_type not in valid_period_types: - raise ValueError(f'Period type not supported: {period_type}') + raise ValueError(f"Period type not supported: {period_type}") - # return early if no change - if dataset['periodType'] == period_type: + if dataset["periodType"] == period_type: return ds - # begin - logger.info(f'Aggregating period type from {dataset["periodType"]} to {period_type}') + logger.info(f"Aggregating period type from {dataset['periodType']} to {period_type}") - # process only the array belonging to varname - varname = dataset['variable'] + varname = dataset["variable"] arr = ds[varname] - # remember mask of valid pixels from original dataset (only one time point needed) time_dim = get_time_dim(ds) valid = arr.isel({time_dim: 0}).notnull() - # hourly datasets - if dataset['periodType'] == 'hourly': - if period_type == 'daily': + if dataset["periodType"] == "hourly": + if period_type == "daily": arr = transforms.temporal.daily_reduce( arr, how=statistic, time_shift={"hours": timezone_offset}, remove_partial_periods=False, ) - - elif period_type == 'monthly': + elif period_type == "monthly": arr = transforms.temporal.monthly_reduce( arr, how=statistic, time_shift={"hours": timezone_offset}, remove_partial_periods=False, ) - else: - raise Exception(f'Unsupported period aggregation from {dataset["periodType"]} to {period_type}') + raise ValueError(f"Unsupported period aggregation from {dataset['periodType']} to {period_type}") - # daily datasets - elif dataset['periodType'] == 'daily': - if period_type == 'monthly': + elif dataset["periodType"] == "daily": + if period_type == "monthly": arr = transforms.temporal.monthly_reduce( arr, how=statistic, remove_partial_periods=False, ) - else: - raise Exception(f'Unsupported 
period aggregation from {dataset["periodType"]} to {period_type}') + raise ValueError(f"Unsupported period aggregation from {dataset['periodType']} to {period_type}") else: - raise Exception(f'Unsupported period aggregation from {dataset["periodType"]} to {period_type}') + raise ValueError(f"Unsupported period aggregation from {dataset['periodType']} to {period_type}") - # apply the original mask in case the aggregation turned nan values to 0s arr = xr.where(valid, arr, None) - - # IMPORTANT: compute to avoid slow dask graphs arr = arr.compute() - - # convert back to dataset ds = arr.to_dataset() - # return return ds -def to_features(ds, dataset, features, statistic): - """Aggregate given xarray to geojson features and return pandas dataframe""" - logger.info('Aggregating to org units') +def to_features( + ds: xr.Dataset, + dataset: dict[str, Any], + features: dict[str, Any], + statistic: str, +) -> pd.DataFrame: + """Aggregate an xarray dataset to GeoJSON features and return a DataFrame.""" + logger.info("Aggregating to org units") - # load geojson as geopandas gdf = gpd.read_file(json.dumps(features)) - # aggregate - varname = dataset['variable'] - ds = transforms.spatial.reduce( + varname = dataset["variable"] + ds_reduced = transforms.spatial.reduce( ds[varname], gdf, - mask_dim="id", # TODO: DONT HARDCODE + mask_dim="id", # TODO: DONT HARDCODE how=statistic, ) - # convert to df - df = ds.to_dataframe().reset_index() - - # return - return df - + return ds_reduced.to_dataframe().reset_index() # type: ignore[no-any-return] diff --git a/src/eo_api/datasets/registry.py b/src/eo_api/datasets/registry.py index a5ffd78..03ea31f 100644 --- a/src/eo_api/datasets/registry.py +++ b/src/eo_api/datasets/registry.py @@ -1,40 +1,37 @@ +"""Dataset registry backed by YAML config files.""" + +import logging from pathlib import Path +from typing import Any import yaml +logger = logging.getLogger(__name__) + SCRIPT_DIR = Path(__file__).parent.resolve() -CONFIGS_DIR = SCRIPT_DIR / 
'registry' +CONFIGS_DIR = SCRIPT_DIR / "registry" + -def list_datasets(): - """Loops through configs folder, loads YAML files, and returns a list - of datasets. - """ - datasets = [] +def list_datasets() -> list[dict[str, Any]]: + """Load all YAML files in the registry folder and return a flat list of datasets.""" + datasets: list[dict[str, Any]] = [] folder = CONFIGS_DIR - # Check if directory exists if not folder.is_dir(): raise ValueError(f"Path is not a directory: {folder}") - # Iterate over .yaml and .yml files - for file_path in folder.glob('*.y*ml'): + for file_path in folder.glob("*.y*ml"): try: - with open(file_path, 'r', encoding='utf-8') as f: - # Use safe_load to avoid security risks + with open(file_path, encoding="utf-8") as f: file_datasets = yaml.safe_load(f) - datasets.extend( file_datasets ) - except Exception as e: - print(f"Error loading {file_path.name}: {e}") + datasets.extend(file_datasets) + except Exception: + logger.exception("Error loading %s", file_path.name) return datasets -def get_dataset(dataset_id): - """Get dataset dict for a given id - """ - datasets_lookup = {d['id']: d for d in list_datasets()} - if dataset_id in datasets_lookup: - # get base dataset info - dataset = datasets_lookup[dataset_id] - # return - return dataset +def get_dataset(dataset_id: str) -> dict[str, Any] | None: + """Get dataset dict for a given id.""" + datasets_lookup = {d["id"]: d for d in list_datasets()} + return datasets_lookup.get(dataset_id) diff --git a/src/eo_api/datasets/serialize.py b/src/eo_api/datasets/serialize.py index 4586854..e7761d8 100644 --- a/src/eo_api/datasets/serialize.py +++ b/src/eo_api/datasets/serialize.py @@ -1,10 +1,15 @@ +"""Serialization of xarray/pandas data to JSON, PNG previews, and NetCDF files.""" + import io import json import logging import os import tempfile +from typing import Any import geopandas as gpd +import pandas as pd +import xarray as xr from matplotlib.figure import Figure from . 
import constants @@ -12,99 +17,82 @@ logger = logging.getLogger(__name__) -def dataframe_to_json_data(df, dataset, period_type): - time_dim = get_time_dim(df) - varname = dataset['variable'] - # create smaller dataframe with known columns - temp_df = df[[time_dim, "id", varname]].rename(columns={time_dim:'period', 'id':'orgunit', varname:'value'}) +def dataframe_to_json_data(df: pd.DataFrame, dataset: dict[str, Any], period_type: str) -> list[dict[str, Any]]: + """Convert a DataFrame to a list of ``{period, orgunit, value}`` dicts.""" + time_dim = get_time_dim(df) + varname = dataset["variable"] - # convert period string depending on period type - temp_df['period'] = pandas_period_string(temp_df['period'], period_type) + temp_df = df[[time_dim, "id", varname]].rename(columns={time_dim: "period", "id": "orgunit", varname: "value"}) + temp_df["period"] = pandas_period_string(temp_df["period"], period_type) - # convert to list of json dicts - data = temp_df.to_dict(orient="records") + return temp_df.to_dict(orient="records") # type: ignore[return-value] - # return - return data -def dataframe_to_preview(df, dataset, period_type): - logger.info('Generating dataframe map preview') +def dataframe_to_preview(df: pd.DataFrame, dataset: dict[str, Any], period_type: str) -> bytes: + """Render a DataFrame as a choropleth PNG map image.""" + logger.info("Generating dataframe map preview") time_dim = get_time_dim(df) - varname = dataset['variable'] + varname = dataset["variable"] - # create smaller dataframe with known columns temp_df = df[[time_dim, "id", varname]] - - # convert period string depending on period type temp_df[time_dim] = pandas_period_string(temp_df[time_dim], period_type) - # validate only one period assert len(temp_df[time_dim].unique()) == 1 - # merge with org units geojson org_units = gpd.read_file(json.dumps(constants.ORG_UNITS_GEOJSON)) - org_units_with_temp = org_units.merge(temp_df, on='id', how='left') + org_units_with_temp = org_units.merge(temp_df, 
on="id", how="left") - # plot to map fig = Figure() ax = fig.subplots() period = temp_df[time_dim].values[0] - org_units_with_temp.plot(ax=ax, column=varname, legend=True, legend_kwds={'label': varname}) - ax.set_title(f'{period}') + org_units_with_temp.plot(ax=ax, column=varname, legend=True, legend_kwds={"label": varname}) + ax.set_title(f"{period}") - # save to in-memory image buf = io.BytesIO() fig.savefig(buf, format="png", dpi=150) buf.seek(0) - # return as image image_data = buf.getvalue() buf.close() return image_data -def xarray_to_preview(ds, dataset, period_type): - logger.info('Generating xarray map preview') + +def xarray_to_preview(ds: xr.Dataset, dataset: dict[str, Any], period_type: str) -> bytes: + """Render an xarray Dataset as a PNG map image.""" + logger.info("Generating xarray map preview") time_dim = get_time_dim(ds) - varname = dataset['variable'] + varname = dataset["variable"] - # create smaller dataframe with known columns temp_ds = ds[[time_dim, varname]] + temp_ds = temp_ds.assign_coords({time_dim: lambda x: numpy_period_array(x[time_dim].values, period_type)}) - # convert period string depending on period type - temp_ds = temp_ds.assign_coords({ - time_dim: lambda x: numpy_period_array(x[time_dim].values, period_type) - }) - - # validate only one period assert len(temp_ds[time_dim].values) == 1 - # plot to map fig = Figure() ax = fig.subplots() period = temp_ds[time_dim].values[0] temp_ds[varname].plot(ax=ax) - ax.set_title(f'{period}') + ax.set_title(f"{period}") - # save to in-memory image buf = io.BytesIO() fig.savefig(buf, format="png", dpi=150) buf.seek(0) - # return as image image_data = buf.getvalue() buf.close() return image_data -def xarray_to_temporary_netcdf(ds): - # temporary file path - path = tempfile.mktemp() - # save to path +def xarray_to_temporary_netcdf(ds: xr.Dataset) -> str: + """Write a dataset to a temporary NetCDF file and return the path.""" + fd = tempfile.NamedTemporaryFile(suffix=".nc", delete=False) + path = 
fd.name + fd.close() ds.to_netcdf(path) - - # return return path -def cleanup_file(path: str): + +def cleanup_file(path: str) -> None: + """Remove a file from disk.""" os.remove(path) diff --git a/src/eo_api/datasets/units.py b/src/eo_api/datasets/units.py index fa00758..9a56ca8 100644 --- a/src/eo_api/datasets/units.py +++ b/src/eo_api/datasets/units.py @@ -1,40 +1,39 @@ +"""Unit conversion helpers for pandas DataFrames and xarray Datasets.""" + import logging +from typing import Any +import xarray as xr from metpy.units import units -# logger logger = logging.getLogger(__name__) -def convert_pandas_units(ds, dataset): - varname = dataset['variable'] - from_units = dataset['units'] - to_units = dataset.get('convertUnits') + +def convert_pandas_units(ds: Any, dataset: dict[str, Any]) -> None: + """Convert values in a pandas DataFrame column from source to target units.""" + varname = dataset["variable"] + from_units = dataset["units"] + to_units = dataset.get("convertUnits") if to_units and to_units != from_units: logger.info(f"Applying unit conversion from {from_units} to {to_units}...") - # values with source units values_with_units = ds[varname].values * units(from_units) - # convert to target units converted = values_with_units.to(to_units).magnitude - # update the dataframe ds[varname] = converted - else: logger.info("No unit conversion needed") -def convert_xarray_units(ds, dataset): - varname = dataset['variable'] - from_units = dataset['units'] - to_units = dataset.get('convertUnits') + +def convert_xarray_units(ds: xr.Dataset, dataset: dict[str, Any]) -> None: + """Convert values in an xarray Dataset variable from source to target units.""" + varname = dataset["variable"] + from_units = dataset["units"] + to_units = dataset.get("convertUnits") if to_units and to_units != from_units: logger.info(f"Applying unit conversion from {from_units} to {to_units}...") - # values with source units values_with_units = ds[varname].values * units(from_units) - # convert 
to target units converted = values_with_units.to(to_units).magnitude - # update the ds ds[varname].values = converted - else: logger.info("No unit conversion needed") diff --git a/src/eo_api/datasets/utils.py b/src/eo_api/datasets/utils.py index 981c1d5..374ad5e 100644 --- a/src/eo_api/datasets/utils.py +++ b/src/eo_api/datasets/utils.py @@ -1,73 +1,68 @@ +"""Utility helpers for time and spatial dimension discovery and formatting.""" + +from typing import Any import numpy as np +import pandas as pd -def get_time_dim(ds): - # get first available time dim - time_dim = None - for time_name in ['valid_time', 'time']: +def get_time_dim(ds: Any) -> str: + """Return the name of the time dimension in a dataset or dataframe.""" + for time_name in ["valid_time", "time"]: if hasattr(ds, time_name): - time_dim = time_name - break - if time_dim is None: - raise Exception(f'Unable to find time dimension: {ds.coordinates}') - - return time_dim - -def get_lon_lat_dims(ds): - # get first available spatial dim - lat_dim = None - lon_dim = None - for lon_name,lat_name in [('lon','lat'), ('longitude','latitude'), ('x','y')]: + return time_name + raise ValueError(f"Unable to find time dimension: {ds.coordinates}") + + +def get_lon_lat_dims(ds: Any) -> tuple[str, str]: + """Return ``(lon, lat)`` dimension names from a dataset.""" + for lon_name, lat_name in [("lon", "lat"), ("longitude", "latitude"), ("x", "y")]: if hasattr(ds, lat_name): - lat_dim = lat_name - lon_dim = lon_name - break - if lat_dim is None: - raise Exception(f'Unable to find space dimension: {ds.coordinates}') + return lon_name, lat_name + raise ValueError(f"Unable to find space dimension: {ds.coordinates}") - return lon_dim, lat_dim def numpy_period_string(t: np.datetime64, period_type: str) -> str: - # convert numpy dateime to period string + """Convert a single numpy datetime to a period string.""" s = np.datetime_as_string(t, unit="s") if period_type == "hourly": - return s[:13] # YYYY-MM-DDTHH + return s[:13] # 
YYYY-MM-DDTHH if period_type == "daily": - return s[:10] # YYYY-MM-DD + return s[:10] # YYYY-MM-DD if period_type == "monthly": - return s[:7] # YYYY-MM + return s[:7] # YYYY-MM if period_type == "yearly": - return s[:4] # YYYY + return s[:4] # YYYY raise ValueError(f"Unknown periodType: {period_type}") -def numpy_period_array(t_array: np.ndarray, period_type: str) -> np.ndarray: - # TODO: this and numpy_period_string should be merged - # ... - # Convert the whole array to strings at once +def numpy_period_array(t_array: np.ndarray[Any, Any], period_type: str) -> np.ndarray[Any, Any]: + """Convert an array of numpy datetimes to truncated period strings.""" + # TODO: this and numpy_period_string should be merged s = np.datetime_as_string(t_array, unit="s") # Map periods to string lengths: YYYY-MM-DDTHH (13), YYYY-MM-DD (10), etc. lengths = {"hourly": 13, "daily": 10, "monthly": 7, "yearly": 4} return s.astype(f"U{lengths[period_type]}") -def pandas_period_string(column, period_type): + +def pandas_period_string(column: pd.Series[Any], period_type: str) -> pd.Series[Any]: + """Format a pandas datetime column as period strings.""" if period_type == "hourly": - return column.dt.strftime('%Y-%m-%dT%H') + return column.dt.strftime("%Y-%m-%dT%H") # type: ignore[no-any-return] if period_type == "daily": - return column.dt.strftime('%Y-%m-%d') + return column.dt.strftime("%Y-%m-%d") # type: ignore[no-any-return] if period_type == "monthly": - return column.dt.strftime('%Y-%m') + return column.dt.strftime("%Y-%m") # type: ignore[no-any-return] if period_type == "yearly": - return column.dt.strftime('%Y') + return column.dt.strftime("%Y") # type: ignore[no-any-return] raise ValueError(f"Unknown periodType: {period_type}") diff --git a/src/eo_api/lifecycle.py b/src/eo_api/lifecycle.py new file mode 100644 index 0000000..d3c9735 --- /dev/null +++ b/src/eo_api/lifecycle.py @@ -0,0 +1,34 @@ +"""Application lifespan: Prefect server bootstrap and flow runner.""" + +import asyncio 
+from collections.abc import AsyncIterator +from contextlib import asynccontextmanager + +from fastapi import FastAPI + + +async def _serve_flows() -> None: + """Register Prefect deployments and start a runner to execute them.""" + from prefect.runner import Runner + + from eo_api.prefect_flows.flows import ALL_FLOWS + + runner = Runner() + for fl in ALL_FLOWS: + await runner.aadd_flow(fl, name=fl.name) + await runner.start() + + +@asynccontextmanager +async def lifespan(app: FastAPI) -> AsyncIterator[None]: + """Start Prefect server, then register and serve pipeline deployments.""" + from eo_api.routers import prefect + + # Mounted sub-apps don't get their lifespans called automatically, + # so we trigger the Prefect server's lifespan here to initialize + # the database, docket, and background workers. + prefect_app = prefect.app + async with prefect_app.router.lifespan_context(prefect_app): + task = asyncio.create_task(_serve_flows()) + yield + task.cancel() diff --git a/src/eo_api/main.py b/src/eo_api/main.py index ba3454a..f99667a 100644 --- a/src/eo_api/main.py +++ b/src/eo_api/main.py @@ -1,117 +1,11 @@ -"""DHIS2 EO API - Earth observation data API for DHIS2. +"""DHIS2 EO API -- Earth observation data API for DHIS2.""" -load_dotenv() is called before pygeoapi import because pygeoapi -reads PYGEOAPI_CONFIG and PYGEOAPI_OPENAPI at import time. - -Prefect UI env vars are set before any imports because Prefect -caches its settings on first import. 
-""" - -import logging -import os -import warnings -from collections.abc import AsyncIterator -from contextlib import asynccontextmanager -from importlib.util import find_spec -from pathlib import Path -from typing import Any, cast - -os.environ.setdefault("PREFECT_UI_SERVE_BASE", "/prefect/") -os.environ.setdefault("PREFECT_UI_API_URL", "/prefect/api") -os.environ.setdefault("PREFECT_SERVER_API_BASE_PATH", "/prefect/api") -os.environ.setdefault("PREFECT_API_URL", "http://localhost:8000/prefect/api") -os.environ.setdefault("PREFECT_SERVER_ANALYTICS_ENABLED", "false") -os.environ.setdefault("PREFECT_SERVER_UI_SHOW_PROMOTIONAL_CONTENT", "false") - - -def _configure_proj_data() -> None: - """Point PROJ to rasterio bundled data to avoid mixed-install conflicts.""" - spec = find_spec("rasterio") - if spec is None or spec.origin is None: - return - - proj_data = Path(spec.origin).parent / "proj_data" - if not proj_data.is_dir(): - return - - proj_data_path = str(proj_data) - os.environ["PROJ_DATA"] = proj_data_path - os.environ["PROJ_LIB"] = proj_data_path - - -_configure_proj_data() - -warnings.filterwarnings("ignore", message="ecCodes .* or higher is recommended") -warnings.filterwarnings("ignore", message=r"Engine 'cfgrib' loading failed:[\s\S]*", category=RuntimeWarning) - -logging.getLogger("pygeoapi.api.processes").setLevel(logging.ERROR) -logging.getLogger("pygeoapi.l10n").setLevel(logging.ERROR) - -from dotenv import load_dotenv # noqa: E402 - -load_dotenv() - -openapi_path = os.getenv("PYGEOAPI_OPENAPI") -config_path = os.getenv("PYGEOAPI_CONFIG") -if openapi_path and config_path and not Path(openapi_path).exists(): - from pygeoapi.openapi import generate_openapi_document # noqa: E402 - - with Path(config_path).open(encoding="utf-8") as config_file: - openapi_doc = generate_openapi_document( - config_file, - output_format=cast(Any, "yaml"), - fail_on_invalid_collection=False, - ) - Path(openapi_path).write_text(openapi_doc, encoding="utf-8") - 
warnings.warn(f"Generated missing OpenAPI document at '{openapi_path}'.", RuntimeWarning) - -from fastapi import FastAPI # noqa: E402 -from fastapi.middleware.cors import CORSMiddleware # noqa: E402 -from fastapi.responses import RedirectResponse # noqa: E402 - -from eo_api.datasets import api as datasets # noqa: E402 -from eo_api.routers import cog, ogcapi, pipelines, prefect, root # noqa: E402 - -# Keep app progress logs visible while muting noisy third-party info logs. -eo_logger = logging.getLogger("eo_api") -eo_logger.setLevel(logging.INFO) -if not eo_logger.handlers: - handler = logging.StreamHandler() - handler.setLevel(logging.INFO) - handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s - %(message)s")) - eo_logger.addHandler(handler) -eo_logger.propagate = False - -logging.getLogger("dhis2eo").setLevel(logging.WARNING) -logging.getLogger("xarray").setLevel(logging.WARNING) - - -async def _serve_flows() -> None: - """Register Prefect deployments and start a runner to execute them.""" - from prefect.runner import Runner - - from eo_api.prefect_flows.flows import ALL_FLOWS - - runner = Runner() - for fl in ALL_FLOWS: - await runner.aadd_flow(fl, name=fl.name) - await runner.start() - - -@asynccontextmanager -async def lifespan(app: FastAPI) -> AsyncIterator[None]: - """Start Prefect server, then register and serve pipeline deployments.""" - import asyncio - - # Mounted sub-apps don't get their lifespans called automatically, - # so we trigger the Prefect server's lifespan here to initialize - # the database, docket, and background workers. 
- prefect_app = prefect.app - async with prefect_app.router.lifespan_context(prefect_app): - task = asyncio.create_task(_serve_flows()) - yield - task.cancel() +from fastapi import FastAPI +from fastapi.middleware.cors import CORSMiddleware +import eo_api.startup # noqa: F401 # pyright: ignore[reportUnusedImport] +from eo_api.lifecycle import lifespan +from eo_api.routers import cog, ogcapi, pipelines, prefect, root app = FastAPI(lifespan=lifespan) @@ -126,14 +20,6 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]: app.include_router(root.router) app.include_router(cog.router, prefix="/cog", tags=["Cloud Optimized GeoTIFF"]) app.include_router(pipelines.router, prefix="/pipelines", tags=["Pipelines"]) -app.include_router(datasets.router, prefix="/datasets", tags=['Datasets']) - - -@app.get("/ogcapi", include_in_schema=False) -async def ogcapi_redirect() -> RedirectResponse: - """Redirect /ogcapi to /ogcapi/ for trailing-slash consistency.""" - return RedirectResponse(url="/ogcapi/") - app.mount(path="/ogcapi", app=ogcapi.app) app.mount(path="/", app=prefect.app) diff --git a/src/eo_api/routers/ogcapi/plugins/providers/dhis2_org_units.py b/src/eo_api/routers/ogcapi/plugins/providers/dhis2_org_units.py index 5a06587..e702274 100644 --- a/src/eo_api/routers/ogcapi/plugins/providers/dhis2_org_units.py +++ b/src/eo_api/routers/ogcapi/plugins/providers/dhis2_org_units.py @@ -52,7 +52,7 @@ def query( number_matched = len(org_units) page = org_units[offset : offset + limit] - fc = FeatureCollection( + fc: FeatureCollection = FeatureCollection( type="FeatureCollection", features=[org_unit_to_feature(ou) for ou in page], ) diff --git a/src/eo_api/routers/root.py b/src/eo_api/routers/root.py index a9cbc60..c497ac6 100644 --- a/src/eo_api/routers/root.py +++ b/src/eo_api/routers/root.py @@ -4,6 +4,7 @@ from importlib.metadata import version from fastapi import APIRouter, Request +from fastapi.responses import RedirectResponse from eo_api.schemas import AppInfo, 
HealthStatus, Link, RootResponse, Status @@ -40,3 +41,9 @@ def info() -> AppInfo: pygeoapi_version=version("pygeoapi"), uvicorn_version=version("uvicorn"), ) + + +@router.get("/ogcapi", include_in_schema=False) +async def ogcapi_redirect() -> RedirectResponse: + """Redirect /ogcapi to /ogcapi/ for trailing-slash consistency.""" + return RedirectResponse(url="/ogcapi/") diff --git a/src/eo_api/startup.py b/src/eo_api/startup.py new file mode 100644 index 0000000..710d8e9 --- /dev/null +++ b/src/eo_api/startup.py @@ -0,0 +1,81 @@ +"""Early-boot side effects: env vars, PROJ config, logging, dotenv, OpenAPI. + +This module is imported before any other eo_api modules so that +environment variables and logging are configured before Prefect/pygeoapi +read them at import time. +""" + +import logging +import os +import warnings +from importlib.util import find_spec +from pathlib import Path +from typing import Any, cast + +# -- Prefect env-var defaults (must be set before Prefect is imported) -------- +os.environ.setdefault("PREFECT_UI_SERVE_BASE", "/prefect/") +os.environ.setdefault("PREFECT_UI_API_URL", "/prefect/api") +os.environ.setdefault("PREFECT_SERVER_API_BASE_PATH", "/prefect/api") +os.environ.setdefault("PREFECT_API_URL", "http://localhost:8000/prefect/api") +os.environ.setdefault("PREFECT_SERVER_ANALYTICS_ENABLED", "false") +os.environ.setdefault("PREFECT_SERVER_UI_SHOW_PROMOTIONAL_CONTENT", "false") + + +# -- PROJ data configuration -------------------------------------------------- +def _configure_proj_data() -> None: + """Point PROJ to rasterio bundled data to avoid mixed-install conflicts.""" + spec = find_spec("rasterio") + if spec is None or spec.origin is None: + return + + proj_data = Path(spec.origin).parent / "proj_data" + if not proj_data.is_dir(): + return + + proj_data_path = str(proj_data) + os.environ["PROJ_DATA"] = proj_data_path + os.environ["PROJ_LIB"] = proj_data_path + + +_configure_proj_data() + +# -- Warning filters 
--------------------------------------------------------- +warnings.filterwarnings("ignore", message="ecCodes .* or higher is recommended") +warnings.filterwarnings("ignore", message=r"Engine 'cfgrib' loading failed:[\s\S]*", category=RuntimeWarning) + +# -- Silence noisy third-party loggers early ---------------------------------- +logging.getLogger("pygeoapi.api.processes").setLevel(logging.ERROR) +logging.getLogger("pygeoapi.l10n").setLevel(logging.ERROR) + +# -- Load .env (must happen before pygeoapi reads PYGEOAPI_CONFIG) ------------ +from dotenv import load_dotenv # noqa: E402 + +load_dotenv() + +# -- Generate missing OpenAPI document ---------------------------------------- +openapi_path = os.getenv("PYGEOAPI_OPENAPI") +config_path = os.getenv("PYGEOAPI_CONFIG") +if openapi_path and config_path and not Path(openapi_path).exists(): + from pygeoapi.openapi import generate_openapi_document # noqa: E402 + + with Path(config_path).open(encoding="utf-8") as config_file: + openapi_doc = generate_openapi_document( + config_file, + output_format=cast(Any, "yaml"), + fail_on_invalid_collection=False, + ) + Path(openapi_path).write_text(openapi_doc, encoding="utf-8") + warnings.warn(f"Generated missing OpenAPI document at '{openapi_path}'.", RuntimeWarning) + +# -- eo_api / third-party logging setup --------------------------------------- +eo_logger = logging.getLogger("eo_api") +eo_logger.setLevel(logging.INFO) +if not eo_logger.handlers: + handler = logging.StreamHandler() + handler.setLevel(logging.INFO) + handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(name)s - %(message)s")) + eo_logger.addHandler(handler) +eo_logger.propagate = False + +logging.getLogger("dhis2eo").setLevel(logging.WARNING) +logging.getLogger("xarray").setLevel(logging.WARNING) diff --git a/uv.lock b/uv.lock index c82515a..6399c0c 100644 --- a/uv.lock +++ b/uv.lock @@ -659,6 +659,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload-time = "2024-05-23T11:13:55.01Z" }, ] +[[package]] +name = "donfig" +version = "0.8.1.post1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/25/71/80cc718ff6d7abfbabacb1f57aaa42e9c1552bfdd01e64ddd704e4a03638/donfig-0.8.1.post1.tar.gz", hash = "sha256:3bef3413a4c1c601b585e8d297256d0c1470ea012afa6e8461dc28bfb7c23f52", size = 19506, upload-time = "2024-05-23T14:14:31.513Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/d5/c5db1ea3394c6e1732fb3286b3bd878b59507a8f77d32a2cebda7d7b7cd4/donfig-0.8.1.post1-py3-none-any.whl", hash = "sha256:2a3175ce74a06109ff9307d90a230f81215cbac9a751f4d1c6194644b8204f9d", size = 21592, upload-time = "2024-05-23T14:13:55.283Z" }, +] + [[package]] name = "earthkit-data" version = "0.16.8" @@ -718,6 +730,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c0/5c/b2f6d6221834f8df6912b8623bea7070b5cb593d098a606b376badc21678/earthkit_meteo-0.5.1-py3-none-any.whl", hash = "sha256:02ae1ed7471749b3ee18b286a84a7f41e2bf3cdb54f923928f455eb4ecb988ca", size = 57071, upload-time = "2026-02-18T13:48:01.006Z" }, ] +[[package]] +name = "earthkit-transforms" +version = "0.5.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "array-api-compat" }, + { name = "earthkit-data" }, + { name = "earthkit-utils" }, + { name = "geopandas" }, + { name = "numpy" }, + { name = "xarray" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2b/8f/2b112128b24e3ff2a13d8a0eee42d7e4517d9642a1f2067087ddc931ae44/earthkit_transforms-0.5.3.1.tar.gz", hash = "sha256:d4ea111401af99fb309fe9402f4712d71dc80caa722baa37466c09bc7c116e90", size = 69685, upload-time = 
"2025-11-20T11:15:41.829Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/87/e019223fef300e66b3b69802f3ce4f90c90b1efc6f8a087d15e7f671996a/earthkit_transforms-0.5.3.1-py3-none-any.whl", hash = "sha256:6ca5971a0c4875f32dda08252cab57bd0f4dffc888f05f00316e08a1d91ca39d", size = 39281, upload-time = "2025-11-20T11:15:40.184Z" }, +] + [[package]] name = "earthkit-utils" version = "0.1.2" @@ -777,13 +806,16 @@ source = { editable = "." } dependencies = [ { name = "dhis2-client" }, { name = "dhis2eo" }, + { name = "earthkit-transforms" }, { name = "geojson-pydantic" }, { name = "httpx" }, + { name = "metpy" }, { name = "prefect" }, { name = "pygeoapi" }, { name = "python-dotenv" }, { name = "titiler-core" }, { name = "uvicorn" }, + { name = "zarr" }, ] [package.dev-dependencies] @@ -801,13 +833,16 @@ dev = [ requires-dist = [ { name = "dhis2-client", git = "https://github.com/dhis2/dhis2-python-client.git?rev=V0.3.0" }, { name = "dhis2eo", git = "https://github.com/dhis2/dhis2eo.git?rev=v1.1.0" }, + { name = "earthkit-transforms", specifier = "==0.5.*" }, { name = "geojson-pydantic", specifier = ">=2.1.0" }, { name = "httpx", specifier = ">=0.28.1" }, + { name = "metpy", specifier = ">=1.7,<2" }, { name = "prefect", specifier = ">=3.6" }, { name = "pygeoapi", specifier = ">=0.22.0" }, { name = "python-dotenv", specifier = ">=1.0.1" }, { name = "titiler-core", specifier = ">=1.2.0" }, { name = "uvicorn", specifier = ">=0.41.0" }, + { name = "zarr", specifier = "==3.1.5" }, ] [package.metadata.requires-dev] @@ -994,6 +1029,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/e4/fac19dc34cb686c96011388b813ff7b858a70681e5ce6ce7698e5021b0f4/geopandas-1.1.2-py3-none-any.whl", hash = "sha256:2bb0b1052cb47378addb4ba54c47f8d4642dcbda9b61375638274f49d9f0bb0d", size = 341734, upload-time = "2025-12-22T21:06:12.498Z" }, ] +[[package]] +name = "google-crc32c" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/03/41/4b9c02f99e4c5fb477122cd5437403b552873f014616ac1d19ac8221a58d/google_crc32c-1.8.0.tar.gz", hash = "sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79", size = 14192, upload-time = "2025-12-16T00:35:25.142Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/db/000f15b41724589b0e7bc24bc7a8967898d8d3bc8caf64c513d91ef1f6c0/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:3ebb04528e83b2634857f43f9bb8ef5b2bbe7f10f140daeb01b58f972d04736b", size = 31297, upload-time = "2025-12-16T00:23:20.709Z" }, + { url = "https://files.pythonhosted.org/packages/d7/0d/8ebed0c39c53a7e838e2a486da8abb0e52de135f1b376ae2f0b160eb4c1a/google_crc32c-1.8.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:450dc98429d3e33ed2926fc99ee81001928d63460f8538f21a5d6060912a8e27", size = 30867, upload-time = "2025-12-16T00:43:14.628Z" }, + { url = "https://files.pythonhosted.org/packages/ce/42/b468aec74a0354b34c8cbf748db20d6e350a68a2b0912e128cabee49806c/google_crc32c-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:3b9776774b24ba76831609ffbabce8cdf6fa2bd5e9df37b594221c7e333a81fa", size = 33344, upload-time = "2025-12-16T00:40:24.742Z" }, + { url = "https://files.pythonhosted.org/packages/1c/e8/b33784d6fc77fb5062a8a7854e43e1e618b87d5ddf610a88025e4de6226e/google_crc32c-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:89c17d53d75562edfff86679244830599ee0a48efc216200691de8b02ab6b2b8", size = 33694, upload-time = "2025-12-16T00:40:25.505Z" }, + { url = "https://files.pythonhosted.org/packages/92/b1/d3cbd4d988afb3d8e4db94ca953df429ed6db7282ed0e700d25e6c7bfc8d/google_crc32c-1.8.0-cp313-cp313-win_amd64.whl", hash = "sha256:57a50a9035b75643996fbf224d6661e386c7162d1dfdab9bc4ca790947d1007f", size = 34435, upload-time = "2025-12-16T00:35:22.107Z" }, + { url = 
"https://files.pythonhosted.org/packages/21/88/8ecf3c2b864a490b9e7010c84fd203ec8cf3b280651106a3a74dd1b0ca72/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:e6584b12cb06796d285d09e33f63309a09368b9d806a551d8036a4207ea43697", size = 31301, upload-time = "2025-12-16T00:24:48.527Z" }, + { url = "https://files.pythonhosted.org/packages/36/c6/f7ff6c11f5ca215d9f43d3629163727a272eabc356e5c9b2853df2bfe965/google_crc32c-1.8.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:f4b51844ef67d6cf2e9425983274da75f18b1597bb2c998e1c0a0e8d46f8f651", size = 30868, upload-time = "2025-12-16T00:48:12.163Z" }, + { url = "https://files.pythonhosted.org/packages/56/15/c25671c7aad70f8179d858c55a6ae8404902abe0cdcf32a29d581792b491/google_crc32c-1.8.0-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b0d1a7afc6e8e4635564ba8aa5c0548e3173e41b6384d7711a9123165f582de2", size = 33381, upload-time = "2025-12-16T00:40:26.268Z" }, + { url = "https://files.pythonhosted.org/packages/42/fa/f50f51260d7b0ef5d4898af122d8a7ec5a84e2984f676f746445f783705f/google_crc32c-1.8.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8b3f68782f3cbd1bce027e48768293072813469af6a61a86f6bb4977a4380f21", size = 33734, upload-time = "2025-12-16T00:40:27.028Z" }, + { url = "https://files.pythonhosted.org/packages/08/a5/7b059810934a09fb3ccb657e0843813c1fee1183d3bc2c8041800374aa2c/google_crc32c-1.8.0-cp314-cp314-win_amd64.whl", hash = "sha256:d511b3153e7011a27ab6ee6bb3a5404a55b994dc1a7322c0b87b29606d9790e2", size = 34878, upload-time = "2025-12-16T00:35:23.142Z" }, +] + [[package]] name = "graphviz" version = "0.21" @@ -1618,6 +1671,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = 
"2022-08-14T12:40:09.779Z" }, ] +[[package]] +name = "metpy" +version = "1.7.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "matplotlib" }, + { name = "numpy" }, + { name = "pandas" }, + { name = "pint" }, + { name = "pooch" }, + { name = "pyproj" }, + { name = "scipy" }, + { name = "traitlets" }, + { name = "xarray" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e7/66/2745f03139abda190010ed94c4f6722688304896ad28b4d02e0455b0263e/metpy-1.7.1.tar.gz", hash = "sha256:cdfd8fdab58bc092a1974c016f2ea3a7715ffdf6a4660b28b0de7049328bce75", size = 13366247, upload-time = "2025-08-29T23:19:54.045Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/c3/6b05cc23afc9c466634ba815bfddf529677f8c7e03f5814ccd387dac0ade/metpy-1.7.1-py3-none-any.whl", hash = "sha256:b184a1786a730f7926ec5fa58618cd0935a21f8088a13b268dd7267f9e104efc", size = 424395, upload-time = "2025-08-29T23:19:51.878Z" }, +] + [[package]] name = "morecantile" version = "7.0.3" @@ -1720,6 +1793,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/b2/d0896bdcdc8d28a7fc5717c305f1a861c26e18c05047949fb371034d98bd/nodeenv-1.10.0-py2.py3-none-any.whl", hash = "sha256:5bb13e3eed2923615535339b3c620e76779af4cb4c6a90deccc9e36b274d3827", size = 23438, upload-time = "2025-12-20T14:08:52.782Z" }, ] +[[package]] +name = "numcodecs" +version = "0.16.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/44/bd/8a391e7c356366224734efd24da929cc4796fff468bfb179fe1af6548535/numcodecs-0.16.5.tar.gz", hash = "sha256:0d0fb60852f84c0bd9543cc4d2ab9eefd37fc8efcc410acd4777e62a1d300318", size = 6276387, upload-time = "2025-11-21T02:49:48.986Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/38/071ced5a5fd1c85ba0e14ba721b66b053823e5176298c2f707e50bed11d9/numcodecs-0.16.5-cp313-cp313-macosx_10_13_x86_64.whl", hash 
= "sha256:25be3a516ab677dad890760d357cfe081a371d9c0a2e9a204562318ac5969de3", size = 1654359, upload-time = "2025-11-21T02:49:33.673Z" }, + { url = "https://files.pythonhosted.org/packages/d1/c0/5f84ba7525577c1b9909fc2d06ef11314825fc4ad4378f61d0e4c9883b4a/numcodecs-0.16.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0107e839ef75b854e969cb577e140b1aadb9847893937636582d23a2a4c6ce50", size = 1144237, upload-time = "2025-11-21T02:49:35.294Z" }, + { url = "https://files.pythonhosted.org/packages/0b/00/787ea5f237b8ea7bc67140c99155f9c00b5baf11c49afc5f3bfefa298f95/numcodecs-0.16.5-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:015a7c859ecc2a06e2a548f64008c0ec3aaecabc26456c2c62f4278d8fc20597", size = 8483064, upload-time = "2025-11-21T02:49:36.454Z" }, + { url = "https://files.pythonhosted.org/packages/c4/e6/d359fdd37498e74d26a167f7a51e54542e642ea47181eb4e643a69a066c3/numcodecs-0.16.5-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:84230b4b9dad2392f2a84242bd6e3e659ac137b5a1ce3571d6965fca673e0903", size = 9126063, upload-time = "2025-11-21T02:49:38.018Z" }, + { url = "https://files.pythonhosted.org/packages/27/72/6663cc0382ddbb866136c255c837bcb96cc7ce5e83562efec55e1b995941/numcodecs-0.16.5-cp313-cp313-win_amd64.whl", hash = "sha256:5088145502ad1ebf677ec47d00eb6f0fd600658217db3e0c070c321c85d6cf3d", size = 799275, upload-time = "2025-11-21T02:49:39.558Z" }, + { url = "https://files.pythonhosted.org/packages/3c/9e/38e7ca8184c958b51f45d56a4aeceb1134ecde2d8bd157efadc98502cc42/numcodecs-0.16.5-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b05647b8b769e6bc8016e9fd4843c823ce5c9f2337c089fb5c9c4da05e5275de", size = 1654721, upload-time = "2025-11-21T02:49:40.602Z" }, + { url = "https://files.pythonhosted.org/packages/a1/37/260fa42e7b2b08e6e00ad632f8dd620961a60a459426c26cea390f8c68d0/numcodecs-0.16.5-cp314-cp314-macosx_11_0_arm64.whl", hash = 
"sha256:3832bd1b5af8bb3e413076b7d93318c8e7d7b68935006b9fa36ca057d1725a8f", size = 1146887, upload-time = "2025-11-21T02:49:41.721Z" }, + { url = "https://files.pythonhosted.org/packages/4e/15/e2e1151b5a8b14a15dfd4bb4abccce7fff7580f39bc34092780088835f3a/numcodecs-0.16.5-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49f7b7d24f103187f53135bed28bb9f0ed6b2e14c604664726487bb6d7c882e1", size = 8476987, upload-time = "2025-11-21T02:49:43.363Z" }, + { url = "https://files.pythonhosted.org/packages/6d/30/16a57fc4d9fb0ba06c600408bd6634f2f1753c54a7a351c99c5e09b51ee2/numcodecs-0.16.5-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aec9736d81b70f337d89c4070ee3ffeff113f386fd789492fa152d26a15043e4", size = 9102377, upload-time = "2025-11-21T02:49:45.508Z" }, + { url = "https://files.pythonhosted.org/packages/31/a5/a0425af36c20d55a3ea884db4b4efca25a43bea9214ba69ca7932dd997b4/numcodecs-0.16.5-cp314-cp314-win_amd64.whl", hash = "sha256:b16a14303800e9fb88abc39463ab4706c037647ac17e49e297faa5f7d7dbbf1d", size = 819022, upload-time = "2025-11-21T02:49:47.39Z" }, +] + [[package]] name = "numexpr" version = "2.14.1" @@ -2063,6 +2158,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, ] +[[package]] +name = "pooch" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, + { name = "platformdirs" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/83/43/85ef45e8b36c6a48546af7b266592dc32d7f67837a6514d111bced6d7d75/pooch-1.9.0.tar.gz", hash = "sha256:de46729579b9857ffd3e741987a2f6d5e0e03219892c167c6578c0091fb511ed", size = 61788, upload-time = 
"2026-01-30T19:15:09.649Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/2d/d4bf65e47cea8ff2c794a600c4fd1273a7902f268757c531e0ee9f18aa58/pooch-1.9.0-py3-none-any.whl", hash = "sha256:f265597baa9f760d25ceb29d0beb8186c243d6607b0f60b83ecf14078dbc703b", size = 67175, upload-time = "2026-01-30T19:15:08.36Z" }, +] + [[package]] name = "prefect" version = "3.6.19" @@ -2963,6 +3072,57 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6d/78/097c0798b1dab9f8affe73da9642bb4500e098cb27fd8dc9724816ac747b/ruff-0.15.2-py3-none-win_arm64.whl", hash = "sha256:cabddc5822acdc8f7b5527b36ceac55cc51eec7b1946e60181de8fe83ca8876e", size = 10941649, upload-time = "2026-02-19T22:32:18.108Z" }, ] +[[package]] +name = "scipy" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/27/07ee1b57b65e92645f219b37148a7e7928b82e2b5dbeccecb4dff7c64f0b/scipy-1.17.1-cp313-cp313-macosx_10_14_x86_64.whl", hash = "sha256:5e3c5c011904115f88a39308379c17f91546f77c1667cea98739fe0fccea804c", size = 31590199, upload-time = "2026-02-23T00:19:17.192Z" }, + { url = "https://files.pythonhosted.org/packages/ec/ae/db19f8ab842e9b724bf5dbb7db29302a91f1e55bc4d04b1025d6d605a2c5/scipy-1.17.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6fac755ca3d2c3edcb22f479fceaa241704111414831ddd3bc6056e18516892f", size = 28154001, upload-time = "2026-02-23T00:19:22.241Z" }, + { url = "https://files.pythonhosted.org/packages/5b/58/3ce96251560107b381cbd6e8413c483bbb1228a6b919fa8652b0d4090e7f/scipy-1.17.1-cp313-cp313-macosx_14_0_arm64.whl", hash = 
"sha256:7ff200bf9d24f2e4d5dc6ee8c3ac64d739d3a89e2326ba68aaf6c4a2b838fd7d", size = 20325719, upload-time = "2026-02-23T00:19:26.329Z" }, + { url = "https://files.pythonhosted.org/packages/b2/83/15087d945e0e4d48ce2377498abf5ad171ae013232ae31d06f336e64c999/scipy-1.17.1-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:4b400bdc6f79fa02a4d86640310dde87a21fba0c979efff5248908c6f15fad1b", size = 22683595, upload-time = "2026-02-23T00:19:30.304Z" }, + { url = "https://files.pythonhosted.org/packages/b4/e0/e58fbde4a1a594c8be8114eb4aac1a55bcd6587047efc18a61eb1f5c0d30/scipy-1.17.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2b64ca7d4aee0102a97f3ba22124052b4bd2152522355073580bf4845e2550b6", size = 32896429, upload-time = "2026-02-23T00:19:35.536Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5f/f17563f28ff03c7b6799c50d01d5d856a1d55f2676f537ca8d28c7f627cd/scipy-1.17.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:581b2264fc0aa555f3f435a5944da7504ea3a065d7029ad60e7c3d1ae09c5464", size = 35203952, upload-time = "2026-02-23T00:19:42.259Z" }, + { url = "https://files.pythonhosted.org/packages/8d/a5/9afd17de24f657fdfe4df9a3f1ea049b39aef7c06000c13db1530d81ccca/scipy-1.17.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:beeda3d4ae615106d7094f7e7cef6218392e4465cc95d25f900bebabfded0950", size = 34979063, upload-time = "2026-02-23T00:19:47.547Z" }, + { url = "https://files.pythonhosted.org/packages/8b/13/88b1d2384b424bf7c924f2038c1c409f8d88bb2a8d49d097861dd64a57b2/scipy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6609bc224e9568f65064cfa72edc0f24ee6655b47575954ec6339534b2798369", size = 37598449, upload-time = "2026-02-23T00:19:53.238Z" }, + { url = "https://files.pythonhosted.org/packages/35/e5/d6d0e51fc888f692a35134336866341c08655d92614f492c6860dc45bb2c/scipy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:37425bc9175607b0268f493d79a292c39f9d001a357bebb6b88fdfaff13f6448", size = 
36510943, upload-time = "2026-02-23T00:20:50.89Z" }, + { url = "https://files.pythonhosted.org/packages/2a/fd/3be73c564e2a01e690e19cc618811540ba5354c67c8680dce3281123fb79/scipy-1.17.1-cp313-cp313-win_arm64.whl", hash = "sha256:5cf36e801231b6a2059bf354720274b7558746f3b1a4efb43fcf557ccd484a87", size = 24545621, upload-time = "2026-02-23T00:20:55.871Z" }, + { url = "https://files.pythonhosted.org/packages/6f/6b/17787db8b8114933a66f9dcc479a8272e4b4da75fe03b0c282f7b0ade8cd/scipy-1.17.1-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:d59c30000a16d8edc7e64152e30220bfbd724c9bbb08368c054e24c651314f0a", size = 31936708, upload-time = "2026-02-23T00:19:58.694Z" }, + { url = "https://files.pythonhosted.org/packages/38/2e/524405c2b6392765ab1e2b722a41d5da33dc5c7b7278184a8ad29b6cb206/scipy-1.17.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:010f4333c96c9bb1a4516269e33cb5917b08ef2166d5556ca2fd9f082a9e6ea0", size = 28570135, upload-time = "2026-02-23T00:20:03.934Z" }, + { url = "https://files.pythonhosted.org/packages/fd/c3/5bd7199f4ea8556c0c8e39f04ccb014ac37d1468e6cfa6a95c6b3562b76e/scipy-1.17.1-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2ceb2d3e01c5f1d83c4189737a42d9cb2fc38a6eeed225e7515eef71ad301dce", size = 20741977, upload-time = "2026-02-23T00:20:07.935Z" }, + { url = "https://files.pythonhosted.org/packages/d9/b8/8ccd9b766ad14c78386599708eb745f6b44f08400a5fd0ade7cf89b6fc93/scipy-1.17.1-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:844e165636711ef41f80b4103ed234181646b98a53c8f05da12ca5ca289134f6", size = 23029601, upload-time = "2026-02-23T00:20:12.161Z" }, + { url = "https://files.pythonhosted.org/packages/6d/a0/3cb6f4d2fb3e17428ad2880333cac878909ad1a89f678527b5328b93c1d4/scipy-1.17.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:158dd96d2207e21c966063e1635b1063cd7787b627b6f07305315dd73d9c679e", size = 33019667, upload-time = "2026-02-23T00:20:17.208Z" }, + { url = 
"https://files.pythonhosted.org/packages/f3/c3/2d834a5ac7bf3a0c806ad1508efc02dda3c8c61472a56132d7894c312dea/scipy-1.17.1-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74cbb80d93260fe2ffa334efa24cb8f2f0f622a9b9febf8b483c0b865bfb3475", size = 35264159, upload-time = "2026-02-23T00:20:23.087Z" }, + { url = "https://files.pythonhosted.org/packages/4d/77/d3ed4becfdbd217c52062fafe35a72388d1bd82c2d0ba5ca19d6fcc93e11/scipy-1.17.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:dbc12c9f3d185f5c737d801da555fb74b3dcfa1a50b66a1a93e09190f41fab50", size = 35102771, upload-time = "2026-02-23T00:20:28.636Z" }, + { url = "https://files.pythonhosted.org/packages/bd/12/d19da97efde68ca1ee5538bb261d5d2c062f0c055575128f11a2730e3ac1/scipy-1.17.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:94055a11dfebe37c656e70317e1996dc197e1a15bbcc351bcdd4610e128fe1ca", size = 37665910, upload-time = "2026-02-23T00:20:34.743Z" }, + { url = "https://files.pythonhosted.org/packages/06/1c/1172a88d507a4baaf72c5a09bb6c018fe2ae0ab622e5830b703a46cc9e44/scipy-1.17.1-cp313-cp313t-win_amd64.whl", hash = "sha256:e30bdeaa5deed6bc27b4cc490823cd0347d7dae09119b8803ae576ea0ce52e4c", size = 36562980, upload-time = "2026-02-23T00:20:40.575Z" }, + { url = "https://files.pythonhosted.org/packages/70/b0/eb757336e5a76dfa7911f63252e3b7d1de00935d7705cf772db5b45ec238/scipy-1.17.1-cp313-cp313t-win_arm64.whl", hash = "sha256:a720477885a9d2411f94a93d16f9d89bad0f28ca23c3f8daa521e2dcc3f44d49", size = 24856543, upload-time = "2026-02-23T00:20:45.313Z" }, + { url = "https://files.pythonhosted.org/packages/cf/83/333afb452af6f0fd70414dc04f898647ee1423979ce02efa75c3b0f2c28e/scipy-1.17.1-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:a48a72c77a310327f6a3a920092fa2b8fd03d7deaa60f093038f22d98e096717", size = 31584510, upload-time = "2026-02-23T00:21:01.015Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/a6/d05a85fd51daeb2e4ea71d102f15b34fedca8e931af02594193ae4fd25f7/scipy-1.17.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:45abad819184f07240d8a696117a7aacd39787af9e0b719d00285549ed19a1e9", size = 28170131, upload-time = "2026-02-23T00:21:05.888Z" }, + { url = "https://files.pythonhosted.org/packages/db/7b/8624a203326675d7746a254083a187398090a179335b2e4a20e2ddc46e83/scipy-1.17.1-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:3fd1fcdab3ea951b610dc4cef356d416d5802991e7e32b5254828d342f7b7e0b", size = 20342032, upload-time = "2026-02-23T00:21:09.904Z" }, + { url = "https://files.pythonhosted.org/packages/c9/35/2c342897c00775d688d8ff3987aced3426858fd89d5a0e26e020b660b301/scipy-1.17.1-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:7bdf2da170b67fdf10bca777614b1c7d96ae3ca5794fd9587dce41eb2966e866", size = 22678766, upload-time = "2026-02-23T00:21:14.313Z" }, + { url = "https://files.pythonhosted.org/packages/ef/f2/7cdb8eb308a1a6ae1e19f945913c82c23c0c442a462a46480ce487fdc0ac/scipy-1.17.1-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:adb2642e060a6549c343603a3851ba76ef0b74cc8c079a9a58121c7ec9fe2350", size = 32957007, upload-time = "2026-02-23T00:21:19.663Z" }, + { url = "https://files.pythonhosted.org/packages/0b/2e/7eea398450457ecb54e18e9d10110993fa65561c4f3add5e8eccd2b9cd41/scipy-1.17.1-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eee2cfda04c00a857206a4330f0c5e3e56535494e30ca445eb19ec624ae75118", size = 35221333, upload-time = "2026-02-23T00:21:25.278Z" }, + { url = "https://files.pythonhosted.org/packages/d9/77/5b8509d03b77f093a0d52e606d3c4f79e8b06d1d38c441dacb1e26cacf46/scipy-1.17.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d2650c1fb97e184d12d8ba010493ee7b322864f7d3d00d3f9bb97d9c21de4068", size = 35042066, upload-time = "2026-02-23T00:21:31.358Z" }, + { url = 
"https://files.pythonhosted.org/packages/f9/df/18f80fb99df40b4070328d5ae5c596f2f00fffb50167e31439e932f29e7d/scipy-1.17.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:08b900519463543aa604a06bec02461558a6e1cef8fdbb8098f77a48a83c8118", size = 37612763, upload-time = "2026-02-23T00:21:37.247Z" }, + { url = "https://files.pythonhosted.org/packages/4b/39/f0e8ea762a764a9dc52aa7dabcfad51a354819de1f0d4652b6a1122424d6/scipy-1.17.1-cp314-cp314-win_amd64.whl", hash = "sha256:3877ac408e14da24a6196de0ddcace62092bfc12a83823e92e49e40747e52c19", size = 37290984, upload-time = "2026-02-23T00:22:35.023Z" }, + { url = "https://files.pythonhosted.org/packages/7c/56/fe201e3b0f93d1a8bcf75d3379affd228a63d7e2d80ab45467a74b494947/scipy-1.17.1-cp314-cp314-win_arm64.whl", hash = "sha256:f8885db0bc2bffa59d5c1b72fad7a6a92d3e80e7257f967dd81abb553a90d293", size = 25192877, upload-time = "2026-02-23T00:22:39.798Z" }, + { url = "https://files.pythonhosted.org/packages/96/ad/f8c414e121f82e02d76f310f16db9899c4fcde36710329502a6b2a3c0392/scipy-1.17.1-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:1cc682cea2ae55524432f3cdff9e9a3be743d52a7443d0cba9017c23c87ae2f6", size = 31949750, upload-time = "2026-02-23T00:21:42.289Z" }, + { url = "https://files.pythonhosted.org/packages/7c/b0/c741e8865d61b67c81e255f4f0a832846c064e426636cd7de84e74d209be/scipy-1.17.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:2040ad4d1795a0ae89bfc7e8429677f365d45aa9fd5e4587cf1ea737f927b4a1", size = 28585858, upload-time = "2026-02-23T00:21:47.706Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1b/3985219c6177866628fa7c2595bfd23f193ceebbe472c98a08824b9466ff/scipy-1.17.1-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:131f5aaea57602008f9822e2115029b55d4b5f7c070287699fe45c661d051e39", size = 20757723, upload-time = "2026-02-23T00:21:52.039Z" }, + { url = 
"https://files.pythonhosted.org/packages/c0/19/2a04aa25050d656d6f7b9e7b685cc83d6957fb101665bfd9369ca6534563/scipy-1.17.1-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:9cdc1a2fcfd5c52cfb3045feb399f7b3ce822abdde3a193a6b9a60b3cb5854ca", size = 23043098, upload-time = "2026-02-23T00:21:56.185Z" }, + { url = "https://files.pythonhosted.org/packages/86/f1/3383beb9b5d0dbddd030335bf8a8b32d4317185efe495374f134d8be6cce/scipy-1.17.1-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e3dcd57ab780c741fde8dc68619de988b966db759a3c3152e8e9142c26295ad", size = 33030397, upload-time = "2026-02-23T00:22:01.404Z" }, + { url = "https://files.pythonhosted.org/packages/41/68/8f21e8a65a5a03f25a79165ec9d2b28c00e66dc80546cf5eb803aeeff35b/scipy-1.17.1-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a9956e4d4f4a301ebf6cde39850333a6b6110799d470dbbb1e25326ac447f52a", size = 35281163, upload-time = "2026-02-23T00:22:07.024Z" }, + { url = "https://files.pythonhosted.org/packages/84/8d/c8a5e19479554007a5632ed7529e665c315ae7492b4f946b0deb39870e39/scipy-1.17.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:a4328d245944d09fd639771de275701ccadf5f781ba0ff092ad141e017eccda4", size = 35116291, upload-time = "2026-02-23T00:22:12.585Z" }, + { url = "https://files.pythonhosted.org/packages/52/52/e57eceff0e342a1f50e274264ed47497b59e6a4e3118808ee58ddda7b74a/scipy-1.17.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a77cbd07b940d326d39a1d1b37817e2ee4d79cb30e7338f3d0cddffae70fcaa2", size = 37682317, upload-time = "2026-02-23T00:22:18.513Z" }, + { url = "https://files.pythonhosted.org/packages/11/2f/b29eafe4a3fbc3d6de9662b36e028d5f039e72d345e05c250e121a230dd4/scipy-1.17.1-cp314-cp314t-win_amd64.whl", hash = "sha256:eb092099205ef62cd1782b006658db09e2fed75bffcae7cc0d44052d8aa0f484", size = 37345327, upload-time = "2026-02-23T00:22:24.442Z" }, + { url = 
"https://files.pythonhosted.org/packages/07/39/338d9219c4e87f3e708f18857ecd24d22a0c3094752393319553096b98af/scipy-1.17.1-cp314-cp314t-win_arm64.whl", hash = "sha256:200e1050faffacc162be6a486a984a0497866ec54149a01270adc8a59b7c7d21", size = 25489165, upload-time = "2026-02-23T00:22:29.563Z" }, +] + [[package]] name = "semver" version = "3.0.4" @@ -3194,6 +3354,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, ] +[[package]] +name = "traitlets" +version = "5.14.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621, upload-time = "2024-04-19T11:11:49.746Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359, upload-time = "2024-04-19T11:11:46.763Z" }, +] + [[package]] name = "typer" version = "0.24.1" @@ -3425,6 +3594,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d5/e4/62a677feefde05b12a70a4fc9bdc8558010182a801fbcab68cb56c2b0986/xarray-2025.12.0-py3-none-any.whl", hash = "sha256:9e77e820474dbbe4c6c2954d0da6342aa484e33adaa96ab916b15a786181e970", size = 1381742, upload-time = "2025-12-05T21:51:20.841Z" }, ] +[[package]] +name = "zarr" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "donfig" }, + { name = "google-crc32c" }, + { name = "numcodecs" }, + { name = "numpy" }, + { name = "packaging" }, + { 
name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/76/7fa87f57c112c7b9c82f0a730f8b6f333e792574812872e2cd45ab604199/zarr-3.1.5.tar.gz", hash = "sha256:fbe0c79675a40c996de7ca08e80a1c0a20537bd4a9f43418b6d101395c0bba2b", size = 366825, upload-time = "2025-11-21T14:06:01.492Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/15/bb13b4913ef95ad5448490821eee4671d0e67673342e4d4070854e5fe081/zarr-3.1.5-py3-none-any.whl", hash = "sha256:29cd905afb6235b94c09decda4258c888fcb79bb6c862ef7c0b8fe009b5c8563", size = 284067, upload-time = "2025-11-21T14:05:59.235Z" }, +] + [[package]] name = "zipp" version = "3.23.0"