From 35fe80ec878bbaa31fefb3edfca4cce34e7e8620 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 28 Apr 2026 15:57:59 +0200 Subject: [PATCH 01/10] added endpoint for data retrieval --- src/simdb/remote/apis/v1_2/__init__.py | 3 +- src/simdb/remote/apis/v1_2/simulation_data.py | 233 ++++++++++++++++++ 2 files changed, 235 insertions(+), 1 deletion(-) create mode 100644 src/simdb/remote/apis/v1_2/simulation_data.py diff --git a/src/simdb/remote/apis/v1_2/__init__.py b/src/simdb/remote/apis/v1_2/__init__.py index 920f303..6b8bc01 100644 --- a/src/simdb/remote/apis/v1_2/__init__.py +++ b/src/simdb/remote/apis/v1_2/__init__.py @@ -10,6 +10,7 @@ from simdb.remote.core.typing import current_app from simdb.remote.models import StagingDirectoryResponse +from .simulation_data import api as data_ns from .simulations import api as sim_ns api = Api( @@ -31,7 +32,7 @@ ) api.add_namespace(sim_ns) -namespaces = [metadata_ns, watcher_ns, file_ns, sim_ns] +namespaces = [metadata_ns, watcher_ns, file_ns, sim_ns, data_ns] @api.route("/staging_dir", defaults={"sim_hex": None}) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py new file mode 100644 index 0000000..482f17a --- /dev/null +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -0,0 +1,233 @@ +"""Simulation IMAS data endpoint: /data. + +TODO: Temporal solution to retrive data (Use IBEX backend) +""" + +import re +import uuid as _uuid +from typing import Any + +import numpy as np +from flask import request +from flask_restx import Namespace, Resource +from imas.ids_primitive import IDSPrimitive + +from simdb.cli.manifest import DataObject +from simdb.database import DatabaseError +from simdb.imas.utils import FLOAT_MISSING_VALUE, INT_MISSING_VALUE, ImasError, open_imas +from simdb.remote.core.auth import User, requires_auth +from simdb.remote.core.cache import cache +from simdb.remote.core.typing import current_app +from simdb.uri import URI + +api = Namespace("data", path="/") + + +# Helpers + + +def _to_python(value: Any) -> Any: + """Convert a value returned by IDSPrimitive.value to a JSON-serialisable + Python object.""" + if isinstance(value, np.ndarray): + flat = value.tolist() + + def _clean(v): + if isinstance(v, float) and ( + v != v or v == float("inf") or v == float("-inf") or v == FLOAT_MISSING_VALUE + ): + return None + if isinstance(v, list): + return [_clean(x) for x in v] + return v + + return _clean(flat) + if isinstance(value, np.integer): + v = int(value) + return None if v == INT_MISSING_VALUE else v + if isinstance(value, np.floating): + v = float(value) + return None if (np.isnan(v) or np.isinf(v) or v == FLOAT_MISSING_VALUE) else v + if isinstance(value, np.complexfloating): + return {"real": float(value.real), "imag": float(value.imag)} + if isinstance(value, np.bool_): + return bool(value) + return value + +# TODO Replace this logic with slicing when supported by imas-python. +# TODO Add support for [:], [:-1], and [2:4:2] python slicing syntax. +def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): + """Walk *field_segments* inside *ids_name* and return (value, shape, coordinate_path). + + Each segment is either: + - a non-negative integer string → array-of-structures index + - a plain name → attribute access (IDSStructure child node) + """ + ids_obj = entry.get( + ids_name, occurrence, + lazy=True, autoconvert=False, ignore_unknown_dd_version=True, + ) + node = ids_obj + for segment in field_segments: + if segment.isdigit(): + node = node[int(segment)] + else: + try: + node = getattr(node, segment) + except AttributeError: + raise ValueError(f"segment '{segment}' not found in IDS path") + if not isinstance(node, IDSPrimitive): + raise ValueError( + f"path does not point to a scalar/array leaf " + f"(reached {type(node).__name__}); add more path segments" + ) + if not node.has_value: + raise ValueError(f"field is not populated (no data written)") + + node_shape = list(node.shape) if node.metadata.ndim > 0 else None + + coordinate_path = None + try: + def _replace_placeholder(m, _segs=field_segments): + idx = next((s for s in _segs if s.isdigit()), "0") + return "/" + idx + "/" + + for coord in node.metadata.coordinates: + clean = re.sub(r"\([^)]+\)/", _replace_placeholder, str(coord)) + coordinate_path = ids_name + "/" + clean + break + except Exception: + pass + + return _to_python(node.value), node_shape, coordinate_path + + +def _fetch_field(uri_str: str, ids_name: str, field_segments: tuple, occurrence: int) -> tuple: + """Open the IMAS entry, traverse the path, and return (value, shape, coordinate_path). + + Scalar results (``shape is None``) are written into the response cache so + that repeated requests skip the IMAS open. Array values are intentionally + *not* cached: caching large numpy-derived lists would create persistent + memory pressure and could fill the cache backend with multi-MB payloads. + """ + if ids_name and not field_segments: # bare IDS name only – no leaf, skip cache probe + pass + else: + cache_key = ( + f"simdb:field:{uri_str}:{ids_name}:" + f"{'/' .join(field_segments)}:{occurrence}" + ) + cached = cache.get(cache_key) + if cached is not None: + return cached + + entry = open_imas(URI(uri_str)) + with entry: + result = _traverse_path(entry, ids_name, list(field_segments), occurrence) + + _value, shape, _coord = result + if shape is None: # scalar leaf – safe to persist in cache + cache.set(cache_key, result) # type: ignore[possibly-undefined] + return result + + +def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: str | None): + try: + simulation = current_app.db.get_simulation(sim_id) + except DatabaseError as exc: + return None, None, ({"error": str(exc)}, 404) + + imas_outputs = [f for f in simulation.outputs if f.type == DataObject.Type.IMAS] + if not imas_outputs: + return None, None, ( + {"error": f"Simulation {sim_id} has no IMAS output files"}, 404 + ) + + if not file_uuid_str: + return simulation, imas_outputs[0], None + + try: + target_uuid = _uuid.UUID(file_uuid_str) + except ValueError: + return None, None, ({"error": f"Invalid file_uuid: {file_uuid_str!r}"}, 400) + + imas_file = next((f for f in imas_outputs if f.uuid == target_uuid), None) + if imas_file is None: + return None, None, ({"error": f"File {file_uuid_str} not found"}, 404) + + return simulation, imas_file, None + + +# Endpoints + +@api.route("/simulation//data") +class SimulationImasData(Resource): + @requires_auth() + def get(self, sim_id: str, user: User): + """Return the value at a given IDS path for a simulation's IMAS output. + + Query parameters + ---------------- + path (required) IDS path, e.g. ``core_profiles/profiles_1d/0/electrons/density`` + file_uuid (optional) UUID of an IMAS output file + occurrence (optional) IDS occurrence index (default 0) + """ + path = request.args.get("path", "").strip() + if not path: + return {"error": "Query parameter 'path' is required"}, 400 + + file_uuid_str = request.args.get("file_uuid", "").strip() or None + + try: + occurrence = int(request.args.get("occurrence", "0")) + except ValueError: + return {"error": "'occurrence' must be a non-negative integer"}, 400 + if occurrence < 0: + return {"error": "'occurrence' must be a non-negative integer"}, 400 + + simulation, imas_file, error = _get_simulation_and_imas_file( + sim_id, file_uuid_str + ) + if error: + payload, status = error + if file_uuid_str and status == 404 and "File " in payload["error"]: + return ( + { + "error": ( + f"File {file_uuid_str} not found or is not an IMAS " + "output for this simulation" + ) + }, + 404, + ) + return payload, status + + segments = [s for s in path.split("/") if s] + if not segments: + return {"error": "'path' must not be empty"}, 400 + + ids_name = segments[0] + field_segments = segments[1:] + + try: + value, shape, coordinate_path = _fetch_field( + str(imas_file.uri), ids_name, tuple(field_segments), occurrence + ) + except (ValueError, AttributeError, IndexError, KeyError) as exc: + return {"error": f"Invalid IDS path '{path}': {exc}"}, 400 + except (ImasError,) as exc: + return {"error": f"Failed to open IMAS data: {exc}"}, 500 + except Exception as exc: + msg = str(exc) + status = 404 if "is empty" in msg or "not found" in msg.lower() else 500 + return {"error": msg}, status + + return { + "simulation": str(simulation.uuid), + "file_uuid": str(imas_file.uuid), + "path": path, + "occurrence": occurrence, + "value": value, + "shape": shape, + "coordinate": coordinate_path, + } From 05b8818a65c67374363c2ed67c6dde3b6bd3227c Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 28 Apr 2026 16:20:49 +0200 Subject: [PATCH 02/10] fixed formatting --- src/simdb/remote/apis/v1_2/simulation_data.py | 39 ++++++++++++++----- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index 482f17a..41fe5ce 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -14,7 +14,12 @@ from simdb.cli.manifest import DataObject from simdb.database import DatabaseError -from simdb.imas.utils import FLOAT_MISSING_VALUE, INT_MISSING_VALUE, ImasError, open_imas +from simdb.imas.utils import ( + FLOAT_MISSING_VALUE, + INT_MISSING_VALUE, + ImasError, + open_imas, +) from simdb.remote.core.auth import User, requires_auth from simdb.remote.core.cache import cache from simdb.remote.core.typing import current_app @@ -34,7 +39,10 @@ def _to_python(value: Any) -> Any: def _clean(v): if isinstance(v, float) and ( - v != v or v == float("inf") or v == float("-inf") or v == FLOAT_MISSING_VALUE + v != v + or v == float("inf") + or v == float("-inf") + or v == FLOAT_MISSING_VALUE ): return None if isinstance(v, list): @@ -54,6 +62,7 @@ def _clean(v): return bool(value) return value + # TODO Replace this logic with slicing when supported by imas-python. # TODO Add support for [:], [:-1], and [2:4:2] python slicing syntax. def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): @@ -64,8 +73,11 @@ def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): - a plain name → attribute access (IDSStructure child node) """ ids_obj = entry.get( - ids_name, occurrence, - lazy=True, autoconvert=False, ignore_unknown_dd_version=True, + ids_name, + occurrence, + lazy=True, + autoconvert=False, + ignore_unknown_dd_version=True, ) node = ids_obj for segment in field_segments: @@ -88,6 +100,7 @@ def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): coordinate_path = None try: + def _replace_placeholder(m, _segs=field_segments): idx = next((s for s in _segs if s.isdigit()), "0") return "/" + idx + "/" @@ -102,7 +115,9 @@ def _replace_placeholder(m, _segs=field_segments): return _to_python(node.value), node_shape, coordinate_path -def _fetch_field(uri_str: str, ids_name: str, field_segments: tuple, occurrence: int) -> tuple: +def _fetch_field( + uri_str: str, ids_name: str, field_segments: tuple, occurrence: int +) -> tuple: """Open the IMAS entry, traverse the path, and return (value, shape, coordinate_path). Scalar results (``shape is None``) are written into the response cache so @@ -110,12 +125,13 @@ def _fetch_field(uri_str: str, ids_name: str, field_segments: tuple, occurrence: *not* cached: caching large numpy-derived lists would create persistent memory pressure and could fill the cache backend with multi-MB payloads. """ - if ids_name and not field_segments: # bare IDS name only – no leaf, skip cache probe + if ( + ids_name and not field_segments + ): # bare IDS name only – no leaf, skip cache probe pass else: cache_key = ( - f"simdb:field:{uri_str}:{ids_name}:" - f"{'/' .join(field_segments)}:{occurrence}" + f"simdb:field:{uri_str}:{ids_name}:{'/'.join(field_segments)}:{occurrence}" ) cached = cache.get(cache_key) if cached is not None: @@ -139,8 +155,10 @@ def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: str | None): imas_outputs = [f for f in simulation.outputs if f.type == DataObject.Type.IMAS] if not imas_outputs: - return None, None, ( - {"error": f"Simulation {sim_id} has no IMAS output files"}, 404 + return ( + None, + None, + ({"error": f"Simulation {sim_id} has no IMAS output files"}, 404), ) if not file_uuid_str: @@ -160,6 +178,7 @@ def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: str | None): # Endpoints + @api.route("/simulation//data") class SimulationImasData(Resource): @requires_auth() From 66a6ba7d1d1b22a12ec00ba572fdb2aae77a3648 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Wed, 29 Apr 2026 09:20:56 +0200 Subject: [PATCH 03/10] fixed linting and typing issues --- src/simdb/remote/apis/v1_2/simulation_data.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index 41fe5ce..1b67fdd 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -5,7 +5,7 @@ import re import uuid as _uuid -from typing import Any +from typing import Any, Optional import numpy as np from flask import request @@ -66,11 +66,11 @@ def _clean(v): # TODO Replace this logic with slicing when supported by imas-python. # TODO Add support for [:], [:-1], and [2:4:2] python slicing syntax. def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): - """Walk *field_segments* inside *ids_name* and return (value, shape, coordinate_path). + """Walk inside *ids_name* and return (value, shape, coordinate_path). Each segment is either: - - a non-negative integer string → array-of-structures index - - a plain name → attribute access (IDSStructure child node) + - a non-negative integer string: array-of-structures index + - a plain name: attribute access (IDSStructure child node) """ ids_obj = entry.get( ids_name, @@ -86,15 +86,15 @@ def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): else: try: node = getattr(node, segment) - except AttributeError: - raise ValueError(f"segment '{segment}' not found in IDS path") + except AttributeError as err: + raise ValueError(f"segment '{segment}' not found in IDS path") from err if not isinstance(node, IDSPrimitive): raise ValueError( f"path does not point to a scalar/array leaf " f"(reached {type(node).__name__}); add more path segments" ) if not node.has_value: - raise ValueError(f"field is not populated (no data written)") + raise ValueError("field is not populated (no data written)") node_shape = list(node.shape) if node.metadata.ndim > 0 else None @@ -118,7 +118,7 @@ def _replace_placeholder(m, _segs=field_segments): def _fetch_field( uri_str: str, ids_name: str, field_segments: tuple, occurrence: int ) -> tuple: - """Open the IMAS entry, traverse the path, and return (value, shape, coordinate_path). + """Open the IMAS entry and return (value, shape, coordinate_path). Scalar results (``shape is None``) are written into the response cache so that repeated requests skip the IMAS open. Array values are intentionally @@ -127,7 +127,7 @@ def _fetch_field( """ if ( ids_name and not field_segments - ): # bare IDS name only – no leaf, skip cache probe + ): # bare IDS name only - no leaf, skip cache probe pass else: cache_key = ( @@ -142,12 +142,12 @@ def _fetch_field( result = _traverse_path(entry, ids_name, list(field_segments), occurrence) _value, shape, _coord = result - if shape is None: # scalar leaf – safe to persist in cache + if shape is None: # scalar leaf - safe to persist in cache cache.set(cache_key, result) # type: ignore[possibly-undefined] return result -def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: str | None): +def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: Optional[str]): try: simulation = current_app.db.get_simulation(sim_id) except DatabaseError as exc: @@ -187,7 +187,8 @@ def get(self, sim_id: str, user: User): Query parameters ---------------- - path (required) IDS path, e.g. ``core_profiles/profiles_1d/0/electrons/density`` + path (required) IDS path, e.g. + ``core_profiles/profiles_1d/0/electrons/density`` file_uuid (optional) UUID of an IMAS output file occurrence (optional) IDS occurrence index (default 0) """ @@ -234,7 +235,7 @@ def get(self, sim_id: str, user: User): ) except (ValueError, AttributeError, IndexError, KeyError) as exc: return {"error": f"Invalid IDS path '{path}': {exc}"}, 400 - except (ImasError,) as exc: + except ImasError as exc: return {"error": f"Failed to open IMAS data: {exc}"}, 500 except Exception as exc: msg = str(exc) From 0aec0153771dea0c4f963c9a8e493a93b669ff44 Mon Sep 17 00:00:00 2001 From: Prasad Date: Mon, 4 May 2026 10:27:04 +0200 Subject: [PATCH 04/10] Apply suggestions from code review Co-authored-by: Simon Pinches --- src/simdb/remote/apis/v1_2/simulation_data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index 1b67fdd..afb1a33 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -1,6 +1,6 @@ -"""Simulation IMAS data endpoint: /data. +"""IMAS simulation data endpoint: /data. -TODO: Temporal solution to retrive data (Use IBEX backend) +TODO: Temporary solution to retrieve data (for IBEX backend) """ import re @@ -63,7 +63,7 @@ def _clean(v): return value -# TODO Replace this logic with slicing when supported by imas-python. +# TODO Replace this logic with slicing when supported by IMAS-Python. # TODO Add support for [:], [:-1], and [2:4:2] python slicing syntax. def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): """Walk inside *ids_name* and return (value, shape, coordinate_path). From 8a9ee4af63a067d162c5a6af8da36743f580cd72 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Mon, 4 May 2026 13:45:44 +0200 Subject: [PATCH 05/10] use pydantic models for input and output --- src/simdb/remote/apis/v1_2/simulation_data.py | 124 +++++++----------- src/simdb/remote/models.py | 47 +++++++ 2 files changed, 95 insertions(+), 76 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index afb1a33..13334b2 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -4,11 +4,10 @@ """ import re -import uuid as _uuid -from typing import Any, Optional +from typing import Annotated, Any, Optional +from uuid import UUID import numpy as np -from flask import request from flask_restx import Namespace, Resource from imas.ids_primitive import IDSPrimitive @@ -22,7 +21,14 @@ ) from simdb.remote.core.auth import User, requires_auth from simdb.remote.core.cache import cache +from simdb.remote.core.pydantic_utils import ( + Query, + ResponseException, + ServerException, + pydantic_validate, +) from simdb.remote.core.typing import current_app +from simdb.remote.models import ImasDataQueryParams, ImasDataResponse from simdb.uri import URI api = Namespace("data", path="/") @@ -147,33 +153,29 @@ def _fetch_field( return result -def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: Optional[str]): +def _get_simulation_and_imas_file(sim_id: str, file_uuid: Optional[UUID]): try: simulation = current_app.db.get_simulation(sim_id) except DatabaseError as exc: - return None, None, ({"error": str(exc)}, 404) + raise ResponseException(str(exc), 404) from exc imas_outputs = [f for f in simulation.outputs if f.type == DataObject.Type.IMAS] if not imas_outputs: - return ( - None, - None, - ({"error": f"Simulation {sim_id} has no IMAS output files"}, 404), + raise ResponseException( + f"Simulation {sim_id} has no IMAS output files", 404 ) - if not file_uuid_str: - return simulation, imas_outputs[0], None - - try: - target_uuid = _uuid.UUID(file_uuid_str) - except ValueError: - return None, None, ({"error": f"Invalid file_uuid: {file_uuid_str!r}"}, 400) + if file_uuid is None: + return simulation, imas_outputs[0] - imas_file = next((f for f in imas_outputs if f.uuid == target_uuid), None) + imas_file = next((f for f in imas_outputs if f.uuid == file_uuid), None) if imas_file is None: - return None, None, ({"error": f"File {file_uuid_str} not found"}, 404) + raise ResponseException( + f"File {file_uuid} not found or is not an IMAS output for this simulation", + 404, + ) - return simulation, imas_file, None + return simulation, imas_file # Endpoints @@ -182,72 +184,42 @@ def _get_simulation_and_imas_file(sim_id: str, file_uuid_str: Optional[str]): @api.route("/simulation//data") class SimulationImasData(Resource): @requires_auth() - def get(self, sim_id: str, user: User): - """Return the value at a given IDS path for a simulation's IMAS output. - - Query parameters - ---------------- - path (required) IDS path, e.g. - ``core_profiles/profiles_1d/0/electrons/density`` - file_uuid (optional) UUID of an IMAS output file - occurrence (optional) IDS occurrence index (default 0) - """ - path = request.args.get("path", "").strip() - if not path: - return {"error": "Query parameter 'path' is required"}, 400 - - file_uuid_str = request.args.get("file_uuid", "").strip() or None - - try: - occurrence = int(request.args.get("occurrence", "0")) - except ValueError: - return {"error": "'occurrence' must be a non-negative integer"}, 400 - if occurrence < 0: - return {"error": "'occurrence' must be a non-negative integer"}, 400 - - simulation, imas_file, error = _get_simulation_and_imas_file( - sim_id, file_uuid_str + @pydantic_validate(api) + def get( + self, + sim_id: str, + user: User, + params: Annotated[ImasDataQueryParams, Query()], + ) -> ImasDataResponse: + """Return the value at a given IDS path for a simulation's IMAS output.""" + simulation, imas_file = _get_simulation_and_imas_file( + sim_id, params.file_uuid ) - if error: - payload, status = error - if file_uuid_str and status == 404 and "File " in payload["error"]: - return ( - { - "error": ( - f"File {file_uuid_str} not found or is not an IMAS " - "output for this simulation" - ) - }, - 404, - ) - return payload, status - - segments = [s for s in path.split("/") if s] - if not segments: - return {"error": "'path' must not be empty"}, 400 + segments = [s for s in params.path.split("/") if s] ids_name = segments[0] field_segments = segments[1:] try: value, shape, coordinate_path = _fetch_field( - str(imas_file.uri), ids_name, tuple(field_segments), occurrence + str(imas_file.uri), ids_name, tuple(field_segments), params.occurrence ) except (ValueError, AttributeError, IndexError, KeyError) as exc: - return {"error": f"Invalid IDS path '{path}': {exc}"}, 400 + raise ResponseException(f"Invalid IDS path '{params.path}': {exc}") except ImasError as exc: - return {"error": f"Failed to open IMAS data: {exc}"}, 500 + raise ServerException(f"Failed to open IMAS data: {exc}") except Exception as exc: msg = str(exc) - status = 404 if "is empty" in msg or "not found" in msg.lower() else 500 - return {"error": msg}, status - - return { - "simulation": str(simulation.uuid), - "file_uuid": str(imas_file.uuid), - "path": path, - "occurrence": occurrence, - "value": value, - "shape": shape, - "coordinate": coordinate_path, - } + if "is empty" in msg or "not found" in msg.lower(): + raise ResponseException(msg, 404) + raise ServerException(msg) + + return ImasDataResponse( + simulation=str(simulation.uuid), + file_uuid=str(imas_file.uuid), + path=params.path, + occurrence=params.occurrence, + value=value, + shape=shape, + coordinate=coordinate_path, + ) diff --git a/src/simdb/remote/models.py b/src/simdb/remote/models.py index 62cee41..2088b70 100644 --- a/src/simdb/remote/models.py +++ b/src/simdb/remote/models.py @@ -28,6 +28,7 @@ Field, InstanceOf, PlainSerializer, + field_validator, model_validator, ) from pydantic import ( @@ -553,6 +554,52 @@ class StagingDirectoryResponse(BaseModel): """Path to the staging dir.""" +class ImasDataQueryParams(BaseModel): + """Query parameters for the IMAS field-data endpoint.""" + + path: str + """IDS path, e.g. ``core_profiles/profiles_1d/0/electrons/density``.""" + file_uuid: Optional[UUID] = None + """UUID of a specific IMAS output file (optional).""" + occurrence: int = Field(0, ge=0) + """IDS occurrence index (default 0).""" + + @field_validator("path", mode="before") + @classmethod + def _strip_path(cls, v: Any) -> str: + v = str(v).strip() + if not v: + raise ValueError("must not be empty") + return v + + @field_validator("file_uuid", mode="before") + @classmethod + def _strip_file_uuid(cls, v: Any) -> Any: + if v is None: + return None + stripped = str(v).strip() + return stripped if stripped else None + + +class ImasDataResponse(BaseModel): + """Response from the IMAS field-data endpoint.""" + + simulation: str + """UUID of the simulation.""" + file_uuid: str + """UUID of the IMAS output file.""" + path: str + """Requested IDS path.""" + occurrence: int + """IDS occurrence index.""" + value: Any + """Value at the requested IDS path.""" + shape: Optional[List[int]] = None + """Shape of the returned array, or ``None`` for scalars.""" + coordinate: Optional[str] = None + """Coordinate path for the first dimension, if available.""" + + class ErrorResponse(BaseModel): """Response model for server errors.""" From 2c01fb1fe0bd92c00b057942984a05e7078625df Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 5 May 2026 11:15:31 +0200 Subject: [PATCH 06/10] resolved pull request comments from Maarten --- src/simdb/remote/apis/v1_2/simulation_data.py | 179 ++++++++---------- src/simdb/remote/models.py | 28 ++- 2 files changed, 98 insertions(+), 109 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index 13334b2..30bcb9f 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -3,24 +3,21 @@ TODO: Temporary solution to retrieve data (for IBEX backend) """ -import re from typing import Annotated, Any, Optional from uuid import UUID import numpy as np from flask_restx import Namespace, Resource +from imas.ids_defs import EMPTY_COMPLEX, EMPTY_FLOAT, EMPTY_INT from imas.ids_primitive import IDSPrimitive from simdb.cli.manifest import DataObject from simdb.database import DatabaseError from simdb.imas.utils import ( - FLOAT_MISSING_VALUE, - INT_MISSING_VALUE, ImasError, open_imas, ) from simdb.remote.core.auth import User, requires_auth -from simdb.remote.core.cache import cache from simdb.remote.core.pydantic_utils import ( Query, ResponseException, @@ -28,7 +25,7 @@ pydantic_validate, ) from simdb.remote.core.typing import current_app -from simdb.remote.models import ImasDataQueryParams, ImasDataResponse +from simdb.remote.models import ImasDataQueryParams, ImasDataResponse, QuantityData from simdb.uri import URI api = Namespace("data", path="/") @@ -45,10 +42,7 @@ def _to_python(value: Any) -> Any: def _clean(v): if isinstance(v, float) and ( - v != v - or v == float("inf") - or v == float("-inf") - or v == FLOAT_MISSING_VALUE + v != v or v == float("inf") or v == float("-inf") or v == EMPTY_FLOAT ): return None if isinstance(v, list): @@ -58,26 +52,68 @@ def _clean(v): return _clean(flat) if isinstance(value, np.integer): v = int(value) - return None if v == INT_MISSING_VALUE else v + return None if v == EMPTY_INT else v if isinstance(value, np.floating): v = float(value) - return None if (np.isnan(v) or np.isinf(v) or v == FLOAT_MISSING_VALUE) else v + return None if (np.isnan(v) or np.isinf(v) or v == EMPTY_FLOAT) else v if isinstance(value, np.complexfloating): - return {"real": float(value.real), "imag": float(value.imag)} + r, i = float(value.real), float(value.imag) + if r == EMPTY_COMPLEX.real and i == EMPTY_COMPLEX.imag: + return None + return {"real": r, "imag": i} if isinstance(value, np.bool_): return bool(value) return value -# TODO Replace this logic with slicing when supported by IMAS-Python. -# TODO Add support for [:], [:-1], and [2:4:2] python slicing syntax. -def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): - """Walk inside *ids_name* and return (value, shape, coordinate_path). +def _parse_ids_path(path: str) -> tuple: + """Parse ``ids_name[:occurrence][/ids_path]`` into a 3-tuple""" + head, _, ids_path = path.partition("/") + if ":" in head: + ids_name, occ_str = head.split(":", 1) + try: + occurrence = int(occ_str) + except ValueError as exc: + raise ValueError( + f"Invalid occurrence in path '{path}': '{occ_str}'" + ) from exc + else: + ids_name, occurrence = head, 0 + return ids_name, occurrence, ids_path + + +def _get_coordinates(node: IDSPrimitive, ids_name: str) -> list: + """Return a :class:`QuantityData` for each coordinate dimension of *node*.""" + coords = [] + for i in range(node.metadata.ndim): + coord = node.coordinates[i] + if isinstance(coord, IDSPrimitive): + data = ( + _to_python(coord.value) + if coord.has_value + else list(range(node.shape[i])) + ) + coords.append( + QuantityData( + name=f"{ids_name}/{coord._path}", + units=coord.metadata.units or "", + data=data, + ) + ) + else: + # Index-based coordinate: coord is already a numpy arange + coords.append( + QuantityData( + name=f"dim_{i + 1}", + units="", + data=coord.tolist(), + ) + ) + return coords - Each segment is either: - - a non-negative integer string: array-of-structures index - - a plain name: attribute access (IDSStructure child node) - """ + +def _get_ids_node(entry, ids_name: str, occurrence: int, ids_path: str) -> IDSPrimitive: + """Return the :class:`IDSPrimitive` leaf node at *ids_path* inside *ids_name*.""" ids_obj = entry.get( ids_name, occurrence, @@ -85,15 +121,7 @@ def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): autoconvert=False, ignore_unknown_dd_version=True, ) - node = ids_obj - for segment in field_segments: - if segment.isdigit(): - node = node[int(segment)] - else: - try: - node = getattr(node, segment) - except AttributeError as err: - raise ValueError(f"segment '{segment}' not found in IDS path") from err + node = ids_obj[ids_path] if ids_path else ids_obj if not isinstance(node, IDSPrimitive): raise ValueError( f"path does not point to a scalar/array leaf " @@ -101,56 +129,7 @@ def _traverse_path(entry, ids_name: str, field_segments: list, occurrence: int): ) if not node.has_value: raise ValueError("field is not populated (no data written)") - - node_shape = list(node.shape) if node.metadata.ndim > 0 else None - - coordinate_path = None - try: - - def _replace_placeholder(m, _segs=field_segments): - idx = next((s for s in _segs if s.isdigit()), "0") - return "/" + idx + "/" - - for coord in node.metadata.coordinates: - clean = re.sub(r"\([^)]+\)/", _replace_placeholder, str(coord)) - coordinate_path = ids_name + "/" + clean - break - except Exception: - pass - - return _to_python(node.value), node_shape, coordinate_path - - -def _fetch_field( - uri_str: str, ids_name: str, field_segments: tuple, occurrence: int -) -> tuple: - """Open the IMAS entry and return (value, shape, coordinate_path). - - Scalar results (``shape is None``) are written into the response cache so - that repeated requests skip the IMAS open. Array values are intentionally - *not* cached: caching large numpy-derived lists would create persistent - memory pressure and could fill the cache backend with multi-MB payloads. - """ - if ( - ids_name and not field_segments - ): # bare IDS name only - no leaf, skip cache probe - pass - else: - cache_key = ( - f"simdb:field:{uri_str}:{ids_name}:{'/'.join(field_segments)}:{occurrence}" - ) - cached = cache.get(cache_key) - if cached is not None: - return cached - - entry = open_imas(URI(uri_str)) - with entry: - result = _traverse_path(entry, ids_name, list(field_segments), occurrence) - - _value, shape, _coord = result - if shape is None: # scalar leaf - safe to persist in cache - cache.set(cache_key, result) # type: ignore[possibly-undefined] - return result + return node def _get_simulation_and_imas_file(sim_id: str, file_uuid: Optional[UUID]): @@ -161,9 +140,7 @@ def _get_simulation_and_imas_file(sim_id: str, file_uuid: Optional[UUID]): imas_outputs = [f for f in simulation.outputs if f.type == DataObject.Type.IMAS] if not imas_outputs: - raise ResponseException( - f"Simulation {sim_id} has no IMAS output files", 404 - ) + raise ResponseException(f"Simulation {sim_id} has no IMAS output files", 404) if file_uuid is None: return simulation, imas_outputs[0] @@ -192,34 +169,38 @@ def get( params: Annotated[ImasDataQueryParams, Query()], ) -> ImasDataResponse: """Return the value at a given IDS path for a simulation's IMAS output.""" - simulation, imas_file = _get_simulation_and_imas_file( - sim_id, params.file_uuid - ) + simulation, imas_file = _get_simulation_and_imas_file(sim_id, params.file_uuid) - segments = [s for s in params.path.split("/") if s] - ids_name = segments[0] - field_segments = segments[1:] + try: + ids_name, occurrence, ids_path = _parse_ids_path(params.path) + except ValueError as exc: + raise ResponseException(str(exc)) from exc try: - value, shape, coordinate_path = _fetch_field( - str(imas_file.uri), ids_name, tuple(field_segments), params.occurrence - ) + entry = open_imas(URI(str(imas_file.uri))) + with entry: + node = _get_ids_node(entry, ids_name, occurrence, ids_path) + coordinates = _get_coordinates(node, ids_name) + field = QuantityData( + name=f"{ids_name}/{node._path}", + units=node.metadata.units or "", + data=_to_python(node.value), + ) except (ValueError, AttributeError, IndexError, KeyError) as exc: - raise ResponseException(f"Invalid IDS path '{params.path}': {exc}") + raise ResponseException(f"Invalid IDS path '{params.path}': {exc}") from exc except ImasError as exc: - raise ServerException(f"Failed to open IMAS data: {exc}") + raise ServerException(f"Failed to open IMAS data: {exc}") from exc except Exception as exc: msg = str(exc) if "is empty" in msg or "not found" in msg.lower(): - raise ResponseException(msg, 404) - raise ServerException(msg) + raise ResponseException(msg, 404) from exc + raise ServerException(msg) from exc return ImasDataResponse( simulation=str(simulation.uuid), file_uuid=str(imas_file.uuid), path=params.path, - occurrence=params.occurrence, - value=value, - shape=shape, - coordinate=coordinate_path, + occurrence=occurrence, + field=field, + coordinates=coordinates, ) diff --git a/src/simdb/remote/models.py b/src/simdb/remote/models.py index 2088b70..b8936cd 100644 --- a/src/simdb/remote/models.py +++ b/src/simdb/remote/models.py @@ -558,11 +558,9 @@ class ImasDataQueryParams(BaseModel): """Query parameters for the IMAS field-data endpoint.""" path: str - """IDS path, e.g. ``core_profiles/profiles_1d/0/electrons/density``.""" + """Full IDS path including IDS name and optional occurrence.""" file_uuid: Optional[UUID] = None """UUID of a specific IMAS output file (optional).""" - occurrence: int = Field(0, ge=0) - """IDS occurrence index (default 0).""" @field_validator("path", mode="before") @classmethod @@ -578,7 +576,19 @@ def _strip_file_uuid(cls, v: Any) -> Any: if v is None: return None stripped = str(v).strip() - return stripped if stripped else None + return stripped or None + + +class QuantityData(BaseModel): + """A named, unit-bearing data quantity (field value or coordinate).""" + + name: str + """IDS path of this quantity relative to the IDS root""" + units: str + """Physical units of the quantity""" + data: Any + """Data value: a Python scalar for 0-D quantities, or a nested list for + arrays. """ class ImasDataResponse(BaseModel): @@ -592,12 +602,10 @@ class ImasDataResponse(BaseModel): """Requested IDS path.""" occurrence: int """IDS occurrence index.""" - value: Any - """Value at the requested IDS path.""" - shape: Optional[List[int]] = None - """Shape of the returned array, or ``None`` for scalars.""" - coordinate: Optional[str] = None - """Coordinate path for the first dimension, if available.""" + field: QuantityData + """The requested quantity""" + coordinates: List[QuantityData] + """Coordinates for each dimension of *field*, in dimension order.""" class ErrorResponse(BaseModel): From 4de51d93d33f45011c5251100281fc4910a8dbf1 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 5 May 2026 11:29:41 +0200 Subject: [PATCH 07/10] removed _bool check --- src/simdb/remote/apis/v1_2/simulation_data.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index 30bcb9f..76faf86 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -61,8 +61,6 @@ def _clean(v): if r == EMPTY_COMPLEX.real and i == EMPTY_COMPLEX.imag: return None return {"real": r, "imag": i} - if isinstance(value, np.bool_): - return bool(value) return value From 678367628451cf5fa2583080827cd118db436ab6 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 5 May 2026 11:39:52 +0200 Subject: [PATCH 08/10] use namedtuple when returning function values --- src/simdb/remote/apis/v1_2/simulation_data.py | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index 76faf86..b3b141a 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -3,7 +3,7 @@ TODO: Temporary solution to retrieve data (for IBEX backend) """ -from typing import Annotated, Any, Optional +from typing import Annotated, Any, NamedTuple, Optional from uuid import UUID import numpy as np @@ -130,7 +130,14 @@ def _get_ids_node(entry, ids_name: str, occurrence: int, ids_path: str) -> IDSPr return node -def _get_simulation_and_imas_file(sim_id: str, file_uuid: Optional[UUID]): +class _SimulationImasFile(NamedTuple): + simulation: Any + imas_file: Any + + +def _get_simulation_and_imas_file( + sim_id: str, file_uuid: Optional[UUID] +) -> _SimulationImasFile: try: simulation = current_app.db.get_simulation(sim_id) except DatabaseError as exc: @@ -141,7 +148,7 @@ def _get_simulation_and_imas_file(sim_id: str, file_uuid: Optional[UUID]): raise ResponseException(f"Simulation {sim_id} has no IMAS output files", 404) if file_uuid is None: - return simulation, imas_outputs[0] + return _SimulationImasFile(simulation, imas_outputs[0]) imas_file = next((f for f in imas_outputs if f.uuid == file_uuid), None) if imas_file is None: @@ -150,7 +157,7 @@ def _get_simulation_and_imas_file(sim_id: str, file_uuid: Optional[UUID]): 404, ) - return simulation, imas_file + return _SimulationImasFile(simulation, imas_file) # Endpoints @@ -167,7 +174,7 @@ def get( params: Annotated[ImasDataQueryParams, Query()], ) -> ImasDataResponse: """Return the value at a given IDS path for a simulation's IMAS output.""" - simulation, imas_file = _get_simulation_and_imas_file(sim_id, params.file_uuid) + result = _get_simulation_and_imas_file(sim_id, params.file_uuid) try: ids_name, occurrence, ids_path = _parse_ids_path(params.path) @@ -175,7 +182,7 @@ def get( raise ResponseException(str(exc)) from exc try: - entry = open_imas(URI(str(imas_file.uri))) + entry = open_imas(URI(str(result.imas_file.uri))) with entry: node = _get_ids_node(entry, ids_name, occurrence, ids_path) coordinates = _get_coordinates(node, ids_name) @@ -195,8 +202,8 @@ def get( raise ServerException(msg) from exc return ImasDataResponse( - simulation=str(simulation.uuid), - file_uuid=str(imas_file.uuid), + simulation=str(result.simulation.uuid), + file_uuid=str(result.imas_file.uuid), path=params.path, occurrence=occurrence, field=field, From ca0b9b4efa7117f83d2a1ec3e62481176ce83c9f Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 5 May 2026 11:55:31 +0200 Subject: [PATCH 09/10] used node.has_value instead of manual checking scalar types --- src/simdb/remote/apis/v1_2/simulation_data.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index b3b141a..aad05bb 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -8,7 +8,7 @@ import numpy as np from flask_restx import Namespace, Resource -from imas.ids_defs import EMPTY_COMPLEX, EMPTY_FLOAT, EMPTY_INT +from imas.ids_defs import EMPTY_FLOAT from imas.ids_primitive import IDSPrimitive from simdb.cli.manifest import DataObject @@ -37,6 +37,7 @@ def _to_python(value: Any) -> Any: """Convert a value returned by IDSPrimitive.value to a JSON-serialisable Python object.""" + print(type(value)) if isinstance(value, np.ndarray): flat = value.tolist() @@ -50,17 +51,6 @@ def _clean(v): return v return _clean(flat) - if isinstance(value, np.integer): - v = int(value) - return None if v == EMPTY_INT else v - if isinstance(value, np.floating): - v = float(value) - return None if (np.isnan(v) or np.isinf(v) or v == EMPTY_FLOAT) else v - if isinstance(value, np.complexfloating): - r, i = float(value.real), float(value.imag) - if r == EMPTY_COMPLEX.real and i == EMPTY_COMPLEX.imag: - return None - return {"real": r, "imag": i} return value From 9f2a7b00593f4c5b4110648f04ec70da3943bd46 Mon Sep 17 00:00:00 2001 From: prasad-sawantdesai Date: Tue, 5 May 2026 16:19:32 +0200 Subject: [PATCH 10/10] remove leftover print statement --- src/simdb/remote/apis/v1_2/simulation_data.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/simdb/remote/apis/v1_2/simulation_data.py b/src/simdb/remote/apis/v1_2/simulation_data.py index aad05bb..b64bacf 100644 --- a/src/simdb/remote/apis/v1_2/simulation_data.py +++ b/src/simdb/remote/apis/v1_2/simulation_data.py @@ -37,7 +37,6 @@ def _to_python(value: Any) -> Any: """Convert a value returned by IDSPrimitive.value to a JSON-serialisable Python object.""" - print(type(value)) if isinstance(value, np.ndarray): flat = value.tolist()