Skip to content

Commit 7ab02c7

Browse files
committed
WIP: lazy loading for netCDF backend
1 parent 0c835a1 commit 7ab02c7

File tree

5 files changed

+99
-23
lines changed

5 files changed

+99
-23
lines changed

imas/backends/imas_core/al_context.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
import numpy
1212

13+
import imas
1314
from imas.backends.imas_core.imas_interface import ll_interface
1415
from imas.exception import LowlevelError
1516
from imas.ids_defs import (
@@ -280,6 +281,9 @@ def __init__(
280281
self.context = None
281282
"""Potential weak reference to opened context."""
282283

284+
def get_child(self, child):
    """Lazily load the data of a single child node through this context.

    Delegates to ``db_entry_helpers._get_child``, passing this context as the
    source to read from.

    Args:
        child: The IDS node whose data should be loaded.
    """
    # Import at call time: resolving the helper through the attribute chain
    # ``imas.backends.imas_core.db_entry_helpers`` only works when some other
    # module has already imported that submodule. A function-scope import is
    # explicit and does not add a module-level import to this file.
    from imas.backends.imas_core.db_entry_helpers import _get_child

    _get_child(child, self)
286+
283287
def get_context(self) -> ALContext:
284288
"""Create and yield the actual ALContext."""
285289
if self.dbentry._db_ctx is not self.dbentry_ctx:

imas/backends/imas_core/db_entry_helpers.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def get_children(
2222
structure: IDSStructure,
2323
ctx: ALContext,
2424
time_mode: int,
25-
nbc_map: Optional[NBCPathMap],
25+
nbc_map: Optional["NBCPathMap"],
2626
) -> None:
2727
"""Recursively get all children of an IDSStructure."""
2828
# NOTE: changes in this method must be propagated to _get_child and vice versa
@@ -77,15 +77,11 @@ def get_children(
7777
getattr(structure, name)._IDSPrimitive__value = data
7878

7979

80-
def _get_child(child: IDSBase, ctx: Optional[LazyALContext]):
80+
def _get_child(child: IDSBase, ctx: LazyALContext):
8181
"""Get a single child when required (lazy loading)."""
8282
# NOTE: changes in this method must be propagated to get_children and vice versa
8383
# Performance: this method is specialized for the lazy get
8484

85-
# ctx can be None when the parent structure does not exist in the on-disk DD version
86-
if ctx is None:
87-
return # There is no data to be loaded
88-
8985
time_mode = ctx.time_mode
9086
if time_mode == IDS_TIME_MODE_INDEPENDENT and child.metadata.type.is_dynamic:
9187
return # skip dynamic (time-dependent) nodes
@@ -148,7 +144,7 @@ def put_children(
148144
ctx: ALContext,
149145
time_mode: int,
150146
is_slice: bool,
151-
nbc_map: Optional[NBCPathMap],
147+
nbc_map: Optional["NBCPathMap"],
152148
verify_maxoccur: bool,
153149
) -> None:
154150
"""Recursively put all children of an IDSStructure"""

imas/backends/netcdf/db_entry_nc.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -108,10 +108,6 @@ def get(
108108
else:
109109
func = "get_sample"
110110
raise NotImplementedError(f"`{func}` is not available for netCDF files.")
111-
if lazy:
112-
raise NotImplementedError(
113-
"Lazy loading is not implemented for netCDF files."
114-
)
115111

116112
# Check if the IDS/occurrence exists, and obtain the group it is stored in
117113
try:
@@ -123,7 +119,7 @@ def get(
123119

124120
# Load data into the destination IDS
125121
if self._ds_factory.dd_version == destination._dd_version:
126-
NC2IDS(group, destination, destination.metadata, None).run()
122+
NC2IDS(group, destination, destination.metadata, None).run(lazy)
127123
else:
128124
# Construct relevant NBCPathMap, the one we get from DBEntry has the reverse
129125
# mapping from what we need. The imas_core logic does the mapping from
@@ -135,7 +131,7 @@ def get(
135131
nbc_map = ddmap.old_to_new if source_is_older else ddmap.new_to_old
136132
NC2IDS(
137133
group, destination, self._ds_factory.new(ids_name).metadata, nbc_map
138-
).run()
134+
).run(lazy)
139135

140136
return destination
141137

imas/backends/netcdf/nc2ids.py

Lines changed: 82 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import Iterator, List, Optional, Tuple
44

55
import netCDF4
6+
import numpy as np
67

78
from imas.backends.netcdf import ids2nc
89
from imas.backends.netcdf.nc_metadata import NCMetadata
@@ -100,6 +101,8 @@ def __init__(
100101
"""NetCDF related metadata."""
101102
self.variables = list(group.variables)
102103
"""List of variable names stored in the netCDF group."""
104+
105+
self._lazy_map = {}
103106
# Don't use masked arrays: they're slow and we'll handle most of the unset
104107
# values through the `:shape` arrays
105108
self.group.set_auto_mask(False)
@@ -113,18 +116,20 @@ def __init__(
113116
"Mandatory variable `ids_properties.homogeneous_time` does not exist."
114117
)
115118
var = group["ids_properties.homogeneous_time"]
116-
self._validate_variable(var, ids.ids_properties.homogeneous_time.metadata)
119+
self._validate_variable(var, ids.metadata["ids_properties/homogeneous_time"])
117120
if var[()] not in [0, 1, 2]:
118121
raise InvalidNetCDFEntry(
119122
f"Invalid value for ids_properties.homogeneous_time: {var[()]}. "
120123
"Was expecting: 0, 1 or 2."
121124
)
122125
self.homogeneous_time = var[()] == IDS_TIME_MODE_HOMOGENEOUS
123126

124-
def run(self) -> None:
127+
def run(self, lazy: bool) -> None:
125128
"""Load the data from the netCDF group into the IDS."""
126129
self.variables.sort()
127130
self.validate_variables()
131+
if lazy:
132+
self.ids._set_lazy_context(LazyContext(self))
128133
for var_name in self.variables:
129134
if var_name.endswith(":shape"):
130135
continue
@@ -157,6 +162,10 @@ def run(self) -> None:
157162
target_metadata = metadata # no conversion required
158163

159164
var = self.group[var_name]
165+
if lazy:
166+
self._lazy_map[target_metadata.path_string] = var
167+
continue
168+
160169
if metadata.data_type is IDSDataType.STRUCT_ARRAY:
161170
if "sparse" in var.ncattrs():
162171
shapes = self.group[var_name + ":shape"][()]
@@ -342,3 +351,74 @@ def _validate_sparsity(
342351
raise variable_error(
343352
shape_var, "dtype", shape_var.dtype, "any integer type"
344353
)
354+
355+
356+
class LazyContext:
    """Context used to lazily load IDS nodes from a netCDF group.

    A context pairs the loader object (``nc2ids``) with the tuple of
    array-of-structures indices that leads to the structure whose children are
    being loaded.
    """

    def __init__(self, nc2ids, index=()):
        """Create a lazy context.

        Args:
            nc2ids: Loader object. This class reads its ``_lazy_map``, ``group``,
                ``ncmeta`` and ``homogeneous_time`` attributes.
            index: Tuple of indices used as the leading index when reading from
                netCDF variables. Empty for nodes outside any array of
                structures.
        """
        self.nc2ids = nc2ids
        self.index = index

    def get_child(self, child):
        """Load ``child`` on demand, or hand it a context for its own children.

        * Array of structures: determine its size and attach a
          ``LazyArrayStructContext``.
        * Structure: attach this same context.
        * Data element: read the value from the netCDF variable (if one was
          registered for its path) and store it on the node.

        Args:
            child: The IDS node that was just created by its parent.
        """
        nc2ids = self.nc2ids
        metadata = child.metadata
        path = metadata.path_string
        data_type = metadata.data_type

        # None when no variable was registered for this path in the lazy map
        var = nc2ids._lazy_map.get(path)

        if data_type is IDSDataType.STRUCT_ARRAY:
            # Determine size of the aos.
            # The group, netCDF metadata and time mode live on the loader, not
            # on this context, so they must be accessed through ``nc2ids``.
            if var is None:
                size = 0
            elif "sparse" in var.ncattrs():
                size = nc2ids.group[var.name + ":shape"][self.index][0]
            else:
                # FIXME: extract dimension name from nc file?
                dim = nc2ids.ncmeta.get_dimensions(
                    metadata.path_string, nc2ids.homogeneous_time
                )[-1]
                size = nc2ids.group.dimensions[dim].size

            child._set_lazy_context(
                LazyArrayStructContext(nc2ids, self.index, size)
            )

        elif data_type is IDSDataType.STRUCTURE:
            # Structures add no index of their own: children share this context
            child._set_lazy_context(self)

        else:  # Data elements
            if var is None:
                return  # nothing to load

            value = None
            if "sparse" in var.ncattrs():
                if metadata.ndim:
                    # N-D data: the companion ":shape" variable holds the
                    # stored shape for this index
                    shape_var = nc2ids.group[var.name + ":shape"]
                    shape = shape_var[self.index]
                    if shape.all():
                        value = var[self.index + tuple(map(slice, shape))]
                else:
                    # 0-D data: a value equal to the fill value is skipped
                    value = var[self.index]
                    if value == getattr(var, "_FillValue", None):
                        value = None  # Skip setting
            else:
                value = var[self.index]

            if value is not None:
                if isinstance(value, np.ndarray):
                    # Convert the numpy array to a read-only view
                    value = value.view()
                    value.flags.writeable = False
                # NOTE: bypassing IDSPrimitive.value.setter logic
                child._IDSPrimitive__value = value
413+
414+
415+
class LazyArrayStructContext(LazyContext):
    """Lazy context attached to an array of structures.

    In addition to the loader and index held by ``LazyContext``, it records the
    number of elements in the array as ``size``.
    """

    def __init__(self, nc2ids, index, size):
        """Create the context.

        Args:
            nc2ids: Loader object, forwarded to ``LazyContext``.
            index: Index tuple, forwarded to ``LazyContext``.
            size: Number of elements in the array of structures.
        """
        super().__init__(nc2ids, index)
        self.size = size

    def get_context(self):
        """Return this object itself."""
        # IDSStructArray expects to get something with a size attribute
        return self

    def iterate_to_index(self, index: int) -> LazyContext:
        """Return the context for the array element at position ``index``.

        The new context shares the loader and extends the index tuple by one
        entry.
        """
        element_index = self.index + (index,)
        return LazyContext(self.nc2ids, element_index)

imas/ids_structure.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,20 @@
66
import logging
77
from copy import deepcopy
88
from types import MappingProxyType
9-
from typing import Generator, List, Optional, Union
9+
from typing import TYPE_CHECKING, Generator, List, Optional, Union
1010

1111
from xxhash import xxh3_64
1212

13-
from imas.backends.imas_core.al_context import LazyALContext
1413
from imas.ids_base import IDSBase, IDSDoc
1514
from imas.ids_identifiers import IDSIdentifier
1615
from imas.ids_metadata import IDSDataType, IDSMetadata
1716
from imas.ids_path import IDSPath
1817
from imas.ids_primitive import IDSPrimitive
1918
from imas.ids_struct_array import IDSStructArray
2019

20+
if TYPE_CHECKING:
21+
from imas.backends.imas_core.al_context import LazyALContext
22+
2123
logger = logging.getLogger(__name__)
2224

2325

@@ -32,7 +34,7 @@ class IDSStructure(IDSBase):
3234

3335
__doc__ = IDSDoc(__doc__)
3436
_children: "MappingProxyType[str, IDSMetadata]"
35-
_lazy_context: Optional[LazyALContext]
37+
_lazy_context: Optional["LazyALContext"]
3638

3739
def __init__(self, parent: IDSBase, metadata: IDSMetadata):
3840
"""Initialize IDSStructure from metadata specification
@@ -62,10 +64,8 @@ def __getattr__(self, name):
6264
child_meta = self._children[name]
6365
child = child_meta._node_type(self, child_meta)
6466
self.__dict__[name] = child # bypass setattr logic below: avoid recursion
65-
if self._lazy: # lazy load the child
66-
from imas.backends.imas_core.db_entry_helpers import _get_child
67-
68-
_get_child(child, self._lazy_context)
67+
if self._lazy and self._lazy_context is not None: # lazy load the child
68+
self._lazy_context.get_child(child)
6969
return child
7070

7171
def _assign_identifier(self, value: Union[IDSIdentifier, str, int]) -> None:
@@ -168,7 +168,7 @@ def __eq__(self, other) -> bool:
168168
return False # Not equal if there is any difference
169169
return True # Equal when there are no differences
170170

171-
def _set_lazy_context(self, ctx: LazyALContext) -> None:
171+
def _set_lazy_context(self, ctx: "LazyALContext") -> None:
172172
"""Called by DBEntry during a lazy get/get_slice.
173173
174174
Set the context that we can use for retrieving our children.

0 commit comments

Comments
 (0)