diff --git a/src/cachier/config.py b/src/cachier/config.py index aa1ae0d5..1732e117 100644 --- a/src/cachier/config.py +++ b/src/cachier/config.py @@ -29,6 +29,12 @@ def _is_numpy_array(value: Any) -> bool: def _hash_numpy_array(hasher: "hashlib._Hash", value: Any) -> None: """Update hasher with NumPy array metadata and buffer content. + The array content is converted to bytes using C-order (row-major) layout + to ensure consistent hashing regardless of memory layout. This operation + always copies the array data into a new bytes object; arrays that are not + C-contiguous (e.g., transposed arrays, strided views, or Fortran-ordered + arrays) are typically slower to serialize, which matters for large arrays. + Parameters ---------- hasher : hashlib._Hash @@ -36,6 +42,13 @@ def _hash_numpy_array(hasher: "hashlib._Hash", value: Any) -> None: value : Any A NumPy ndarray instance. + Notes + ----- + The ``tobytes(order="C")`` call ensures deterministic hash values by + normalizing the memory layout. It always returns a copy of the data, so + hashing temporarily needs memory for a second copy of the array. For + large arrays, prefer C-contiguous inputs, which serialize fastest. + """ hasher.update(b"numpy.ndarray") hasher.update(value.dtype.str.encode("utf-8"))