From 67f37d25bdb7e6711442d259f864a54efeae7221 Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Sat, 16 May 2026 22:01:38 +1000 Subject: [PATCH 1/4] feat: add tuple preservation to AutoSerializer and 'pythonic' alias AutoSerializer now preserves tuples through serialization roundtrips via type markers, matching the existing set/frozenset/datetime/UUID preservation. Add 'pythonic' as a registry alias for 'auto' to better communicate intent. Implementation: tuples are pre-processed into {"__tuple__": True, "value": [...]} markers before msgpack encoding (since msgpack natively flattens tuples to arrays). The object_hook restores them on decode. Closes #78 --- src/cachekit/serializers/__init__.py | 3 ++- src/cachekit/serializers/auto_serializer.py | 30 +++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/cachekit/serializers/__init__.py b/src/cachekit/serializers/__init__.py index 2d240da..c622fd8 100644 --- a/src/cachekit/serializers/__init__.py +++ b/src/cachekit/serializers/__init__.py @@ -52,6 +52,7 @@ def _get_arrow_serializer() -> type: # This allows passing enable_integrity_checking parameter during instantiation SERIALIZER_REGISTRY = { "auto": AutoSerializer, # Python-specific types (NumPy, pandas, datetime optimization) + "pythonic": AutoSerializer, # Alias — preserves Python types (tuples, sets, frozensets, datetime, UUID) "default": StandardSerializer, # Language-agnostic MessagePack for multi-language caches "std": StandardSerializer, # Explicit StandardSerializer alias "arrow": None, # Lazy-loaded: requires pyarrow from [data] extra @@ -121,7 +122,7 @@ def get_serializer(name: str, enable_integrity_checking: bool = True) -> Seriali serializer_class = SERIALIZER_REGISTRY[name] # Instantiate with integrity checking configuration - if name in ("default", "std", "auto", "arrow", "orjson"): + if name in ("default", "std", "auto", "pythonic", "arrow", "orjson"): # All core serializers use enable_integrity_checking parameter serializer = serializer_class(enable_integrity_checking=enable_integrity_checking) else: diff --git a/src/cachekit/serializers/auto_serializer.py b/src/cachekit/serializers/auto_serializer.py index 50f4785..f7ea464 100644 --- a/src/cachekit/serializers/auto_serializer.py +++ b/src/cachekit/serializers/auto_serializer.py @@ -124,6 +124,26 @@ def _safe_hasattr(obj: Any, attr: str) -> bool: return False +def _wrap_tuples(obj: Any) -> Any: + """Recursively wrap tuples in type markers before msgpack encoding. + + Msgpack natively serializes tuples as arrays (same as lists), so the + ``default`` callback is never called for them. This pre-processor + converts tuples to ``{"__tuple__": True, "value": [...]}`` markers + that ``_auto_object_hook`` restores on deserialization. + + Only affects tuples — all other types pass through unchanged and are + handled by msgpack's ``default`` callback (``_auto_default``). + """ + if isinstance(obj, tuple): + return {"__tuple__": True, "value": [_wrap_tuples(x) for x in obj]} + if isinstance(obj, list): + return [_wrap_tuples(x) for x in obj] + if isinstance(obj, dict): + return {k: _wrap_tuples(v) for k, v in obj.items()} + return obj + + def _auto_default(obj: Any) -> Any: """Custom encoder for types not natively supported by MessagePack. @@ -226,6 +246,14 @@ def _auto_object_hook(obj: Any) -> Any: except (ValueError, TypeError) as e: raise SerializationError(f"Invalid UUID format in cached data: {value}") from e + if obj.get("__tuple__") is True: + if "value" not in obj: + raise SerializationError("Invalid tuple format: missing 'value' field in cached data") + value_list = obj["value"] + if not isinstance(value_list, list): + raise SerializationError(f"Invalid tuple format: expected list, got {type(value_list).__name__}") + return tuple(value_list) + if obj.get("__set__") is True: if "value" not in obj: raise SerializationError("Invalid set format: missing 'value' field in cached data") @@ -748,6 +776,8 @@ def _deserialize_series(self, data) -> pd.Series: def _serialize_msgpack(self, obj: Any) -> bytes: """Serialize general object with MessagePack.""" + # Pre-process tuples into markers (msgpack natively flattens them to lists) + obj = _wrap_tuples(obj) msgpack_data = msgpack.packb(obj, **self._msgpack_pack_opts) if self.enable_integrity_checking: From 6425058c9b5c7d6fdf9c1482152cba059dee9cac Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Sat, 16 May 2026 22:11:17 +1000 Subject: [PATCH 2/4] test: add tuple preservation and pythonic alias coverage 10 tests covering: simple/nested/empty tuple roundtrips, tuples in dicts and lists, tuples with other special types (sets, datetimes), list-stays-list verification, malformed marker error paths, and pythonic registry alias. --- tests/unit/test_auto_serializer_new_types.py | 96 +++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_auto_serializer_new_types.py b/tests/unit/test_auto_serializer_new_types.py index 4800cd1..1689b2a 100644 --- a/tests/unit/test_auto_serializer_new_types.py +++ b/tests/unit/test_auto_serializer_new_types.py @@ -1,4 +1,4 @@ -"""Unit tests for AutoSerializer new type support (UUID, set, frozenset). +"""Unit tests for AutoSerializer new type support (UUID, set, frozenset, tuple). Tests: - UUID serialization roundtrip @@ -559,3 +559,97 @@ def test_uuid_deterministic_serialization_property(self, uuid_list: list[UUID]): result1 = serializer.deserialize(bytes1) result2 = serializer.deserialize(bytes2) assert result1 == result2 + + +class TestAutoSerializerTuple: + """Test tuple preservation through AutoSerializer roundtrip.""" + + def test_simple_tuple_roundtrip(self): + serializer = AutoSerializer() + data = (1, 2, 3) + serialized, metadata = serializer.serialize(data) + result = serializer.deserialize(serialized, metadata) + assert isinstance(result, tuple) + assert result == (1, 2, 3) + + def test_nested_tuple_roundtrip(self): + serializer = AutoSerializer() + data = (1, (2, 3), (4, (5, 6))) + serialized, metadata = serializer.serialize(data) + result = serializer.deserialize(serialized, metadata) + assert isinstance(result, tuple) + assert isinstance(result[1], tuple) + assert isinstance(result[2][1], tuple) + assert result == (1, (2, 3), (4, (5, 6))) + + def test_empty_tuple_roundtrip(self): + serializer = AutoSerializer() + data = () + serialized, metadata = serializer.serialize(data) + result = serializer.deserialize(serialized, metadata) + assert isinstance(result, tuple) + assert result == () + + def test_tuple_in_dict(self): + serializer = AutoSerializer() + data = {"key": (1, 2), "other": "value"} + serialized, metadata = serializer.serialize(data) + result = serializer.deserialize(serialized, metadata) + assert isinstance(result["key"], tuple) + assert result["key"] == (1, 2) + + def test_tuple_in_list(self): + serializer = AutoSerializer() + data = [(1, 2), (3, 4)] + serialized, metadata = serializer.serialize(data) + result = serializer.deserialize(serialized, metadata) + assert isinstance(result[0], tuple) + assert isinstance(result[1], tuple) + + def test_tuple_with_set_and_datetime(self): + """Tuple containing other special types that use _auto_default.""" + from datetime import datetime + + serializer = AutoSerializer() + data = (1, {2, 3}, datetime(2025, 1, 1)) + serialized, metadata = serializer.serialize(data) + result = serializer.deserialize(serialized, metadata) + assert isinstance(result, tuple) + assert isinstance(result[1], set) + assert isinstance(result[2], datetime) + + def test_list_preserved_as_list_not_tuple(self): + """Lists must stay as lists — only tuples get markers.""" + serializer = AutoSerializer() + data = [1, 2, 3] + serialized, metadata = serializer.serialize(data) + result = serializer.deserialize(serialized, metadata) + assert isinstance(result, list) + + def test_malformed_tuple_marker_missing_value(self): + """Malformed __tuple__ marker raises SerializationError.""" + import msgpack + + serializer = AutoSerializer(enable_integrity_checking=False) + bad_data = msgpack.packb({"__tuple__": True}) + with pytest.raises(SerializationError, match="missing 'value' field"): + serializer.deserialize(bad_data) + + def test_malformed_tuple_marker_wrong_value_type(self): + """__tuple__ marker with non-list value raises SerializationError.""" + import msgpack + + serializer = AutoSerializer(enable_integrity_checking=False) + bad_data = msgpack.packb({"__tuple__": True, "value": "not a list"}) + with pytest.raises(SerializationError, match="expected list"): + serializer.deserialize(bad_data) + + +class TestPythonicSerializerAlias: + """Test 'pythonic' alias for AutoSerializer.""" + + def test_pythonic_returns_auto_serializer(self): + from cachekit.serializers import get_serializer + + s = get_serializer("pythonic") + assert isinstance(s, AutoSerializer) From db3583ec339e71dddcd3abb6ac36c65355e2a607 Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Sat, 16 May 2026 22:11:55 +1000 Subject: [PATCH 3/4] docs: add tuple to AutoSerializer supported types list --- src/cachekit/serializers/auto_serializer.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cachekit/serializers/auto_serializer.py b/src/cachekit/serializers/auto_serializer.py index f7ea464..b988d73 100644 --- a/src/cachekit/serializers/auto_serializer.py +++ b/src/cachekit/serializers/auto_serializer.py @@ -9,6 +9,7 @@ - datetime/date/time (ISO-8601) - UUID (string representation) - set/frozenset (type-safe roundtrip) +- tuple (recursive type-safe roundtrip) Uses MessagePack as the default format with graceful degradation for optional dependencies. @@ -73,7 +74,7 @@ CUSTOM_CLASS_ERROR_MESSAGE = ( "AutoSerializer does not support custom classes. " - "Supported types: dict, list, str, int, float, bool, None, bytes, " + "Supported types: dict, list, tuple, str, int, float, bool, None, bytes, " "datetime, date, time, UUID, set, frozenset, NumPy arrays, pandas DataFrames.\n" "Options:\n" " 1. Convert to dict manually\n" From 5512b36a6db9d7b5697f5f56f82eacaa6ae9f828 Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Mon, 18 May 2026 21:26:09 +1000 Subject: [PATCH 4/4] =?UTF-8?q?test:=20address=20review=20=E2=80=94=20drop?= =?UTF-8?q?=20unnecessary=20integrity=20flag,=20add=20single-element=20tup?= =?UTF-8?q?le=20test?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/unit/test_auto_serializer_new_types.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_auto_serializer_new_types.py b/tests/unit/test_auto_serializer_new_types.py index 1689b2a..fb2f59b 100644 --- a/tests/unit/test_auto_serializer_new_types.py +++ b/tests/unit/test_auto_serializer_new_types.py @@ -626,11 +626,20 @@ def test_list_preserved_as_list_not_tuple(self): result = serializer.deserialize(serialized, metadata) assert isinstance(result, list) + def test_single_element_tuple_roundtrip(self): + serializer = AutoSerializer() + data = (1,) + serialized, metadata = serializer.serialize(data) + result = serializer.deserialize(serialized, metadata) + assert isinstance(result, tuple) + assert len(result) == 1 + assert result == (1,) + def test_malformed_tuple_marker_missing_value(self): """Malformed __tuple__ marker raises SerializationError.""" import msgpack - serializer = AutoSerializer(enable_integrity_checking=False) + serializer = AutoSerializer() bad_data = msgpack.packb({"__tuple__": True}) with pytest.raises(SerializationError, match="missing 'value' field"): serializer.deserialize(bad_data) @@ -639,7 +648,7 @@ def test_malformed_tuple_marker_wrong_value_type(self): """__tuple__ marker with non-list value raises SerializationError.""" import msgpack - serializer = AutoSerializer(enable_integrity_checking=False) + serializer = AutoSerializer() bad_data = msgpack.packb({"__tuple__": True, "value": "not a list"}) with pytest.raises(SerializationError, match="expected list"): serializer.deserialize(bad_data)