Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/cachekit/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def _get_arrow_serializer() -> type:
# This allows passing enable_integrity_checking parameter during instantiation
SERIALIZER_REGISTRY = {
"auto": AutoSerializer, # Python-specific types (NumPy, pandas, datetime optimization)
"pythonic": AutoSerializer, # Alias — preserves Python types (tuples, sets, frozensets, datetime, UUID)
"default": StandardSerializer, # Language-agnostic MessagePack for multi-language caches
"std": StandardSerializer, # Explicit StandardSerializer alias
"arrow": None, # Lazy-loaded: requires pyarrow from [data] extra
Expand Down Expand Up @@ -121,7 +122,7 @@ def get_serializer(name: str, enable_integrity_checking: bool = True) -> Seriali
serializer_class = SERIALIZER_REGISTRY[name]

# Instantiate with integrity checking configuration
if name in ("default", "std", "auto", "arrow", "orjson"):
if name in ("default", "std", "auto", "pythonic", "arrow", "orjson"):
# All core serializers use enable_integrity_checking parameter
serializer = serializer_class(enable_integrity_checking=enable_integrity_checking)
else:
Expand Down
33 changes: 32 additions & 1 deletion src/cachekit/serializers/auto_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- datetime/date/time (ISO-8601)
- UUID (string representation)
- set/frozenset (type-safe roundtrip)
- tuple (recursive type-safe roundtrip)

Uses MessagePack as the default format with graceful degradation for optional dependencies.

Expand Down Expand Up @@ -73,7 +74,7 @@

CUSTOM_CLASS_ERROR_MESSAGE = (
"AutoSerializer does not support custom classes. "
"Supported types: dict, list, str, int, float, bool, None, bytes, "
"Supported types: dict, list, tuple, str, int, float, bool, None, bytes, "
"datetime, date, time, UUID, set, frozenset, NumPy arrays, pandas DataFrames.\n"
"Options:\n"
" 1. Convert to dict manually\n"
Expand Down Expand Up @@ -124,6 +125,26 @@ def _safe_hasattr(obj: Any, attr: str) -> bool:
return False


def _wrap_tuples(obj: Any) -> Any:
    """Return a copy of ``obj`` in which every tuple is replaced by a marker dict.

    MessagePack encodes tuples as plain arrays, exactly like lists, so the
    packer's ``default`` callback never sees them. To keep the distinction,
    each tuple is rewritten as ``{"__tuple__": True, "value": [...]}`` before
    packing; ``_auto_object_hook`` turns that marker back into a tuple when
    the data is read.

    The walk descends into tuples, lists and dict *values* only. Dict keys
    and every other kind of object are returned untouched, leaving them for
    msgpack's ``default`` callback (``_auto_default``).
    """
    if isinstance(obj, dict):
        rebuilt = {}
        for key, item in obj.items():
            rebuilt[key] = _wrap_tuples(item)
        return rebuilt

    if isinstance(obj, (tuple, list)):
        items = [_wrap_tuples(member) for member in obj]
        if isinstance(obj, tuple):
            # Key order matters: it determines the serialized byte layout.
            return {"__tuple__": True, "value": items}
        return items

    return obj


def _auto_default(obj: Any) -> Any:
"""Custom encoder for types not natively supported by MessagePack.

Expand Down Expand Up @@ -226,6 +247,14 @@ def _auto_object_hook(obj: Any) -> Any:
except (ValueError, TypeError) as e:
raise SerializationError(f"Invalid UUID format in cached data: {value}") from e

if obj.get("__tuple__") is True:
if "value" not in obj:
raise SerializationError("Invalid tuple format: missing 'value' field in cached data")
value_list = obj["value"]
if not isinstance(value_list, list):
raise SerializationError(f"Invalid tuple format: expected list, got {type(value_list).__name__}")
return tuple(value_list)

if obj.get("__set__") is True:
if "value" not in obj:
raise SerializationError("Invalid set format: missing 'value' field in cached data")
Expand Down Expand Up @@ -748,6 +777,8 @@ def _deserialize_series(self, data) -> pd.Series:

def _serialize_msgpack(self, obj: Any) -> bytes:
"""Serialize general object with MessagePack."""
# Pre-process tuples into markers (msgpack natively flattens them to lists)
obj = _wrap_tuples(obj)
msgpack_data = msgpack.packb(obj, **self._msgpack_pack_opts)

if self.enable_integrity_checking:
Expand Down
105 changes: 104 additions & 1 deletion tests/unit/test_auto_serializer_new_types.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Unit tests for AutoSerializer new type support (UUID, set, frozenset).
"""Unit tests for AutoSerializer new type support (UUID, set, frozenset, tuple).

Tests:
- UUID serialization roundtrip
Expand Down Expand Up @@ -559,3 +559,106 @@ def test_uuid_deterministic_serialization_property(self, uuid_list: list[UUID]):
result1 = serializer.deserialize(bytes1)
result2 = serializer.deserialize(bytes2)
assert result1 == result2


class TestAutoSerializerTuple:
    """Round-trip checks that AutoSerializer hands tuples back as tuples."""

    @staticmethod
    def _roundtrip(value):
        """Serialize ``value`` with a fresh AutoSerializer and decode it again."""
        codec = AutoSerializer()
        payload, meta = codec.serialize(value)
        return codec.deserialize(payload, meta)

    def test_simple_tuple_roundtrip(self):
        restored = self._roundtrip((1, 2, 3))
        assert isinstance(restored, tuple)
        assert restored == (1, 2, 3)

    def test_nested_tuple_roundtrip(self):
        restored = self._roundtrip((1, (2, 3), (4, (5, 6))))
        assert isinstance(restored, tuple)
        assert isinstance(restored[1], tuple)
        assert isinstance(restored[2][1], tuple)
        assert restored == (1, (2, 3), (4, (5, 6)))

    def test_empty_tuple_roundtrip(self):
        restored = self._roundtrip(())
        assert isinstance(restored, tuple)
        assert restored == ()

    def test_tuple_in_dict(self):
        restored = self._roundtrip({"key": (1, 2), "other": "value"})
        assert isinstance(restored["key"], tuple)
        assert restored["key"] == (1, 2)

    def test_tuple_in_list(self):
        restored = self._roundtrip([(1, 2), (3, 4)])
        assert isinstance(restored[0], tuple)
        assert isinstance(restored[1], tuple)

    def test_tuple_with_set_and_datetime(self):
        """A tuple may hold other specially-encoded types (set, datetime)."""
        from datetime import datetime

        restored = self._roundtrip((1, {2, 3}, datetime(2025, 1, 1)))
        assert isinstance(restored, tuple)
        assert isinstance(restored[1], set)
        assert isinstance(restored[2], datetime)

    def test_list_preserved_as_list_not_tuple(self):
        """Plain lists must come back as lists; only tuples receive markers."""
        restored = self._roundtrip([1, 2, 3])
        assert isinstance(restored, list)

    def test_single_element_tuple_roundtrip(self):
        restored = self._roundtrip((1,))
        assert isinstance(restored, tuple)
        assert len(restored) == 1
        assert restored == (1,)

    def test_malformed_tuple_marker_missing_value(self):
        """A ``__tuple__`` marker without ``value`` raises SerializationError."""
        import msgpack

        codec = AutoSerializer()
        payload = msgpack.packb({"__tuple__": True})
        with pytest.raises(SerializationError, match="missing 'value' field"):
            codec.deserialize(payload)

    def test_malformed_tuple_marker_wrong_value_type(self):
        """A ``__tuple__`` marker whose ``value`` is not a list raises SerializationError."""
        import msgpack

        codec = AutoSerializer()
        payload = msgpack.packb({"__tuple__": True, "value": "not a list"})
        with pytest.raises(SerializationError, match="expected list"):
            codec.deserialize(payload)
Comment thread
coderabbitai[bot] marked this conversation as resolved.


class TestPythonicSerializerAlias:
    """Check that the ``"pythonic"`` registry name resolves to AutoSerializer."""

    def test_pythonic_returns_auto_serializer(self):
        from cachekit.serializers import get_serializer

        assert isinstance(get_serializer("pythonic"), AutoSerializer)
Loading