Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions Doc/library/dataclasses.rst
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,12 @@ Module contents
{field.name: getattr(obj, field.name) for field in fields(obj)}

:func:`!asdict` raises :exc:`TypeError` if *obj* is not a dataclass
instance.
instance. It raises :exc:`ValueError` if *obj* contains a circular
reference.

.. versionchanged:: next
A circular reference now raises :exc:`ValueError` instead of

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The PR adds overhead to the conversion to dict or tuple. Can you benchmark this?

Adding overhead just to change the type of error message might be ok, but I would like to have some quantative numbers

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmarked on a release build (best of 9, over a ~1093-node nested structure of dataclasses + lists + dicts):

  • asdict: 15.5 ms → 19.3 ms (+24%)
  • astuple: 21.4 ms → 25.1 ms (+17%)

For typical shallow dataclasses the absolute cost is negligible; the percentage only shows up on deeply-nested, container-heavy data. I tried gating the cycle-tracking behind a recursion-depth threshold to make the common case cheaper, but it gave no measurable improvement — the cost is inherent to tracking object identities through the recursion, not the set operations specifically — so I kept the straightforward implementation.

On whether it's worth it beyond the error type: this also brings asdict/astuple in line with json.dumps, which detects circular references by default and raises the same ValueError("Circular reference detected"). Since asdict() output is so often passed straight to json.dumps, a cyclic structure currently dies with a deep RecursionError rather than the clear error json would give; copy.deepcopy likewise tracks identities on every call. If the default overhead is still a concern, I'd be happy to mirror json's check_circular= opt-out.

:exc:`RecursionError`.

.. function:: astuple(obj, *, tuple_factory=tuple)

Expand All @@ -429,7 +434,12 @@ Module contents
tuple(getattr(obj, field.name) for field in dataclasses.fields(obj))

:func:`!astuple` raises :exc:`TypeError` if *obj* is not a dataclass
instance.
instance. It raises :exc:`ValueError` if *obj* contains a circular
reference.

.. versionchanged:: next
A circular reference now raises :exc:`ValueError` instead of
:exc:`RecursionError`.

.. function:: make_dataclass(cls_name, fields, *, bases=(), namespace=None, init=True, repr=True, eq=True, order=False, unsafe_hash=False, frozen=False, match_args=True, kw_only=False, slots=False, weakref_slot=False, module=None, qualname=None, decorator=dataclass)

Expand Down
68 changes: 46 additions & 22 deletions Lib/dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -1510,35 +1510,47 @@ class C:
"""
if not _is_dataclass_instance(obj):
raise TypeError("asdict() should be called on dataclass instances")
return _asdict_inner(obj, dict_factory)
return _asdict_inner(obj, dict_factory, set())


def _asdict_inner(obj, dict_factory):
def _asdict_inner(obj, dict_factory, seen):
obj_type = type(obj)
if obj_type in _ATOMIC_TYPES:
return obj
elif hasattr(obj_type, _FIELDS):
# Guard against circular references, which would otherwise recurse until
# a RecursionError (or a crash on release builds). gh-94345
if id(obj) in seen:
raise ValueError("Circular reference detected")
seen.add(id(obj))
try:
return _asdict_inner_recurse(obj, obj_type, dict_factory, seen)
finally:
seen.discard(id(obj))


def _asdict_inner_recurse(obj, obj_type, dict_factory, seen):
if hasattr(obj_type, _FIELDS):
# dataclass instance: fast path for the common case
if dict_factory is dict:
return {
f.name: _asdict_inner(getattr(obj, f.name), dict)
f.name: _asdict_inner(getattr(obj, f.name), dict, seen)
for f in fields(obj)
}
else:
return dict_factory([
(f.name, _asdict_inner(getattr(obj, f.name), dict_factory))
(f.name, _asdict_inner(getattr(obj, f.name), dict_factory, seen))
for f in fields(obj)
])
# handle the builtin types first for speed; subclasses handled below
elif obj_type is list:
return [_asdict_inner(v, dict_factory) for v in obj]
return [_asdict_inner(v, dict_factory, seen) for v in obj]
elif obj_type is dict:
return {
_asdict_inner(k, dict_factory): _asdict_inner(v, dict_factory)
_asdict_inner(k, dict_factory, seen): _asdict_inner(v, dict_factory, seen)
for k, v in obj.items()
}
elif obj_type is tuple:
return tuple([_asdict_inner(v, dict_factory) for v in obj])
return tuple([_asdict_inner(v, dict_factory, seen) for v in obj])
elif issubclass(obj_type, tuple):
if hasattr(obj, '_fields'):
# obj is a namedtuple. Recurse into it, but the returned
Expand All @@ -1559,24 +1571,24 @@ def _asdict_inner(obj, dict_factory):
# dict. Note that if we returned dicts here instead of
# namedtuples, we could no longer call asdict() on a data
# structure where a namedtuple was used as a dict key.
return obj_type(*[_asdict_inner(v, dict_factory) for v in obj])
return obj_type(*[_asdict_inner(v, dict_factory, seen) for v in obj])
else:
return obj_type(_asdict_inner(v, dict_factory) for v in obj)
return obj_type(_asdict_inner(v, dict_factory, seen) for v in obj)
elif issubclass(obj_type, (dict, frozendict)):
if hasattr(obj_type, 'default_factory'):
# obj is a defaultdict, which has a different constructor from
# dict as it requires the default_factory as its first arg.
result = obj_type(obj.default_factory)
for k, v in obj.items():
result[_asdict_inner(k, dict_factory)] = _asdict_inner(v, dict_factory)
result[_asdict_inner(k, dict_factory, seen)] = _asdict_inner(v, dict_factory, seen)
return result
return obj_type((_asdict_inner(k, dict_factory),
_asdict_inner(v, dict_factory))
return obj_type((_asdict_inner(k, dict_factory, seen),
_asdict_inner(v, dict_factory, seen))
for k, v in obj.items())
elif issubclass(obj_type, list):
# Assume we can create an object of this type by passing in a
# generator
return obj_type(_asdict_inner(v, dict_factory) for v in obj)
return obj_type(_asdict_inner(v, dict_factory, seen) for v in obj)
else:
return copy.deepcopy(obj)

Expand All @@ -1603,15 +1615,27 @@ class C:

if not _is_dataclass_instance(obj):
raise TypeError("astuple() should be called on dataclass instances")
return _astuple_inner(obj, tuple_factory)
return _astuple_inner(obj, tuple_factory, set())


def _astuple_inner(obj, tuple_factory):
def _astuple_inner(obj, tuple_factory, seen):
if type(obj) in _ATOMIC_TYPES:
return obj
elif _is_dataclass_instance(obj):
# Guard against circular references, which would otherwise recurse until
# a RecursionError (or a crash on release builds). gh-94345
if id(obj) in seen:
raise ValueError("Circular reference detected")
seen.add(id(obj))
try:
return _astuple_inner_recurse(obj, tuple_factory, seen)
finally:
seen.discard(id(obj))


def _astuple_inner_recurse(obj, tuple_factory, seen):
if _is_dataclass_instance(obj):
return tuple_factory([
_astuple_inner(getattr(obj, f.name), tuple_factory)
_astuple_inner(getattr(obj, f.name), tuple_factory, seen)
for f in fields(obj)
])
elif isinstance(obj, tuple) and hasattr(obj, '_fields'):
Expand All @@ -1621,22 +1645,22 @@ def _astuple_inner(obj, tuple_factory):
# treated (see below), but we just need to create them
# differently because a namedtuple's __init__ needs to be
# called differently (see bpo-34363).
return type(obj)(*[_astuple_inner(v, tuple_factory) for v in obj])
return type(obj)(*[_astuple_inner(v, tuple_factory, seen) for v in obj])
elif isinstance(obj, (list, tuple)):
# Assume we can create an object of this type by passing in a
# generator (which is not true for namedtuples, handled
# above).
return type(obj)(_astuple_inner(v, tuple_factory) for v in obj)
return type(obj)(_astuple_inner(v, tuple_factory, seen) for v in obj)
elif isinstance(obj, (dict, frozendict)):
obj_type = type(obj)
if hasattr(obj_type, 'default_factory'):
# obj is a defaultdict, which has a different constructor from
# dict as it requires the default_factory as its first arg.
result = obj_type(getattr(obj, 'default_factory'))
for k, v in obj.items():
result[_astuple_inner(k, tuple_factory)] = _astuple_inner(v, tuple_factory)
result[_astuple_inner(k, tuple_factory, seen)] = _astuple_inner(v, tuple_factory, seen)
return result
return obj_type((_astuple_inner(k, tuple_factory), _astuple_inner(v, tuple_factory))
return obj_type((_astuple_inner(k, tuple_factory, seen), _astuple_inner(v, tuple_factory, seen))
for k, v in obj.items())
else:
return copy.deepcopy(obj)
Expand Down
92 changes: 92 additions & 0 deletions Lib/test/test_dataclasses/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1976,6 +1976,98 @@ class C:
self.assertEqual(t, ({"x": [12]},))
self.assertTrue(t[0] is not dd) # make sure defaultdict is copied

def test_helper_asdict_circular_reference(self):
# gh-94345: asdict() must raise a clear error on a circular reference
# instead of recursing until a RecursionError (or crashing).
@dataclass
class C:
name: str
link: object = None
items: list = field(default_factory=list)

# Direct self reference.
c = C('c')
c.link = c
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
asdict(c)
# Indirect cycle through another dataclass.
a = C('a')
b = C('b')
a.link = b
b.link = a
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
asdict(a)
# Cycle through a list field.
d = C('d')
d.items.append(d)
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
asdict(d)
# Cycle through a dict field.
e = C('e')
e.link = {'self': e}
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
asdict(e)

def test_helper_asdict_shared_reference_is_not_circular(self):
# gh-94345: an object referenced more than once without forming a
# cycle (a DAG) must still be converted successfully.
@dataclass
class Inner:
value: int
@dataclass
class Outer:
left: object
right: object

shared = Inner(1)
o = Outer(left=shared, right=shared)
self.assertEqual(asdict(o),
{'left': {'value': 1}, 'right': {'value': 1}})
# A shared built-in container referenced twice is fine too.
shared_list = [1, 2]
o2 = Outer(left=shared_list, right=shared_list)
self.assertEqual(asdict(o2), {'left': [1, 2], 'right': [1, 2]})

def test_helper_astuple_circular_reference(self):
# gh-94345: see test_helper_asdict_circular_reference.
@dataclass
class C:
name: str
link: object = None
items: list = field(default_factory=list)

c = C('c')
c.link = c
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
astuple(c)
a = C('a')
b = C('b')
a.link = b
b.link = a
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
astuple(a)
d = C('d')
d.items.append(d)
with self.assertRaisesRegex(ValueError, 'Circular reference detected'):
astuple(d)

def test_helper_astuple_shared_reference_is_not_circular(self):
# gh-94345: a DAG must still be converted successfully.
@dataclass
class Inner:
value: int
@dataclass
class Outer:
left: object
right: object

shared = Inner(1)
o = Outer(left=shared, right=shared)
self.assertEqual(astuple(o), ((1,), (1,)))
shared_list = [1, 2]
o2 = Outer(left=shared_list, right=shared_list)
self.assertEqual(astuple(o2), ([1, 2], [1, 2]))

def test_dynamic_class_creation(self):
cls_dict = {'__annotations__': {'x': int, 'y': int},
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
:func:`dataclasses.asdict` and :func:`dataclasses.astuple` now raise
:exc:`ValueError` when the dataclass instance contains a circular reference,
instead of recursing until a :exc:`RecursionError`.
Loading