From 617abe205129bbea423fdfa3bfbfffddda66d6ea Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 15 Jul 2025 15:08:18 +0200 Subject: [PATCH 01/46] style: refactor project --- libdestruct/__init__.py | 4 ++-- libdestruct/c/__init__.py | 2 +- libdestruct/c/c_str.py | 4 ++-- libdestruct/common/struct/__init__.py | 2 +- libdestruct/common/struct/struct_impl.py | 2 +- pyproject.toml | 2 +- 6 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libdestruct/__init__.py b/libdestruct/__init__.py index 856d246..5f552a7 100644 --- a/libdestruct/__init__.py +++ b/libdestruct/__init__.py @@ -22,7 +22,6 @@ __all__ = [ "array", "array_of", - "offset", "c_int", "c_long", "c_str", @@ -32,8 +31,9 @@ "enum_of", "inflate", "inflater", - "struct", + "offset", "ptr", "ptr_to", "ptr_to_self", + "struct", ] diff --git a/libdestruct/c/__init__.py b/libdestruct/c/__init__.py index 9f81aee..6fcde33 100644 --- a/libdestruct/c/__init__.py +++ b/libdestruct/c/__init__.py @@ -7,7 +7,7 @@ from libdestruct.c.c_integer_types import c_char, c_int, c_long, c_short, c_uchar, c_uint, c_ulong, c_ushort from libdestruct.c.c_str import c_str -__all__ = ["c_char", "c_uchar", "c_short", "c_ushort", "c_int", "c_uint", "c_long", "c_ulong", "c_str"] +__all__ = ["c_char", "c_int", "c_long", "c_short", "c_str", "c_uchar", "c_uint", "c_ulong", "c_ushort"] import libdestruct.c.base_type_inflater import libdestruct.c.ctypes_generic_field # noqa: F401 diff --git a/libdestruct/c/c_str.py b/libdestruct/c/c_str.py index 87ff11b..eb0aa46 100644 --- a/libdestruct/c/c_str.py +++ b/libdestruct/c/c_str.py @@ -26,7 +26,7 @@ def count(self: c_str) -> int: def get(self: c_str, index: int = -1) -> bytes: """Return the character at the given index.""" - if index != -1 and index < 0 or index >= self.count(): + if (index != -1 and index < 0) or index >= self.count(): raise IndexError("String index out of range.") if index == -1: @@ -40,7 +40,7 @@ def to_bytes(self: c_str) -> bytes: def _set(self: c_str, value: 
bytes, index: int = -1) -> None: """Set the character at the given index to the given value.""" - if index != -1 and index < 0 or index >= self.count(): + if (index != -1 and index < 0) or index >= self.count(): raise IndexError("String index out of range.") if index == -1: diff --git a/libdestruct/common/struct/__init__.py b/libdestruct/common/struct/__init__.py index e7733b6..c6909e4 100644 --- a/libdestruct/common/struct/__init__.py +++ b/libdestruct/common/struct/__init__.py @@ -8,7 +8,7 @@ from libdestruct.common.struct.struct import struct from libdestruct.common.struct.struct_impl import struct_impl -__all__ = ["struct", "struct_impl", "ptr_to", "ptr_to_self"] +__all__ = ["ptr_to", "ptr_to_self", "struct", "struct_impl"] import libdestruct.common.ptr.ptr_field_inflater import libdestruct.common.struct.struct_inflater # noqa: F401 diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index dc40b6a..1c267e8 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -175,7 +175,7 @@ def to_str(self: struct_impl, indent: int = 0) -> str: ) return f"""{self.name} {{ {members} -{' ' * indent}}}""" +{" " * indent}}}""" def __repr__(self: struct_impl) -> str: """Return a string representation of the struct.""" diff --git a/pyproject.toml b/pyproject.toml index 09ee941..197dc44 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ exclude = ["test/"] [tool.ruff.lint] select = ["ALL"] -ignore = ["D100", "D104", "EM", "FBT", "G", "TD", "TRY002", "TRY003", "RET505", "SLF001", "S603", "S606", "N801"] +ignore = ["D100", "D104", "EM", "FBT", "G", "TD", "TRY002", "TRY003", "RET505", "SLF001", "S603", "S606", "N801", "COM812"] [tool.ruff.lint.pydocstyle] convention = "google" From 60ebfe0414f03a35eea244ba7f501bf19f02ad5e Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 16:18:10 -0500 Subject: [PATCH 02/46] fix: correct various problems in the codebase 
--- libdestruct/__init__.py | 4 +- libdestruct/backing/fake_resolver.py | 4 +- libdestruct/backing/memory_resolver.py | 6 +- libdestruct/c/c_str.py | 6 +- libdestruct/c/ctypes_generic.py | 2 +- libdestruct/c/struct_parser.py | 5 + libdestruct/common/__init__.py | 2 + libdestruct/common/array/array.py | 4 +- libdestruct/common/array/array_impl.py | 6 +- libdestruct/common/enum/enum.py | 2 +- libdestruct/common/forward_ref_inflater.py | 133 ++++++++++++++ libdestruct/common/obj.py | 10 +- libdestruct/common/ptr/ptr.py | 7 +- libdestruct/common/struct/struct.py | 4 +- libdestruct/common/struct/struct_impl.py | 20 ++- libdestruct/common/type_registry.py | 41 ++++- libdestruct/common/utils.py | 29 +++- test/scripts/basic_struct_test.py | 16 ++ test/scripts/regression_test.py | 193 +++++++++++++++++++++ 19 files changed, 464 insertions(+), 30 deletions(-) create mode 100644 libdestruct/common/forward_ref_inflater.py create mode 100644 test/scripts/regression_test.py diff --git a/libdestruct/__init__.py b/libdestruct/__init__.py index 5f552a7..4c5826c 100644 --- a/libdestruct/__init__.py +++ b/libdestruct/__init__.py @@ -11,11 +11,12 @@ except ImportError: # pragma: no cover pass +from libdestruct.backing.resolver import Resolver from libdestruct.c import c_int, c_long, c_str, c_uint, c_ulong from libdestruct.common.array import array, array_of from libdestruct.common.attributes import offset from libdestruct.common.enum import enum, enum_of -from libdestruct.common.ptr import ptr +from libdestruct.common.ptr.ptr import ptr from libdestruct.common.struct import ptr_to, ptr_to_self, struct from libdestruct.libdestruct import inflate, inflater @@ -35,5 +36,6 @@ "ptr", "ptr_to", "ptr_to_self", + "Resolver", "struct", ] diff --git a/libdestruct/backing/fake_resolver.py b/libdestruct/backing/fake_resolver.py index 30ed986..496030d 100644 --- a/libdestruct/backing/fake_resolver.py +++ b/libdestruct/backing/fake_resolver.py @@ -12,7 +12,7 @@ class FakeResolver(Resolver): """A 
class that can resolve elements in a simulated memory storage.""" - def __init__(self: FakeResolver, memory: dict | None = None, address: int | None = 0) -> FakeResolver: + def __init__(self: FakeResolver, memory: dict | None = None, address: int | None = 0) -> None: """Initializes a basic fake resolver.""" self.memory = memory if memory is not None else {} self.address = address @@ -47,7 +47,7 @@ def resolve(self: FakeResolver, size: int, _: int) -> bytes: result = b"" while size: - page = self.memory.get(page_address, b"\x00" * (0x1000 - page_offset)) + page = self.memory.get(page_address, b"\x00" * 0x1000) page_size = min(size, 0x1000 - page_offset) result += page[page_offset : page_offset + page_size] size -= page_size diff --git a/libdestruct/backing/memory_resolver.py b/libdestruct/backing/memory_resolver.py index 291d8a1..8ab42bd 100644 --- a/libdestruct/backing/memory_resolver.py +++ b/libdestruct/backing/memory_resolver.py @@ -17,7 +17,7 @@ class MemoryResolver(Resolver): """A class that can resolve itself to a value in a referenced memory storage.""" - def __init__(self: MemoryResolver, memory: MutableSequence, address: int | None) -> MemoryResolver: + def __init__(self: MemoryResolver, memory: MutableSequence, address: int | None) -> None: """Initializes a basic memory resolver.""" self.memory = memory self.address = address @@ -38,7 +38,7 @@ def relative_from_own(self: MemoryResolver, address_offset: int, _: int) -> Memo new_resolver.offset = address_offset return new_resolver - def absolute_from_own(self: Resolver, address: int) -> MemoryResolver: + def absolute_from_own(self: MemoryResolver, address: int) -> MemoryResolver: """Creates a resolver that has an absolute reference to an object, from the parent's view.""" return MemoryResolver(self.memory, address) @@ -47,7 +47,7 @@ def resolve(self: MemoryResolver, size: int, _: int) -> bytes: address = self.resolve_address() return self.memory[address : address + size] - def modify(self: Resolver, size: 
int, _: int, value: bytes) -> None: + def modify(self: MemoryResolver, size: int, _: int, value: bytes) -> None: """Modifies itself in memory.""" address = self.resolve_address() self.memory[address : address + size] = value diff --git a/libdestruct/c/c_str.py b/libdestruct/c/c_str.py index eb0aa46..3895a31 100644 --- a/libdestruct/c/c_str.py +++ b/libdestruct/c/c_str.py @@ -32,7 +32,7 @@ def get(self: c_str, index: int = -1) -> bytes: if index == -1: return self.resolver.resolve(self.count(), 0) - return bytes([self.resolver.resolve(index)[-1]]) + return bytes([self.resolver.resolve(index + 1, 0)[-1]]) def to_bytes(self: c_str) -> bytes: """Return the serialized representation of the object.""" @@ -50,6 +50,10 @@ def _set(self: c_str, value: bytes, index: int = -1) -> None: prev = self.resolver.resolve(index, 0) self.resolver.modify(index + len(value), 0, prev + value) + def __setitem__(self: c_str, index: int, value: bytes) -> None: + """Set the character at the given index to the given value.""" + self._set(value, index) + def __iter__(self: c_str) -> iter: """Return an iterator over the string.""" for i in range(self.count()): diff --git a/libdestruct/c/ctypes_generic.py b/libdestruct/c/ctypes_generic.py index 7025910..5620d6b 100644 --- a/libdestruct/c/ctypes_generic.py +++ b/libdestruct/c/ctypes_generic.py @@ -34,6 +34,6 @@ def _set(self: _ctypes_generic, value: Any) -> None: def to_bytes(self: _ctypes_generic) -> bytes: """Serialize the type to bytes.""" if self._frozen: - return bytes(self._frozen_value) + return bytes(self.backing_type(self._frozen_value)) return self.resolver.resolve(self.size, 0) diff --git a/libdestruct/c/struct_parser.py b/libdestruct/c/struct_parser.py index 0b5e88f..9c4195e 100644 --- a/libdestruct/c/struct_parser.py +++ b/libdestruct/c/struct_parser.py @@ -96,6 +96,11 @@ def ptr_to_type(ptr: c_ast.PtrDecl, parent: c_ast.Struct | None = None) -> type[ if not isinstance(ptr, c_ast.PtrDecl): raise TypeError("Definition must be a 
pointer.") + # Handle nested pointers (e.g., int **pp) by recursively wrapping in ptr_to + if isinstance(ptr.type, c_ast.PtrDecl): + inner = ptr_to_type(ptr.type, parent) + return ptr_to(inner) + if not isinstance(ptr.type, c_ast.TypeDecl): raise TypeError("Definition must be a type declaration.") diff --git a/libdestruct/common/__init__.py b/libdestruct/common/__init__.py index 6a0982b..59f8f80 100644 --- a/libdestruct/common/__init__.py +++ b/libdestruct/common/__init__.py @@ -3,3 +3,5 @@ # Copyright (c) 2024 Roberto Alessandro Bertolini. All rights reserved. # Licensed under the MIT license. See LICENSE file in the project root for details. # + +import libdestruct.common.forward_ref_inflater diff --git a/libdestruct/common/array/array.py b/libdestruct/common/array/array.py index 231ba55..f9d6e9a 100644 --- a/libdestruct/common/array/array.py +++ b/libdestruct/common/array/array.py @@ -23,8 +23,8 @@ def __len__(self: array) -> int: return self.count() @abstractmethod - def get(self: array, index: int) -> object: - """Return the element at the given index.""" + def get(self: array, index: int = -1) -> object: + """Return the element at the given index, or all elements if index is -1.""" def __getitem__(self: array, index: int) -> object: """Return the element at the given index.""" diff --git a/libdestruct/common/array/array_impl.py b/libdestruct/common/array/array_impl.py index 41520ae..963f371 100644 --- a/libdestruct/common/array/array_impl.py +++ b/libdestruct/common/array/array_impl.py @@ -46,8 +46,10 @@ def count(self: array_impl) -> int: """Get the size of the array.""" return self._count - def get(self: array, index: int) -> object: - """Return the element at the given index.""" + def get(self: array, index: int = -1) -> object: + """Return the element at the given index, or all elements if index is -1.""" + if index == -1: + return [self.backing_type(self.resolver.relative_from_own(i * self.item_size, 0)) for i in range(self._count)] return 
self.backing_type(self.resolver.relative_from_own(index * self.item_size, 0)) def _set(self: array_impl, _: list[obj]) -> None: diff --git a/libdestruct/common/enum/enum.py b/libdestruct/common/enum/enum.py index 162a07a..03fdd70 100644 --- a/libdestruct/common/enum/enum.py +++ b/libdestruct/common/enum/enum.py @@ -59,4 +59,4 @@ def to_bytes(self: enum) -> bytes: def to_str(self: obj, indent: int = 0) -> str: """Return a string representation of the object.""" - return f"{' ' * indent}{self.get()!r}" + return f"{self.get()!r}" diff --git a/libdestruct/common/forward_ref_inflater.py b/libdestruct/common/forward_ref_inflater.py new file mode 100644 index 0000000..0df3c2d --- /dev/null +++ b/libdestruct/common/forward_ref_inflater.py @@ -0,0 +1,133 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2025 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + + +from __future__ import annotations + +import sys +from typing import TYPE_CHECKING, ForwardRef + +from libdestruct.common.ptr.ptr import ptr +from libdestruct.common.ptr.ptr_field import PtrField +from libdestruct.common.type_registry import TypeRegistry + +if TYPE_CHECKING: # pragma: no cover + from collections.abc import Callable + + from libdestruct.backing.resolver import Resolver + from libdestruct.common.obj import obj + + +registry = TypeRegistry() + + +class _LazyPtrField(PtrField): + """A PtrField that lazily resolves a forward reference at inflation time.""" + + def __init__(self: _LazyPtrField, forward_ref: ForwardRef, owner: tuple[obj, type[obj]] | None) -> None: + super().__init__(None) + self.forward_ref = forward_ref + self.owner = owner + + def inflate(self: _LazyPtrField, resolver: Resolver) -> obj: + """Inflate the field, resolving the forward reference on first use.""" + if self.backing_type is None: + resolved = self._resolve_forward_ref() + if resolved is 
not None: + self.backing_type = registry.inflater_for(resolved) + + if self.backing_type: + return ptr(resolver, self.backing_type) + + return ptr(resolver) + + def _resolve_forward_ref(self: _LazyPtrField) -> type | None: + """Resolve the forward reference to an actual type.""" + globalns = {} + localns = {} + + if self.owner: + _, owner_type = self.owner + + # Get the user's reference struct for proper module resolution + ref_struct = getattr(owner_type, "_reference_struct", owner_type) + + if hasattr(ref_struct, "__module__"): + module = sys.modules.get(ref_struct.__module__) + if module: + globalns = module.__dict__ + + # Add the reference struct to locals for self-references + if hasattr(ref_struct, "__name__"): + localns[ref_struct.__name__] = ref_struct + + try: + resolved = eval(self.forward_ref.__forward_arg__, globalns, localns) # noqa: S307 + if isinstance(resolved, type): + return resolved + return None + except Exception: + return None + + +def _subscripted_ptr_handler( + item: object, + args: tuple, + owner: tuple[obj, type[obj]] | None, +) -> Callable[[Resolver], obj] | None: + """Handle subscripted ptr types like ptr["Node"] or ptr[SomeType].""" + target = args[0] if args else None + + if target is None: + field = PtrField(None) + return field.inflate + + if isinstance(target, type): + field = PtrField(target) + field.backing_type = registry.inflater_for(target) + return field.inflate + + # String or ForwardRef: use lazy resolution + if isinstance(target, str): + target = ForwardRef(target) + + if isinstance(target, ForwardRef): + lazy_field = _LazyPtrField(target, owner) + return lazy_field.inflate + + field = PtrField(None) + return field.inflate + + +def _forward_ref_inflater( + forward_ref: ForwardRef, + _: type[obj], + owner: tuple[obj, type[obj]] | None, +) -> Callable[[Resolver], obj]: + """Handle bare ForwardRef annotations that couldn't be resolved at annotation time.""" + forward_arg = forward_ref.__forward_arg__ + + # Check if it's a ptr 
forward reference (e.g., from `from __future__ import annotations` + # where ptr wasn't in scope) + if forward_arg.startswith("ptr[") and forward_arg.endswith("]"): + inner_type = forward_arg[4:-1] + if (inner_type.startswith("'") and inner_type.endswith("'")) or ( + inner_type.startswith('"') and inner_type.endswith('"') + ): + inner_type = inner_type[1:-1] + + target_ref = ForwardRef(inner_type) + lazy_field = _LazyPtrField(target_ref, owner) + return lazy_field.inflate + + raise ValueError( + f"Cannot resolve forward reference '{forward_arg}'. " + f"Ensure the type is imported and available in the module scope.", + ) + + +registry.register_generic_handler(ptr, _subscripted_ptr_handler) +registry.register_instance_handler(ForwardRef, _forward_ref_inflater) diff --git a/libdestruct/common/obj.py b/libdestruct/common/obj.py index 84001ee..9476a23 100644 --- a/libdestruct/common/obj.py +++ b/libdestruct/common/obj.py @@ -7,13 +7,14 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Generic, TypeVar if TYPE_CHECKING: # pragma: no cover from libdestruct.backing.resolver import Resolver +T = TypeVar("T") -class obj(ABC): +class obj(ABC, Generic[T]): """A generic object, with reference to the backing memory view.""" endianness: str = "little" @@ -56,7 +57,10 @@ def to_bytes(self: obj) -> bytes: @classmethod def from_bytes(cls: type[obj], data: bytes) -> obj: """Deserialize the object from bytes.""" - item = cls(data, 0) + from libdestruct.libdestruct import inflater + + lib = inflater(data) + item = lib.inflate(cls, 0) item.freeze() return item diff --git a/libdestruct/common/ptr/ptr.py b/libdestruct/common/ptr/ptr.py index 8b3ebbd..ff6733c 100644 --- a/libdestruct/common/ptr/ptr.py +++ b/libdestruct/common/ptr/ptr.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, TypeVar from libdestruct.common.field import 
Field from libdestruct.common.obj import obj @@ -14,8 +14,9 @@ if TYPE_CHECKING: # pragma: no cover from libdestruct.backing.resolver import Resolver +T = TypeVar("T") -class ptr(obj): +class ptr(obj[T]): """A pointer to an object in memory.""" size: int = 8 @@ -76,7 +77,7 @@ def try_unwrap(self: ptr, length: int | None = None) -> obj | None: try: # If the address is invalid, this will raise an IndexError or ValueError. - self.resolver.absolute_from_own(address).resolve(length) + self.resolver.absolute_from_own(address).resolve(length or 1, 0) except (IndexError, ValueError): return None diff --git a/libdestruct/common/struct/struct.py b/libdestruct/common/struct/struct.py index 894302a..0ef7eed 100644 --- a/libdestruct/common/struct/struct.py +++ b/libdestruct/common/struct/struct.py @@ -26,8 +26,8 @@ def __init__(self: struct) -> None: def __new__(cls: type[struct], *args: ..., **kwargs: ...) -> struct: # noqa: PYI034 """Create a new struct.""" # Look for an inflater for this struct - inflater = TypeRegistry().inflater_for(cls) - return inflater(*args, **kwargs) + type_impl = TypeRegistry().inflater_for(cls) + return type_impl(*args, **kwargs) @classmethod def from_bytes(cls: type[struct], data: bytes) -> struct_impl: diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index 1c267e8..8f84a8f 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -51,6 +51,17 @@ def __init__(self: struct_impl, resolver: Resolver | None = None, **kwargs: ...) 
for name, value in kwargs.items(): getattr(self, name).value = value + def __getattribute__(self: struct_impl, name: str) -> object: + """Return the attribute, checking struct members first to avoid collisions with obj properties.""" + # Check _members dict directly to avoid infinite recursion + try: + members = object.__getattribute__(self, "_members") + if name in members: + return members[name] + except AttributeError: + pass + return super().__getattribute__(name) + def __new__(cls: struct_impl, *args: ..., **kwargs: ...) -> Self: """Create a new struct.""" # Skip the __new__ method of the parent class @@ -101,7 +112,6 @@ def _inflate_struct_attributes( resolved_type = inflater.inflater_for(annotation, owner=(self, reference_type._type_impl)) result = resolved_type(resolver.relative_from_own(current_offset, 0)) - setattr(self, name, result) self._members[name] = result current_offset += size_of(result) @@ -127,7 +137,7 @@ def compute_own_size(cls: type[struct_impl], reference_type: type) -> None: for attr in attrs: if isinstance(attr, Field): - attribute = cls._inflater.inflater_for((attr, annotation))(None) + attribute = cls._inflater.inflater_for((attr, annotation), (None, cls))(None) elif isinstance(attr, OffsetAttribute): offset = attr.offset if offset < size: @@ -138,11 +148,11 @@ def compute_own_size(cls: type[struct_impl], reference_type: type) -> None: # If we don't have a Field, we need to inflate the attribute as if we have no attributes if not attribute: - attribute = cls._inflater.inflater_for(annotation) + attribute = cls._inflater.inflater_for(annotation, (None, cls)) elif isinstance(annotation, Field): - attribute = cls._inflater.inflater_for((annotation, annotation.base_type))(None) + attribute = cls._inflater.inflater_for((annotation, annotation.base_type), (None, cls))(None) else: - attribute = cls._inflater.inflater_for(annotation) + attribute = cls._inflater.inflater_for(annotation, (None, cls)) size += size_of(attribute) diff --git 
a/libdestruct/common/type_registry.py b/libdestruct/common/type_registry.py index 71516d5..2467ca6 100644 --- a/libdestruct/common/type_registry.py +++ b/libdestruct/common/type_registry.py @@ -6,7 +6,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, get_args, get_origin from libdestruct.common.field import Field @@ -28,6 +28,9 @@ class TypeRegistry: type_handlers: dict[type, list[Callable[[type[obj]], type[obj] | None]]] """The handlers for generic object types, with basic inheritance support.""" + generic_handlers: dict[type, list[Callable]] + """The handlers for subscripted generic types like ptr[T].""" + instance_handlers: dict[ type, list[ @@ -46,6 +49,7 @@ def __new__(cls: type[TypeRegistry]) -> Self: cls._instance.mapping = {} cls._instance.type_handlers = {} + cls._instance.generic_handlers = {} cls._instance.instance_handlers = {} return cls._instance @@ -64,6 +68,10 @@ def inflater_for( Returns: The inflater for the object type. 
""" + origin = get_origin(item) + if origin is not None: + return self._inflater_for_generic(item, origin, get_args(item), owner) + if isinstance(item, type): if item in self.mapping: return self.mapping[item] @@ -86,6 +94,20 @@ def _inflater_for_type(self: TypeRegistry, item: type[obj]) -> type[obj]: raise ValueError(f"No applicable inflater found for {item}") + def _inflater_for_generic( + self: TypeRegistry, + item: object, + origin: type, + args: tuple, + owner: tuple[obj, type[obj]] | None, + ) -> Callable[[Resolver], obj]: + for handler in self.generic_handlers.get(origin, []): + result = handler(item, args, owner) + if result is not None: + return result + + raise ValueError(f"No applicable inflater found for subscripted type {item}") + def _inflater_for_instance( self: TypeRegistry, instance: Field | tuple[object, type[obj]], @@ -106,7 +128,6 @@ def _inflater_for_instance( result = handler(item, annotation, owner) if result is not None: - self.mapping[base] = result return result raise ValueError(f"No applicable inflater found for {item}") @@ -146,6 +167,22 @@ def register_instance_handler( self.instance_handlers[parent].append(handler) + def register_generic_handler( + self: TypeRegistry, + origin: type, + handler: Callable, + ) -> None: + """Register a handler for a subscripted generic type. + + Args: + origin: The origin type (e.g., ptr for ptr[T]). + handler: The handler for the subscripted type. 
+ """ + if origin not in self.generic_handlers: + self.generic_handlers[origin] = [] + + self.generic_handlers[origin].append(handler) + def register_mapping( self: TypeRegistry, parent: type[obj], diff --git a/libdestruct/common/utils.py b/libdestruct/common/utils.py index 4d8aeeb..0054c25 100644 --- a/libdestruct/common/utils.py +++ b/libdestruct/common/utils.py @@ -6,8 +6,9 @@ from __future__ import annotations +import sys from types import MethodType -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, ForwardRef from libdestruct.common.field import Field @@ -38,9 +39,33 @@ def size_of(item_or_inflater: obj | callable[[Resolver], obj]) -> int: field_object = item_or_inflater.__self__ return field_object.get_size() + # Check if item is directly a Field instance + if isinstance(item_or_inflater, Field): + return item_or_inflater.get_size() + raise ValueError(f"Cannot determine the size of {item_or_inflater}") +def _resolve_annotation(annotation: Any, defining_class: type) -> Any: + """Resolve a string annotation to its actual type. + + For annotations that are strings (e.g., from ``from __future__ import annotations``), + evaluates them in the defining class's module namespace. + Non-string annotations are returned as-is. 
+ """ + if not isinstance(annotation, str): + return annotation + + module = sys.modules.get(defining_class.__module__, None) + globalns = getattr(module, "__dict__", {}) if module else {} + localns = {defining_class.__name__: defining_class} + + try: + return eval(annotation, globalns, localns) # noqa: S307 + except Exception: + return ForwardRef(annotation) + + def iterate_annotation_chain(item: obj, terminate_at: object | None = None) -> Generator[tuple[str, Any, type[obj]]]: """Iterate over the annotation chain of the provided item.""" current_item = item @@ -53,4 +78,4 @@ def iterate_annotation_chain(item: obj, terminate_at: object | None = None) -> G for reference_item in chain: for name, annotation in reference_item.__annotations__.items(): - yield name, annotation, reference_item + yield name, _resolve_annotation(annotation, reference_item), reference_item diff --git a/test/scripts/basic_struct_test.py b/test/scripts/basic_struct_test.py index 75a21d0..5d93e82 100644 --- a/test/scripts/basic_struct_test.py +++ b/test/scripts/basic_struct_test.py @@ -368,3 +368,19 @@ class test_t2(struct): self.assertEqual(test2.size.address, 0x0) self.assertEqual(test2.a.value, 0xdeadbeef) self.assertEqual(test2.address, 0x0) + + def test_struct_new_syntax(self): + class Node(struct): + member: ptr["Node"] + + memory = b"" + memory += (0x8).to_bytes(8, "little") + memory += (0x10).to_bytes(8, "little") + memory += (0x0).to_bytes(8, "little") + + node_root = Node.from_bytes(memory) + + self.assertEqual(node_root.member.value, 0x8) + self.assertEqual(node_root.member.unwrap().member.value, 0x10) + self.assertEqual(node_root.member.unwrap().member.unwrap().member.value, 0x0) + self.assertEqual(node_root.address, 0x0) diff --git a/test/scripts/regression_test.py b/test/scripts/regression_test.py new file mode 100644 index 0000000..2ad1729 --- /dev/null +++ b/test/scripts/regression_test.py @@ -0,0 +1,193 @@ +# +# This file is part of libdestruct 
(https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2024 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +import ctypes +import unittest +from enum import IntEnum + +from libdestruct import c_int, c_long, c_str, c_uint, inflater, struct, ptr, ptr_to_self, array_of, enum, enum_of +from libdestruct.backing.fake_resolver import FakeResolver + + +class FakeResolverTest(unittest.TestCase): + """Issue #1: FakeResolver.resolve() uses wrong default page size for non-zero offset.""" + + def test_resolve_default_page_with_offset(self): + resolver = FakeResolver() + resolver.address = 0x800 # offset 0x800 within page 0 + + # Reading from an address with a non-zero page offset in a non-existent page + # should return zero bytes, not empty bytes + data = resolver.resolve(4, 0) + self.assertEqual(len(data), 4) + self.assertEqual(data, b"\x00\x00\x00\x00") + + +class PtrTryUnwrapTest(unittest.TestCase): + """Issue #2: ptr.try_unwrap() passes wrong number of args to resolve().""" + + def test_try_unwrap_null_pointer(self): + class test_t(struct): + a: c_int + p: ptr = ptr_to_self() + + memory = b"" + memory += (42).to_bytes(4, "little") + memory += (0).to_bytes(8, "little") + + test = test_t.from_bytes(memory) + # try_unwrap on a null pointer should return None, not crash with TypeError + result = test.p.try_unwrap() + # Address 0 is valid in our byte buffer, so it may or may not return None + # The important thing is that it doesn't crash + + +class CStrGetTest(unittest.TestCase): + """Issue #3: c_str.get(index) passes wrong number of args to resolve().""" + + def test_get_single_char(self): + memory = bytearray(b"Hello\x00") + lib = inflater(memory) + s = lib.inflate(c_str, 0) + + self.assertEqual(s.get(0), b"H") + self.assertEqual(s.get(1), b"e") + self.assertEqual(s.get(4), b"o") + + +class CtypesGenericFrozenTest(unittest.TestCase): + """Issue #4: _ctypes_generic.to_bytes() 
when frozen returns garbage via bytes(int).""" + + def test_frozen_to_bytes(self): + memory = (42).to_bytes(ctypes.sizeof(ctypes.c_int), "little") + lib = inflater(memory) + obj = lib.inflate(ctypes.c_int, 0) + + self.assertEqual(obj.value, 42) + obj.freeze() + self.assertEqual(obj.value, 42) + # bytes(42) produces b'\x00'*42, not the 4-byte LE representation + self.assertEqual(len(obj.to_bytes()), ctypes.sizeof(ctypes.c_int)) + self.assertEqual(obj.to_bytes(), memory) + + +class ObjFromBytesTest(unittest.TestCase): + """Issue #5: obj.from_bytes() is broken for non-struct types like c_int.""" + + def test_c_int_from_bytes(self): + data = (42).to_bytes(4, "little") + obj = c_int.from_bytes(data) + self.assertEqual(obj.value, 42) + + def test_c_long_from_bytes(self): + data = (123456789).to_bytes(8, "little") + obj = c_long.from_bytes(data) + self.assertEqual(obj.value, 123456789) + + def test_c_uint_from_bytes(self): + data = (0xDEADBEEF).to_bytes(4, "little") + obj = c_uint.from_bytes(data) + self.assertEqual(obj.value, 0xDEADBEEF) + + +class ArrayValueTest(unittest.TestCase): + """Issue #6: array.get() signature is incompatible with obj.get(), breaking .value property.""" + + def test_array_value_property(self): + memory = b"".join((i).to_bytes(4, "little") for i in range(5)) + lib = inflater(memory) + arr = lib.inflate(array_of(c_int, 5), 0) + + # .value calls self.get() without args - should not raise TypeError + val = arr.value + self.assertIsNotNone(val) + + def test_array_repr(self): + memory = b"".join((i).to_bytes(4, "little") for i in range(3)) + lib = inflater(memory) + arr = lib.inflate(array_of(c_int, 3), 0) + + # __repr__ calls self.get() - should not crash + r = repr(arr) + self.assertIsInstance(r, str) + + +class StructMemberCollisionTest(unittest.TestCase): + """Issue #7: struct field named 'value' or 'address' crashes during inflation.""" + + def test_struct_with_value_field(self): + class test_t(struct): + value: c_int + + memory = 
(42).to_bytes(4, "little") + # Should not raise RuntimeError from obj.value setter + test = test_t.from_bytes(memory) + self.assertEqual(test.value.value, 42) + + def test_struct_with_address_field(self): + class test_t(struct): + address: c_int + b: c_int + + memory = b"" + memory += (10).to_bytes(4, "little") + memory += (20).to_bytes(4, "little") + + # Should not raise AttributeError from read-only property + test = test_t.from_bytes(memory) + self.assertEqual(test.address.value, 10) + self.assertEqual(test.b.value, 20) + + +class CStrSetItemTest(unittest.TestCase): + """Issue #8: c_str.__setitem__ calls obj.set(index, value) which has wrong arity.""" + + def test_setitem(self): + memory = bytearray(b"Hello\x00") + lib = inflater(memory) + s = lib.inflate(c_str, 0) + + # Should not raise TypeError + s[0] = b"J" + self.assertEqual(s.get(0), b"J") + + +class EnumToStrTest(unittest.TestCase): + """Issue #11: enum.to_str() adds unexpected leading indentation.""" + + def test_enum_in_struct_to_str(self): + class Color(IntEnum): + RED = 0 + GREEN = 1 + + class test_t(struct): + color: enum = enum_of(Color) + x: c_int + + memory = b"" + memory += (1).to_bytes(4, "little") # GREEN + memory += (42).to_bytes(4, "little") + + test = test_t.from_bytes(memory) + result = test.to_str() + + # Should be " color: ", not " color: " + self.assertIn("color: ", result) + + +class StructParserTest(unittest.TestCase): + """Issue #16: struct_parser doesn't handle double pointers (int **pp).""" + + def test_double_pointer(self): + from libdestruct.c.struct_parser import definition_to_type + + # Should not raise TypeError: "Definition must be a type declaration." 
+ struct_type = definition_to_type("struct test { int **pp; };") + self.assertIn("pp", struct_type.__annotations__) + + +if __name__ == "__main__": + unittest.main() From e282510a501b49371f856754645cfac08a550698 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 16:29:40 -0500 Subject: [PATCH 03/46] test: add general check for issue #6 --- test/scripts/regression_test.py | 51 +++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/test/scripts/regression_test.py b/test/scripts/regression_test.py index 2ad1729..211b08e 100644 --- a/test/scripts/regression_test.py +++ b/test/scripts/regression_test.py @@ -189,5 +189,56 @@ def test_double_pointer(self): self.assertIn("pp", struct_type.__annotations__) +class BytearrayMemoryBytesTest(unittest.TestCase): + """Issue #6: __bytes__ fails on Python 3.13+ when backing memory is bytearray.""" + + def test_resolve_returns_bytes(self): + from libdestruct.backing.memory_resolver import MemoryResolver + + # MemoryResolver.resolve() should always return bytes, even when + # the backing memory is a bytearray + resolver = MemoryResolver(bytearray(b"\x01\x02\x03\x04"), 0) + result = resolver.resolve(4, 0) + self.assertIsInstance(result, bytes) + + def test_bytes_on_bytearray_backed_c_int(self): + lib = inflater(bytearray(b"\x2a\x00\x00\x00")) + obj = lib.inflate(c_int, 0) + + result = bytes(obj) + self.assertIsInstance(result, bytes) + self.assertEqual(len(result), 4) + + def test_bytes_on_bytearray_backed_c_str(self): + lib = inflater(bytearray(b"Hello\x00")) + s = lib.inflate(c_str, 0) + + result = bytes(s) + self.assertIsInstance(result, bytes) + + def test_bytes_on_bytearray_backed_ptr(self): + class test_t(struct): + p: ptr = ptr_to_self() + + memory = bytearray(b"\x00" * 8) + test = test_t.from_bytes(memory) + + result = test.p.to_bytes() + self.assertIsInstance(result, bytes) + self.assertEqual(len(result), 8) + + def test_c_str_get_returns_bytes(self): + lib = 
inflater(bytearray(b"Hello\x00")) + s = lib.inflate(c_str, 0) + + # get() without index returns the full string + result = s.get() + self.assertIsInstance(result, bytes) + + # get() with index returns a single byte + result = s.get(0) + self.assertIsInstance(result, bytes) + + if __name__ == "__main__": unittest.main() From 85fcd4884f0091f96181b863b34dc279d5448489 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 16:30:38 -0500 Subject: [PATCH 04/46] fix: cast to bytes in MemoryResolver instead of returning the slice directly --- libdestruct/backing/memory_resolver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdestruct/backing/memory_resolver.py b/libdestruct/backing/memory_resolver.py index 8ab42bd..8e9054c 100644 --- a/libdestruct/backing/memory_resolver.py +++ b/libdestruct/backing/memory_resolver.py @@ -45,7 +45,7 @@ def absolute_from_own(self: MemoryResolver, address: int) -> MemoryResolver: def resolve(self: MemoryResolver, size: int, _: int) -> bytes: """Resolves itself, providing the bytes it references for the specified size and index.""" address = self.resolve_address() - return self.memory[address : address + size] + return bytes(self.memory[address : address + size]) def modify(self: MemoryResolver, size: int, _: int, value: bytes) -> None: """Modifies itself in memory.""" From 7bc1802addd804f19059d49451225da76ae2def7 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 16:43:33 -0500 Subject: [PATCH 05/46] test: add more test coverage on the entire codebase --- test/scripts/array_unit_test.py | 104 ++++++++++ test/scripts/enum_test.py | 63 +++++- test/scripts/regression_test.py | 244 ------------------------ test/scripts/resolver_unit_test.py | 111 +++++++++++ test/scripts/struct_parser_unit_test.py | 43 +++++ test/scripts/struct_unit_test.py | 218 +++++++++++++++++++++ test/scripts/types_unit_test.py | 170 +++++++++++++++++ 7 files changed, 708 insertions(+), 245 deletions(-) create 
mode 100644 test/scripts/array_unit_test.py delete mode 100644 test/scripts/regression_test.py create mode 100644 test/scripts/resolver_unit_test.py create mode 100644 test/scripts/struct_parser_unit_test.py create mode 100644 test/scripts/struct_unit_test.py create mode 100644 test/scripts/types_unit_test.py diff --git a/test/scripts/array_unit_test.py b/test/scripts/array_unit_test.py new file mode 100644 index 0000000..2c5fec7 --- /dev/null +++ b/test/scripts/array_unit_test.py @@ -0,0 +1,104 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +import unittest +from enum import IntEnum + +from libdestruct import c_int, inflater, struct, array_of, enum, enum_of + + +class ArrayUnitTest(unittest.TestCase): + """Array operations without debugger.""" + + def test_array_value_property(self): + """.value calls self.get() without args - should not raise TypeError.""" + memory = b"".join((i).to_bytes(4, "little") for i in range(5)) + lib = inflater(memory) + arr = lib.inflate(array_of(c_int, 5), 0) + + val = arr.value + self.assertIsNotNone(val) + + def test_array_repr(self): + memory = b"".join((i).to_bytes(4, "little") for i in range(3)) + lib = inflater(memory) + arr = lib.inflate(array_of(c_int, 3), 0) + + r = repr(arr) + self.assertIsInstance(r, str) + + def test_array_indexing(self): + memory = b"".join((i).to_bytes(4, "little") for i in range(5)) + lib = inflater(memory) + arr = lib.inflate(array_of(c_int, 5), 0) + + for i in range(5): + self.assertEqual(arr[i].value, i) + + def test_array_iteration(self): + memory = b"".join((i * 10).to_bytes(4, "little") for i in range(3)) + lib = inflater(memory) + arr = lib.inflate(array_of(c_int, 3), 0) + + values = [x.value for x in arr] + self.assertEqual(values, [0, 10, 20]) + + def test_array_len(self): + memory = 
b"".join((0).to_bytes(4, "little") for _ in range(7)) + lib = inflater(memory) + arr = lib.inflate(array_of(c_int, 7), 0) + + self.assertEqual(len(arr), 7) + + def test_array_contains(self): + memory = b"".join((i).to_bytes(4, "little") for i in range(5)) + lib = inflater(memory) + arr = lib.inflate(array_of(c_int, 5), 0) + + elem = arr[2] + self.assertIn(elem, arr) + + def test_array_to_bytes(self): + memory = b"".join((i).to_bytes(4, "little") for i in range(3)) + lib = inflater(memory) + arr = lib.inflate(array_of(c_int, 3), 0) + + result = arr.to_bytes() + self.assertIsInstance(result, bytes) + self.assertEqual(result, memory) + + def test_array_to_str(self): + memory = b"".join((i).to_bytes(4, "little") for i in range(3)) + lib = inflater(memory) + arr = lib.inflate(array_of(c_int, 3), 0) + + result = arr.to_str() + self.assertIn("0", result) + self.assertIn("1", result) + self.assertIn("2", result) + + def test_array_value_returns_all_elements(self): + memory = b"".join((i).to_bytes(4, "little") for i in range(4)) + lib = inflater(memory) + arr = lib.inflate(array_of(c_int, 4), 0) + + val = arr.value + self.assertIsInstance(val, list) + self.assertEqual(len(val), 4) + self.assertEqual([x.value for x in val], [0, 1, 2, 3]) + + def test_bytes_on_bytearray_backed_array(self): + memory = bytearray(b"".join((i).to_bytes(4, "little") for i in range(3))) + lib = inflater(memory) + arr = lib.inflate(array_of(c_int, 3), 0) + + result = bytes(arr) + self.assertIsInstance(result, bytes) + self.assertEqual(len(result), 12) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/scripts/enum_test.py b/test/scripts/enum_test.py index 1231425..bd283ac 100644 --- a/test/scripts/enum_test.py +++ b/test/scripts/enum_test.py @@ -7,7 +7,7 @@ import unittest from enum import Enum, IntEnum -from libdestruct import inflater, enum, enum_of, struct +from libdestruct import inflater, c_int, enum, enum_of, struct class EnumTest(unittest.TestCase): def test_enum(self): @@ 
-70,3 +70,64 @@ class TestHolder2(struct): with self.assertRaises(ValueError): enum_of(Test, size=9) + + def test_enum_to_str_no_leading_indent(self): + """enum.to_str() should not add unexpected leading indentation.""" + class Color(IntEnum): + RED = 0 + GREEN = 1 + + class test_t(struct): + color: enum = enum_of(Color) + x: c_int + + memory = b"" + memory += (1).to_bytes(4, "little") # GREEN + memory += (42).to_bytes(4, "little") + + test = test_t.from_bytes(memory) + result = test.to_str() + + # Should be " color: ", not " color: " + self.assertIn("color: ", result) + + def test_enum_standalone_to_str(self): + class Color(IntEnum): + RED = 0 + BLUE = 1 + + class test_t(struct): + color: enum = enum_of(Color) + + memory = (0).to_bytes(4, "little") + test = test_t.from_bytes(memory) + + result = test.color.to_str() + self.assertFalse(result.startswith(" ")) + + def test_enum_value_extraction(self): + class Status(IntEnum): + OK = 0 + ERROR = 1 + PENDING = 2 + + class test_t(struct): + status: enum = enum_of(Status) + + memory = (2).to_bytes(4, "little") + test = test_t.from_bytes(memory) + self.assertEqual(test.status.value, Status.PENDING) + + def test_bytes_on_bytearray_backed_enum(self): + class Color(IntEnum): + RED = 0 + GREEN = 1 + + class test_t(struct): + color: enum = enum_of(Color) + + lib = inflater(bytearray(b"\x01\x00\x00\x00")) + test = lib.inflate(test_t, 0) + + result = bytes(test) + self.assertIsInstance(result, bytes) diff --git a/test/scripts/regression_test.py b/test/scripts/regression_test.py deleted file mode 100644 index 211b08e..0000000 --- a/test/scripts/regression_test.py +++ /dev/null @@ -1,244 +0,0 @@ -# -# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). -# Copyright (c) 2024 Roberto Alessandro Bertolini. All rights reserved. -# Licensed under the MIT license. See LICENSE file in the project root for details. 
-# - -import ctypes -import unittest -from enum import IntEnum - -from libdestruct import c_int, c_long, c_str, c_uint, inflater, struct, ptr, ptr_to_self, array_of, enum, enum_of -from libdestruct.backing.fake_resolver import FakeResolver - - -class FakeResolverTest(unittest.TestCase): - """Issue #1: FakeResolver.resolve() uses wrong default page size for non-zero offset.""" - - def test_resolve_default_page_with_offset(self): - resolver = FakeResolver() - resolver.address = 0x800 # offset 0x800 within page 0 - - # Reading from an address with a non-zero page offset in a non-existent page - # should return zero bytes, not empty bytes - data = resolver.resolve(4, 0) - self.assertEqual(len(data), 4) - self.assertEqual(data, b"\x00\x00\x00\x00") - - -class PtrTryUnwrapTest(unittest.TestCase): - """Issue #2: ptr.try_unwrap() passes wrong number of args to resolve().""" - - def test_try_unwrap_null_pointer(self): - class test_t(struct): - a: c_int - p: ptr = ptr_to_self() - - memory = b"" - memory += (42).to_bytes(4, "little") - memory += (0).to_bytes(8, "little") - - test = test_t.from_bytes(memory) - # try_unwrap on a null pointer should return None, not crash with TypeError - result = test.p.try_unwrap() - # Address 0 is valid in our byte buffer, so it may or may not return None - # The important thing is that it doesn't crash - - -class CStrGetTest(unittest.TestCase): - """Issue #3: c_str.get(index) passes wrong number of args to resolve().""" - - def test_get_single_char(self): - memory = bytearray(b"Hello\x00") - lib = inflater(memory) - s = lib.inflate(c_str, 0) - - self.assertEqual(s.get(0), b"H") - self.assertEqual(s.get(1), b"e") - self.assertEqual(s.get(4), b"o") - - -class CtypesGenericFrozenTest(unittest.TestCase): - """Issue #4: _ctypes_generic.to_bytes() when frozen returns garbage via bytes(int).""" - - def test_frozen_to_bytes(self): - memory = (42).to_bytes(ctypes.sizeof(ctypes.c_int), "little") - lib = inflater(memory) - obj = 
lib.inflate(ctypes.c_int, 0) - - self.assertEqual(obj.value, 42) - obj.freeze() - self.assertEqual(obj.value, 42) - # bytes(42) produces b'\x00'*42, not the 4-byte LE representation - self.assertEqual(len(obj.to_bytes()), ctypes.sizeof(ctypes.c_int)) - self.assertEqual(obj.to_bytes(), memory) - - -class ObjFromBytesTest(unittest.TestCase): - """Issue #5: obj.from_bytes() is broken for non-struct types like c_int.""" - - def test_c_int_from_bytes(self): - data = (42).to_bytes(4, "little") - obj = c_int.from_bytes(data) - self.assertEqual(obj.value, 42) - - def test_c_long_from_bytes(self): - data = (123456789).to_bytes(8, "little") - obj = c_long.from_bytes(data) - self.assertEqual(obj.value, 123456789) - - def test_c_uint_from_bytes(self): - data = (0xDEADBEEF).to_bytes(4, "little") - obj = c_uint.from_bytes(data) - self.assertEqual(obj.value, 0xDEADBEEF) - - -class ArrayValueTest(unittest.TestCase): - """Issue #6: array.get() signature is incompatible with obj.get(), breaking .value property.""" - - def test_array_value_property(self): - memory = b"".join((i).to_bytes(4, "little") for i in range(5)) - lib = inflater(memory) - arr = lib.inflate(array_of(c_int, 5), 0) - - # .value calls self.get() without args - should not raise TypeError - val = arr.value - self.assertIsNotNone(val) - - def test_array_repr(self): - memory = b"".join((i).to_bytes(4, "little") for i in range(3)) - lib = inflater(memory) - arr = lib.inflate(array_of(c_int, 3), 0) - - # __repr__ calls self.get() - should not crash - r = repr(arr) - self.assertIsInstance(r, str) - - -class StructMemberCollisionTest(unittest.TestCase): - """Issue #7: struct field named 'value' or 'address' crashes during inflation.""" - - def test_struct_with_value_field(self): - class test_t(struct): - value: c_int - - memory = (42).to_bytes(4, "little") - # Should not raise RuntimeError from obj.value setter - test = test_t.from_bytes(memory) - self.assertEqual(test.value.value, 42) - - def 
test_struct_with_address_field(self): - class test_t(struct): - address: c_int - b: c_int - - memory = b"" - memory += (10).to_bytes(4, "little") - memory += (20).to_bytes(4, "little") - - # Should not raise AttributeError from read-only property - test = test_t.from_bytes(memory) - self.assertEqual(test.address.value, 10) - self.assertEqual(test.b.value, 20) - - -class CStrSetItemTest(unittest.TestCase): - """Issue #8: c_str.__setitem__ calls obj.set(index, value) which has wrong arity.""" - - def test_setitem(self): - memory = bytearray(b"Hello\x00") - lib = inflater(memory) - s = lib.inflate(c_str, 0) - - # Should not raise TypeError - s[0] = b"J" - self.assertEqual(s.get(0), b"J") - - -class EnumToStrTest(unittest.TestCase): - """Issue #11: enum.to_str() adds unexpected leading indentation.""" - - def test_enum_in_struct_to_str(self): - class Color(IntEnum): - RED = 0 - GREEN = 1 - - class test_t(struct): - color: enum = enum_of(Color) - x: c_int - - memory = b"" - memory += (1).to_bytes(4, "little") # GREEN - memory += (42).to_bytes(4, "little") - - test = test_t.from_bytes(memory) - result = test.to_str() - - # Should be " color: ", not " color: " - self.assertIn("color: ", result) - - -class StructParserTest(unittest.TestCase): - """Issue #16: struct_parser doesn't handle double pointers (int **pp).""" - - def test_double_pointer(self): - from libdestruct.c.struct_parser import definition_to_type - - # Should not raise TypeError: "Definition must be a type declaration." 
- struct_type = definition_to_type("struct test { int **pp; };") - self.assertIn("pp", struct_type.__annotations__) - - -class BytearrayMemoryBytesTest(unittest.TestCase): - """Issue #6: __bytes__ fails on Python 3.13+ when backing memory is bytearray.""" - - def test_resolve_returns_bytes(self): - from libdestruct.backing.memory_resolver import MemoryResolver - - # MemoryResolver.resolve() should always return bytes, even when - # the backing memory is a bytearray - resolver = MemoryResolver(bytearray(b"\x01\x02\x03\x04"), 0) - result = resolver.resolve(4, 0) - self.assertIsInstance(result, bytes) - - def test_bytes_on_bytearray_backed_c_int(self): - lib = inflater(bytearray(b"\x2a\x00\x00\x00")) - obj = lib.inflate(c_int, 0) - - result = bytes(obj) - self.assertIsInstance(result, bytes) - self.assertEqual(len(result), 4) - - def test_bytes_on_bytearray_backed_c_str(self): - lib = inflater(bytearray(b"Hello\x00")) - s = lib.inflate(c_str, 0) - - result = bytes(s) - self.assertIsInstance(result, bytes) - - def test_bytes_on_bytearray_backed_ptr(self): - class test_t(struct): - p: ptr = ptr_to_self() - - memory = bytearray(b"\x00" * 8) - test = test_t.from_bytes(memory) - - result = test.p.to_bytes() - self.assertIsInstance(result, bytes) - self.assertEqual(len(result), 8) - - def test_c_str_get_returns_bytes(self): - lib = inflater(bytearray(b"Hello\x00")) - s = lib.inflate(c_str, 0) - - # get() without index returns the full string - result = s.get() - self.assertIsInstance(result, bytes) - - # get() with index returns a single byte - result = s.get(0) - self.assertIsInstance(result, bytes) - - -if __name__ == "__main__": - unittest.main() diff --git a/test/scripts/resolver_unit_test.py b/test/scripts/resolver_unit_test.py new file mode 100644 index 0000000..7c3a54e --- /dev/null +++ b/test/scripts/resolver_unit_test.py @@ -0,0 +1,111 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). 
+# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +import unittest + +from libdestruct import c_int, c_str, c_uint, inflater, struct, ptr, ptr_to_self +from libdestruct.backing.fake_resolver import FakeResolver +from libdestruct.backing.memory_resolver import MemoryResolver + + +class FakeResolverTest(unittest.TestCase): + def test_resolve_default_page_with_offset(self): + """FakeResolver.resolve() should return zero-filled bytes for non-existent pages regardless of offset.""" + resolver = FakeResolver() + resolver.address = 0x800 + + data = resolver.resolve(4, 0) + self.assertEqual(len(data), 4) + self.assertEqual(data, b"\x00\x00\x00\x00") + + def test_read_spanning_page_boundary(self): + resolver = FakeResolver() + resolver.address = 0xFFE # 2 bytes before page boundary + + data = resolver.resolve(8, 0) + self.assertEqual(len(data), 8) + self.assertEqual(data, b"\x00" * 8) + + def test_read_with_populated_page(self): + resolver = FakeResolver() + page = b"\xAA" * 0x1000 + resolver.memory[0x0] = page + resolver.address = 0x10 + + data = resolver.resolve(4, 0) + self.assertEqual(data, b"\xAA" * 4) + + def test_write_then_read_cross_page(self): + resolver = FakeResolver() + resolver.address = 0xFFE + + resolver.modify(4, 0, b"\x01\x02\x03\x04") + data = resolver.resolve(4, 0) + self.assertEqual(data, b"\x01\x02\x03\x04") + + +class MemoryResolverTest(unittest.TestCase): + def test_resolve_returns_bytes_not_bytearray(self): + """MemoryResolver.resolve() must return bytes even when backing is bytearray.""" + resolver = MemoryResolver(bytearray(b"\x01\x02\x03\x04"), 0) + result = resolver.resolve(4, 0) + self.assertIsInstance(result, bytes) + + def test_bytes_on_bytearray_backed_c_int(self): + lib = inflater(bytearray(b"\x2a\x00\x00\x00")) + obj = lib.inflate(c_int, 0) + + result = bytes(obj) + self.assertIsInstance(result, bytes) + 
self.assertEqual(len(result), 4) + + def test_bytes_on_bytearray_backed_c_str(self): + lib = inflater(bytearray(b"Hello\x00")) + s = lib.inflate(c_str, 0) + + result = bytes(s) + self.assertIsInstance(result, bytes) + + def test_bytes_on_bytearray_backed_ptr(self): + class test_t(struct): + p: ptr = ptr_to_self() + + memory = bytearray(b"\x00" * 8) + test = test_t.from_bytes(memory) + + result = test.p.to_bytes() + self.assertIsInstance(result, bytes) + self.assertEqual(len(result), 8) + + def test_c_str_get_returns_bytes(self): + lib = inflater(bytearray(b"Hello\x00")) + s = lib.inflate(c_str, 0) + + result = s.get() + self.assertIsInstance(result, bytes) + + result = s.get(0) + self.assertIsInstance(result, bytes) + + def test_write_to_bytearray_memory(self): + memory = bytearray(b"\x00" * 8) + lib = inflater(memory) + obj = lib.inflate(c_int, 0) + + obj.value = 0x7EADBEEF + self.assertEqual(obj.value, 0x7EADBEEF) + + def test_c_uint_write_to_bytearray(self): + memory = bytearray(b"\x00" * 4) + lib = inflater(memory) + obj = lib.inflate(c_uint, 0) + + obj.value = 0xDEADBEEF + self.assertEqual(obj.value, 0xDEADBEEF) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/scripts/struct_parser_unit_test.py b/test/scripts/struct_parser_unit_test.py new file mode 100644 index 0000000..a05cbb4 --- /dev/null +++ b/test/scripts/struct_parser_unit_test.py @@ -0,0 +1,43 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +import unittest + +from libdestruct.c.struct_parser import definition_to_type + + +class StructParserTest(unittest.TestCase): + """C struct parser tests.""" + + def test_simple_struct(self): + t = definition_to_type("struct Foo { int x; unsigned int y; };") + self.assertIn("x", t.__annotations__) + self.assertIn("y", t.__annotations__) + + def test_double_pointer(self): + """Parser should handle double pointers (int **pp).""" + t = definition_to_type("struct test { int **pp; };") + self.assertIn("pp", t.__annotations__) + + def test_triple_pointer(self): + t = definition_to_type("struct test { int ***ppp; };") + self.assertIn("ppp", t.__annotations__) + + def test_array_field(self): + t = definition_to_type("struct test { int arr[4]; };") + self.assertIn("arr", t.__annotations__) + + def test_nested_struct_definition(self): + t = definition_to_type(""" + struct inner { int x; }; + struct outer { struct inner a; int b; }; + """) + self.assertIn("a", t.__annotations__) + self.assertIn("b", t.__annotations__) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/scripts/struct_unit_test.py b/test/scripts/struct_unit_test.py new file mode 100644 index 0000000..283a378 --- /dev/null +++ b/test/scripts/struct_unit_test.py @@ -0,0 +1,218 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +import unittest +from enum import IntEnum + +from libdestruct import c_int, c_long, c_uint, inflater, struct, ptr, ptr_to_self, array_of, enum, enum_of + + +class StructMemberCollisionTest(unittest.TestCase): + """Struct fields named after obj properties should not crash.""" + + def test_struct_with_value_field(self): + class test_t(struct): + value: c_int + + memory = (42).to_bytes(4, "little") + test = test_t.from_bytes(memory) + self.assertEqual(test.value.value, 42) + + def test_struct_with_address_field(self): + class test_t(struct): + address: c_int + b: c_int + + memory = b"" + memory += (10).to_bytes(4, "little") + memory += (20).to_bytes(4, "little") + + test = test_t.from_bytes(memory) + self.assertEqual(test.address.value, 10) + self.assertEqual(test.b.value, 20) + + def test_struct_with_size_field(self): + class test_t(struct): + size: c_int + data: c_int + + memory = b"" + memory += (100).to_bytes(4, "little") + memory += (200).to_bytes(4, "little") + + test = test_t.from_bytes(memory) + self.assertEqual(test.size.value, 100) + self.assertEqual(test.data.value, 200) + + def test_struct_with_resolver_field(self): + class test_t(struct): + resolver: c_int + x: c_int + + memory = b"" + memory += (11).to_bytes(4, "little") + memory += (22).to_bytes(4, "little") + + test = test_t.from_bytes(memory) + self.assertEqual(test.resolver.value, 11) + self.assertEqual(test.x.value, 22) + + def test_struct_with_name_field(self): + class test_t(struct): + name: c_int + + memory = (77).to_bytes(4, "little") + test = test_t.from_bytes(memory) + self.assertEqual(test.name.value, 77) + + def test_nested_struct_with_collisions(self): + class inner_t(struct): + value: c_int + + class outer_t(struct): + address: inner_t + size: c_int + + memory = b"" + memory += (10).to_bytes(4, "little") + memory += (20).to_bytes(4, "little") + + test = outer_t.from_bytes(memory) + self.assertEqual(test.address.value.value, 10) + self.assertEqual(test.size.value, 20) + + +class 
StructRoundTripTest(unittest.TestCase): + """Struct serialization round-trips.""" + + def test_simple_round_trip(self): + class test_t(struct): + a: c_int + b: c_long + + memory = b"" + memory += (42).to_bytes(4, "little") + memory += (1337).to_bytes(8, "little") + + test = test_t.from_bytes(memory) + self.assertEqual(test.to_bytes(), memory[:12]) + + def test_nested_struct_round_trip(self): + class inner_t(struct): + x: c_int + y: c_int + + class outer_t(struct): + a: c_int + b: inner_t + + memory = b"" + memory += (1).to_bytes(4, "little") + memory += (2).to_bytes(4, "little") + memory += (3).to_bytes(4, "little") + + test = outer_t.from_bytes(memory) + self.assertEqual(test.to_bytes(), memory[:12]) + + def test_struct_to_str(self): + class test_t(struct): + a: c_int + b: c_int + + memory = b"" + memory += (10).to_bytes(4, "little") + memory += (20).to_bytes(4, "little") + + test = test_t.from_bytes(memory) + s = test.to_str() + self.assertIn("a: 10", s) + self.assertIn("b: 20", s) + + def test_struct_repr(self): + class test_t(struct): + a: c_int + + memory = (99).to_bytes(4, "little") + test = test_t.from_bytes(memory) + + r = repr(test) + self.assertIn("test_t", r) + + def test_bytes_on_bytearray_backed_struct(self): + class test_t(struct): + a: c_int + b: c_int + + lib = inflater(bytearray(b"\x01\x00\x00\x00\x02\x00\x00\x00")) + test = lib.inflate(test_t, 0) + + result = bytes(test) + self.assertIsInstance(result, bytes) + self.assertEqual(len(result), 8) + + +class StructFreezeTest(unittest.TestCase): + """Struct freeze semantics.""" + + def test_frozen_struct_rejects_writes(self): + class test_t(struct): + a: c_int + b: c_int + + memory = bytearray(b"\x00" * 8) + lib = inflater(memory) + test = lib.inflate(test_t, 0) + test.a.value = 10 + test.b.value = 20 + + test.freeze() + + self.assertEqual(test.a.value, 10) + self.assertEqual(test.b.value, 20) + + with self.assertRaises(ValueError): + test.a.value = 999 + + +class ForwardRefPtrTest(unittest.TestCase): 
+ """Forward reference ptr["Type"] syntax.""" + + def test_self_referential_struct(self): + class Node(struct): + val: c_int + next: ptr["Node"] + + # No padding: c_int(4) + ptr(8) = 12 bytes per node + memory = b"" + memory += (10).to_bytes(4, "little") + memory += (12).to_bytes(8, "little") # next -> offset 12 + memory += (20).to_bytes(4, "little") + memory += (0).to_bytes(8, "little") # next -> null + + node = Node.from_bytes(memory) + self.assertEqual(node.val.value, 10) + self.assertEqual(node.next.unwrap().val.value, 20) + + def test_tree_struct(self): + class TreeNode(struct): + data: c_uint + left: ptr["TreeNode"] + right: ptr["TreeNode"] + + # Single node, no children + # c_uint(4) + ptr(8) + ptr(8) = 20 bytes + memory = b"" + memory += (42).to_bytes(4, "little") + memory += (0).to_bytes(4, "little") # padding + memory += (0).to_bytes(8, "little") # left=null + memory += (0).to_bytes(8, "little") # right=null + + node = TreeNode.from_bytes(memory) + self.assertEqual(node.data.value, 42) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/scripts/types_unit_test.py b/test/scripts/types_unit_test.py new file mode 100644 index 0000000..9db116d --- /dev/null +++ b/test/scripts/types_unit_test.py @@ -0,0 +1,170 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +import ctypes +import unittest + +from libdestruct import c_int, c_long, c_str, c_uint, inflater, struct, ptr, ptr_to_self + + +class ObjFromBytesTest(unittest.TestCase): + """obj.from_bytes() for non-struct types.""" + + def test_c_int_from_bytes(self): + data = (42).to_bytes(4, "little") + obj = c_int.from_bytes(data) + self.assertEqual(obj.value, 42) + + def test_c_long_from_bytes(self): + data = (123456789).to_bytes(8, "little") + obj = c_long.from_bytes(data) + self.assertEqual(obj.value, 123456789) + + def test_c_uint_from_bytes(self): + data = (0xDEADBEEF).to_bytes(4, "little") + obj = c_uint.from_bytes(data) + self.assertEqual(obj.value, 0xDEADBEEF) + + +class CtypesGenericFrozenTest(unittest.TestCase): + """ctypes generic frozen to_bytes.""" + + def test_frozen_to_bytes(self): + memory = (42).to_bytes(ctypes.sizeof(ctypes.c_int), "little") + lib = inflater(memory) + obj = lib.inflate(ctypes.c_int, 0) + + self.assertEqual(obj.value, 42) + obj.freeze() + self.assertEqual(obj.value, 42) + self.assertEqual(len(obj.to_bytes()), ctypes.sizeof(ctypes.c_int)) + self.assertEqual(obj.to_bytes(), memory) + + +class FreezeTest(unittest.TestCase): + """Freeze semantics for primitive types.""" + + def test_frozen_c_int_rejects_writes(self): + data = (99).to_bytes(4, "little") + obj = c_int.from_bytes(data) + + self.assertTrue(obj._frozen) + self.assertEqual(obj.value, 99) + + with self.assertRaises(ValueError): + obj.value = 100 + + +class PtrTest(unittest.TestCase): + """Pointer operations.""" + + def test_try_unwrap_null_pointer(self): + """ptr.try_unwrap() should not crash with TypeError.""" + class test_t(struct): + a: c_int + p: ptr = ptr_to_self() + + memory = b"" + memory += (42).to_bytes(4, "little") + memory += (0).to_bytes(8, "little") + + test = test_t.from_bytes(memory) + result = test.p.try_unwrap() + # Address 0 is valid in our byte buffer, so it may or may not return None + # The important thing is that it doesn't crash + + def 
test_try_unwrap_valid_pointer(self): + class test_t(struct): + a: c_int + p: ptr = ptr_to_self() + + memory = b"" + memory += (42).to_bytes(4, "little") + memory += (0).to_bytes(8, "little") # points to self + + test = test_t.from_bytes(memory) + result = test.p.try_unwrap() + self.assertIsNotNone(result) + self.assertEqual(result.a.value, 42) + + def test_ptr_to_str(self): + class test_t(struct): + a: c_int + p: ptr = ptr_to_self() + + memory = b"" + memory += (1).to_bytes(4, "little") + memory += (0).to_bytes(8, "little") + + test = test_t.from_bytes(memory) + s = str(test.p) + self.assertIn("0x0", s) + + +class CStrTest(unittest.TestCase): + """c_str indexing, iteration, and mutation.""" + + def test_get_single_char(self): + memory = bytearray(b"Hello\x00") + lib = inflater(memory) + s = lib.inflate(c_str, 0) + + self.assertEqual(s.get(0), b"H") + self.assertEqual(s.get(1), b"e") + self.assertEqual(s.get(4), b"o") + + def test_iterate_string(self): + memory = bytearray(b"ABC\x00") + lib = inflater(memory) + s = lib.inflate(c_str, 0) + + chars = list(s) + self.assertEqual(chars, [b"A", b"B", b"C"]) + + def test_len(self): + memory = bytearray(b"Hello\x00") + lib = inflater(memory) + s = lib.inflate(c_str, 0) + + self.assertEqual(len(s), 5) + + def test_getitem(self): + memory = bytearray(b"XYZ\x00") + lib = inflater(memory) + s = lib.inflate(c_str, 0) + + self.assertEqual(s[0], b"X") + self.assertEqual(s[1], b"Y") + self.assertEqual(s[2], b"Z") + + def test_index_out_of_range(self): + memory = bytearray(b"Hi\x00") + lib = inflater(memory) + s = lib.inflate(c_str, 0) + + with self.assertRaises(IndexError): + s.get(100) + + def test_setitem(self): + """c_str.__setitem__ should not raise TypeError.""" + memory = bytearray(b"Hello\x00") + lib = inflater(memory) + s = lib.inflate(c_str, 0) + + s[0] = b"J" + self.assertEqual(s.get(0), b"J") + + def test_setitem_middle(self): + memory = bytearray(b"Hello\x00") + lib = inflater(memory) + s = lib.inflate(c_str, 0) + + 
s[1] = b"a" + self.assertEqual(s.get(1), b"a") + + +if __name__ == "__main__": + unittest.main() From 69f26570541b6eec1103e747c3b01abcd4513331 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 17:03:23 -0500 Subject: [PATCH 06/46] fix: avoid name clashes with address and other members --- libdestruct/common/struct/struct_impl.py | 25 +++++++++++++++++------- test/scripts/struct_unit_test.py | 22 +++++++++++++++++++++ 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index 8f84a8f..0595c7b 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -42,7 +42,7 @@ def __init__(self: struct_impl, resolver: Resolver | None = None, **kwargs: ...) # struct overrides the __init__ method, so we need to call the parent class __init__ method obj.__init__(self, resolver) - self.name = self.__class__.__name__ + self._struct_name = self.__class__.__name__ self._members = {} reference_type = self._reference_struct @@ -158,9 +158,17 @@ def compute_own_size(cls: type[struct_impl], reference_type: type) -> None: cls.size = size + @property + def address(self: struct_impl) -> int: + """Return the address of the struct, bypassing __getattribute__ to avoid member collisions.""" + resolver = object.__getattribute__(self, "resolver") + return resolver.resolve_address() + def get(self: struct_impl) -> str: """Return the value of the struct.""" - return f"{self.name}(address={self.address}, size={size_of(self)})" + name = object.__getattribute__(self, "_struct_name") + addr = struct_impl.address.fget(self) + return f"{name}(address={addr}, size={size_of(self)})" def to_bytes(self: struct_impl) -> bytes: """Return the serialized representation of the struct.""" @@ -180,18 +188,21 @@ def freeze(self: struct_impl) -> None: def to_str(self: struct_impl, indent: int = 0) -> str: """Return a string representation of the struct.""" + 
name = object.__getattribute__(self, "_struct_name") members = ",\n".join( - [f"{' ' * (indent + 4)}{name}: {member.to_str(indent + 4)}" for name, member in self._members.items()], + [f"{' ' * (indent + 4)}{n}: {member.to_str(indent + 4)}" for n, member in self._members.items()], ) - return f"""{self.name} {{ + return f"""{name} {{ {members} {" " * indent}}}""" def __repr__(self: struct_impl) -> str: """Return a string representation of the struct.""" - members = ",\n".join([f"{name}: {member}" for name, member in self._members.items()]) - return f"""{self.name} {{ - address: 0x{self.address:x}, + name = object.__getattribute__(self, "_struct_name") + addr = struct_impl.address.fget(self) + members = ",\n".join([f"{n}: {member}" for n, member in self._members.items()]) + return f"""{name} {{ + address: 0x{addr:x}, size: 0x{size_of(self):x}, members: {{ {members} diff --git a/test/scripts/struct_unit_test.py b/test/scripts/struct_unit_test.py index 283a378..f7379b2 100644 --- a/test/scripts/struct_unit_test.py +++ b/test/scripts/struct_unit_test.py @@ -34,6 +34,12 @@ class test_t(struct): self.assertEqual(test.address.value, 10) self.assertEqual(test.b.value, 20) + # repr and get must not crash when 'address' is a member + r = repr(test) + self.assertIn("test_t", r) + g = test.get() + self.assertIn("test_t", g) + def test_struct_with_size_field(self): class test_t(struct): size: c_int @@ -60,6 +66,16 @@ class test_t(struct): self.assertEqual(test.resolver.value, 11) self.assertEqual(test.x.value, 22) + # Internal address lookup must still work even though 'resolver' is a member + addr = test.address + self.assertIsInstance(addr, int) + + # repr/to_str must not crash + r = repr(test) + self.assertIn("test_t", r) + s = test.to_str() + self.assertIn("test_t", s) + def test_struct_with_name_field(self): class test_t(struct): name: c_int @@ -68,6 +84,12 @@ class test_t(struct): test = test_t.from_bytes(memory) self.assertEqual(test.name.value, 77) + # to_str/repr must 
use the struct type name, not the member value + s = test.to_str() + self.assertTrue(s.startswith("test_t")) + r = repr(test) + self.assertIn("test_t", r) + def test_nested_struct_with_collisions(self): class inner_t(struct): value: c_int From 919e79ee57d4edd441f45af001bd6e1f37778b56 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 17:44:29 -0500 Subject: [PATCH 07/46] docs: add main README for the repository --- README.md | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/README.md b/README.md index dabc79d..ee49a49 100644 --- a/README.md +++ b/README.md @@ -1 +1,65 @@ # libdestruct + +**Native structs made Pythonic.** + +libdestruct is a Python library for defining C-like data structures and inflating them directly from raw memory. It is designed for reverse engineering, binary analysis, and debugger scripting — anywhere you need to make sense of packed binary data without writing boilerplate. + +With libdestruct you can: +- Define C structs using Python type annotations +- Read and write typed values from raw memory buffers +- Follow pointers, including self-referential types (linked lists, trees) +- Work with arrays, enums, and nested structs +- Parse C struct definitions directly from source +- Snapshot values and track changes with freeze/diff/reset + +## Installation + +```bash +pip install git+https://github.com/mrindeciso/libdestruct.git +``` + +## Your first script + +```python +from libdestruct import struct, c_int, c_long, inflater + +class player_t(struct): + health: c_int + score: c_long + +memory = bytearray(b"\x64\x00\x00\x00\x39\x05\x00\x00\x00\x00\x00\x00") + +lib = inflater(memory) +player = lib.inflate(player_t, 0) + +print(player.health.value) # 100 +print(player.score.value) # 1337 + +# Write a new value back to memory +player.health.value = 200 +print(player.health.value) # 200 +``` + +You can also skip the Python definition and parse C directly: + +```python +from 
libdestruct.c.struct_parser import definition_to_type + +player_t = definition_to_type(""" + struct player_t { + int health; + long score; + }; +""") + +player = player_t.from_bytes(b"\x64\x00\x00\x00\x39\x05\x00\x00\x00\x00\x00\x00") +print(player.health.value) # 100 +``` + +## Project Links + +Documentation: [docs/](docs/) + +## License + +libdestruct is licensed under the [MIT License](LICENSE). From 3778fdad15b6a9ed5455bdf2e6a77391e7d6e645 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 17:44:43 -0500 Subject: [PATCH 08/46] docs: add basic project documentation --- docs/advanced/c_parser.md | 138 +++++++++++++++++++++++++++++++++ docs/advanced/forward_refs.md | 85 ++++++++++++++++++++ docs/advanced/freeze_diff.md | 117 ++++++++++++++++++++++++++++ docs/advanced/offset.md | 78 +++++++++++++++++++ docs/basics/arrays.md | 95 +++++++++++++++++++++++ docs/basics/enums.md | 75 ++++++++++++++++++ docs/basics/getting_started.md | 108 ++++++++++++++++++++++++++ docs/basics/pointers.md | 103 ++++++++++++++++++++++++ docs/basics/structs.md | 126 ++++++++++++++++++++++++++++++ docs/basics/types.md | 76 ++++++++++++++++++ docs/index.md | 82 ++++++++++++++++++++ docs/memory/inflater.md | 88 +++++++++++++++++++++ docs/memory/resolvers.md | 83 ++++++++++++++++++++ mkdocs.yml | 67 ++++++++++++++++ 14 files changed, 1321 insertions(+) create mode 100644 docs/advanced/c_parser.md create mode 100644 docs/advanced/forward_refs.md create mode 100644 docs/advanced/freeze_diff.md create mode 100644 docs/advanced/offset.md create mode 100644 docs/basics/arrays.md create mode 100644 docs/basics/enums.md create mode 100644 docs/basics/getting_started.md create mode 100644 docs/basics/pointers.md create mode 100644 docs/basics/structs.md create mode 100644 docs/basics/types.md create mode 100644 docs/index.md create mode 100644 docs/memory/inflater.md create mode 100644 docs/memory/resolvers.md create mode 100644 mkdocs.yml diff --git a/docs/advanced/c_parser.md 
b/docs/advanced/c_parser.md new file mode 100644 index 0000000..f9637b4 --- /dev/null +++ b/docs/advanced/c_parser.md @@ -0,0 +1,138 @@ +# C Struct Parser + +libdestruct can parse C struct definitions directly and convert them into usable Python types. This is powered by [pycparser](https://github.com/eliben/pycparser). + +## Basic Usage + +```python +from libdestruct.c.struct_parser import definition_to_type + +player_t = definition_to_type(""" + struct player_t { + int health; + unsigned int score; + long experience; + }; +""") + +memory = b"\x64\x00\x00\x00\xe8\x03\x00\x00\x39\x05\x00\x00\x00\x00\x00\x00" +player = player_t.from_bytes(memory) + +print(player.health.value) # 100 +print(player.score.value) # 1000 +print(player.experience.value) # 1337 +``` + +## Supported C Types + +The parser recognizes these C type specifiers: + +| C Type | Maps to | +|---|---| +| `int` | `c_int` | +| `unsigned int` | `c_uint` | +| `long` | `c_long` | +| `unsigned long` | `c_ulong` | +| `char` | `c_char` | + +Type names are normalized — `unsigned int`, `uint`, and `unsigned` all map to `c_uint`. + +## Pointers + +Single, double, and triple pointers are supported: + +```python +t = definition_to_type(""" + struct test { + int *p; + int **pp; + int ***ppp; + }; +""") +``` + +Self-referential pointers are automatically detected: + +```python +node_t = definition_to_type(""" + struct node { + int value; + struct node *next; + }; +""") +``` + +## Arrays + +Fixed-size arrays are converted to `array_of()`: + +```python +t = definition_to_type(""" + struct buffer { + int data[16]; + }; +""") +``` + +## Nested Structs + +Define multiple structs in a single definition: + +```python +t = definition_to_type(""" + struct point { + int x; + int y; + }; + + struct rect { + struct point origin; + struct point size; + }; +""") +``` + +The last struct in the definition is returned. All previous structs are cached and available for forward references. 
+ +## Include Directives + +The parser supports `#include` directives by running the C preprocessor: + +```python +t = definition_to_type(""" + #include <stdint.h> + + struct packet { + int type; + unsigned long length; + }; +""") +``` + +!!! warning + Include expansion requires a C preprocessor (`cpp`) to be available on your system. + +## GCC Attributes + +`__attribute__((...))` annotations are automatically stripped before parsing: + +```python +t = definition_to_type(""" + struct __attribute__((packed)) data { + int x; + int y; + }; +""") +``` + +## Caching + +Parsed struct definitions are cached globally. Parsing the same struct name twice returns the cached version: + +```python +# First call parses +t1 = definition_to_type("struct foo { int x; };") + +# Second call with same name returns cached type +t2 = definition_to_type("struct foo { int x; };") +``` diff --git a/docs/advanced/forward_refs.md b/docs/advanced/forward_refs.md new file mode 100644 index 0000000..22aeabc --- /dev/null +++ b/docs/advanced/forward_refs.md @@ -0,0 +1,85 @@ +# Forward References + +Forward references allow structs to reference types that haven't been fully defined yet — most commonly, the struct itself. This is essential for recursive data structures like linked lists and trees. + +## The `ptr["TypeName"]` Syntax + +Use a string inside `ptr[...]` to reference a type by name: + +```python +from libdestruct import struct, c_int, ptr + +class Node(struct): + val: c_int + next: ptr["Node"] +``` + +At inflation time, the string `"Node"` is resolved to the actual `Node` class. This works because Python's `from __future__ import annotations` (used internally by libdestruct) defers annotation evaluation.
+ +## The `ptr_to_self` Shortcut + +For the common case of a pointer to the enclosing struct, use `ptr_to_self`: + +```python +from libdestruct import struct, c_int, ptr_to_self + +class Node(struct): + val: c_int + next: ptr_to_self +``` + +This is equivalent to `ptr["Node"]` but doesn't require you to spell out the type name. + +## Linked List Example + +```python +from libdestruct import struct, c_int, ptr, inflater + +class Node(struct): + val: c_int + next: ptr["Node"] + +# Build a two-node list in memory +# Node layout: c_int(4) + ptr(8) = 12 bytes +memory = bytearray(24) + +import struct as pystruct +# Node 0 at offset 0 +memory[0:4] = pystruct.pack("<i", 10) +memory[4:12] = pystruct.pack("<Q", 12) # next -> offset 12 + +# Node 1 at offset 12 +memory[12:16] = pystruct.pack("<i", 20) +memory[16:24] = pystruct.pack("<Q", 0) # next -> null + +lib = inflater(memory) +head = lib.inflate(Node, 0) + +print(head.val.value) # 10 +print(head.next.unwrap().val.value) # 20 +print(head.next.unwrap().next.try_unwrap()) # None +``` + +## Tree Example + +```python +from libdestruct import struct, c_uint, ptr + +class TreeNode(struct): + data: c_uint + left: ptr["TreeNode"] + right: ptr["TreeNode"] +``` + +## How It Works + +When libdestruct encounters a `ptr["TypeName"]` annotation: + +1. It stores the string reference during struct class creation +2. At inflation time, it resolves the string against all known struct types +3. The resolved type is used as the pointer's wrapper type + +This means the referenced type must be defined before the struct is inflated, but not necessarily before it is declared. + +!!! info + Forward references are resolved through the `TypeRegistry` at inflation time. If the referenced type is not found, an error is raised. diff --git a/docs/advanced/freeze_diff.md b/docs/advanced/freeze_diff.md new file mode 100644 index 0000000..791cfcc --- /dev/null +++ b/docs/advanced/freeze_diff.md @@ -0,0 +1,117 @@ +# Freeze, Diff & Reset + +libdestruct supports snapshotting values for change tracking.
This is useful when you want to detect what changed in memory between two points in time. + +## Freezing + +Call `freeze()` to snapshot the current value: + +```python +from libdestruct import c_int, inflater + +memory = bytearray(4) +lib = inflater(memory) +x = lib.inflate(c_int, 0) + +x.value = 42 +x.freeze() +``` + +Once frozen, the object remembers its value at the time of the freeze. Further reads still return the live value from memory, but writes are blocked: + +```python +# Writing to a frozen object raises ValueError +try: + x.value = 99 +except ValueError: + print("Cannot write to frozen object") +``` + +## Diffing + +Use `diff()` to compare the frozen value with the current live value: + +```python +x.value = 42 +x.freeze() + +# Something changes the underlying memory +memory[0:4] = (100).to_bytes(4, "little") + +frozen_val, current_val = x.diff() +print(f"Was: {frozen_val}, Now: {current_val}") +# Was: 42, Now: 100 +``` + +!!! note + `diff()` only works on frozen objects. It returns a tuple of `(frozen_value, current_value)`. + +## Resetting + +Call `reset()` to restore the memory to the frozen value: + +```python +x.reset() +print(x.value) # 42 (restored to frozen value) +``` + +## Updating + +Call `update()` to re-freeze with the current live value, discarding the old snapshot: + +```python +x.update() +# The frozen value is now whatever is currently in memory +``` + +## Freezing Structs + +When you freeze a struct, all its members are frozen recursively: + +```python +from libdestruct import struct, c_int, inflater + +class pair_t(struct): + a: c_int + b: c_int + +memory = bytearray(8) +lib = inflater(memory) +pair = lib.inflate(pair_t, 0) + +pair.a.value = 10 +pair.b.value = 20 + +pair.freeze() + +# Both members are now frozen +try: + pair.a.value = 999 +except ValueError: + print("Frozen!") +``` + +## Workflow Example + +A typical workflow for detecting changes: + +```python +# 1. Inflate the struct +state = lib.inflate(game_state_t, addr) + +# 2. 
Freeze the current state +state.freeze() + +# 3. Let the program run (memory changes externally) +# ... + +# 4. Check what changed +for name in ["health", "score", "level"]: + member = getattr(state, name) + old, new = member.diff() + if old != new: + print(f"{name}: {old} -> {new}") + +# 5. Optionally reset to the frozen state +state.reset() +``` diff --git a/docs/advanced/offset.md b/docs/advanced/offset.md new file mode 100644 index 0000000..0a412d1 --- /dev/null +++ b/docs/advanced/offset.md @@ -0,0 +1,78 @@ +# Field Offsets + +By default, struct fields are laid out sequentially — each field starts immediately after the previous one. The `offset()` attribute lets you place a field at a specific byte offset. + +## Usage + +```python +from libdestruct import struct, c_int, offset + +class sparse_t(struct): + a: c_int + b: c_int = offset(16) + c: c_int +``` + +In this example: + +- `a` starts at offset 0 (4 bytes) +- `b` starts at offset 16 (skipping 12 bytes of padding) +- `c` starts at offset 20 (immediately after `b`) + +## Rules + +The offset must be **greater than or equal to** the current position in the struct. You cannot move backwards: + +```python +class invalid_t(struct): + a: c_int # offset 0, size 4 + b: c_int = offset(2) # ERROR: 2 < 4 (current offset) +``` + +This will raise a `ValueError` at struct creation time. + +## Use Cases + +### Matching Padded C Structs + +C compilers often insert padding for alignment. 
Use `offset()` to match the actual layout: + +```python +# C definition (with compiler padding): +# struct data { +# char flag; // offset 0 +# // 3 bytes padding +# int value; // offset 4 +# // no padding: value ends at offset 8, which is already 8-byte aligned +# long timestamp; // offset 8 +# }; + +class data_t(struct): + flag: c_char + value: c_int = offset(4) + timestamp: c_long = offset(8) +``` + +### Skipping Unknown Fields + +When reverse engineering, you might know the offset of a field but not what comes before it: + +```python +class mystery_t(struct): + known_field: c_int = offset(0x40) + another_field: c_long = offset(0x100) +``` + +## Combining with Other Attributes + +`offset()` can be combined with `Field` attributes using a tuple: + +```python +from libdestruct.common.field import Field + +class example_t(struct): + data: c_int = (Field(), offset(8)) +``` + +!!! note + When using tuples of attributes, only one `Field` is allowed per annotation. Multiple `OffsetAttribute`s are also not typical — use a single `offset()` to set the position. diff --git a/docs/basics/arrays.md b/docs/basics/arrays.md new file mode 100644 index 0000000..22c970b --- /dev/null +++ b/docs/basics/arrays.md @@ -0,0 +1,95 @@ +# Arrays + +Fixed-size arrays are created with `array_of()`.
+ +## Defining Arrays + +```python +from libdestruct import c_int, array_of, inflater + +# An array of 5 c_int values +int_array_t = array_of(c_int, 5) +``` + +## Inflating Arrays + +```python +memory = bytearray(20) # 5 * 4 bytes +lib = inflater(memory) +arr = lib.inflate(int_array_t, 0) +``` + +## Indexing + +Access elements by index: + +```python +memory = b"".join((i).to_bytes(4, "little") for i in range(5)) +lib = inflater(memory) +arr = lib.inflate(array_of(c_int, 5), 0) + +print(arr[0].value) # 0 +print(arr[2].value) # 2 +print(arr[4].value) # 4 +``` + +## Iteration + +Arrays are iterable: + +```python +for element in arr: + print(element.value) +# 0, 1, 2, 3, 4 +``` + +## Length + +```python +print(len(arr)) # 5 +``` + +## Containment + +```python +elem = arr[2] +print(elem in arr) # True +``` + +## Value Property + +The `.value` property returns a list of all element objects: + +```python +elements = arr.value +print(len(elements)) # 5 +print(elements[0].value) # 0 +``` + +## Serialization + +```python +raw = arr.to_bytes() +# or +raw = bytes(arr) +``` + +## Arrays in Structs + +Use `array_of()` as a type annotation: + +```python +from libdestruct import struct, c_int, array_of + +class matrix_row_t(struct): + values: array_of(c_int, 4) +``` + +```python +data = b"".join((i * 10).to_bytes(4, "little") for i in range(4)) +row = matrix_row_t.from_bytes(data) + +for v in row.values: + print(v.value) +# 0, 10, 20, 30 +``` diff --git a/docs/basics/enums.md b/docs/basics/enums.md new file mode 100644 index 0000000..072878d --- /dev/null +++ b/docs/basics/enums.md @@ -0,0 +1,75 @@ +# Enums + +libdestruct maps integer values in memory to Python `Enum` types using `enum_of()`. 
+ +## Defining Enums + +```python +from enum import IntEnum +from libdestruct import struct, c_int, enum_of + +class Color(IntEnum): + RED = 0 + GREEN = 1 + BLUE = 2 + +class pixel_t(struct): + color: enum_of(Color, c_int) + x: c_int + y: c_int +``` + +`enum_of(PythonEnum, backing_type)` creates a type that: + +- Reads the raw integer from memory using the backing type (`c_int`) +- Converts it to the corresponding `Enum` member (`Color.RED`, etc.) + +## Reading Enum Values + +```python +from libdestruct import inflater + +memory = bytearray(12) +memory[0:4] = (1).to_bytes(4, "little") # Color.GREEN + +lib = inflater(memory) +pixel = lib.inflate(pixel_t, 0) + +print(pixel.color.value) # Color.GREEN +``` + +## Lenient Mode + +By default, enums operate in lenient mode: if the integer value does not match any enum member, the raw integer is returned instead of raising an error. + +```python +memory[0:4] = (99).to_bytes(4, "little") # Not a valid Color +pixel = lib.inflate(pixel_t, 0) + +print(pixel.color.value) # 99 (raw integer, no error) +``` + +## Standalone Enums + +You can also use `enum` directly (without `enum_of`): + +```python +from libdestruct import enum, inflater + +memory = (2).to_bytes(4, "little") +lib = inflater(memory) + +# The enum() constructor takes a resolver, a Python Enum, and a backing type +``` + +!!! tip + For struct fields, `enum_of()` is the recommended API. It automatically handles type registration and inflation. 
+ +## Serialization + +Enum values serialize through their backing type: + +```python +raw = pixel.color.to_bytes() +print(len(raw)) # 4 (size of c_int) +``` diff --git a/docs/basics/getting_started.md b/docs/basics/getting_started.md new file mode 100644 index 0000000..a35e6df --- /dev/null +++ b/docs/basics/getting_started.md @@ -0,0 +1,108 @@ +# Getting Started + +## Requirements + +- Python 3.10 or later +- `typing_extensions` +- `pycparser` (for C struct parsing) + +## Installation + +=== "pip" + + ```bash + pip install libdestruct + ``` + +=== "From source" + + ```bash + git clone https://github.com/mrindeciso/libdestruct.git + cd libdestruct + pip install . + ``` + +## Core Concepts + +libdestruct revolves around three ideas: + +1. **Types** — Python classes that mirror C types (`c_int`, `c_long`, `struct`, `ptr`, etc.) +2. **Memory** — a `bytes` or `bytearray` buffer that holds the raw data +3. **Inflater** — the bridge that reads memory and materializes typed objects + +### A Minimal Example + +```python +from libdestruct import c_int, inflater + +memory = (42).to_bytes(4, "little") +lib = inflater(memory) +value = lib.inflate(c_int, 0) + +print(value.value) # 42 +``` + +Here, `inflater(memory)` creates a memory context, and `inflate(c_int, 0)` reads a 4-byte signed integer at offset 0. + +### Working with Structs + +Structs let you group fields together, just like in C: + +```python +from libdestruct import struct, c_int, c_long, inflater + +class point_t(struct): + x: c_int + y: c_int + +memory = bytearray(8) +lib = inflater(memory) +point = lib.inflate(point_t, 0) + +# Write values +point.x.value = 10 +point.y.value = 20 + +# Read them back +print(point.x.value) # 10 +print(point.y.value) # 20 +``` + +!!! note + When the backing memory is a `bytearray`, writes through `.value` are reflected in the underlying buffer. With immutable `bytes`, writes will raise an error. 
+ +### Reading and Writing + +Every libdestruct object exposes: + +| Property / Method | Description | +|---|---| +| `.value` | Get or set the current value | +| `.address` | The address (offset) in memory | +| `.to_bytes()` | Serialize the object to bytes | +| `bytes(obj)` | Same as `.to_bytes()` | +| `.to_str()` | Human-readable string representation | + +### Serialization Round-Trip + +You can serialize any object to bytes and deserialize it back: + +```python +from libdestruct import struct, c_int + +class pair_t(struct): + a: c_int + b: c_int + +# Create from raw bytes +original = pair_t.from_bytes(b"\x01\x00\x00\x00\x02\x00\x00\x00") + +# Serialize +data = original.to_bytes() + +# Deserialize +copy = pair_t.from_bytes(data) + +assert copy.a.value == 1 +assert copy.b.value == 2 +``` diff --git a/docs/basics/pointers.md b/docs/basics/pointers.md new file mode 100644 index 0000000..7e2947b --- /dev/null +++ b/docs/basics/pointers.md @@ -0,0 +1,103 @@ +# Pointers + +libdestruct supports typed pointers that can be dereferenced to follow references in memory. + +## Defining Pointers in Structs + +Use `ptr` with `ptr_to()` to declare a typed pointer field: + +```python +from libdestruct import struct, c_int, ptr_to, inflater + +class data_t(struct): + value: c_int + next: ptr_to(c_int) +``` + +A pointer occupies 8 bytes (64-bit) and stores an address into the memory buffer. 
+ +## Dereferencing + +Use `unwrap()` to follow a pointer: + +```python +memory = bytearray(16) +lib = inflater(memory) + +# Set up: value=42 at offset 0, pointer to offset 12 at offset 4 +import struct as pystruct +memory[0:4] = pystruct.pack(" offset 12 +memory[12:16] = pystruct.pack(" offset 12 +memory[0:4] = pystruct.pack(" null +memory[12:16] = pystruct.pack(" Date: Tue, 31 Mar 2026 18:08:35 -0500 Subject: [PATCH 09/46] feat: implement bitfield support --- libdestruct/__init__.py | 4 +- libdestruct/c/struct_parser.py | 40 +++++- libdestruct/common/bitfield/__init__.py | 11 ++ libdestruct/common/bitfield/bitfield.py | 100 ++++++++++++++ libdestruct/common/bitfield/bitfield_field.py | 40 ++++++ libdestruct/common/bitfield/bitfield_of.py | 30 +++++ .../common/bitfield/bitfield_tracker.py | 118 +++++++++++++++++ libdestruct/common/struct/struct_impl.py | 124 +++++++++++------- 8 files changed, 421 insertions(+), 46 deletions(-) create mode 100644 libdestruct/common/bitfield/__init__.py create mode 100644 libdestruct/common/bitfield/bitfield.py create mode 100644 libdestruct/common/bitfield/bitfield_field.py create mode 100644 libdestruct/common/bitfield/bitfield_of.py create mode 100644 libdestruct/common/bitfield/bitfield_tracker.py diff --git a/libdestruct/__init__.py b/libdestruct/__init__.py index 4c5826c..61703d6 100644 --- a/libdestruct/__init__.py +++ b/libdestruct/__init__.py @@ -15,14 +15,17 @@ from libdestruct.c import c_int, c_long, c_str, c_uint, c_ulong from libdestruct.common.array import array, array_of from libdestruct.common.attributes import offset +from libdestruct.common.bitfield import bitfield_of from libdestruct.common.enum import enum, enum_of from libdestruct.common.ptr.ptr import ptr from libdestruct.common.struct import ptr_to, ptr_to_self, struct from libdestruct.libdestruct import inflate, inflater __all__ = [ + "Resolver", "array", "array_of", + "bitfield_of", "c_int", "c_long", "c_str", @@ -36,6 +39,5 @@ "ptr", "ptr_to", 
"ptr_to_self", - "Resolver", "struct", ] diff --git a/libdestruct/c/struct_parser.py b/libdestruct/c/struct_parser.py index 9c4195e..fd89b7b 100644 --- a/libdestruct/c/struct_parser.py +++ b/libdestruct/c/struct_parser.py @@ -14,10 +14,37 @@ from pycparser import c_ast, c_parser +from libdestruct.c.c_integer_types import c_char, c_int, c_long, c_short, c_uchar, c_uint, c_ulong, c_ushort from libdestruct.common.array.array_of import array_of +from libdestruct.common.bitfield.bitfield_of import bitfield_of from libdestruct.common.ptr.ptr_factory import ptr_to, ptr_to_self from libdestruct.common.struct import struct +# Mapping from ctypes types to libdestruct native integer types (needed for bitfields) +_CTYPES_TO_NATIVE = { + ctypes.c_byte: c_char, + ctypes.c_char: c_char, + ctypes.c_ubyte: c_uchar, + ctypes.c_short: c_short, + ctypes.c_ushort: c_ushort, + ctypes.c_int: c_int, + ctypes.c_uint: c_uint, + ctypes.c_long: c_long, + ctypes.c_ulong: c_ulong, + ctypes.c_longlong: c_long, + ctypes.c_ulonglong: c_ulong, + ctypes.c_int8: c_char, + ctypes.c_int16: c_short, + ctypes.c_int32: c_int, + ctypes.c_int64: c_long, + ctypes.c_uint8: c_uchar, + ctypes.c_uint16: c_ushort, + ctypes.c_uint32: c_uint, + ctypes.c_uint64: c_ulong, + ctypes.c_size_t: c_ulong, + ctypes.c_ssize_t: c_long, +} + if TYPE_CHECKING: from libdestruct.common.obj import obj @@ -81,14 +108,25 @@ def struct_to_type(struct_node: c_ast.Struct) -> type[struct]: elif not struct_node.decls: raise ValueError("Struct must have fields.") + class_dict = {} + for decl in struct_node.decls: name = decl.name typ = type_decl_to_type(decl.type, struct_node) fields[name] = typ + # Handle bitfields: decl.bitsize is set when the declaration has ": N" + if decl.bitsize is not None: + bit_width = int(decl.bitsize.value) + # Convert ctypes types to native libdestruct types for bitfield backing + native_type = _CTYPES_TO_NATIVE.get(typ, typ) + class_dict[name] = bitfield_of(native_type, bit_width) + fields[name] = native_type 
+ type_name = struct_node.name if struct_node.name else "anon_struct" - return type(type_name, (struct,), {"__annotations__": fields}) + class_dict["__annotations__"] = fields + return type(type_name, (struct,), class_dict) def ptr_to_type(ptr: c_ast.PtrDecl, parent: c_ast.Struct | None = None) -> type[obj]: diff --git a/libdestruct/common/bitfield/__init__.py b/libdestruct/common/bitfield/__init__.py new file mode 100644 index 0000000..fd055c9 --- /dev/null +++ b/libdestruct/common/bitfield/__init__.py @@ -0,0 +1,11 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from libdestruct.common.bitfield.bitfield import bitfield +from libdestruct.common.bitfield.bitfield_field import BitfieldField +from libdestruct.common.bitfield.bitfield_of import bitfield_of + +__all__ = ["BitfieldField", "bitfield", "bitfield_of"] diff --git a/libdestruct/common/bitfield/bitfield.py b/libdestruct/common/bitfield/bitfield.py new file mode 100644 index 0000000..7d4bd58 --- /dev/null +++ b/libdestruct/common/bitfield/bitfield.py @@ -0,0 +1,100 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.common.obj import obj + +if TYPE_CHECKING: # pragma: no cover + from libdestruct.backing.resolver import Resolver + + +class bitfield(obj): + """A bitfield within a backing integer type.""" + + _backing_instance: obj + """The inflated backing integer instance (shared with sibling bitfields).""" + + _bit_offset: int + """The starting bit position within the backing integer.""" + + _bit_width: int + """The number of bits this field occupies.""" + + _signed: bool + """Whether to sign-extend when reading.""" + + _is_group_owner: bool + """Whether this bitfield owns the backing bytes (first in its group).""" + + def __init__( + self: bitfield, + resolver: Resolver, + backing_instance: obj, + bit_offset: int, + bit_width: int, + signed: bool, + is_group_owner: bool, + ) -> None: + """Initialize the bitfield. + + Args: + resolver: The backing resolver. + backing_instance: The already-inflated backing integer (shared across bitfields in the same group). + bit_offset: The starting bit position within the backing integer. + bit_width: The number of bits this field occupies. + signed: Whether to sign-extend when reading. + is_group_owner: Whether this bitfield is the first in its group (owns the backing bytes). + """ + super().__init__(resolver) + self._backing_instance = backing_instance + self._bit_offset = bit_offset + self._bit_width = bit_width + self._signed = signed + self._mask = (1 << bit_width) - 1 + self._is_group_owner = is_group_owner + # Owner reports the full backing size; non-owners report 0 + self.size = backing_instance.size if is_group_owner else 0 + + def get(self: bitfield) -> int: + """Return the value of the bitfield.""" + raw = self._backing_instance.get() + # For signed backing types, raw may be negative. Work with unsigned representation. 
+ if raw < 0: + raw += 1 << (self._backing_instance.size * 8) + value = (raw >> self._bit_offset) & self._mask + if self._signed and (value >> (self._bit_width - 1)) & 1: + value -= 1 << self._bit_width + return value + + def _set(self: bitfield, value: int) -> None: + """Set the value of the bitfield.""" + masked_value = value & self._mask + raw = self._backing_instance.get() + if raw < 0: + raw += 1 << (self._backing_instance.size * 8) + raw = (raw & ~(self._mask << self._bit_offset)) | (masked_value << self._bit_offset) + total_bits = self._backing_instance.size * 8 + is_signed = hasattr(self._backing_instance, "signed") and self._backing_instance.signed + if is_signed and raw >= (1 << (total_bits - 1)): + raw -= 1 << total_bits + self._backing_instance._set(raw) + + def to_bytes(self: bitfield) -> bytes: + """Return the serialized representation of the backing type. + + Only the group owner emits bytes; non-owners return empty bytes + to avoid duplication when the struct serializes all members. + """ + if self._is_group_owner: + return self._backing_instance.to_bytes() + return b"" + + def to_str(self: bitfield, _: int = 0) -> str: + """Return a string representation of the bitfield.""" + return f"{self.get()}" diff --git a/libdestruct/common/bitfield/bitfield_field.py b/libdestruct/common/bitfield/bitfield_field.py new file mode 100644 index 0000000..7dd51d5 --- /dev/null +++ b/libdestruct/common/bitfield/bitfield_field.py @@ -0,0 +1,40 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.common.field import Field + +if TYPE_CHECKING: # pragma: no cover + from libdestruct.backing.resolver import Resolver + from libdestruct.common.obj import obj + + +class BitfieldField(Field): + """A generator for a bitfield within a struct.""" + + base_type: type[obj] + + def __init__(self: BitfieldField, backing_type: type, bit_width: int) -> None: + """Initialize the bitfield field. + + Args: + backing_type: The backing integer type (e.g., c_int, c_uint). + bit_width: The number of bits this field occupies. + """ + self.backing_type = backing_type + self.bit_width = bit_width + self.base_type = backing_type + + def inflate(self: BitfieldField, resolver: Resolver) -> obj: + """Inflate the field. Not used directly — struct_impl handles bitfield inflation.""" + raise NotImplementedError("BitfieldField inflation is handled by struct_impl.") + + def get_size(self: BitfieldField) -> int: + """Returns 0 — bitfields do not independently advance the struct offset.""" + return 0 diff --git a/libdestruct/common/bitfield/bitfield_of.py b/libdestruct/common/bitfield/bitfield_of.py new file mode 100644 index 0000000..3bd9d6a --- /dev/null +++ b/libdestruct/common/bitfield/bitfield_of.py @@ -0,0 +1,30 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.common.bitfield.bitfield_field import BitfieldField + +if TYPE_CHECKING: # pragma: no cover + from libdestruct.common.obj import obj + + +def bitfield_of(backing_type: type[obj], bit_width: int) -> BitfieldField: + """Create a bitfield descriptor for use in struct annotations. + + Args: + backing_type: The backing integer type (e.g., c_int, c_uint). 
+ bit_width: The number of bits this field occupies. + """ + if bit_width <= 0: + raise ValueError("Bit width must be positive.") + + if hasattr(backing_type, "size") and bit_width > backing_type.size * 8: + raise ValueError(f"Bit width {bit_width} exceeds backing type size ({backing_type.size * 8} bits).") + + return BitfieldField(backing_type, bit_width) diff --git a/libdestruct/common/bitfield/bitfield_tracker.py b/libdestruct/common/bitfield/bitfield_tracker.py new file mode 100644 index 0000000..86fbb1c --- /dev/null +++ b/libdestruct/common/bitfield/bitfield_tracker.py @@ -0,0 +1,118 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.common.bitfield.bitfield import bitfield + +if TYPE_CHECKING: # pragma: no cover + from libdestruct.backing.resolver import Resolver + from libdestruct.common.bitfield.bitfield_field import BitfieldField + from libdestruct.common.obj import obj + from libdestruct.common.type_registry import TypeRegistry + + +class BitfieldTracker: + """Tracks bitfield group state during struct field inflation. + + Consecutive bitfields with the same backing type are packed into a shared + backing integer instance. This class manages the grouping, bit offset + tracking, and byte offset advancement. 
+ """ + + def __init__(self: BitfieldTracker) -> None: + """Initialize the tracker with no active group.""" + self._bit_offset: int = 0 + self._backing_type: type | None = None + self._backing_instance: obj | None = None + + @property + def active(self: BitfieldTracker) -> bool: + """Return whether a bitfield group is currently active.""" + return self._backing_type is not None + + def flush(self: BitfieldTracker) -> int: + """Close the current bitfield group and return the byte size to advance. + + Returns: + The backing type's byte size if a group was active, 0 otherwise. + """ + if self._backing_type is not None: + size = self._backing_type.size + self._backing_type = None + self._backing_instance = None + self._bit_offset = 0 + return size + return 0 + + def create_bitfield( + self: BitfieldTracker, + field: BitfieldField, + inflater: TypeRegistry, + resolver: Resolver, + current_offset: int, + ) -> tuple[bitfield, int]: + """Create a bitfield instance, managing group transitions. + + Args: + field: The BitfieldField descriptor. + inflater: The type registry for inflating the backing type. + resolver: The struct's resolver. + current_offset: The current byte offset in the struct. + + Returns: + A tuple of (bitfield_instance, byte_offset_delta). + The delta is nonzero only when a new group starts (flushing the old one). 
+ """ + backing_type = field.backing_type + bit_width = field.bit_width + backing_size_bits = backing_type.size * 8 + offset_delta = 0 + + # Start a new group if the backing type changed or bits would overflow + if self._backing_type is not backing_type or self._bit_offset + bit_width > backing_size_bits: + offset_delta = self.flush() + self._backing_type = backing_type + self._backing_instance = inflater.inflater_for(backing_type)( + resolver.relative_from_own(current_offset + offset_delta, 0), + ) + + is_owner = self._bit_offset == 0 + signed = getattr(backing_type, "signed", False) + + result = bitfield( + resolver.relative_from_own(current_offset + offset_delta, 0), + self._backing_instance, + self._bit_offset, + bit_width, + signed, + is_owner, + ) + self._bit_offset += bit_width + return result, offset_delta + + def compute_size(self: BitfieldTracker, field: BitfieldField) -> int: + """Account for a bitfield during size computation, without inflating. + + Args: + field: The BitfieldField descriptor. + + Returns: + The byte size delta (nonzero only when a new group starts). 
+ """ + backing_type = field.backing_type + bit_width = field.bit_width + backing_size_bits = backing_type.size * 8 + size_delta = 0 + + if self._backing_type is not backing_type or self._bit_offset + bit_width > backing_size_bits: + size_delta = self.flush() + self._backing_type = backing_type + + self._bit_offset += bit_width + return size_delta diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index 0595c7b..b46890b 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -11,6 +11,8 @@ from libdestruct.backing.fake_resolver import FakeResolver from libdestruct.backing.resolver import Resolver from libdestruct.common.attributes.offset_attribute import OffsetAttribute +from libdestruct.common.bitfield.bitfield_field import BitfieldField +from libdestruct.common.bitfield.bitfield_tracker import BitfieldTracker from libdestruct.common.field import Field from libdestruct.common.obj import obj from libdestruct.common.struct import struct @@ -75,87 +77,121 @@ def _inflate_struct_attributes( reference_type: type, ) -> None: current_offset = 0 + bf_tracker = BitfieldTracker() for name, annotation, reference in iterate_annotation_chain(reference_type, terminate_at=struct): - if name in reference.__dict__: - # Field associated with the annotation - attrs = getattr(reference, name) - - # If attrs is not a tuple, we need to convert it to a tuple - if not isinstance(attrs, tuple): - attrs = (attrs,) + resolved_type, bitfield_field, explicit_offset = self._resolve_field( + name, annotation, reference, inflater, reference_type, + ) + + if explicit_offset is not None: + if explicit_offset < current_offset: + raise ValueError("Offset must be greater than the current size.") + current_offset = explicit_offset + + if bitfield_field: + result, offset_delta = bf_tracker.create_bitfield( + bitfield_field, inflater, resolver, current_offset, + ) + current_offset += offset_delta + else: + 
current_offset += bf_tracker.flush() + result = resolved_type(resolver.relative_from_own(current_offset, 0)) + current_offset += size_of(result) - # Assert that in all attributes, there is only one Field - if sum(isinstance(attr, Field) for attr in attrs) > 1: - raise ValueError("Only one Field is allowed per attribute.") + self._members[name] = result - resolved_type = None + current_offset += bf_tracker.flush() - for attr in attrs: - if isinstance(attr, Field): - resolved_type = inflater.inflater_for( - (attr, annotation), - owner=(self, reference_type._type_impl), - ) - elif isinstance(attr, OffsetAttribute): - offset = attr.offset - if offset < current_offset: - raise ValueError("Offset must be greater than the current size.") - current_offset = offset - else: - raise TypeError("Only Field and OffsetAttribute are allowed in attributes.") - - # If we don't have a Field, we need to inflate the type as if we have no attributes - if not resolved_type: - resolved_type = inflater.inflater_for(annotation, owner=(self, reference_type._type_impl)) + def _resolve_field( + self: struct_impl, + name: str, + annotation: type, + reference: type, + inflater: TypeRegistry, + reference_type: type, + ) -> tuple[object | None, BitfieldField | None, int | None]: + """Resolve a single struct field annotation to its inflater or BitfieldField. + + Returns: + A tuple of (resolved_inflater, bitfield_field, explicit_offset). + Either resolved_inflater or bitfield_field will be non-None (not both). + explicit_offset is set when an OffsetAttribute is present. 
+ """ + if name not in reference.__dict__: + return inflater.inflater_for(annotation, owner=(self, reference_type._type_impl)), None, None + + attrs = getattr(reference, name) + if not isinstance(attrs, tuple): + attrs = (attrs,) + + if sum(isinstance(attr, Field) for attr in attrs) > 1: + raise ValueError("Only one Field is allowed per attribute.") + + resolved_type = None + bitfield_field = None + explicit_offset = None + + for attr in attrs: + if isinstance(attr, BitfieldField): + bitfield_field = attr + elif isinstance(attr, Field): + resolved_type = inflater.inflater_for( + (attr, annotation), owner=(self, reference_type._type_impl), + ) + elif isinstance(attr, OffsetAttribute): + explicit_offset = attr.offset else: - resolved_type = inflater.inflater_for(annotation, owner=(self, reference_type._type_impl)) + raise TypeError("Only Field, BitfieldField, and OffsetAttribute are allowed in attributes.") - result = resolved_type(resolver.relative_from_own(current_offset, 0)) - self._members[name] = result - current_offset += size_of(result) + if not resolved_type and not bitfield_field: + resolved_type = inflater.inflater_for(annotation, owner=(self, reference_type._type_impl)) + + return resolved_type, bitfield_field, explicit_offset @classmethod def compute_own_size(cls: type[struct_impl], reference_type: type) -> None: """Compute the size of the struct.""" size = 0 + bf_tracker = BitfieldTracker() for name, annotation, reference in iterate_annotation_chain(reference_type, terminate_at=struct): + bitfield_field = None + attribute = None + if name in reference.__dict__: - # Field associated with the annotation attrs = getattr(reference, name) - - # If attrs is not a tuple, we need to convert it to a tuple if not isinstance(attrs, tuple): attrs = (attrs,) - # Assert that in all attributes, there is only one Field if sum(isinstance(attr, Field) for attr in attrs) > 1: raise ValueError("Only one Field is allowed per attribute.") - attribute = None - for attr in 
attrs: - if isinstance(attr, Field): + if isinstance(attr, BitfieldField): + bitfield_field = attr + elif isinstance(attr, Field): attribute = cls._inflater.inflater_for((attr, annotation), (None, cls))(None) elif isinstance(attr, OffsetAttribute): offset = attr.offset if offset < size: raise ValueError("Offset must be greater than the current size.") size = offset - else: - raise TypeError("Only Field and OffsetAttribute are allowed in attributes.") - # If we don't have a Field, we need to inflate the attribute as if we have no attributes - if not attribute: + if not attribute and not bitfield_field: attribute = cls._inflater.inflater_for(annotation, (None, cls)) elif isinstance(annotation, Field): attribute = cls._inflater.inflater_for((annotation, annotation.base_type), (None, cls))(None) else: attribute = cls._inflater.inflater_for(annotation, (None, cls)) - size += size_of(attribute) + if bitfield_field: + size += bf_tracker.compute_size(bitfield_field) + else: + size += bf_tracker.flush() + size += size_of(attribute) + size += bf_tracker.flush() cls.size = size @property From 54ef86aaa18c22634e64ee13396108415a1887e3 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 18:08:54 -0500 Subject: [PATCH 10/46] docs: add bitfield documentation --- docs/advanced/bitfields.md | 110 +++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 2 files changed, 111 insertions(+) create mode 100644 docs/advanced/bitfields.md diff --git a/docs/advanced/bitfields.md b/docs/advanced/bitfields.md new file mode 100644 index 0000000..e96d634 --- /dev/null +++ b/docs/advanced/bitfields.md @@ -0,0 +1,110 @@ +# Bitfields + +Bitfields let you pack multiple values into a single integer, just like C bitfields. This is common in hardware registers, protocol headers, and OS structures. 
+ +## Defining Bitfields + +Use `bitfield_of(backing_type, bit_width)` as a struct field descriptor: + +```python +from libdestruct import struct, c_uint, bitfield_of + +class flags_t(struct): + read: c_uint = bitfield_of(c_uint, 1) + write: c_uint = bitfield_of(c_uint, 1) + execute: c_uint = bitfield_of(c_uint, 1) + reserved: c_uint = bitfield_of(c_uint, 29) +``` + +All four fields share a single `c_uint` (4 bytes). The bits are allocated left-to-right: + +- `read` occupies bit 0 +- `write` occupies bit 1 +- `execute` occupies bit 2 +- `reserved` occupies bits 3-31 + +## Reading Bitfields + +```python +# Bit pattern: 0b101 = read=1, write=0, execute=1 +memory = (0b101).to_bytes(4, "little") +flags = flags_t.from_bytes(memory) + +print(flags.read.value) # 1 +print(flags.write.value) # 0 +print(flags.execute.value) # 1 +``` + +## Writing Bitfields + +Writes only affect the relevant bits — other bits are preserved: + +```python +from libdestruct import inflater + +memory = bytearray(4) +lib = inflater(memory) +flags = lib.inflate(flags_t, 0) + +flags.read.value = 1 +flags.execute.value = 1 + +print(flags.read.value) # 1 +print(flags.write.value) # 0 (untouched) +print(flags.execute.value) # 1 +``` + +## Signed Bitfields + +Use a signed backing type (e.g., `c_int`) for sign-extended extraction: + +```python +from libdestruct import struct, c_int, bitfield_of + +class example_t(struct): + val: c_int = bitfield_of(c_int, 4) + +# 4-bit signed: 0b1111 = -1 +memory = (0b1111).to_bytes(4, "little") +test = example_t.from_bytes(memory) +print(test.val.value) # -1 +``` + +## Multiple Backing Types + +When consecutive bitfields use different backing types, a new group starts automatically: + +```python +class mixed_t(struct): + a: c_uint = bitfield_of(c_uint, 3) # bits 0-2 of a c_uint + b: c_uint = bitfield_of(c_uint, 5) # bits 3-7 of the same c_uint + c: c_long = bitfield_of(c_long, 16) # bits 0-15 of a new c_long +``` + +`a` and `b` share 4 bytes, `c` starts a new 8-byte 
group. Total struct size: 12 bytes. + +## C Parser Support + +The C struct parser handles bitfield syntax: + +```python +from libdestruct.c.struct_parser import definition_to_type + +flags_t = definition_to_type(""" + struct flags_t { + unsigned int read:1; + unsigned int write:1; + unsigned int execute:1; + unsigned int reserved:29; + }; +""") +``` + +## Serialization + +Bitfield structs serialize correctly — shared backing bytes are emitted once: + +```python +data = flags.to_bytes() +assert len(data) == 4 # one c_uint +``` diff --git a/mkdocs.yml b/mkdocs.yml index 8e20cf8..7f29745 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -61,6 +61,7 @@ nav: - The Inflater: memory/inflater.md - Resolvers: memory/resolvers.md - Advanced: + - Bitfields: advanced/bitfields.md - Freeze, Diff & Reset: advanced/freeze_diff.md - C Struct Parser: advanced/c_parser.md - Forward References: advanced/forward_refs.md From 9aeae00567b4d42e319f9136d671bba6acc00996 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 18:09:12 -0500 Subject: [PATCH 11/46] test: add bitfield tests --- test/scripts/bitfield_unit_test.py | 143 +++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 test/scripts/bitfield_unit_test.py diff --git a/test/scripts/bitfield_unit_test.py b/test/scripts/bitfield_unit_test.py new file mode 100644 index 0000000..8c44540 --- /dev/null +++ b/test/scripts/bitfield_unit_test.py @@ -0,0 +1,143 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +import unittest + +from libdestruct import c_int, c_uint, c_long, struct, bitfield_of +from libdestruct.c.struct_parser import definition_to_type + + +class BitfieldReadTest(unittest.TestCase): + """Bitfield read operations.""" + + def test_single_bitfield_read(self): + class test_t(struct): + flags: c_uint = bitfield_of(c_uint, 3) + + # 0b00000101 = 5, low 3 bits = 5 + memory = (0b00000101).to_bytes(4, "little") + test = test_t.from_bytes(memory) + self.assertEqual(test.flags.value, 5) + + def test_multiple_bitfields_packing(self): + class test_t(struct): + a: c_uint = bitfield_of(c_uint, 3) + b: c_uint = bitfield_of(c_uint, 5) + + # a uses bits 0-2, b uses bits 3-7 + # a=5 (0b101), b=10 (0b01010) -> combined: 0b01010_101 = 0x55 + memory = (0b01010_101).to_bytes(4, "little") + test = test_t.from_bytes(memory) + self.assertEqual(test.a.value, 5) + self.assertEqual(test.b.value, 10) + + # Struct should be 4 bytes total (both share one c_uint) + self.assertEqual(test.to_bytes(), memory) + + def test_bitfield_signed(self): + class test_t(struct): + val: c_int = bitfield_of(c_int, 4) + + # 4-bit signed: 0b1111 = -1 + memory = (0b1111).to_bytes(4, "little") + test = test_t.from_bytes(memory) + self.assertEqual(test.val.value, -1) + + # 4-bit signed: 0b0111 = 7 + memory2 = (0b0111).to_bytes(4, "little") + test2 = test_t.from_bytes(memory2) + self.assertEqual(test2.val.value, 7) + + def test_bitfield_full_width(self): + class test_t(struct): + val: c_uint = bitfield_of(c_uint, 32) + + memory = (0xDEADBEEF).to_bytes(4, "little") + test = test_t.from_bytes(memory) + self.assertEqual(test.val.value, 0xDEADBEEF) + + +class BitfieldWriteTest(unittest.TestCase): + """Bitfield write operations.""" + + def test_bitfield_write(self): + class test_t(struct): + a: c_uint = bitfield_of(c_uint, 3) + b: c_uint = bitfield_of(c_uint, 5) + + memory = bytearray(4) + from libdestruct import inflater + lib = inflater(memory) + test = lib.inflate(test_t, 0) + + test.a.value = 7 # 0b111 
+ test.b.value = 15 # 0b01111 + + self.assertEqual(test.a.value, 7) + self.assertEqual(test.b.value, 15) + + # Verify only relevant bits changed + raw = int.from_bytes(memory[:4], "little") + self.assertEqual(raw & 0b111, 7) # bits 0-2 + self.assertEqual((raw >> 3) & 0b11111, 15) # bits 3-7 + + +class BitfieldRoundTripTest(unittest.TestCase): + """Bitfield serialization.""" + + def test_bitfield_round_trip(self): + class test_t(struct): + a: c_uint = bitfield_of(c_uint, 3) + b: c_uint = bitfield_of(c_uint, 5) + c: c_int + + # a=3, b=10, c=42 + # a=0b011, b=0b01010 -> byte 0-3: 0b01010_011 = 0x53 + val = 0b01010_011 + memory = val.to_bytes(4, "little") + (42).to_bytes(4, "little") + + test = test_t.from_bytes(memory) + self.assertEqual(test.a.value, 3) + self.assertEqual(test.b.value, 10) + self.assertEqual(test.c.value, 42) + + self.assertEqual(test.to_bytes(), memory) + + +class BitfieldBackingTypeTest(unittest.TestCase): + """Bitfield backing type transitions.""" + + def test_bitfield_backing_type_change(self): + class test_t(struct): + a: c_uint = bitfield_of(c_uint, 3) + b: c_uint = bitfield_of(c_uint, 5) + # Different backing type -> new group + c: c_long = bitfield_of(c_long, 16) + + # a+b share 4 bytes (c_uint), c uses 8 bytes (c_long) + # Total = 12 bytes + memory = b"\x00" * 12 + test = test_t.from_bytes(memory) + self.assertEqual(len(test.to_bytes()), 12) + + +class BitfieldCParserTest(unittest.TestCase): + """C parser bitfield support.""" + + def test_bitfield_c_parser(self): + t = definition_to_type("struct test { unsigned int flags:3; unsigned int reserved:5; };") + self.assertIn("flags", t.__annotations__) + self.assertIn("reserved", t.__annotations__) + + # Inflate and verify + memory = (0b01010_101).to_bytes(4, "little") + test = t.from_bytes(memory) + self.assertEqual(test.flags.value, 5) + self.assertEqual(test.reserved.value, 10) + + +if __name__ == "__main__": + unittest.main() From c6cfefc546125882f0bf5317fd69f09c8d10c187 Mon Sep 17 00:00:00 
2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 18:14:11 -0500 Subject: [PATCH 12/46] test: add tests for c_float and c_double --- test/scripts/types_unit_test.py | 80 ++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/test/scripts/types_unit_test.py b/test/scripts/types_unit_test.py index 9db116d..6fdef7c 100644 --- a/test/scripts/types_unit_test.py +++ b/test/scripts/types_unit_test.py @@ -5,9 +5,11 @@ # import ctypes +import math +import struct as pystruct import unittest -from libdestruct import c_int, c_long, c_str, c_uint, inflater, struct, ptr, ptr_to_self +from libdestruct import c_int, c_long, c_str, c_uint, c_float, c_double, inflater, struct, ptr, ptr_to_self class ObjFromBytesTest(unittest.TestCase): @@ -104,6 +106,82 @@ class test_t(struct): self.assertIn("0x0", s) +class FloatTest(unittest.TestCase): + """c_float and c_double types.""" + + def test_c_float_read(self): + memory = pystruct.pack(" Date: Tue, 31 Mar 2026 18:14:42 -0500 Subject: [PATCH 13/46] fix: add missing support for c_float and c_double --- docs/basics/types.md | 25 ++++++++++ libdestruct/__init__.py | 4 +- libdestruct/c/__init__.py | 6 ++- libdestruct/c/base_type_inflater.py | 3 ++ libdestruct/c/c_float_types.py | 73 +++++++++++++++++++++++++++++ libdestruct/c/struct_parser.py | 6 +++ 6 files changed, 115 insertions(+), 2 deletions(-) create mode 100644 libdestruct/c/c_float_types.py diff --git a/docs/basics/types.md b/docs/basics/types.md index 4ca78cc..8b1b9fe 100644 --- a/docs/basics/types.md +++ b/docs/basics/types.md @@ -11,6 +11,8 @@ libdestruct provides Python equivalents for common C primitive types. 
All types | `c_long` | `long` / `int64_t` | 8 | Yes | | `c_ulong` | `unsigned long` / `uint64_t` | 8 | No | | `c_char` | `char` | 1 | — | +| `c_float` | `float` | 4 | — | +| `c_double` | `double` | 8 | — | | `c_str` | `char[]` | variable | — | ## Usage @@ -56,6 +58,29 @@ x = c_int.from_bytes(b"\x2a\x00\x00\x00") print(x.value) # 42 ``` +## Floating-Point Types + +`c_float` and `c_double` represent IEEE 754 single-precision (32-bit) and double-precision (64-bit) floating-point numbers. + +```python +import struct as pystruct +from libdestruct import c_float, c_double, inflater + +# Read a float from bytes +data = pystruct.pack(" str: + return "f" + + def get(self: c_float) -> float: + """Return the value of the float.""" + return struct.unpack(self._format_char(), self.resolver.resolve(self.size, 0))[0] + + def _set(self: c_float, value: float) -> None: + """Set the value of the float.""" + self.resolver.modify(self.size, 0, struct.pack(self._format_char(), value)) + + def to_bytes(self: c_float) -> bytes: + """Return the serialized representation of the float.""" + if self._frozen: + return struct.pack(self._format_char(), self._frozen_value) + return self.resolver.resolve(self.size, 0) + + def __float__(self: c_float) -> float: + """Return the value as a Python float.""" + return self.get() + + +class c_double(obj): + """A C double (IEEE 754 double-precision, 64-bit).""" + + size: int = 8 + """The size of a double in bytes.""" + + _frozen_value: float | None = None + """The frozen value of the double.""" + + def _format_char(self: c_double) -> str: + return "d" + + def get(self: c_double) -> float: + """Return the value of the double.""" + return struct.unpack(self._format_char(), self.resolver.resolve(self.size, 0))[0] + + def _set(self: c_double, value: float) -> None: + """Set the value of the double.""" + self.resolver.modify(self.size, 0, struct.pack(self._format_char(), value)) + + def to_bytes(self: c_double) -> bytes: + """Return the serialized 
representation of the double.""" + if self._frozen: + return struct.pack(self._format_char(), self._frozen_value) + return self.resolver.resolve(self.size, 0) + + def __float__(self: c_double) -> float: + """Return the value as a Python float.""" + return self.get() diff --git a/libdestruct/c/struct_parser.py b/libdestruct/c/struct_parser.py index fd89b7b..60cebb6 100644 --- a/libdestruct/c/struct_parser.py +++ b/libdestruct/c/struct_parser.py @@ -14,6 +14,7 @@ from pycparser import c_ast, c_parser +from libdestruct.c.c_float_types import c_double, c_float from libdestruct.c.c_integer_types import c_char, c_int, c_long, c_short, c_uchar, c_uint, c_ulong, c_ushort from libdestruct.common.array.array_of import array_of from libdestruct.common.bitfield.bitfield_of import bitfield_of @@ -253,6 +254,11 @@ def identifier_to_type(identifier: c_ast.IdentifierType) -> type[obj]: identifier_name = "".join(identifier.names) + # Native float/double types (before ctypes fallback, so we get libdestruct types) + native_float_types = {"float": c_float, "double": c_double} + if identifier_name in native_float_types: + return native_float_types[identifier_name] + ctypes_name = "c_" + identifier_name if hasattr(ctypes, ctypes_name): From 86e7a07ad8d5056ed28fb25b472ce3d788c1e483 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 18:20:37 -0500 Subject: [PATCH 14/46] feat: add support for pointer arithmetics --- libdestruct/common/ptr/ptr.py | 55 ++++++++++++++++++++++++-- test/scripts/types_unit_test.py | 69 +++++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+), 4 deletions(-) diff --git a/libdestruct/common/ptr/ptr.py b/libdestruct/common/ptr/ptr.py index ff6733c..fd1034b 100644 --- a/libdestruct/common/ptr/ptr.py +++ b/libdestruct/common/ptr/ptr.py @@ -6,16 +6,42 @@ from __future__ import annotations -from typing import TYPE_CHECKING, TypeVar +from typing import TypeVar +from libdestruct.backing.resolver import Resolver from 
libdestruct.common.field import Field from libdestruct.common.obj import obj - -if TYPE_CHECKING: # pragma: no cover - from libdestruct.backing.resolver import Resolver +from libdestruct.common.utils import size_of T = TypeVar("T") + +class _ArithmeticResolver(Resolver): + """A resolver for pointers produced by arithmetic operations. + + Stores a fixed address but delegates memory access to the original resolver. + """ + + def __init__(self: _ArithmeticResolver, original: Resolver, address: int) -> None: + self._original = original + self._address = address + + def resolve_address(self: _ArithmeticResolver) -> int: + return self._address + + def resolve(self: _ArithmeticResolver, size: int, _: int) -> bytes: + return self._address.to_bytes(size, "little") + + def modify(self: _ArithmeticResolver, _size: int, _index: int, _value: bytes) -> None: + raise RuntimeError("Cannot modify a synthetic pointer.") + + def absolute_from_own(self: _ArithmeticResolver, address: int) -> Resolver: + return self._original.absolute_from_own(address) + + def relative_from_own(self: _ArithmeticResolver, address_offset: int, _index_offset: int) -> Resolver: + return self._original.absolute_from_own(self._address + address_offset) + + class ptr(obj[T]): """A pointer to an object in memory.""" @@ -96,6 +122,27 @@ def to_str(self: ptr, _: int = 0) -> str: return f"{name}@0x{self.get():x}" + @property + def _element_size(self: ptr) -> int: + """Return the byte size of the pointed-to element.""" + if self.wrapper is None: + return 1 + return size_of(self.wrapper) + + def __add__(self: ptr, n: int) -> ptr: + """Return a new pointer advanced by n elements.""" + new_addr = self.get() + n * self._element_size + return ptr(_ArithmeticResolver(self.resolver, new_addr), self.wrapper) + + def __sub__(self: ptr, n: int) -> ptr: + """Return a new pointer retreated by n elements.""" + new_addr = self.get() - n * self._element_size + return ptr(_ArithmeticResolver(self.resolver, new_addr), self.wrapper) 
+ + def __getitem__(self: ptr, n: int) -> obj: + """Return the object at index n relative to this pointer.""" + return (self + n).unwrap() + def __str__(self: ptr) -> str: """Return a string representation of the pointer.""" return self.to_str() diff --git a/test/scripts/types_unit_test.py b/test/scripts/types_unit_test.py index 6fdef7c..2cd6163 100644 --- a/test/scripts/types_unit_test.py +++ b/test/scripts/types_unit_test.py @@ -10,6 +10,7 @@ import unittest from libdestruct import c_int, c_long, c_str, c_uint, c_float, c_double, inflater, struct, ptr, ptr_to_self +from libdestruct.backing.memory_resolver import MemoryResolver class ObjFromBytesTest(unittest.TestCase): @@ -105,6 +106,74 @@ class test_t(struct): s = str(test.p) self.assertIn("0x0", s) + def test_ptr_add(self): + """ptr + 1 returns new ptr at addr + sizeof(target).""" + # Array of 3 c_int values: [10, 20, 30] + memory = bytearray(8 + 12) + memory[0:8] = (8).to_bytes(8, "little") # pointer to offset 8 + memory[8:12] = (10).to_bytes(4, "little") + memory[12:16] = (20).to_bytes(4, "little") + memory[16:20] = (30).to_bytes(4, "little") + + p = ptr(MemoryResolver(memory, 0), c_int) + + p2 = p + 1 + self.assertEqual(p2.unwrap().value, 20) + + p3 = p + 2 + self.assertEqual(p3.unwrap().value, 30) + + def test_ptr_sub(self): + """ptr - 1 returns new ptr at addr - sizeof(target).""" + memory = bytearray(8 + 12) + memory[0:8] = (12).to_bytes(8, "little") # pointer to second element + memory[8:12] = (10).to_bytes(4, "little") + memory[12:16] = (20).to_bytes(4, "little") + memory[16:20] = (30).to_bytes(4, "little") + + p = ptr(MemoryResolver(memory, 0), c_int) + + p2 = p - 1 + self.assertEqual(p2.unwrap().value, 10) + + def test_ptr_add_raw(self): + """Untyped ptr: ptr + n advances by n bytes.""" + memory = bytearray(8 + 4) + memory[0:8] = (8).to_bytes(8, "little") # pointer to offset 8 + memory[8:12] = (0x44332211).to_bytes(4, "little") + + p = ptr(MemoryResolver(memory, 0)) + # No wrapper set, so element size 
is 1 byte + + p2 = p + 2 + self.assertEqual(p2.get(), 10) # 8 + 2 + + def test_ptr_getitem(self): + """ptr[0] == unwrap(), ptr[1] == (ptr+1).unwrap().""" + memory = bytearray(8 + 12) + memory[0:8] = (8).to_bytes(8, "little") + memory[8:12] = (100).to_bytes(4, "little") + memory[12:16] = (200).to_bytes(4, "little") + memory[16:20] = (300).to_bytes(4, "little") + + p = ptr(MemoryResolver(memory, 0), c_int) + + self.assertEqual(p[0].value, 100) + self.assertEqual(p[1].value, 200) + self.assertEqual(p[2].value, 300) + + def test_ptr_arithmetic_chain(self): + """(ptr + 2)[0] accesses element at index 2.""" + memory = bytearray(8 + 12) + memory[0:8] = (8).to_bytes(8, "little") + memory[8:12] = (1).to_bytes(4, "little") + memory[12:16] = (2).to_bytes(4, "little") + memory[16:20] = (3).to_bytes(4, "little") + + p = ptr(MemoryResolver(memory, 0), c_int) + + self.assertEqual((p + 2)[0].value, 3) + class FloatTest(unittest.TestCase): """c_float and c_double types.""" From 1e1ed75bdc837f4ec21eedbd7989ada50fd49e9f Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 18:20:47 -0500 Subject: [PATCH 15/46] docs: add documentation about pointer arithmetics --- docs/basics/pointers.md | 42 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/docs/basics/pointers.md b/docs/basics/pointers.md index 7e2947b..4ef384d 100644 --- a/docs/basics/pointers.md +++ b/docs/basics/pointers.md @@ -96,6 +96,48 @@ print(head.next.unwrap().val.value) # 20 print(head.next.unwrap().next.try_unwrap()) # None ``` +## Pointer Arithmetic + +Typed pointers support C-style pointer arithmetic. 
Adding or subtracting an integer advances/retreats by that many elements (scaled by the pointed-to type's size): + +```python +from libdestruct import c_int, ptr, inflater +from libdestruct.backing.memory_resolver import MemoryResolver +import struct as pystruct + +# Memory: [ptr to arr] [10] [20] [30] +memory = bytearray(8 + 12) +memory[0:8] = pystruct.pack(" Date: Tue, 31 Mar 2026 18:22:46 -0500 Subject: [PATCH 16/46] feat: add support for typedefs in C-to-Python struct parsing --- libdestruct/c/struct_parser.py | 13 +++++-- test/scripts/struct_parser_unit_test.py | 49 +++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 4 deletions(-) diff --git a/libdestruct/c/struct_parser.py b/libdestruct/c/struct_parser.py index 60cebb6..09dd4f2 100644 --- a/libdestruct/c/struct_parser.py +++ b/libdestruct/c/struct_parser.py @@ -196,11 +196,16 @@ def typedef_to_pair(typedef: c_ast.Typedef) -> tuple[str, type[obj]]: if not isinstance(typedef, c_ast.Typedef): raise TypeError("Definition must be a typedef.") - if not isinstance(typedef.type, c_ast.TypeDecl): - raise TypeError("Definition must be a type declaration.") - name = "".join(typedef.name) - definition = type_decl_to_type(typedef.type) + + if isinstance(typedef.type, c_ast.PtrDecl): + definition = ptr_to_type(typedef.type) + elif isinstance(typedef.type, c_ast.ArrayDecl): + definition = arr_to_type(typedef.type) + elif isinstance(typedef.type, c_ast.TypeDecl): + definition = type_decl_to_type(typedef.type) + else: + raise TypeError("Unsupported typedef target type.") return name, definition diff --git a/test/scripts/struct_parser_unit_test.py b/test/scripts/struct_parser_unit_test.py index a05cbb4..3593306 100644 --- a/test/scripts/struct_parser_unit_test.py +++ b/test/scripts/struct_parser_unit_test.py @@ -4,9 +4,11 @@ # Licensed under the MIT license. See LICENSE file in the project root for details. 
# +import struct as pystruct import unittest from libdestruct.c.struct_parser import definition_to_type +from libdestruct import inflater class StructParserTest(unittest.TestCase): @@ -39,5 +41,52 @@ def test_nested_struct_definition(self): self.assertIn("b", t.__annotations__) +class TypedefTest(unittest.TestCase): + """Typedef support in C struct parser.""" + + def test_simple_typedef(self): + t = definition_to_type(""" + typedef unsigned int uint32_t; + struct S { uint32_t x; }; + """) + self.assertIn("x", t.__annotations__) + + def test_typedef_of_struct(self): + t = definition_to_type(""" + typedef struct { int x; } Point; + struct S { Point p; }; + """) + self.assertIn("p", t.__annotations__) + + def test_typedef_of_pointer(self): + t = definition_to_type(""" + typedef int *intptr; + struct S { intptr p; }; + """) + self.assertIn("p", t.__annotations__) + + def test_typedef_chain(self): + t = definition_to_type(""" + typedef unsigned int u32; + typedef u32 mytype; + struct S { mytype x; }; + """) + self.assertIn("x", t.__annotations__) + + def test_typedef_inflate_and_read(self): + t = definition_to_type(""" + typedef unsigned int uint32_t; + struct S { uint32_t x; int y; }; + """) + memory = bytearray(8) + memory[0:4] = pystruct.pack(" Date: Tue, 31 Mar 2026 18:23:18 -0500 Subject: [PATCH 17/46] docs: add documentation for typedef support in C struct parsing --- docs/advanced/c_parser.md | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/docs/advanced/c_parser.md b/docs/advanced/c_parser.md index f9637b4..84112de 100644 --- a/docs/advanced/c_parser.md +++ b/docs/advanced/c_parser.md @@ -94,6 +94,40 @@ t = definition_to_type(""" The last struct in the definition is returned. All previous structs are cached and available for forward references. +## Typedefs + +The parser supports `typedef` declarations. 
Typedefs are resolved when used as field types in subsequent structs: + +```python +t = definition_to_type(""" + typedef unsigned int uint32_t; + struct S { uint32_t x; }; +""") +``` + +Struct typedefs, pointer typedefs, and chained typedefs all work: + +```python +# Struct typedef +t = definition_to_type(""" + typedef struct { int x; int y; } Point; + struct S { Point p; }; +""") + +# Pointer typedef +t = definition_to_type(""" + typedef int *intptr; + struct S { intptr p; }; +""") + +# Chained typedef +t = definition_to_type(""" + typedef unsigned int u32; + typedef u32 mytype; + struct S { mytype x; }; +""") +``` + ## Include Directives The parser supports `#include` directives by running the C preprocessor: From 40ec6b3955a49fc43f71f800b3f8a53bb320e074 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 18:29:05 -0500 Subject: [PATCH 18/46] feat: publicly exposed size_of method --- libdestruct/__init__.py | 2 ++ libdestruct/common/utils.py | 40 +++++++++++++++++----------- test/scripts/types_unit_test.py | 46 ++++++++++++++++++++++++++++++--- 3 files changed, 70 insertions(+), 18 deletions(-) diff --git a/libdestruct/__init__.py b/libdestruct/__init__.py index ef75971..cd884ee 100644 --- a/libdestruct/__init__.py +++ b/libdestruct/__init__.py @@ -19,6 +19,7 @@ from libdestruct.common.enum import enum, enum_of from libdestruct.common.ptr.ptr import ptr from libdestruct.common.struct import ptr_to, ptr_to_self, struct +from libdestruct.common.utils import size_of from libdestruct.libdestruct import inflate, inflater __all__ = [ @@ -41,5 +42,6 @@ "ptr", "ptr_to", "ptr_to_self", + "size_of", "struct", ] diff --git a/libdestruct/common/utils.py b/libdestruct/common/utils.py index 0054c25..487bd7f 100644 --- a/libdestruct/common/utils.py +++ b/libdestruct/common/utils.py @@ -25,23 +25,33 @@ def is_field_bound_method(item: obj) -> bool: def size_of(item_or_inflater: obj | callable[[Resolver], obj]) -> int: - """Return the size of an object, from an 
obj or it's inflater.""" - if hasattr(item_or_inflater.__class__, "size"): - # This has the priority over the size of the object itself - # as we might be dealing with a struct object - # that defines an attribute named "size" - return item_or_inflater.__class__.size - if hasattr(item_or_inflater, "size"): - return item_or_inflater.size - - # Check if item is the bound method of a Field - if is_field_bound_method(item_or_inflater): - field_object = item_or_inflater.__self__ - return field_object.get_size() - - # Check if item is directly a Field instance + """Return the size in bytes of a type, instance, or field descriptor.""" + # Field instances (e.g. array_of, ptr_to) — must come before .size check if isinstance(item_or_inflater, Field): return item_or_inflater.get_size() + if is_field_bound_method(item_or_inflater): + return item_or_inflater.__self__.get_size() + + # Struct types: size is on the inflated _type_impl class + if isinstance(item_or_inflater, type) and hasattr(item_or_inflater, "_type_impl"): + return item_or_inflater._type_impl.size + + # Struct types not yet inflated: trigger inflation to compute size + if isinstance(item_or_inflater, type) and not hasattr(item_or_inflater, "size"): + from libdestruct.common.type_registry import TypeRegistry + + impl = TypeRegistry().inflater_for(item_or_inflater) + if hasattr(impl, "size") and isinstance(impl.size, int): + return impl.size + + # Check class-level size (works for both types and instances) + if isinstance(item_or_inflater, type): + if hasattr(item_or_inflater, "size") and isinstance(item_or_inflater.size, int): + return item_or_inflater.size + elif hasattr(item_or_inflater.__class__, "size"): + return item_or_inflater.__class__.size + elif hasattr(item_or_inflater, "size"): + return item_or_inflater.size raise ValueError(f"Cannot determine the size of {item_or_inflater}") diff --git a/test/scripts/types_unit_test.py b/test/scripts/types_unit_test.py index 2cd6163..16a7ced 100644 --- 
a/test/scripts/types_unit_test.py +++ b/test/scripts/types_unit_test.py @@ -9,7 +9,7 @@ import struct as pystruct import unittest -from libdestruct import c_int, c_long, c_str, c_uint, c_float, c_double, inflater, struct, ptr, ptr_to_self +from libdestruct import c_int, c_long, c_str, c_uint, c_float, c_double, inflater, struct, ptr, ptr_to_self, size_of, array_of from libdestruct.backing.memory_resolver import MemoryResolver @@ -107,7 +107,7 @@ class test_t(struct): self.assertIn("0x0", s) def test_ptr_add(self): - """ptr + 1 returns new ptr at addr + sizeof(target).""" + """ptr + 1 returns new ptr at addr + size_of(target).""" # Array of 3 c_int values: [10, 20, 30] memory = bytearray(8 + 12) memory[0:8] = (8).to_bytes(8, "little") # pointer to offset 8 @@ -124,7 +124,7 @@ def test_ptr_add(self): self.assertEqual(p3.unwrap().value, 30) def test_ptr_sub(self): - """ptr - 1 returns new ptr at addr - sizeof(target).""" + """ptr - 1 returns new ptr at addr - size_of(target).""" memory = bytearray(8 + 12) memory[0:8] = (12).to_bytes(8, "little") # pointer to second element memory[8:12] = (10).to_bytes(4, "little") @@ -313,5 +313,45 @@ def test_setitem_middle(self): self.assertEqual(s.get(1), b"a") +class SizeofTest(unittest.TestCase): + """size_of() function.""" + + def test_size_of_c_int(self): + self.assertEqual(size_of(c_int), 4) + + def test_size_of_c_long(self): + self.assertEqual(size_of(c_long), 8) + + def test_size_of_c_float(self): + self.assertEqual(size_of(c_float), 4) + + def test_size_of_ptr(self): + self.assertEqual(size_of(ptr), 8) + + def test_size_of_struct(self): + class two_ints(struct): + a: c_int + b: c_int + + self.assertEqual(size_of(two_ints), 8) + + def test_size_of_instance(self): + obj = c_int.from_bytes((42).to_bytes(4, "little")) + self.assertEqual(size_of(obj), 4) + + def test_size_of_array_field(self): + self.assertEqual(size_of(array_of(c_int, 10)), 40) + + def test_size_of_nested_struct(self): + class inner(struct): + x: c_int + + 
class outer(struct): + a: inner + b: c_int + + self.assertEqual(size_of(outer), 8) + + if __name__ == "__main__": unittest.main() From ca3487fa14c4bae2d1ceacbcc66e24018d32780c Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 18:29:20 -0500 Subject: [PATCH 19/46] docs: add documentation about size_of --- docs/basics/types.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/docs/basics/types.md b/docs/basics/types.md index 8b1b9fe..624fdbc 100644 --- a/docs/basics/types.md +++ b/docs/basics/types.md @@ -58,6 +58,33 @@ x = c_int.from_bytes(b"\x2a\x00\x00\x00") print(x.value) # 42 ``` +## size_of() + +The `size_of()` function returns the size in bytes of any type, instance, or field descriptor: + +```python +from libdestruct import size_of, c_int, c_long, c_float, ptr, struct, array_of + +size_of(c_int) # 4 +size_of(c_long) # 8 +size_of(c_float) # 4 +size_of(ptr) # 8 + +# Works with struct types +class point_t(struct): + x: c_int + y: c_int + +size_of(point_t) # 8 + +# Works with instances +x = c_int.from_bytes(b"\x00\x00\x00\x00") +size_of(x) # 4 + +# Works with array field descriptors +size_of(array_of(c_int, 10)) # 40 +``` + ## Floating-Point Types `c_float` and `c_double` represent IEEE 754 single-precision (32-bit) and double-precision (64-bit) floating-point numbers. 
From 69be13693f7167da9b3b34276d108341e25e1af2 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 18:34:13 -0500 Subject: [PATCH 20/46] feat: implement hexdump functionality for all objects --- libdestruct/common/hexdump.py | 48 +++++++++++++++++++++ libdestruct/common/obj.py | 7 ++++ libdestruct/common/struct/struct_impl.py | 12 ++++++ test/scripts/types_unit_test.py | 53 ++++++++++++++++++++++++ 4 files changed, 120 insertions(+) create mode 100644 libdestruct/common/hexdump.py diff --git a/libdestruct/common/hexdump.py b/libdestruct/common/hexdump.py new file mode 100644 index 0000000..0775dda --- /dev/null +++ b/libdestruct/common/hexdump.py @@ -0,0 +1,48 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + + +def format_hexdump( + data: bytes, + base_address: int = 0, + annotations: dict[int, str] | None = None, +) -> str: + """Format a classic hex dump of the given data. + + Args: + data: The bytes to dump. + base_address: The starting address shown in the offset column. + annotations: Optional mapping from byte offset to field name, shown in the margin. + + Returns: + A formatted hex dump string. + """ + lines = [] + for offset in range(0, len(data), 16): + chunk = data[offset : offset + 16] + addr = base_address + offset + + hex_parts = " ".join(f"{b:02x}" for b in chunk) + # Pad to full 16-byte width + hex_parts = hex_parts.ljust(47) + + ascii_parts = "".join(chr(b) if chr(b).isprintable() and b < 128 else "." 
for b in chunk) # noqa: PLR2004 + + line = f"{addr:08x} {hex_parts} |{ascii_parts}|" + + # Add field annotations for this line + if annotations: + fields_on_line = [ + name for byte_offset, name in sorted(annotations.items()) if offset <= byte_offset < offset + 16 + ] + if fields_on_line: + line += " " + ", ".join(fields_on_line) + + lines.append(line) + + return "\n".join(lines) diff --git a/libdestruct/common/obj.py b/libdestruct/common/obj.py index 9476a23..d072ca0 100644 --- a/libdestruct/common/obj.py +++ b/libdestruct/common/obj.py @@ -9,6 +9,8 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Generic, TypeVar +from libdestruct.common.hexdump import format_hexdump + if TYPE_CHECKING: # pragma: no cover from libdestruct.backing.resolver import Resolver @@ -135,6 +137,11 @@ def __eq__(self: obj, value: object) -> bool: return self.get() == value.get() + def hexdump(self: obj) -> str: + """Return a hex dump of this object's bytes.""" + address = self.address if not self._frozen else 0 + return format_hexdump(self.to_bytes(), address) + def __bytes__(self: obj) -> bytes: """Return the serialized object.""" return self.to_bytes() diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index b46890b..be54d1a 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -14,6 +14,7 @@ from libdestruct.common.bitfield.bitfield_field import BitfieldField from libdestruct.common.bitfield.bitfield_tracker import BitfieldTracker from libdestruct.common.field import Field +from libdestruct.common.hexdump import format_hexdump from libdestruct.common.obj import obj from libdestruct.common.struct import struct from libdestruct.common.type_registry import TypeRegistry @@ -210,6 +211,17 @@ def to_bytes(self: struct_impl) -> bytes: """Return the serialized representation of the struct.""" return b"".join(member.to_bytes() for member in self._members.values()) + def 
hexdump(self: struct_impl) -> str: + """Return a hex dump of this struct's bytes with field annotations.""" + annotations = {} + offset = 0 + for name, member in self._members.items(): + annotations[offset] = name + offset += len(member.to_bytes()) + + address = struct_impl.address.fget(self) if not self._frozen else 0 + return format_hexdump(self.to_bytes(), address, annotations) + def _set(self: struct_impl, _: str) -> None: """Set the value of the struct to the given value.""" raise RuntimeError("Cannot set the value of a struct.") diff --git a/test/scripts/types_unit_test.py b/test/scripts/types_unit_test.py index 16a7ced..ffe11a9 100644 --- a/test/scripts/types_unit_test.py +++ b/test/scripts/types_unit_test.py @@ -353,5 +353,58 @@ class outer(struct): self.assertEqual(size_of(outer), 8) +class HexdumpTest(unittest.TestCase): + """Pretty hex dump.""" + + def test_hexdump_primitive(self): + data = (0x2a).to_bytes(4, "little") + obj = c_int.from_bytes(data) + result = obj.hexdump() + self.assertIn("2a 00 00 00", result) + + def test_hexdump_struct(self): + class test_t(struct): + a: c_int + b: c_int + + memory = b"" + memory += (1).to_bytes(4, "little") + memory += (2).to_bytes(4, "little") + test = test_t.from_bytes(memory) + result = test.hexdump() + # Should contain field name annotations + self.assertIn("a", result) + self.assertIn("b", result) + + def test_hexdump_returns_string(self): + obj = c_int.from_bytes((0).to_bytes(4, "little")) + self.assertIsInstance(obj.hexdump(), str) + + def test_hexdump_offset_column(self): + obj = c_int.from_bytes((0).to_bytes(4, "little")) + result = obj.hexdump() + self.assertIn("00000000", result) + + def test_hexdump_ascii_column(self): + memory = bytearray(b"ABCD") + lib = inflater(memory) + obj = lib.inflate(c_int, 0) + result = obj.hexdump() + self.assertIn("ABCD", result) + + def test_hexdump_multiline(self): + """More than 16 bytes should produce multiple lines.""" + class big_t(struct): + a: c_long + b: c_long + c: 
c_long + + memory = b"\x00" * 24 + test = big_t.from_bytes(memory) + result = test.hexdump() + lines = [l for l in result.strip().split("\n") if l.strip()] + self.assertGreater(len(lines), 1) + + if __name__ == "__main__": unittest.main() From bda7078baea76d343769ad1d068200d7032a578f Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 18:34:33 -0500 Subject: [PATCH 21/46] docs: add guide for hexdump formatting --- docs/advanced/hexdump.md | 57 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 docs/advanced/hexdump.md diff --git a/docs/advanced/hexdump.md b/docs/advanced/hexdump.md new file mode 100644 index 0000000..6b796b2 --- /dev/null +++ b/docs/advanced/hexdump.md @@ -0,0 +1,57 @@ +# Hex Dump + +Every libdestruct object has a `hexdump()` method that returns a classic hex dump of its serialized bytes. + +## Basic Usage + +```python +from libdestruct import c_int, inflater + +memory = bytearray(b"Hello, World!\x00\x00\x00") +lib = inflater(memory) +x = lib.inflate(c_int, 0) +print(x.hexdump()) +``` + +Output format: + +``` +00000000 48 65 6c 6c |Hell| +``` + +Each line shows: offset, hex bytes (up to 16 per line), and ASCII representation (non-printable bytes shown as `.`). 
+ +## Struct Hex Dump + +When called on a struct, `hexdump()` annotates each line with the field names that start on that line: + +```python +from libdestruct import struct, c_int, c_long + +class player_t(struct): + health: c_int + score: c_long + +memory = bytearray(12) +memory[0:4] = (100).to_bytes(4, "little") +memory[4:12] = (9999).to_bytes(8, "little") + +player = player_t.from_bytes(memory) +print(player.hexdump()) +``` + +Output: + +``` +00000000 64 00 00 00 0f 27 00 00 00 00 00 00 |d....'......| health, score +``` + +## Standalone Utility + +The underlying `format_hexdump` function can be used directly: + +```python +from libdestruct.common.hexdump import format_hexdump + +print(format_hexdump(b"\xde\xad\xbe\xef", base_address=0x1000)) +``` From 03505ab49e6a157f1a860a4860ad5ef6c7628f4e Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 18:36:48 -0500 Subject: [PATCH 22/46] fix: implement caching for dereferenced pointer objects --- libdestruct/common/ptr/ptr.py | 27 +++++++++-- test/scripts/types_unit_test.py | 83 +++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 5 deletions(-) diff --git a/libdestruct/common/ptr/ptr.py b/libdestruct/common/ptr/ptr.py index fd1034b..6474708 100644 --- a/libdestruct/common/ptr/ptr.py +++ b/libdestruct/common/ptr/ptr.py @@ -57,6 +57,8 @@ def __init__(self: ptr, resolver: Resolver, wrapper: type | None = None) -> None """ super().__init__(resolver) self.wrapper = wrapper + self._cached_unwrap: obj | None = None + self._cache_valid: bool = False def get(self: ptr) -> int: """Return the value of the pointer.""" @@ -73,6 +75,12 @@ def to_bytes(self: obj) -> bytes: def _set(self: ptr, value: int) -> None: """Set the value of the pointer to the given value.""" self.resolver.modify(self.size, 0, value.to_bytes(self.size, self.endianness)) + self.invalidate() + + def invalidate(self: ptr) -> None: + """Clear the cached unwrap result.""" + self._cached_unwrap = None + self._cache_valid = False 
def unwrap(self: ptr, length: int | None = None) -> obj: """Return the object pointed to by the pointer. @@ -80,18 +88,24 @@ def unwrap(self: ptr, length: int | None = None) -> obj: Args: length: The length of the object in memory this points to. """ + if self._cache_valid: + return self._cached_unwrap + address = self.get() if self.wrapper: if length: raise ValueError("Length is not supported when unwrapping a pointer to a wrapper object.") - return self.wrapper(self.resolver.absolute_from_own(address)) - - if not length: - length = 1 + result = self.wrapper(self.resolver.absolute_from_own(address)) + elif not length: + result = self.resolver.resolve(1, 0) + else: + result = self.resolver.resolve(length, 0) - return self.resolver.resolve(length, 0) + self._cached_unwrap = result + self._cache_valid = True + return result def try_unwrap(self: ptr, length: int | None = None) -> obj | None: """Return the object pointed to by the pointer, if it is valid. @@ -99,6 +113,9 @@ def try_unwrap(self: ptr, length: int | None = None) -> obj | None: Args: length: The length of the object in memory this points to. 
""" + if self._cache_valid: + return self._cached_unwrap + address = self.get() try: diff --git a/test/scripts/types_unit_test.py b/test/scripts/types_unit_test.py index ffe11a9..4c38f63 100644 --- a/test/scripts/types_unit_test.py +++ b/test/scripts/types_unit_test.py @@ -174,6 +174,89 @@ def test_ptr_arithmetic_chain(self): self.assertEqual((p + 2)[0].value, 3) + def test_unwrap_cached(self): + """Two unwrap() calls return the same object.""" + class test_t(struct): + a: c_int + p: ptr = ptr_to_self() + + memory = bytearray(12) + memory[0:4] = (42).to_bytes(4, "little") + memory[4:12] = (0).to_bytes(8, "little") + + lib = inflater(memory) + test = lib.inflate(test_t, 0) + + r1 = test.p.unwrap() + r2 = test.p.unwrap() + self.assertIs(r1, r2) + + def test_invalidate_clears_cache(self): + """invalidate() causes next unwrap() to return a new object.""" + class test_t(struct): + a: c_int + p: ptr = ptr_to_self() + + memory = bytearray(12) + memory[0:4] = (42).to_bytes(4, "little") + memory[4:12] = (0).to_bytes(8, "little") + + lib = inflater(memory) + test = lib.inflate(test_t, 0) + + r1 = test.p.unwrap() + test.p.invalidate() + r2 = test.p.unwrap() + self.assertIsNot(r1, r2) + + def test_cache_reflects_memory_change(self): + """After memory change + invalidate, unwrap gets new value.""" + class test_t(struct): + a: c_int + p: ptr = ptr_to_self() + + memory = bytearray(12) + memory[0:4] = (42).to_bytes(4, "little") + memory[4:12] = (0).to_bytes(8, "little") + + lib = inflater(memory) + test = lib.inflate(test_t, 0) + + self.assertEqual(test.p.unwrap().a.value, 42) + memory[0:4] = (99).to_bytes(4, "little") + test.p.invalidate() + self.assertEqual(test.p.unwrap().a.value, 99) + + def test_try_unwrap_cached(self): + """try_unwrap() also uses cache.""" + class test_t(struct): + a: c_int + p: ptr = ptr_to_self() + + memory = bytearray(12) + memory[0:4] = (42).to_bytes(4, "little") + memory[4:12] = (0).to_bytes(8, "little") + + lib = inflater(memory) + test = 
lib.inflate(test_t, 0) + + r1 = test.p.try_unwrap() + r2 = test.p.try_unwrap() + self.assertIs(r1, r2) + + def test_cache_invalidated_on_set(self): + """ptr.value = new_addr auto-invalidates the cache.""" + memory = bytearray(8 + 8) # ptr + two c_int slots + memory[0:8] = (8).to_bytes(8, "little") # points to offset 8 + memory[8:12] = (10).to_bytes(4, "little") + memory[12:16] = (20).to_bytes(4, "little") + + p = ptr(MemoryResolver(memory, 0), c_int) + + self.assertEqual(p.unwrap().value, 10) + p.value = 12 # now points to offset 12 + self.assertEqual(p.unwrap().value, 20) + class FloatTest(unittest.TestCase): """c_float and c_double types.""" From 2e5b7da9dfbccf3982fd21b341e6e421de9f0836 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 18:37:01 -0500 Subject: [PATCH 23/46] docs: add explanation about pointer caching --- docs/basics/pointers.md | 24 ++++++++++++++++++++++++ mkdocs.yml | 1 + 2 files changed, 25 insertions(+) diff --git a/docs/basics/pointers.md b/docs/basics/pointers.md index 4ef384d..fe989ab 100644 --- a/docs/basics/pointers.md +++ b/docs/basics/pointers.md @@ -138,6 +138,30 @@ p_raw = ptr(MemoryResolver(memory, 0)) # no wrapper p2 = p_raw + 4 # advances by 4 bytes ``` +## Caching + +Pointer dereferencing is cached — repeated calls to `unwrap()` or `try_unwrap()` return the same object without re-inflating: + +```python +r1 = node.next.unwrap() +r2 = node.next.unwrap() +assert r1 is r2 # same object +``` + +If the underlying memory changes, call `invalidate()` to clear the cache: + +```python +# Memory was modified externally +node.next.invalidate() +r3 = node.next.unwrap() # re-inflated from updated memory +``` + +Setting the pointer's value automatically invalidates the cache: + +```python +node.next.value = new_address # cache cleared automatically +``` + ## Pointer String Representation ```python diff --git a/mkdocs.yml b/mkdocs.yml index 7f29745..fb5ee96 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -65,4 +65,5 @@ nav: - 
Freeze, Diff & Reset: advanced/freeze_diff.md - C Struct Parser: advanced/c_parser.md - Forward References: advanced/forward_refs.md + - Hex Dump: advanced/hexdump.md - Field Offsets: advanced/offset.md From 59f970ee93f896655aff2edf4607f0b31cdd298b Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 18:45:42 -0500 Subject: [PATCH 24/46] chore: create SKILL.md file for AI agents --- SKILL.md | 335 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 335 insertions(+) create mode 100644 SKILL.md diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..74011eb --- /dev/null +++ b/SKILL.md @@ -0,0 +1,335 @@ +# libdestruct Skills + +libdestruct is a Python library for destructuring binary data into typed objects. It maps raw bytes to C-like types (integers, floats, strings, structs, pointers, arrays, enums, bitfields) with read/write support. + +## Installation + +```bash +pip install git+https://github.com/mrindeciso/libdestruct.git +``` + +## Core Concepts + +All types inherit from `obj`. Every `obj` has: +- `.value` property to read/write the underlying data +- `.address` property for the memory offset +- `.to_bytes()` to serialize back to bytes +- `.freeze()` / `.diff()` / `.reset()` for snapshotting +- `.hexdump()` for a hex dump of the object's bytes +- `.from_bytes(data)` class method to create a read-only instance from raw bytes + +Memory is accessed through an `inflater`, which wraps a `bytes` or `bytearray` buffer. Use `bytearray` for read/write access. 
+ +## Quick Reference + +### Imports + +```python +from libdestruct import ( + inflater, # memory wrapper + struct, # struct base class + c_int, c_uint, # 32-bit integers (signed/unsigned) + c_long, c_ulong, # 64-bit integers (signed/unsigned) + c_float, c_double, # IEEE 754 floats (32/64-bit) + c_str, # null-terminated C string + ptr, # 8-byte pointer + ptr_to, # typed pointer field descriptor + ptr_to_self, # self-referential pointer field descriptor + array_of, # fixed-size array field descriptor + enum_of, # enum field descriptor + bitfield_of, # bitfield descriptor + offset, # explicit field offset + size_of, # get size in bytes of any type/instance/field +) +``` + +### Type Sizes + +| Type | Size (bytes) | +|---|---| +| `c_int` / `c_uint` | 4 | +| `c_long` / `c_ulong` | 8 | +| `c_float` | 4 | +| `c_double` | 8 | +| `ptr` | 8 | +| `c_str` | variable (reads until null) | + +### Reading Primitives from a Buffer + +```python +memory = bytearray(b"\x2a\x00\x00\x00\x00\x00\x00\x00") +lib = inflater(memory) + +x = lib.inflate(c_int, 0) # inflate c_int at offset 0 +print(x.value) # 42 + +y = lib.inflate(c_long, 0) # inflate c_long at offset 0 +print(y.value) +``` + +### Reading Primitives from Raw Bytes + +```python +x = c_int.from_bytes(b"\x2a\x00\x00\x00") +print(x.value) # 42 +# Note: from_bytes returns a frozen (read-only) object +``` + +### Writing Primitives + +```python +memory = bytearray(4) +lib = inflater(memory) +x = lib.inflate(c_int, 0) +x.value = -1 +print(memory) # bytearray(b'\xff\xff\xff\xff') +``` + +### Defining Structs + +```python +class player_t(struct): + health: c_int + score: c_uint + position_x: c_float + position_y: c_float +``` + +Struct fields are laid out sequentially. Access members as attributes; each returns a typed `obj` (use `.value` to get the Python value). 
+ +### Inflating Structs + +```python +import struct as pystruct + +memory = bytearray(16) +memory[0:4] = pystruct.pack(" Date: Tue, 31 Mar 2026 18:49:15 -0500 Subject: [PATCH 25/46] fix: export c_char, c_uchar, c_short, c_ushort too --- docs/basics/types.md | 5 ++++- libdestruct/__init__.py | 6 +++++- test/scripts/types_unit_test.py | 26 +++++++++++++++++++++++++- 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/docs/basics/types.md b/docs/basics/types.md index 624fdbc..dbe3bca 100644 --- a/docs/basics/types.md +++ b/docs/basics/types.md @@ -6,11 +6,14 @@ libdestruct provides Python equivalents for common C primitive types. All types | libdestruct | C equivalent | Size (bytes) | Signed | |---|---|---|---| +| `c_char` | `char` / `int8_t` | 1 | Yes | +| `c_uchar` | `unsigned char` / `uint8_t` | 1 | No | +| `c_short` | `short` / `int16_t` | 2 | Yes | +| `c_ushort` | `unsigned short` / `uint16_t` | 2 | No | | `c_int` | `int` / `int32_t` | 4 | Yes | | `c_uint` | `unsigned int` / `uint32_t` | 4 | No | | `c_long` | `long` / `int64_t` | 8 | Yes | | `c_ulong` | `unsigned long` / `uint64_t` | 8 | No | -| `c_char` | `char` | 1 | — | | `c_float` | `float` | 4 | — | | `c_double` | `double` | 8 | — | | `c_str` | `char[]` | variable | — | diff --git a/libdestruct/__init__.py b/libdestruct/__init__.py index cd884ee..6b16ef2 100644 --- a/libdestruct/__init__.py +++ b/libdestruct/__init__.py @@ -12,7 +12,7 @@ pass from libdestruct.backing.resolver import Resolver -from libdestruct.c import c_double, c_float, c_int, c_long, c_str, c_uint, c_ulong +from libdestruct.c import c_char, c_double, c_float, c_int, c_long, c_short, c_str, c_uchar, c_uint, c_ulong, c_ushort from libdestruct.common.array import array, array_of from libdestruct.common.attributes import offset from libdestruct.common.bitfield import bitfield_of @@ -27,13 +27,17 @@ "array", "array_of", "bitfield_of", + "c_char", "c_double", "c_float", "c_int", "c_long", + "c_short", "c_str", + "c_uchar", "c_uint", 
"c_ulong", + "c_ushort", "enum", "enum_of", "inflate", diff --git a/test/scripts/types_unit_test.py b/test/scripts/types_unit_test.py index 4c38f63..6e0d94a 100644 --- a/test/scripts/types_unit_test.py +++ b/test/scripts/types_unit_test.py @@ -9,7 +9,11 @@ import struct as pystruct import unittest -from libdestruct import c_int, c_long, c_str, c_uint, c_float, c_double, inflater, struct, ptr, ptr_to_self, size_of, array_of +from libdestruct import ( + c_char, c_double, c_float, c_int, c_long, c_short, + c_str, c_uchar, c_uint, c_ulong, c_ushort, + inflater, struct, ptr, ptr_to_self, size_of, array_of, +) from libdestruct.backing.memory_resolver import MemoryResolver @@ -26,6 +30,26 @@ def test_c_long_from_bytes(self): obj = c_long.from_bytes(data) self.assertEqual(obj.value, 123456789) + def test_c_char_from_bytes(self): + data = (65).to_bytes(1, "little") + obj = c_char.from_bytes(data) + self.assertEqual(obj.value, 65) + + def test_c_uchar_from_bytes(self): + data = (200).to_bytes(1, "little") + obj = c_uchar.from_bytes(data) + self.assertEqual(obj.value, 200) + + def test_c_short_from_bytes(self): + data = (-1234).to_bytes(2, "little", signed=True) + obj = c_short.from_bytes(data) + self.assertEqual(obj.value, -1234) + + def test_c_ushort_from_bytes(self): + data = (60000).to_bytes(2, "little") + obj = c_ushort.from_bytes(data) + self.assertEqual(obj.value, 60000) + def test_c_uint_from_bytes(self): data = (0xDEADBEEF).to_bytes(4, "little") obj = c_uint.from_bytes(data) From 911550e702485c91a5412bac2e8f1c51e22a7aec Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 18:52:32 -0500 Subject: [PATCH 26/46] feat: add comparison operators for libdestruct objects --- libdestruct/common/obj.py | 54 ++++++++++++++++++++++++++++--- test/scripts/types_unit_test.py | 56 +++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 5 deletions(-) diff --git a/libdestruct/common/obj.py b/libdestruct/common/obj.py index d072ca0..cf8274b 100644 --- 
a/libdestruct/common/obj.py +++ b/libdestruct/common/obj.py @@ -130,12 +130,56 @@ def __repr__(self: obj) -> str: """Return a string representation of the object.""" return f"{self.__class__.__name__}({self.get()})" - def __eq__(self: obj, value: object) -> bool: + def _compare_value(self: obj, other: object) -> tuple[object, object] | None: + """Extract comparable values from self and other, or None if incompatible.""" + self_val = self.value + if isinstance(other, obj): + return self_val, other.value + if isinstance(other, int | float): + return self_val, other + return None + + def __eq__(self: obj, other: object) -> bool: """Return whether the object is equal to the given value.""" - if not isinstance(value, obj): - return False - - return self.get() == value.get() + pair = self._compare_value(other) + if pair is None: + return NotImplemented + return pair[0] == pair[1] + + def __ne__(self: obj, other: object) -> bool: + """Return whether the object is not equal to the given value.""" + pair = self._compare_value(other) + if pair is None: + return NotImplemented + return pair[0] != pair[1] + + def __lt__(self: obj, other: object) -> bool: + """Return whether this object is less than the given value.""" + pair = self._compare_value(other) + if pair is None: + return NotImplemented + return pair[0] < pair[1] + + def __le__(self: obj, other: object) -> bool: + """Return whether this object is less than or equal to the given value.""" + pair = self._compare_value(other) + if pair is None: + return NotImplemented + return pair[0] <= pair[1] + + def __gt__(self: obj, other: object) -> bool: + """Return whether this object is greater than the given value.""" + pair = self._compare_value(other) + if pair is None: + return NotImplemented + return pair[0] > pair[1] + + def __ge__(self: obj, other: object) -> bool: + """Return whether this object is greater than or equal to the given value.""" + pair = self._compare_value(other) + if pair is None: + return NotImplemented 
+ return pair[0] >= pair[1] def hexdump(self: obj) -> str: """Return a hex dump of this object's bytes.""" diff --git a/test/scripts/types_unit_test.py b/test/scripts/types_unit_test.py index 6e0d94a..519a4d2 100644 --- a/test/scripts/types_unit_test.py +++ b/test/scripts/types_unit_test.py @@ -513,5 +513,61 @@ class big_t(struct): self.assertGreater(len(lines), 1) +class ComparisonTest(unittest.TestCase): + """Comparison operators on primitive types.""" + + def test_int_gt_python_int(self): + x = c_int.from_bytes((10).to_bytes(4, "little")) + self.assertTrue(x > 5) + self.assertFalse(x > 10) + + def test_int_lt_python_int(self): + x = c_int.from_bytes((3).to_bytes(4, "little")) + self.assertTrue(x < 5) + self.assertFalse(x < 3) + + def test_int_ge_le(self): + x = c_int.from_bytes((7).to_bytes(4, "little")) + self.assertTrue(x >= 7) + self.assertTrue(x >= 6) + self.assertFalse(x >= 8) + self.assertTrue(x <= 7) + self.assertTrue(x <= 8) + self.assertFalse(x <= 6) + + def test_int_eq_python_int(self): + x = c_int.from_bytes((42).to_bytes(4, "little")) + self.assertTrue(x == 42) + self.assertFalse(x == 43) + + def test_int_ne_python_int(self): + x = c_int.from_bytes((42).to_bytes(4, "little")) + self.assertTrue(x != 43) + self.assertFalse(x != 42) + + def test_float_gt_python_float(self): + x = c_float.from_bytes(pystruct.pack(" 3.0) + self.assertFalse(x > 4.0) + + def test_float_eq_python_float(self): + x = c_double.from_bytes(pystruct.pack(" a) + self.assertTrue(a != b) + self.assertFalse(a == b) + + def test_comparison_returns_not_implemented_for_incompatible(self): + x = c_int.from_bytes((1).to_bytes(4, "little")) + self.assertFalse(x == "hello") + self.assertTrue(x != "hello") + + if __name__ == "__main__": unittest.main() From 628bd37d2567e9af1bcf155e5423f48983c69bb8 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 19:21:21 -0500 Subject: [PATCH 27/46] feat: add support for simple and tagged unions --- libdestruct/__init__.py | 4 + 
libdestruct/common/union/__init__.py | 14 +++ .../common/union/tagged_union_field.py | 45 ++++++++ .../union/tagged_union_field_inflater.py | 61 ++++++++++ libdestruct/common/union/tagged_union_of.py | 22 ++++ libdestruct/common/union/union.py | 106 ++++++++++++++++++ libdestruct/common/union/union_field.py | 43 +++++++ .../common/union/union_field_inflater.py | 51 +++++++++ libdestruct/common/union/union_of.py | 21 ++++ 9 files changed, 367 insertions(+) create mode 100644 libdestruct/common/union/__init__.py create mode 100644 libdestruct/common/union/tagged_union_field.py create mode 100644 libdestruct/common/union/tagged_union_field_inflater.py create mode 100644 libdestruct/common/union/tagged_union_of.py create mode 100644 libdestruct/common/union/union.py create mode 100644 libdestruct/common/union/union_field.py create mode 100644 libdestruct/common/union/union_field_inflater.py create mode 100644 libdestruct/common/union/union_of.py diff --git a/libdestruct/__init__.py b/libdestruct/__init__.py index 6b16ef2..fcc1877 100644 --- a/libdestruct/__init__.py +++ b/libdestruct/__init__.py @@ -19,6 +19,7 @@ from libdestruct.common.enum import enum, enum_of from libdestruct.common.ptr.ptr import ptr from libdestruct.common.struct import ptr_to, ptr_to_self, struct +from libdestruct.common.union import tagged_union, union, union_of from libdestruct.common.utils import size_of from libdestruct.libdestruct import inflate, inflater @@ -48,4 +49,7 @@ "ptr_to_self", "size_of", "struct", + "tagged_union", + "union", + "union_of", ] diff --git a/libdestruct/common/union/__init__.py b/libdestruct/common/union/__init__.py new file mode 100644 index 0000000..587cf52 --- /dev/null +++ b/libdestruct/common/union/__init__.py @@ -0,0 +1,14 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. 
See LICENSE file in the project root for details. +# + +from libdestruct.common.union.tagged_union_of import tagged_union +from libdestruct.common.union.union import union +from libdestruct.common.union.union_of import union_of + +__all__ = ["tagged_union", "union", "union_of"] + +import libdestruct.common.union.tagged_union_field_inflater +import libdestruct.common.union.union_field_inflater # noqa: F401 diff --git a/libdestruct/common/union/tagged_union_field.py b/libdestruct/common/union/tagged_union_field.py new file mode 100644 index 0000000..cf05c5a --- /dev/null +++ b/libdestruct/common/union/tagged_union_field.py @@ -0,0 +1,45 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.common.field import Field +from libdestruct.common.union.union import union +from libdestruct.common.utils import size_of + +if TYPE_CHECKING: # pragma: no cover + from libdestruct.backing.resolver import Resolver + from libdestruct.common.obj import obj + + +class TaggedUnionField(Field): + """A field descriptor for a tagged union in a struct.""" + + base_type: type[obj] = union + + def __init__(self: TaggedUnionField, discriminator: str, variants: dict[object, type]) -> None: + """Initialize the tagged union field. + + Args: + discriminator: The name of the struct field used as the discriminator. + variants: A mapping from discriminator values to variant types. + """ + self.discriminator = discriminator + self.variants = variants + + def inflate(self: TaggedUnionField, resolver: Resolver | None) -> union: + """Inflate the field (used during size computation with resolver=None). + + Args: + resolver: The backing resolver (None during size computation). 
+ """ + return union(resolver, None, self.get_size()) + + def get_size(self: TaggedUnionField) -> int: + """Return the size of the union (max of all variant sizes).""" + return max(size_of(variant) for variant in self.variants.values()) diff --git a/libdestruct/common/union/tagged_union_field_inflater.py b/libdestruct/common/union/tagged_union_field_inflater.py new file mode 100644 index 0000000..afccd4c --- /dev/null +++ b/libdestruct/common/union/tagged_union_field_inflater.py @@ -0,0 +1,61 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.common.type_registry import TypeRegistry +from libdestruct.common.union.tagged_union_field import TaggedUnionField +from libdestruct.common.union.union import union + +if TYPE_CHECKING: # pragma: no cover + from collections.abc import Callable + + from libdestruct.backing.resolver import Resolver + from libdestruct.common.obj import obj + +registry = TypeRegistry() + + +def tagged_union_field_inflater( + field: TaggedUnionField, + _: type[obj], + owner: tuple[obj, type[obj]] | None, +) -> Callable[[Resolver], obj]: + """Return the inflater for a tagged union field. + + During size computation (owner[0] is None), returns field.inflate which + creates a stub with the correct max size. + + During actual inflation, returns a closure that reads the discriminator + from the struct instance and inflates the matching variant. 
+ """ + if owner is None or owner[0] is None: + return field.inflate + + struct_instance = owner[0] + + def inflate_with_discriminator(resolver: Resolver) -> union: + members = object.__getattribute__(struct_instance, "_members") + disc_value = members[field.discriminator].value + + if disc_value not in field.variants: + raise ValueError( + f"Unknown discriminator value {disc_value!r} for field '{field.discriminator}'. " + f"Valid values: {list(field.variants.keys())}" + ) + + variant_type = field.variants[disc_value] + variant_inflater = registry.inflater_for(variant_type) + variant = variant_inflater(resolver) + + return union(resolver, variant, field.get_size()) + + return inflate_with_discriminator + + +registry.register_instance_handler(TaggedUnionField, tagged_union_field_inflater) diff --git a/libdestruct/common/union/tagged_union_of.py b/libdestruct/common/union/tagged_union_of.py new file mode 100644 index 0000000..5555026 --- /dev/null +++ b/libdestruct/common/union/tagged_union_of.py @@ -0,0 +1,22 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from libdestruct.common.union.tagged_union_field import TaggedUnionField + + +def tagged_union(discriminator: str, variants: dict[object, type]) -> TaggedUnionField: + """Create a tagged union field descriptor. + + Args: + discriminator: The name of the struct field used to select the active variant. + variants: A mapping from discriminator values to variant types. + + Returns: + A TaggedUnionField for use as a struct field default value. 
+ """ + return TaggedUnionField(discriminator, variants) diff --git a/libdestruct/common/union/union.py b/libdestruct/common/union/union.py new file mode 100644 index 0000000..9f1f249 --- /dev/null +++ b/libdestruct/common/union/union.py @@ -0,0 +1,106 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.common.obj import obj + +if TYPE_CHECKING: # pragma: no cover + from libdestruct.backing.resolver import Resolver + + +class union(obj): + """A union value, supporting both tagged (single active variant) and plain (all variants overlaid) modes.""" + + _variant: obj | None + """The single active variant (tagged union mode).""" + + _variants: dict[str, obj] + """Named variants (plain union mode).""" + + _frozen_bytes: bytes | None + """The frozen bytes of the full union region.""" + + def __init__( + self: union, + resolver: Resolver | None, + variant: obj | None, + max_size: int, + variants: dict[str, obj] | None = None, + ) -> None: + """Initialize the union. + + Args: + resolver: The backing resolver. + variant: The single active variant (tagged union mode, None for plain unions). + max_size: The size of the union (max of all variant sizes). + variants: Named variants dict (plain union mode, None for tagged unions). 
+ """ + super().__init__(resolver) + self._variant = variant + self._variants = variants or {} + self.size = max_size + self._frozen_bytes = None + + @property + def variant(self: union) -> obj | None: + """Return the active variant object (tagged union mode).""" + return self._variant + + def get(self: union) -> object: + """Return the value of the active variant.""" + if self._variant is not None: + return self._variant.get() + if self._variants: + return {name: v.get() for name, v in self._variants.items()} + return None + + def _set(self: union, value: object) -> None: + """Set the value of the active variant.""" + if self._variant is None: + raise RuntimeError("Cannot set the value of a union without an active variant.") + self._variant._set(value) + + def to_bytes(self: union) -> bytes: + """Return the full union-sized region as bytes.""" + if self._frozen_bytes is not None: + return self._frozen_bytes + if self.resolver is None: + return b"\x00" * self.size + return self.resolver.resolve(self.size, 0) + + def freeze(self: union) -> None: + """Freeze the union and all its variants.""" + if self.resolver is not None: + self._frozen_bytes = self.resolver.resolve(self.size, 0) + else: + self._frozen_bytes = b"\x00" * self.size + if self._variant is not None: + self._variant.freeze() + for v in self._variants.values(): + v.freeze() + super().freeze() + + def to_str(self: union, indent: int = 0) -> str: + """Return a string representation of the union.""" + if self._variant is not None: + return self._variant.to_str(indent) + if self._variants: + members = ", ".join(self._variants) + return f"union({members})" + return "union(empty)" + + def __getattr__(self: union, name: str) -> object: + """Delegate attribute access to named variants or the active variant.""" + variants = object.__getattribute__(self, "_variants") + if name in variants: + return variants[name] + variant = object.__getattribute__(self, "_variant") + if variant is not None: + return 
getattr(variant, name) + raise AttributeError(f"'{type(self).__name__}' has no attribute '{name}'") diff --git a/libdestruct/common/union/union_field.py b/libdestruct/common/union/union_field.py new file mode 100644 index 0000000..cb97b3a --- /dev/null +++ b/libdestruct/common/union/union_field.py @@ -0,0 +1,43 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.common.field import Field +from libdestruct.common.union.union import union +from libdestruct.common.utils import size_of + +if TYPE_CHECKING: # pragma: no cover + from libdestruct.backing.resolver import Resolver + from libdestruct.common.obj import obj + + +class UnionField(Field): + """A field descriptor for a plain (non-discriminated) union in a struct.""" + + base_type: type[obj] = union + + def __init__(self: UnionField, variants: dict[str, type]) -> None: + """Initialize the union field. + + Args: + variants: A mapping from variant names to their types. + """ + self.variants = variants + + def inflate(self: UnionField, resolver: Resolver | None) -> union: + """Inflate the field (used during size computation with resolver=None). + + Args: + resolver: The backing resolver (None during size computation). 
+ """ + return union(resolver, None, self.get_size()) + + def get_size(self: UnionField) -> int: + """Return the size of the union (max of all variant sizes).""" + return max(size_of(variant) for variant in self.variants.values()) diff --git a/libdestruct/common/union/union_field_inflater.py b/libdestruct/common/union/union_field_inflater.py new file mode 100644 index 0000000..59804d2 --- /dev/null +++ b/libdestruct/common/union/union_field_inflater.py @@ -0,0 +1,51 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.common.type_registry import TypeRegistry +from libdestruct.common.union.union import union +from libdestruct.common.union.union_field import UnionField + +if TYPE_CHECKING: # pragma: no cover + from collections.abc import Callable + + from libdestruct.backing.resolver import Resolver + from libdestruct.common.obj import obj + +registry = TypeRegistry() + + +def union_field_inflater( + field: UnionField, + _: type[obj], + owner: tuple[obj, type[obj]] | None, +) -> Callable[[Resolver], obj]: + """Return the inflater for a plain union field. + + During size computation (owner[0] is None), returns field.inflate which + creates a stub with the correct max size. + + During actual inflation, returns a closure that inflates all variants + at the same memory location. 
+ """ + if owner is None or owner[0] is None: + return field.inflate + + def inflate_all_variants(resolver: Resolver) -> union: + variants = {} + for name, variant_type in field.variants.items(): + variant_inflater = registry.inflater_for(variant_type) + variants[name] = variant_inflater(resolver) + + return union(resolver, None, field.get_size(), variants=variants) + + return inflate_all_variants + + +registry.register_instance_handler(UnionField, union_field_inflater) diff --git a/libdestruct/common/union/union_of.py b/libdestruct/common/union/union_of.py new file mode 100644 index 0000000..e7486c3 --- /dev/null +++ b/libdestruct/common/union/union_of.py @@ -0,0 +1,21 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from libdestruct.common.union.union_field import UnionField + + +def union_of(variants: dict[str, type]) -> UnionField: + """Create a plain union field descriptor. + + Args: + variants: A mapping from variant names to their types. + + Returns: + A UnionField for use as a struct field default value. + """ + return UnionField(variants) From 25d7e5b53d96efe5f31808706eb679aa2ab47613 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 19:21:34 -0500 Subject: [PATCH 28/46] test: add checks for tagged and simple unions --- test/scripts/tagged_union_test.py | 178 ++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 test/scripts/tagged_union_test.py diff --git a/test/scripts/tagged_union_test.py b/test/scripts/tagged_union_test.py new file mode 100644 index 0000000..5a4872b --- /dev/null +++ b/test/scripts/tagged_union_test.py @@ -0,0 +1,178 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). 
+# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +import struct as pystruct +import unittest + +from libdestruct import c_float, c_int, c_long, inflater, size_of, struct +from libdestruct.common.union import tagged_union, union, union_of + + +class TaggedUnionTest(unittest.TestCase): + def test_basic_variant_selection(self): + """Union selects the correct variant based on discriminator value.""" + class msg_t(struct): + type: c_int + payload: union = tagged_union("type", {0: c_int, 1: c_float}) + + memory = pystruct.pack(" Date: Tue, 31 Mar 2026 19:21:52 -0500 Subject: [PATCH 29/46] docs: add documentation for simple and tagged unions --- SKILL.md | 28 ++++++ docs/advanced/tagged_unions.md | 175 +++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + 3 files changed, 204 insertions(+) create mode 100644 docs/advanced/tagged_unions.md diff --git a/SKILL.md b/SKILL.md index 74011eb..3e4c7bd 100644 --- a/SKILL.md +++ b/SKILL.md @@ -38,6 +38,9 @@ from libdestruct import ( array_of, # fixed-size array field descriptor enum_of, # enum field descriptor bitfield_of, # bitfield descriptor + union, # union annotation type + union_of, # plain union field descriptor + tagged_union, # tagged union field descriptor offset, # explicit field offset size_of, # get size in bytes of any type/instance/field ) @@ -217,6 +220,31 @@ class flags_t(struct): Consecutive bitfields with the same backing type are packed together. The struct above is 4 bytes total, not 16. 
+### Unions + +```python +from libdestruct.common.union import union, union_of, tagged_union + +# Plain union — all variants overlaid at the same offset +class packet_t(struct): + data: union = union_of({"i": c_int, "f": c_float, "l": c_long}) + +pkt = lib.inflate(packet_t, 0) +pkt.data.i.value # interpret as int +pkt.data.f.value # interpret as float (same bytes) + +# Tagged union — discriminator selects the active variant +class message_t(struct): + type: c_int + payload: union = tagged_union("type", { + 0: c_int, + 1: c_float, + 2: point_t, # struct variants work too + }) +``` + +The discriminator field must appear before the union. The union size is the max of all variant sizes. Struct variant fields are accessible directly: `msg.payload.x.value`. Use `.variant` to get the raw variant object. Unknown discriminator values raise `ValueError`. + ### Explicit Field Offsets ```python diff --git a/docs/advanced/tagged_unions.md b/docs/advanced/tagged_unions.md new file mode 100644 index 0000000..d5cf462 --- /dev/null +++ b/docs/advanced/tagged_unions.md @@ -0,0 +1,175 @@ +# Unions + +libdestruct supports both **plain unions** (C-style, all variants overlaid) and **tagged unions** (discriminated, one active variant selected by another field). + +## Plain Unions + +Use `union_of({"name": type, ...})` to declare a union where all variants share the same memory. Access each interpretation by name: + +```python +from libdestruct import struct, c_int, c_float, c_long, inflater +from libdestruct.common.union import union, union_of + +class packet_t(struct): + data: union = union_of({ + "i": c_int, + "f": c_float, + "l": c_long, + }) +``` + +All variants are inflated at the same offset. 
Reading one reinterprets the underlying bytes: + +```python +import struct as pystruct + +memory = pystruct.pack(" Date: Tue, 31 Mar 2026 19:25:57 -0500 Subject: [PATCH 30/46] feat: add quick to_dict() method to all objects --- SKILL.md | 12 ++ docs/basics/structs.md | 25 +++++ libdestruct/common/array/array_impl.py | 4 + libdestruct/common/obj.py | 4 + libdestruct/common/struct/struct_impl.py | 4 + libdestruct/common/union/union.py | 8 ++ test/scripts/to_dict_test.py | 135 +++++++++++++++++++++++ 7 files changed, 192 insertions(+) create mode 100644 test/scripts/to_dict_test.py diff --git a/SKILL.md b/SKILL.md index 3e4c7bd..f33a944 100644 --- a/SKILL.md +++ b/SKILL.md @@ -290,6 +290,18 @@ print(player.hexdump()) Struct hexdumps annotate lines with field names. Primitive hexdumps show raw bytes. +### Dict / JSON Export + +```python +point = point_t.from_bytes(memory) +point.to_dict() # {"x": 10, "y": 20} + +import json +json.dumps(entity.to_dict()) # nested structs produce nested dicts +``` + +`to_dict()` works on all types: primitives return their value, structs return `{name: value}` dicts, arrays return lists, unions return variant values, enums return their int value. 
+ ### Freeze / Diff / Reset ```python diff --git a/docs/basics/structs.md b/docs/basics/structs.md index 9ed8bb7..9eb64ba 100644 --- a/docs/basics/structs.md +++ b/docs/basics/structs.md @@ -104,6 +104,31 @@ print(repr(header)) # } ``` +## Dict / JSON Export + +Use `to_dict()` to get a JSON-serializable dictionary of field names to values: + +```python +header = header_t.from_bytes(data) +print(header.to_dict()) +# {"magic": 3735928559, "version": 1, "size": 4096} +``` + +Nested structs produce nested dicts, arrays become lists: + +```python +import json + +rect = rect_t.from_bytes(data) +print(json.dumps(rect.to_dict(), indent=2)) +# { +# "origin": {"x": 0, "y": 0}, +# "size": {"x": 0, "y": 0} +# } +``` + +`to_dict()` also works on individual fields — primitives return their Python value, enums return their integer value. + ## Equality Two struct instances are equal if they have the same members with the same values: diff --git a/libdestruct/common/array/array_impl.py b/libdestruct/common/array/array_impl.py index 963f371..a32e198 100644 --- a/libdestruct/common/array/array_impl.py +++ b/libdestruct/common/array/array_impl.py @@ -60,6 +60,10 @@ def set(self: array_impl, _: list[obj]) -> None: """Set the array from a list.""" raise NotImplementedError("Cannot set items in an array.") + def to_dict(self: array_impl) -> list[object]: + """Return a JSON-serializable list of element values.""" + return [elem.to_dict() for elem in self] + def to_bytes(self: array_impl) -> bytes: """Return the serialized representation of the array.""" return b"".join(bytes(x) for x in self) diff --git a/libdestruct/common/obj.py b/libdestruct/common/obj.py index cf8274b..d4daecd 100644 --- a/libdestruct/common/obj.py +++ b/libdestruct/common/obj.py @@ -181,6 +181,10 @@ def __ge__(self: obj, other: object) -> bool: return NotImplemented return pair[0] >= pair[1] + def to_dict(self: obj) -> object: + """Return a JSON-serializable representation of the object.""" + return self.value + def 
hexdump(self: obj) -> str: """Return a hex dump of this object's bytes.""" address = self.address if not self._frozen else 0 diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index be54d1a..2490ffa 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -211,6 +211,10 @@ def to_bytes(self: struct_impl) -> bytes: """Return the serialized representation of the struct.""" return b"".join(member.to_bytes() for member in self._members.values()) + def to_dict(self: struct_impl) -> dict[str, object]: + """Return a JSON-serializable dict of field names to values.""" + return {name: member.to_dict() for name, member in self._members.items()} + def hexdump(self: struct_impl) -> str: """Return a hex dump of this struct's bytes with field annotations.""" annotations = {} diff --git a/libdestruct/common/union/union.py b/libdestruct/common/union/union.py index 9f1f249..7216d36 100644 --- a/libdestruct/common/union/union.py +++ b/libdestruct/common/union/union.py @@ -66,6 +66,14 @@ def _set(self: union, value: object) -> None: raise RuntimeError("Cannot set the value of a union without an active variant.") self._variant._set(value) + def to_dict(self: union) -> object: + """Return a JSON-serializable representation of the union.""" + if self._variant is not None: + return self._variant.to_dict() + if self._variants: + return {name: v.to_dict() for name, v in self._variants.items()} + return None + def to_bytes(self: union) -> bytes: """Return the full union-sized region as bytes.""" if self._frozen_bytes is not None: diff --git a/test/scripts/to_dict_test.py b/test/scripts/to_dict_test.py new file mode 100644 index 0000000..0ee14ea --- /dev/null +++ b/test/scripts/to_dict_test.py @@ -0,0 +1,135 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. 
See LICENSE file in the project root for details. +# + +import json +import struct as pystruct +import unittest +from enum import IntEnum + +from libdestruct import ( + array_of, + c_float, + c_int, + c_long, + c_str, + enum_of, + inflater, + ptr_to, + struct, +) +from libdestruct.common.enum import enum +from libdestruct.common.union import tagged_union, union, union_of + + +class ToDictTest(unittest.TestCase): + def test_primitive_to_dict(self): + """Primitive to_dict returns its Python value.""" + x = c_int.from_bytes(b"\x2a\x00\x00\x00") + self.assertEqual(x.to_dict(), 42) + + def test_struct_to_dict(self): + """Struct to_dict returns a dict of field names to values.""" + class point_t(struct): + x: c_int + y: c_int + + memory = pystruct.pack(" Date: Tue, 31 Mar 2026 19:42:34 -0500 Subject: [PATCH 31/46] feat: add support for alignment in structs --- SKILL.md | 27 +++ docs/advanced/alignment.md | 126 +++++++++++++ libdestruct/__init__.py | 3 +- libdestruct/common/struct/struct_impl.py | 26 ++- libdestruct/common/utils.py | 53 ++++++ mkdocs.yml | 1 + test/scripts/alignment_test.py | 214 +++++++++++++++++++++++ 7 files changed, 448 insertions(+), 2 deletions(-) create mode 100644 docs/advanced/alignment.md create mode 100644 test/scripts/alignment_test.py diff --git a/SKILL.md b/SKILL.md index f33a944..653e649 100644 --- a/SKILL.md +++ b/SKILL.md @@ -43,6 +43,7 @@ from libdestruct import ( tagged_union, # tagged union field descriptor offset, # explicit field offset size_of, # get size in bytes of any type/instance/field + alignment_of, # get natural alignment of any type/instance ) ``` @@ -245,6 +246,32 @@ class message_t(struct): The discriminator field must appear before the union. The union size is the max of all variant sizes. Struct variant fields are accessible directly: `msg.payload.x.value`. Use `.variant` to get the raw variant object. Unknown discriminator values raise `ValueError`. 
+### Struct Alignment + +```python +# Default: packed (no padding) +class packed_t(struct): + a: c_char + b: c_int +# size: 5 + +# Aligned: natural C alignment with padding +class aligned_t(struct): + _aligned_ = True + a: c_char + b: c_int +# size: 8 (1 + 3 padding + 4) + +alignment_of(c_int) # 4 +alignment_of(aligned_t) # 4 (max member alignment) + +# Custom alignment width +class wide_t(struct): + _aligned_ = 16 + a: c_int +# size: 16, alignment: 16 +``` + ### Explicit Field Offsets ```python diff --git a/docs/advanced/alignment.md b/docs/advanced/alignment.md new file mode 100644 index 0000000..42f5fe5 --- /dev/null +++ b/docs/advanced/alignment.md @@ -0,0 +1,126 @@ +# Struct Alignment + +By default, libdestruct structs are **packed** — fields are placed sequentially with no padding, like C structs with `__attribute__((packed))`. + +You can opt into natural alignment (matching standard C struct layout) by setting `_aligned_ = True` on your struct: + +## Enabling Alignment + +```python +from libdestruct import struct, c_char, c_int, c_long, size_of + +class packed_t(struct): + a: c_char + b: c_int + +size_of(packed_t) # 5 (1 + 4, no padding) + +class aligned_t(struct): + _aligned_ = True + a: c_char + b: c_int + +size_of(aligned_t) # 8 (1 + 3 padding + 4) +``` + +## Alignment Rules + +When `_aligned_ = True`: + +1. **Field alignment**: Each field is placed at an offset that is a multiple of its natural alignment (1 for `c_char`, 2 for `c_short`, 4 for `c_int`/`c_float`, 8 for `c_long`/`c_double`/`ptr`). +2. **Tail padding**: The struct's total size is rounded up to a multiple of the struct's alignment (the maximum alignment of any member). 
+ +```python +class mixed_t(struct): + _aligned_ = True + a: c_char # offset 0, size 1 + b: c_short # offset 2 (aligned to 2), size 2 + c: c_char # offset 4, size 1 + d: c_int # offset 8 (aligned to 4), size 4 + e: c_char # offset 12, size 1 + f: c_long # offset 16 (aligned to 8), size 8 + +size_of(mixed_t) # 24 (padded to 8-byte boundary) +``` + +## Reading Aligned Structs + +```python +import struct as pystruct +from libdestruct import inflater + +class header_t(struct): + _aligned_ = True + flags: c_char + size: c_int + +# flags at offset 0, 3 bytes padding, size at offset 4 +memory = pystruct.pack(" None: current_offset = 0 bf_tracker = BitfieldTracker() + aligned = getattr(reference_type, "_aligned_", False) for name, annotation, reference in iterate_annotation_chain(reference_type, terminate_at=struct): + if name == "_aligned_": + continue + resolved_type, bitfield_field, explicit_offset = self._resolve_field( name, annotation, reference, inflater, reference_type, ) @@ -97,6 +101,8 @@ def _inflate_struct_attributes( current_offset += offset_delta else: current_offset += bf_tracker.flush() + if aligned and explicit_offset is None: + current_offset = _align_offset(current_offset, alignment_of(resolved_type)) result = resolved_type(resolver.relative_from_own(current_offset, 0)) current_offset += size_of(result) @@ -154,11 +160,17 @@ def _resolve_field( def compute_own_size(cls: type[struct_impl], reference_type: type) -> None: """Compute the size of the struct.""" size = 0 + max_alignment = 1 bf_tracker = BitfieldTracker() + aligned = getattr(reference_type, "_aligned_", False) for name, annotation, reference in iterate_annotation_chain(reference_type, terminate_at=struct): + if name == "_aligned_": + continue + bitfield_field = None attribute = None + has_explicit_offset = False if name in reference.__dict__: attrs = getattr(reference, name) @@ -174,6 +186,7 @@ def compute_own_size(cls: type[struct_impl], reference_type: type) -> None: elif isinstance(attr, 
Field): attribute = cls._inflater.inflater_for((attr, annotation), (None, cls))(None) elif isinstance(attr, OffsetAttribute): + has_explicit_offset = True offset = attr.offset if offset < size: raise ValueError("Offset must be greater than the current size.") @@ -190,10 +203,21 @@ def compute_own_size(cls: type[struct_impl], reference_type: type) -> None: size += bf_tracker.compute_size(bitfield_field) else: size += bf_tracker.flush() + if aligned and not has_explicit_offset: + field_align = alignment_of(attribute) + max_alignment = max(max_alignment, field_align) + size = _align_offset(size, field_align) size += size_of(attribute) size += bf_tracker.flush() + + if aligned: + if isinstance(aligned, int) and aligned is not True: + max_alignment = max(max_alignment, aligned) + size = _align_offset(size, max_alignment) + cls.size = size + cls.alignment = max_alignment if aligned else 1 @property def address(self: struct_impl) -> int: diff --git a/libdestruct/common/utils.py b/libdestruct/common/utils.py index 487bd7f..6e245d7 100644 --- a/libdestruct/common/utils.py +++ b/libdestruct/common/utils.py @@ -6,6 +6,7 @@ from __future__ import annotations +import contextlib import sys from types import MethodType from typing import TYPE_CHECKING, Any, ForwardRef @@ -56,6 +57,58 @@ def size_of(item_or_inflater: obj | callable[[Resolver], obj]) -> int: raise ValueError(f"Cannot determine the size of {item_or_inflater}") +def alignment_of(item: obj | type[obj]) -> int: + """Return the natural alignment of a type or instance. + + For primitive types, alignment equals their size (1, 2, 4, or 8). + For struct types, alignment is computed as the max of member alignments. + For packed structs (the default), alignment is 1. 
+ """ + # For uninflated struct types, trigger inflation first so alignment is computed + if isinstance(item, type) and not hasattr(item, "size") and not hasattr(item, "_type_impl"): + with contextlib.suppress(ValueError, TypeError): + size_of(item) + + # Struct types with computed alignment + if isinstance(item, type) and hasattr(item, "_type_impl"): + impl = item._type_impl + if hasattr(impl, "alignment"): + return impl.alignment + + # Explicit alignment attribute (struct_impl instances, arrays, etc.) + if not isinstance(item, type) and hasattr(item, "alignment") and isinstance(item.alignment, int): + return item.alignment + if isinstance(item, type) and "alignment" in item.__dict__ and isinstance(item.__dict__["alignment"], int): + return item.__dict__["alignment"] + + # Field descriptors + if isinstance(item, Field): + return _alignment_from_size(item.get_size()) + if is_field_bound_method(item): + return _alignment_from_size(item.__self__.get_size()) + + # Derive from size for power-of-2 sized types + try: + s = size_of(item) + return _alignment_from_size(s) + except (ValueError, TypeError): + return 1 + + +def _alignment_from_size(s: int) -> int: + """Derive alignment from size: return size if it's a power of 2 and <= 8, else 1.""" + max_alignment = 8 + if s > 0 and (s & (s - 1)) == 0 and s <= max_alignment: + return s + return 1 + + +def _align_offset(offset: int, alignment: int) -> int: + """Round up offset to the next multiple of alignment.""" + remainder = offset % alignment + return offset + (alignment - remainder) if remainder else offset + + def _resolve_annotation(annotation: Any, defining_class: type) -> Any: """Resolve a string annotation to its actual type. 
diff --git a/mkdocs.yml b/mkdocs.yml index cbd2f6f..08ca91a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -67,4 +67,5 @@ nav: - Forward References: advanced/forward_refs.md - Hex Dump: advanced/hexdump.md - Unions: advanced/tagged_unions.md + - Struct Alignment: advanced/alignment.md - Field Offsets: advanced/offset.md diff --git a/test/scripts/alignment_test.py b/test/scripts/alignment_test.py new file mode 100644 index 0000000..2d020ef --- /dev/null +++ b/test/scripts/alignment_test.py @@ -0,0 +1,214 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +import struct as pystruct +import unittest + +from libdestruct import c_char, c_int, c_long, c_short, c_uchar, inflater, offset, size_of, struct +from libdestruct.common.utils import alignment_of + + +class AlignmentOfTest(unittest.TestCase): + def test_alignment_of_c_char(self): + self.assertEqual(alignment_of(c_char), 1) + + def test_alignment_of_c_short(self): + self.assertEqual(alignment_of(c_short), 2) + + def test_alignment_of_c_int(self): + self.assertEqual(alignment_of(c_int), 4) + + def test_alignment_of_c_long(self): + self.assertEqual(alignment_of(c_long), 8) + + +class AlignedStructTest(unittest.TestCase): + def test_packed_struct_no_padding(self): + """Default structs are packed with no alignment padding.""" + class packed_t(struct): + a: c_char + b: c_int + + # packed: 1 + 4 = 5 + self.assertEqual(size_of(packed_t), 5) + + def test_aligned_struct_padding(self): + """Aligned struct inserts padding for field alignment.""" + class aligned_t(struct): + _aligned_ = True + a: c_char + b: c_int + + # aligned: 1 + 3 padding + 4 = 8 + self.assertEqual(size_of(aligned_t), 8) + + def test_aligned_struct_tail_padding(self): + """Aligned struct pads total size to max alignment.""" + class aligned_t(struct): + _aligned_ = 
True + a: c_int + b: c_char + + # aligned: 4 + 1 + 3 tail padding = 8 (aligned to 4-byte boundary) + self.assertEqual(size_of(aligned_t), 8) + + def test_aligned_struct_read_values(self): + """Values are read correctly from aligned positions.""" + class aligned_t(struct): + _aligned_ = True + a: c_char + b: c_int + + # a at offset 0, padding 3 bytes, b at offset 4 + memory = pystruct.pack(" Date: Tue, 31 Mar 2026 20:38:40 -0500 Subject: [PATCH 32/46] fix: solve various issues in the codebase that Opus created and then fixed itself --- libdestruct/c/c_float_types.py | 8 ++ libdestruct/c/struct_parser.py | 6 ++ libdestruct/common/bitfield/bitfield.py | 6 ++ .../common/bitfield/bitfield_tracker.py | 7 ++ libdestruct/common/obj.py | 2 +- libdestruct/common/struct/struct_impl.py | 96 +++++++++---------- libdestruct/common/union/union.py | 14 +++ test/scripts/alignment_test.py | 56 +++++++++++ test/scripts/bitfield_unit_test.py | 20 +++- test/scripts/struct_unit_test.py | 10 ++ test/scripts/tagged_union_test.py | 47 +++++++++ test/scripts/types_unit_test.py | 34 +++++++ 12 files changed, 255 insertions(+), 51 deletions(-) diff --git a/libdestruct/c/c_float_types.py b/libdestruct/c/c_float_types.py index f41cc6c..d38622c 100644 --- a/libdestruct/c/c_float_types.py +++ b/libdestruct/c/c_float_types.py @@ -41,6 +41,10 @@ def __float__(self: c_float) -> float: """Return the value as a Python float.""" return self.get() + def __int__(self: c_float) -> int: + """Return the value as a Python int.""" + return int(self.get()) + class c_double(obj): """A C double (IEEE 754 double-precision, 64-bit).""" @@ -71,3 +75,7 @@ def to_bytes(self: c_double) -> bytes: def __float__(self: c_double) -> float: """Return the value as a Python float.""" return self.get() + + def __int__(self: c_double) -> int: + """Return the value as a Python int.""" + return int(self.get()) diff --git a/libdestruct/c/struct_parser.py b/libdestruct/c/struct_parser.py index 09dd4f2..eabc309 100644 --- 
a/libdestruct/c/struct_parser.py +++ b/libdestruct/c/struct_parser.py @@ -57,6 +57,12 @@ """A cache for parsed type definitions, indexed by name.""" +def clear_parser_cache() -> None: + """Clear cached struct definitions and typedefs from previous parses.""" + PARSED_STRUCTS.clear() + TYPEDEFS.clear() + + def definition_to_type(definition: str) -> type[obj]: """Converts a C struct definition to a struct object.""" parser = c_parser.CParser() diff --git a/libdestruct/common/bitfield/bitfield.py b/libdestruct/common/bitfield/bitfield.py index 7d4bd58..d226829 100644 --- a/libdestruct/common/bitfield/bitfield.py +++ b/libdestruct/common/bitfield/bitfield.py @@ -95,6 +95,12 @@ def to_bytes(self: bitfield) -> bytes: return self._backing_instance.to_bytes() return b"" + def freeze(self: bitfield) -> None: + """Freeze the bitfield, also freezing the shared backing instance if this is the group owner.""" + if self._is_group_owner and not self._backing_instance._frozen: + self._backing_instance.freeze() + super().freeze() + def to_str(self: bitfield, _: int = 0) -> str: """Return a string representation of the bitfield.""" return f"{self.get()}" diff --git a/libdestruct/common/bitfield/bitfield_tracker.py b/libdestruct/common/bitfield/bitfield_tracker.py index 86fbb1c..ff129f1 100644 --- a/libdestruct/common/bitfield/bitfield_tracker.py +++ b/libdestruct/common/bitfield/bitfield_tracker.py @@ -36,6 +36,13 @@ def active(self: BitfieldTracker) -> bool: """Return whether a bitfield group is currently active.""" return self._backing_type is not None + def needs_new_group(self: BitfieldTracker, field: BitfieldField) -> bool: + """Return whether the given field would start a new bitfield group.""" + return ( + self._backing_type is not field.backing_type + or self._bit_offset + field.bit_width > field.backing_type.size * 8 + ) + def flush(self: BitfieldTracker) -> int: """Close the current bitfield group and return the byte size to advance. 
diff --git a/libdestruct/common/obj.py b/libdestruct/common/obj.py index d4daecd..0c9379f 100644 --- a/libdestruct/common/obj.py +++ b/libdestruct/common/obj.py @@ -135,7 +135,7 @@ def _compare_value(self: obj, other: object) -> tuple[object, object] | None: self_val = self.value if isinstance(other, obj): return self_val, other.value - if isinstance(other, int | float): + if isinstance(other, int | float | bytes): return self_val, other return None diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index 338813e..1cfb3ec 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -80,13 +80,14 @@ def _inflate_struct_attributes( current_offset = 0 bf_tracker = BitfieldTracker() aligned = getattr(reference_type, "_aligned_", False) + self._member_offsets = {} for name, annotation, reference in iterate_annotation_chain(reference_type, terminate_at=struct): if name == "_aligned_": continue - resolved_type, bitfield_field, explicit_offset = self._resolve_field( - name, annotation, reference, inflater, reference_type, + resolved_type, bitfield_field, explicit_offset = struct_impl._resolve_field( + name, annotation, reference, inflater, owner=(self, reference_type._type_impl), ) if explicit_offset is not None: @@ -95,6 +96,10 @@ def _inflate_struct_attributes( current_offset = explicit_offset if bitfield_field: + if aligned and bf_tracker.needs_new_group(bitfield_field): + current_offset += bf_tracker.flush() + current_offset = _align_offset(current_offset, alignment_of(bitfield_field.backing_type)) + self._member_offsets[name] = current_offset result, offset_delta = bf_tracker.create_bitfield( bitfield_field, inflater, resolver, current_offset, ) @@ -103,6 +108,7 @@ def _inflate_struct_attributes( current_offset += bf_tracker.flush() if aligned and explicit_offset is None: current_offset = _align_offset(current_offset, alignment_of(resolved_type)) + self._member_offsets[name] = 
current_offset result = resolved_type(resolver.relative_from_own(current_offset, 0)) current_offset += size_of(result) @@ -110,13 +116,13 @@ def _inflate_struct_attributes( current_offset += bf_tracker.flush() + @staticmethod def _resolve_field( - self: struct_impl, name: str, annotation: type, reference: type, inflater: TypeRegistry, - reference_type: type, + owner: tuple[obj, type] | None, ) -> tuple[object | None, BitfieldField | None, int | None]: """Resolve a single struct field annotation to its inflater or BitfieldField. @@ -126,7 +132,7 @@ def _resolve_field( explicit_offset is set when an OffsetAttribute is present. """ if name not in reference.__dict__: - return inflater.inflater_for(annotation, owner=(self, reference_type._type_impl)), None, None + return inflater.inflater_for(annotation, owner=owner), None, None attrs = getattr(reference, name) if not isinstance(attrs, tuple): @@ -144,7 +150,7 @@ def _resolve_field( bitfield_field = attr elif isinstance(attr, Field): resolved_type = inflater.inflater_for( - (attr, annotation), owner=(self, reference_type._type_impl), + (attr, annotation), owner=owner, ) elif isinstance(attr, OffsetAttribute): explicit_offset = attr.offset @@ -152,7 +158,7 @@ def _resolve_field( raise TypeError("Only Field, BitfieldField, and OffsetAttribute are allowed in attributes.") if not resolved_type and not bitfield_field: - resolved_type = inflater.inflater_for(annotation, owner=(self, reference_type._type_impl)) + resolved_type = inflater.inflater_for(annotation, owner=owner) return resolved_type, bitfield_field, explicit_offset @@ -168,46 +174,39 @@ def compute_own_size(cls: type[struct_impl], reference_type: type) -> None: if name == "_aligned_": continue - bitfield_field = None - attribute = None - has_explicit_offset = False - - if name in reference.__dict__: - attrs = getattr(reference, name) - if not isinstance(attrs, tuple): - attrs = (attrs,) - - if sum(isinstance(attr, Field) for attr in attrs) > 1: - raise 
ValueError("Only one Field is allowed per attribute.") - - for attr in attrs: - if isinstance(attr, BitfieldField): - bitfield_field = attr - elif isinstance(attr, Field): - attribute = cls._inflater.inflater_for((attr, annotation), (None, cls))(None) - elif isinstance(attr, OffsetAttribute): - has_explicit_offset = True - offset = attr.offset - if offset < size: - raise ValueError("Offset must be greater than the current size.") - size = offset - - if not attribute and not bitfield_field: - attribute = cls._inflater.inflater_for(annotation, (None, cls)) - elif isinstance(annotation, Field): - attribute = cls._inflater.inflater_for((annotation, annotation.base_type), (None, cls))(None) - else: - attribute = cls._inflater.inflater_for(annotation, (None, cls)) + resolved_type, bitfield_field, explicit_offset = struct_impl._resolve_field( + name, annotation, reference, cls._inflater, owner=(None, cls), + ) + + has_explicit_offset = explicit_offset is not None + if has_explicit_offset: + if explicit_offset < size: + raise ValueError("Offset must be greater than the current size.") + size = explicit_offset if bitfield_field: + if aligned and bf_tracker.needs_new_group(bitfield_field): + size += bf_tracker.flush() + field_align = alignment_of(bitfield_field.backing_type) + max_alignment = max(max_alignment, field_align) + size = _align_offset(size, field_align) size += bf_tracker.compute_size(bitfield_field) else: size += bf_tracker.flush() + # Get attribute for size computation — try size_of directly first, + # falling back to calling the inflater with None for complex fields. + # Direct size_of avoids recursion for forward-ref pointers. 
+ try: + attribute_size = size_of(resolved_type) + attribute = resolved_type + except (ValueError, TypeError): + attribute = resolved_type(None) + attribute_size = size_of(attribute) if aligned and not has_explicit_offset: field_align = alignment_of(attribute) max_alignment = max(max_alignment, field_align) size = _align_offset(size, field_align) - size += size_of(attribute) + size += attribute_size size += bf_tracker.flush() @@ -232,8 +231,10 @@ def get(self: struct_impl) -> str: return f"{name}(address={addr}, size={size_of(self)})" def to_bytes(self: struct_impl) -> bytes: - """Return the serialized representation of the struct.""" - return b"".join(member.to_bytes() for member in self._members.values()) + """Return the serialized representation of the struct, including padding.""" + if self._frozen: + return self._frozen_struct_bytes + return self.resolver.resolve(size_of(self), 0) def to_dict(self: struct_impl) -> dict[str, object]: """Return a JSON-serializable dict of field names to values.""" @@ -241,12 +242,8 @@ def to_dict(self: struct_impl) -> dict[str, object]: def hexdump(self: struct_impl) -> str: """Return a hex dump of this struct's bytes with field annotations.""" - annotations = {} - offset = 0 - for name, member in self._members.items(): - annotations[offset] = name - offset += len(member.to_bytes()) - + member_offsets = object.__getattribute__(self, "_member_offsets") + annotations = {member_offsets[name]: name for name in self._members} address = struct_impl.address.fget(self) if not self._frozen else 0 return format_hexdump(self.to_bytes(), address, annotations) @@ -255,8 +252,9 @@ def _set(self: struct_impl, _: str) -> None: raise RuntimeError("Cannot set the value of a struct.") def freeze(self: struct_impl) -> None: - """Freeze the struct.""" - # The struct has no implicit value, but it must freeze its members + """Freeze the struct, capturing the full byte representation including padding.""" + self._frozen_struct_bytes = 
self.resolver.resolve(size_of(self), 0) + for member in self._members.values(): member.freeze() @@ -288,7 +286,7 @@ def __repr__(self: struct_impl) -> str: def __eq__(self: struct_impl, value: object) -> bool: """Return whether the struct is equal to the given value.""" if not isinstance(value, struct_impl): - return False + return NotImplemented if size_of(self) != size_of(value): return False diff --git a/libdestruct/common/union/union.py b/libdestruct/common/union/union.py index 7216d36..1506e8b 100644 --- a/libdestruct/common/union/union.py +++ b/libdestruct/common/union/union.py @@ -94,6 +94,20 @@ def freeze(self: union) -> None: v.freeze() super().freeze() + def diff(self: union) -> tuple[object, object]: + """Return the difference between the frozen and current value.""" + if self._variant is not None: + return self._variant.diff() + return {name: v.diff() for name, v in self._variants.items()} + + def reset(self: union) -> None: + """Reset the union to its frozen value.""" + if self._variant is not None: + self._variant.reset() + else: + for v in self._variants.values(): + v.reset() + def to_str(self: union, indent: int = 0) -> str: """Return a string representation of the union.""" if self._variant is not None: diff --git a/test/scripts/alignment_test.py b/test/scripts/alignment_test.py index 2d020ef..2bf2af9 100644 --- a/test/scripts/alignment_test.py +++ b/test/scripts/alignment_test.py @@ -212,3 +212,59 @@ class s_t(struct): c: c_int # should be at offset 8 (aligned to 4) self.assertEqual(size_of(s_t), 12) # 4 + 1 + 3 padding + 4 + + +class AlignedStructSerializationTest(unittest.TestCase): + def test_aligned_struct_to_bytes_includes_padding(self): + """to_bytes on aligned struct includes padding bytes.""" + class aligned_t(struct): + _aligned_ = True + a: c_char + b: c_int + + memory = pystruct.pack(" Date: Tue, 31 Mar 2026 21:03:24 -0500 Subject: [PATCH 33/46] feat: add big-endian support in the resolver This propagates down the chain, and everything 
works big-endian --- libdestruct/backing/fake_resolver.py | 7 +- libdestruct/backing/memory_resolver.py | 7 +- libdestruct/backing/resolver.py | 3 + libdestruct/common/inflater.py | 5 +- libdestruct/common/obj.py | 6 +- libdestruct/common/ptr/ptr.py | 8 +- libdestruct/common/struct/struct.py | 4 +- libdestruct/libdestruct.py | 9 +- test/scripts/endianness_test.py | 268 +++++++++++++++++++++++++ test/scripts/types_unit_test.py | 12 ++ 10 files changed, 309 insertions(+), 20 deletions(-) create mode 100644 test/scripts/endianness_test.py diff --git a/libdestruct/backing/fake_resolver.py b/libdestruct/backing/fake_resolver.py index 496030d..637e67f 100644 --- a/libdestruct/backing/fake_resolver.py +++ b/libdestruct/backing/fake_resolver.py @@ -12,12 +12,13 @@ class FakeResolver(Resolver): """A class that can resolve elements in a simulated memory storage.""" - def __init__(self: FakeResolver, memory: dict | None = None, address: int | None = 0) -> None: + def __init__(self: FakeResolver, memory: dict | None = None, address: int | None = 0, endianness: str = "little") -> None: """Initializes a basic fake resolver.""" self.memory = memory if memory is not None else {} self.address = address self.parent = None self.offset = None + self.endianness = endianness def resolve_address(self: FakeResolver) -> int: """Resolves self's address, mainly used by children to determine their own address.""" @@ -28,14 +29,14 @@ def resolve_address(self: FakeResolver) -> int: def relative_from_own(self: FakeResolver, address_offset: int, _: int) -> FakeResolver: """Creates a resolver that references a parent, such that a change in the parent is propagated on the child.""" - new_resolver = FakeResolver(self.memory, None) + new_resolver = FakeResolver(self.memory, None, self.endianness) new_resolver.parent = self new_resolver.offset = address_offset return new_resolver def absolute_from_own(self: FakeResolver, address: int) -> FakeResolver: """Creates a resolver that has an absolute 
reference to an object, from the parent's view.""" - return FakeResolver(self.memory, address) + return FakeResolver(self.memory, address, self.endianness) def resolve(self: FakeResolver, size: int, _: int) -> bytes: """Resolves itself, providing the bytes it references for the specified size and index.""" diff --git a/libdestruct/backing/memory_resolver.py b/libdestruct/backing/memory_resolver.py index 8e9054c..34c559d 100644 --- a/libdestruct/backing/memory_resolver.py +++ b/libdestruct/backing/memory_resolver.py @@ -17,12 +17,13 @@ class MemoryResolver(Resolver): """A class that can resolve itself to a value in a referenced memory storage.""" - def __init__(self: MemoryResolver, memory: MutableSequence, address: int | None) -> None: + def __init__(self: MemoryResolver, memory: MutableSequence, address: int | None, endianness: str = "little") -> None: """Initializes a basic memory resolver.""" self.memory = memory self.address = address self.parent = None self.offset = None + self.endianness = endianness def resolve_address(self: MemoryResolver) -> int: """Resolves self's address, mainly used by childs to determine their own address.""" @@ -33,14 +34,14 @@ def resolve_address(self: MemoryResolver) -> int: def relative_from_own(self: MemoryResolver, address_offset: int, _: int) -> MemoryResolver: """Creates a resolver that references a parent, such that a change in the parent is propagated on the child.""" - new_resolver = MemoryResolver(self.memory, None) + new_resolver = MemoryResolver(self.memory, None, self.endianness) new_resolver.parent = self new_resolver.offset = address_offset return new_resolver def absolute_from_own(self: MemoryResolver, address: int) -> MemoryResolver: """Creates a resolver that has an absolute reference to an object, from the parent's view.""" - return MemoryResolver(self.memory, address) + return MemoryResolver(self.memory, address, self.endianness) def resolve(self: MemoryResolver, size: int, _: int) -> bytes: """Resolves itself, 
providing the bytes it references for the specified size and index.""" diff --git a/libdestruct/backing/resolver.py b/libdestruct/backing/resolver.py index 3c04cd8..32612f9 100644 --- a/libdestruct/backing/resolver.py +++ b/libdestruct/backing/resolver.py @@ -16,6 +16,9 @@ class Resolver(ABC): parent: Self + endianness: str = "little" + """The endianness of the data this resolver accesses.""" + @abstractmethod def relative_from_own(self: Resolver, address_offset: int, index_offset: int) -> Self: """Creates a resolver that references a parent, such that a change in the parent is propagated on the child.""" diff --git a/libdestruct/common/inflater.py b/libdestruct/common/inflater.py index fddb6f6..ff457c8 100644 --- a/libdestruct/common/inflater.py +++ b/libdestruct/common/inflater.py @@ -21,9 +21,10 @@ class Inflater: """The memory manager, which inflates any memory-referencing type.""" - def __init__(self: Inflater, memory: MutableSequence) -> None: + def __init__(self: Inflater, memory: MutableSequence, endianness: str = "little") -> None: """Initialize the memory manager.""" self.memory = memory + self.endianness = endianness self.type_registry = TypeRegistry() def inflate(self: Inflater, item: type, address: int | Resolver) -> obj: @@ -38,6 +39,6 @@ def inflate(self: Inflater, item: type, address: int | Resolver) -> obj: """ if isinstance(address, int): # Create a memory resolver from the address - address = MemoryResolver(self.memory, address) + address = MemoryResolver(self.memory, address, self.endianness) return self.type_registry.inflater_for(item)(address) diff --git a/libdestruct/common/obj.py b/libdestruct/common/obj.py index 0c9379f..0985557 100644 --- a/libdestruct/common/obj.py +++ b/libdestruct/common/obj.py @@ -38,6 +38,8 @@ def __init__(self: obj, resolver: Resolver) -> None: resolver: The resolver for the value of this object. 
""" self.resolver = resolver + if resolver is not None: + self.endianness = resolver.endianness @property def address(self: obj) -> int: @@ -57,11 +59,11 @@ def to_bytes(self: obj) -> bytes: """Serialize the object to bytes.""" @classmethod - def from_bytes(cls: type[obj], data: bytes) -> obj: + def from_bytes(cls: type[obj], data: bytes, endianness: str = "little") -> obj: """Deserialize the object from bytes.""" from libdestruct.libdestruct import inflater - lib = inflater(data) + lib = inflater(data, endianness=endianness) item = lib.inflate(cls, 0) item.freeze() return item diff --git a/libdestruct/common/ptr/ptr.py b/libdestruct/common/ptr/ptr.py index 6474708..2122fc4 100644 --- a/libdestruct/common/ptr/ptr.py +++ b/libdestruct/common/ptr/ptr.py @@ -25,12 +25,13 @@ class _ArithmeticResolver(Resolver): def __init__(self: _ArithmeticResolver, original: Resolver, address: int) -> None: self._original = original self._address = address + self.endianness = original.endianness def resolve_address(self: _ArithmeticResolver) -> int: return self._address def resolve(self: _ArithmeticResolver, size: int, _: int) -> bytes: - return self._address.to_bytes(size, "little") + return self._address.to_bytes(size, self.endianness) def modify(self: _ArithmeticResolver, _size: int, _index: int, _value: bytes) -> None: raise RuntimeError("Cannot modify a synthetic pointer.") @@ -98,10 +99,9 @@ def unwrap(self: ptr, length: int | None = None) -> obj: raise ValueError("Length is not supported when unwrapping a pointer to a wrapper object.") result = self.wrapper(self.resolver.absolute_from_own(address)) - elif not length: - result = self.resolver.resolve(1, 0) else: - result = self.resolver.resolve(length, 0) + target_resolver = self.resolver.absolute_from_own(address) + result = target_resolver.resolve(length or 1, 0) self._cached_unwrap = result self._cache_valid = True diff --git a/libdestruct/common/struct/struct.py b/libdestruct/common/struct/struct.py index 0ef7eed..5b2ab96 
100644 --- a/libdestruct/common/struct/struct.py +++ b/libdestruct/common/struct/struct.py @@ -30,8 +30,8 @@ def __new__(cls: type[struct], *args: ..., **kwargs: ...) -> struct: # noqa: PY return type_impl(*args, **kwargs) @classmethod - def from_bytes(cls: type[struct], data: bytes) -> struct_impl: + def from_bytes(cls: type[struct], data: bytes, endianness: str = "little") -> struct_impl: """Create a struct from a serialized representation.""" - type_inflater = inflater(data) + type_inflater = inflater(data, endianness=endianness) return type_inflater.inflate(cls, 0) diff --git a/libdestruct/libdestruct.py b/libdestruct/libdestruct.py index 81e8235..5289360 100644 --- a/libdestruct/libdestruct.py +++ b/libdestruct/libdestruct.py @@ -16,21 +16,22 @@ from libdestruct.common.obj import obj -def inflater(memory: Sequence) -> Inflater: +def inflater(memory: Sequence, endianness: str = "little") -> Inflater: """Return a TypeInflater instance.""" if not isinstance(memory, Sequence): raise TypeError(f"memory must be a MutableSequence, not {type(memory).__name__}") - return Inflater(memory) + return Inflater(memory, endianness=endianness) -def inflate(item: type, memory: Sequence, address: int | Resolver) -> obj: +def inflate(item: type, memory: Sequence, address: int | Resolver, endianness: str = "little") -> obj: """Inflate a memory-referencing type. Args: item: The type to inflate. memory: The memory view, which can be mutable or immutable. address: The address of the object in the memory view. + endianness: The byte order ("little" or "big"). Returns: The inflated object. 
@@ -38,4 +39,4 @@ def inflate(item: type, memory: Sequence, address: int | Resolver) -> obj: if not isinstance(address, int) and not isinstance(address, Resolver): raise TypeError(f"address must be an int or a Resolver, not {type(address).__name__}") - return inflater(memory).inflate(item, address) + return inflater(memory, endianness=endianness).inflate(item, address) diff --git a/test/scripts/endianness_test.py b/test/scripts/endianness_test.py new file mode 100644 index 0000000..72f53fd --- /dev/null +++ b/test/scripts/endianness_test.py @@ -0,0 +1,268 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +import struct as pystruct +import unittest + +from libdestruct import ( + array_of, + bitfield_of, + c_char, + c_double, + c_float, + c_int, + c_long, + c_short, + c_uint, + c_ulong, + c_ushort, + inflater, + ptr, + ptr_to, + size_of, + struct, +) +from libdestruct.backing.memory_resolver import MemoryResolver + + +class BigEndianIntegerTest(unittest.TestCase): + def test_c_int_read_big_endian(self): + """Big-endian c_int reads bytes in big-endian order.""" + memory = bytearray(pystruct.pack(">i", 0x12345678)) + lib = inflater(memory, endianness="big") + val = lib.inflate(c_int, 0) + self.assertEqual(val.value, 0x12345678) + + def test_c_int_read_little_endian_default(self): + """Default endianness is little-endian (backward compatibility).""" + memory = bytearray(pystruct.pack("h", -1234)) + lib = inflater(memory, endianness="big") + val = lib.inflate(c_short, 0) + self.assertEqual(val.value, -1234) + + def test_c_long_read_big_endian(self): + """Big-endian c_long reads correctly.""" + memory = bytearray(pystruct.pack(">q", 0x0102030405060708)) + lib = inflater(memory, endianness="big") + val = lib.inflate(c_long, 0) + self.assertEqual(val.value, 0x0102030405060708) 
+ + def test_c_uint_read_big_endian(self): + """Big-endian unsigned int reads correctly.""" + memory = bytearray(pystruct.pack(">I", 0xDEADBEEF)) + lib = inflater(memory, endianness="big") + val = lib.inflate(c_uint, 0) + self.assertEqual(val.value, 0xDEADBEEF) + + def test_c_int_write_big_endian(self): + """Writing a big-endian c_int stores bytes in big-endian order.""" + memory = bytearray(4) + lib = inflater(memory, endianness="big") + val = lib.inflate(c_int, 0) + val.value = 0x12345678 + self.assertEqual(memory, pystruct.pack(">i", 0x12345678)) + + def test_c_int_to_bytes_big_endian(self): + """to_bytes returns big-endian representation.""" + memory = bytearray(pystruct.pack(">i", 42)) + lib = inflater(memory, endianness="big") + val = lib.inflate(c_int, 0) + self.assertEqual(val.to_bytes(), pystruct.pack(">i", 42)) + + +class BigEndianFloatTest(unittest.TestCase): + def test_c_float_read_big_endian(self): + """Big-endian c_float reads correctly.""" + memory = bytearray(pystruct.pack(">f", 3.14)) + lib = inflater(memory, endianness="big") + val = lib.inflate(c_float, 0) + self.assertAlmostEqual(val.value, 3.14, places=5) + + def test_c_double_read_big_endian(self): + """Big-endian c_double reads correctly.""" + memory = bytearray(pystruct.pack(">d", 2.718281828)) + lib = inflater(memory, endianness="big") + val = lib.inflate(c_double, 0) + self.assertAlmostEqual(val.value, 2.718281828, places=8) + + def test_c_float_write_big_endian(self): + """Writing a big-endian c_float stores bytes in big-endian order.""" + memory = bytearray(4) + lib = inflater(memory, endianness="big") + val = lib.inflate(c_float, 0) + val.value = 1.5 + self.assertEqual(memory, pystruct.pack(">f", 1.5)) + + +class BigEndianStructTest(unittest.TestCase): + def test_struct_fields_inherit_endianness(self): + """Struct fields inherit big-endian from the inflater.""" + class s_t(struct): + a: c_int + b: c_short + + memory = bytearray(pystruct.pack(">i", 0x12345678) + pystruct.pack(">h", 
-100)) + lib = inflater(memory, endianness="big") + s = lib.inflate(s_t, 0) + self.assertEqual(s.a.value, 0x12345678) + self.assertEqual(s.b.value, -100) + + def test_nested_struct_inherits_endianness(self): + """Nested struct fields also inherit big-endian.""" + class inner_t(struct): + x: c_int + + class outer_t(struct): + a: c_short + inner: inner_t + + memory = bytearray(pystruct.pack(">h", 0x0102) + pystruct.pack(">i", 0x03040506)) + lib = inflater(memory, endianness="big") + s = lib.inflate(outer_t, 0) + self.assertEqual(s.a.value, 0x0102) + self.assertEqual(s.inner.x.value, 0x03040506) + + def test_struct_write_big_endian(self): + """Writing to struct fields stores bytes in big-endian order.""" + class s_t(struct): + a: c_int + + memory = bytearray(4) + lib = inflater(memory, endianness="big") + s = lib.inflate(s_t, 0) + s.a.value = 0x1A2B3C4D + self.assertEqual(memory, b"\x1A\x2B\x3C\x4D") + + +class BigEndianFromBytesTest(unittest.TestCase): + def test_from_bytes_big_endian(self): + """from_bytes with endianness='big' reads correctly.""" + data = pystruct.pack(">i", 0x12345678) + val = c_int.from_bytes(data, endianness="big") + self.assertEqual(val.value, 0x12345678) + + def test_from_bytes_default_little_endian(self): + """from_bytes defaults to little-endian.""" + data = pystruct.pack("i", 1000) + pystruct.pack(">h", 2000) + s = s_t.from_bytes(data, endianness="big") + self.assertEqual(s.a.value, 1000) + self.assertEqual(s.b.value, 2000) + + +class BigEndianRoundTripTest(unittest.TestCase): + def test_int_round_trip(self): + """Big-endian int survives from_bytes -> to_bytes round trip.""" + original = pystruct.pack(">i", 0x12345678) + val = c_int.from_bytes(original, endianness="big") + self.assertEqual(val.to_bytes(), original) + + def test_struct_round_trip(self): + """Big-endian struct survives from_bytes -> to_bytes round trip.""" + class s_t(struct): + a: c_int + b: c_short + + original = pystruct.pack(">i", 0x1A2B3C4D) + pystruct.pack(">h", 0x1122) 
+ s = s_t.from_bytes(original, endianness="big") + self.assertEqual(s.to_bytes(), original) + + def test_float_round_trip(self): + """Big-endian float survives from_bytes -> to_bytes round trip.""" + original = pystruct.pack(">f", 3.14) + val = c_float.from_bytes(original, endianness="big") + self.assertEqual(val.to_bytes(), original) + + +class BigEndianPointerTest(unittest.TestCase): + def test_ptr_read_big_endian(self): + """Big-endian pointer reads address in big-endian order.""" + memory = bytearray(16) + # Pointer at offset 0 with big-endian value 8 + memory[0:8] = pystruct.pack(">Q", 8) + # Target int at offset 8 + memory[8:12] = pystruct.pack(">i", 42) + + lib = inflater(memory, endianness="big") + p = lib.inflate(ptr_to(c_int), 0) + self.assertEqual(p.get(), 8) + self.assertEqual(p.unwrap().value, 42) + + def test_ptr_arithmetic_big_endian(self): + """Pointer arithmetic works with big-endian pointers.""" + memory = bytearray(24) + # Pointer at offset 0 pointing to offset 8 + memory[0:8] = pystruct.pack(">Q", 8) + # Two ints at offset 8 and 12 + memory[8:12] = pystruct.pack(">i", 100) + memory[12:16] = pystruct.pack(">i", 200) + + lib = inflater(memory, endianness="big") + p = lib.inflate(ptr_to(c_int), 0) + self.assertEqual(p[0].value, 100) + self.assertEqual(p[1].value, 200) + + +class BigEndianArrayTest(unittest.TestCase): + def test_array_big_endian(self): + """Array elements inherit big-endian.""" + class s_t(struct): + arr: list[c_int] = array_of(c_int, 3) + + data = b"" + for v in [10, 20, 30]: + data += pystruct.pack(">i", v) + + memory = bytearray(data) + lib = inflater(memory, endianness="big") + s = lib.inflate(s_t, 0) + self.assertEqual(s.arr[0].value, 10) + self.assertEqual(s.arr[1].value, 20) + self.assertEqual(s.arr[2].value, 30) + + +class BigEndianBitfieldTest(unittest.TestCase): + def test_bitfield_big_endian(self): + """Bitfield reads from big-endian backing integer.""" + class s_t(struct): + flags: c_uint = bitfield_of(c_uint, 3) + + # 
Value 5 (0b101) in big-endian uint32 + memory = bytearray(pystruct.pack(">I", 5)) + lib = inflater(memory, endianness="big") + s = lib.inflate(s_t, 0) + self.assertEqual(s.flags.value, 5) + + def test_bitfield_write_big_endian(self): + """Bitfield writes to big-endian backing integer.""" + class s_t(struct): + flags: c_uint = bitfield_of(c_uint, 3) + + memory = bytearray(4) + lib = inflater(memory, endianness="big") + s = lib.inflate(s_t, 0) + s.flags.value = 5 + self.assertEqual(memory, pystruct.pack(">I", 5)) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/scripts/types_unit_test.py b/test/scripts/types_unit_test.py index 453e209..34d436b 100644 --- a/test/scripts/types_unit_test.py +++ b/test/scripts/types_unit_test.py @@ -198,6 +198,18 @@ def test_ptr_arithmetic_chain(self): self.assertEqual((p + 2)[0].value, 3) + def test_untyped_unwrap_reads_target(self): + """Untyped pointer unwrap reads bytes at the target address, not the pointer's own bytes.""" + memory = bytearray(16) + # Pointer at offset 0 with value 8 (points to offset 8) + memory[0:8] = (8).to_bytes(8, "little") + # Target byte at offset 8 + memory[8] = 0xAB + + p = ptr(MemoryResolver(memory, 0)) + result = p.unwrap() + self.assertEqual(result, bytes([0xAB])) + def test_unwrap_cached(self): """Two unwrap() calls return the same object.""" class test_t(struct): From 837c574468e569307c21d23da0af249d1900135d Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 22:34:28 -0500 Subject: [PATCH 34/46] fix: struct freezing didn't work correctly, alignment was broken for arrays --- libdestruct/common/struct/struct.py | 4 ++- libdestruct/common/struct/struct_impl.py | 8 +++-- libdestruct/common/utils.py | 9 ++++-- test/scripts/alignment_test.py | 40 ++++++++++++++++++++++++ test/scripts/struct_unit_test.py | 28 +++++++++++++++++ 5 files changed, 83 insertions(+), 6 deletions(-) diff --git a/libdestruct/common/struct/struct.py b/libdestruct/common/struct/struct.py index 
5b2ab96..2d812d6 100644 --- a/libdestruct/common/struct/struct.py +++ b/libdestruct/common/struct/struct.py @@ -34,4 +34,6 @@ def from_bytes(cls: type[struct], data: bytes, endianness: str = "little") -> st """Create a struct from a serialized representation.""" type_inflater = inflater(data, endianness=endianness) - return type_inflater.inflate(cls, 0) + result = type_inflater.inflate(cls, 0) + result.freeze() + return result diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index 1cfb3ec..10d3ed2 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -234,7 +234,8 @@ def to_bytes(self: struct_impl) -> bytes: """Return the serialized representation of the struct, including padding.""" if self._frozen: return self._frozen_struct_bytes - return self.resolver.resolve(size_of(self), 0) + resolver = object.__getattribute__(self, "resolver") + return resolver.resolve(size_of(self), 0) def to_dict(self: struct_impl) -> dict[str, object]: """Return a JSON-serializable dict of field names to values.""" @@ -253,12 +254,13 @@ def _set(self: struct_impl, _: str) -> None: def freeze(self: struct_impl) -> None: """Freeze the struct, capturing the full byte representation including padding.""" - self._frozen_struct_bytes = self.resolver.resolve(size_of(self), 0) + resolver = object.__getattribute__(self, "resolver") + self._frozen_struct_bytes = resolver.resolve(size_of(self), 0) for member in self._members.values(): member.freeze() - self._frozen = True + super().freeze() def to_str(self: struct_impl, indent: int = 0) -> str: """Return a string representation of the struct.""" diff --git a/libdestruct/common/utils.py b/libdestruct/common/utils.py index 6e245d7..aba8157 100644 --- a/libdestruct/common/utils.py +++ b/libdestruct/common/utils.py @@ -81,11 +81,16 @@ def alignment_of(item: obj | type[obj]) -> int: if isinstance(item, type) and "alignment" in item.__dict__ and 
isinstance(item.__dict__["alignment"], int): return item.__dict__["alignment"] - # Field descriptors + # Field descriptors — for array fields, alignment comes from the element type if isinstance(item, Field): + if hasattr(item, "item"): + return alignment_of(item.item) return _alignment_from_size(item.get_size()) if is_field_bound_method(item): - return _alignment_from_size(item.__self__.get_size()) + field = item.__self__ + if hasattr(field, "item"): + return alignment_of(field.item) + return _alignment_from_size(field.get_size()) # Derive from size for power-of-2 sized types try: diff --git a/test/scripts/alignment_test.py b/test/scripts/alignment_test.py index 2bf2af9..b714b5e 100644 --- a/test/scripts/alignment_test.py +++ b/test/scripts/alignment_test.py @@ -241,6 +241,46 @@ class aligned_t(struct): self.assertEqual(s2.b.value, 42) +class ArrayAlignmentTest(unittest.TestCase): + def test_alignment_of_array_field(self): + """alignment_of(array_of(c_int, N)) should be 4 (element alignment), not 1.""" + from libdestruct import array_of + + arr = array_of(c_int, 3) + self.assertEqual(alignment_of(arr), 4) + + def test_aligned_struct_with_array(self): + """Array field in aligned struct should be aligned to element alignment.""" + from libdestruct import array_of + + class s_t(struct): + _aligned_ = True + a: c_char + arr: list[c_int] = array_of(c_int, 3) + + # a at 0 (1 byte), padding 3, arr at 4 (12 bytes) = 16, tail padded to 4 = 16 + self.assertEqual(size_of(s_t), 16) + + def test_aligned_struct_with_array_read(self): + """Values read correctly from aligned array in struct.""" + from libdestruct import array_of + + class s_t(struct): + _aligned_ = True + a: c_char + arr: list[c_int] = array_of(c_int, 3) + + memory = pystruct.pack(" Date: Tue, 31 Mar 2026 22:35:16 -0500 Subject: [PATCH 35/46] style: hide incorrect linter annotation --- libdestruct/common/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libdestruct/common/__init__.py 
b/libdestruct/common/__init__.py index 59f8f80..21a8b08 100644 --- a/libdestruct/common/__init__.py +++ b/libdestruct/common/__init__.py @@ -4,4 +4,4 @@ # Licensed under the MIT license. See LICENSE file in the project root for details. # -import libdestruct.common.forward_ref_inflater +import libdestruct.common.forward_ref_inflater # noqa: F401 From fffff7fc0292e330d83a8328f736a2c9ef5023f9 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 22:38:04 -0500 Subject: [PATCH 36/46] ci: run test suite on all python versions we support --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d8ae703..1d18ccc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -19,7 +19,7 @@ jobs: timeout-minutes: 15 strategy: matrix: - python-version: ["3.10", "3.13"] + python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] steps: - uses: actions/checkout@v4 From 726f12d409c073dd21d76d6b681db0b261026330 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Tue, 31 Mar 2026 23:06:55 -0500 Subject: [PATCH 37/46] feat: implement the proposal in issue #5 --- libdestruct/common/array/array.py | 7 ++ .../common/array/array_field_inflater.py | 19 ++++ libdestruct/common/enum/enum.py | 7 ++ .../common/enum/enum_field_inflater.py | 17 ++++ test/scripts/struct_unit_test.py | 92 ++++++++++++++++++- 5 files changed, 141 insertions(+), 1 deletion(-) diff --git a/libdestruct/common/array/array.py b/libdestruct/common/array/array.py index f9d6e9a..e204302 100644 --- a/libdestruct/common/array/array.py +++ b/libdestruct/common/array/array.py @@ -7,6 +7,7 @@ from __future__ import annotations from abc import abstractmethod +from types import GenericAlias from libdestruct.common.obj import obj @@ -14,6 +15,12 @@ class array(obj): """An array of objects.""" + def __class_getitem__(cls, params: tuple) -> GenericAlias: + """Support array[c_int, 3] subscript 
syntax.""" + if not isinstance(params, tuple): + params = (params,) + return GenericAlias(cls, params) + @abstractmethod def count(self: array) -> int: """Return the size of the array.""" diff --git a/libdestruct/common/array/array_field_inflater.py b/libdestruct/common/array/array_field_inflater.py index 51738d9..3fc217b 100644 --- a/libdestruct/common/array/array_field_inflater.py +++ b/libdestruct/common/array/array_field_inflater.py @@ -9,6 +9,7 @@ from typing import TYPE_CHECKING +from libdestruct.common.array.array import array from libdestruct.common.array.linear_array_field import LinearArrayField from libdestruct.common.type_registry import TypeRegistry @@ -18,6 +19,7 @@ from libdestruct.backing.resolver import Resolver from libdestruct.common.obj import obj + registry = TypeRegistry() @@ -32,4 +34,21 @@ def linear_array_field_inflater( return field.inflate +def _subscripted_array_handler( + item: object, + args: tuple, + owner: tuple[obj, type[obj]] | None, +) -> Callable[[Resolver], obj] | None: + """Handle subscripted array types like array[c_int, 3].""" + if len(args) != 2: + return None + element_type, count = args + if not isinstance(count, int): + return None + field = LinearArrayField(element_type, count) + field.item = registry.inflater_for(element_type) + return field.inflate + + registry.register_instance_handler(LinearArrayField, linear_array_field_inflater) +registry.register_generic_handler(array, _subscripted_array_handler) diff --git a/libdestruct/common/enum/enum.py b/libdestruct/common/enum/enum.py index 03fdd70..1bc83f2 100644 --- a/libdestruct/common/enum/enum.py +++ b/libdestruct/common/enum/enum.py @@ -6,6 +6,7 @@ from __future__ import annotations +from types import GenericAlias from typing import TYPE_CHECKING from libdestruct.common.obj import obj @@ -20,6 +21,12 @@ class enum(obj): """A generic enum.""" + def __class_getitem__(cls, params: tuple) -> GenericAlias: + """Support enum[MyEnum] and enum[MyEnum, c_short] subscript 
syntax.""" + if not isinstance(params, tuple): + params = (params,) + return GenericAlias(cls, params) + python_enum: type[Enum] """The backing Python enum.""" diff --git a/libdestruct/common/enum/enum_field_inflater.py b/libdestruct/common/enum/enum_field_inflater.py index 4724a3b..5dd622e 100644 --- a/libdestruct/common/enum/enum_field_inflater.py +++ b/libdestruct/common/enum/enum_field_inflater.py @@ -8,6 +8,8 @@ from typing import TYPE_CHECKING +from libdestruct.c.c_integer_types import c_int +from libdestruct.common.enum.enum import enum from libdestruct.common.enum.int_enum_field import IntEnumField from libdestruct.common.type_registry import TypeRegistry @@ -30,4 +32,19 @@ def generic_enum_field_inflater( return field.inflate +def _subscripted_enum_handler( + item: object, + args: tuple, + owner: tuple[obj, type[obj]] | None, +) -> Callable[[Resolver], obj] | None: + """Handle subscripted enum types like enum[MyEnum] or enum[MyEnum, c_short].""" + if not args: + return None + python_enum = args[0] + backing_type = args[1] if len(args) > 1 else c_int + field = IntEnumField(python_enum, size=backing_type.size) + return field.inflate + + registry.register_instance_handler(IntEnumField, generic_enum_field_inflater) +registry.register_generic_handler(enum, _subscripted_enum_handler) diff --git a/test/scripts/struct_unit_test.py b/test/scripts/struct_unit_test.py index 3b23d6a..ccf1dd6 100644 --- a/test/scripts/struct_unit_test.py +++ b/test/scripts/struct_unit_test.py @@ -7,7 +7,7 @@ import unittest from enum import IntEnum -from libdestruct import c_int, c_long, c_uint, inflater, struct, ptr, ptr_to_self, array_of, enum, enum_of +from libdestruct import array, c_int, c_long, c_short, c_uint, inflater, struct, ptr, ptr_to_self, array_of, enum, enum_of class StructMemberCollisionTest(unittest.TestCase): @@ -264,6 +264,96 @@ class TreeNode(struct): self.assertEqual(node.data.value, 42) +class SubscriptSyntaxTest(unittest.TestCase): + """Test the subscript syntax: 
enum[T], array[T, N], ptr[T].""" + + def test_enum_subscript(self): + """enum[MyEnum] works as a type annotation for struct fields.""" + class Color(IntEnum): + RED = 0 + GREEN = 1 + BLUE = 2 + + class s_t(struct): + color: enum[Color] + + memory = (1).to_bytes(4, "little") + s = s_t.from_bytes(memory) + self.assertEqual(s.color.value, Color.GREEN) + + def test_enum_subscript_custom_backing(self): + """enum[MyEnum, c_short] uses a custom backing type.""" + class Status(IntEnum): + OFF = 0 + ON = 1 + + class s_t(struct): + status: enum[Status, c_short] + + memory = (1).to_bytes(2, "little") + s = s_t.from_bytes(memory) + self.assertEqual(s.status.value, Status.ON) + from libdestruct import size_of + self.assertEqual(size_of(s_t), 2) + + def test_array_subscript(self): + """array[c_int, 3] works as a type annotation for struct fields.""" + class s_t(struct): + data: array[c_int, 3] + + memory = b"" + for v in [10, 20, 30]: + memory += v.to_bytes(4, "little") + + s = s_t.from_bytes(memory) + self.assertEqual(s.data[0].value, 10) + self.assertEqual(s.data[1].value, 20) + self.assertEqual(s.data[2].value, 30) + + def test_array_subscript_size(self): + """array[c_int, 3] has correct size.""" + class s_t(struct): + data: array[c_int, 3] + + from libdestruct import size_of + self.assertEqual(size_of(s_t), 12) + + def test_ptr_subscript(self): + """ptr[T] works as a type annotation (already supported).""" + class s_t(struct): + val: c_int + ref: ptr[c_int] + + memory = b"" + memory += (42).to_bytes(4, "little") + memory += (0).to_bytes(8, "little") + + s = s_t.from_bytes(memory) + self.assertEqual(s.val.value, 42) + + def test_mixed_subscript_struct(self): + """Struct mixing all subscript syntaxes.""" + class Direction(IntEnum): + UP = 0 + DOWN = 1 + + class s_t(struct): + dir: enum[Direction] + coords: array[c_int, 2] + next: ptr["s_t"] + + memory = b"" + memory += (1).to_bytes(4, "little") # dir = DOWN + memory += (10).to_bytes(4, "little") # coords[0] + memory += 
(20).to_bytes(4, "little") # coords[1] + memory += (0).to_bytes(8, "little") # next = null + + s = s_t.from_bytes(memory) + self.assertEqual(s.dir.value, Direction.DOWN) + self.assertEqual(s.coords[0].value, 10) + self.assertEqual(s.coords[1].value, 20) + + class StructEqualityTest(unittest.TestCase): def test_struct_eq_non_struct_returns_not_implemented(self): """struct.__eq__ returns NotImplemented for non-struct values.""" From 88b489f78d0b760df7813b1b391c9c187c27f443 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Wed, 1 Apr 2026 09:14:34 -0500 Subject: [PATCH 38/46] feat: add support for modifiers like offset specified through Annotated types --- libdestruct/common/struct/struct_impl.py | 15 ++++++- test/scripts/struct_unit_test.py | 57 +++++++++++++++++++++++- 2 files changed, 69 insertions(+), 3 deletions(-) diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index 10d3ed2..37464bb 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -6,6 +6,8 @@ from __future__ import annotations +from typing import Annotated, get_args, get_origin + from typing_extensions import Self from libdestruct.backing.fake_resolver import FakeResolver @@ -131,8 +133,17 @@ def _resolve_field( Either resolved_inflater or bitfield_field will be non-None (not both). explicit_offset is set when an OffsetAttribute is present. """ + # Unwrap Annotated[type, metadata...] 
— extract the real type and any metadata + annotated_offset = None + if get_origin(annotation) is Annotated: + ann_args = get_args(annotation) + annotation = ann_args[0] + for meta in ann_args[1:]: + if isinstance(meta, OffsetAttribute): + annotated_offset = meta.offset + if name not in reference.__dict__: - return inflater.inflater_for(annotation, owner=owner), None, None + return inflater.inflater_for(annotation, owner=owner), None, annotated_offset attrs = getattr(reference, name) if not isinstance(attrs, tuple): @@ -143,7 +154,7 @@ def _resolve_field( resolved_type = None bitfield_field = None - explicit_offset = None + explicit_offset = annotated_offset for attr in attrs: if isinstance(attr, BitfieldField): diff --git a/test/scripts/struct_unit_test.py b/test/scripts/struct_unit_test.py index ccf1dd6..e952034 100644 --- a/test/scripts/struct_unit_test.py +++ b/test/scripts/struct_unit_test.py @@ -7,7 +7,9 @@ import unittest from enum import IntEnum -from libdestruct import array, c_int, c_long, c_short, c_uint, inflater, struct, ptr, ptr_to_self, array_of, enum, enum_of +from typing import Annotated + +from libdestruct import array, c_int, c_long, c_short, c_uint, inflater, offset, struct, ptr, ptr_to_self, array_of, enum, enum_of class StructMemberCollisionTest(unittest.TestCase): @@ -354,6 +356,59 @@ class s_t(struct): self.assertEqual(s.coords[1].value, 20) +class AnnotatedOffsetTest(unittest.TestCase): + """Test Annotated[type, offset(N)] syntax for explicit field offsets.""" + + def test_annotated_offset_basic(self): + """Annotated[c_int, offset(N)] places a field at the given offset.""" + class s_t(struct): + a: c_int + b: Annotated[c_int, offset(8)] + + from libdestruct import size_of + self.assertEqual(size_of(s_t), 12) # 8 + 4 + + def test_annotated_offset_read(self): + """Values are read correctly from Annotated offset positions.""" + import struct as pystruct + + class s_t(struct): + a: c_int + b: Annotated[c_int, offset(8)] + + memory = 
pystruct.pack(" Date: Wed, 1 Apr 2026 09:14:57 -0500 Subject: [PATCH 39/46] docs: change docs to use the new declarative types and Annotated fields --- SKILL.md | 69 +++++++++++++++++++++++++++++++---- docs/advanced/c_parser.md | 2 +- docs/advanced/forward_refs.md | 6 +-- docs/advanced/offset.md | 47 +++++++++++++++++++++--- docs/basics/arrays.md | 20 ++++++++-- docs/basics/enums.md | 33 +++++++++++++---- docs/basics/pointers.md | 32 ++++++++++------ docs/basics/types.md | 5 ++- docs/index.md | 6 +-- 9 files changed, 176 insertions(+), 44 deletions(-) diff --git a/SKILL.md b/SKILL.md index 653e649..8f4eb95 100644 --- a/SKILL.md +++ b/SKILL.md @@ -25,6 +25,7 @@ Memory is accessed through an `inflater`, which wraps a `bytes` or `bytearray` b ### Imports ```python +from typing import Annotated from libdestruct import ( inflater, # memory wrapper struct, # struct base class @@ -33,10 +34,10 @@ from libdestruct import ( c_float, c_double, # IEEE 754 floats (32/64-bit) c_str, # null-terminated C string ptr, # 8-byte pointer - ptr_to, # typed pointer field descriptor - ptr_to_self, # self-referential pointer field descriptor - array_of, # fixed-size array field descriptor - enum_of, # enum field descriptor + ptr_to, # typed pointer field descriptor (legacy) + ptr_to_self, # self-referential pointer field descriptor (legacy) + array, array_of, # array type + field descriptor + enum, enum_of, # enum type + field descriptor bitfield_of, # bitfield descriptor union, # union annotation type union_of, # plain union field descriptor @@ -131,9 +132,21 @@ player = player_t.from_bytes(memory) ```python class node_t(struct): value: c_int - next: ptr = ptr_to_self() # pointer to own type + next: ptr["node_t"] # pointer to own type (forward ref) # Typed pointer to another type: +class container_t(struct): + data: c_int + ref: ptr[c_long] # subscript syntax (preferred) +``` + +Legacy syntax with `ptr_to()` and `ptr_to_self()` is still supported: + +```python +class node_t(struct): + 
value: c_int + next: ptr = ptr_to_self() + class container_t(struct): data: c_int ref: ptr = ptr_to(c_long) @@ -176,7 +189,15 @@ class tree_t(struct): ```python class packet_t(struct): length: c_int - data: array_of(c_int, 8) # fixed array of 8 c_int + data: array[c_int, 8] # subscript syntax (preferred) +``` + +Legacy syntax with `array_of()` is still supported: + +```python +class packet_t(struct): + length: c_int + data: array_of(c_int, 8) ``` Access array elements: @@ -199,6 +220,19 @@ class Color(IntEnum): GREEN = 1 BLUE = 2 +class pixel_t(struct): + color: enum[Color] # subscript syntax (preferred, defaults to c_int backing) + alpha: c_int + +# With a custom backing type: +class pixel_t(struct): + color: enum[Color, c_short] # 2-byte backing type + alpha: c_int +``` + +Legacy syntax with `enum_of()` is still supported: + +```python class pixel_t(struct): color: c_int = enum_of(Color) alpha: c_int @@ -274,10 +308,29 @@ class wide_t(struct): ### Explicit Field Offsets +```python +from typing import Annotated + +class sparse_t(struct): + a: c_int + b: Annotated[c_int, offset(0x10)] # Annotated syntax (preferred) +``` + +This works with any type, including subscript types: + +```python +class example_t(struct): + a: c_int + data: Annotated[array[c_int, 4], offset(0x10)] + ref: Annotated[ptr[c_int], offset(0x20)] +``` + +Legacy syntax with default values is still supported: + ```python class sparse_t(struct): a: c_int - b: c_int = offset(0x10) # b starts at byte offset 0x10 + b: c_int = offset(0x10) ``` ### Nested Structs @@ -374,7 +427,7 @@ class header_t(struct): magic: c_uint version: c_int num_entries: c_int - entries_ptr: ptr = ptr_to(entry_t) + entries_ptr: ptr[entry_t] with open("file.bin", "rb") as f: data = bytearray(f.read()) diff --git a/docs/advanced/c_parser.md b/docs/advanced/c_parser.md index 84112de..ba3811d 100644 --- a/docs/advanced/c_parser.md +++ b/docs/advanced/c_parser.md @@ -64,7 +64,7 @@ node_t = definition_to_type(""" ## Arrays -Fixed-size 
arrays are converted to `array_of()`: +Fixed-size arrays are converted to `array[T, N]` types: ```python t = definition_to_type(""" diff --git a/docs/advanced/forward_refs.md b/docs/advanced/forward_refs.md index 22aeabc..1a27fab 100644 --- a/docs/advanced/forward_refs.md +++ b/docs/advanced/forward_refs.md @@ -16,9 +16,9 @@ class Node(struct): At inflation time, the string `"Node"` is resolved to the actual `Node` class. This works because Python's `from __future__ import annotations` (used internally by libdestruct) defers annotation evaluation. -## The `ptr_to_self` Shortcut +## The Legacy `ptr_to_self` Shortcut -For the common case of a pointer to the enclosing struct, use `ptr_to_self`: +For the common case of a pointer to the enclosing struct, the legacy `ptr_to_self` syntax is also available: ```python from libdestruct import struct, c_int, ptr_to_self @@ -28,7 +28,7 @@ class Node(struct): next: ptr_to_self ``` -This is equivalent to `ptr["Node"]` but doesn't require you to spell out the type name. +This is equivalent to `ptr["Node"]` but doesn't require you to spell out the type name. The `ptr["TypeName"]` syntax is preferred as it is more explicit. 
## Linked List Example diff --git a/docs/advanced/offset.md b/docs/advanced/offset.md index 0a412d1..9e27190 100644 --- a/docs/advanced/offset.md +++ b/docs/advanced/offset.md @@ -4,6 +4,35 @@ By default, struct fields are laid out sequentially — each field starts immedi ## Usage +### Annotated syntax (preferred) + +Use `Annotated[T, offset(N)]` to place a field at a specific offset: + +```python +from typing import Annotated +from libdestruct import struct, c_int, offset + +class sparse_t(struct): + a: c_int + b: Annotated[c_int, offset(16)] + c: c_int +``` + +This works with any type, including subscript types: + +```python +from libdestruct import struct, c_int, ptr, array, offset + +class example_t(struct): + a: c_int + data: Annotated[array[c_int, 4], offset(0x10)] + ref: Annotated[ptr[c_int], offset(0x20)] +``` + +### Legacy syntax + +The default-value syntax is also supported: + ```python from libdestruct import struct, c_int, offset @@ -49,8 +78,8 @@ C compilers often insert padding for alignment. 
Use `offset()` to match the actu class data_t(struct): flag: c_char - value: c_int = offset(4) - timestamp: c_long = offset(8) + value: Annotated[c_int, offset(4)] + timestamp: Annotated[c_long, offset(8)] ``` ### Skipping Unknown Fields @@ -59,13 +88,21 @@ When reverse engineering, you might know the offset of a field but not what come ```python class mystery_t(struct): - known_field: c_int = offset(0x40) - another_field: c_long = offset(0x100) + known_field: Annotated[c_int, offset(0x40)] + another_field: Annotated[c_long, offset(0x100)] ``` ## Combining with Other Attributes -`offset()` can be combined with `Field` attributes using a tuple: +With the `Annotated` syntax, `offset()` can be combined with any type naturally: + +```python +class example_t(struct): + data: Annotated[ptr[c_int], offset(8)] + items: Annotated[array[c_int, 4], offset(0x10)] +``` + +With the legacy syntax, `offset()` can be combined with `Field` attributes using a tuple: ```python from libdestruct.common.field import Field diff --git a/docs/basics/arrays.md b/docs/basics/arrays.md index 22c970b..ec67921 100644 --- a/docs/basics/arrays.md +++ b/docs/basics/arrays.md @@ -1,13 +1,16 @@ # Arrays -Fixed-size arrays are created with `array_of()`. +Fixed-size arrays can be defined using the `array[T, N]` subscript syntax or the `array_of()` factory function. 
## Defining Arrays ```python -from libdestruct import c_int, array_of, inflater +from libdestruct import c_int, array, array_of, inflater -# An array of 5 c_int values +# Subscript syntax (preferred) +int_array_t = array[c_int, 5] + +# Legacy factory function int_array_t = array_of(c_int, 5) ``` @@ -76,7 +79,16 @@ raw = bytes(arr) ## Arrays in Structs -Use `array_of()` as a type annotation: +Use `array[T, N]` as a type annotation: + +```python +from libdestruct import struct, c_int, array + +class matrix_row_t(struct): + values: array[c_int, 4] +``` + +The legacy `array_of()` syntax is also supported: ```python from libdestruct import struct, c_int, array_of diff --git a/docs/basics/enums.md b/docs/basics/enums.md index 072878d..96c5efc 100644 --- a/docs/basics/enums.md +++ b/docs/basics/enums.md @@ -1,27 +1,45 @@ # Enums -libdestruct maps integer values in memory to Python `Enum` types using `enum_of()`. +libdestruct maps integer values in memory to Python `Enum` types using the `enum[T]` subscript syntax or the `enum_of()` factory function. 
## Defining Enums ```python from enum import IntEnum -from libdestruct import struct, c_int, enum_of +from libdestruct import struct, c_int, enum class Color(IntEnum): RED = 0 GREEN = 1 BLUE = 2 +# Subscript syntax (preferred) +class pixel_t(struct): + color: enum[Color] # defaults to c_int backing type + x: c_int + y: c_int + +# With a custom backing type: +class pixel2_t(struct): + color: enum[Color, c_short] # 2-byte backing type + x: c_int + y: c_int +``` + +The legacy `enum_of()` syntax is also supported: + +```python +from libdestruct import struct, c_int, enum_of + class pixel_t(struct): color: enum_of(Color, c_int) x: c_int y: c_int ``` -`enum_of(PythonEnum, backing_type)` creates a type that: +The enum type: -- Reads the raw integer from memory using the backing type (`c_int`) +- Reads the raw integer from memory using the backing type (`c_int` by default) - Converts it to the corresponding `Enum` member (`Color.RED`, etc.) ## Reading Enum Values @@ -51,7 +69,7 @@ print(pixel.color.value) # 99 (raw integer, no error) ## Standalone Enums -You can also use `enum` directly (without `enum_of`): +You can also inflate enums directly outside of structs: ```python from libdestruct import enum, inflater @@ -59,11 +77,12 @@ from libdestruct import enum, inflater memory = (2).to_bytes(4, "little") lib = inflater(memory) -# The enum() constructor takes a resolver, a Python Enum, and a backing type +e = lib.inflate(enum[Color], 0) +print(e.value) # Color.BLUE ``` !!! tip - For struct fields, `enum_of()` is the recommended API. It automatically handles type registration and inflation. + For struct fields, the `enum[T]` subscript syntax is the recommended API. It automatically handles type registration and inflation. 
## Serialization diff --git a/docs/basics/pointers.md b/docs/basics/pointers.md index fe989ab..c289c4f 100644 --- a/docs/basics/pointers.md +++ b/docs/basics/pointers.md @@ -4,14 +4,24 @@ libdestruct supports typed pointers that can be dereferenced to follow reference ## Defining Pointers in Structs -Use `ptr` with `ptr_to()` to declare a typed pointer field: +Use `ptr[T]` to declare a typed pointer field: ```python -from libdestruct import struct, c_int, ptr_to, inflater +from libdestruct import struct, c_int, ptr, inflater class data_t(struct): value: c_int - next: ptr_to(c_int) + next: ptr[c_int] +``` + +The legacy `ptr_to()` syntax is also supported: + +```python +from libdestruct import ptr_to + +class data_t(struct): + value: c_int + next: ptr = ptr_to(c_int) ``` A pointer occupies 8 bytes (64-bit) and stores an address into the memory buffer. @@ -42,7 +52,7 @@ Use `try_unwrap()` for null-safe pointer access. It returns `None` if the pointe ```python class node_t(struct): val: c_int - next: ptr_to(c_int) + next: ptr[c_int] memory = b"\x0a\x00\x00\x00" + b"\x00" * 8 # val=10, next=null node = node_t.from_bytes(memory) @@ -53,27 +63,27 @@ print(result) # None ## Self-Referential Structs -Use `ptr_to_self` for linked lists and trees: +Use `ptr["TypeName"]` for self-referential structs: ```python -from libdestruct import struct, c_int, ptr_to_self +from libdestruct import struct, c_int, ptr class node_t(struct): val: c_int - next: ptr_to_self + next: ptr["node_t"] ``` -Or use the forward reference syntax with `ptr["TypeName"]`: +The legacy `ptr_to_self()` syntax is also supported: ```python -from libdestruct import struct, c_int, ptr +from libdestruct import struct, c_int, ptr_to_self class node_t(struct): val: c_int - next: ptr["node_t"] + next: ptr = ptr_to_self() ``` -Both forms are equivalent. See [Forward References](../advanced/forward_refs.md) for more details. +See [Forward References](../advanced/forward_refs.md) for more details. 
### Linked List Example diff --git a/docs/basics/types.md b/docs/basics/types.md index dbe3bca..712bf34 100644 --- a/docs/basics/types.md +++ b/docs/basics/types.md @@ -84,8 +84,9 @@ size_of(point_t) # 8 x = c_int.from_bytes(b"\x00\x00\x00\x00") size_of(x) # 4 -# Works with array field descriptors -size_of(array_of(c_int, 10)) # 40 +# Works with array types +size_of(array[c_int, 10]) # 40 +size_of(array_of(c_int, 10)) # 40 (legacy syntax) ``` ## Floating-Point Types diff --git a/docs/index.md b/docs/index.md index bf1815c..a882122 100644 --- a/docs/index.md +++ b/docs/index.md @@ -72,9 +72,9 @@ print(player.health.value) # 100 - **Pythonic API** — define structs with type annotations, access fields as attributes - **C type system** — `c_int`, `c_uint`, `c_long`, `c_ulong`, `c_char`, `c_str` -- **Pointers** — typed pointers with `ptr`, automatic dereferencing with `unwrap()` -- **Arrays** — fixed-size arrays with `array_of()` -- **Enums** — map integer values to Python `Enum` types +- **Pointers** — typed pointers with `ptr[T]`, automatic dereferencing with `unwrap()` +- **Arrays** — fixed-size arrays with `array[T, N]` +- **Enums** — map integer values to Python `Enum` types with `enum[T]` - **Nested structs** — compose structs within structs - **Self-referential types** — forward references via `ptr["TypeName"]` - **C struct parser** — parse C struct definitions directly with `definition_to_type()` From 2a849b6af63ef7bf8568145de68d3878b2650124 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Wed, 1 Apr 2026 11:04:24 -0500 Subject: [PATCH 40/46] fix: CoPilot noticed a few smaller mistakes --- docs/basics/pointers.md | 2 +- docs/basics/types.md | 4 ++-- docs/memory/resolvers.md | 6 +++--- libdestruct/common/ptr/ptr.py | 13 ++++++++----- libdestruct/libdestruct.py | 2 +- test/scripts/types_unit_test.py | 14 ++++++++++++++ 6 files changed, 29 insertions(+), 12 deletions(-) diff --git a/docs/basics/pointers.md b/docs/basics/pointers.md index c289c4f..8c6f611 
100644 --- a/docs/basics/pointers.md +++ b/docs/basics/pointers.md @@ -47,7 +47,7 @@ print(data.next.unwrap().value) # 99 ### Safe Dereferencing -Use `try_unwrap()` for null-safe pointer access. It returns `None` if the pointer is null (0): +Use `try_unwrap()` for safe pointer access. It returns `None` if the pointer address is unresolvable (e.g., out of bounds): ```python class node_t(struct): diff --git a/docs/basics/types.md b/docs/basics/types.md index 712bf34..3e5f3ee 100644 --- a/docs/basics/types.md +++ b/docs/basics/types.md @@ -123,9 +123,9 @@ memory = bytearray(b"Hello\x00World\x00") lib = inflater(memory) s = lib.inflate(c_str, 0) -print(s.value) # "Hello" +print(s.value) # b"Hello" print(len(s)) # 5 -print(s[0]) # 72 (ord('H')) +print(s[0]) # b"H" ``` !!! info diff --git a/docs/memory/resolvers.md b/docs/memory/resolvers.md index c2ccacf..db372c3 100644 --- a/docs/memory/resolvers.md +++ b/docs/memory/resolvers.md @@ -51,7 +51,7 @@ All resolvers implement these methods: |---|---| | `resolve(size, offset)` | Read `size` bytes starting at the resolved address + offset | | `resolve_address()` | Return the absolute address of this resolver | -| `write(data)` | Write bytes at the resolved address | +| `modify(size, index, value)` | Write `value` bytes at the resolved address + index | | `relative_from_own(offset, size)` | Create a child resolver at a relative offset | | `absolute_from_own(address)` | Create a child resolver at an absolute address | @@ -73,8 +73,8 @@ class DebuggerResolver(Resolver): def resolve_address(self): return self._address - def write(self, data, offset=0): - self.debugger.write_memory(self._address + offset, data) + def modify(self, size, index, value): + self.debugger.write_memory(self._address + index, value) # ... 
implement relative_from_own, absolute_from_own ``` diff --git a/libdestruct/common/ptr/ptr.py b/libdestruct/common/ptr/ptr.py index 2122fc4..49bcd11 100644 --- a/libdestruct/common/ptr/ptr.py +++ b/libdestruct/common/ptr/ptr.py @@ -58,8 +58,9 @@ def __init__(self: ptr, resolver: Resolver, wrapper: type | None = None) -> None """ super().__init__(resolver) self.wrapper = wrapper - self._cached_unwrap: obj | None = None + self._cached_unwrap: obj | bytes | None = None self._cache_valid: bool = False + self._cached_length: int | None = None def get(self: ptr) -> int: """Return the value of the pointer.""" @@ -82,14 +83,15 @@ def invalidate(self: ptr) -> None: """Clear the cached unwrap result.""" self._cached_unwrap = None self._cache_valid = False + self._cached_length = None - def unwrap(self: ptr, length: int | None = None) -> obj: + def unwrap(self: ptr, length: int | None = None) -> obj | bytes: """Return the object pointed to by the pointer. Args: length: The length of the object in memory this points to. """ - if self._cache_valid: + if self._cache_valid and self._cached_length == length: return self._cached_unwrap address = self.get() @@ -105,15 +107,16 @@ def unwrap(self: ptr, length: int | None = None) -> obj: self._cached_unwrap = result self._cache_valid = True + self._cached_length = length return result - def try_unwrap(self: ptr, length: int | None = None) -> obj | None: + def try_unwrap(self: ptr, length: int | None = None) -> obj | bytes | None: """Return the object pointed to by the pointer, if it is valid. Args: length: The length of the object in memory this points to. 
""" - if self._cache_valid: + if self._cache_valid and self._cached_length == length: return self._cached_unwrap address = self.get() diff --git a/libdestruct/libdestruct.py b/libdestruct/libdestruct.py index 5289360..e11b3a5 100644 --- a/libdestruct/libdestruct.py +++ b/libdestruct/libdestruct.py @@ -19,7 +19,7 @@ def inflater(memory: Sequence, endianness: str = "little") -> Inflater: """Return a TypeInflater instance.""" if not isinstance(memory, Sequence): - raise TypeError(f"memory must be a MutableSequence, not {type(memory).__name__}") + raise TypeError(f"memory must be a Sequence, not {type(memory).__name__}") return Inflater(memory, endianness=endianness) diff --git a/test/scripts/types_unit_test.py b/test/scripts/types_unit_test.py index 34d436b..4584430 100644 --- a/test/scripts/types_unit_test.py +++ b/test/scripts/types_unit_test.py @@ -293,6 +293,20 @@ def test_cache_invalidated_on_set(self): p.value = 12 # now points to offset 12 self.assertEqual(p.unwrap().value, 20) + def test_untyped_cache_different_lengths(self): + """Untyped ptr cache must differentiate by length parameter.""" + memory = bytearray(8 + 4) + memory[0:8] = (8).to_bytes(8, "little") # pointer to offset 8 + memory[8:12] = b"\x01\x02\x03\x04" + + p = ptr(MemoryResolver(memory, 0)) + + r1 = p.unwrap(length=1) + self.assertEqual(r1, b"\x01") + + r2 = p.unwrap(length=3) + self.assertEqual(r2, b"\x01\x02\x03") + class FloatTest(unittest.TestCase): """c_float and c_double types.""" From b39a9937b6cc99e4d6dc42e605e7178051dcfc08 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Wed, 1 Apr 2026 11:44:47 -0500 Subject: [PATCH 41/46] fix: solve incorrect union alignment and enum backing type assumptions --- .../common/enum/enum_field_inflater.py | 2 +- libdestruct/common/enum/int_enum_field.py | 15 ++- libdestruct/common/struct/struct_impl.py | 2 + .../common/union/tagged_union_field.py | 6 +- libdestruct/common/union/union_field.py | 6 +- libdestruct/common/utils.py | 6 +- 
test/scripts/struct_unit_test.py | 107 +++++++++++++++++- 7 files changed, 137 insertions(+), 7 deletions(-) diff --git a/libdestruct/common/enum/enum_field_inflater.py b/libdestruct/common/enum/enum_field_inflater.py index 5dd622e..d4f99cb 100644 --- a/libdestruct/common/enum/enum_field_inflater.py +++ b/libdestruct/common/enum/enum_field_inflater.py @@ -42,7 +42,7 @@ def _subscripted_enum_handler( return None python_enum = args[0] backing_type = args[1] if len(args) > 1 else c_int - field = IntEnumField(python_enum, size=backing_type.size) + field = IntEnumField(python_enum, backing_type=backing_type) return field.inflate diff --git a/libdestruct/common/enum/int_enum_field.py b/libdestruct/common/enum/int_enum_field.py index 291df0a..a9ead12 100644 --- a/libdestruct/common/enum/int_enum_field.py +++ b/libdestruct/common/enum/int_enum_field.py @@ -21,17 +21,28 @@ class IntEnumField(EnumField): """A generator for an enum of integers.""" - def __init__(self: IntEnumField, enum: type[IntEnum], lenient: bool = True, size: int = 4) -> None: + def __init__( + self: IntEnumField, + enum: type[IntEnum], + lenient: bool = True, + size: int = 4, + backing_type: type | None = None, + ) -> None: """Initialize the field. Args: enum: The enum class. lenient: Whether the conversion is lenient or not. - size: The size of the field in bytes. + size: The size of the field in bytes (used when backing_type is not provided). + backing_type: The explicit backing type to use. If provided, overrides size. 
""" self.enum = enum self.lenient = lenient + if backing_type is not None: + self.backing_type = backing_type + return + if not 0 < size <= 8: raise ValueError("The size of the field must be between 1 and 8 bytes.") diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index 37464bb..780e6da 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -93,6 +93,7 @@ def _inflate_struct_attributes( ) if explicit_offset is not None: + current_offset += bf_tracker.flush() if explicit_offset < current_offset: raise ValueError("Offset must be greater than the current size.") current_offset = explicit_offset @@ -191,6 +192,7 @@ def compute_own_size(cls: type[struct_impl], reference_type: type) -> None: has_explicit_offset = explicit_offset is not None if has_explicit_offset: + size += bf_tracker.flush() if explicit_offset < size: raise ValueError("Offset must be greater than the current size.") size = explicit_offset diff --git a/libdestruct/common/union/tagged_union_field.py b/libdestruct/common/union/tagged_union_field.py index cf05c5a..9c4929c 100644 --- a/libdestruct/common/union/tagged_union_field.py +++ b/libdestruct/common/union/tagged_union_field.py @@ -10,7 +10,7 @@ from libdestruct.common.field import Field from libdestruct.common.union.union import union -from libdestruct.common.utils import size_of +from libdestruct.common.utils import alignment_of, size_of if TYPE_CHECKING: # pragma: no cover from libdestruct.backing.resolver import Resolver @@ -43,3 +43,7 @@ def inflate(self: TaggedUnionField, resolver: Resolver | None) -> union: def get_size(self: TaggedUnionField) -> int: """Return the size of the union (max of all variant sizes).""" return max(size_of(variant) for variant in self.variants.values()) + + def get_alignment(self: TaggedUnionField) -> int: + """Return the alignment of the union (max of all variant alignments).""" + return max(alignment_of(variant) for 
variant in self.variants.values()) diff --git a/libdestruct/common/union/union_field.py b/libdestruct/common/union/union_field.py index cb97b3a..5f709c9 100644 --- a/libdestruct/common/union/union_field.py +++ b/libdestruct/common/union/union_field.py @@ -10,7 +10,7 @@ from libdestruct.common.field import Field from libdestruct.common.union.union import union -from libdestruct.common.utils import size_of +from libdestruct.common.utils import alignment_of, size_of if TYPE_CHECKING: # pragma: no cover from libdestruct.backing.resolver import Resolver @@ -41,3 +41,7 @@ def inflate(self: UnionField, resolver: Resolver | None) -> union: def get_size(self: UnionField) -> int: """Return the size of the union (max of all variant sizes).""" return max(size_of(variant) for variant in self.variants.values()) + + def get_alignment(self: UnionField) -> int: + """Return the alignment of the union (max of all variant alignments).""" + return max(alignment_of(variant) for variant in self.variants.values()) diff --git a/libdestruct/common/utils.py b/libdestruct/common/utils.py index aba8157..6aecfd2 100644 --- a/libdestruct/common/utils.py +++ b/libdestruct/common/utils.py @@ -81,13 +81,17 @@ def alignment_of(item: obj | type[obj]) -> int: if isinstance(item, type) and "alignment" in item.__dict__ and isinstance(item.__dict__["alignment"], int): return item.__dict__["alignment"] - # Field descriptors — for array fields, alignment comes from the element type + # Field descriptors — use get_alignment if available, else derive from element type or size if isinstance(item, Field): + if hasattr(item, "get_alignment"): + return item.get_alignment() if hasattr(item, "item"): return alignment_of(item.item) return _alignment_from_size(item.get_size()) if is_field_bound_method(item): field = item.__self__ + if hasattr(field, "get_alignment"): + return field.get_alignment() if hasattr(field, "item"): return alignment_of(field.item) return _alignment_from_size(field.get_size()) diff --git 
a/test/scripts/struct_unit_test.py b/test/scripts/struct_unit_test.py index e952034..d97e695 100644 --- a/test/scripts/struct_unit_test.py +++ b/test/scripts/struct_unit_test.py @@ -9,7 +9,8 @@ from typing import Annotated -from libdestruct import array, c_int, c_long, c_short, c_uint, inflater, offset, struct, ptr, ptr_to_self, array_of, enum, enum_of +from libdestruct import array, c_int, c_long, c_short, c_uint, c_ushort, inflater, offset, struct, ptr, ptr_to_self, array_of, enum, enum_of, size_of, bitfield_of +from libdestruct.common.union import union, union_of, tagged_union class StructMemberCollisionTest(unittest.TestCase): @@ -419,5 +420,109 @@ class s_t(struct): self.assertIs(s.__eq__(42), NotImplemented) +class BitfieldExplicitOffsetTest(unittest.TestCase): + """Explicit offset after bitfields must flush the pending bitfield group first.""" + + def test_offset_after_bitfield_size(self): + """Struct size must be correct when offset() follows bitfield fields.""" + class s_t(struct): + a: c_uint = bitfield_of(c_uint, 1) + b: c_int = offset(8) + + # a is a 1-bit bitfield in a 4-byte c_uint group at offset 0. + # b is at explicit offset 8 with size 4. 
+ # Total size: 8 + 4 = 12 + self.assertEqual(size_of(s_t), 12) + + def test_offset_after_bitfield_read(self): + """Values must be read correctly when offset() follows bitfield fields.""" + import struct as pystruct + + class s_t(struct): + a: c_uint = bitfield_of(c_uint, 1) + b: c_int = offset(8) + + memory = bytearray(12) + memory[0:4] = pystruct.pack(" Date: Wed, 1 Apr 2026 12:23:40 -0500 Subject: [PATCH 42/46] fix: Opus found even more bugs, everything should be fixed now --- libdestruct/c/c_float_types.py | 63 ++-- .../common/array/array_field_inflater.py | 4 +- libdestruct/common/obj.py | 6 +- libdestruct/common/struct/struct_impl.py | 30 +- libdestruct/common/type_registry.py | 9 +- libdestruct/common/union/union.py | 20 +- libdestruct/libdestruct.py | 6 + test/scripts/review_fix_test.py | 209 +++++++++++++ test/scripts/review_fix_test_2.py | 289 ++++++++++++++++++ 9 files changed, 573 insertions(+), 63 deletions(-) create mode 100644 test/scripts/review_fix_test.py create mode 100644 test/scripts/review_fix_test_2.py diff --git a/libdestruct/c/c_float_types.py b/libdestruct/c/c_float_types.py index d38622c..135816a 100644 --- a/libdestruct/c/c_float_types.py +++ b/libdestruct/c/c_float_types.py @@ -11,71 +11,54 @@ from libdestruct.common.obj import obj -class c_float(obj): - """A C float (IEEE 754 single-precision, 32-bit).""" +class _c_float_base(obj): + """A generic C floating-point type, to be subclassed by c_float and c_double.""" - size: int = 4 - """The size of a float in bytes.""" + size: int + """The size of the float in bytes.""" + + _format: str + """The struct format character ('f' or 'd').""" _frozen_value: float | None = None """The frozen value of the float.""" - def _format_char(self: c_float) -> str: - return "f" + def _format_char(self: _c_float_base) -> str: + prefix = "<" if self.endianness == "little" else ">" + return prefix + self._format - def get(self: c_float) -> float: + def get(self: _c_float_base) -> float: """Return the value of 
the float.""" return struct.unpack(self._format_char(), self.resolver.resolve(self.size, 0))[0] - def _set(self: c_float, value: float) -> None: + def _set(self: _c_float_base, value: float) -> None: """Set the value of the float.""" self.resolver.modify(self.size, 0, struct.pack(self._format_char(), value)) - def to_bytes(self: c_float) -> bytes: + def to_bytes(self: _c_float_base) -> bytes: """Return the serialized representation of the float.""" if self._frozen: return struct.pack(self._format_char(), self._frozen_value) return self.resolver.resolve(self.size, 0) - def __float__(self: c_float) -> float: + def __float__(self: _c_float_base) -> float: """Return the value as a Python float.""" return self.get() - def __int__(self: c_float) -> int: + def __int__(self: _c_float_base) -> int: """Return the value as a Python int.""" return int(self.get()) -class c_double(obj): - """A C double (IEEE 754 double-precision, 64-bit).""" - - size: int = 8 - """The size of a double in bytes.""" - - _frozen_value: float | None = None - """The frozen value of the double.""" - - def _format_char(self: c_double) -> str: - return "d" - - def get(self: c_double) -> float: - """Return the value of the double.""" - return struct.unpack(self._format_char(), self.resolver.resolve(self.size, 0))[0] +class c_float(_c_float_base): + """A C float (IEEE 754 single-precision, 32-bit).""" - def _set(self: c_double, value: float) -> None: - """Set the value of the double.""" - self.resolver.modify(self.size, 0, struct.pack(self._format_char(), value)) + size: int = 4 + _format: str = "f" - def to_bytes(self: c_double) -> bytes: - """Return the serialized representation of the double.""" - if self._frozen: - return struct.pack(self._format_char(), self._frozen_value) - return self.resolver.resolve(self.size, 0) - def __float__(self: c_double) -> float: - """Return the value as a Python float.""" - return self.get() +class c_double(_c_float_base): + """A C double (IEEE 754 double-precision, 
64-bit).""" - def __int__(self: c_double) -> int: - """Return the value as a Python int.""" - return int(self.get()) + size: int = 8 + _format: str = "d" diff --git a/libdestruct/common/array/array_field_inflater.py b/libdestruct/common/array/array_field_inflater.py index 3fc217b..08b7897 100644 --- a/libdestruct/common/array/array_field_inflater.py +++ b/libdestruct/common/array/array_field_inflater.py @@ -43,8 +43,8 @@ def _subscripted_array_handler( if len(args) != 2: return None element_type, count = args - if not isinstance(count, int): - return None + if not isinstance(count, int) or count <= 0: + raise ValueError(f"array count must be a positive integer, got {count}") field = LinearArrayField(element_type, count) field.item = registry.inflater_for(element_type) return field.inflate diff --git a/libdestruct/common/obj.py b/libdestruct/common/obj.py index 0985557..8aa2f8b 100644 --- a/libdestruct/common/obj.py +++ b/libdestruct/common/obj.py @@ -136,7 +136,11 @@ def _compare_value(self: obj, other: object) -> tuple[object, object] | None: """Extract comparable values from self and other, or None if incompatible.""" self_val = self.value if isinstance(other, obj): - return self_val, other.value + other_val = other.value + # Guard against incompatible value types (e.g. 
int vs str from struct.get()) + if type(self_val) is not type(other_val) and not isinstance(self_val, type(other_val)) and not isinstance(other_val, type(self_val)): + return None + return self_val, other_val if isinstance(other, int | float | bytes): return self_val, other return None diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index 780e6da..b3b7ead 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -245,20 +245,22 @@ def get(self: struct_impl) -> str: def to_bytes(self: struct_impl) -> bytes: """Return the serialized representation of the struct, including padding.""" - if self._frozen: - return self._frozen_struct_bytes + if object.__getattribute__(self, "_frozen"): + return object.__getattribute__(self, "_frozen_struct_bytes") resolver = object.__getattribute__(self, "resolver") return resolver.resolve(size_of(self), 0) def to_dict(self: struct_impl) -> dict[str, object]: """Return a JSON-serializable dict of field names to values.""" - return {name: member.to_dict() for name, member in self._members.items()} + members = object.__getattribute__(self, "_members") + return {name: member.to_dict() for name, member in members.items()} def hexdump(self: struct_impl) -> str: """Return a hex dump of this struct's bytes with field annotations.""" member_offsets = object.__getattribute__(self, "_member_offsets") - annotations = {member_offsets[name]: name for name in self._members} - address = struct_impl.address.fget(self) if not self._frozen else 0 + members = object.__getattribute__(self, "_members") + annotations = {member_offsets[name]: name for name in members} + address = struct_impl.address.fget(self) if not object.__getattribute__(self, "_frozen") else 0 return format_hexdump(self.to_bytes(), address, annotations) def _set(self: struct_impl, _: str) -> None: @@ -268,9 +270,10 @@ def _set(self: struct_impl, _: str) -> None: def freeze(self: struct_impl) -> 
None: """Freeze the struct, capturing the full byte representation including padding.""" resolver = object.__getattribute__(self, "resolver") - self._frozen_struct_bytes = resolver.resolve(size_of(self), 0) + object.__setattr__(self, "_frozen_struct_bytes", resolver.resolve(size_of(self), 0)) - for member in self._members.values(): + members = object.__getattribute__(self, "_members") + for member in members.values(): member.freeze() super().freeze() @@ -278,8 +281,9 @@ def freeze(self: struct_impl) -> None: def to_str(self: struct_impl, indent: int = 0) -> str: """Return a string representation of the struct.""" name = object.__getattribute__(self, "_struct_name") + members_dict = object.__getattribute__(self, "_members") members = ",\n".join( - [f"{' ' * (indent + 4)}{n}: {member.to_str(indent + 4)}" for n, member in self._members.items()], + [f"{' ' * (indent + 4)}{n}: {member.to_str(indent + 4)}" for n, member in members_dict.items()], ) return f"""{name} {{ {members} @@ -289,7 +293,8 @@ def __repr__(self: struct_impl) -> str: """Return a string representation of the struct.""" name = object.__getattribute__(self, "_struct_name") addr = struct_impl.address.fget(self) - members = ",\n".join([f"{n}: {member}" for n, member in self._members.items()]) + members_dict = object.__getattribute__(self, "_members") + members = ",\n".join([f"{n}: {member}" for n, member in members_dict.items()]) return f"""{name} {{ address: 0x{addr:x}, size: 0x{size_of(self):x}, @@ -306,7 +311,10 @@ def __eq__(self: struct_impl, value: object) -> bool: if size_of(self) != size_of(value): return False - if not self._members.keys() == value._members.keys(): + self_members = object.__getattribute__(self, "_members") + other_members = object.__getattribute__(value, "_members") + + if self_members.keys() != other_members.keys(): return False - return all(getattr(self, name) == getattr(value, name) for name in self._members) + return all(getattr(self, name) == getattr(value, name) for name in 
self_members) diff --git a/libdestruct/common/type_registry.py b/libdestruct/common/type_registry.py index 2467ca6..35a9c23 100644 --- a/libdestruct/common/type_registry.py +++ b/libdestruct/common/type_registry.py @@ -146,7 +146,8 @@ def register_type_handler( if parent not in self.type_handlers: self.type_handlers[parent] = [] - self.type_handlers[parent].append(handler) + if handler not in self.type_handlers[parent]: + self.type_handlers[parent].append(handler) def register_instance_handler( self: TypeRegistry, @@ -165,7 +166,8 @@ def register_instance_handler( if parent not in self.instance_handlers: self.instance_handlers[parent] = [] - self.instance_handlers[parent].append(handler) + if handler not in self.instance_handlers[parent]: + self.instance_handlers[parent].append(handler) def register_generic_handler( self: TypeRegistry, @@ -181,7 +183,8 @@ def register_generic_handler( if origin not in self.generic_handlers: self.generic_handlers[origin] = [] - self.generic_handlers[origin].append(handler) + if handler not in self.generic_handlers[origin]: + self.generic_handlers[origin].append(handler) def register_mapping( self: TypeRegistry, diff --git a/libdestruct/common/union/union.py b/libdestruct/common/union/union.py index 1506e8b..4e1991e 100644 --- a/libdestruct/common/union/union.py +++ b/libdestruct/common/union/union.py @@ -119,10 +119,18 @@ def to_str(self: union, indent: int = 0) -> str: def __getattr__(self: union, name: str) -> object: """Delegate attribute access to named variants or the active variant.""" - variants = object.__getattribute__(self, "_variants") - if name in variants: - return variants[name] - variant = object.__getattribute__(self, "_variant") - if variant is not None: - return getattr(variant, name) + try: + variants = object.__getattribute__(self, "_variants") + if name in variants: + return variants[name] + except AttributeError: + pass + + try: + variant = object.__getattribute__(self, "_variant") + if variant is not None: + 
return getattr(variant, name) + except AttributeError: + pass + raise AttributeError(f"'{type(self).__name__}' has no attribute '{name}'") diff --git a/libdestruct/libdestruct.py b/libdestruct/libdestruct.py index e11b3a5..882e714 100644 --- a/libdestruct/libdestruct.py +++ b/libdestruct/libdestruct.py @@ -16,11 +16,17 @@ from libdestruct.common.obj import obj +_VALID_ENDIANNESS = ("little", "big") + + def inflater(memory: Sequence, endianness: str = "little") -> Inflater: """Return a TypeInflater instance.""" if not isinstance(memory, Sequence): raise TypeError(f"memory must be a Sequence, not {type(memory).__name__}") + if endianness not in _VALID_ENDIANNESS: + raise ValueError(f"endianness must be 'little' or 'big', not {endianness!r}") + return Inflater(memory, endianness=endianness) diff --git a/test/scripts/review_fix_test.py b/test/scripts/review_fix_test.py new file mode 100644 index 0000000..606f1ba --- /dev/null +++ b/test/scripts/review_fix_test.py @@ -0,0 +1,209 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +"""Tests that expose bugs found during code review of the dev branch.""" + +import struct as pystruct +import unittest + +from libdestruct import c_float, c_double, c_int, c_long, inflater, struct +from libdestruct.common.union import union, union_of + + +class EndiannessValidationTest(unittest.TestCase): + """inflater() should reject invalid endianness strings.""" + + def test_invalid_endianness_raises(self): + """Passing a typo like 'big-endian' must raise ValueError, not silently produce wrong results.""" + with self.assertRaises(ValueError): + inflater(bytearray(4), endianness="big-endian") + + def test_invalid_endianness_typo(self): + """A random typo must raise ValueError.""" + with self.assertRaises(ValueError): + inflater(bytearray(4), endianness="typo") + + def test_valid_endianness_big(self): + """'big' is accepted without error.""" + lib = inflater(bytearray(4), endianness="big") + self.assertIsNotNone(lib) + + def test_valid_endianness_little(self): + """'little' is accepted without error.""" + lib = inflater(bytearray(4), endianness="little") + self.assertIsNotNone(lib) + + def test_from_bytes_invalid_endianness(self): + """from_bytes with invalid endianness must raise ValueError.""" + with self.assertRaises(ValueError): + c_int.from_bytes(b"\x00\x00\x00\x00", endianness="big-endian") + + +class StructAttributeCollisionTest(unittest.TestCase): + """Struct members named after internal attributes must not break core methods.""" + + def test_struct_with_frozen_field_to_bytes(self): + """A struct with a field named '_frozen' must still serialize correctly after freeze.""" + # This field name collides with obj._frozen used in to_bytes() + class s_t(struct): + _frozen: c_int + b: c_int + + memory = b"" + memory += (10).to_bytes(4, "little") + memory += (20).to_bytes(4, "little") + + s = s_t.from_bytes(memory) + # to_bytes must return the correct serialized data, not crash + self.assertEqual(s.to_bytes(), memory) + + def 
test_struct_with_frozen_field_hexdump(self): + """A struct with a '_frozen' field must still produce a hexdump.""" + class s_t(struct): + _frozen: c_int + + s = s_t.from_bytes((42).to_bytes(4, "little")) + # hexdump must not crash + dump = s.hexdump() + self.assertIn("2a", dump) + + def test_struct_with_members_field_eq(self): + """A struct with a field named '_members' must still support equality.""" + class s_t(struct): + _members: c_int + + a = s_t.from_bytes((1).to_bytes(4, "little")) + b = s_t.from_bytes((1).to_bytes(4, "little")) + self.assertEqual(a, b) + + def test_struct_with_members_field_to_dict(self): + """A struct with a field named '_members' must still support to_dict.""" + class s_t(struct): + _members: c_int + + s = s_t.from_bytes((5).to_bytes(4, "little")) + d = s.to_dict() + self.assertEqual(d["_members"], 5) + + def test_struct_with_frozen_struct_bytes_field(self): + """A field named '_frozen_struct_bytes' must not break freeze/to_bytes.""" + class s_t(struct): + _frozen_struct_bytes: c_int + + memory = (99).to_bytes(4, "little") + s = s_t.from_bytes(memory) + self.assertEqual(s.to_bytes(), memory) + + +class UnionGetAttrSafetyTest(unittest.TestCase): + """union.__getattr__ must produce clear errors, not internal AttributeError.""" + + def test_missing_attribute_error_message(self): + """Accessing a nonexistent attribute on a union should mention the attribute name, not '_variants'.""" + u = union(None, None, 4) + with self.assertRaises(AttributeError) as ctx: + _ = u.nonexistent_attr + # The error message must mention the user's attribute, not internal implementation details + self.assertIn("nonexistent_attr", str(ctx.exception)) + self.assertNotIn("_variants", str(ctx.exception)) + + def test_getattr_after_del_variants(self): + """Even if _variants is somehow missing, __getattr__ should not expose internal details.""" + u = union(None, None, 4) + del u.__dict__["_variants"] + with self.assertRaises(AttributeError) as ctx: + _ = u.something + 
self.assertIn("something", str(ctx.exception)) + + +class FloatDuplicationRegressionTest(unittest.TestCase): + """After refactoring c_float/c_double to a shared base, core behavior must be preserved.""" + + def test_c_float_read_write(self): + memory = bytearray(4) + lib = inflater(memory) + f = lib.inflate(c_float, 0) + f.value = 3.14 + self.assertAlmostEqual(f.value, 3.14, places=5) + + def test_c_double_read_write(self): + memory = bytearray(8) + lib = inflater(memory) + d = lib.inflate(c_double, 0) + d.value = 2.718281828 + self.assertAlmostEqual(d.value, 2.718281828, places=8) + + def test_c_float_freeze_diff_reset(self): + memory = bytearray(4) + lib = inflater(memory) + f = lib.inflate(c_float, 0) + f.value = 1.5 + f.freeze() + self.assertAlmostEqual(f.value, 1.5, places=5) + with self.assertRaises(ValueError): + f.value = 2.0 + + def test_c_double_freeze_diff_reset(self): + memory = bytearray(8) + lib = inflater(memory) + d = lib.inflate(c_double, 0) + d.value = 1.5 + d.freeze() + self.assertAlmostEqual(d.value, 1.5, places=5) + with self.assertRaises(ValueError): + d.value = 2.0 + + def test_c_float_from_bytes(self): + data = pystruct.pack("f", 3.14) + f = c_float.from_bytes(original, endianness="big") + self.assertAlmostEqual(f.value, 3.14, places=5) + self.assertEqual(f.to_bytes(), original) + + def test_c_double_big_endian(self): + original = pystruct.pack(">d", 2.718) + d = c_double.from_bytes(original, endianness="big") + self.assertAlmostEqual(d.value, 2.718, places=3) + self.assertEqual(d.to_bytes(), original) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/scripts/review_fix_test_2.py b/test/scripts/review_fix_test_2.py new file mode 100644 index 0000000..bf09ce3 --- /dev/null +++ b/test/scripts/review_fix_test_2.py @@ -0,0 +1,289 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. 
See LICENSE file in the project root for details. +# + +"""Tests that expose bugs found during the second-pass code review.""" + +import unittest + +from libdestruct import ( + array, + bitfield_of, + c_int, + c_uint, + inflater, + size_of, + struct, +) +from libdestruct.c.struct_parser import clear_parser_cache, definition_to_type + + +class ComparisonOperatorSafetyTest(unittest.TestCase): + """Comparison operators must not raise TypeError for incompatible obj types.""" + + def test_lt_primitive_vs_struct_returns_not_implemented(self): + """c_int < struct should return NotImplemented, not raise TypeError.""" + class s_t(struct): + x: c_int + + memory = bytearray(4) + lib = inflater(memory) + val = lib.inflate(c_int, 0) + s = lib.inflate(s_t, 0) + + # Must not raise TypeError + result = val.__lt__(s) + self.assertIs(result, NotImplemented) + + def test_gt_primitive_vs_struct_returns_not_implemented(self): + class s_t(struct): + x: c_int + + memory = bytearray(4) + lib = inflater(memory) + val = lib.inflate(c_int, 0) + s = lib.inflate(s_t, 0) + + result = val.__gt__(s) + self.assertIs(result, NotImplemented) + + def test_le_primitive_vs_struct_returns_not_implemented(self): + class s_t(struct): + x: c_int + + memory = bytearray(4) + lib = inflater(memory) + val = lib.inflate(c_int, 0) + s = lib.inflate(s_t, 0) + + result = val.__le__(s) + self.assertIs(result, NotImplemented) + + def test_ge_primitive_vs_struct_returns_not_implemented(self): + class s_t(struct): + x: c_int + + memory = bytearray(4) + lib = inflater(memory) + val = lib.inflate(c_int, 0) + s = lib.inflate(s_t, 0) + + result = val.__ge__(s) + self.assertIs(result, NotImplemented) + + def test_eq_primitive_vs_struct_returns_not_implemented(self): + class s_t(struct): + x: c_int + + memory = bytearray(4) + lib = inflater(memory) + val = lib.inflate(c_int, 0) + s = lib.inflate(s_t, 0) + + result = val.__eq__(s) + self.assertIs(result, NotImplemented) + + def 
test_ne_primitive_vs_struct_returns_not_implemented(self): + class s_t(struct): + x: c_int + + memory = bytearray(4) + lib = inflater(memory) + val = lib.inflate(c_int, 0) + s = lib.inflate(s_t, 0) + + result = val.__ne__(s) + self.assertIs(result, NotImplemented) + + def test_lt_between_compatible_primitives_works(self): + """Comparisons between compatible primitives should still work.""" + memory = bytearray(8) + lib = inflater(memory) + a = lib.inflate(c_int, 0) + b = lib.inflate(c_int, 4) + a.value = 1 + b.value = 2 + + self.assertTrue(a < b) + self.assertFalse(b < a) + + def test_comparison_with_raw_int(self): + memory = bytearray(4) + lib = inflater(memory) + a = lib.inflate(c_int, 0) + a.value = 5 + + self.assertTrue(a < 10) + self.assertTrue(a > 2) + self.assertTrue(a <= 5) + self.assertTrue(a >= 5) + + +class NegativeArrayCountTest(unittest.TestCase): + """array[T, N] must reject non-positive counts at handler time.""" + + def test_negative_count_raises(self): + """array[c_int, -5] must raise ValueError.""" + with self.assertRaises(ValueError): + class s_t(struct): + data: array[c_int, -5] + # Force size computation + size_of(s_t) + + def test_zero_count_raises(self): + """array[c_int, 0] must raise ValueError.""" + with self.assertRaises(ValueError): + class s_t(struct): + data: array[c_int, 0] + size_of(s_t) + + def test_positive_count_works(self): + """array[c_int, 3] must work fine.""" + class s_t(struct): + data: array[c_int, 3] + self.assertEqual(size_of(s_t), 12) + + +class BitfieldFreezeSafetyTest(unittest.TestCase): + """Frozen bitfields must reject writes even for non-owners.""" + + def test_non_owner_bitfield_rejects_write_after_freeze(self): + """The second bitfield in a group (non-owner) must reject writes when frozen.""" + class s_t(struct): + a: c_uint = bitfield_of(c_uint, 1) + b: c_uint = bitfield_of(c_uint, 1) + + memory = bytearray(4) + lib = inflater(memory) + s = lib.inflate(s_t, 0) + + s.a.value = 1 + s.b.value = 1 + + # Freeze the 
entire struct (which freezes all members) + s.freeze() + + # Both bitfields should reject writes + with self.assertRaises(ValueError): + s.a.value = 0 + + with self.assertRaises(ValueError): + s.b.value = 0 + + def test_individually_frozen_non_owner_rejects_write(self): + """Freezing a non-owner bitfield individually must also reject writes.""" + class s_t(struct): + a: c_uint = bitfield_of(c_uint, 1) + b: c_uint = bitfield_of(c_uint, 1) + + memory = bytearray(4) + lib = inflater(memory) + s = lib.inflate(s_t, 0) + + s.b.value = 1 + + # Freeze only the non-owner bitfield b + s.b.freeze() + + with self.assertRaises(ValueError): + s.b.value = 0 + + +class TypeRegistryDeduplicationTest(unittest.TestCase): + """Repeated handler registration must not accumulate duplicates.""" + + def test_generic_handler_not_duplicated(self): + """Registering the same handler twice must not produce duplicate entries.""" + from libdestruct.common.type_registry import TypeRegistry + + registry = TypeRegistry() + + class DummyType: + pass + + def dummy_handler(item, args, owner): + return None + + initial_count = len(registry.generic_handlers.get(DummyType, [])) + + registry.register_generic_handler(DummyType, dummy_handler) + registry.register_generic_handler(DummyType, dummy_handler) + + count = len(registry.generic_handlers[DummyType]) + self.assertEqual(count, initial_count + 1) + + def test_instance_handler_not_duplicated(self): + """Registering the same instance handler twice must not produce duplicate entries.""" + from libdestruct.common.type_registry import TypeRegistry + + registry = TypeRegistry() + + class DummyField: + pass + + def dummy_handler(item, annotation, owner): + return None + + initial_count = len(registry.instance_handlers.get(DummyField, [])) + + registry.register_instance_handler(DummyField, dummy_handler) + registry.register_instance_handler(DummyField, dummy_handler) + + count = len(registry.instance_handlers[DummyField]) + self.assertEqual(count, initial_count 
+ 1) + + def test_type_handler_not_duplicated(self): + """Registering the same type handler twice must not produce duplicate entries.""" + from libdestruct.common.type_registry import TypeRegistry + + registry = TypeRegistry() + + class DummyParent: + pass + + def dummy_handler(item): + return None + + initial_count = len(registry.type_handlers.get(DummyParent, [])) + + registry.register_type_handler(DummyParent, dummy_handler) + registry.register_type_handler(DummyParent, dummy_handler) + + count = len(registry.type_handlers[DummyParent]) + self.assertEqual(count, initial_count + 1) + + +class ForwardTypedefTest(unittest.TestCase): + """Forward typedef references are a known parser limitation.""" + + def setUp(self): + clear_parser_cache() + + def tearDown(self): + clear_parser_cache() + + def test_chained_typedefs_in_order(self): + """Chained typedefs in declaration order must work.""" + t = definition_to_type(""" + typedef unsigned int u32; + typedef u32 mytype; + struct S { mytype x; }; + """) + data = (42).to_bytes(4, "little") + s = t.from_bytes(data) + self.assertEqual(s.x.value, 42) + + def test_forward_typedef_reference_raises(self): + """Forward typedef reference (use before define) must raise a clear error, not crash.""" + with self.assertRaises((ValueError, TypeError)): + definition_to_type(""" + typedef mytype1 mytype2; + typedef unsigned int mytype1; + struct S { mytype2 x; }; + """) + + +if __name__ == "__main__": + unittest.main() From 8b8ca90a15899c7314fcd2a8061090bb5e3f7a2c Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Wed, 1 Apr 2026 12:52:33 -0500 Subject: [PATCH 43/46] fix: correct wrong enum_of implementation in code and docs --- SKILL.md | 2 +- docs/advanced/forward_refs.md | 4 +- docs/basics/enums.md | 2 +- libdestruct/common/struct/struct_impl.py | 8 ++- libdestruct/common/union/union.py | 11 ++- libdestruct/common/utils.py | 9 +++ test/scripts/struct_unit_test.py | 87 ++++++++++++++++++++++++ test/scripts/to_dict_test.py | 4 +- 8 
files changed, 114 insertions(+), 13 deletions(-) diff --git a/SKILL.md b/SKILL.md index 8f4eb95..237b040 100644 --- a/SKILL.md +++ b/SKILL.md @@ -234,7 +234,7 @@ Legacy syntax with `enum_of()` is still supported: ```python class pixel_t(struct): - color: c_int = enum_of(Color) + color: enum = enum_of(Color) alpha: c_int ``` diff --git a/docs/advanced/forward_refs.md b/docs/advanced/forward_refs.md index 1a27fab..44a39d2 100644 --- a/docs/advanced/forward_refs.md +++ b/docs/advanced/forward_refs.md @@ -21,11 +21,11 @@ At inflation time, the string `"Node"` is resolved to the actual `Node` class. T For the common case of a pointer to the enclosing struct, the legacy `ptr_to_self` syntax is also available: ```python -from libdestruct import struct, c_int, ptr_to_self +from libdestruct import struct, c_int, ptr, ptr_to_self class Node(struct): val: c_int - next: ptr_to_self + next: ptr = ptr_to_self() ``` This is equivalent to `ptr["Node"]` but doesn't require you to spell out the type name. The `ptr["TypeName"]` syntax is preferred as it is more explicit. 
diff --git a/docs/basics/enums.md b/docs/basics/enums.md index 96c5efc..7edfba4 100644 --- a/docs/basics/enums.md +++ b/docs/basics/enums.md @@ -32,7 +32,7 @@ The legacy `enum_of()` syntax is also supported: from libdestruct import struct, c_int, enum_of class pixel_t(struct): - color: enum_of(Color, c_int) + color: enum = enum_of(Color) x: c_int y: c_int ``` diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index b3b7ead..da93cd8 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -259,7 +259,13 @@ def hexdump(self: struct_impl) -> str: """Return a hex dump of this struct's bytes with field annotations.""" member_offsets = object.__getattribute__(self, "_member_offsets") members = object.__getattribute__(self, "_members") - annotations = {member_offsets[name]: name for name in members} + annotations: dict[int, str] = {} + for name in members: + off = member_offsets[name] + if off in annotations: + annotations[off] += ", " + name + else: + annotations[off] = name address = struct_impl.address.fget(self) if not object.__getattribute__(self, "_frozen") else 0 return format_hexdump(self.to_bytes(), address, annotations) diff --git a/libdestruct/common/union/union.py b/libdestruct/common/union/union.py index 4e1991e..9e73f04 100644 --- a/libdestruct/common/union/union.py +++ b/libdestruct/common/union/union.py @@ -101,12 +101,11 @@ def diff(self: union) -> tuple[object, object]: return {name: v.diff() for name, v in self._variants.items()} def reset(self: union) -> None: - """Reset the union to its frozen value.""" - if self._variant is not None: - self._variant.reset() - else: - for v in self._variants.values(): - v.reset() + """Reset the union to its frozen value by restoring the full frozen byte region.""" + if self._frozen_bytes is None: + raise RuntimeError("Cannot reset a union that has not been frozen.") + if self.resolver is not None: + 
self.resolver.modify(self.size, 0, self._frozen_bytes) def to_str(self: union, indent: int = 0) -> str: """Return a string representation of the union.""" diff --git a/libdestruct/common/utils.py b/libdestruct/common/utils.py index 6aecfd2..dc00645 100644 --- a/libdestruct/common/utils.py +++ b/libdestruct/common/utils.py @@ -27,12 +27,21 @@ def is_field_bound_method(item: obj) -> bool: def size_of(item_or_inflater: obj | callable[[Resolver], obj]) -> int: """Return the size in bytes of a type, instance, or field descriptor.""" + from types import GenericAlias + # Field instances (e.g. array_of, ptr_to) — must come before .size check if isinstance(item_or_inflater, Field): return item_or_inflater.get_size() if is_field_bound_method(item_or_inflater): return item_or_inflater.__self__.get_size() + # Subscripted GenericAlias types (e.g. array[c_int, 10], enum[Color], ptr[T]) + if isinstance(item_or_inflater, GenericAlias): + from libdestruct.common.type_registry import TypeRegistry + + inflater = TypeRegistry().inflater_for(item_or_inflater) + return size_of(inflater) + # Struct types: size is on the inflated _type_impl class if isinstance(item_or_inflater, type) and hasattr(item_or_inflater, "_type_impl"): return item_or_inflater._type_impl.size diff --git a/test/scripts/struct_unit_test.py b/test/scripts/struct_unit_test.py index d97e695..1e1854d 100644 --- a/test/scripts/struct_unit_test.py +++ b/test/scripts/struct_unit_test.py @@ -524,5 +524,92 @@ class s_t(struct): self.assertEqual(size_of(s_t), 2) +class SizeOfGenericAliasTest(unittest.TestCase): + """size_of() must handle subscripted GenericAlias types like array[c_int, 10].""" + + def test_size_of_subscripted_array(self): + self.assertEqual(size_of(array[c_int, 10]), 40) + + def test_size_of_subscripted_enum(self): + from enum import IntEnum + + class Color(IntEnum): + RED = 0 + + self.assertEqual(size_of(enum[Color]), 4) + self.assertEqual(size_of(enum[Color, c_short]), 2) + + def 
test_size_of_subscripted_ptr(self): + self.assertEqual(size_of(ptr[c_int]), 8) + + +class UnionResetTest(unittest.TestCase): + """union.reset() must restore the full frozen byte region.""" + + def test_tagged_union_reset_struct_variant(self): + """reset() on a tagged union with a struct variant must not crash.""" + import struct as pystruct + + class point_t(struct): + x: c_int + y: c_int + + class msg_t(struct): + tag: c_int + payload: union = tagged_union("tag", {0: c_int, 1: point_t}) + + memory = bytearray(12) + memory[0:4] = pystruct.pack(" Date: Wed, 1 Apr 2026 13:08:53 -0500 Subject: [PATCH 44/46] test/docs: automatically check doc snippets to verify their correctness --- .github/workflows/test.yml | 2 +- SKILL.md | 2 +- docs/advanced/alignment.md | 2 +- docs/advanced/bitfields.md | 2 +- docs/advanced/freeze_diff.md | 23 ++++++++++-- docs/advanced/offset.md | 11 +----- docs/advanced/tagged_unions.md | 6 +++- docs/basics/arrays.md | 2 +- docs/basics/pointers.md | 12 +++---- docs/basics/structs.md | 8 +++-- docs/basics/types.md | 2 +- docs/memory/inflater.md | 9 +++++ libdestruct/common/enum/enum.py | 8 ++++- libdestruct/common/struct/struct_impl.py | 9 +++++ test/scripts/doctest_snippets.py | 46 ++++++++++++++++++++++++ test/scripts/struct_unit_test.py | 40 +++++++++++++++++++++ 16 files changed, 155 insertions(+), 29 deletions(-) create mode 100644 test/scripts/doctest_snippets.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 1d18ccc..1365aa9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -39,7 +39,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install --upgrade wheel build - python -m pip install pwntools pytest libdebug + python -m pip install pwntools pytest libdebug mktestdocs - name: Install library run: | diff --git a/SKILL.md b/SKILL.md index 237b040..aca8ca6 100644 --- a/SKILL.md +++ b/SKILL.md @@ -197,7 +197,7 @@ Legacy syntax with `array_of()` is still supported:
```python class packet_t(struct): length: c_int - data: array_of(c_int, 8) + data: array = array_of(c_int, 8) ``` Access array elements: diff --git a/docs/advanced/alignment.md b/docs/advanced/alignment.md index 42f5fe5..b1fbed3 100644 --- a/docs/advanced/alignment.md +++ b/docs/advanced/alignment.md @@ -7,7 +7,7 @@ You can opt into natural alignment (matching standard C struct layout) by settin ## Enabling Alignment ```python -from libdestruct import struct, c_char, c_int, c_long, size_of +from libdestruct import struct, c_char, c_int, c_long, c_short, size_of, alignment_of class packed_t(struct): a: c_char diff --git a/docs/advanced/bitfields.md b/docs/advanced/bitfields.md index e96d634..1feea34 100644 --- a/docs/advanced/bitfields.md +++ b/docs/advanced/bitfields.md @@ -7,7 +7,7 @@ Bitfields let you pack multiple values into a single integer, just like C bitfie Use `bitfield_of(backing_type, bit_width)` as a struct field descriptor: ```python -from libdestruct import struct, c_uint, bitfield_of +from libdestruct import struct, c_uint, c_long, bitfield_of class flags_t(struct): read: c_uint = bitfield_of(c_uint, 1) diff --git a/docs/advanced/freeze_diff.md b/docs/advanced/freeze_diff.md index 791cfcc..3352f7d 100644 --- a/docs/advanced/freeze_diff.md +++ b/docs/advanced/freeze_diff.md @@ -32,6 +32,10 @@ except ValueError: Use `diff()` to compare the frozen value with the current live value: ```python +memory = bytearray(4) +lib = inflater(memory) +x = lib.inflate(c_int, 0) + x.value = 42 x.freeze() @@ -96,14 +100,25 @@ except ValueError: A typical workflow for detecting changes: ```python +class game_state_t(struct): + health: c_int + score: c_int + level: c_int + +memory = bytearray(12) +lib = inflater(memory) + # 1. Inflate the struct -state = lib.inflate(game_state_t, addr) +state = lib.inflate(game_state_t, 0) # 2. Freeze the current state +state.health.value = 100 +state.score.value = 500 +state.level.value = 3 state.freeze() -# 3. 
Let the program run (memory changes externally) -# ... +# 3. Something changes the underlying memory +memory[4:8] = (9999).to_bytes(4, "little") # score changed # 4. Check what changed for name in ["health", "score", "level"]: @@ -111,7 +126,9 @@ for name in ["health", "score", "level"]: old, new = member.diff() if old != new: print(f"{name}: {old} -> {new}") +# score: 500 -> 9999 # 5. Optionally reset to the frozen state state.reset() +print(state.score.value) # 500 (restored) ``` diff --git a/docs/advanced/offset.md b/docs/advanced/offset.md index 9e27190..face80e 100644 --- a/docs/advanced/offset.md +++ b/docs/advanced/offset.md @@ -102,14 +102,5 @@ class example_t(struct): items: Annotated[array[c_int, 4], offset(0x10)] ``` -With the legacy syntax, `offset()` can be combined with `Field` attributes using a tuple: - -```python -from libdestruct.common.field import Field - -class example_t(struct): - data: c_int = (Field(), offset(8)) -``` - !!! note - When using tuples of attributes, only one `Field` is allowed per annotation. Multiple `OffsetAttribute`s are also not typical — use a single `offset()` to set the position. + The `Annotated` syntax is preferred for combining offsets with complex types. The legacy default-value syntax also supports offset combined with other field descriptors using a tuple (e.g., `data: c_int = (enum_of(Color), offset(8))`). 
diff --git a/docs/advanced/tagged_unions.md b/docs/advanced/tagged_unions.md index d5cf462..1e7b742 100644 --- a/docs/advanced/tagged_unions.md +++ b/docs/advanced/tagged_unions.md @@ -153,6 +153,7 @@ size_of(msg_t) # 12 (4 + max(4, 8)) Use the `variant` property to get the active variant object directly: ```python +data = pystruct.pack(" Enum: """Return the value of the enum.""" - return self.python_enum(self._backing_type.get()) + raw = self._backing_type.get() + if self.lenient: + try: + return self.python_enum(raw) + except ValueError: + return raw + return self.python_enum(raw) def _set(self: enum, value: Enum) -> None: """Set the value of the enum.""" diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index da93cd8..3e2c4cb 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -284,6 +284,15 @@ def freeze(self: struct_impl) -> None: super().freeze() + def reset(self: struct_impl) -> None: + """Reset each member to its frozen value.""" + if not self._frozen: + raise RuntimeError("Cannot reset a struct that has not been frozen.") + + members = object.__getattribute__(self, "_members") + for member in members.values(): + member.reset() + def to_str(self: struct_impl, indent: int = 0) -> str: """Return a string representation of the struct.""" name = object.__getattribute__(self, "_struct_name") diff --git a/test/scripts/doctest_snippets.py b/test/scripts/doctest_snippets.py new file mode 100644 index 0000000..bb9cb42 --- /dev/null +++ b/test/scripts/doctest_snippets.py @@ -0,0 +1,46 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +"""Test that all documentation code snippets execute without errors.""" + +import glob +import os +import unittest + +from mktestdocs import check_md_file + +# Resolve docs/ relative to the repo root, not the working directory. +_REPO_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +# Files that contain code blocks requiring unavailable system tools +# (e.g., C preprocessor for #include directives) or intentionally +# erroneous code (e.g., demonstrating ValueError on invalid input). +SKIP_FILES = { + os.path.join(_REPO_ROOT, "docs/advanced/c_parser.md"), +} + + +def _make_test(fpath): + def test_func(self): + check_md_file(fpath, memory=True) + + test_func.__doc__ = f"Snippets in {fpath} execute without errors" + return test_func + + +class DocSnippetTest(unittest.TestCase): + """Auto-generated tests for documentation code snippets.""" + + +for _path in sorted(glob.glob(os.path.join(_REPO_ROOT, "docs/**/*.md"), recursive=True)): + if _path in SKIP_FILES: + continue + _name = "test_" + os.path.relpath(_path, _REPO_ROOT).replace("/", "_").replace(".", "_").replace("-", "_") + setattr(DocSnippetTest, _name, _make_test(_path)) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/scripts/struct_unit_test.py b/test/scripts/struct_unit_test.py index 1e1854d..3d8f704 100644 --- a/test/scripts/struct_unit_test.py +++ b/test/scripts/struct_unit_test.py @@ -230,6 +230,46 @@ class test_t(struct): test.a.value = 99 +class StructResetTest(unittest.TestCase): + """Struct reset semantics.""" + + def test_struct_reset_restores_members(self): + """reset() on a frozen struct should restore all member values.""" + class test_t(struct): + a: c_int + b: c_int + + memory = bytearray(8) + lib = inflater(memory) + test = lib.inflate(test_t, 0) + test.a.value = 10 + test.b.value = 20 + test.freeze() + + # Modify underlying memory + memory[0:4] = (99).to_bytes(4, "little") + memory[4:8] = (88).to_bytes(4, "little") + # .get() reads 
live memory; .value returns frozen value + self.assertEqual(test.a.get(), 99) + self.assertEqual(test.b.get(), 88) + + # Reset should restore frozen values to memory + test.reset() + self.assertEqual(test.a.get(), 10) + self.assertEqual(test.b.get(), 20) + + def test_struct_reset_without_freeze_raises(self): + """reset() without freeze should raise.""" + class test_t(struct): + a: c_int + + memory = bytearray(4) + lib = inflater(memory) + test = lib.inflate(test_t, 0) + with self.assertRaises(RuntimeError): + test.reset() + + class ForwardRefPtrTest(unittest.TestCase): """Forward reference ptr["Type"] syntax.""" From 6bf5c451d64f67246b46def4db8cb900b4cc9998 Mon Sep 17 00:00:00 2001 From: Roberto Bertolini Date: Thu, 2 Apr 2026 09:51:47 -0500 Subject: [PATCH 45/46] feat: implement support for VLAs and file-backed inflaters --- SKILL.md | 147 ++++++-- docs/advanced/inheritance.md | 107 ++++++ docs/advanced/vla.md | 154 +++++++++ docs/basics/flags.md | 117 +++++++ docs/memory/file_inflater.md | 108 ++++++ libdestruct/__init__.py | 10 +- libdestruct/common/array/__init__.py | 6 +- .../common/array/array_field_inflater.py | 8 +- libdestruct/common/array/vla_field.py | 42 +++ .../common/array/vla_field_inflater.py | 53 +++ libdestruct/common/array/vla_impl.py | 72 ++++ libdestruct/common/array/vla_of.py | 25 ++ libdestruct/common/flags/__init__.py | 12 + libdestruct/common/flags/flags.py | 79 +++++ libdestruct/common/flags/flags_field.py | 27 ++ .../common/flags/flags_field_inflater.py | 50 +++ libdestruct/common/flags/flags_of.py | 23 ++ libdestruct/common/flags/int_flag_field.py | 60 ++++ libdestruct/common/struct/struct_impl.py | 30 ++ libdestruct/common/utils.py | 30 +- libdestruct/libdestruct.py | 55 ++- mkdocs.yml | 4 + pyproject.toml | 3 + test/scripts/file_inflater_test.py | 106 ++++++ test/scripts/flags_test.py | 161 +++++++++ test/scripts/inheritance_test.py | 134 ++++++++ test/scripts/vla_test.py | 317 ++++++++++++++++++ 27 files changed, 1900 insertions(+), 
40 deletions(-) create mode 100644 docs/advanced/inheritance.md create mode 100644 docs/advanced/vla.md create mode 100644 docs/basics/flags.md create mode 100644 docs/memory/file_inflater.md create mode 100644 libdestruct/common/array/vla_field.py create mode 100644 libdestruct/common/array/vla_field_inflater.py create mode 100644 libdestruct/common/array/vla_impl.py create mode 100644 libdestruct/common/array/vla_of.py create mode 100644 libdestruct/common/flags/__init__.py create mode 100644 libdestruct/common/flags/flags.py create mode 100644 libdestruct/common/flags/flags_field.py create mode 100644 libdestruct/common/flags/flags_field_inflater.py create mode 100644 libdestruct/common/flags/flags_of.py create mode 100644 libdestruct/common/flags/int_flag_field.py create mode 100644 test/scripts/file_inflater_test.py create mode 100644 test/scripts/flags_test.py create mode 100644 test/scripts/inheritance_test.py create mode 100644 test/scripts/vla_test.py diff --git a/SKILL.md b/SKILL.md index aca8ca6..046865e 100644 --- a/SKILL.md +++ b/SKILL.md @@ -18,7 +18,7 @@ All types inherit from `obj`. Every `obj` has: - `.hexdump()` for a hex dump of the object's bytes - `.from_bytes(data)` class method to create a read-only instance from raw bytes -Memory is accessed through an `inflater`, which wraps a `bytes` or `bytearray` buffer. Use `bytearray` for read/write access. +Memory is accessed through an `inflater`, which wraps a `bytes`, `bytearray`, or `mmap.mmap` buffer. Use `bytearray` or writable mmap for read/write access. For file-backed memory, use `inflater_from_file()`. 
## Quick Reference @@ -27,24 +27,30 @@ Memory is accessed through an `inflater`, which wraps a `bytes` or `bytearray` b ```python from typing import Annotated from libdestruct import ( - inflater, # memory wrapper - struct, # struct base class - c_int, c_uint, # 32-bit integers (signed/unsigned) - c_long, c_ulong, # 64-bit integers (signed/unsigned) - c_float, c_double, # IEEE 754 floats (32/64-bit) - c_str, # null-terminated C string - ptr, # 8-byte pointer - ptr_to, # typed pointer field descriptor (legacy) - ptr_to_self, # self-referential pointer field descriptor (legacy) - array, array_of, # array type + field descriptor - enum, enum_of, # enum type + field descriptor - bitfield_of, # bitfield descriptor - union, # union annotation type - union_of, # plain union field descriptor - tagged_union, # tagged union field descriptor - offset, # explicit field offset - size_of, # get size in bytes of any type/instance/field - alignment_of, # get natural alignment of any type/instance + inflater, # memory wrapper (bytearray / mmap) + inflater_from_file, # file-backed inflater (convenience) + FileInflater, # file-backed inflater class + struct, # struct base class + c_int, c_uint, # 32-bit integers (signed/unsigned) + c_long, c_ulong, # 64-bit integers (signed/unsigned) + c_short, c_ushort, # 16-bit integers (signed/unsigned) + c_char, c_uchar, # 8-bit integers (signed/unsigned) + c_float, c_double, # IEEE 754 floats (32/64-bit) + c_str, # null-terminated C string + ptr, # 8-byte pointer + ptr_to, # typed pointer field descriptor (legacy) + ptr_to_self, # self-referential pointer field descriptor (legacy) + array, array_of, # array type + field descriptor + vla_of, # variable-length array field descriptor + enum, enum_of, # enum type + field descriptor + flags, flags_of, # bit flags type + field descriptor + bitfield_of, # bitfield descriptor + union, # union annotation type + union_of, # plain union field descriptor + tagged_union, # tagged union field descriptor + 
offset, # explicit field offset + size_of, # get size in bytes of any type/instance/field + alignment_of, # get natural alignment of any type/instance ) ``` @@ -210,6 +216,32 @@ for element in pkt.data: print(element.value) ``` +### Variable-Length Arrays + +VLAs model C flexible array members: the count is read from a sibling field at inflation time. + +```python +class packet_t(struct): + length: c_int + data: array[c_int, "length"] # subscript syntax (string = VLA) +``` + +Or with the descriptor: + +```python +class packet_t(struct): + length: c_int + data: array = vla_of(c_int, "length") +``` + +```python +pkt = lib.inflate(packet_t, 0) +print(len(pkt.data)) # reads from pkt.length.value +print(pkt.data[0].value) # first element +``` + +Size semantics: `size_of(packet_t)` returns the fixed part only (excludes VLA). `size_of(instance)` includes VLA data. VLA must be the last field in the struct. VLA elements can be structs. + ### Enums ```python @@ -243,6 +275,44 @@ pixel = lib.inflate(pixel_t, 0) print(pixel.color.value) # Color.RED ``` +### Bit Flags + +Use Python's `IntFlag` for bitmask fields: + +```python +from enum import IntFlag + +class Perms(IntFlag): + READ = 1 + WRITE = 2 + EXEC = 4 + +class file_t(struct): + mode: flags[Perms] # subscript syntax (defaults to c_int backing) + size: c_int + +# With a custom backing type: +class file_t(struct): + mode: flags[Perms, c_short] # 2-byte backing + size: c_int +``` + +Legacy syntax with `flags_of()`: + +```python +class file_t(struct): + mode: flags = flags_of(Perms) + size: c_int +``` + +```python +f = lib.inflate(file_t, 0) +print(f.mode.value) # Perms.READ|Perms.WRITE +print(Perms.READ in f.mode.value) # True +``` + +By default flags are lenient (unknown bits produce raw int). Use `flags_of(Perms, lenient=False)` for strict mode that raises `ValueError` on unknown bits. 
+ ### Bitfields ```python @@ -350,6 +420,27 @@ e = lib.inflate(entity_t, 0) print(e.pos.x.value) ``` +### Struct Inheritance + +Structs support Python class inheritance. Derived structs include all parent fields first, then their own. + +```python +class base_t(struct): + a: c_int + +class derived_t(base_t): + b: c_int +``` + +```python +d = derived_t.from_bytes(pystruct.pack(" B -> C) and alignment inheritance both work. Parent fields always appear first in layout and `to_dict()`. + ### size_of ```python @@ -450,6 +541,24 @@ player.health.value = 999 open("save.bin", "wb").write(data) ``` +### File-backed inflater + +Read (and optionally write) binary files directly via mmap, without loading the entire file into memory: + +```python +# Read-only +with inflater_from_file("firmware.bin") as lib: + header = lib.inflate(header_t, 0) + print(header.magic.value) + +# Writable — changes are persisted to the file +with inflater_from_file("save.bin", writable=True) as lib: + player = lib.inflate(player_t, 0x100) + player.health.value = 999 +``` + +You can also pass an `mmap.mmap` object directly to `inflater()`. + ### Working with libdebug libdestruct integrates with [libdebug](https://github.com/libdebug/libdebug) for live process memory inspection. The debugger's memory view can be passed directly to `inflater`. diff --git a/docs/advanced/inheritance.md b/docs/advanced/inheritance.md new file mode 100644 index 0000000..7406c32 --- /dev/null +++ b/docs/advanced/inheritance.md @@ -0,0 +1,107 @@ +# Struct Inheritance + +libdestruct structs support Python class inheritance. A derived struct inherits all fields from its parent, with new fields appended after the parent's fields. 
+ +## Basic Usage + +```python +from libdestruct import struct, c_int, size_of + +class base_t(struct): + a: c_int + +class derived_t(base_t): + b: c_int + +size_of(base_t) # 4 +size_of(derived_t) # 8 (a + b) +``` + +Reading and writing works as expected: + +```python +import struct as pystruct + +data = pystruct.pack(" Callable[[Resolver], obj] | None: - """Handle subscripted array types like array[c_int, 3].""" + """Handle subscripted array types like array[c_int, 3] or array[c_int, 'length'].""" if len(args) != 2: return None element_type, count = args + if isinstance(count, str): + # Variable-length array: count is a field name + field = VLAField(element_type, count) + return vla_field_inflater(field, type(None), owner) if not isinstance(count, int) or count <= 0: raise ValueError(f"array count must be a positive integer, got {count}") field = LinearArrayField(element_type, count) diff --git a/libdestruct/common/array/vla_field.py b/libdestruct/common/array/vla_field.py new file mode 100644 index 0000000..73df868 --- /dev/null +++ b/libdestruct/common/array/vla_field.py @@ -0,0 +1,42 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.common.array.array_field import ArrayField + +if TYPE_CHECKING: # pragma: no cover + from libdestruct.backing.resolver import Resolver + from libdestruct.common.obj import obj + + +class VLAField(ArrayField): + """A generator for a variable-length array whose count is determined by another struct field. + + At size-computation time, ``vla_field_inflater`` returns ``field.inflate`` + as a bound method so that ``size_of`` can call ``get_size()`` (returns 0). 
+ ``inflate`` itself is never invoked; the real inflation is handled by the + closure built in ``vla_field_inflater`` which reads the count at runtime. + """ + + def __init__(self: VLAField, element_type: type[obj], count_field: str) -> None: + """Initialize the field.""" + self.item = element_type + self.count_field = count_field + + def inflate(self: VLAField, resolver: Resolver) -> None: + """Placeholder — never called at runtime. + + ``size_of`` detects the bound method via ``is_field_bound_method`` and + calls ``get_size()`` directly, so this body is unreachable. + """ + raise NotImplementedError("VLAField.inflate is a size-computation stub; use vla_field_inflater instead") + + def get_size(self: VLAField) -> int: + """VLA has zero static size — actual size is determined at inflation time.""" + return 0 diff --git a/libdestruct/common/array/vla_field_inflater.py b/libdestruct/common/array/vla_field_inflater.py new file mode 100644 index 0000000..3961d7a --- /dev/null +++ b/libdestruct/common/array/vla_field_inflater.py @@ -0,0 +1,53 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.common.array.vla_field import VLAField +from libdestruct.common.array.vla_impl import vla_impl +from libdestruct.common.type_registry import TypeRegistry + +if TYPE_CHECKING: # pragma: no cover + from collections.abc import Callable + + from libdestruct.backing.resolver import Resolver + from libdestruct.common.obj import obj + +registry = TypeRegistry() + + +def vla_field_inflater( + field: VLAField, + _: type[obj], + owner: tuple[obj, type[obj]] | None, +) -> Callable[[Resolver], obj]: + """Return the inflater for a variable-length array field. 
+ + During size computation (owner[0] is None), returns field.inflate which + is a bound method on a Field — ``size_of`` detects this and calls + ``get_size()`` (returns 0) without ever invoking the method. + + During actual inflation, returns a closure that creates a ``vla_impl`` + holding a reference to the count member for dynamic count reads. + """ + if owner is None or owner[0] is None: + field.item = registry.inflater_for(field.item) + return field.inflate + + struct_instance = owner[0] + element_inflater = registry.inflater_for(field.item) + + def inflate_vla(resolver: Resolver) -> vla_impl: + members = object.__getattribute__(struct_instance, "_members") + count_member = members[field.count_field] + return vla_impl(resolver, element_inflater, count_member) + + return inflate_vla + + +registry.register_instance_handler(VLAField, vla_field_inflater) diff --git a/libdestruct/common/array/vla_impl.py b/libdestruct/common/array/vla_impl.py new file mode 100644 index 0000000..839b6bb --- /dev/null +++ b/libdestruct/common/array/vla_impl.py @@ -0,0 +1,72 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.common.array.array import array +from libdestruct.common.array.array_impl import array_impl +from libdestruct.common.utils import size_of + +if TYPE_CHECKING: # pragma: no cover + from libdestruct.backing.resolver import Resolver + from libdestruct.common.obj import obj + + +class vla_impl(array_impl): + """An array whose element count is read dynamically from a sibling struct field.""" + + _count_member: obj + """The struct member whose .value gives the current element count.""" + + def __init__( + self: vla_impl, + resolver: Resolver, + backing_type: obj, + count_member: obj, + ) -> None: + """Initialize the VLA. + + Unlike array_impl, the count is not a fixed integer but a reference + to a sibling struct member that is read on every access. + """ + # Skip array_impl.__init__ — it stores a fixed _count. + # Call array (grandparent) init only. + array.__init__(self, resolver) + self.backing_type = backing_type + self._count_member = count_member + self.item_size = size_of(backing_type) + + @property # type: ignore[override] + def _count(self: vla_impl) -> int: + """Read the current element count from the sibling field.""" + count = self._count_member.value + + if not isinstance(count, int): + raise TypeError( + f"VLA count field must be an integer type, got {type(count).__name__}", + ) + + if count < 0: + raise ValueError( + f"VLA count field must be non-negative, got {count}", + ) + + return count + + @_count.setter + def _count(self: vla_impl, _: int) -> None: + """No-op — count is always derived from the sibling member.""" + + @property # type: ignore[override] + def size(self: vla_impl) -> int: + """Return the current byte size of the VLA data.""" + return self.item_size * self._count + + @size.setter + def size(self: vla_impl, _: int) -> None: + """No-op — size is always derived from count.""" diff --git a/libdestruct/common/array/vla_of.py 
b/libdestruct/common/array/vla_of.py new file mode 100644 index 0000000..cc04dc3 --- /dev/null +++ b/libdestruct/common/array/vla_of.py @@ -0,0 +1,25 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.common.array.vla_field import VLAField + +if TYPE_CHECKING: # pragma: no cover + from libdestruct.common.array.array_field import ArrayField + from libdestruct.common.obj import obj + + +def vla_of(element_type: type[obj], count_field: str) -> ArrayField: + """Return a new variable-length array field. + + Args: + element_type: The type of each element in the array. + count_field: The name of the struct field that holds the element count. + """ + return VLAField(element_type, count_field) diff --git a/libdestruct/common/flags/__init__.py b/libdestruct/common/flags/__init__.py new file mode 100644 index 0000000..631cfeb --- /dev/null +++ b/libdestruct/common/flags/__init__.py @@ -0,0 +1,12 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from libdestruct.common.flags.flags import flags +from libdestruct.common.flags.flags_of import flags_of + +__all__ = ["flags", "flags_of"] + +import libdestruct.common.flags.flags_field_inflater # noqa: F401 diff --git a/libdestruct/common/flags/flags.py b/libdestruct/common/flags/flags.py new file mode 100644 index 0000000..eb324de --- /dev/null +++ b/libdestruct/common/flags/flags.py @@ -0,0 +1,79 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. 
All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from types import GenericAlias +from typing import TYPE_CHECKING + +from libdestruct.common.obj import obj +from libdestruct.common.type_registry import TypeRegistry + +if TYPE_CHECKING: # pragma: no cover + from enum import IntFlag + + from libdestruct.backing.resolver import Resolver + + +class flags(obj): + """A generic bit flags field.""" + + def __class_getitem__(cls, params: tuple) -> GenericAlias: + """Support flags[MyFlags] and flags[MyFlags, c_short] subscript syntax.""" + if not isinstance(params, tuple): + params = (params,) + return GenericAlias(cls, params) + + python_flag: type[IntFlag] + """The backing Python IntFlag.""" + + _backing_type: type[obj] + """The backing type.""" + + lenient: bool + """Whether the conversion is lenient or not.""" + + def __init__( + self: flags, + resolver: Resolver, + python_flag: type[IntFlag], + backing_type: type[obj], + lenient: bool = True, + ) -> None: + """Initialize the flags object.""" + super().__init__(resolver) + + self.python_flag = python_flag + self._backing_type = TypeRegistry().inflater_for(backing_type)(resolver) + self.lenient = lenient + + self.size = self._backing_type.size + + def get(self: flags) -> IntFlag: + """Return the value of the flags.""" + raw = self._backing_type.get() + if not self.lenient: + # Compute the mask of all defined flag bits + all_bits = 0 + for member in self.python_flag: + all_bits |= member.value + if raw & ~all_bits: + raise ValueError( + f"Unknown bits 0x{raw & ~all_bits:x} in {self.python_flag.__name__}({raw!r})" + ) + return self.python_flag(raw) + + def _set(self: flags, value: IntFlag) -> None: + """Set the value of the flags.""" + self._backing_type.set(int(value)) + + def to_bytes(self: flags) -> bytes: + """Return the serialized representation of the flags.""" + return self._backing_type.to_bytes() + + def 
to_str(self: obj, indent: int = 0) -> str: + """Return a string representation of the object.""" + return f"{self.get()!r}" diff --git a/libdestruct/common/flags/flags_field.py b/libdestruct/common/flags/flags_field.py new file mode 100644 index 0000000..0acffb7 --- /dev/null +++ b/libdestruct/common/flags/flags_field.py @@ -0,0 +1,27 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# + +from __future__ import annotations + +from abc import abstractmethod +from typing import TYPE_CHECKING + +from libdestruct.common.field import Field +from libdestruct.common.flags.flags import flags + +if TYPE_CHECKING: # pragma: no cover + from libdestruct.backing.resolver import Resolver + from libdestruct.common.obj import obj + + +class FlagsField(Field): + """A generator for a flags field.""" + + base_type: type[obj] = flags + + @abstractmethod + def inflate(self: FlagsField, resolver: Resolver) -> flags: + """Inflate the field.""" diff --git a/libdestruct/common/flags/flags_field_inflater.py b/libdestruct/common/flags/flags_field_inflater.py new file mode 100644 index 0000000..f0c35ce --- /dev/null +++ b/libdestruct/common/flags/flags_field_inflater.py @@ -0,0 +1,50 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.c.c_integer_types import c_int +from libdestruct.common.flags.flags import flags +from libdestruct.common.flags.int_flag_field import IntFlagField +from libdestruct.common.type_registry import TypeRegistry + +if TYPE_CHECKING: # pragma: no cover + from collections.abc import Callable + + from libdestruct.backing.resolver import Resolver + from libdestruct.common.flags.flags_field import FlagsField + from libdestruct.common.obj import obj + +registry = TypeRegistry() + + +def generic_flags_field_inflater( + field: FlagsField, + _: type[obj], + __: tuple[obj, type[obj]] | None, +) -> Callable[[Resolver], obj]: + """Returns the inflater for a flags field of a struct.""" + return field.inflate + + +def _subscripted_flags_handler( + item: object, + args: tuple, + owner: tuple[obj, type[obj]] | None, +) -> Callable[[Resolver], obj] | None: + """Handle subscripted flags types like flags[Perms] or flags[Perms, c_short].""" + if not args: + return None + python_flag = args[0] + backing_type = args[1] if len(args) > 1 else c_int + field = IntFlagField(python_flag, backing_type=backing_type) + return field.inflate + + +registry.register_instance_handler(IntFlagField, generic_flags_field_inflater) +registry.register_generic_handler(flags, _subscripted_flags_handler) diff --git a/libdestruct/common/flags/flags_of.py b/libdestruct/common/flags/flags_of.py new file mode 100644 index 0000000..3b54689 --- /dev/null +++ b/libdestruct/common/flags/flags_of.py @@ -0,0 +1,23 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +from __future__ import annotations + +from enum import IntFlag +from typing import TYPE_CHECKING + +from libdestruct.common.flags.int_flag_field import IntFlagField + +if TYPE_CHECKING: # pragma: no cover + from libdestruct.common.flags.flags_field import FlagsField + + +def flags_of(flag_type: type[IntFlag], lenient: bool = True, size: int = 4) -> FlagsField: + """Return a new flags field.""" + if not issubclass(flag_type, IntFlag): + raise TypeError("The flag type must be a subclass of IntFlag.") + + return IntFlagField(flag_type, lenient, size) diff --git a/libdestruct/common/flags/int_flag_field.py b/libdestruct/common/flags/int_flag_field.py new file mode 100644 index 0000000..b79d5f5 --- /dev/null +++ b/libdestruct/common/flags/int_flag_field.py @@ -0,0 +1,60 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from libdestruct.c.c_integer_types import c_char, c_int, c_long, c_short +from libdestruct.common.flags.flags import flags +from libdestruct.common.flags.flags_field import FlagsField + +if TYPE_CHECKING: # pragma: no cover + from enum import IntFlag + + from libdestruct.backing.resolver import Resolver + + +class IntFlagField(FlagsField): + """A generator for an IntFlag-based flags field.""" + + def __init__( + self: IntFlagField, + flag_type: type[IntFlag], + lenient: bool = True, + size: int = 4, + backing_type: type | None = None, + ) -> None: + """Initialize the field.""" + self.flag_type = flag_type + self.lenient = lenient + + if backing_type is not None: + self.backing_type = backing_type + return + + if not 0 < size <= 8: + raise ValueError("The size of the field must be between 1 and 8 bytes.") + + match size: + case 1: + self.backing_type = c_char + case 2: + self.backing_type = c_short + case 4: + self.backing_type = c_int + case 8: + self.backing_type = c_long + case _: + raise ValueError("The size of the field must be a power of 2.") + + def inflate(self: IntFlagField, resolver: Resolver) -> flags: + """Inflate the field.""" + return flags(resolver, self.flag_type, self.backing_type, self.lenient) + + def get_size(self: IntFlagField) -> int: + """Returns the size of the object inflated by this field.""" + return self.backing_type.size diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index 3e2c4cb..c67ca76 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -6,12 +6,14 @@ from __future__ import annotations +from types import GenericAlias from typing import Annotated, get_args, get_origin from typing_extensions import Self from libdestruct.backing.fake_resolver import FakeResolver from libdestruct.backing.resolver import Resolver +from libdestruct.common.array.vla_field import 
VLAField from libdestruct.common.attributes.offset_attribute import OffsetAttribute from libdestruct.common.bitfield.bitfield_field import BitfieldField from libdestruct.common.bitfield.bitfield_tracker import BitfieldTracker @@ -119,6 +121,17 @@ def _inflate_struct_attributes( current_offset += bf_tracker.flush() + # For VLA structs, size must be computed dynamically since the count + # can change at runtime. Detect VLA by duck-typing: vla_impl has a + # _count_member attribute that plain array_impl does not. + members = object.__getattribute__(self, "_members") + last_member = list(members.values())[-1] if members else None + if last_member is not None and hasattr(last_member, "_count_member"): + last_name = list(self._members.keys())[-1] + self._vla_fixed_offset = self._member_offsets[last_name] + else: + self.size = current_offset + @staticmethod def _resolve_field( name: str, @@ -181,11 +194,28 @@ def compute_own_size(cls: type[struct_impl], reference_type: type) -> None: max_alignment = 1 bf_tracker = BitfieldTracker() aligned = getattr(reference_type, "_aligned_", False) + seen_vla = False for name, annotation, reference in iterate_annotation_chain(reference_type, terminate_at=struct): if name == "_aligned_": continue + # VLA must be the last field + if seen_vla: + raise ValueError( + f"Variable-length array must be the last field in a struct. " + f"Field '{name}' follows a VLA." 
+ ) + # Detect VLA from default value or subscript annotation + default = getattr(reference, name, None) if hasattr(reference, name) else None + is_vla = isinstance(default, VLAField) + if not is_vla and isinstance(annotation, GenericAlias): + args = annotation.__args__ + if len(args) == 2 and isinstance(args[1], str): + is_vla = True + if is_vla: + seen_vla = True + resolved_type, bitfield_field, explicit_offset = struct_impl._resolve_field( name, annotation, reference, cls._inflater, owner=(None, cls), ) diff --git a/libdestruct/common/utils.py b/libdestruct/common/utils.py index dc00645..d37836a 100644 --- a/libdestruct/common/utils.py +++ b/libdestruct/common/utils.py @@ -8,10 +8,11 @@ import contextlib import sys -from types import MethodType +from types import GenericAlias, MethodType from typing import TYPE_CHECKING, Any, ForwardRef from libdestruct.common.field import Field +from libdestruct.common.type_registry import TypeRegistry if TYPE_CHECKING: # pragma: no cover from collections.abc import Generator @@ -27,8 +28,6 @@ def is_field_bound_method(item: obj) -> bool: def size_of(item_or_inflater: obj | callable[[Resolver], obj]) -> int: """Return the size in bytes of a type, instance, or field descriptor.""" - from types import GenericAlias - # Field instances (e.g. array_of, ptr_to) — must come before .size check if isinstance(item_or_inflater, Field): return item_or_inflater.get_size() @@ -37,19 +36,15 @@ def size_of(item_or_inflater: obj | callable[[Resolver], obj]) -> int: # Subscripted GenericAlias types (e.g. 
array[c_int, 10], enum[Color], ptr[T]) if isinstance(item_or_inflater, GenericAlias): - from libdestruct.common.type_registry import TypeRegistry - inflater = TypeRegistry().inflater_for(item_or_inflater) return size_of(inflater) - # Struct types: size is on the inflated _type_impl class - if isinstance(item_or_inflater, type) and hasattr(item_or_inflater, "_type_impl"): + # Struct types: size is on the inflated _type_impl class (check own __dict__ to avoid MRO leaks) + if isinstance(item_or_inflater, type) and "_type_impl" in item_or_inflater.__dict__: return item_or_inflater._type_impl.size # Struct types not yet inflated: trigger inflation to compute size if isinstance(item_or_inflater, type) and not hasattr(item_or_inflater, "size"): - from libdestruct.common.type_registry import TypeRegistry - impl = TypeRegistry().inflater_for(item_or_inflater) if hasattr(impl, "size") and isinstance(impl.size, int): return impl.size @@ -58,9 +53,16 @@ def size_of(item_or_inflater: obj | callable[[Resolver], obj]) -> int: if isinstance(item_or_inflater, type): if hasattr(item_or_inflater, "size") and isinstance(item_or_inflater.size, int): return item_or_inflater.size - elif hasattr(item_or_inflater.__class__, "size"): - return item_or_inflater.__class__.size + elif "_vla_fixed_offset" in item_or_inflater.__dict__: + # VLA struct: size = fixed offset + dynamic VLA size + vla_offset = item_or_inflater.__dict__["_vla_fixed_offset"] + members = object.__getattribute__(item_or_inflater, "_members") + last_member = list(members.values())[-1] + return vla_offset + last_member.size + elif "size" in item_or_inflater.__dict__: + return item_or_inflater.__dict__["size"] elif hasattr(item_or_inflater, "size"): + # Handles both class-level attributes and properties (e.g. 
vla_impl.size) return item_or_inflater.size raise ValueError(f"Cannot determine the size of {item_or_inflater}") @@ -74,12 +76,12 @@ def alignment_of(item: obj | type[obj]) -> int: For packed structs (the default), alignment is 1. """ # For uninflated struct types, trigger inflation first so alignment is computed - if isinstance(item, type) and not hasattr(item, "size") and not hasattr(item, "_type_impl"): + if isinstance(item, type) and not hasattr(item, "size") and "_type_impl" not in item.__dict__: with contextlib.suppress(ValueError, TypeError): size_of(item) - # Struct types with computed alignment - if isinstance(item, type) and hasattr(item, "_type_impl"): + # Struct types with computed alignment (check own __dict__ to avoid MRO leaks) + if isinstance(item, type) and "_type_impl" in item.__dict__: impl = item._type_impl if hasattr(impl, "alignment"): return impl.alignment diff --git a/libdestruct/libdestruct.py b/libdestruct/libdestruct.py index 882e714..04f05ee 100644 --- a/libdestruct/libdestruct.py +++ b/libdestruct/libdestruct.py @@ -6,22 +6,28 @@ from __future__ import annotations +import mmap from collections.abc import Sequence +from pathlib import Path from typing import TYPE_CHECKING +from typing_extensions import Self + from libdestruct.backing.resolver import Resolver from libdestruct.common.inflater import Inflater if TYPE_CHECKING: # pragma: no cover + import io + from libdestruct.common.obj import obj _VALID_ENDIANNESS = ("little", "big") -def inflater(memory: Sequence, endianness: str = "little") -> Inflater: +def inflater(memory: Sequence | mmap.mmap, endianness: str = "little") -> Inflater: """Return a TypeInflater instance.""" - if not isinstance(memory, Sequence): + if not isinstance(memory, Sequence | mmap.mmap): raise TypeError(f"memory must be a Sequence, not {type(memory).__name__}") if endianness not in _VALID_ENDIANNESS: @@ -30,6 +36,51 @@ def inflater(memory: Sequence, endianness: str = "little") -> Inflater: return Inflater(memory, 
endianness=endianness) +class FileInflater(Inflater): + """An inflater backed by a memory-mapped file.""" + + def __init__( + self: FileInflater, + file_handle: io.BufferedReader, + mmap_obj: mmap.mmap, + endianness: str = "little", + ) -> None: + """Initialize the file-backed inflater.""" + super().__init__(mmap_obj, endianness=endianness) + self._file_handle = file_handle + self._mmap = mmap_obj + + def __enter__(self: FileInflater) -> Self: + """Enter context manager.""" + return self + + def __exit__(self: FileInflater, *args: object) -> None: + """Close mmap and file handle.""" + self._mmap.close() + self._file_handle.close() + + +def inflater_from_file(path: str, writable: bool = False, endianness: str = "little") -> FileInflater: + """Create an inflater backed by a memory-mapped file. + + Args: + path: Path to the binary file. + writable: If True, writes through the inflater are persisted to the file. + endianness: The byte order ("little" or "big"). + + Returns: + A FileInflater context manager. + """ + if endianness not in _VALID_ENDIANNESS: + raise ValueError(f"endianness must be 'little' or 'big', not {endianness!r}") + + mode = "r+b" if writable else "rb" + access = mmap.ACCESS_WRITE if writable else mmap.ACCESS_READ + file_handle = Path(path).open(mode) # noqa: SIM115 — managed by FileInflater.__exit__ + mmap_obj = mmap.mmap(file_handle.fileno(), 0, access=access) + return FileInflater(file_handle, mmap_obj, endianness=endianness) + + def inflate(item: type, memory: Sequence, address: int | Resolver, endianness: str = "little") -> obj: """Inflate a memory-referencing type. 
diff --git a/mkdocs.yml b/mkdocs.yml index 08ca91a..2b5e014 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -57,8 +57,10 @@ nav: - Pointers: basics/pointers.md - Arrays: basics/arrays.md - Enums: basics/enums.md + - Bit Flags: basics/flags.md - Memory: - The Inflater: memory/inflater.md + - File-Backed Memory: memory/file_inflater.md - Resolvers: memory/resolvers.md - Advanced: - Bitfields: advanced/bitfields.md @@ -68,4 +70,6 @@ nav: - Hex Dump: advanced/hexdump.md - Unions: advanced/tagged_unions.md - Struct Alignment: advanced/alignment.md + - Struct Inheritance: advanced/inheritance.md + - Variable-Length Arrays: advanced/vla.md - Field Offsets: advanced/offset.md diff --git a/pyproject.toml b/pyproject.toml index 197dc44..06dd261 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,9 @@ dev = [ "rich", ] +[tool.setuptools.packages.find] +include = ["libdestruct*"] + [tool.ruff] line-length = 120 indent-width = 4 diff --git a/test/scripts/file_inflater_test.py b/test/scripts/file_inflater_test.py new file mode 100644 index 0000000..8268e24 --- /dev/null +++ b/test/scripts/file_inflater_test.py @@ -0,0 +1,106 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +import mmap +import os +import struct as pystruct +import tempfile +import unittest + +from libdestruct import struct, c_int, c_long, inflater, inflater_from_file, size_of + + +class point_t(struct): + x: c_int + y: c_int + + +class FileInflaterTest(unittest.TestCase): + """File-backed inflater tests.""" + + def setUp(self): + self.tmpfile = tempfile.NamedTemporaryFile(delete=False) + self.tmpfile.write(pystruct.pack(" to_bytes identity.""" + data = pystruct.pack(" 0) + + def test_flags_rejects_non_intflag(self): + """flags_of(IntEnum) raises TypeError.""" + class Color(IntEnum): + RED = 1 + + with self.assertRaises(TypeError): + flags_of(Color) + + def test_size_of_flags(self): + """size_of(flags[Perms]) returns 4.""" + self.assertEqual(size_of(flags[Perms]), 4) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/scripts/inheritance_test.py b/test/scripts/inheritance_test.py new file mode 100644 index 0000000..427c703 --- /dev/null +++ b/test/scripts/inheritance_test.py @@ -0,0 +1,134 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. 
+# + +import struct as pystruct +import unittest + +from libdestruct import struct, c_char, c_int, c_long, inflater, size_of, alignment_of + + +class base_t(struct): + a: c_int + + +class derived_t(base_t): + b: c_int + + +class level_a(struct): + x: c_int + + +class level_b(level_a): + y: c_int + + +class level_c(level_b): + z: c_int + + +class InheritanceTest(unittest.TestCase): + """Struct inheritance tests.""" + + def test_basic_inheritance_fields(self): + """Derived struct has both parent and own fields.""" + data = pystruct.pack(" B -> C, each adding c_int.""" + self.assertEqual(size_of(level_c), 12) + data = pystruct.pack(" Date: Thu, 2 Apr 2026 18:30:52 -0500 Subject: [PATCH 46/46] fix: solve the last remaining bugs I could find in the codebase --- docs/advanced/alignment.md | 2 +- docs/advanced/forward_refs.md | 4 +- docs/basics/pointers.md | 12 +- docs/memory/resolvers.md | 4 +- libdestruct/c/struct_parser.py | 6 +- libdestruct/common/array/array_impl.py | 24 +- libdestruct/common/enum/enum.py | 2 +- libdestruct/common/obj.py | 6 +- libdestruct/common/ptr/ptr.py | 4 +- libdestruct/common/struct/struct_impl.py | 49 ++- test/scripts/bug_verification_test.py | 417 +++++++++++++++++++++++ 11 files changed, 503 insertions(+), 27 deletions(-) create mode 100644 test/scripts/bug_verification_test.py diff --git a/docs/advanced/alignment.md b/docs/advanced/alignment.md index b1fbed3..57ec9e0 100644 --- a/docs/advanced/alignment.md +++ b/docs/advanced/alignment.md @@ -109,7 +109,7 @@ class s_t(struct): b: c_int = offset(3) # placed at offset 3, not rounded to 4 c: c_int # aligned normally after b -size_of(s_t) # 7 (3 + 4) +size_of(s_t) # 12 (b at offset 3 + 4 bytes = 7, c aligned to offset 8 + 4 bytes = 12) ``` ## alignment_of() diff --git a/docs/advanced/forward_refs.md b/docs/advanced/forward_refs.md index 44a39d2..c54d1bd 100644 --- a/docs/advanced/forward_refs.md +++ b/docs/advanced/forward_refs.md @@ -50,14 +50,14 @@ memory[4:12] = pystruct.pack(" offset 12 # 
Node 1 at offset 12 memory[12:16] = pystruct.pack(" null +memory[16:24] = pystruct.pack(" out of bounds lib = inflater(memory) head = lib.inflate(Node, 0) print(head.val.value) # 10 print(head.next.unwrap().val.value) # 20 -print(head.next.unwrap().next.try_unwrap()) # None +print(head.next.unwrap().next.try_unwrap()) # None (address out of bounds) ``` ## Tree Example diff --git a/docs/basics/pointers.md b/docs/basics/pointers.md index e7ae774..9ca2117 100644 --- a/docs/basics/pointers.md +++ b/docs/basics/pointers.md @@ -54,13 +54,17 @@ class node_t(struct): val: c_int next: ptr[c_int] -memory = b"\x0a\x00\x00\x00" + b"\x00" * 8 # val=10, next=null +# next points to address 0xFF..FF which is out of bounds +memory = b"\x0a\x00\x00\x00" + b"\xff" * 8 node = node_t.from_bytes(memory) result = node.next.try_unwrap() print(result) # None ``` +!!! note + A null pointer (address 0) is **not** automatically invalid — address 0 is a valid index in Python byte sequences. `try_unwrap()` only returns `None` when the address causes an `IndexError` or `ValueError` during resolution. 
+ ## Self-Referential Structs Use `ptr["TypeName"]` for self-referential structs: @@ -94,16 +98,16 @@ import struct as pystruct # Node 0 at offset 0: val=10, next -> offset 12 memory[0:4] = pystruct.pack(" null +# Node 1 at offset 12: val=20, next -> out of bounds memory[12:16] = pystruct.pack(" type[obj]: result = struct_to_type(root) - PARSED_STRUCTS[root.name] = result + if root.name: + PARSED_STRUCTS[root.name] = result return result @@ -170,6 +171,9 @@ def arr_to_type(arr: c_ast.ArrayDecl) -> type[obj]: typ = ptr_to_type(arr.type) if isinstance(arr.type, c_ast.PtrDecl) else type_decl_to_type(arr.type) + if arr.dim is None: + raise ValueError("Unsized arrays (flexible array members) are not supported.") + return array_of(typ, int(arr.dim.value)) diff --git a/libdestruct/common/array/array_impl.py b/libdestruct/common/array/array_impl.py index a32e198..94d2e57 100644 --- a/libdestruct/common/array/array_impl.py +++ b/libdestruct/common/array/array_impl.py @@ -48,6 +48,10 @@ def count(self: array_impl) -> int: def get(self: array, index: int = -1) -> object: """Return the element at the given index, or all elements if index is -1.""" + if hasattr(self, "_frozen_elements"): + if index == -1: + return list(self._frozen_elements) + return self._frozen_elements[index] if index == -1: return [self.backing_type(self.resolver.relative_from_own(i * self.item_size, 0)) for i in range(self._count)] return self.backing_type(self.resolver.relative_from_own(index * self.item_size, 0)) @@ -64,8 +68,21 @@ def to_dict(self: array_impl) -> list[object]: """Return a JSON-serializable list of element values.""" return [elem.to_dict() for elem in self] + def freeze(self: array_impl) -> None: + """Freeze the array, individually freezing each element.""" + self._frozen_elements = [ + self.backing_type(self.resolver.relative_from_own(i * self.item_size, 0)) + for i in range(self._count) + ] + for elem in self._frozen_elements: + elem.freeze() + self._frozen_array_bytes = 
b"".join(bytes(x) for x in self._frozen_elements) + super().freeze() + def to_bytes(self: array_impl) -> bytes: """Return the serialized representation of the array.""" + if self._frozen: + return self._frozen_array_bytes return b"".join(bytes(x) for x in self) def to_str(self: array_impl, indent: int = 0) -> str: @@ -87,5 +104,8 @@ def __setitem__(self: array_impl, index: int, value: obj) -> None: def __iter__(self: array_impl) -> Generator[obj, None, None]: """Iterate over the array.""" - for i in range(self._count): - yield self[i] + if self._frozen: + yield from self._frozen_elements + else: + for i in range(self._count): + yield self[i] diff --git a/libdestruct/common/enum/enum.py b/libdestruct/common/enum/enum.py index 2cc39c8..7aac7af 100644 --- a/libdestruct/common/enum/enum.py +++ b/libdestruct/common/enum/enum.py @@ -64,7 +64,7 @@ def get(self: enum) -> Enum: def _set(self: enum, value: Enum) -> None: """Set the value of the enum.""" - self._backing_type.set(value.value) + self._backing_type.set(int(value)) def to_bytes(self: enum) -> bytes: """Return the serialized representation of the enum.""" diff --git a/libdestruct/common/obj.py b/libdestruct/common/obj.py index 8aa2f8b..4f59bbd 100644 --- a/libdestruct/common/obj.py +++ b/libdestruct/common/obj.py @@ -77,8 +77,8 @@ def set(self: obj, value: object) -> None: def freeze(self: obj) -> None: """Freeze the object.""" - self._frozen_value = self.get() - self._frozen = True + object.__setattr__(self, "_frozen_value", self.get()) + object.__setattr__(self, "_frozen", True) def diff(self: obj) -> tuple[object, object]: """Return the difference between the current value and the frozen value.""" @@ -97,7 +97,7 @@ def reset(self: obj) -> None: def update(self: obj) -> None: """Update the object with the given value.""" try: - self._frozen_value = self.get() + object.__setattr__(self, "_frozen_value", self.get()) except ValueError as e: raise RuntimeError("Could not update the object.") from e diff --git 
a/libdestruct/common/ptr/ptr.py b/libdestruct/common/ptr/ptr.py index 49bcd11..b550905 100644 --- a/libdestruct/common/ptr/ptr.py +++ b/libdestruct/common/ptr/ptr.py @@ -103,7 +103,7 @@ def unwrap(self: ptr, length: int | None = None) -> obj | bytes: result = self.wrapper(self.resolver.absolute_from_own(address)) else: target_resolver = self.resolver.absolute_from_own(address) - result = target_resolver.resolve(length or 1, 0) + result = target_resolver.resolve(length if length is not None else 1, 0) self._cached_unwrap = result self._cache_valid = True @@ -123,7 +123,7 @@ def try_unwrap(self: ptr, length: int | None = None) -> obj | bytes | None: try: # If the address is invalid, this will raise an IndexError or ValueError. - self.resolver.absolute_from_own(address).resolve(length or 1, 0) + self.resolver.absolute_from_own(address).resolve(length if length is not None else 1, 0) except (IndexError, ValueError): return None diff --git a/libdestruct/common/struct/struct_impl.py b/libdestruct/common/struct/struct_impl.py index c67ca76..7ef8c02 100644 --- a/libdestruct/common/struct/struct_impl.py +++ b/libdestruct/common/struct/struct_impl.py @@ -49,8 +49,8 @@ def __init__(self: struct_impl, resolver: Resolver | None = None, **kwargs: ...) 
# struct overrides the __init__ method, so we need to call the parent class __init__ method obj.__init__(self, resolver) - self._struct_name = self.__class__.__name__ - self._members = {} + object.__setattr__(self, "_struct_name", self.__class__.__name__) + object.__setattr__(self, "_members", {}) reference_type = self._reference_struct self._inflate_struct_attributes(self._inflater, resolver, reference_type) @@ -69,6 +69,17 @@ def __getattribute__(self: struct_impl, name: str) -> object: pass return super().__getattribute__(name) + def __setattr__(self: struct_impl, name: str, value: object) -> None: + """Set an attribute, delegating to member.value for struct fields.""" + try: + members = object.__getattribute__(self, "_members") + if name in members: + members[name].value = value + return + except AttributeError: + pass + object.__setattr__(self, name, value) + def __new__(cls: struct_impl, *args: ..., **kwargs: ...) -> Self: """Create a new struct.""" # Skip the __new__ method of the parent class @@ -82,9 +93,10 @@ def _inflate_struct_attributes( reference_type: type, ) -> None: current_offset = 0 + max_alignment = 1 bf_tracker = BitfieldTracker() aligned = getattr(reference_type, "_aligned_", False) - self._member_offsets = {} + object.__setattr__(self, "_member_offsets", {}) for name, annotation, reference in iterate_annotation_chain(reference_type, terminate_at=struct): if name == "_aligned_": @@ -103,7 +115,9 @@ def _inflate_struct_attributes( if bitfield_field: if aligned and bf_tracker.needs_new_group(bitfield_field): current_offset += bf_tracker.flush() - current_offset = _align_offset(current_offset, alignment_of(bitfield_field.backing_type)) + field_align = alignment_of(bitfield_field.backing_type) + max_alignment = max(max_alignment, field_align) + current_offset = _align_offset(current_offset, field_align) self._member_offsets[name] = current_offset result, offset_delta = bf_tracker.create_bitfield( bitfield_field, inflater, resolver, current_offset, 
@@ -112,7 +126,18 @@ def _inflate_struct_attributes( else: current_offset += bf_tracker.flush() if aligned and explicit_offset is None: - current_offset = _align_offset(current_offset, alignment_of(resolved_type)) + # Try alignment from the resolved type directly; for closures + # (e.g. union inflaters) alignment_of can't inspect them, so + # fall back to creating a probe instance. + field_align = alignment_of(resolved_type) + if field_align <= 1: + try: + probe = resolved_type(resolver.relative_from_own(current_offset, 0)) + field_align = alignment_of(probe) + except (ValueError, TypeError): + pass + max_alignment = max(max_alignment, field_align) + current_offset = _align_offset(current_offset, field_align) self._member_offsets[name] = current_offset result = resolved_type(resolver.relative_from_own(current_offset, 0)) current_offset += size_of(result) @@ -121,16 +146,22 @@ def _inflate_struct_attributes( current_offset += bf_tracker.flush() + # Apply tail padding for aligned structs + if aligned: + if isinstance(aligned, int) and aligned is not True: + max_alignment = max(max_alignment, aligned) + current_offset = _align_offset(current_offset, max_alignment) + # For VLA structs, size must be computed dynamically since the count # can change at runtime. Detect VLA by duck-typing: vla_impl has a # _count_member attribute that plain array_impl does not. 
members = object.__getattribute__(self, "_members") last_member = list(members.values())[-1] if members else None if last_member is not None and hasattr(last_member, "_count_member"): - last_name = list(self._members.keys())[-1] - self._vla_fixed_offset = self._member_offsets[last_name] + last_name = list(members.keys())[-1] + object.__setattr__(self, "_vla_fixed_offset", self._member_offsets[last_name]) else: - self.size = current_offset + object.__setattr__(self, "size", current_offset) @staticmethod def _resolve_field( @@ -316,7 +347,7 @@ def freeze(self: struct_impl) -> None: def reset(self: struct_impl) -> None: """Reset each member to its frozen value.""" - if not self._frozen: + if not object.__getattribute__(self, "_frozen"): raise RuntimeError("Cannot reset a struct that has not been frozen.") members = object.__getattribute__(self, "_members") diff --git a/test/scripts/bug_verification_test.py b/test/scripts/bug_verification_test.py new file mode 100644 index 0000000..9489766 --- /dev/null +++ b/test/scripts/bug_verification_test.py @@ -0,0 +1,417 @@ +# +# This file is part of libdestruct (https://github.com/mrindeciso/libdestruct). +# Copyright (c) 2026 Roberto Alessandro Bertolini. All rights reserved. +# Licensed under the MIT license. See LICENSE file in the project root for details. +# +# Tests that verify bugs found during code review. +# Each test is expected to FAIL on the current dev branch. 
+# + +import struct as pystruct +import unittest + +from libdestruct import ( + c_char, + c_int, + c_long, + c_short, + inflater, + ptr, + ptr_to, + size_of, + struct, + tagged_union, + union_of, +) +from libdestruct.backing.memory_resolver import MemoryResolver +from libdestruct.common.union.union import union +from libdestruct.common.union.union_field import UnionField +from libdestruct.common.union.tagged_union_field import TaggedUnionField +from libdestruct.common.utils import alignment_of + + +class Bug1_AlignedStructTailPaddingInstance(unittest.TestCase): + """Bug: _inflate_struct_attributes does not apply tail padding for aligned structs. + + compute_own_size (class-level) correctly adds tail padding so that + size_of(aligned_t) == 8, but _inflate_struct_attributes (instance-level) + sets self.size = current_offset without tail padding, giving size 5. + + This means size_of(instance) != size_of(class) for aligned structs + where the last field doesn't end on an aligned boundary. + """ + + def test_instance_size_matches_class_size(self): + """size_of(instance) should equal size_of(class) for aligned structs.""" + class aligned_t(struct): + _aligned_ = True + a: c_int # 4 bytes at offset 0 + b: c_char # 1 byte at offset 4, then 3 bytes tail padding + + # Class size correctly includes tail padding + self.assertEqual(size_of(aligned_t), 8) + + memory = pystruct.pack("