From 76f83a3ff8d7ded937d7422836e9b431b1ad5455 Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Sun, 31 May 2026 18:55:26 -0700 Subject: [PATCH 01/12] First draft of the reusable buffer implementation --- src/mss/buffer.py | 246 ++++++++++++++++++++++++++++++++++++++++ src/tests/test_setup.py | 2 + 2 files changed, 248 insertions(+) create mode 100644 src/mss/buffer.py diff --git a/src/mss/buffer.py b/src/mss/buffer.py new file mode 100644 index 00000000..a18e64b2 --- /dev/null +++ b/src/mss/buffer.py @@ -0,0 +1,246 @@ +"""Buffers with Finalizers + +This is an implementation of buffer objects with Python finalizers, +specific to the needs of MSS. + +# Caller Contract + +The entry point is `finalizing_buffer`. This is intended to be called +by `MSSImplementation` subclasses. They provide a buffer (such as a +ctypes array or mmap object) and a finalizer, and are given a +`memoryview` object. Once the memoryview is garbage collected, and +the consumers downstream of that memoryview have released their views +of the buffer, the finalizer will be invoked (with no arguments). + +At that time, the `MSSImplementation` may release the buffer, return +it to a pool for reuse, etc. + +This finalizer may be called at any time, from any thread. It may be +called after the MSSImplementation's `close()` method has been called. +Implementations must take care not to invalidate their buffers during +`close()`, but rather only after finalization. + +The finalizer may also be called before `finalizing_buffer()` returns. +This may happen if the implementation needs to make a copy rather than +using the originally-provided buffer (which is the case on Python +versions prior to 3.12). + +(Some more caveats appear at the end of this docstring.) + +# Background + +The Python buffer protocol lets different objects share underlying +memory. For instance, a NumPy ndarray, a Python bytearray object, and +a PyTorch Tensor object can all share the same underlying memory. +This allows interoperability between these systems without requiring +copies. + +Copying ("blitting") all the pixels in a screenshot takes time; +copying a 4K (3840x2160) BGRA image can take several milliseconds. If +an application is attempting to operate at 60 FPS, each copy consumes +a meaningful fraction of the frame budget. + +For a high-performance screenshot library such as MSS, it is therefore +important to minimize copies. Ideally, screenshot data would remain +in the buffer originally allocated by the operating system, such as +the memory returned by CreateDIBSection on Windows or a shared memory +segment on X11. This approach is commonly called "zero-copy". + +Getting the buffer to the user is only half the problem. MSS also +needs to know when the user is finished with the buffer's contents so +that the underlying resources can be reused or released. + +Most code that uses the buffer protocol is written in C. Since Python +3.0, the C-level buffer protocol has provided a mechanism for +exporters to learn when their buffers are no longer in use. However, +the corresponding Python-level API (which can be used by C consumers) +was not added until Python 3.12. + +Buffer lifetime is not the same as Python object lifetime. A user may +pass the returned memoryview to NumPy, PIL, PyTorch, or other +libraries. Those libraries may keep the exported buffer alive after +the original Python memoryview object is no longer reachable. + +Therefore, the lifetime of the returned memoryview object is not a +reliable signal that the buffer is no longer in use. Other objects +may still hold references to the buffer after that memoryview has been +destroyed. To know when the buffer can safely be reused or released, +MSS relies on the buffer protocol's release mechanism. + +The buffer protocol permits a wide variety of consumer behaviors and +derived-buffer relationships. Rather than attempting to model all of +those interactions directly, this implementation delegates that +complexity to Python's existing buffer-management machinery. + +## Performance note + +As a rough reference, copying a 3840x2160 BGRA screenshot on +contemporary hardware (Amazon EC2 m8i.large, Intel Xeon 6, DDR5-7200) +takes approximately 2.5 ms. At 60 FPS, that is about 15% of the +available frame time for a single copy. These numbers are intended +only to provide intuition about the cost of copies; actual performance +varies substantially by hardware and memory subsystem. + +# Design + +The central design decision in this file is that MSS interacts with +exactly one downstream buffer consumer: a memoryview. + +A memoryview is Python's standard object for representing a buffer. +It already implements the reference tracking, buffer export, slicing, +and format-conversion behavior required by the buffer protocol. + +Notably, memoryview objects do not pass buffer requests upstream to +arbitrary exporters. Once a memoryview has been created, it manages +downstream consumers itself. + +This means MSS only needs to reason about a single interaction: the +interaction between `_FinalizingBufferIntermediate` and the memoryview +created from it. + +One idea that has been proposed is to attach a weakref finalizer +directly to a memoryview object and use that as the signal that the +buffer is no longer in use. Testing has shown that this is not +sufficient. A memoryview Python object may be finalized while +downstream consumers still hold active references to the underlying +buffer. + +To obtain a correct signal, MSS uses the Python-side buffer protocol +introduced in Python 3.12 via the `__buffer__` and +`__release_buffer__` methods. + +An instance of `_FinalizingBufferIntermediate` is created and exactly +one memoryview is constructed from it. That memoryview is returned to +the caller. + +The memoryview tracks all downstream users of the buffer. When all of +those users have released their references, the memoryview +automatically invokes `_FinalizingBufferIntermediate.__release_buffer__`. + +That method invokes the caller-provided finalizer, which can release +or recycle the underlying storage. + +If this implementation appears more indirect than necessary, that +indirection is intentional. It narrows the portion of the buffer +protocol that MSS must reason about and test. + +# Caveats and Invariants + +* The finalizer may run after `MSSImplementation.close()` has been + called. `close()` must not free, reuse, or otherwise invalidate + buffers that may still be visible to users. + +* The finalizer may run at any time and on any thread. Finalizer code + must therefore be thread-safe and must not assume that it executes + on the thread that created the buffer. + +* On Python versions prior to 3.12, `finalizing_buffer()` creates a + copy of the data and invokes the finalizer immediately. In this + case, the finalizer may run before `finalizing_buffer()` returns. + +* `_FinalizingBufferIntermediate` intentionally supports exactly one + buffer request. This restriction simplifies reasoning about + correctness and should not be removed without carefully considering + the resulting buffer-lifetime semantics. + +* `_FinalizingBufferIntermediate` remains reachable through + `memoryview.obj`. Consumers must treat this as an implementation + detail and must not invoke `__buffer__()` or `__release_buffer__()` + directly. + +* Finalizer execution during interpreter shutdown is not guaranteed. + Implementations should not rely on finalizers running during process + termination. +""" + +import sys +from collections.abc import Callable +from threading import Lock +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing_extensions import Buffer + + +class _FinalizingBufferIntermediate: + """Finalizing buffer class. + + Contrary to the buffer protocol, this class only allows a single + buffer to be created. This simplifies the implementation and + reasoning. + + The creator must provide a finalizer to ensure that resources are + properly released when the underlying buffer is no longer needed. + This will be invoked, with no arguments, after all the downstream + users, such as NumPy or PyTorch, have released their references to + the buffer. + + This is only useful on Python 3.12 and later; earlier versions do + not support the __buffer__ and __release_buffer__ methods. + + This class should only be used by the finalizing_buffer function. + It is not appropriate for other uses! + """ + + def __init__(self, data: "Buffer", finalizer: Callable) -> None: + self._mv: memoryview | None = memoryview(data) + self._finalizer = finalizer + # The remainder of these shouldn't be necessary. As a + # consequence of the __buffer__ contract and the + # implementation of finalizing_buffer, only one call to + # __buffer__ and one call to __release_buffer__ should be + # made, and never simultaneously. But we still include them + # out of an abundance of caution. + self._buffer_invoked = False + self._release_invoked = False + self._lock = Lock() + + def __buffer__(self, _flags: int) -> memoryview: + with self._lock: + assert not self._buffer_invoked, "Buffer can only be requested once" # noqa: S101 + self._buffer_invoked = True + assert self._mv is not None, "Buffer has already been released" # noqa: S101 + return self._mv + + def __release_buffer__(self, _buffer: memoryview) -> None: + with self._lock: + assert not self._release_invoked, "Buffer can only be released once" # noqa: S101 + self._release_invoked = True + self._finalizer() + assert self._mv is not None, "Buffer has already been released" # noqa: S101 + self._mv.release() + self._mv = None # Extra-defensive + + +def finalizing_buffer(data: "Buffer", finalizer: Callable) -> memoryview: + """Create a finalizing buffer or a copy depending on Python version. + + The finalizer will be invoked when the buffer is no longer in use, + with a caveat. This will only track uses downstream of the + returned buffer. If the input buffer is also used in other + places, those are not accounted for. + + On Python 3.12 and later, this returns a memoryview object that + provides a reusable buffer interface. On earlier versions, this + returns a copy of the data, and invokes the finalizer immediately + after the copy is made. + + This preserves read/write semantics of the original data: if the + original buffer is read-only, the returned memoryview will be + read-only. + """ + if sys.version_info >= (3, 12): + # Fast path: we can use the Python 3.12 features + return memoryview(_FinalizingBufferIntermediate(data, finalizer)) + # Slow path: copy the data. + with memoryview(data) as mv: + # We create a memoryview of the original data so that we can + # tell if it's read-only or not. We can't return this + # memoryview, since we're about to invoke the finalizer to + # release the buffer it got its data from. + copied_data = bytes(mv) if mv.readonly else bytearray(mv) + finalizer() + # We could return copied_data directly and still have a perfectly + # fine buffer, but always returning a memoryview provides more + # consistency. + return memoryview(copied_data) diff --git a/src/tests/test_setup.py b/src/tests/test_setup.py index fde4dcea..2381ced9 100644 --- a/src/tests/test_setup.py +++ b/src/tests/test_setup.py @@ -70,6 +70,7 @@ def test_sdist() -> None: f"mss-{__version__}/src/mss/__init__.py", f"mss-{__version__}/src/mss/__main__.py", f"mss-{__version__}/src/mss/base.py", + f"mss-{__version__}/src/mss/buffer.py", f"mss-{__version__}/src/mss/darwin.py", f"mss-{__version__}/src/mss/exception.py", f"mss-{__version__}/src/mss/factory.py", @@ -145,6 +146,7 @@ def test_wheel() -> None: "mss/__init__.py", "mss/__main__.py", "mss/base.py", + "mss/buffer.py", "mss/darwin.py", "mss/exception.py", "mss/factory.py", From c1d3f3c06f61ee69dae069771987fbc1254d5167 Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Sun, 31 May 2026 22:28:26 -0700 Subject: [PATCH 02/12] Add XShmGetImage support for zero-copy buffers --- docs/source/usage.rst | 16 ++ src/mss/base.py | 4 +- src/mss/buffer.py | 40 +++-- src/mss/linux/base.py | 4 + src/mss/linux/xshmgetimage.py | 301 ++++++++++++++++++++++++---------- src/tests/test_gnu_linux.py | 60 ++++--- 6 files changed, 291 insertions(+), 134 deletions(-) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 4562b3c3..50963a16 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -53,6 +53,22 @@ This is a much better usage, memory efficient:: Also, it is a good thing to save the MSS instance inside an attribute of your class and calling it when needed. +Buffer Reuse +============ + +MSS will automatically use zero-copy reusable buffers, if possible. + +On one developer's system, at 3840x2160, taking screenshots as fast as possible and summing the data (to make sure all +the memory is actually accessed): + +- Zero-copy enabled: 18.59 ms +- Zero-copy disabled: 22.64 ms +- Improvement: 17.9% faster + +Zero-copy buffers are only available with Python 3.12 or later. + +Additionally, only GNU/Linux has the necessary support. Support for other operating systems is planned. + Multithreading ============== diff --git a/src/mss/base.py b/src/mss/base.py index 963eb4cd..342cdfc9 100644 --- a/src/mss/base.py +++ b/src/mss/base.py @@ -18,7 +18,7 @@ from collections.abc import Callable, Iterator from types import TracebackType - from typing_extensions import Self + from typing_extensions import Buffer, Self from mss.models import Monitor, Monitors, Size @@ -89,7 +89,7 @@ def cursor(self) -> ScreenShot | None: """Retrieve all cursor data. Pixels have to be RGB.""" @abstractmethod - def grab(self, monitor: Monitor, /) -> bytearray | tuple[bytearray, Size]: + def grab(self, monitor: Monitor, /) -> Buffer | tuple[Buffer, Size]: """Retrieve all pixels from a monitor. Pixels have to be RGB. If the monitor size is not in pixel units, include a Size in diff --git a/src/mss/buffer.py b/src/mss/buffer.py index a18e64b2..7f4a9290 100644 --- a/src/mss/buffer.py +++ b/src/mss/buffer.py @@ -153,14 +153,21 @@ termination. """ +from __future__ import annotations + import sys -from collections.abc import Callable from threading import Lock from typing import TYPE_CHECKING if TYPE_CHECKING: + from collections.abc import Callable + from typing_extensions import Buffer +# You can always use this module, and finalizing_buffer. This variable is only conditionalizing things like test code. +# You shouldn't need it in implementations. +FAST_PATH_AVAILABLE = sys.version_info >= (3, 12) + class _FinalizingBufferIntermediate: """Finalizing buffer class. @@ -182,15 +189,12 @@ class _FinalizingBufferIntermediate: It is not appropriate for other uses! """ - def __init__(self, data: "Buffer", finalizer: Callable) -> None: + def __init__(self, data: Buffer, finalizer: Callable) -> None: self._mv: memoryview | None = memoryview(data) self._finalizer = finalizer - # The remainder of these shouldn't be necessary. As a - # consequence of the __buffer__ contract and the - # implementation of finalizing_buffer, only one call to - # __buffer__ and one call to __release_buffer__ should be - # made, and never simultaneously. But we still include them - # out of an abundance of caution. + # The remainder of these shouldn't be necessary. As a consequence of the __buffer__ contract and the + # implementation of finalizing_buffer, only one call to __buffer__ and one call to __release_buffer__ should be + # made, and never simultaneously. But we still include them out of an abundance of caution. self._buffer_invoked = False self._release_invoked = False self._lock = Lock() @@ -206,13 +210,16 @@ def __release_buffer__(self, _buffer: memoryview) -> None: with self._lock: assert not self._release_invoked, "Buffer can only be released once" # noqa: S101 self._release_invoked = True - self._finalizer() assert self._mv is not None, "Buffer has already been released" # noqa: S101 + # We need to release the memoryview itself, so that when the finalizer is invoked, the underlying buffer object + # doesn't think there are still exported buffers. (mmap, for instance, won't close a region with exported + # buffers.) self._mv.release() self._mv = None # Extra-defensive + self._finalizer() -def finalizing_buffer(data: "Buffer", finalizer: Callable) -> memoryview: +def finalizing_buffer(data: Buffer, finalizer: Callable) -> memoryview: """Create a finalizing buffer or a copy depending on Python version. The finalizer will be invoked when the buffer is no longer in use, @@ -229,18 +236,15 @@ def finalizing_buffer(data: "Buffer", finalizer: Callable) -> memoryview: original buffer is read-only, the returned memoryview will be read-only. """ - if sys.version_info >= (3, 12): + if FAST_PATH_AVAILABLE: # Fast path: we can use the Python 3.12 features return memoryview(_FinalizingBufferIntermediate(data, finalizer)) # Slow path: copy the data. with memoryview(data) as mv: - # We create a memoryview of the original data so that we can - # tell if it's read-only or not. We can't return this - # memoryview, since we're about to invoke the finalizer to - # release the buffer it got its data from. + # We create a memoryview of the original data so that we can tell if it's read-only or not. We can't return + # this memoryview, since we're about to invoke the finalizer to release the buffer it got its data from. copied_data = bytes(mv) if mv.readonly else bytearray(mv) finalizer() - # We could return copied_data directly and still have a perfectly - # fine buffer, but always returning a memoryview provides more - # consistency. + # We could return copied_data directly and still have a perfectly fine buffer, but always returning a memoryview + # provides more consistency. return memoryview(copied_data) diff --git a/src/mss/linux/base.py b/src/mss/linux/base.py index df702aed..972c9ce1 100644 --- a/src/mss/linux/base.py +++ b/src/mss/linux/base.py @@ -453,6 +453,10 @@ def _grab_xgetimage(self, monitor: Monitor, /) -> bytearray: # Now, save the image. This is a reference into the img_reply structure. img_data_arr = xcb.get_image_data(img_reply) # Copy this into a new bytearray, so that it will persist after we clear the image structure. + # + # We might be able to hold onto img_reply in a finalizing_buffer finalizer, so that we can use the image data + # without copying. That would be more efficient, but it would be a bit more complex, and presently the + # XGetImage implementation is already a slow and less-common path. img_data = bytearray(img_data_arr) if img_reply.depth != self.drawable_depth or img_reply.visual != self.drawable_visual_id: diff --git a/src/mss/linux/xshmgetimage.py b/src/mss/linux/xshmgetimage.py index b7cc3646..8c96d90f 100644 --- a/src/mss/linux/xshmgetimage.py +++ b/src/mss/linux/xshmgetimage.py @@ -15,9 +15,15 @@ import enum import os -from mmap import PROT_READ, mmap # type: ignore[attr-defined] +import sys +from contextlib import suppress +from dataclasses import dataclass +from functools import partial +from mmap import PROT_READ, PROT_WRITE, mmap # type: ignore[attr-defined] +from threading import Lock from typing import TYPE_CHECKING, Any +from mss.buffer import finalizing_buffer from mss.exception import ScreenShotError from mss.linux import xcb from mss.linux.base import ALL_PLANES, MSSImplXCBBase @@ -28,6 +34,27 @@ __all__ = () +# Quick note (this should go in the commit log, or something, not source, but I had to record it somewhere): +# On my home box, at 4k resolution, doing 1000 grabs + NumPy sums of the pixel data, with/without reusable buffers: +# * Enabled: 18.6 ms / each +# * Disabled: 22.6 ms / each +# * Delta: 17.9% faster + +# For Python < 3.12, we only use one buffer. +# +# For Python >= 3.12, we have zero-copy buffers that the user owns. For those, we allocate two initial buffers. This +# is for the common case: +# +# with mss() as sct: +# while True: +# img = sct.grab(...) # noqa: ERA001 +# process(img) # noqa: ERA001 +# +# In that case, each ScreenShot object is not released until the next one has been assigned to img. That means that we +# will need two buffers to handle that case zero-copy. Our free pool can always grow, but we start it with two to keep +# the second capture from having a brief hiccup. +_INITIAL_BUFFER_COUNT = 2 if sys.version_info >= (3, 12) else 1 + class ShmStatus(enum.Enum): """Availability of the MIT-SHM extension for this backend.""" @@ -37,6 +64,13 @@ class ShmStatus(enum.Enum): UNAVAILABLE = enum.auto() # We know SHM GetImage is unusable; always use XGetImage. +@dataclass(slots=True) +class _ShmSlot: + shmseg: xcb.ShmSeg + buf: mmap | None # Set to None when it's closed, for extra verification + size: int + + class MSSImplXShmGetImage(MSSImplXCBBase): """XCB backend using XShmGetImage with an automatic XGetImage fallback. @@ -48,10 +82,15 @@ class MSSImplXShmGetImage(MSSImplXCBBase): def __init__(self, *, display: str | bytes | None = None, with_cursor: bool = False) -> None: super().__init__(display=display, with_cursor=with_cursor) - # These are the objects we need to clean up when we shut down. They are created in _setup_shm. - self._memfd: int | None = None - self._buf: mmap | None = None - self._shmseg: xcb.ShmSeg | None = None + # Free-list ownership model: + # - a slot in this list is idle and available for reuse; + # - a slot removed from this list is owned by grab/finalizer flow; + # - finalization returns it here unless SHM has been closed (in which case it is destroyed) + self._free_shm_slots: list[_ShmSlot] = [] + self._shm_lock = Lock() + # Once this is set, we no longer expect to use SHM and have + # released the idle standby buffers already. + self._shm_closed = False # Rather than trying to track the shm_status, we may be able to raise an exception in __init__ if XShmGetImage # isn't available. The factory in linux/__init__.py could then catch that and switch to XGetImage. @@ -73,7 +112,124 @@ def _shm_report_issue(self, msg: str, *args: Any) -> None: full_msg += " | " + ", ".join(str(arg) for arg in args) self.performance_status.append(full_msg) - def _setup_shm(self) -> ShmStatus: # noqa: PLR0911 + def _create_shm_slot(self, size: int) -> _ShmSlot: + """Allocate and attach one shared-memory slot. + + This is called when the free list is empty when a grab is + requested. The caller owns the new slot, and is responsible for + ensuring it is put on the free list or destroyed. + """ + assert self.conn is not None # noqa: S101 + + memfd: int | None = None + mm: mmap | None = None + try: + try: + memfd = os.memfd_create("mss-shm-buf", flags=os.MFD_CLOEXEC) # type:ignore[attr-defined] + except OSError as exc: + msg = "Cannot allocate MIT-SHM buffer" + raise ScreenShotError(msg) from exc + + try: + os.ftruncate(memfd, size) + except OSError as exc: + msg = "Cannot size MIT-SHM buffer" + raise ScreenShotError(msg) from exc + + try: + mm = mmap(memfd, size, prot=PROT_READ | PROT_WRITE) # type:ignore[call-arg] + except OSError as exc: + msg = "Cannot map MIT-SHM buffer" + raise ScreenShotError(msg) from exc + + shmseg = xcb.ShmSeg(xcb.generate_id(self.conn).value) + + # XCB closes memfd after this call, on success or failure. + fd_for_attach = memfd + memfd = None + try: + xcb.shm_attach_fd(self.conn, shmseg, fd_for_attach, read_only=False) + except xcb.XError as exc: + msg = "Cannot attach MIT-SHM segment" + raise ScreenShotError(msg) from exc + + return _ShmSlot(shmseg=shmseg, buf=mm, size=size) + except Exception: + if mm is not None: + mm.close() + if memfd is not None: + os.close(memfd) + raise + + def _destroy_shm_slot(self, slot: _ShmSlot) -> None: + """Detach and close one shared-memory slot. + + This is only called when or after the SHM pool is cleaned up: + * By _cleanup_shm_slots, on free slots, either during close or + if SHM is found to be unavailable, or + * By the finalizer, if the slot is released after the MSS object + is closed + """ + if slot.buf is None: + return + if self.conn is not None: + with suppress(XProtoError, xcb.XError): + xcb.shm_detach(self.conn, slot.shmseg) + slot.buf.close() + slot.buf = None + + def _acquire_shm_slot(self, required_size: int) -> _ShmSlot: + """Take a slot from the free-list, growing if needed.""" + with self._shm_lock: + assert not self._shm_closed, "SHM pool has already been closed" # noqa: S101 + + for idx, slot in enumerate(self._free_shm_slots): + if slot.buf is not None and slot.size >= required_size: + self._free_shm_slots.pop(idx) + return slot + + # Create a new slot outside the lock to keep the critical section short. + slot = self._create_shm_slot(max(required_size, self._bufsize)) + # Since SHM can only be closed and _acquire can only be called during __init__, grab, or close, and those all + # hold a lock, shm cannot have been closed while we were creating the slot. + assert not self._shm_closed, "SHM pool closed unexpectedly" # noqa: S101 + + return slot + + def _release_shm_slot(self, slot: _ShmSlot) -> None: + """Return a slot to the free-list, or destroy it. + + This is called by the finalizer. It might be called during + grab, if a copy is needed, or at any time later. + """ + with self._shm_lock: + if not self._shm_closed: + self._free_shm_slots.append(slot) + return + # SHM is already closed. Destroy the slot now. + self._destroy_shm_slot(slot) + + def _cleanup_shm_slots(self) -> None: + """Retire SHM use and free any idle slots immediately. + + This is called during MSS close, or if SHM is discovered to be + unusable during setup or grab. + """ + with self._shm_lock: + self._shm_closed = True + idle_slots, self._free_shm_slots = self._free_shm_slots, [] + + for slot in idle_slots: + self._destroy_shm_slot(slot) + + def _shm_unavailable(self, msg: str, exc: Exception) -> ShmStatus: + """Record why SHM was disabled and clean up the pool.""" + self._shm_report_issue(msg, exc) + self._cleanup_shm_slots() + return ShmStatus.UNAVAILABLE + + def _setup_shm(self) -> ShmStatus: + """Probe MIT-SHM and seed the initial buffer pool.""" assert self.conn is not None # noqa: S101 try: @@ -89,115 +245,75 @@ def _setup_shm(self) -> ShmStatus: # noqa: PLR0911 self._shm_report_issue("MIT-SHM version too old", shm_version) return ShmStatus.UNAVAILABLE - # We allocate something large enough for the root, so we don't have to reallocate each time the window is - # resized. + # We allocate something large enough for the root for our initial buffers, to accommodate any grab request. self._bufsize = self.pref_screen.width_in_pixels * self.pref_screen.height_in_pixels * 4 if not hasattr(os, "memfd_create"): self._shm_report_issue("os.memfd_create not available") return ShmStatus.UNAVAILABLE - try: - self._memfd = os.memfd_create("mss-shm-buf", flags=os.MFD_CLOEXEC) # type: ignore[attr-defined] - except OSError as e: - return self._shm_unavailable("memfd_create failed", e) - os.ftruncate(self._memfd, self._bufsize) - try: - self._buf = mmap(self._memfd, self._bufsize, prot=PROT_READ) # type: ignore[call-arg] - except OSError as e: - return self._shm_unavailable("mmap failed", e) - self._shmseg = xcb.ShmSeg(xcb.generate_id(self.conn).value) - try: - # This will normally be what raises an exception if you're on a remote connection. - # XCB will close _memfd, on success or on failure. - try: - xcb.shm_attach_fd(self.conn, self._shmseg, self._memfd, read_only=False) - finally: - self._memfd = None - except xcb.XError as e: - return self._shm_unavailable("Cannot attach MIT-SHM segment", e) + # Initialize the number of buffers we expect to need. + for _ in range(_INITIAL_BUFFER_COUNT): + self._free_shm_slots.append(self._create_shm_slot(self._bufsize)) + except ScreenShotError as e: + return self._shm_unavailable("MIT-SHM setup failed", e) except Exception: - self._shutdown_shm() + self._cleanup_shm_slots() raise return ShmStatus.UNKNOWN - def _shm_unavailable(self, msg: str, exc: Exception) -> ShmStatus: - self._shm_report_issue(msg, exc) - self._shutdown_shm() - return ShmStatus.UNAVAILABLE - - def close(self) -> None: - self._shutdown_shm() - super().close() - - def _shutdown_shm(self) -> None: - # It would be nice to also try to tell the server to detach the shmseg, but we might be in an error path - # and don't know if that's possible. It's not like we'll leak a lot of them on the same connection anyway. - # This can be called in the path of partial initialization. - if self._buf is not None: - self._buf.close() - self._buf = None - if self._memfd is not None: - os.close(self._memfd) - self._memfd = None - - def _grab_xshmgetimage(self, monitor: Monitor) -> bytearray: + def _grab_xshmgetimage(self, monitor: Monitor) -> memoryview: + """Capture a monitor directly into a shared-memory slot.""" if self.conn is None: msg = "Cannot take screenshot while the connection is closed" raise ScreenShotError(msg) - assert self._buf is not None # noqa: S101 - assert self._shmseg is not None # noqa: S101 + # Presently, we request a buffer at least as big as our capture area. Another option would be to request a + # buffer at the root size: this uses more memory, but makes it more likely that the buffers can be reused after + # window resizes. This only matters if the initial buffers are in use still, and we have to create a new one. required_size = monitor["width"] * monitor["height"] * 4 - if required_size > self._bufsize: - # This is temporary. The permanent fix will depend on how - # issue https://github.com/BoboTiG/python-mss/issues/432 is resolved. - msg = ( - "Requested capture size exceeds the allocated buffer. If you have resized the screen, " - "please recreate your MSS object." - ) - raise ScreenShotError(msg) + slot = self._acquire_shm_slot(required_size) + assert slot.buf is not None # noqa: S101 - img_reply = xcb.shm_get_image( - self.conn, - self.drawable, - monitor["left"], - monitor["top"], - monitor["width"], - monitor["height"], - ALL_PLANES, - xcb.ImageFormat.ZPixmap, - self._shmseg, - 0, - ) - - if img_reply.depth != self.drawable_depth or img_reply.visual != self.drawable_visual_id: - # This should never happen; a window can't change its visual. - msg = ( - "Server returned an image with a depth or visual different than it initially reported: " - f"expected {self.drawable_depth},{hex(self.drawable_visual_id.value)}, " - f"got {img_reply.depth},{hex(img_reply.visual.value)}" + try: + img_reply = xcb.shm_get_image( + self.conn, + self.drawable, + monitor["left"], + monitor["top"], + monitor["width"], + monitor["height"], + ALL_PLANES, + xcb.ImageFormat.ZPixmap, + slot.shmseg, + 0, ) - raise ScreenShotError(msg) - # Snapshot the buffer into new bytearray. - new_size = monitor["width"] * monitor["height"] * 4 - # Slicing the memoryview creates a new memoryview that points to the relevant subregion. Making this and then - # copying it into a fresh bytearray is much faster than slicing the mmap object. Make sure we don't hold an - # open memoryview if an exception happens, since that will prevent us from closing self._buf during the stack - # unwind. - with memoryview(self._buf) as img_mv: - return bytearray(img_mv[:new_size]) + if img_reply.depth != self.drawable_depth or img_reply.visual != self.drawable_visual_id: + # This should never happen; a window can't change its visual. + msg = ( + "Server returned an image with a depth or visual different than it initially reported: " + f"expected {self.drawable_depth},{hex(self.drawable_visual_id.value)}, " + f"got {img_reply.depth},{hex(img_reply.visual.value)}" + ) + raise ScreenShotError(msg) # noqa: TRY301 Clearer this way than what TRY301 wants + + finalizer = partial(self._release_shm_slot, slot) + return finalizing_buffer(memoryview(slot.buf)[:required_size], finalizer) - def grab(self, monitor: Monitor) -> bytearray: + except Exception: + self._release_shm_slot(slot) + raise + + def grab(self, monitor: Monitor) -> memoryview | bytearray: """Retrieve all pixels from a monitor. Pixels have to be RGBX.""" if self.shm_status == ShmStatus.UNAVAILABLE: return super()._grab_xgetimage(monitor) # The usual path is just the next few lines. try: - rv = self._grab_xshmgetimage(monitor) + rv: memoryview | bytearray = self._grab_xshmgetimage(monitor) if self.shm_status != ShmStatus.AVAILABLE: self.shm_status = ShmStatus.AVAILABLE self.performance_status.append("MIT-SHM is working correctly.") @@ -224,6 +340,11 @@ def grab(self, monitor: Monitor) -> bytearray: # Using XShmGetImage failed, and using XGetImage worked. Use XGetImage in the future. self._shm_report_issue("MIT-SHM GetImage failed", e) self.shm_status = ShmStatus.UNAVAILABLE - self._shutdown_shm() + self._cleanup_shm_slots() return rv + + def close(self) -> None: + """Release SHM resources and then close the XCB connection.""" + self._cleanup_shm_slots() + super().close() diff --git a/src/tests/test_gnu_linux.py b/src/tests/test_gnu_linux.py index 26861aae..6d87f68b 100644 --- a/src/tests/test_gnu_linux.py +++ b/src/tests/test_gnu_linux.py @@ -4,7 +4,6 @@ from __future__ import annotations -import builtins import ctypes.util import platform from ctypes import CFUNCTYPE, POINTER, _Pointer, c_int @@ -14,8 +13,8 @@ import pytest import mss +import mss.buffer import mss.linux -import mss.linux.xcb import mss.linux.xlib from mss import MSS from mss.exception import ScreenShotError @@ -323,30 +322,43 @@ def test_shm_fallback() -> None: assert sct._impl.shm_status == mss.linux.xshmgetimage.ShmStatus.UNAVAILABLE -def test_exception_while_holding_memoryview(monkeypatch: pytest.MonkeyPatch) -> None: - """Verify that an exception at a particular point doesn't prevent cleanup. +def test_exception_while_wrapping_finalizing_buffer(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that wrapping failures still release the in-use SHM slot.""" + + def boom(_data: memoryview, _finalizer: Any) -> memoryview: + msg = "Boom!" + raise RuntimeError(msg) - The particular point is the window when the XShmGetImage's mmapped - buffer has a memoryview still outstanding, and the pixel data is - being copied into a bytearray. This can take a few milliseconds. - """ - # Force an exception during bytearray(img_mv) - real_bytearray = builtins.bytearray - - def boom(*args: list, **kwargs: dict[str, Any]) -> bytearray: - # Only explode when called with the memoryview (the code path we care about). - if len(args) > 0 and isinstance(args[0], memoryview): - # We still need to eliminate args from the stack frame, just like the fix. - del args, kwargs - msg = "Boom!" - raise RuntimeError(msg) - return real_bytearray(*args, **kwargs) - - # We have to be careful about the order in which we catch things. If we were to catch and discard the exception - # before the MSS object closes, it won't trigger the bug. That's why we have the pytest.raises outside the - # mss.MSS block. In addition, we do as much as we can before patching bytearray, to limit its scope. with pytest.raises(RuntimeError, match="Boom!"), mss.MSS(backend="xshmgetimage") as sct: # noqa: PT012 monitor = sct.monitors[0] with monkeypatch.context() as m: - m.setattr(builtins, "bytearray", boom) + m.setattr(mss.linux.xshmgetimage, "finalizing_buffer", boom) sct.grab(monitor) + + +@pytest.mark.skipif(not mss.buffer.FAST_PATH_AVAILABLE, reason="Tests post-3.12 behavior: dynamic pool growth") +def test_dynamic_shm_growth_allocation_failure_raises(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify dynamic pool growth failure raises instead of switching backends.""" + + with mss.MSS(backend="xshmgetimage") as sct: + assert isinstance(sct._impl, mss.linux.xshmgetimage.MSSImplXShmGetImage) # For Mypy + monitor = sct.monitors[0] + + first = sct.grab(monitor) + second = sct.grab(monitor) + + # Ensure we are in normal SHM operation before inducing a growth failure. + assert sct._impl.shm_status == mss.linux.xshmgetimage.ShmStatus.AVAILABLE + + def fail_growth(_size: int) -> Any: + msg = "Cannot allocate MIT-SHM buffer" + raise ScreenShotError(msg) + + with monkeypatch.context() as m: + m.setattr(sct._impl, "_create_shm_slot", fail_growth) + with pytest.raises(ScreenShotError, match="Cannot allocate MIT-SHM buffer"): + sct.grab(monitor) + + # Keep references alive until after the third grab attempt. + assert first.width > 0 + assert second.width > 0 From c6e8127b3f55d6fd96d7748a54dac1a531857912 Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Sun, 31 May 2026 23:09:32 -0700 Subject: [PATCH 03/12] Test improvements and small cleanups --- src/mss/buffer.py | 4 ++-- src/mss/linux/xshmgetimage.py | 17 +++++----------- src/tests/test_gnu_linux.py | 38 +++++++++++++++++++++++++++-------- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/src/mss/buffer.py b/src/mss/buffer.py index 7f4a9290..892fc0ba 100644 --- a/src/mss/buffer.py +++ b/src/mss/buffer.py @@ -164,8 +164,8 @@ from typing_extensions import Buffer -# You can always use this module, and finalizing_buffer. This variable is only conditionalizing things like test code. -# You shouldn't need it in implementations. +# You can always use this module, and finalizing_buffer. This variable is for conditionalizing things like test code or +# optimizations, but most code should always follow the same path. FAST_PATH_AVAILABLE = sys.version_info >= (3, 12) diff --git a/src/mss/linux/xshmgetimage.py b/src/mss/linux/xshmgetimage.py index 8c96d90f..ae1b5dc4 100644 --- a/src/mss/linux/xshmgetimage.py +++ b/src/mss/linux/xshmgetimage.py @@ -15,7 +15,6 @@ import enum import os -import sys from contextlib import suppress from dataclasses import dataclass from functools import partial @@ -23,7 +22,7 @@ from threading import Lock from typing import TYPE_CHECKING, Any -from mss.buffer import finalizing_buffer +import mss.buffer from mss.exception import ScreenShotError from mss.linux import xcb from mss.linux.base import ALL_PLANES, MSSImplXCBBase @@ -34,12 +33,6 @@ __all__ = () -# Quick note (this should go in the commit log, or something, not source, but I had to record it somewhere): -# On my home box, at 4k resolution, doing 1000 grabs + NumPy sums of the pixel data, with/without reusable buffers: -# * Enabled: 18.6 ms / each -# * Disabled: 22.6 ms / each -# * Delta: 17.9% faster - # For Python < 3.12, we only use one buffer. # # For Python >= 3.12, we have zero-copy buffers that the user owns. For those, we allocate two initial buffers. This @@ -53,7 +46,7 @@ # In that case, each ScreenShot object is not released until the next one has been assigned to img. That means that we # will need two buffers to handle that case zero-copy. Our free pool can always grow, but we start it with two to keep # the second capture from having a brief hiccup. -_INITIAL_BUFFER_COUNT = 2 if sys.version_info >= (3, 12) else 1 +_INITIAL_BUFFER_COUNT = 2 if mss.buffer.FAST_PATH_AVAILABLE else 1 class ShmStatus(enum.Enum): @@ -125,7 +118,7 @@ def _create_shm_slot(self, size: int) -> _ShmSlot: mm: mmap | None = None try: try: - memfd = os.memfd_create("mss-shm-buf", flags=os.MFD_CLOEXEC) # type:ignore[attr-defined] + memfd = os.memfd_create("mss-shm-buf", flags=os.MFD_CLOEXEC) # type: ignore[attr-defined] except OSError as exc: msg = "Cannot allocate MIT-SHM buffer" raise ScreenShotError(msg) from exc @@ -137,7 +130,7 @@ def _create_shm_slot(self, size: int) -> _ShmSlot: raise ScreenShotError(msg) from exc try: - mm = mmap(memfd, size, prot=PROT_READ | PROT_WRITE) # type:ignore[call-arg] + mm = mmap(memfd, size, prot=PROT_READ | PROT_WRITE) # type: ignore[call-arg] except OSError as exc: msg = "Cannot map MIT-SHM buffer" raise ScreenShotError(msg) from exc @@ -300,7 +293,7 @@ def _grab_xshmgetimage(self, monitor: Monitor) -> memoryview: raise ScreenShotError(msg) # noqa: TRY301 Clearer this way than what TRY301 wants finalizer = partial(self._release_shm_slot, slot) - return finalizing_buffer(memoryview(slot.buf)[:required_size], finalizer) + return mss.buffer.finalizing_buffer(memoryview(slot.buf)[:required_size], finalizer) except Exception: self._release_shm_slot(slot) diff --git a/src/tests/test_gnu_linux.py b/src/tests/test_gnu_linux.py index 6d87f68b..e1c67d10 100644 --- a/src/tests/test_gnu_linux.py +++ b/src/tests/test_gnu_linux.py @@ -322,18 +322,40 @@ def test_shm_fallback() -> None: assert sct._impl.shm_status == mss.linux.xshmgetimage.ShmStatus.UNAVAILABLE -def test_exception_while_wrapping_finalizing_buffer(monkeypatch: pytest.MonkeyPatch) -> None: - """Verify that wrapping failures still release the in-use SHM slot.""" +def test_finalizing_buffer_releases_shm_slot(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that the returned buffer releases its SHM slot when finalized.""" - def boom(_data: memoryview, _finalizer: Any) -> memoryview: - msg = "Boom!" - raise RuntimeError(msg) + with mss.MSS(backend="xshmgetimage") as sct: + assert isinstance(sct._impl, mss.linux.xshmgetimage.MSSImplXShmGetImage) # For Mypy + release_spy = spy_and_patch(monkeypatch, sct._impl, "_release_shm_slot") + + screenshot = sct.grab(sct.monitors[0]) + + if mss.buffer.FAST_PATH_AVAILABLE: + assert release_spy.call_count == 0 + + screenshot._raw.release() + + release_spy.assert_called_once() + + +def test_exception_while_wrapping_finalizing_buffer_releases_shm_slot(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify wrapping failures still release the in-use SHM slot.""" + + with mss.MSS(backend="xshmgetimage") as sct: + assert isinstance(sct._impl, mss.linux.xshmgetimage.MSSImplXShmGetImage) # For Mypy + release_spy = spy_and_patch(monkeypatch, sct._impl, "_release_shm_slot") + + def boom(_data: memoryview, _finalizer: Any) -> memoryview: + msg = "Boom!" + raise RuntimeError(msg) - with pytest.raises(RuntimeError, match="Boom!"), mss.MSS(backend="xshmgetimage") as sct: # noqa: PT012 - monitor = sct.monitors[0] with monkeypatch.context() as m: m.setattr(mss.linux.xshmgetimage, "finalizing_buffer", boom) - sct.grab(monitor) + with pytest.raises(RuntimeError, match="Boom!"): + sct.grab(sct.monitors[0]) + + release_spy.assert_called_once() @pytest.mark.skipif(not mss.buffer.FAST_PATH_AVAILABLE, reason="Tests post-3.12 behavior: dynamic pool growth") From 4f6fffda375801e62440624e7e74a036f3edbfcd Mon Sep 17 00:00:00 2001 From: Joel Holveck Date: Mon, 1 Jun 2026 07:52:08 +0000 Subject: [PATCH 04/12] Improve docs for reusable buffers --- docs/source/release-history/v11.0.0.md | 15 ++++++++++++++- docs/source/usage.rst | 21 ++++++++++----------- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/docs/source/release-history/v11.0.0.md b/docs/source/release-history/v11.0.0.md index 15ed8b8c..1bc0f941 100644 --- a/docs/source/release-history/v11.0.0.md +++ b/docs/source/release-history/v11.0.0.md @@ -33,6 +33,19 @@ Improved error handling when interacting with Win32 API, which will improve diag Device contexts are now acquired and released within each `grab()` call, allowing monitor enumeration to work even when `GetWindowDC(0)` fails (#509). +### Zero-Copy Screenshot Buffers (GNU/Linux, Python 3.12+) + +MSS now supports zero-copy screenshot buffers on GNU/Linux when running under Python 3.12 or later. Screenshot data can +be exposed directly from operating system buffers without first being copied into a Python-owned buffer. + +This removes an additional memory copy from the screenshot path and is enabled automatically with no application changes +required. + +In a benchmark capturing 3840×2160 screenshots as quickly as possible while forcing all pixel data to be read, +processing time decreased from 22.64 ms to 18.59 ms per frame (approximately 18% faster). + +Support for additional operating systems is planned. + ### General Improvements -The MSS context object will now always surface inner exceptions, even if `__exit__` may also generate an exception during tear-down. +The MSS context object will now always surface inner exceptions, even if `__exit__` may also generate an exception during tear-down. \ No newline at end of file diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 50963a16..512ca3e1 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -52,22 +52,21 @@ This is a much better usage, memory efficient:: Also, it is a good thing to save the MSS instance inside an attribute of your class and calling it when needed. +Direct Screenshot Buffers +========================= -Buffer Reuse -============ - -MSS will automatically use zero-copy reusable buffers, if possible. +On supported platforms, MSS can expose screenshot data directly from operating system buffers instead of copying it into +a separate Python-owned buffer. This reduces memory copying and can improve performance when processing screenshots with +libraries that support the Python buffer protocol, such as NumPy and OpenCV. -On one developer's system, at 3840x2160, taking screenshots as fast as possible and summing the data (to make sure all -the memory is actually accessed): +This optimization is enabled automatically and does not require any changes to application code. -- Zero-copy enabled: 18.59 ms -- Zero-copy disabled: 22.64 ms -- Improvement: 17.9% faster +Requirements: -Zero-copy buffers are only available with Python 3.12 or later. +- Python 3.12 or later +- GNU/Linux -Additionally, only GNU/Linux has the necessary support. Support for other operating systems is planned. +Support for additional operating systems is planned. Multithreading ============== From cc6689ae2e3ee50b6e903baa36f2ba3c13eb7efb Mon Sep 17 00:00:00 2001 From: Joel Holveck Date: Mon, 1 Jun 2026 08:49:26 +0000 Subject: [PATCH 05/12] Initial draft of tests These are straight from the AI, but I want to commit them to start testing in other versions of Python. --- src/tests/test_buffer.py | 168 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 src/tests/test_buffer.py diff --git a/src/tests/test_buffer.py b/src/tests/test_buffer.py new file mode 100644 index 00000000..df6fbb09 --- /dev/null +++ b/src/tests/test_buffer.py @@ -0,0 +1,168 @@ +"""This is part of the MSS Python's module. +Source: https://github.com/BoboTiG/python-mss. +""" + +import gc + +import numpy as np +import pytest + +from mss.buffer import FAST_PATH_AVAILABLE, _FinalizingBufferIntermediate, finalizing_buffer + + +def test_finalizing_buffer_preserves_readonly_and_returns_memoryview() -> None: + writable = bytearray(b"abcd") + writable_called = 0 + + def writable_finalizer() -> None: + nonlocal writable_called + writable_called += 1 + + writable_view = finalizing_buffer(writable, writable_finalizer) + assert isinstance(writable_view, memoryview) + assert not writable_view.readonly + + readonly = b"abcd" + readonly_called = 0 + + def readonly_finalizer() -> None: + nonlocal readonly_called + readonly_called += 1 + + readonly_view = finalizing_buffer(readonly, readonly_finalizer) + assert isinstance(readonly_view, memoryview) + assert readonly_view.readonly + + writable_view.release() + readonly_view.release() + gc.collect() + + assert writable_called == 1 + assert readonly_called == 1 + + +@pytest.mark.skipif(FAST_PATH_AVAILABLE, reason="Covers behavior only present before Python 3.12") +def test_finalizing_buffer_slow_path_copies_and_finalizes_immediately() -> None: + data = bytearray(b"abcd") + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + wrapped = finalizing_buffer(data, finalizer) + assert finalizer_calls == 1 + + data[0] = ord("Z") + assert wrapped.tobytes() == b"abcd" + + wrapped[1] = ord("Y") + assert data == bytearray(b"Zbcd") + + wrapped.release() + gc.collect() + assert finalizer_calls == 1 + + +@pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") +def test_finalizing_buffer_fast_path_is_zero_copy() -> None: + data = bytearray(b"abcd") + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + wrapped = finalizing_buffer(data, finalizer) + assert finalizer_calls == 0 + + data[0] = ord("Z") + assert wrapped[0] == ord("Z") + + wrapped[1] = ord("Y") + assert data[1] == ord("Y") + + wrapped.release() + gc.collect() + assert finalizer_calls == 1 + + +@pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") +def test_child_memoryview_defers_finalizer_until_child_release() -> None: + data = bytearray(b"abcdefgh") + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + parent = finalizing_buffer(data, finalizer) + child = memoryview(parent) + + del parent + gc.collect() + assert finalizer_calls == 0 + + child.release() + del child + gc.collect() + assert finalizer_calls == 1 + + +@pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") +def test_numpy_frombuffer_child_defers_finalizer_until_array_deleted() -> None: + data = bytearray(b"abcdefgh") + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + parent = finalizing_buffer(data, finalizer) + array = np.frombuffer(parent, dtype=np.uint8) + + del parent + gc.collect() + assert finalizer_calls == 0 + + del array + gc.collect() + assert finalizer_calls == 1 + + +def test_finalizer_runs_once() -> None: + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + wrapped = finalizing_buffer(bytearray(b"abcd"), finalizer) + assert finalizer_calls == 0 + + del wrapped + gc.collect() + assert finalizer_calls == 1 + + +@pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") +def test_intermediate_allows_single_buffer_request_and_release() -> None: + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + intermediate = _FinalizingBufferIntermediate(bytearray(b"abcd"), finalizer) + + view = intermediate.__buffer__(0) + assert view.tobytes() == b"abcd" + + with pytest.raises(AssertionError, match="Buffer can only be requested once"): + intermediate.__buffer__(0) + + intermediate.__release_buffer__(view) + assert finalizer_calls == 1 + + with pytest.raises(AssertionError, match="Buffer can only be released once"): + intermediate.__release_buffer__(view) From b4241def721015f40279694d129385dc1651b110 Mon Sep 17 00:00:00 2001 From: Joel Holveck Date: Mon, 1 Jun 2026 21:59:06 +0000 Subject: [PATCH 06/12] Some test fixes Adds a test for the new buffer module. The test is pretty basic, but it at least verifies that the buffer module is working and that the buffers it creates are usable by outside modules. This requires adding NumPy to the [tests] optional-dependencies group for all versions of Python, since that's the buffer consumer we use in the tests. --- pyproject.toml | 3 ++- src/mss/linux/xshmgetimage.py | 6 +++--- src/tests/test_setup.py | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 066ba845..615f2f51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,8 @@ docs = [ "sphinx-new-tab-link==0.8.1 ; python_version >= '3.12'", ] tests = [ - "numpy==2.4.3 ; sys_platform == 'linux' and python_version == '3.13'", + "numpy==2.4.3 ; python_version >= '3.11'", + "numpy==2.2.6 ; python_version < '3.11'", "pillow==12.2.0 ; sys_platform == 'linux' and python_version == '3.13'", "pytest==9.0.3", "pytest-cov==7.1.0", diff --git a/src/mss/linux/xshmgetimage.py b/src/mss/linux/xshmgetimage.py index ae1b5dc4..4268de7f 100644 --- a/src/mss/linux/xshmgetimage.py +++ b/src/mss/linux/xshmgetimage.py @@ -22,7 +22,7 @@ from threading import Lock from typing import TYPE_CHECKING, Any -import mss.buffer +from mss.buffer import FAST_PATH_AVAILABLE, finalizing_buffer from mss.exception import ScreenShotError from mss.linux import xcb from mss.linux.base import ALL_PLANES, MSSImplXCBBase @@ -46,7 +46,7 @@ # In that case, each ScreenShot object is not released until the next one has been assigned to img. That means that we # will need two buffers to handle that case zero-copy. Our free pool can always grow, but we start it with two to keep # the second capture from having a brief hiccup. -_INITIAL_BUFFER_COUNT = 2 if mss.buffer.FAST_PATH_AVAILABLE else 1 +_INITIAL_BUFFER_COUNT = 2 if FAST_PATH_AVAILABLE else 1 class ShmStatus(enum.Enum): @@ -293,7 +293,7 @@ def _grab_xshmgetimage(self, monitor: Monitor) -> memoryview: raise ScreenShotError(msg) # noqa: TRY301 Clearer this way than what TRY301 wants finalizer = partial(self._release_shm_slot, slot) - return mss.buffer.finalizing_buffer(memoryview(slot.buf)[:required_size], finalizer) + return finalizing_buffer(memoryview(slot.buf)[:required_size], finalizer) except Exception: self._release_shm_slot(slot) diff --git a/src/tests/test_setup.py b/src/tests/test_setup.py index 2381ced9..698a82e5 100644 --- a/src/tests/test_setup.py +++ b/src/tests/test_setup.py @@ -95,6 +95,7 @@ def test_sdist() -> None: f"mss-{__version__}/src/tests/conftest.py", f"mss-{__version__}/src/tests/res/monitor-1024x768.raw.zip", f"mss-{__version__}/src/tests/test_bgra_to_rgb.py", + f"mss-{__version__}/src/tests/test_buffer.py", f"mss-{__version__}/src/tests/test_cls_image.py", f"mss-{__version__}/src/tests/test_compat_10_1.py", f"mss-{__version__}/src/tests/test_compat_exports.py", From a677a8f7b015f03d90e299f48f164262d68f5ecb Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Mon, 1 Jun 2026 19:45:09 -0700 Subject: [PATCH 07/12] More test cleanups --- pyproject.toml | 5 +- src/mss/buffer.py | 2 +- src/tests/test_buffer.py | 133 ++++++++++++++++++++++++++------------- 3 files changed, 94 insertions(+), 46 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 615f2f51..20deab42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,9 +85,8 @@ docs = [ "sphinx-new-tab-link==0.8.1 ; python_version >= '3.12'", ] tests = [ - "numpy==2.4.3 ; python_version >= '3.11'", - "numpy==2.2.6 ; python_version < '3.11'", - "pillow==12.2.0 ; sys_platform == 'linux' and python_version == '3.13'", + "numpy==2.4.3 ; python_version >= '3.12'", + "pillow==12.2.0 ; python_version >= '3.12'", "pytest==9.0.3", "pytest-cov==7.1.0", "pytest-rerunfailures==16.3", diff --git a/src/mss/buffer.py b/src/mss/buffer.py index 892fc0ba..2a3605f3 100644 --- a/src/mss/buffer.py +++ b/src/mss/buffer.py @@ -179,7 +179,7 @@ class _FinalizingBufferIntermediate: The creator must provide a finalizer to ensure that resources are properly released when the underlying buffer is no longer needed. This will be invoked, with no arguments, after all the downstream - users, such as NumPy or PyTorch, have released their references to + users, such as NumPy or PIL, have released their references to the buffer. This is only useful on Python 3.12 and later; earlier versions do diff --git a/src/tests/test_buffer.py b/src/tests/test_buffer.py index df6fbb09..fffbf0e2 100644 --- a/src/tests/test_buffer.py +++ b/src/tests/test_buffer.py @@ -4,45 +4,50 @@ import gc -import numpy as np import pytest from mss.buffer import FAST_PATH_AVAILABLE, _FinalizingBufferIntermediate, finalizing_buffer -def test_finalizing_buffer_preserves_readonly_and_returns_memoryview() -> None: - writable = bytearray(b"abcd") - writable_called = 0 +def test_finalizer_runs_once() -> None: + finalizer_calls = 0 - def writable_finalizer() -> None: - nonlocal writable_called - writable_called += 1 + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 - writable_view = finalizing_buffer(writable, writable_finalizer) - assert isinstance(writable_view, memoryview) - assert not writable_view.readonly + wrapped = finalizing_buffer(bytearray(b"abcd"), finalizer) + assert finalizer_calls == 0 - readonly = b"abcd" - readonly_called = 0 + del wrapped + gc.collect() + assert finalizer_calls == 1 - def readonly_finalizer() -> None: - nonlocal readonly_called - readonly_called += 1 - readonly_view = finalizing_buffer(readonly, readonly_finalizer) - assert isinstance(readonly_view, memoryview) - assert readonly_view.readonly +@pytest.mark.parametrize(("buffer_class", "readonly"), [ + (bytearray, False), + (bytes, True), # type: ignore[list-item] +]) +def test_finalizing_buffer_preserves_readonly(buffer_class: type, readonly: bool) -> None: + base_buffer = buffer_class(b"abcd") + finalizer_calls = 0 - writable_view.release() - readonly_view.release() - gc.collect() + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + view = finalizing_buffer(base_buffer, finalizer) + assert finalizer_calls == 0 + assert isinstance(view, memoryview) + assert view.readonly == readonly - assert writable_called == 1 - assert readonly_called == 1 + view.release() + gc.collect() + assert finalizer_calls == 1 -@pytest.mark.skipif(FAST_PATH_AVAILABLE, reason="Covers behavior only present before Python 3.12") -def test_finalizing_buffer_slow_path_copies_and_finalizes_immediately() -> None: +@pytest.mark.skipif(FAST_PATH_AVAILABLE, reason="Covers behavior only present prior to Python 3.12") +def test_finalizing_buffer_slow_path() -> None: data = bytearray(b"abcd") finalizer_calls = 0 @@ -53,9 +58,9 @@ def finalizer() -> None: wrapped = finalizing_buffer(data, finalizer) assert finalizer_calls == 1 + # Ensure that it made a copy data[0] = ord("Z") assert wrapped.tobytes() == b"abcd" - wrapped[1] = ord("Y") assert data == bytearray(b"Zbcd") @@ -78,7 +83,6 @@ def finalizer() -> None: data[0] = ord("Z") assert wrapped[0] == ord("Z") - wrapped[1] = ord("Y") assert data[1] == ord("Y") @@ -88,7 +92,12 @@ def finalizer() -> None: @pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") -def test_child_memoryview_defers_finalizer_until_child_release() -> None: +def test_memoryview_release() -> None: + """Releasing a memoryview releases the buffer immediately + + CPython special-cases a memoryview of a memoryview (and + finalizing_buffer returns a memoryview), so we test it specially. + """ data = bytearray(b"abcdefgh") finalizer_calls = 0 @@ -96,21 +105,26 @@ def finalizer() -> None: nonlocal finalizer_calls finalizer_calls += 1 - parent = finalizing_buffer(data, finalizer) - child = memoryview(parent) + base = finalizing_buffer(data, finalizer) + child = memoryview(base) - del parent + del base gc.collect() assert finalizer_calls == 0 child.release() - del child gc.collect() assert finalizer_calls == 1 @pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") -def test_numpy_frombuffer_child_defers_finalizer_until_array_deleted() -> None: +def test_memoryview_del() -> None: + """Garbage-collecting a memoryview releases the buffer immediately + + CPython special-cases a memoryview of a memoryview (and + finalizing_buffer returns a memoryview), so we test it specially. + """ + data = bytearray(b"abcdefgh") finalizer_calls = 0 @@ -118,35 +132,65 @@ def finalizer() -> None: nonlocal finalizer_calls finalizer_calls += 1 - parent = finalizing_buffer(data, finalizer) - array = np.frombuffer(parent, dtype=np.uint8) + base = finalizing_buffer(data, finalizer) + child = memoryview(base) - del parent + del base gc.collect() assert finalizer_calls == 0 - del array + del child gc.collect() assert finalizer_calls == 1 -def test_finalizer_runs_once() -> None: +@pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") +def test_tree() -> None: + """A complex tree retains a single buffer until it's completely gone""" + import numpy as np # noqa: PLC0415 + from PIL import Image # noqa: PLC0415 + + data = bytearray(b"\x76\xB9\x00\xFF" * (100 * 100)) finalizer_calls = 0 def finalizer() -> None: nonlocal finalizer_calls finalizer_calls += 1 - wrapped = finalizing_buffer(bytearray(b"abcd"), finalizer) + # base + # \- array + # \- mv + # \- array_shaped + # \- img + + base = finalizing_buffer(data, finalizer) + array = np.frombuffer(base, dtype=np.uint8) + array_shaped = array.reshape((100, 100, 4)) + mv = memoryview(array) + img = Image.frombuffer("RGBA", (100, 100), array_shaped, "raw", "RGBA", 0, 1) + + # Ensure that the tree is zero-copy. + data[0] = 42 + assert img.getpixel((0, 0)) == (42, 0xB9, 0, 0xFF) + + # Ensure that if we delete much of the tree, the buffer still is retained. + del base + del array + del img + mv.release() # We explicitly call release, to test its path too, but just del would suffice. + del mv + gc.collect() assert finalizer_calls == 0 - del wrapped + # Now, it all gets released when we delete the last reference to the buffer. + del array_shaped gc.collect() assert finalizer_calls == 1 @pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") -def test_intermediate_allows_single_buffer_request_and_release() -> None: +def test_intermediate_enforces_single_use() -> None: + """Trying to reuse a _FinalizingBufferIntermediate asserts out.""" finalizer_calls = 0 def finalizer() -> None: @@ -155,7 +199,7 @@ def finalizer() -> None: intermediate = _FinalizingBufferIntermediate(bytearray(b"abcd"), finalizer) - view = intermediate.__buffer__(0) + view = intermediate.__buffer__(0) # 0: PyBUF_SIMPLE assert view.tobytes() == b"abcd" with pytest.raises(AssertionError, match="Buffer can only be requested once"): @@ -166,3 +210,8 @@ def finalizer() -> None: with pytest.raises(AssertionError, match="Buffer can only be released once"): intermediate.__release_buffer__(view) + + with pytest.raises(AssertionError, match="Buffer can only be requested once"): + intermediate.__buffer__(0) + + assert finalizer_calls == 1 From d9fba0752187e9ea27033d7822c836dc48726630 Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Mon, 1 Jun 2026 21:31:34 -0700 Subject: [PATCH 08/12] Bugfix previous commit In my test cleanup and expansion, I accidentally included an assumption that's only valid on 3.12+. --- src/tests/test_buffer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/test_buffer.py b/src/tests/test_buffer.py index fffbf0e2..9d659835 100644 --- a/src/tests/test_buffer.py +++ b/src/tests/test_buffer.py @@ -17,7 +17,7 @@ def finalizer() -> None: finalizer_calls += 1 wrapped = finalizing_buffer(bytearray(b"abcd"), finalizer) - assert finalizer_calls == 0 + assert finalizer_calls == 0 if FAST_PATH_AVAILABLE else 1 del wrapped gc.collect() @@ -37,7 +37,7 @@ def finalizer() -> None: finalizer_calls += 1 view = finalizing_buffer(base_buffer, finalizer) - assert finalizer_calls == 0 + assert finalizer_calls == 0 if FAST_PATH_AVAILABLE else 1 assert isinstance(view, memoryview) assert view.readonly == readonly From fb825ce3d7a48bab6601a334110b1b0ef23b086c Mon Sep 17 00:00:00 2001 From: Joel Ray Holveck Date: Mon, 1 Jun 2026 21:59:15 -0700 Subject: [PATCH 09/12] Last round of cleanups before submitting to a PR, I hope --- src/mss/linux/xshmgetimage.py | 30 ++++++++++++++++++------------ src/tests/test_buffer.py | 22 +++++++++++++--------- src/tests/test_setup.py | 2 +- 3 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/mss/linux/xshmgetimage.py b/src/mss/linux/xshmgetimage.py index 4268de7f..942f116c 100644 --- a/src/mss/linux/xshmgetimage.py +++ b/src/mss/linux/xshmgetimage.py @@ -15,7 +15,6 @@ import enum import os -from contextlib import suppress from dataclasses import dataclass from functools import partial from mmap import PROT_READ, PROT_WRITE, mmap # type: ignore[attr-defined] @@ -154,7 +153,7 @@ def _create_shm_slot(self, size: int) -> _ShmSlot: os.close(memfd) raise - def _destroy_shm_slot(self, slot: _ShmSlot) -> None: + def _destroy_shm_slot(self, slot: _ShmSlot, *, closing_conn: bool) -> None: """Detach and close one shared-memory slot. This is only called when or after the SHM pool is cleaned up: @@ -162,12 +161,19 @@ def _destroy_shm_slot(self, slot: _ShmSlot) -> None: if SHM is found to be unavailable, or * By the finalizer, if the slot is released after the MSS object is closed + + If the connection is being closed (rather than just falling back + to XGetImage), then we also tell the server that we're done with + the memory region. """ if slot.buf is None: return - if self.conn is not None: - with suppress(XProtoError, xcb.XError): - xcb.shm_detach(self.conn, slot.shmseg) + # If we're about to close the X connection, there's no need to explicitly tell the server about the detaches. + # What's more, the connection might be in an error state. We'll let the server detach all the segments at once + # when we disconnect. However, if we're destroying our SHM slots because XShmGetImage was for some reason found + # to be unsuitable after we created them, then we should be nice and let the server clean up resources. + if not closing_conn and self.conn is not None: + xcb.shm_detach(self.conn, slot.shmseg) slot.buf.close() slot.buf = None @@ -200,9 +206,9 @@ def _release_shm_slot(self, slot: _ShmSlot) -> None: self._free_shm_slots.append(slot) return # SHM is already closed. Destroy the slot now. - self._destroy_shm_slot(slot) + self._destroy_shm_slot(slot, closing_conn=False) - def _cleanup_shm_slots(self) -> None: + def _cleanup_shm_slots(self, *, closing_conn: bool) -> None: """Retire SHM use and free any idle slots immediately. This is called during MSS close, or if SHM is discovered to be @@ -213,12 +219,12 @@ def _cleanup_shm_slots(self) -> None: idle_slots, self._free_shm_slots = self._free_shm_slots, [] for slot in idle_slots: - self._destroy_shm_slot(slot) + self._destroy_shm_slot(slot, closing_conn=closing_conn) def _shm_unavailable(self, msg: str, exc: Exception) -> ShmStatus: """Record why SHM was disabled and clean up the pool.""" self._shm_report_issue(msg, exc) - self._cleanup_shm_slots() + self._cleanup_shm_slots(closing_conn=False) return ShmStatus.UNAVAILABLE def _setup_shm(self) -> ShmStatus: @@ -251,7 +257,7 @@ def _setup_shm(self) -> ShmStatus: except ScreenShotError as e: return self._shm_unavailable("MIT-SHM setup failed", e) except Exception: - self._cleanup_shm_slots() + self._cleanup_shm_slots(closing_conn=False) raise return ShmStatus.UNKNOWN @@ -333,11 +339,11 @@ def grab(self, monitor: Monitor) -> memoryview | bytearray: # Using XShmGetImage failed, and using XGetImage worked. Use XGetImage in the future. self._shm_report_issue("MIT-SHM GetImage failed", e) self.shm_status = ShmStatus.UNAVAILABLE - self._cleanup_shm_slots() + self._cleanup_shm_slots(closing_conn=False) return rv def close(self) -> None: """Release SHM resources and then close the XCB connection.""" - self._cleanup_shm_slots() + self._cleanup_shm_slots(closing_conn=True) super().close() diff --git a/src/tests/test_buffer.py b/src/tests/test_buffer.py index 9d659835..518fab07 100644 --- a/src/tests/test_buffer.py +++ b/src/tests/test_buffer.py @@ -24,10 +24,13 @@ def finalizer() -> None: assert finalizer_calls == 1 -@pytest.mark.parametrize(("buffer_class", "readonly"), [ - (bytearray, False), - (bytes, True), # type: ignore[list-item] -]) +@pytest.mark.parametrize( + ("buffer_class", "readonly"), + [ + (bytearray, False), + (bytes, True), # type: ignore[list-item] + ], +) def test_finalizing_buffer_preserves_readonly(buffer_class: type, readonly: bool) -> None: base_buffer = buffer_class(b"abcd") finalizer_calls = 0 @@ -124,7 +127,6 @@ def test_memoryview_del() -> None: CPython special-cases a memoryview of a memoryview (and finalizing_buffer returns a memoryview), so we test it specially. """ - data = bytearray(b"abcdefgh") finalizer_calls = 0 @@ -147,27 +149,29 @@ def finalizer() -> None: @pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") def test_tree() -> None: """A complex tree retains a single buffer until it's completely gone""" + # These imports are here instead of at the top, since we only install Pillow and NumPy on Python 3.12 and later. import numpy as np # noqa: PLC0415 from PIL import Image # noqa: PLC0415 - data = bytearray(b"\x76\xB9\x00\xFF" * (100 * 100)) + # Since we're using Pillow as one stage, we need something image-like: here, a rectangle of a pleasing green color. + data = bytearray(b"\x76\xb9\x00\xff" * (320 * 200)) finalizer_calls = 0 def finalizer() -> None: nonlocal finalizer_calls finalizer_calls += 1 + # Set up a tree of derived buffers of different types: # base # \- array # \- mv # \- array_shaped # \- img - base = finalizing_buffer(data, finalizer) array = np.frombuffer(base, dtype=np.uint8) - array_shaped = array.reshape((100, 100, 4)) + array_shaped = array.reshape((320, 200, 4)) mv = memoryview(array) - img = Image.frombuffer("RGBA", (100, 100), array_shaped, "raw", "RGBA", 0, 1) + img = Image.frombuffer("RGBA", (320, 200), array_shaped, "raw", "RGBA", 0, 1) # Ensure that the tree is zero-copy. data[0] = 42 diff --git a/src/tests/test_setup.py b/src/tests/test_setup.py index 698a82e5..e5c0c472 100644 --- a/src/tests/test_setup.py +++ b/src/tests/test_setup.py @@ -95,7 +95,7 @@ def test_sdist() -> None: f"mss-{__version__}/src/tests/conftest.py", f"mss-{__version__}/src/tests/res/monitor-1024x768.raw.zip", f"mss-{__version__}/src/tests/test_bgra_to_rgb.py", - f"mss-{__version__}/src/tests/test_buffer.py", + f"mss-{__version__}/src/tests/test_buffer.py", f"mss-{__version__}/src/tests/test_cls_image.py", f"mss-{__version__}/src/tests/test_compat_10_1.py", f"mss-{__version__}/src/tests/test_compat_exports.py", From e9af908550f2f16c4ca981eac2593c1a225fa13f Mon Sep 17 00:00:00 2001 From: Joel Holveck Date: Tue, 2 Jun 2026 06:54:52 +0000 Subject: [PATCH 10/12] Add tests for the MSS-level buffer reuse expectations We already had tests for the finalizing_buffer behavior, but this makes sure it survives at the higher level MSS API. --- src/tests/test_gnu_linux.py | 49 +++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/tests/test_gnu_linux.py b/src/tests/test_gnu_linux.py index e1c67d10..c415c359 100644 --- a/src/tests/test_gnu_linux.py +++ b/src/tests/test_gnu_linux.py @@ -358,6 +358,55 @@ def boom(_data: memoryview, _finalizer: Any) -> memoryview: release_spy.assert_called_once() +@pytest.mark.skipif( + not mss.buffer.FAST_PATH_AVAILABLE, + reason="Tests post-3.12 behavior: finalization after close", +) +def test_finalizer_after_close_destroys_shm_slot(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that a live buffer finalized after close destroys its SHM slot.""" + + with mss.MSS(backend="xshmgetimage") as sct: + assert isinstance(sct._impl, mss.linux.xshmgetimage.MSSImplXShmGetImage) # For Mypy + destroy_spy = spy_and_patch(monkeypatch, sct._impl, "_destroy_shm_slot") + + screenshot = sct.grab(sct.monitors[0]) + + destroyed_before_release = destroy_spy.call_count + + screenshot._raw.release() + + assert destroy_spy.call_count == destroyed_before_release + 1 + assert destroy_spy.call_args_list[-1].kwargs["closing_conn"] is False + + +@pytest.mark.skipif( + mss.buffer.FAST_PATH_AVAILABLE, + reason="Covers behavior only present prior to Python 3.12", +) +def test_finalizer_before_close_releases_shm_slot_immediately(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that the slow path finalizes the SHM slot before close.""" + + with mss.MSS(backend="xshmgetimage") as sct: + assert isinstance(sct._impl, mss.linux.xshmgetimage.MSSImplXShmGetImage) # For Mypy + release_spy = spy_and_patch(monkeypatch, sct._impl, "_release_shm_slot") + destroy_spy = spy_and_patch(monkeypatch, sct._impl, "_destroy_shm_slot") + + screenshot = sct.grab(sct.monitors[0]) + + # In slow-path environments, the buffer is finalized and released (returned to the pool) immediately. + assert release_spy.call_count == 1 + assert destroy_spy.call_count == 0 + + # At this point, the buffer should have been destroyed at close, since the slow path made a copy. + assert release_spy.call_count == 1 + assert destroy_spy.call_count == 1 + + screenshot._raw.release() + + assert release_spy.call_count == 1 + assert destroy_spy.call_count == 1 + + @pytest.mark.skipif(not mss.buffer.FAST_PATH_AVAILABLE, reason="Tests post-3.12 behavior: dynamic pool growth") def test_dynamic_shm_growth_allocation_failure_raises(monkeypatch: pytest.MonkeyPatch) -> None: """Verify dynamic pool growth failure raises instead of switching backends.""" From 44fbd58cf286258c1dccac91eeb82ef6f24b750f Mon Sep 17 00:00:00 2001 From: Joel Holveck Date: Tue, 2 Jun 2026 07:51:19 +0000 Subject: [PATCH 11/12] Fix an obscure race condition The buffer finalizers can run at any time, from any thread. This could even happen while the XCB connection is being closed. While it's hard to imagine, it could happen from the same thread that's closing the XCB connection, given some crazy weak references or something. Make sure that we don't call shm_detach from a finalizer at the same time as we're closing the XCB connection. If we're about to close the connection, even from a different thread than the one that is running the finalizer, there's no need to call shm_detach. --- src/mss/linux/xshmgetimage.py | 39 ++++++++++++++------- src/tests/test_gnu_linux.py | 65 ++++++++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 14 deletions(-) diff --git a/src/mss/linux/xshmgetimage.py b/src/mss/linux/xshmgetimage.py index 942f116c..e7211459 100644 --- a/src/mss/linux/xshmgetimage.py +++ b/src/mss/linux/xshmgetimage.py @@ -18,7 +18,7 @@ from dataclasses import dataclass from functools import partial from mmap import PROT_READ, PROT_WRITE, mmap # type: ignore[attr-defined] -from threading import Lock +from threading import RLock from typing import TYPE_CHECKING, Any from mss.buffer import FAST_PATH_AVAILABLE, finalizing_buffer @@ -74,15 +74,24 @@ class MSSImplXShmGetImage(MSSImplXCBBase): def __init__(self, *, display: str | bytes | None = None, with_cursor: bool = False) -> None: super().__init__(display=display, with_cursor=with_cursor) + # Protects SHM pool state and serializes XCB detach/disconnect calls. + # RLock is intentional: finalizers may run in re-entrant contexts. + self._shm_lock = RLock() # Free-list ownership model: # - a slot in this list is idle and available for reuse; # - a slot removed from this list is owned by grab/finalizer flow; # - finalization returns it here unless SHM has been closed (in which case it is destroyed) + # Protected by _shm_lock. self._free_shm_slots: list[_ShmSlot] = [] - self._shm_lock = Lock() # Once this is set, we no longer expect to use SHM and have # released the idle standby buffers already. + # Protected by _shm_lock. self._shm_closed = False + # Once set, SHM slot destruction should not attempt XCB shm_detach. This is because we're about to close the + # XCB connection (possibly in a different thread), and so XCB calls may fail. We'll just let the X server clean + # up the segments when the connection closes. + # Protected by _shm_lock. + self._closing_conn = False # Rather than trying to track the shm_status, we may be able to raise an exception in __init__ if XShmGetImage # isn't available. The factory in linux/__init__.py could then catch that and switch to XGetImage. @@ -153,7 +162,7 @@ def _create_shm_slot(self, size: int) -> _ShmSlot: os.close(memfd) raise - def _destroy_shm_slot(self, slot: _ShmSlot, *, closing_conn: bool) -> None: + def _destroy_shm_slot(self, slot: _ShmSlot) -> None: """Detach and close one shared-memory slot. This is only called when or after the SHM pool is cleaned up: @@ -168,12 +177,13 @@ def _destroy_shm_slot(self, slot: _ShmSlot, *, closing_conn: bool) -> None: """ if slot.buf is None: return + with self._shm_lock: # If we're about to close the X connection, there's no need to explicitly tell the server about the detaches. # What's more, the connection might be in an error state. We'll let the server detach all the segments at once # when we disconnect. However, if we're destroying our SHM slots because XShmGetImage was for some reason found # to be unsuitable after we created them, then we should be nice and let the server clean up resources. - if not closing_conn and self.conn is not None: - xcb.shm_detach(self.conn, slot.shmseg) + if not self._closing_conn: + xcb.shm_detach(self.conn, slot.shmseg) slot.buf.close() slot.buf = None @@ -206,9 +216,9 @@ def _release_shm_slot(self, slot: _ShmSlot) -> None: self._free_shm_slots.append(slot) return # SHM is already closed. Destroy the slot now. - self._destroy_shm_slot(slot, closing_conn=False) + self._destroy_shm_slot(slot) - def _cleanup_shm_slots(self, *, closing_conn: bool) -> None: + def _cleanup_shm_slots(self) -> None: """Retire SHM use and free any idle slots immediately. This is called during MSS close, or if SHM is discovered to be @@ -219,12 +229,12 @@ def _cleanup_shm_slots(self, *, closing_conn: bool) -> None: idle_slots, self._free_shm_slots = self._free_shm_slots, [] for slot in idle_slots: - self._destroy_shm_slot(slot, closing_conn=closing_conn) + self._destroy_shm_slot(slot) def _shm_unavailable(self, msg: str, exc: Exception) -> ShmStatus: """Record why SHM was disabled and clean up the pool.""" self._shm_report_issue(msg, exc) - self._cleanup_shm_slots(closing_conn=False) + self._cleanup_shm_slots() return ShmStatus.UNAVAILABLE def _setup_shm(self) -> ShmStatus: @@ -257,7 +267,7 @@ def _setup_shm(self) -> ShmStatus: except ScreenShotError as e: return self._shm_unavailable("MIT-SHM setup failed", e) except Exception: - self._cleanup_shm_slots(closing_conn=False) + self._cleanup_shm_slots() raise return ShmStatus.UNKNOWN @@ -339,11 +349,14 @@ def grab(self, monitor: Monitor) -> memoryview | bytearray: # Using XShmGetImage failed, and using XGetImage worked. Use XGetImage in the future. self._shm_report_issue("MIT-SHM GetImage failed", e) self.shm_status = ShmStatus.UNAVAILABLE - self._cleanup_shm_slots(closing_conn=False) + self._cleanup_shm_slots() return rv def close(self) -> None: """Release SHM resources and then close the XCB connection.""" - self._cleanup_shm_slots(closing_conn=True) - super().close() + with self._shm_lock: + self._closing_conn = True + self._cleanup_shm_slots() + with self._shm_lock: + super().close() diff --git a/src/tests/test_gnu_linux.py b/src/tests/test_gnu_linux.py index c415c359..5ccb5408 100644 --- a/src/tests/test_gnu_linux.py +++ b/src/tests/test_gnu_linux.py @@ -6,6 +6,7 @@ import ctypes.util import platform +import threading from ctypes import CFUNCTYPE, POINTER, _Pointer, c_int from typing import TYPE_CHECKING, Any from unittest.mock import Mock, NonCallableMock, patch @@ -376,7 +377,69 @@ def test_finalizer_after_close_destroys_shm_slot(monkeypatch: pytest.MonkeyPatch screenshot._raw.release() assert destroy_spy.call_count == destroyed_before_release + 1 - assert destroy_spy.call_args_list[-1].kwargs["closing_conn"] is False + + +@pytest.mark.skipif( + not mss.buffer.FAST_PATH_AVAILABLE, + reason="Tests post-3.12 behavior: threaded release during close", +) +def test_release_thread_during_close_does_not_detach(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify release from another thread during close does not call shm_detach.""" + + disconnect_started = threading.Event() + allow_disconnect = threading.Event() + release_started = threading.Event() + + real_disconnect = mss.linux.xcb.disconnect + detach_spy = Mock(wraps=mss.linux.xcb.shm_detach) + + def blocking_disconnect(conn: Any) -> None: + disconnect_started.set() + assert allow_disconnect.wait(timeout=5), "Timed out waiting to unblock disconnect" + real_disconnect(conn) + + with mss.MSS(backend="xshmgetimage") as sct: + assert isinstance(sct._impl, mss.linux.xshmgetimage.MSSImplXShmGetImage) # For Mypy + screenshot = sct.grab(sct.monitors[0]) + + monkeypatch.setattr(mss.linux.xcb, "disconnect", blocking_disconnect) + monkeypatch.setattr(mss.linux.xcb, "shm_detach", detach_spy) + + close_error: list[BaseException] = [] + release_error: list[BaseException] = [] + + def close_target() -> None: + try: + sct.close() + except BaseException as exc: # noqa: BLE001 + close_error.append(exc) + + def release_target() -> None: + try: + release_started.set() + screenshot._raw.release() + except BaseException as exc: # noqa: BLE001 + release_error.append(exc) + + close_thread = threading.Thread(target=close_target) + release_thread = threading.Thread(target=release_target) + + close_thread.start() + assert disconnect_started.wait(timeout=5), "Timed out waiting for close to reach disconnect" + + release_thread.start() + assert release_started.wait(timeout=5), "Timed out waiting for release thread to start" + + allow_disconnect.set() + + close_thread.join(timeout=5) + release_thread.join(timeout=5) + + assert not close_thread.is_alive(), "close thread did not finish" + assert not release_thread.is_alive(), "release thread did not finish" + assert not close_error + assert not release_error + detach_spy.assert_not_called() @pytest.mark.skipif( From e51e1ac5b5c4acb416525d3c429f1766af60fd6b Mon Sep 17 00:00:00 2001 From: Joel Holveck Date: Tue, 2 Jun 2026 08:00:08 +0000 Subject: [PATCH 12/12] Formatting fix --- src/mss/linux/xshmgetimage.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/mss/linux/xshmgetimage.py b/src/mss/linux/xshmgetimage.py index e7211459..766880e1 100644 --- a/src/mss/linux/xshmgetimage.py +++ b/src/mss/linux/xshmgetimage.py @@ -178,11 +178,13 @@ def _destroy_shm_slot(self, slot: _ShmSlot) -> None: if slot.buf is None: return with self._shm_lock: - # If we're about to close the X connection, there's no need to explicitly tell the server about the detaches. - # What's more, the connection might be in an error state. We'll let the server detach all the segments at once - # when we disconnect. However, if we're destroying our SHM slots because XShmGetImage was for some reason found - # to be unsuitable after we created them, then we should be nice and let the server clean up resources. + # If we're about to close the X connection, there's no need to explicitly tell the server about the + # detaches. What's more, the connection might be in an error state. We'll let the server detach all the + # segments at once when we disconnect. However, if we're destroying our SHM slots because XShmGetImage was + # for some reason found to be unsuitable after we created them, then we should be nice and let the server + # clean up resources. if not self._closing_conn: + assert self.conn is not None # noqa: S101 For MyPy xcb.shm_detach(self.conn, slot.shmseg) slot.buf.close() slot.buf = None