diff --git a/docs/source/release-history/v11.0.0.md b/docs/source/release-history/v11.0.0.md index 50ca654b..6601a08b 100644 --- a/docs/source/release-history/v11.0.0.md +++ b/docs/source/release-history/v11.0.0.md @@ -33,6 +33,19 @@ Improved error handling when interacting with Win32 API, which will improve diag Device contexts are now acquired and released within each `grab()` call, allowing monitor enumeration to work even when `GetWindowDC(0)` fails (#509). +### Zero-Copy Screenshot Buffers (GNU/Linux, Python 3.12+) + +MSS now supports zero-copy screenshot buffers on GNU/Linux when running under Python 3.12 or later. Screenshot data can +be exposed directly from operating system buffers without first being copied into a Python-owned buffer. + +This removes an additional memory copy from the screenshot path and is enabled automatically with no application changes +required. + +In a benchmark capturing 3840×2160 screenshots as quickly as possible while forcing all pixel data to be read, +processing time decreased from 22.64 ms to 18.59 ms per frame (approximately 18% faster). + +Support for additional operating systems is planned. + ### General Improvements The MSS context object will now always surface inner exceptions, even if `__exit__` may also generate an exception during tear-down. diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 4562b3c3..512ca3e1 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -52,6 +52,21 @@ This is a much better usage, memory efficient:: Also, it is a good thing to save the MSS instance inside an attribute of your class and calling it when needed. +Direct Screenshot Buffers +========================= + +On supported platforms, MSS can expose screenshot data directly from operating system buffers instead of copying it into +a separate Python-owned buffer. 
This reduces memory copying and can improve performance when processing screenshots with +libraries that support the Python buffer protocol, such as NumPy and OpenCV. + +This optimization is enabled automatically and does not require any changes to application code. + +Requirements: + +- Python 3.12 or later +- GNU/Linux + +Support for additional operating systems is planned. Multithreading ============== diff --git a/pyproject.toml b/pyproject.toml index 47de8d7b..0babaf7e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,8 +85,8 @@ docs = [ "sphinx-new-tab-link==0.8.1 ; python_version >= '3.12'", ] tests = [ - "numpy==2.4.3 ; sys_platform == 'linux' and python_version == '3.13'", - "pillow==12.2.0 ; sys_platform == 'linux' and python_version == '3.13'", + "numpy==2.4.3 ; python_version >= '3.12'", + "pillow==12.2.0 ; python_version >= '3.12'", "pytest==9.0.3", "pytest-cov==7.1.0", "pytest-rerunfailures==16.3", diff --git a/src/mss/base.py b/src/mss/base.py index 963eb4cd..342cdfc9 100644 --- a/src/mss/base.py +++ b/src/mss/base.py @@ -18,7 +18,7 @@ from collections.abc import Callable, Iterator from types import TracebackType - from typing_extensions import Self + from typing_extensions import Buffer, Self from mss.models import Monitor, Monitors, Size @@ -89,7 +89,7 @@ def cursor(self) -> ScreenShot | None: """Retrieve all cursor data. Pixels have to be RGB.""" @abstractmethod - def grab(self, monitor: Monitor, /) -> bytearray | tuple[bytearray, Size]: + def grab(self, monitor: Monitor, /) -> Buffer | tuple[Buffer, Size]: """Retrieve all pixels from a monitor. Pixels have to be RGB. If the monitor size is not in pixel units, include a Size in diff --git a/src/mss/buffer.py b/src/mss/buffer.py new file mode 100644 index 00000000..2a3605f3 --- /dev/null +++ b/src/mss/buffer.py @@ -0,0 +1,250 @@ +"""Buffers with Finalizers + +This is an implementation of buffer objects with Python finalizers, +specific to the needs of MSS. 
+ +# Caller Contract + +The entry point is `finalizing_buffer`. This is intended to be called +by `MSSImplementation` subclasses. They provide a buffer (such as a +ctypes array or mmap object) and a finalizer, and are given a +`memoryview` object. Once the memoryview is garbage collected, and +the consumers downstream of that memoryview have released their views +of the buffer, the finalizer will be invoked (with no arguments). + +At that time, the `MSSImplementation` may release the buffer, return +it to a pool for reuse, etc. + +This finalizer may be called at any time, from any thread. It may be +called after the MSSImplementation's `close()` method has been called. +Implementations must take care not to invalidate their buffers during +`close()`, but rather only after finalization. + +The finalizer may also be called before `finalizing_buffer()` returns. +This may happen if the implementation needs to make a copy rather than +using the originally-provided buffer (which is the case on Python +versions prior to 3.12). + +(Some more caveats appear at the end of this docstring.) + +# Background + +The Python buffer protocol lets different objects share underlying +memory. For instance, a NumPy ndarray, a Python bytearray object, and +a PyTorch Tensor object can all share the same underlying memory. +This allows interoperability between these systems without requiring +copies. + +Copying ("blitting") all the pixels in a screenshot takes time; +copying a 4K (3840x2160) BGRA image can take several milliseconds. If +an application is attempting to operate at 60 FPS, each copy consumes +a meaningful fraction of the frame budget. + +For a high-performance screenshot library such as MSS, it is therefore +important to minimize copies. Ideally, screenshot data would remain +in the buffer originally allocated by the operating system, such as +the memory returned by CreateDIBSection on Windows or a shared memory +segment on X11. This approach is commonly called "zero-copy". 
+ +Getting the buffer to the user is only half the problem. MSS also +needs to know when the user is finished with the buffer's contents so +that the underlying resources can be reused or released. + +Most code that uses the buffer protocol is written in C. Since Python +3.0, the C-level buffer protocol has provided a mechanism for +exporters to learn when their buffers are no longer in use. However, +the corresponding Python-level API (which can be used by C consumers) +was not added until Python 3.12. + +Buffer lifetime is not the same as Python object lifetime. A user may +pass the returned memoryview to NumPy, PIL, PyTorch, or other +libraries. Those libraries may keep the exported buffer alive after +the original Python memoryview object is no longer reachable. + +Therefore, the lifetime of the returned memoryview object is not a +reliable signal that the buffer is no longer in use. Other objects +may still hold references to the buffer after that memoryview has been +destroyed. To know when the buffer can safely be reused or released, +MSS relies on the buffer protocol's release mechanism. + +The buffer protocol permits a wide variety of consumer behaviors and +derived-buffer relationships. Rather than attempting to model all of +those interactions directly, this implementation delegates that +complexity to Python's existing buffer-management machinery. + +## Performance note + +As a rough reference, copying a 3840x2160 BGRA screenshot on +contemporary hardware (Amazon EC2 m8i.large, Intel Xeon 6, DDR5-7200) +takes approximately 2.5 ms. At 60 FPS, that is about 15% of the +available frame time for a single copy. These numbers are intended +only to provide intuition about the cost of copies; actual performance +varies substantially by hardware and memory subsystem. + +# Design + +The central design decision in this file is that MSS interacts with +exactly one downstream buffer consumer: a memoryview. 
+ +A memoryview is Python's standard object for representing a buffer. +It already implements the reference tracking, buffer export, slicing, +and format-conversion behavior required by the buffer protocol. + +Notably, memoryview objects do not pass buffer requests upstream to +arbitrary exporters. Once a memoryview has been created, it manages +downstream consumers itself. + +This means MSS only needs to reason about a single interaction: the +interaction between `_FinalizingBufferIntermediate` and the memoryview +created from it. + +One idea that has been proposed is to attach a weakref finalizer +directly to a memoryview object and use that as the signal that the +buffer is no longer in use. Testing has shown that this is not +sufficient. A memoryview Python object may be finalized while +downstream consumers still hold active references to the underlying +buffer. + +To obtain a correct signal, MSS uses the Python-side buffer protocol +introduced in Python 3.12 via the `__buffer__` and +`__release_buffer__` methods. + +An instance of `_FinalizingBufferIntermediate` is created and exactly +one memoryview is constructed from it. That memoryview is returned to +the caller. + +The memoryview tracks all downstream users of the buffer. When all of +those users have released their references, the memoryview +automatically invokes `_FinalizingBufferIntermediate.__release_buffer__`. + +That method invokes the caller-provided finalizer, which can release +or recycle the underlying storage. + +If this implementation appears more indirect than necessary, that +indirection is intentional. It narrows the portion of the buffer +protocol that MSS must reason about and test. + +# Caveats and Invariants + +* The finalizer may run after `MSSImplementation.close()` has been + called. `close()` must not free, reuse, or otherwise invalidate + buffers that may still be visible to users. + +* The finalizer may run at any time and on any thread. 
Finalizer code + must therefore be thread-safe and must not assume that it executes + on the thread that created the buffer. + +* On Python versions prior to 3.12, `finalizing_buffer()` creates a + copy of the data and invokes the finalizer immediately. In this + case, the finalizer may run before `finalizing_buffer()` returns. + +* `_FinalizingBufferIntermediate` intentionally supports exactly one + buffer request. This restriction simplifies reasoning about + correctness and should not be removed without carefully considering + the resulting buffer-lifetime semantics. + +* `_FinalizingBufferIntermediate` remains reachable through + `memoryview.obj`. Consumers must treat this as an implementation + detail and must not invoke `__buffer__()` or `__release_buffer__()` + directly. + +* Finalizer execution during interpreter shutdown is not guaranteed. + Implementations should not rely on finalizers running during process + termination. +""" + +from __future__ import annotations + +import sys +from threading import Lock +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Callable + + from typing_extensions import Buffer + +# You can always use this module, and finalizing_buffer. This variable is for conditionalizing things like test code or +# optimizations, but most code should always follow the same path. +FAST_PATH_AVAILABLE = sys.version_info >= (3, 12) + + +class _FinalizingBufferIntermediate: + """Finalizing buffer class. + + Contrary to the buffer protocol, this class only allows a single + buffer to be created. This simplifies the implementation and + reasoning. + + The creator must provide a finalizer to ensure that resources are + properly released when the underlying buffer is no longer needed. + This will be invoked, with no arguments, after all the downstream + users, such as NumPy or PIL, have released their references to + the buffer. 
+ + This is only useful on Python 3.12 and later; earlier versions do + not support the __buffer__ and __release_buffer__ methods. + + This class should only be used by the finalizing_buffer function. + It is not appropriate for other uses! + """ + + def __init__(self, data: Buffer, finalizer: Callable) -> None: + self._mv: memoryview | None = memoryview(data) + self._finalizer = finalizer + # The remainder of these shouldn't be necessary. As a consequence of the __buffer__ contract and the + # implementation of finalizing_buffer, only one call to __buffer__ and one call to __release_buffer__ should be + # made, and never simultaneously. But we still include them out of an abundance of caution. + self._buffer_invoked = False + self._release_invoked = False + self._lock = Lock() + + def __buffer__(self, _flags: int) -> memoryview: + with self._lock: + assert not self._buffer_invoked, "Buffer can only be requested once" # noqa: S101 + self._buffer_invoked = True + assert self._mv is not None, "Buffer has already been released" # noqa: S101 + return self._mv + + def __release_buffer__(self, _buffer: memoryview) -> None: + with self._lock: + assert not self._release_invoked, "Buffer can only be released once" # noqa: S101 + self._release_invoked = True + assert self._mv is not None, "Buffer has already been released" # noqa: S101 + # We need to release the memoryview itself, so that when the finalizer is invoked, the underlying buffer object + # doesn't think there are still exported buffers. (mmap, for instance, won't close a region with exported + # buffers.) + self._mv.release() + self._mv = None # Extra-defensive + self._finalizer() + + +def finalizing_buffer(data: Buffer, finalizer: Callable) -> memoryview: + """Create a finalizing buffer or a copy depending on Python version. + + The finalizer will be invoked when the buffer is no longer in use, + with a caveat. This will only track uses downstream of the + returned buffer. 
If the input buffer is also used in other + places, those are not accounted for. + + On Python 3.12 and later, this returns a memoryview object that + provides a reusable buffer interface. On earlier versions, this + returns a copy of the data, and invokes the finalizer immediately + after the copy is made. + + This preserves read/write semantics of the original data: if the + original buffer is read-only, the returned memoryview will be + read-only. + """ + if FAST_PATH_AVAILABLE: + # Fast path: we can use the Python 3.12 features + return memoryview(_FinalizingBufferIntermediate(data, finalizer)) + # Slow path: copy the data. + with memoryview(data) as mv: + # We create a memoryview of the original data so that we can tell if it's read-only or not. We can't return + # this memoryview, since we're about to invoke the finalizer to release the buffer it got its data from. + copied_data = bytes(mv) if mv.readonly else bytearray(mv) + finalizer() + # We could return copied_data directly and still have a perfectly fine buffer, but always returning a memoryview + # provides more consistency. + return memoryview(copied_data) diff --git a/src/mss/linux/base.py b/src/mss/linux/base.py index df702aed..972c9ce1 100644 --- a/src/mss/linux/base.py +++ b/src/mss/linux/base.py @@ -453,6 +453,10 @@ def _grab_xgetimage(self, monitor: Monitor, /) -> bytearray: # Now, save the image. This is a reference into the img_reply structure. img_data_arr = xcb.get_image_data(img_reply) # Copy this into a new bytearray, so that it will persist after we clear the image structure. + # + # We might be able to hold onto img_reply in a finalizing_buffer finalizer, so that we can use the image data + # without copying. That would be more efficient, but it would be a bit more complex, and presently the + # XGetImage implementation is already a slow and less-common path. 
img_data = bytearray(img_data_arr) if img_reply.depth != self.drawable_depth or img_reply.visual != self.drawable_visual_id: diff --git a/src/mss/linux/xshmgetimage.py b/src/mss/linux/xshmgetimage.py index b7cc3646..b7cd2981 100644 --- a/src/mss/linux/xshmgetimage.py +++ b/src/mss/linux/xshmgetimage.py @@ -15,9 +15,13 @@ import enum import os -from mmap import PROT_READ, mmap # type: ignore[attr-defined] +from dataclasses import dataclass +from functools import partial +from mmap import PROT_READ, PROT_WRITE, mmap # type: ignore[attr-defined] +from threading import RLock from typing import TYPE_CHECKING, Any +from mss.buffer import FAST_PATH_AVAILABLE, finalizing_buffer from mss.exception import ScreenShotError from mss.linux import xcb from mss.linux.base import ALL_PLANES, MSSImplXCBBase @@ -28,6 +32,21 @@ __all__ = () +# For Python < 3.12, we only use one buffer. +# +# For Python >= 3.12, we have zero-copy buffers that the user owns. For those, we allocate two initial buffers. This +# is for the common case: +# +# with mss() as sct: +# while True: +# img = sct.grab(...) # noqa: ERA001 +# process(img) # noqa: ERA001 +# +# In that case, each ScreenShot object is not released until the next one has been assigned to img. That means that we +# will need two buffers to handle that case zero-copy. Our free pool can always grow, but we start it with two to keep +# the second capture from having a brief hiccup. +_INITIAL_BUFFER_COUNT = 2 if FAST_PATH_AVAILABLE else 1 + class ShmStatus(enum.Enum): """Availability of the MIT-SHM extension for this backend.""" @@ -37,6 +56,13 @@ class ShmStatus(enum.Enum): UNAVAILABLE = enum.auto() # We know SHM GetImage is unusable; always use XGetImage. +@dataclass(slots=True) +class _ShmSlot: + shmseg: xcb.ShmSeg + buf: mmap | None # Set to None when it's closed, for extra verification + size: int + + class MSSImplXShmGetImage(MSSImplXCBBase): """XCB backend using XShmGetImage with an automatic XGetImage fallback. 
@@ -48,10 +74,24 @@ class MSSImplXShmGetImage(MSSImplXCBBase): def __init__(self, *, display: str | bytes | None = None, with_cursor: bool = False) -> None: super().__init__(display=display, with_cursor=with_cursor) - # These are the objects we need to clean up when we shut down. They are created in _setup_shm. - self._memfd: int | None = None - self._buf: mmap | None = None - self._shmseg: xcb.ShmSeg | None = None + # Protects SHM pool state and serializes XCB detach/disconnect calls. + # RLock is intentional: finalizers may run in re-entrant contexts. + self._shm_lock = RLock() + # Free-list ownership model: + # - a slot in this list is idle and available for reuse; + # - a slot removed from this list is owned by grab/finalizer flow; + # - finalization returns it here unless SHM has been closed (in which case it is destroyed) + # Protected by _shm_lock. + self._free_shm_slots: list[_ShmSlot] = [] + # Once this is set, we no longer expect to use SHM and have + # released the idle standby buffers already. + # Protected by _shm_lock. + self._shm_closed = False + # Once set, SHM slot destruction should not attempt XCB shm_detach. This is because we're about to close the + # XCB connection (possibly in a different thread), and so XCB calls may fail. We'll just let the X server clean + # up the segments when the connection closes. + # Protected by _shm_lock. + self._closing_conn = False # Rather than trying to track the shm_status, we may be able to raise an exception in __init__ if XShmGetImage # isn't available. The factory in linux/__init__.py could then catch that and switch to XGetImage. @@ -73,7 +113,136 @@ def _shm_report_issue(self, msg: str, *args: Any) -> None: full_msg += " | " + ", ".join(str(arg) for arg in args) self.performance_status.append(full_msg) - def _setup_shm(self) -> ShmStatus: # noqa: PLR0911 + def _create_shm_slot(self, size: int) -> _ShmSlot: + """Allocate and attach one shared-memory slot. 
+ + This is called to seed the initial pool during setup, and when a grab + finds no suitable slot on the free list. The caller owns the new slot, and + is responsible for ensuring it is put on the free list or destroyed. + """ + assert self.conn is not None # noqa: S101 + + memfd: int | None = None + mm: mmap | None = None + try: + try: + memfd = os.memfd_create("mss-shm-buf", flags=os.MFD_CLOEXEC) # type: ignore[attr-defined] + except OSError as exc: + msg = "Cannot allocate MIT-SHM buffer" + raise ScreenShotError(msg) from exc + + try: + os.ftruncate(memfd, size) + except OSError as exc: + msg = "Cannot size MIT-SHM buffer" + raise ScreenShotError(msg) from exc + + try: + mm = mmap(memfd, size, prot=PROT_READ | PROT_WRITE) # type: ignore[call-arg] + except OSError as exc: + msg = "Cannot map MIT-SHM buffer" + raise ScreenShotError(msg) from exc + + shmseg = xcb.ShmSeg(xcb.generate_id(self.conn).value) + + # XCB closes memfd after this call, on success or failure. + fd_for_attach = memfd + memfd = None + try: + xcb.shm_attach_fd(self.conn, shmseg, fd_for_attach, read_only=False) + except xcb.XError as exc: + msg = "Cannot attach MIT-SHM segment" + raise ScreenShotError(msg) from exc + + return _ShmSlot(shmseg=shmseg, buf=mm, size=size) + except Exception: + if mm is not None: + mm.close() + if memfd is not None: + os.close(memfd) + raise + + def _destroy_shm_slot(self, slot: _ShmSlot) -> None: + """Detach and close one shared-memory slot. + + This is only called when or after the SHM pool is cleaned up: + * By _cleanup_shm_slots, on free slots, either during close or + if SHM is found to be unavailable, or + * By the finalizer, if the slot is released after the SHM pool + is closed + + Unless the connection is being closed (that is, if we are just + falling back to XGetImage), we also tell the server that we're + done with the memory region.
+ """ + if slot.buf is None: + return + with self._shm_lock: + # If we're about to close the X connection, there's no need to explicitly tell the server about the + # detaches. What's more, the connection might be in an error state. We'll let the server detach all the + # segments at once when we disconnect. However, if we're destroying our SHM slots because XShmGetImage was + # for some reason found to be unsuitable after we created them, then we should be nice and let the server + # clean up resources. + if not self._closing_conn: + assert self.conn is not None # noqa: S101 For MyPy + # One possibility might be to make this a best-effort shutdown, not a hard failure. However, I + # generally don't like suppressing errors if there's not a compelling reason. + xcb.shm_detach(self.conn, slot.shmseg) + slot.buf.close() + slot.buf = None + + def _acquire_shm_slot(self, required_size: int) -> _ShmSlot: + """Take a slot from the free-list, growing if needed.""" + with self._shm_lock: + assert not self._shm_closed, "SHM pool has already been closed" # noqa: S101 + + for idx, slot in enumerate(self._free_shm_slots): + if slot.buf is not None and slot.size >= required_size: + self._free_shm_slots.pop(idx) + return slot + + # Create a new slot outside the lock to keep the critical section short. + slot = self._create_shm_slot(max(required_size, self._bufsize)) + # Since SHM can only be closed and _acquire can only be called during __init__, grab, or close, and those all + # hold a lock, shm cannot have been closed while we were creating the slot. + assert not self._shm_closed, "SHM pool closed unexpectedly" # noqa: S101 + + return slot + + def _release_shm_slot(self, slot: _ShmSlot) -> None: + """Return a slot to the free-list, or destroy it. + + This is called by the finalizer, which may run during grab (if a copy + is needed) or at any later time, and by _grab_xshmgetimage if the capture fails.
+ """ + with self._shm_lock: + if not self._shm_closed: + self._free_shm_slots.append(slot) + return + # SHM is already closed. Destroy the slot now. + self._destroy_shm_slot(slot) + + def _cleanup_shm_slots(self) -> None: + """Retire SHM use and free any idle slots immediately. + + This is called during MSS close, or if SHM is discovered to be + unusable during setup or grab. + """ + with self._shm_lock: + self._shm_closed = True + idle_slots, self._free_shm_slots = self._free_shm_slots, [] + + for slot in idle_slots: + self._destroy_shm_slot(slot) + + def _shm_unavailable(self, msg: str, exc: Exception) -> ShmStatus: + """Record why SHM was disabled and clean up the pool.""" + self._shm_report_issue(msg, exc) + self._cleanup_shm_slots() + return ShmStatus.UNAVAILABLE + + def _setup_shm(self) -> ShmStatus: + """Probe MIT-SHM and seed the initial buffer pool.""" assert self.conn is not None # noqa: S101 try: @@ -89,115 +258,75 @@ def _setup_shm(self) -> ShmStatus: # noqa: PLR0911 self._shm_report_issue("MIT-SHM version too old", shm_version) return ShmStatus.UNAVAILABLE - # We allocate something large enough for the root, so we don't have to reallocate each time the window is - # resized. + # We allocate something large enough for the root for our initial buffers, to accommodate any grab request. 
self._bufsize = self.pref_screen.width_in_pixels * self.pref_screen.height_in_pixels * 4 if not hasattr(os, "memfd_create"): self._shm_report_issue("os.memfd_create not available") return ShmStatus.UNAVAILABLE - try: - self._memfd = os.memfd_create("mss-shm-buf", flags=os.MFD_CLOEXEC) # type: ignore[attr-defined] - except OSError as e: - return self._shm_unavailable("memfd_create failed", e) - os.ftruncate(self._memfd, self._bufsize) - try: - self._buf = mmap(self._memfd, self._bufsize, prot=PROT_READ) # type: ignore[call-arg] - except OSError as e: - return self._shm_unavailable("mmap failed", e) - self._shmseg = xcb.ShmSeg(xcb.generate_id(self.conn).value) - try: - # This will normally be what raises an exception if you're on a remote connection. - # XCB will close _memfd, on success or on failure. - try: - xcb.shm_attach_fd(self.conn, self._shmseg, self._memfd, read_only=False) - finally: - self._memfd = None - except xcb.XError as e: - return self._shm_unavailable("Cannot attach MIT-SHM segment", e) + # Initialize the number of buffers we expect to need. + for _ in range(_INITIAL_BUFFER_COUNT): + self._free_shm_slots.append(self._create_shm_slot(self._bufsize)) + except ScreenShotError as e: + return self._shm_unavailable("MIT-SHM setup failed", e) except Exception: - self._shutdown_shm() + self._cleanup_shm_slots() raise return ShmStatus.UNKNOWN - def _shm_unavailable(self, msg: str, exc: Exception) -> ShmStatus: - self._shm_report_issue(msg, exc) - self._shutdown_shm() - return ShmStatus.UNAVAILABLE - - def close(self) -> None: - self._shutdown_shm() - super().close() - - def _shutdown_shm(self) -> None: - # It would be nice to also try to tell the server to detach the shmseg, but we might be in an error path - # and don't know if that's possible. It's not like we'll leak a lot of them on the same connection anyway. - # This can be called in the path of partial initialization. 
- if self._buf is not None: - self._buf.close() - self._buf = None - if self._memfd is not None: - os.close(self._memfd) - self._memfd = None - - def _grab_xshmgetimage(self, monitor: Monitor) -> bytearray: + def _grab_xshmgetimage(self, monitor: Monitor) -> memoryview: + """Capture a monitor directly into a shared-memory slot.""" if self.conn is None: msg = "Cannot take screenshot while the connection is closed" raise ScreenShotError(msg) - assert self._buf is not None # noqa: S101 - assert self._shmseg is not None # noqa: S101 + # Presently, we request a buffer at least as big as our capture area. Another option would be to request a + # buffer at the root size: this uses more memory, but makes it more likely that the buffers can be reused after + # window resizes. This only matters if the initial buffers are in use still, and we have to create a new one. required_size = monitor["width"] * monitor["height"] * 4 - if required_size > self._bufsize: - # This is temporary. The permanent fix will depend on how - # issue https://github.com/BoboTiG/python-mss/issues/432 is resolved. - msg = ( - "Requested capture size exceeds the allocated buffer. If you have resized the screen, " - "please recreate your MSS object." - ) - raise ScreenShotError(msg) + slot = self._acquire_shm_slot(required_size) + assert slot.buf is not None # noqa: S101 - img_reply = xcb.shm_get_image( - self.conn, - self.drawable, - monitor["left"], - monitor["top"], - monitor["width"], - monitor["height"], - ALL_PLANES, - xcb.ImageFormat.ZPixmap, - self._shmseg, - 0, - ) - - if img_reply.depth != self.drawable_depth or img_reply.visual != self.drawable_visual_id: - # This should never happen; a window can't change its visual. 
- msg = ( - "Server returned an image with a depth or visual different than it initially reported: " - f"expected {self.drawable_depth},{hex(self.drawable_visual_id.value)}, " - f"got {img_reply.depth},{hex(img_reply.visual.value)}" + try: + img_reply = xcb.shm_get_image( + self.conn, + self.drawable, + monitor["left"], + monitor["top"], + monitor["width"], + monitor["height"], + ALL_PLANES, + xcb.ImageFormat.ZPixmap, + slot.shmseg, + 0, ) - raise ScreenShotError(msg) - # Snapshot the buffer into new bytearray. - new_size = monitor["width"] * monitor["height"] * 4 - # Slicing the memoryview creates a new memoryview that points to the relevant subregion. Making this and then - # copying it into a fresh bytearray is much faster than slicing the mmap object. Make sure we don't hold an - # open memoryview if an exception happens, since that will prevent us from closing self._buf during the stack - # unwind. - with memoryview(self._buf) as img_mv: - return bytearray(img_mv[:new_size]) + if img_reply.depth != self.drawable_depth or img_reply.visual != self.drawable_visual_id: + # This should never happen; a window can't change its visual. + msg = ( + "Server returned an image with a depth or visual different than it initially reported: " + f"expected {self.drawable_depth},{hex(self.drawable_visual_id.value)}, " + f"got {img_reply.depth},{hex(img_reply.visual.value)}" + ) + raise ScreenShotError(msg) # noqa: TRY301 Clearer this way than what TRY301 wants - def grab(self, monitor: Monitor) -> bytearray: + finalizer = partial(self._release_shm_slot, slot) + return finalizing_buffer(memoryview(slot.buf)[:required_size], finalizer) + + except Exception: + self._release_shm_slot(slot) + raise + + def grab(self, monitor: Monitor) -> memoryview | bytearray: """Retrieve all pixels from a monitor. Pixels have to be RGBX.""" if self.shm_status == ShmStatus.UNAVAILABLE: return super()._grab_xgetimage(monitor) # The usual path is just the next few lines. 
try: - rv = self._grab_xshmgetimage(monitor) + rv: memoryview | bytearray = self._grab_xshmgetimage(monitor) if self.shm_status != ShmStatus.AVAILABLE: self.shm_status = ShmStatus.AVAILABLE self.performance_status.append("MIT-SHM is working correctly.") @@ -224,6 +353,14 @@ def grab(self, monitor: Monitor) -> bytearray: # Using XShmGetImage failed, and using XGetImage worked. Use XGetImage in the future. self._shm_report_issue("MIT-SHM GetImage failed", e) self.shm_status = ShmStatus.UNAVAILABLE - self._shutdown_shm() + self._cleanup_shm_slots() return rv + + def close(self) -> None: + """Release SHM resources and then close the XCB connection.""" + with self._shm_lock: + self._closing_conn = True + self._cleanup_shm_slots() + with self._shm_lock: + super().close() diff --git a/src/tests/test_buffer.py b/src/tests/test_buffer.py new file mode 100644 index 00000000..39eeaa67 --- /dev/null +++ b/src/tests/test_buffer.py @@ -0,0 +1,221 @@ +"""This is part of the MSS Python's module. +Source: https://github.com/BoboTiG/python-mss. 
+""" + +import gc + +import pytest + +from mss.buffer import FAST_PATH_AVAILABLE, _FinalizingBufferIntermediate, finalizing_buffer + + +def test_finalizer_runs_once() -> None: + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + wrapped = finalizing_buffer(bytearray(b"abcd"), finalizer) + assert finalizer_calls == (0 if FAST_PATH_AVAILABLE else 1) + + del wrapped + gc.collect() + assert finalizer_calls == 1 + + +@pytest.mark.parametrize( + ("buffer_class", "readonly"), + [ + (bytearray, False), + (bytes, True), # type: ignore[list-item] + ], +) +def test_finalizing_buffer_preserves_readonly(buffer_class: type, readonly: bool) -> None: + base_buffer = buffer_class(b"abcd") + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + view = finalizing_buffer(base_buffer, finalizer) + assert finalizer_calls == (0 if FAST_PATH_AVAILABLE else 1) + assert isinstance(view, memoryview) + assert view.readonly == readonly + + view.release() + gc.collect() + assert finalizer_calls == 1 + + +@pytest.mark.skipif(FAST_PATH_AVAILABLE, reason="Covers behavior only present prior to Python 3.12") +def test_finalizing_buffer_slow_path() -> None: + data = bytearray(b"abcd") + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + wrapped = finalizing_buffer(data, finalizer) + assert finalizer_calls == 1 + + # Ensure that it made a copy + data[0] = ord("Z") + assert wrapped.tobytes() == b"abcd" + wrapped[1] = ord("Y") + assert data == bytearray(b"Zbcd") + + wrapped.release() + gc.collect() + assert finalizer_calls == 1 + + +@pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") +def test_finalizing_buffer_fast_path_is_zero_copy() -> None: + data = bytearray(b"abcd") + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + wrapped = 
finalizing_buffer(data, finalizer) + assert finalizer_calls == 0 + + data[0] = ord("Z") + assert wrapped[0] == ord("Z") + wrapped[1] = ord("Y") + assert data[1] == ord("Y") + + wrapped.release() + gc.collect() + assert finalizer_calls == 1 + + +@pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") +def test_memoryview_release() -> None: + """Releasing a memoryview releases the buffer immediately + + CPython special-cases a memoryview of a memoryview (and + finalizing_buffer returns a memoryview), so we test it specially. + """ + data = bytearray(b"abcdefgh") + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + base = finalizing_buffer(data, finalizer) + child = memoryview(base) + + del base + gc.collect() + assert finalizer_calls == 0 + + child.release() + gc.collect() + assert finalizer_calls == 1 + + +@pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") +def test_memoryview_del() -> None: + """Garbage-collecting a memoryview releases the buffer immediately + + CPython special-cases a memoryview of a memoryview (and + finalizing_buffer returns a memoryview), so we test it specially. + """ + data = bytearray(b"abcdefgh") + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + base = finalizing_buffer(data, finalizer) + child = memoryview(base) + + del base + gc.collect() + assert finalizer_calls == 0 + + del child + gc.collect() + assert finalizer_calls == 1 + + +@pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") +def test_tree() -> None: + """A complex tree retains a single buffer until it's completely gone""" + # These imports are here instead of at the top, since we only install Pillow and NumPy on Python 3.12 and later. 
+ import numpy as np # noqa: PLC0415 + from PIL import Image # noqa: PLC0415 + + # Since we're using Pillow as one stage, we need something image-like: here, a rectangle of a pleasing green color. + data = bytearray(b"\x76\xb9\x00\xff" * (320 * 200)) + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + # Set up a tree of derived buffers of different types: + # base + # \- array + # \- mv + # \- array_shaped + # \- img + base = finalizing_buffer(data, finalizer) + array = np.frombuffer(base, dtype=np.uint8) + array_shaped = array.reshape((320, 200, 4)) + mv = memoryview(array) + img = Image.frombuffer("RGBA", (320, 200), array_shaped, "raw", "RGBA", 0, 1) + + # Ensure that the tree is zero-copy. + data[0] = 42 + assert img.getpixel((0, 0)) == (42, 0xB9, 0, 0xFF) + + # Ensure that if we delete much of the tree, the buffer still is retained. + del base + del array + del img + mv.release() # We explicitly call release, to test its path too, but just del would suffice. + del mv + gc.collect() + assert finalizer_calls == 0 + + # Now, it all gets released when we delete the last reference to the buffer. 
+ del array_shaped + gc.collect() + assert finalizer_calls == 1 + + +@pytest.mark.skipif(not FAST_PATH_AVAILABLE, reason="Covers behavior only present in Python 3.12+") +def test_intermediate_enforces_single_use() -> None: + """Trying to reuse a _FinalizingBufferIntermediate asserts out.""" + finalizer_calls = 0 + + def finalizer() -> None: + nonlocal finalizer_calls + finalizer_calls += 1 + + intermediate = _FinalizingBufferIntermediate(bytearray(b"abcd"), finalizer) + + view = intermediate.__buffer__(0) # 0: PyBUF_SIMPLE + assert view.tobytes() == b"abcd" + + with pytest.raises(AssertionError, match="Buffer can only be requested once"): + intermediate.__buffer__(0) + + intermediate.__release_buffer__(view) + assert finalizer_calls == 1 + + with pytest.raises(AssertionError, match="Buffer can only be released once"): + intermediate.__release_buffer__(view) + + with pytest.raises(AssertionError, match="Buffer can only be requested once"): + intermediate.__buffer__(0) + + assert finalizer_calls == 1 diff --git a/src/tests/test_gnu_linux.py b/src/tests/test_gnu_linux.py index 26861aae..5ccb5408 100644 --- a/src/tests/test_gnu_linux.py +++ b/src/tests/test_gnu_linux.py @@ -4,9 +4,9 @@ from __future__ import annotations -import builtins import ctypes.util import platform +import threading from ctypes import CFUNCTYPE, POINTER, _Pointer, c_int from typing import TYPE_CHECKING, Any from unittest.mock import Mock, NonCallableMock, patch @@ -14,8 +14,8 @@ import pytest import mss +import mss.buffer import mss.linux -import mss.linux.xcb import mss.linux.xlib from mss import MSS from mss.exception import ScreenShotError @@ -323,30 +323,176 @@ def test_shm_fallback() -> None: assert sct._impl.shm_status == mss.linux.xshmgetimage.ShmStatus.UNAVAILABLE -def test_exception_while_holding_memoryview(monkeypatch: pytest.MonkeyPatch) -> None: - """Verify that an exception at a particular point doesn't prevent cleanup. 
+def test_finalizing_buffer_releases_shm_slot(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that the returned buffer releases its SHM slot when finalized.""" - The particular point is the window when the XShmGetImage's mmapped - buffer has a memoryview still outstanding, and the pixel data is - being copied into a bytearray. This can take a few milliseconds. - """ - # Force an exception during bytearray(img_mv) - real_bytearray = builtins.bytearray - - def boom(*args: list, **kwargs: dict[str, Any]) -> bytearray: - # Only explode when called with the memoryview (the code path we care about). - if len(args) > 0 and isinstance(args[0], memoryview): - # We still need to eliminate args from the stack frame, just like the fix. - del args, kwargs + with mss.MSS(backend="xshmgetimage") as sct: + assert isinstance(sct._impl, mss.linux.xshmgetimage.MSSImplXShmGetImage) # For Mypy + release_spy = spy_and_patch(monkeypatch, sct._impl, "_release_shm_slot") + + screenshot = sct.grab(sct.monitors[0]) + + if mss.buffer.FAST_PATH_AVAILABLE: + assert release_spy.call_count == 0 + + screenshot._raw.release() + + release_spy.assert_called_once() + + +def test_exception_while_wrapping_finalizing_buffer_releases_shm_slot(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify wrapping failures still release the in-use SHM slot.""" + + with mss.MSS(backend="xshmgetimage") as sct: + assert isinstance(sct._impl, mss.linux.xshmgetimage.MSSImplXShmGetImage) # For Mypy + release_spy = spy_and_patch(monkeypatch, sct._impl, "_release_shm_slot") + + def boom(_data: memoryview, _finalizer: Any) -> memoryview: msg = "Boom!" raise RuntimeError(msg) - return real_bytearray(*args, **kwargs) - # We have to be careful about the order in which we catch things. If we were to catch and discard the exception - # before the MSS object closes, it won't trigger the bug. That's why we have the pytest.raises outside the - # mss.MSS block. 
In addition, we do as much as we can before patching bytearray, to limit its scope. - with pytest.raises(RuntimeError, match="Boom!"), mss.MSS(backend="xshmgetimage") as sct: # noqa: PT012 + with monkeypatch.context() as m: + m.setattr(mss.linux.xshmgetimage, "finalizing_buffer", boom) + with pytest.raises(RuntimeError, match="Boom!"): + sct.grab(sct.monitors[0]) + + release_spy.assert_called_once() + + +@pytest.mark.skipif( + not mss.buffer.FAST_PATH_AVAILABLE, + reason="Tests post-3.12 behavior: finalization after close", +) +def test_finalizer_after_close_destroys_shm_slot(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that a live buffer finalized after close destroys its SHM slot.""" + + with mss.MSS(backend="xshmgetimage") as sct: + assert isinstance(sct._impl, mss.linux.xshmgetimage.MSSImplXShmGetImage) # For Mypy + destroy_spy = spy_and_patch(monkeypatch, sct._impl, "_destroy_shm_slot") + + screenshot = sct.grab(sct.monitors[0]) + + destroyed_before_release = destroy_spy.call_count + + screenshot._raw.release() + + assert destroy_spy.call_count == destroyed_before_release + 1 + + +@pytest.mark.skipif( + not mss.buffer.FAST_PATH_AVAILABLE, + reason="Tests post-3.12 behavior: threaded release during close", +) +def test_release_thread_during_close_does_not_detach(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify release from another thread during close does not call shm_detach.""" + + disconnect_started = threading.Event() + allow_disconnect = threading.Event() + release_started = threading.Event() + + real_disconnect = mss.linux.xcb.disconnect + detach_spy = Mock(wraps=mss.linux.xcb.shm_detach) + + def blocking_disconnect(conn: Any) -> None: + disconnect_started.set() + assert allow_disconnect.wait(timeout=5), "Timed out waiting to unblock disconnect" + real_disconnect(conn) + + with mss.MSS(backend="xshmgetimage") as sct: + assert isinstance(sct._impl, mss.linux.xshmgetimage.MSSImplXShmGetImage) # For Mypy + screenshot = sct.grab(sct.monitors[0]) 
+ + monkeypatch.setattr(mss.linux.xcb, "disconnect", blocking_disconnect) + monkeypatch.setattr(mss.linux.xcb, "shm_detach", detach_spy) + + close_error: list[BaseException] = [] + release_error: list[BaseException] = [] + + def close_target() -> None: + try: + sct.close() + except BaseException as exc: # noqa: BLE001 + close_error.append(exc) + + def release_target() -> None: + try: + release_started.set() + screenshot._raw.release() + except BaseException as exc: # noqa: BLE001 + release_error.append(exc) + + close_thread = threading.Thread(target=close_target) + release_thread = threading.Thread(target=release_target) + + close_thread.start() + assert disconnect_started.wait(timeout=5), "Timed out waiting for close to reach disconnect" + + release_thread.start() + assert release_started.wait(timeout=5), "Timed out waiting for release thread to start" + + allow_disconnect.set() + + close_thread.join(timeout=5) + release_thread.join(timeout=5) + + assert not close_thread.is_alive(), "close thread did not finish" + assert not release_thread.is_alive(), "release thread did not finish" + assert not close_error + assert not release_error + detach_spy.assert_not_called() + + +@pytest.mark.skipif( + mss.buffer.FAST_PATH_AVAILABLE, + reason="Covers behavior only present prior to Python 3.12", +) +def test_finalizer_before_close_releases_shm_slot_immediately(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify that the slow path finalizes the SHM slot before close.""" + + with mss.MSS(backend="xshmgetimage") as sct: + assert isinstance(sct._impl, mss.linux.xshmgetimage.MSSImplXShmGetImage) # For Mypy + release_spy = spy_and_patch(monkeypatch, sct._impl, "_release_shm_slot") + destroy_spy = spy_and_patch(monkeypatch, sct._impl, "_destroy_shm_slot") + + screenshot = sct.grab(sct.monitors[0]) + + # In slow-path environments, the buffer is finalized and released (returned to the pool) immediately. 
+ assert release_spy.call_count == 1 + assert destroy_spy.call_count == 0 + + # At this point, the buffer should have been destroyed at close, since the slow path made a copy. + assert release_spy.call_count == 1 + assert destroy_spy.call_count == 1 + + screenshot._raw.release() + + assert release_spy.call_count == 1 + assert destroy_spy.call_count == 1 + + +@pytest.mark.skipif(not mss.buffer.FAST_PATH_AVAILABLE, reason="Tests post-3.12 behavior: dynamic pool growth") +def test_dynamic_shm_growth_allocation_failure_raises(monkeypatch: pytest.MonkeyPatch) -> None: + """Verify dynamic pool growth failure raises instead of switching backends.""" + + with mss.MSS(backend="xshmgetimage") as sct: + assert isinstance(sct._impl, mss.linux.xshmgetimage.MSSImplXShmGetImage) # For Mypy monitor = sct.monitors[0] + + first = sct.grab(monitor) + second = sct.grab(monitor) + + # Ensure we are in normal SHM operation before inducing a growth failure. + assert sct._impl.shm_status == mss.linux.xshmgetimage.ShmStatus.AVAILABLE + + def fail_growth(_size: int) -> Any: + msg = "Cannot allocate MIT-SHM buffer" + raise ScreenShotError(msg) + with monkeypatch.context() as m: - m.setattr(builtins, "bytearray", boom) - sct.grab(monitor) + m.setattr(sct._impl, "_create_shm_slot", fail_growth) + with pytest.raises(ScreenShotError, match="Cannot allocate MIT-SHM buffer"): + sct.grab(monitor) + + # Keep references alive until after the third grab attempt. 
+ assert first.width > 0 + assert second.width > 0 diff --git a/src/tests/test_setup.py b/src/tests/test_setup.py index fde4dcea..e5c0c472 100644 --- a/src/tests/test_setup.py +++ b/src/tests/test_setup.py @@ -70,6 +70,7 @@ def test_sdist() -> None: f"mss-{__version__}/src/mss/__init__.py", f"mss-{__version__}/src/mss/__main__.py", f"mss-{__version__}/src/mss/base.py", + f"mss-{__version__}/src/mss/buffer.py", f"mss-{__version__}/src/mss/darwin.py", f"mss-{__version__}/src/mss/exception.py", f"mss-{__version__}/src/mss/factory.py", @@ -94,6 +95,7 @@ def test_sdist() -> None: f"mss-{__version__}/src/tests/conftest.py", f"mss-{__version__}/src/tests/res/monitor-1024x768.raw.zip", f"mss-{__version__}/src/tests/test_bgra_to_rgb.py", + f"mss-{__version__}/src/tests/test_buffer.py", f"mss-{__version__}/src/tests/test_cls_image.py", f"mss-{__version__}/src/tests/test_compat_10_1.py", f"mss-{__version__}/src/tests/test_compat_exports.py", @@ -145,6 +147,7 @@ def test_wheel() -> None: "mss/__init__.py", "mss/__main__.py", "mss/base.py", + "mss/buffer.py", "mss/darwin.py", "mss/exception.py", "mss/factory.py",