From 9ec8a847eb27e7b9a1acee11a9e8d7f1d6cc6e71 Mon Sep 17 00:00:00 2001 From: John Parent Date: Tue, 15 Dec 2020 11:57:03 -0500 Subject: [PATCH 01/20] Windows pywin32 locking Locking ranges is not currently supported on Windows. pywintypes.OVERLAPPED in the Windows lock implementation may not be a class member (#22244) Windows: Lock entire file even if lock-by-range is requested (#24183) Windows lock timeout (#25189) Fixup improper cross-platform lock catching Add locking test to Windows CI (#25233) Co-authored-by: lou.lawrence@kitware.com --- lib/spack/spack/llnl/util/lock.py | 133 ++++++++++++++++++++++++++++-- 1 file changed, 124 insertions(+), 9 deletions(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index dfecfbbf5f6613..28b2ac5d5db69e 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -9,15 +9,22 @@ import time from datetime import datetime from types import TracebackType -from typing import IO, Callable, Dict, Generator, Optional, Tuple, Type +from typing import IO, Callable, ContextManager, Dict, Generator, Optional, Tuple, Type, Union +from sys import platform as _platform +from typing import Dict, Tuple # novm from spack.llnl.util import lang, tty from ..string import plural -if sys.platform != "win32": +is_windows = _platform == 'win32' +if not is_windows: import fcntl +else: + import win32con + import win32file + import pywintypes # isort:skip __all__ = [ "Lock", @@ -151,8 +158,17 @@ def _attempts_str(wait_time, nattempts): class LockType: - READ = 0 - WRITE = 1 + if is_windows: + self.LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK # exclusive lock + self.LOCK_SH = 0 # shared lock, default + self.LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY # non-blocking + self.LOCK_CATCH = pywintypes.error + else: + self.LOCK_EX = fcntl.LOCK_EX + self.LOCK_SH = fcntl.LOCK_SH + self.LOCK_NB = fcntl.LOCK_NB + self.LOCK_UN = fcntl.LOCK_UN + self.LOCK_CATCH = IOError @staticmethod def to_str(tid): @@ -173,6 +189,26 @@ def is_valid(op: int) -> bool: return op == LockType.READ or op == LockType.WRITE +def lock_checking(func): + from functools import wraps + + @wraps(func) + def win_lock(self, *args, **kwargs): + if is_windows and self._reads > 0: + self._partial_unlock() + try: + suc = func(self, *args, **kwargs) + except Exception as e: + if self._current_lock: + timeout = kwargs.get('timeout', None) + self._lock(self._current_lock, timeout=timeout) + raise e + else: + suc = func(self, *args, **kwargs) + return suc + return win_lock + + class Lock: """This is an implementation of a filesystem lock using Python's lockf. @@ -243,6 +279,10 @@ def __init__( self.host: Optional[str] = None self.old_host: Optional[str] = None + # Mapping of supported locks to description + self.lock_type = {self.LOCK_SH: 'read', self.LOCK_EX: 'write'} + self._current_lock = None + def _ensure_valid_handle(self) -> IO[bytes]: """Return a valid file handle for the lock file, opening or re-opening as needed. @@ -292,6 +332,16 @@ def _ensure_valid_handle(self) -> IO[bytes]: return self._file_ref.fh + def __lock_fail_condition(self, e): + if is_windows: + # 33 "The process cannot access the file because another + # process has locked a portion of the file." + # 32 "The process cannot access the file because it is being + # used by another process" + return e.args[0] not in (32, 33) + else: + return e.errno not in (errno.EAGAIN, errno.EACCES) + @staticmethod def _poll_interval_generator( _wait_times: Optional[Tuple[float, float, float]] = None, @@ -345,6 +395,7 @@ def __setstate__(self, state): self._reads = 0 self._writes = 0 + @lock_checking def _lock(self, op: int, timeout: Optional[float] = None) -> Tuple[float, int]: """This takes a lock using POSIX locks (``fcntl.lockf``). @@ -394,9 +445,25 @@ def _poll_lock(self, op: int) -> bool: assert self._file_ref is not None, "cannot poll a lock without the file being set" fh = self._file_ref.fh.fileno() module_op = LockType.to_module(op) + try: # Try to get the lock (will raise if not available.) - fcntl.lockf(fh, module_op | fcntl.LOCK_NB, self._length, self._start, os.SEEK_SET) + if is_windows: + hfile = win32file._get_osfhandle(self._file.fileno()) + win32file.LockFileEx(hfile, + module_op | self.LOCK_NB, # flags + 0, + 0xffff0000, + pywintypes.OVERLAPPED()) + else: + # Try to get the lock (will raise if not available.) + fcntl.lockf( + self._fh, + module_op | fcntl.LOCK_NB, + self._length, + self._start, + os.SEEK_SET, + ) # help for debugging distributed locking if self.debug: @@ -409,7 +476,7 @@ def _poll_lock(self, op: int) -> bool: ) # Exclusive locks write their PID/host - if module_op == fcntl.LOCK_EX: + if op == self.LOCK_EX: self._write_log_debug_data() return True @@ -418,12 +485,29 @@ def _poll_lock(self, op: int) -> bool: # EAGAIN and EACCES == locked by another process (so try again) if e.errno not in (errno.EAGAIN, errno.EACCES): raise + except self.LOCK_CATCH as e: + # check if lock failure or lock is already held + if self.__lock_fail_condition(e): + raise return False + + def _ensure_parent_directory(self) -> str: + parent = os.path.dirname(self.path) + # relative paths to lockfiles in the current directory have no parent + if not parent: + return "." + os.makedirs(parent, exist_ok=True) + return parent + def _read_log_debug_data(self) -> None: """Read PID and host data out of the file if it is there.""" assert self._file_ref is not None, "cannot read debug log without the file being set" + if is_windows: + # Not implemented for windows + return + self.old_pid = self.pid self.old_host = self.host @@ -438,6 +522,10 @@ def _read_log_debug_data(self) -> None: def _write_log_debug_data(self) -> None: """Write PID and host data to the file, recording old values.""" assert self._file_ref is not None, "cannot write debug log without the file being set" + if is_windows: + # Not implemented for windows + return + self.old_pid = self.pid self.old_host = self.host @@ -451,7 +539,7 @@ def _write_log_debug_data(self) -> None: self._file_ref.fh.flush() os.fsync(self._file_ref.fh.fileno()) - def _unlock(self) -> None: + def _partial_unlock(self) -> None: """Releases a lock using POSIX locks (``fcntl.lockf``) Releases the lock regardless of mode. Note that read locks may be masquerading as write @@ -461,6 +549,31 @@ def _unlock(self) -> None: fcntl.lockf( self._file_ref.fh.fileno(), fcntl.LOCK_UN, self._length, self._start, os.SEEK_SET ) + FILE_TRACKER.release_by_fh(self._file) + + if is_windows: + hfile = win32file._get_osfhandle(self._file.fileno()) + win32file.UnlockFileEx(hfile, + 0, + 0xffff0000, + pywintypes.OVERLAPPED()) + else: + fcntl.lockf(self._file, self.LOCK_UN, + self._length, self._start, os.SEEK_SET) + + def _unlock(self): + """Releases a lock using POSIX locks (``fcntl.lockf``) + + Releases the lock regardless of mode. Note that read locks may + be masquerading as write locks, but this removes either. + + Reset all lock attributes to initial states + """ + self._partial_unlock() + file_tracker.release_fh(self.path) + + self._file = None + self._file_mode = "" self._reads = 0 self._writes = 0 @@ -479,6 +592,7 @@ def acquire_read(self, timeout: Optional[float] = None) -> bool: # can raise LockError. wait_time, nattempts = self._lock(LockType.READ, timeout=timeout) self._reads += 1 + self._current_lock = self.LOCK_SH # Log if acquired, which includes counts when verbose self._log_acquired("READ LOCK", wait_time, nattempts) return True @@ -503,6 +617,7 @@ def acquire_write(self, timeout: Optional[float] = None) -> bool: # can raise LockError. wait_time, nattempts = self._lock(LockType.WRITE, timeout=timeout) self._writes += 1 + self._current_lock = self.LOCK_EX # Log if acquired, which includes counts when verbose self._log_acquired("WRITE LOCK", wait_time, nattempts) @@ -589,7 +704,7 @@ def downgrade_write_to_read(self, timeout: Optional[float] = None) -> None: """ timeout = timeout or self.default_timeout - if self._writes == 1 and self._reads == 0: + if self._writes == 1: self._log_downgrading() # can raise LockError. wait_time, nattempts = self._lock(LockType.READ, timeout=timeout) @@ -607,7 +722,7 @@ def upgrade_read_to_write(self, timeout: Optional[float] = None) -> None: """ timeout = timeout or self.default_timeout - if self._reads == 1 and self._writes == 0: + if self._reads >= 1 and self._writes == 0: self._log_upgrading() # can raise LockError. wait_time, nattempts = self._lock(LockType.WRITE, timeout=timeout) From 60ff2890f78b06406bd0232c7db33331e6de7e7a Mon Sep 17 00:00:00 2001 From: John Parent Date: Tue, 25 Jan 2022 17:30:20 -0500 Subject: [PATCH 02/20] Add locking Windows actions --- share/spack/qa/configuration/windows_locking_config.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 share/spack/qa/configuration/windows_locking_config.yaml diff --git a/share/spack/qa/configuration/windows_locking_config.yaml b/share/spack/qa/configuration/windows_locking_config.yaml new file mode 100644 index 00000000000000..266c7c42df93b0 --- /dev/null +++ b/share/spack/qa/configuration/windows_locking_config.yaml @@ -0,0 +1,8 @@ +config: + locks: true + install_tree: + root: $spack\opt\spack + projections: + all: '${ARCHITECTURE}\${COMPILERNAME}-${COMPILERVER}\${PACKAGE}-${VERSION}-${HASH}' + build_stage: + - ~/.spack/stage From ad9e4c213bda6774fcb5f0f95744f7165e1a1af0 Mon Sep 17 00:00:00 2001 From: John Parent Date: Thu, 7 Apr 2022 14:16:21 -0400 Subject: [PATCH 03/20] Update locks for win --- etc/spack/defaults/windows/config.yaml | 2 +- lib/spack/spack/llnl/util/lock.py | 26 ++++++++++++++------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/etc/spack/defaults/windows/config.yaml b/etc/spack/defaults/windows/config.yaml index af50575a3c1a60..b901de7024484a 100644 --- a/etc/spack/defaults/windows/config.yaml +++ b/etc/spack/defaults/windows/config.yaml @@ -1,5 +1,5 @@ config: - locks: false + locks: true build_stage:: - '$user_cache_path/stage' stage_name: '{name}-{version}-{hash:7}' diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index 28b2ac5d5db69e..b12d45ef4e51b3 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -158,17 +158,19 @@ def _attempts_str(wait_time, nattempts): class LockType: + READ = 0 + WRITE = 1 if is_windows: - self.LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK # exclusive lock - self.LOCK_SH = 0 # shared lock, default - self.LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY # non-blocking - self.LOCK_CATCH = pywintypes.error + LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK # exclusive lock + LOCK_SH = 0 # shared lock, default + LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY # non-blocking + LOCK_CATCH = pywintypes.error else: - self.LOCK_EX = fcntl.LOCK_EX - self.LOCK_SH = fcntl.LOCK_SH - self.LOCK_NB = fcntl.LOCK_NB - self.LOCK_UN = fcntl.LOCK_UN - self.LOCK_CATCH = IOError + LOCK_EX = fcntl.LOCK_EX + LOCK_SH = fcntl.LOCK_SH + LOCK_NB = fcntl.LOCK_NB + LOCK_UN = fcntl.LOCK_UN + LOCK_CATCH = IOError @staticmethod def to_str(tid): @@ -179,9 +181,9 @@ def to_str(tid): @staticmethod def to_module(tid): - lock = fcntl.LOCK_SH + lock = LockType.LOCK_SH if tid == LockType.WRITE: - lock = fcntl.LOCK_EX + lock = LockType.LOCK_EX return lock @staticmethod @@ -412,7 +414,7 @@ def _lock(self, op: int, timeout: Optional[float] = None) -> Tuple[float, int]: fh = self._ensure_valid_handle() - if LockType.to_module(op) == fcntl.LOCK_EX and fh.mode == "rb": + if LockType.to_module(op) == LockType.LOCK_EX and self._file.mode == "rb": # Attempt to upgrade to write lock w/a read-only file. # If the file were writable, we'd have opened it rb+ raise LockROFileError(self.path) From 3ee0cea688ed06c01ede818780a5191c8b162cf0 Mon Sep 17 00:00:00 2001 From: John Parent Date: Wed, 9 Jul 2025 19:09:42 -0400 Subject: [PATCH 04/20] Remove locking guard on Windows --- lib/spack/spack/util/lock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/spack/spack/util/lock.py b/lib/spack/spack/util/lock.py index 39a6bdaeef7351..eef4e246bba18e 100644 --- a/lib/spack/spack/util/lock.py +++ b/lib/spack/spack/util/lock.py @@ -39,7 +39,7 @@ def __init__( desc: str = "", enable: bool = True, ) -> None: - self._enable = sys.platform != "win32" and enable + self._enable = enable super().__init__( path, start=start, From b9fc90bb3abc3ff0848fda248bf8d764d33339f4 Mon Sep 17 00:00:00 2001 From: John Parent Date: Fri, 11 Jul 2025 10:25:35 -0400 Subject: [PATCH 05/20] Style --- lib/spack/spack/llnl/util/lock.py | 64 ++++++++++++++++++------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index b12d45ef4e51b3..7b3e547a72cb9d 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -11,14 +11,26 @@ from types import TracebackType from typing import IO, Callable, ContextManager, Dict, Generator, Optional, Tuple, Type, Union from sys import platform as _platform -from typing import Dict, Tuple # novm +from types import TracebackType +from typing import ( # novm + IO, + Any, + Callable, + ContextManager, + Dict, + Generator, + Optional, + Tuple, + Type, + Union, +) from spack.llnl.util import lang, tty from ..string import plural -is_windows = _platform == 'win32' -if not is_windows: +IS_WINDOWS = _platform == "win32" +if not IS_WINDOWS: import fcntl else: import win32con @@ -160,7 +172,7 @@ def _attempts_str(wait_time, nattempts): class LockType: READ = 0 WRITE = 1 - if is_windows: + if IS_WINDOWS: LOCK_EX = win32con.LOCKFILE_EXCLUSIVE_LOCK # exclusive lock LOCK_SH = 0 # shared lock, default LOCK_NB = win32con.LOCKFILE_FAIL_IMMEDIATELY # non-blocking @@ -196,18 +208,19 @@ def lock_checking(func): @wraps(func) def win_lock(self, *args, **kwargs): - if is_windows and self._reads > 0: + if IS_WINDOWS and self._reads > 0: self._partial_unlock() try: suc = func(self, *args, **kwargs) except Exception as e: if self._current_lock: - timeout = kwargs.get('timeout', None) + timeout = kwargs.get("timeout", None) self._lock(self._current_lock, timeout=timeout) raise e else: suc = func(self, *args, **kwargs) return suc + return win_lock @@ -282,7 +295,7 @@ def __init__( self.old_host: Optional[str] = None # Mapping of supported locks to description - self.lock_type = {self.LOCK_SH: 'read', self.LOCK_EX: 'write'} + self.lock_type = {self.LOCK_SH: "read", self.LOCK_EX: "write"} self._current_lock = None def _ensure_valid_handle(self) -> IO[bytes]: @@ -335,7 +348,7 @@ def _ensure_valid_handle(self) -> IO[bytes]: return self._file_ref.fh def __lock_fail_condition(self, e): - if is_windows: + if IS_WINDOWS: # 33 "The process cannot access the file because another # process has locked a portion of the file." # 32 "The process cannot access the file because it is being @@ -373,8 +386,10 @@ def __repr__(self) -> str: """Formal representation of the lock.""" rep = f"{self.__class__.__name__}(" for attr, value in self.__dict__.items(): - rep += f"{attr}={value.__repr__()}, " - return f"{rep.strip(', ')})" + rep += "{0}={1}, ".format(attr, value.__repr__()) + if attr != "LOCK_CATCH": + rep += "{0}={1}, ".format(attr, value.__repr__()) + return "{0})".format(rep.strip(", ")) def __str__(self) -> str: """Readable string (with key fields) of the lock.""" @@ -449,14 +464,15 @@ def _poll_lock(self, op: int) -> bool: module_op = LockType.to_module(op) try: - # Try to get the lock (will raise if not available.) - if is_windows: + if IS_WINDOWS: hfile = win32file._get_osfhandle(self._file.fileno()) - win32file.LockFileEx(hfile, - module_op | self.LOCK_NB, # flags - 0, - 0xffff0000, - pywintypes.OVERLAPPED()) + win32file.LockFileEx( + hfile, + module_op | self.LOCK_NB, # flags + 0, + 0xFFFF0000, + pywintypes.OVERLAPPED(), + ) else: # Try to get the lock (will raise if not available.) fcntl.lockf( @@ -506,7 +522,7 @@ def _ensure_parent_directory(self) -> str: def _read_log_debug_data(self) -> None: """Read PID and host data out of the file if it is there.""" assert self._file_ref is not None, "cannot read debug log without the file being set" - if is_windows: + if IS_WINDOWS: # Not implemented for windows return @@ -524,7 +540,7 @@ def _read_log_debug_data(self) -> None: def _write_log_debug_data(self) -> None: """Write PID and host data to the file, recording old values.""" assert self._file_ref is not None, "cannot write debug log without the file being set" - if is_windows: + if IS_WINDOWS: # Not implemented for windows return @@ -553,15 +569,11 @@ def _partial_unlock(self) -> None: ) FILE_TRACKER.release_by_fh(self._file) - if is_windows: + if IS_WINDOWS: hfile = win32file._get_osfhandle(self._file.fileno()) - win32file.UnlockFileEx(hfile, - 0, - 0xffff0000, - pywintypes.OVERLAPPED()) + win32file.UnlockFileEx(hfile, 0, 0xFFFF0000, pywintypes.OVERLAPPED()) else: - fcntl.lockf(self._file, self.LOCK_UN, - self._length, self._start, os.SEEK_SET) + fcntl.lockf(self._file, self.LOCK_UN, self._length, self._start, os.SEEK_SET) def _unlock(self): """Releases a lock using POSIX locks (``fcntl.lockf``) From af6fc5b7bea86c140e556d0b143e089dc200cba8 Mon Sep 17 00:00:00 2001 From: John Parent Date: Fri, 11 Jul 2025 11:33:14 -0400 Subject: [PATCH 06/20] Fixup import --- lib/spack/spack/llnl/util/lock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index 7b3e547a72cb9d..4138dce1f27a02 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -33,10 +33,10 @@ if not IS_WINDOWS: import fcntl else: + import pywintypes import win32con import win32file - import pywintypes # isort:skip __all__ = [ "Lock", From 85b8654fec2387a092bdf6580cc0bc042271070b Mon Sep 17 00:00:00 2001 From: John Parent Date: Fri, 11 Jul 2025 14:43:27 -0400 Subject: [PATCH 07/20] wip --- lib/spack/spack/llnl/util/lock.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index 4138dce1f27a02..3d4285e90b580b 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -573,7 +573,8 @@ def _partial_unlock(self) -> None: hfile = win32file._get_osfhandle(self._file.fileno()) win32file.UnlockFileEx(hfile, 0, 0xFFFF0000, pywintypes.OVERLAPPED()) else: - fcntl.lockf(self._file, self.LOCK_UN, self._length, self._start, os.SEEK_SET) + fcntl.lockf(self._file.fileno(), fcntl.LOCK_UN, self._length, self._start, os.SEEK_SET) + def _unlock(self): """Releases a lock using POSIX locks (``fcntl.lockf``) @@ -584,7 +585,7 @@ def _unlock(self): Reset all lock attributes to initial states """ self._partial_unlock() - file_tracker.release_fh(self.path) + FILE_TRACKER.release_by_fh(self._file) self._file = None self._file_mode = "" From 981f656515687d049b29bb7fff411fde18d32d2c Mon Sep 17 00:00:00 2001 From: John Parent Date: Tue, 2 Dec 2025 18:55:00 -0500 Subject: [PATCH 08/20] implement partial locking Signed-off-by: John Parent --- lib/spack/spack/llnl/util/lock.py | 44 +++++++++++++++++++++---------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index 3d4285e90b580b..caec21ad3756fa 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: (Apache-2.0 OR MIT) +import contextlib import errno import os import socket @@ -169,6 +170,20 @@ def _attempts_str(wait_time, nattempts): return " after {} and {}".format(lang.pretty_seconds(wait_time), attempts) +@contextlib.contextmanager +def _partial_upgrade(path:str, start:int = 0, length:int = 0, timeout:Optional[int] = None): + lock_path = path + ".lock" + l = Lock(lock_path, start=start, length=length) + try: + l.acquire_write(timeout=timeout) + yield + except LockError as e: + pass + finally: + if l.is_write_locked(): + l.release_write() + + class LockType: READ = 0 WRITE = 1 @@ -294,8 +309,6 @@ def __init__( self.host: Optional[str] = None self.old_host: Optional[str] = None - # Mapping of supported locks to description - self.lock_type = {self.LOCK_SH: "read", self.LOCK_EX: "write"} self._current_lock = None def _ensure_valid_handle(self) -> IO[bytes]: @@ -412,6 +425,8 @@ def __setstate__(self, state): self._reads = 0 self._writes = 0 + + @lock_checking def _lock(self, op: int, timeout: Optional[float] = None) -> Tuple[float, int]: """This takes a lock using POSIX locks (``fcntl.lockf``). @@ -468,7 +483,7 @@ def _poll_lock(self, op: int) -> bool: hfile = win32file._get_osfhandle(self._file.fileno()) win32file.LockFileEx( hfile, - module_op | self.LOCK_NB, # flags + module_op | LockType.LOCK_NB, # flags 0, 0xFFFF0000, pywintypes.OVERLAPPED(), @@ -477,7 +492,7 @@ def _poll_lock(self, op: int) -> bool: # Try to get the lock (will raise if not available.) fcntl.lockf( self._fh, - module_op | fcntl.LOCK_NB, + module_op | LockType.LOCK_NB, self._length, self._start, os.SEEK_SET, @@ -494,7 +509,7 @@ def _poll_lock(self, op: int) -> bool: ) # Exclusive locks write their PID/host - if op == self.LOCK_EX: + if op == LockType.LOCK_EX: self._write_log_debug_data() return True @@ -503,7 +518,7 @@ def _poll_lock(self, op: int) -> bool: # EAGAIN and EACCES == locked by another process (so try again) if e.errno not in (errno.EAGAIN, errno.EACCES): raise - except self.LOCK_CATCH as e: + except LockType.LOCK_CATCH as e: # check if lock failure or lock is already held if self.__lock_fail_condition(e): raise @@ -573,7 +588,7 @@ def _partial_unlock(self) -> None: hfile = win32file._get_osfhandle(self._file.fileno()) win32file.UnlockFileEx(hfile, 0, 0xFFFF0000, pywintypes.OVERLAPPED()) else: - fcntl.lockf(self._file.fileno(), fcntl.LOCK_UN, self._length, self._start, os.SEEK_SET) + fcntl.lockf(self._file.fileno(), LockType.LOCK_UN, self._length, self._start, os.SEEK_SET) def _unlock(self): @@ -607,7 +622,7 @@ def acquire_read(self, timeout: Optional[float] = None) -> bool: # can raise LockError. wait_time, nattempts = self._lock(LockType.READ, timeout=timeout) self._reads += 1 - self._current_lock = self.LOCK_SH + self._current_lock = LockType.LOCK_SH # Log if acquired, which includes counts when verbose self._log_acquired("READ LOCK", wait_time, nattempts) return True @@ -632,7 +647,7 @@ def acquire_write(self, timeout: Optional[float] = None) -> bool: # can raise LockError. wait_time, nattempts = self._lock(LockType.WRITE, timeout=timeout) self._writes += 1 - self._current_lock = self.LOCK_EX + self._current_lock = LockType.LOCK_EX # Log if acquired, which includes counts when verbose self._log_acquired("WRITE LOCK", wait_time, nattempts) @@ -739,11 +754,12 @@ def upgrade_read_to_write(self, timeout: Optional[float] = None) -> None: if self._reads >= 1 and self._writes == 0: self._log_upgrading() - # can raise LockError. - wait_time, nattempts = self._lock(LockType.WRITE, timeout=timeout) - self._reads = 0 - self._writes = 1 - self._log_upgraded(wait_time, nattempts) + with partial_upgrade(self.path): + # can raise LockError. + wait_time, nattempts = self._lock(LockType.WRITE, timeout=timeout) + self._reads = 0 + self._writes = 1 + self._log_upgraded(wait_time, nattempts) else: raise LockUpgradeError(self.path) From d715ef37aafc5002096ed77bb2c93a1ebf09ad01 Mon Sep 17 00:00:00 2001 From: John Parent Date: Thu, 4 Dec 2025 18:59:11 -0500 Subject: [PATCH 09/20] Switchover locking Signed-off-by: John Parent --- lib/spack/spack/llnl/util/lock.py | 4 ++-- lib/spack/spack/test/llnl/util/lock.py | 2 +- lib/spack/spack/util/lock.py | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index caec21ad3756fa..44c621090ace3b 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -6,7 +6,6 @@ import errno import os import socket -import sys import time from datetime import datetime from types import TracebackType @@ -172,6 +171,7 @@ def _attempts_str(wait_time, nattempts): @contextlib.contextmanager def _partial_upgrade(path:str, start:int = 0, length:int = 0, timeout:Optional[int] = None): + # probably something like foo.lock.lock lock_path = path + ".lock" l = Lock(lock_path, start=start, length=length) try: @@ -754,7 +754,7 @@ def upgrade_read_to_write(self, timeout: Optional[float] = None) -> None: if self._reads >= 1 and self._writes == 0: self._log_upgrading() - with partial_upgrade(self.path): + with _partial_upgrade(self.path): # can raise LockError. wait_time, nattempts = self._lock(LockType.WRITE, timeout=timeout) self._reads = 0 diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index a7b3f9aa6da651..2336183359a43e 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -50,7 +50,7 @@ if sys.platform != "win32": import fcntl -pytestmark = pytest.mark.not_on_windows("does not run on windows") +# pytestmark = pytest.mark.not_on_windows("does not run on windows") # diff --git a/lib/spack/spack/util/lock.py b/lib/spack/spack/util/lock.py index eef4e246bba18e..d058faab2e05bb 100644 --- a/lib/spack/spack/util/lock.py +++ b/lib/spack/spack/util/lock.py @@ -6,7 +6,6 @@ import os import stat -import sys from typing import Optional, Tuple import spack.error From a389677ce56b6649befd5c4e0eb7388337cd5160 Mon Sep 17 00:00:00 2001 From: John Parent Date: Mon, 8 Dec 2025 17:14:41 -0500 Subject: [PATCH 10/20] Remove old lock upgrade approach Signed-off-by: John Parent --- lib/spack/spack/llnl/util/lock.py | 48 ++++--------------------------- 1 file changed, 6 insertions(+), 42 deletions(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index 44c621090ace3b..8195585b5beb08 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -218,27 +218,6 @@ def is_valid(op: int) -> bool: return op == LockType.READ or op == LockType.WRITE -def lock_checking(func): - from functools import wraps - - @wraps(func) - def win_lock(self, *args, **kwargs): - if IS_WINDOWS and self._reads > 0: - self._partial_unlock() - try: - suc = func(self, *args, **kwargs) - except Exception as e: - if self._current_lock: - timeout = kwargs.get("timeout", None) - self._lock(self._current_lock, timeout=timeout) - raise e - else: - suc = func(self, *args, **kwargs) - return suc - - return win_lock - - class Lock: """This is an implementation of a filesystem lock using Python's lockf. @@ -426,8 +405,6 @@ def __setstate__(self, state): self._writes = 0 - - @lock_checking def _lock(self, op: int, timeout: Optional[float] = None) -> Tuple[float, int]: """This takes a lock using POSIX locks (``fcntl.lockf``). @@ -572,34 +549,21 @@ def _write_log_debug_data(self) -> None: self._file_ref.fh.flush() os.fsync(self._file_ref.fh.fileno()) - def _partial_unlock(self) -> None: + def _unlock(self): """Releases a lock using POSIX locks (``fcntl.lockf``) - Releases the lock regardless of mode. Note that read locks may be masquerading as write - locks, but this removes either. + Releases the lock regardless of mode. Note that read locks may + be masquerading as write locks, but this removes either. + + Reset all lock attributes to initial states """ - assert self._file_ref is not None, "cannot unlock without the file being set" - fcntl.lockf( - self._file_ref.fh.fileno(), fcntl.LOCK_UN, self._length, self._start, os.SEEK_SET - ) - FILE_TRACKER.release_by_fh(self._file) + assert self._file is not None, "cannot unlock without the file being set" if IS_WINDOWS: hfile = win32file._get_osfhandle(self._file.fileno()) win32file.UnlockFileEx(hfile, 0, 0xFFFF0000, pywintypes.OVERLAPPED()) else: fcntl.lockf(self._file.fileno(), LockType.LOCK_UN, self._length, self._start, os.SEEK_SET) - - - def _unlock(self): - """Releases a lock using POSIX locks (``fcntl.lockf``) - - Releases the lock regardless of mode. Note that read locks may - be masquerading as write locks, but this removes either. - - Reset all lock attributes to initial states - """ - self._partial_unlock() FILE_TRACKER.release_by_fh(self._file) self._file = None From e2871f5001e893af5c9f73cbd337534f6a72c582 Mon Sep 17 00:00:00 2001 From: John Parent Date: Mon, 8 Dec 2025 18:43:47 -0500 Subject: [PATCH 11/20] win32file locking test Signed-off-by: John Parent --- lib/spack/spack/test/llnl/util/lock.py | 37 +++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index 2336183359a43e..32498138c35d94 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -49,6 +49,8 @@ if sys.platform != "win32": import fcntl +else: + import win32file # pytestmark = pytest.mark.not_on_windows("does not run on windows") @@ -1222,7 +1224,7 @@ def test_downgrade_write_fails(tmp_path: pathlib.Path): lock.downgrade_write_to_read() lock.release_read() - +@pytest.mark.skipif(sys.platform=="win32", reason="fcntl unavailable on Windows") @pytest.mark.parametrize( "err_num,err_msg", [ @@ -1245,14 +1247,43 @@ def _lockf(fd, cmd, len, start, whence): monkeypatch.setattr(fcntl, "lockf", _lockf) if err_num in [errno.EAGAIN, errno.EACCES]: - assert not lock._poll_lock(fcntl.LOCK_EX) + assert not lock._poll_lock(lk.LockType.LOCK_EX) else: with pytest.raises(OSError, match=err_msg): - lock._poll_lock(fcntl.LOCK_EX) + lock._poll_lock(lk.LockType.LOCK_EX) monkeypatch.undo() lock.release_read() +@pytest.mark.skipif(sys.platform != "win32", reason="win32file only available on Windows") +@pytest.mark.parametrize( + "err_num,err_msg", + [ + (32, "Fake EACCES error analog"), + (33, "Fake EAGAIN error analog"), + ], +) +def test_poll_lock_exception_win(tmp_path: pathlib.Path, monkeypatch, err_num, err_msg): + """Test poll lock exception handling.""" + + def LockFileEx(hfile, int_, int1_, int2_, ol): + raise OSError(err_num, err_msg) + + with working_dir(str(tmp_path)): + lockfile = "lockfile" + lock = lk.Lock(lockfile) + lock.acquire_read() + + monkeypatch.setattr(win32file, "LockFileEx", LockFileEx) + + if err_num in [errno.EAGAIN, errno.EACCES]: + assert not lock._poll_lock(lk.LockType.LOCK_EX) + else: + with pytest.raises(OSError, match=err_msg): + lock._poll_lock(lk.LockType.LOCK_EX) + + monkeypatch.undo() + lock.release_read() def test_upgrade_read_okay(tmp_path: pathlib.Path): """Test the lock read-to-write upgrade operation.""" From 0ff7da5cf9e3c6dcb520076a04e98a095f461abb Mon Sep 17 00:00:00 2001 From: John Parent Date: Tue, 6 Jan 2026 15:31:36 -0500 Subject: [PATCH 12/20] Support ranges properly Signed-off-by: John Parent --- lib/spack/spack/llnl/util/lock.py | 158 ++++++++++++++++++------- lib/spack/spack/test/llnl/util/lock.py | 15 +-- 2 files changed, 125 insertions(+), 48 deletions(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index 8195585b5beb08..d309b759fa2d35 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -52,6 +52,8 @@ "CantCreateLockError", ] +WHOLE_FILE_RANGE = 0XFFFFFFFF if IS_WINDOWS else 0 + ExitFnType = Callable[ [Optional[Type[BaseException]], Optional[BaseException], Optional[TracebackType]], @@ -169,21 +171,93 @@ def _attempts_str(wait_time, nattempts): return " after {} and {}".format(lang.pretty_seconds(wait_time), attempts) +def _low_high(value): + low = value & 0XFFFFFFFF + high = (value >> 32) & 0XFFFFFFFF + return low, high + + +def _setup_overlapped(offset): + overlapped = pywintypes.OVERLAPPED() + # hEvent needs to be null per lockfileex docs + overlapped.hEvent = 0 + offset_low, offset_high = _low_high(offset) + overlapped.Offset = offset_low + overlapped.OffsetHigh = offset_high + return overlapped + + @contextlib.contextmanager -def _partial_upgrade(path:str, start:int = 0, length:int = 0, timeout:Optional[int] = None): +def _safe_exclusion(lock: "Lock", timeout: Optional[float] = None): + """Lock upgrade guard for Windows, designed to allow for lock upgrading + which Windows file locks do not natively support + Uses one additional lockfile as a "gate" for upgrades. + + If a process is attempting to upgrade from a read to a write + it must first take a write lock on this intermediate file + before releasing existing read lock and retaking a lock on the same + file but exclusively. + This gate file prevents any other process/lock from catching + the lock with a competing write during the release part of the upgrade. + """ # probably something like foo.lock.lock - lock_path = path + ".lock" - l = Lock(lock_path, start=start, length=length) + timeout = timeout or lock.default_timeout + lock_path = lock.path + ".lock" + l = Lock(lock_path, start=lock._start, length=lock._length) + # cache previous value for read locks + _reads = lock._reads try: - l.acquire_write(timeout=timeout) + # don't use the acquire lock method to avoid + # recursion + l._lock(LockType.WRITE, timeout=timeout) + # lock gate acquired, drop read lock if there is one + # cannot use release read as we need to drop + # the read lock if there is one, not just decrement the nested + # lock tracker + if _reads: + lock._release_lock() yield except LockError as e: - pass + # doesn't matter what the error was + # if there was a read lock previously + # restore it + if _reads > 0: + lock._lock(LockType.READ, timeout=timeout) + raise finally: if l.is_write_locked(): l.release_write() +@contextlib.contextmanager +def _safe_downgrade(lock: "Lock", timeout: Optional[float] = None): + """Windows can hold shared and exclusive locks on the same time, but only + if the exclusive lock is held first. When the shared lock overlaps the exclusive + range, the exclusive lock must also be unlocked and must be unlocked *first* + From the MSFT docs: + + A shared lock can overlap an exclusive lock if both locks were created using + the same file handle. If the same range is locked with an exclusive and a + shared lock, two unlock operations are necessary to unlock the region; the + first unlock operation unlocks the exclusive lock, the second unlock + operation unlocks the shared lock. + + Since we are "downgrading" we no longer care to have an exclusive lock + so we just give it up + """ + timeout = timeout or lock.default_timeout + # release the read lock first + # so in the event on an error, we still have the exclusive lock + yield + # From the docstring: + # first unlock operation unlocks the exclusive lock, the second unlock + # operation unlocks the shared lock. + # + # This will remove the exclusive lock + lock._release_lock() + + + class LockType: READ = 0 WRITE = 1 @@ -236,7 +310,7 @@ def __init__( path: str, *, start: int = 0, - length: int = 0, + length: int = WHOLE_FILE_RANGE, default_timeout: Optional[float] = None, debug: bool = False, desc: str = "", @@ -457,13 +531,15 @@ def _poll_lock(self, op: int) -> bool: try: if IS_WINDOWS: + overlapped = _setup_overlapped(self._start) + range_low, range_high = _low_high(self._length) hfile = win32file._get_osfhandle(self._file.fileno()) win32file.LockFileEx( hfile, module_op | LockType.LOCK_NB, # flags - 0, - 0xFFFF0000, - pywintypes.OVERLAPPED(), + range_low, + range_high, + overlapped, ) else: # Try to get the lock (will raise if not available.) @@ -486,7 +562,7 @@ def _poll_lock(self, op: int) -> bool: ) # Exclusive locks write their PID/host - if op == LockType.LOCK_EX: + if op == LockType.WRITE: self._write_log_debug_data() return True @@ -514,9 +590,6 @@ def _ensure_parent_directory(self) -> str: def _read_log_debug_data(self) -> None: """Read PID and host data out of the file if it is there.""" assert self._file_ref is not None, "cannot read debug log without the file being set" - if IS_WINDOWS: - # Not implemented for windows - return self.old_pid = self.pid self.old_host = self.host @@ -532,16 +605,12 @@ def _read_log_debug_data(self) -> None: def _write_log_debug_data(self) -> None: """Write PID and host data to the file, recording old values.""" assert self._file_ref is not None, "cannot write debug log without the file being set" - if IS_WINDOWS: - # Not implemented for windows - return self.old_pid = self.pid self.old_host = self.host self.pid = os.getpid() self.host = socket.gethostname() - # write pid, host to disk to sync over FS self._file_ref.fh.seek(0) self._file_ref.fh.write(f"pid={self.pid},host={self.host}".encode("utf-8")) @@ -549,6 +618,15 @@ def _write_log_debug_data(self) -> None: self._file_ref.fh.flush() os.fsync(self._file_ref.fh.fileno()) + def _release_lock(self): + if IS_WINDOWS: + hfile = win32file._get_osfhandle(self._file.fileno()) + overlapped = _setup_overlapped(self._start) + low_range, high_range = _low_high(self._length) + win32file.UnlockFileEx(hfile, low_range, high_range, overlapped) + else: + fcntl.lockf(self._file.fileno(), LockType.LOCK_UN, self._length, self._start, os.SEEK_SET) + def _unlock(self): """Releases a lock using POSIX locks (``fcntl.lockf``) @@ -559,11 +637,7 @@ def _unlock(self): """ assert self._file is not None, "cannot unlock without the file being set" - if IS_WINDOWS: - hfile = win32file._get_osfhandle(self._file.fileno()) - win32file.UnlockFileEx(hfile, 0, 0xFFFF0000, pywintypes.OVERLAPPED()) - else: - fcntl.lockf(self._file.fileno(), LockType.LOCK_UN, self._length, self._start, os.SEEK_SET) + self._release_lock() FILE_TRACKER.release_by_fh(self._file) self._file = None @@ -608,18 +682,19 @@ def acquire_write(self, timeout: Optional[float] = None) -> bool: timeout = timeout or self.default_timeout if self._writes == 0: - # can raise LockError. - wait_time, nattempts = self._lock(LockType.WRITE, timeout=timeout) - self._writes += 1 - self._current_lock = LockType.LOCK_EX - # Log if acquired, which includes counts when verbose - self._log_acquired("WRITE LOCK", wait_time, nattempts) - - # return True only if we weren't nested in a read lock. - # TODO: we may need to return two values: whether we got - # the write lock, and whether this is acquiring a read OR - # write lock for the first time. Now it returns the latter. - return self._reads == 0 + with _safe_exclusion(self): + # can raise LockError. + wait_time, nattempts = self._lock(LockType.WRITE, timeout=timeout) + self._writes += 1 + self._current_lock = LockType.LOCK_EX + # Log if acquired, which includes counts when verbose + self._log_acquired("WRITE LOCK", wait_time, nattempts) + + # return True only if we weren't nested in a read lock. + # TODO: we may need to return two values: whether we got + # the write lock, and whether this is acquiring a read OR + # write lock for the first time. Now it returns the latter. + return self._reads == 0 else: # Increment the write count for nested lock tracking self._reaffirm_lock() @@ -699,12 +774,13 @@ def downgrade_write_to_read(self, timeout: Optional[float] = None) -> None: timeout = timeout or self.default_timeout if self._writes == 1: - self._log_downgrading() - # can raise LockError. - wait_time, nattempts = self._lock(LockType.READ, timeout=timeout) - self._reads = 1 - self._writes = 0 - self._log_downgraded(wait_time, nattempts) + with _safe_downgrade(self): + self._log_downgrading() + # can raise LockError. + wait_time, nattempts = self._lock(LockType.READ, timeout=timeout) + self._reads = 1 + self._writes = 0 + self._log_downgraded(wait_time, nattempts) else: raise LockDowngradeError(self.path) @@ -718,7 +794,7 @@ def upgrade_read_to_write(self, timeout: Optional[float] = None) -> None: if self._reads >= 1 and self._writes == 0: self._log_upgrading() - with _partial_upgrade(self.path): + with _safe_exclusion(self): # can raise LockError. wait_time, nattempts = self._lock(LockType.WRITE, timeout=timeout) self._reads = 0 diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index 32498138c35d94..7abd0bbc6e10b8 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -281,7 +281,7 @@ def wait(self): # Process snippets below can be composed into tests. # class AcquireWrite: - def __init__(self, lock_path, start=0, length=0): + def __init__(self, lock_path, start=0, length=1): self.lock_path = lock_path self.start = start self.length = length @@ -298,7 +298,7 @@ def __call__(self, barrier): class AcquireRead: - def __init__(self, lock_path, start=0, length=0): + def __init__(self, lock_path, start=0, length=1): self.lock_path = lock_path self.start = start self.length = length @@ -315,7 +315,7 @@ def __call__(self, barrier): class TimeoutWrite: - def __init__(self, lock_path, start=0, length=0): + def __init__(self, lock_path, start=0, length=1): self.lock_path = lock_path self.start = start self.length = length @@ -333,7 +333,7 @@ def __call__(self, barrier): class TimeoutRead: - def __init__(self, lock_path, start=0, length=0): + def __init__(self, lock_path, start=0, length=1): self.lock_path = lock_path self.start = start self.length = length @@ -571,7 +571,7 @@ def test_write_lock_timeout_with_multiple_readers_3_2_ranges(lock_path): ) -@pytest.mark.skipif(getuid() == 0, reason="user is root") +@pytest.mark.skipif(sys.platform == "win32" or getuid() == 0, reason="user is root") def test_read_lock_on_read_only_lockfile(lock_dir, lock_path): """read-only directory, read-only lockfile.""" touch(lock_path) @@ -599,7 +599,8 @@ def test_read_lock_read_only_dir_writable_lockfile(lock_dir, lock_path): pass -@pytest.mark.skipif(False if sys.platform == "win32" else getuid() == 0, reason="user is root") +# skipping on Windows as spack cannot currently make directories read only +@pytest.mark.skipif(sys.platform == "win32" or getuid() == 0, reason="user is root") def test_read_lock_no_lockfile(lock_dir, lock_path): """read-only directory, no lockfile (so can't create).""" with read_only(lock_dir): @@ -1198,7 +1199,7 @@ def test_attempts_str(): def test_lock_str(): lock = lk.Lock("lockfile") lockstr = str(lock) - assert "lockfile[0:0]" in lockstr + assert f"lockfile[0:{lk.WHOLE_FILE_RANGE}]" in lockstr assert "timeout=None" in lockstr assert "#reads=0, #writes=0" in lockstr From 8e756a7dccd510b096171f97b2cc4d5a1d1f848e Mon Sep 17 00:00:00 2001 From: John Parent Date: Tue, 6 Jan 2026 15:35:41 -0500 Subject: [PATCH 13/20] style Signed-off-by: John Parent --- lib/spack/spack/llnl/util/lock.py | 40 ++++++++++++-------------- lib/spack/spack/test/llnl/util/lock.py | 11 ++++--- 2 files changed, 23 insertions(+), 28 deletions(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index d309b759fa2d35..87f656045ccc25 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -52,7 +52,7 @@ "CantCreateLockError", ] -WHOLE_FILE_RANGE = 0XFFFFFFFF if IS_WINDOWS else 0 +WHOLE_FILE_RANGE = 0xFFFFFFFF if IS_WINDOWS else 0 ExitFnType = Callable[ @@ -172,8 +172,8 @@ def _attempts_str(wait_time, nattempts): def _low_high(value): - low = value & 0XFFFFFFFF - high = (value >> 32) & 0XFFFFFFFF + low = value & 0xFFFFFFFF + high = (value >> 32) & 0xFFFFFFFF return low, high @@ -192,7 +192,7 @@ def _safe_exclusion(lock: "Lock", timeout: Optional[float] = None): """Lock upgrade guard for Windows, designed to allow for lock upgrading which Windows file locks do not natively support Uses one additional lockfile as a "gate" for upgrades. - + If a process is attempting to upgrade from a read to a write it must first take a write lock on this intermediate file before releasing existing read lock and retaking a lock on the same @@ -203,13 +203,13 @@ def _safe_exclusion(lock: "Lock", timeout: Optional[float] = None): # probably something like foo.lock.lock timeout = timeout or lock.default_timeout lock_path = lock.path + ".lock" - l = Lock(lock_path, start=lock._start, length=lock._length) + lk = Lock(lock_path, start=lock._start, length=lock._length) # cache previous value for read locks _reads = lock._reads try: # don't use the acquire lock method to avoid # recursion - l._lock(LockType.WRITE, timeout=timeout) + lk._lock(LockType.WRITE, timeout=timeout) # lock gate acquired, drop read lock if there is one # cannot use release read as we need to drop # the read lock if there is one, not just decrement the nested @@ -217,7 +217,7 @@ def _safe_exclusion(lock: "Lock", timeout: Optional[float] = None): if _reads: lock._release_lock() yield - except LockError as e: + except LockError: # doesn't matter what the error was # if there was a read lock previously # restore it @@ -225,8 +225,8 @@ def _safe_exclusion(lock: "Lock", timeout: Optional[float] = None): lock._lock(LockType.READ, timeout=timeout) raise finally: - if l.is_write_locked(): - l.release_write() + if lk.is_write_locked(): + lk.release_write() @contextlib.contextmanager @@ -235,13 +235,13 @@ def _safe_downgrade(lock: "Lock", timeout: Optional[float] = None): if the exclusive lock is held first. When the shared lock overlaps the exclusive range, the exclusive lock must also be unlocked and must be unlocked *first* From the MSFT docs: - + A shared lock can overlap an exclusive lock if both locks were created using - the same file handle. If the same range is locked with an exclusive and a + the same file handle. If the same range is locked with an exclusive and a shared lock, two unlock operations are necessary to unlock the region; the first unlock operation unlocks the exclusive lock, the second unlock operation unlocks the shared lock. - + Since we are "downgrading" we no longer care to have an exclusive lock so we just give it up """ @@ -249,14 +249,13 @@ def _safe_downgrade(lock: "Lock", timeout: Optional[float] = None): # release the read lock first # so in the event on an error, we still have the exclusive lock yield - # From the docstring: + # From the docstring: # first unlock operation unlocks the exclusive lock, the second unlock # operation unlocks the shared lock. # # This will remove the exclusive lock lock._release_lock() - - + class LockType: READ = 0 @@ -478,7 +477,6 @@ def __setstate__(self, state): self._reads = 0 self._writes = 0 - def _lock(self, op: int, timeout: Optional[float] = None) -> Tuple[float, int]: """This takes a lock using POSIX locks (``fcntl.lockf``). @@ -535,11 +533,7 @@ def _poll_lock(self, op: int) -> bool: range_low, range_high = _low_high(self._length) hfile = win32file._get_osfhandle(self._file.fileno()) win32file.LockFileEx( - hfile, - module_op | LockType.LOCK_NB, # flags - range_low, - range_high, - overlapped, + hfile, module_op | LockType.LOCK_NB, range_low, range_high, overlapped # flags ) else: # Try to get the lock (will raise if not available.) @@ -625,7 +619,9 @@ def _release_lock(self): low_range, high_range = _low_high(self._length) win32file.UnlockFileEx(hfile, low_range, high_range, overlapped) else: - fcntl.lockf(self._file.fileno(), LockType.LOCK_UN, self._length, self._start, os.SEEK_SET) + fcntl.lockf( + self._file.fileno(), LockType.LOCK_UN, self._length, self._start, os.SEEK_SET + ) def _unlock(self): """Releases a lock using POSIX locks (``fcntl.lockf``) diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index 7abd0bbc6e10b8..1d8bd64cf552ad 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -1225,7 +1225,8 @@ def test_downgrade_write_fails(tmp_path: pathlib.Path): lock.downgrade_write_to_read() lock.release_read() -@pytest.mark.skipif(sys.platform=="win32", reason="fcntl unavailable on Windows") + +@pytest.mark.skipif(sys.platform == "win32", reason="fcntl unavailable on Windows") @pytest.mark.parametrize( "err_num,err_msg", [ @@ -1256,13 +1257,10 @@ def _lockf(fd, cmd, len, start, whence): monkeypatch.undo() lock.release_read() + @pytest.mark.skipif(sys.platform != "win32", reason="win32file only available on Windows") @pytest.mark.parametrize( - "err_num,err_msg", - [ - (32, "Fake EACCES error analog"), - (33, "Fake EAGAIN error analog"), - ], + "err_num,err_msg", [(32, "Fake EACCES error analog"), (33, "Fake EAGAIN error analog")] ) def test_poll_lock_exception_win(tmp_path: pathlib.Path, monkeypatch, err_num, err_msg): """Test poll lock exception handling.""" @@ -1286,6 +1284,7 @@ def LockFileEx(hfile, int_, int1_, int2_, ol): monkeypatch.undo() lock.release_read() + def test_upgrade_read_okay(tmp_path: pathlib.Path): """Test the lock read-to-write upgrade operation.""" with working_dir(str(tmp_path)): From 9ce7897bf532c1b994d8ce0c554a5f4e7c7ec9d5 Mon Sep 17 00:00:00 2001 From: John Parent Date: Tue, 14 Apr 2026 17:23:27 -0400 Subject: [PATCH 14/20] cleanup Signed-off-by: John Parent --- lib/spack/spack/llnl/util/lock.py | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index 87f656045ccc25..e201c58785b847 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -200,9 +200,11 @@ def _safe_exclusion(lock: "Lock", timeout: Optional[float] = None): This gate file prevents any other process/lock from catching the lock with a competing write during the release part of the upgrade. """ - # probably something like foo.lock.lock timeout = timeout or lock.default_timeout - lock_path = lock.path + ".lock" + # Lock used for exclusive lock access is based on lock it's facilliating + # exclusive access to, ensure exclusion locks are as distinct as the locks + # they're gating. Name is .gate_lock i.e. db.lock.gate_lock + lock_path = lock.path + ".gate_lock" lk = Lock(lock_path, start=lock._start, length=lock._length) # cache previous value for read locks _reads = lock._reads @@ -412,7 +414,7 @@ def _ensure_valid_handle(self) -> IO[bytes]: return self._file_ref.fh - def __lock_fail_condition(self, e): + def _lock_fail_condition(self, e): if IS_WINDOWS: # 33 "The process cannot access the file because another # process has locked a portion of the file." @@ -452,8 +454,6 @@ def __repr__(self) -> str: rep = f"{self.__class__.__name__}(" for attr, value in self.__dict__.items(): rep += "{0}={1}, ".format(attr, value.__repr__()) - if attr != "LOCK_CATCH": - rep += "{0}={1}, ".format(attr, value.__repr__()) return "{0})".format(rep.strip(", ")) def __str__(self) -> str: @@ -493,7 +493,7 @@ def _lock(self, op: int, timeout: Optional[float] = None) -> Tuple[float, int]: fh = self._ensure_valid_handle() - if LockType.to_module(op) == LockType.LOCK_EX and self._file.mode == "rb": + if LockType.to_module(op) == LockType.LOCK_EX and fh.mode == "rb": # Attempt to upgrade to write lock w/a read-only file. # If the file were writable, we'd have opened it rb+ raise LockROFileError(self.path) @@ -531,7 +531,7 @@ def _poll_lock(self, op: int) -> bool: if IS_WINDOWS: overlapped = _setup_overlapped(self._start) range_low, range_high = _low_high(self._length) - hfile = win32file._get_osfhandle(self._file.fileno()) + hfile = win32file._get_osfhandle(self._file_ref.fh.fileno()) win32file.LockFileEx( hfile, module_op | LockType.LOCK_NB, range_low, range_high, overlapped # flags ) @@ -561,13 +561,11 @@ def _poll_lock(self, op: int) -> bool: return True - except OSError as e: - # EAGAIN and EACCES == locked by another process (so try again) - if e.errno not in (errno.EAGAIN, errno.EACCES): - raise except LockType.LOCK_CATCH as e: # check if lock failure or lock is already held - if self.__lock_fail_condition(e): + # lock being held means backoff, other failure reasons + # are valid and we should fail + if self._lock_fail_condition(e): raise return False @@ -614,13 +612,13 @@ def _write_log_debug_data(self) -> None: def _release_lock(self): if IS_WINDOWS: - hfile = win32file._get_osfhandle(self._file.fileno()) + hfile = win32file._get_osfhandle(self._file_ref.fh.fileno()) overlapped = _setup_overlapped(self._start) low_range, high_range = _low_high(self._length) win32file.UnlockFileEx(hfile, low_range, high_range, overlapped) else: fcntl.lockf( - self._file.fileno(), LockType.LOCK_UN, self._length, self._start, os.SEEK_SET + self._file_ref.fh.fileno(), LockType.LOCK_UN, self._length, self._start, os.SEEK_SET ) def _unlock(self): @@ -631,13 +629,10 @@ def _unlock(self): Reset all lock attributes to initial states """ - assert self._file is not None, "cannot unlock without the file being set" + assert self._file_ref is not None, "cannot unlock without the file being set" self._release_lock() - FILE_TRACKER.release_by_fh(self._file) - self._file = None - self._file_mode = "" self._reads = 0 self._writes = 0 From 3e89fe333f07b9ea3107e10d78a9117df6196114 Mon Sep 17 00:00:00 2001 From: John Parent Date: Wed, 15 Apr 2026 17:15:09 -0400 Subject: [PATCH 15/20] fixup locking class for windows upgrades Signed-off-by: John Parent --- lib/spack/spack/llnl/util/lock.py | 49 +++++++++++++++++++++++-------- 1 file changed, 36 insertions(+), 13 deletions(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index e201c58785b847..748b5a41cde08b 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -197,9 +197,16 @@ def _safe_exclusion(lock: "Lock", timeout: Optional[float] = None): it must first take a write lock on this intermediate file before releasing existing read lock and retaking a lock on the same file but exclusively. + Processes taking write locks in any context must first take a write lock + on this gate to ensure they respect the upgrade of a read to a write + and cannot take a write on the primary lock while the upgrade takes a write + on the gate. This gate file prevents any other process/lock from catching the lock with a competing write during the release part of the upgrade. """ + if not IS_WINDOWS: + yield + return timeout = timeout or lock.default_timeout # Lock used for exclusive lock access is based on lock it's facilliating # exclusive access to, ensure exclusion locks are as distinct as the locks @@ -248,15 +255,22 @@ def _safe_downgrade(lock: "Lock", timeout: Optional[float] = None): so we just give it up """ timeout = timeout or lock.default_timeout - # release the read lock first - # so in the event on an error, we still have the exclusive lock + # yield first, this allows the calling context to take the + # "downgraded lock". On Windows, we don't downgrade atomically, we + # just take an additional lock and then release the exclusive. yield # From the docstring: # first unlock operation unlocks the exclusive lock, the second unlock # operation unlocks the shared lock. # - # This will remove the exclusive lock - lock._release_lock() + # This will remove the exclusive lock, unlockfile works in a fifo and the + # only possible ordering where we have a shared and exclusive lock + # is when the exclusive lock is acquired first + try: + lock._release_lock() + except LockType.LOCK_CATCH as e: + # any error when releasing is an issue + raise class LockType: @@ -696,7 +710,12 @@ def _reaffirm_lock(self) -> None: """Fork-safety: always re-affirm the lock with one non-blocking attempt. In the same process, re-locking an already-held byte range succeeds instantly (POSIX). In a forked child that doesn't own the POSIX lock, the call fails immediately and we raise. Use WRITE - if we hold an exclusive lock so we don't accidentally downgrade it.""" + if we hold an exclusive lock so we don't accidentally downgrade it. + + No-op on Windows (Spawn only) + """ + if IS_WINDOWS: + return if self._writes > 0: op = LockType.WRITE elif self._reads > 0: @@ -729,14 +748,18 @@ def try_acquire_write(self) -> bool: Returns True if the lock was acquired, False if it would block. """ - if self._writes == 0: - fh = self._ensure_valid_handle() - if LockType.to_module(LockType.WRITE) == fcntl.LOCK_EX and fh.mode == "rb": - raise LockROFileError(self.path) - if not self._poll_lock(LockType.WRITE): - return False - self._writes += 1 - self._log_acquired("WRITE LOCK", 0, 1) + if self._writes == 0 and self._reads == 0: + with _safe_exclusion(self): + fh = self._ensure_valid_handle() + if LockType.to_module(LockType.WRITE) == LockType.LOCK_EX and fh.mode == "rb": + raise LockROFileError(self.path) + if not self._poll_lock(LockType.WRITE): + return False + self._writes += 1 + self._log_acquired("WRITE LOCK", 0, 1) + return True + elif self._reads > 0: + return True else: self._reaffirm_lock() From 253591e68afd7a115982228b8fe2d7352d92181f Mon Sep 17 00:00:00 2001 From: John Parent Date: Wed, 15 Apr 2026 17:22:46 -0400 Subject: [PATCH 16/20] fixup tests Signed-off-by: John Parent --- lib/spack/spack/test/llnl/util/lock.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index 1d8bd64cf552ad..b69ac614dcdb4a 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -52,8 +52,7 @@ else: import win32file -# pytestmark = pytest.mark.not_on_windows("does not run on windows") - +FULL_FILE_LOCK = 0 if sys.platform != "win32" else 0xffffffff # # This test can be run with MPI. MPI is "enabled" if we can import @@ -315,7 +314,7 @@ def __call__(self, barrier): class TimeoutWrite: - def __init__(self, lock_path, start=0, length=1): + def __init__(self, lock_path, start=FULL_FILE_LOCK, length=1): self.lock_path = lock_path self.start = start self.length = length @@ -571,7 +570,8 @@ def test_write_lock_timeout_with_multiple_readers_3_2_ranges(lock_path): ) -@pytest.mark.skipif(sys.platform == "win32" or getuid() == 0, reason="user is root") +@pytest.mark.skipif(getuid() == 0, reason="user is root") +@pytest.mark.skipif(sys.platform == "win32", reason="Cannot make readonly dir on Windows") def test_read_lock_on_read_only_lockfile(lock_dir, lock_path): """read-only directory, read-only lockfile.""" touch(lock_path) From fdabe6d9dbb3026496ec4e03b3c0578f4aea3f39 Mon Sep 17 00:00:00 2001 From: John Parent Date: Thu, 16 Apr 2026 15:11:50 -0400 Subject: [PATCH 17/20] Ensure timeouts are respected Signed-off-by: John Parent --- lib/spack/spack/llnl/util/lock.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index 748b5a41cde08b..f1dd8711482d4c 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -687,7 +687,7 @@ def acquire_write(self, timeout: Optional[float] = None) -> bool: timeout = timeout or self.default_timeout if self._writes == 0: - with _safe_exclusion(self): + with _safe_exclusion(self, timeout=timeout): # can raise LockError. wait_time, nattempts = self._lock(LockType.WRITE, timeout=timeout) self._writes += 1 @@ -788,7 +788,7 @@ def downgrade_write_to_read(self, timeout: Optional[float] = None) -> None: timeout = timeout or self.default_timeout if self._writes == 1: - with _safe_downgrade(self): + with _safe_downgrade(self, timeout=timeout): self._log_downgrading() # can raise LockError. wait_time, nattempts = self._lock(LockType.READ, timeout=timeout) @@ -808,7 +808,7 @@ def upgrade_read_to_write(self, timeout: Optional[float] = None) -> None: if self._reads >= 1 and self._writes == 0: self._log_upgrading() - with _safe_exclusion(self): + with _safe_exclusion(self, timeout=timeout): # can raise LockError. wait_time, nattempts = self._lock(LockType.WRITE, timeout=timeout) self._reads = 0 From 7625e07db1d902ff3a1254f16fdec7ed1bf76254 Mon Sep 17 00:00:00 2001 From: John Parent Date: Thu, 16 Apr 2026 15:12:00 -0400 Subject: [PATCH 18/20] Start should be 0 Signed-off-by: John Parent --- lib/spack/spack/test/llnl/util/lock.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index b69ac614dcdb4a..16afb4b1430e41 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -314,7 +314,7 @@ def __call__(self, barrier): class TimeoutWrite: - def __init__(self, lock_path, start=FULL_FILE_LOCK, length=1): + def __init__(self, lock_path, start=0, length=1): self.lock_path = lock_path self.start = start self.length = length From 0257ad8fa6cddd4154e94712a5850e9372b4707c Mon Sep 17 00:00:00 2001 From: John Parent Date: Fri, 15 May 2026 17:23:53 -0400 Subject: [PATCH 19/20] cleanup Signed-off-by: John Parent --- lib/spack/spack/llnl/util/lock.py | 55 ++++++++------------------ lib/spack/spack/test/llnl/util/lock.py | 2 +- 2 files changed, 17 insertions(+), 40 deletions(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index f1dd8711482d4c..1e506f81a97726 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -8,22 +8,9 @@ import socket import time from datetime import datetime -from types import TracebackType -from typing import IO, Callable, ContextManager, Dict, Generator, Optional, Tuple, Type, Union from sys import platform as _platform from types import TracebackType -from typing import ( # novm - IO, - Any, - Callable, - ContextManager, - Dict, - Generator, - Optional, - Tuple, - Type, - Union, -) +from typing import IO, Callable, Dict, Generator, Optional, Tuple, Type # novm from spack.llnl.util import lang, tty @@ -268,7 +255,7 @@ def _safe_downgrade(lock: "Lock", timeout: Optional[float] = None): # is when the exclusive lock is acquired first try: lock._release_lock() - except LockType.LOCK_CATCH as e: + except LockType.LOCK_CATCH: # any error when releasing is an issue raise @@ -377,8 +364,6 @@ def __init__( self.host: Optional[str] = None self.old_host: Optional[str] = None - self._current_lock = None - def _ensure_valid_handle(self) -> IO[bytes]: """Return a valid file handle for the lock file, opening or re-opening as needed. @@ -467,8 +452,8 @@ def __repr__(self) -> str: """Formal representation of the lock.""" rep = f"{self.__class__.__name__}(" for attr, value in self.__dict__.items(): - rep += "{0}={1}, ".format(attr, value.__repr__()) - return "{0})".format(rep.strip(", ")) + rep += f"{attr}={value.__repr__()}, " + return f"{rep.strip(', ')})" def __str__(self) -> str: """Readable string (with key fields) of the lock.""" @@ -545,18 +530,18 @@ def _poll_lock(self, op: int) -> bool: if IS_WINDOWS: overlapped = _setup_overlapped(self._start) range_low, range_high = _low_high(self._length) - hfile = win32file._get_osfhandle(self._file_ref.fh.fileno()) + hfile = win32file._get_osfhandle(fh) win32file.LockFileEx( - hfile, module_op | LockType.LOCK_NB, range_low, range_high, overlapped # flags + hfile, + module_op | LockType.LOCK_NB, + range_low, + range_high, + overlapped, # flags ) else: # Try to get the lock (will raise if not available.) fcntl.lockf( - self._fh, - module_op | LockType.LOCK_NB, - self._length, - self._start, - os.SEEK_SET, + fh, module_op | LockType.LOCK_NB, self._length, self._start, os.SEEK_SET ) # help for debugging distributed locking @@ -584,15 +569,6 @@ def _poll_lock(self, op: int) -> bool: return False - - def _ensure_parent_directory(self) -> str: - parent = os.path.dirname(self.path) - # relative paths to lockfiles in the current directory have no parent - if not parent: - return "." - os.makedirs(parent, exist_ok=True) - return parent - def _read_log_debug_data(self) -> None: """Read PID and host data out of the file if it is there.""" assert self._file_ref is not None, "cannot read debug log without the file being set" @@ -632,7 +608,11 @@ def _release_lock(self): win32file.UnlockFileEx(hfile, low_range, high_range, overlapped) else: fcntl.lockf( - self._file_ref.fh.fileno(), LockType.LOCK_UN, self._length, self._start, os.SEEK_SET + self._file_ref.fh.fileno(), + LockType.LOCK_UN, + self._length, + self._start, + os.SEEK_SET, ) def _unlock(self): @@ -665,7 +645,6 @@ def acquire_read(self, timeout: Optional[float] = None) -> bool: # can raise LockError. wait_time, nattempts = self._lock(LockType.READ, timeout=timeout) self._reads += 1 - self._current_lock = LockType.LOCK_SH # Log if acquired, which includes counts when verbose self._log_acquired("READ LOCK", wait_time, nattempts) return True @@ -691,7 +670,6 @@ def acquire_write(self, timeout: Optional[float] = None) -> bool: # can raise LockError. wait_time, nattempts = self._lock(LockType.WRITE, timeout=timeout) self._writes += 1 - self._current_lock = LockType.LOCK_EX # Log if acquired, which includes counts when verbose self._log_acquired("WRITE LOCK", wait_time, nattempts) @@ -759,7 +737,6 @@ def try_acquire_write(self) -> bool: self._log_acquired("WRITE LOCK", 0, 1) return True elif self._reads > 0: - return True else: self._reaffirm_lock() diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index 16afb4b1430e41..1d510471a6145c 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -52,7 +52,7 @@ else: import win32file -FULL_FILE_LOCK = 0 if sys.platform != "win32" else 0xffffffff +FULL_FILE_LOCK = 0 if sys.platform != "win32" else 0xFFFFFFFF # # This test can be run with MPI. MPI is "enabled" if we can import From d7382ed2e043288cae3a49b17c4fbd1a9033b323 Mon Sep 17 00:00:00 2001 From: John Parent Date: Thu, 21 May 2026 15:56:28 -0400 Subject: [PATCH 20/20] wip initial support Signed-off-by: John Parent --- lib/spack/spack/llnl/util/lock.py | 198 ++++++++++++++++--------- lib/spack/spack/test/llnl/util/lock.py | 30 +++- 2 files changed, 156 insertions(+), 72 deletions(-) diff --git a/lib/spack/spack/llnl/util/lock.py b/lib/spack/spack/llnl/util/lock.py index 1e506f81a97726..bba0834c082e14 100644 --- a/lib/spack/spack/llnl/util/lock.py +++ b/lib/spack/spack/llnl/util/lock.py @@ -177,7 +177,7 @@ def _setup_overlapped(offset): @contextlib.contextmanager def _safe_exclusion(lock: "Lock", timeout: Optional[float] = None): """Lock upgrade guard for Windows, designed to allow for lock upgrading - which Windows file locks do not natively support + which Windows file locks do not natively support. Uses one additional lockfile as a "gate" for upgrades. If a process is attempting to upgrade from a read to a write @@ -190,74 +190,74 @@ def _safe_exclusion(lock: "Lock", timeout: Optional[float] = None): on the gate. This gate file prevents any other process/lock from catching the lock with a competing write during the release part of the upgrade. + + Note: on Windows the effective timeout for a caller is up to 2x ``timeout`` + because the gate acquisition and the primary lock acquisition each consume + up to ``timeout`` seconds independently. + + Note: ``.gate_lock`` sidecar files are never deleted; this is a known + limitation. They are harmless empty files and accumulate only up to one + per unique lock path. """ if not IS_WINDOWS: yield return timeout = timeout or lock.default_timeout - # Lock used for exclusive lock access is based on lock it's facilliating + # Lock used for exclusive lock access is based on lock it's facilitating # exclusive access to, ensure exclusion locks are as distinct as the locks # they're gating. Name is .gate_lock i.e. db.lock.gate_lock - lock_path = lock.path + ".gate_lock" - lk = Lock(lock_path, start=lock._start, length=lock._length) - # cache previous value for read locks - _reads = lock._reads + gate_lock_path = lock.path + ".gate_lock" + gate_lk = Lock(gate_lock_path, start=lock._start, length=lock._length) + acquired = False + _read_dropped = False try: - # don't use the acquire lock method to avoid - # recursion - lk._lock(LockType.WRITE, timeout=timeout) + # don't use the acquire lock method to avoid recursion + gate_lk._lock(LockType.WRITE, timeout=timeout) + acquired = True # lock gate acquired, drop read lock if there is one - # cannot use release read as we need to drop - # the read lock if there is one, not just decrement the nested - # lock tracker - if _reads: + # cannot use release_read as we need to drop the OS-level lock, + # not just decrement the nested lock tracker + if lock._reads: lock._release_lock() + _read_dropped = True yield except LockError: - # doesn't matter what the error was - # if there was a read lock previously - # restore it - if _reads > 0: + # If the read was actually dropped before the error, restore it. + # Do NOT rely on lock._reads here — it still reflects the pre-drop + # value whether or not the gate was ever acquired. + if _read_dropped: lock._lock(LockType.READ, timeout=timeout) raise finally: - if lk.is_write_locked(): - lk.release_write() + if acquired: + gate_lk._release_lock() @contextlib.contextmanager -def _safe_downgrade(lock: "Lock", timeout: Optional[float] = None): - """Windows can hold shared and exclusive locks on the same time, but only - if the exclusive lock is held first. When the shared lock overlaps the exclusive - range, the exclusive lock must also be unlocked and must be unlocked *first* - From the MSFT docs: - - A shared lock can overlap an exclusive lock if both locks were created using - the same file handle. If the same range is locked with an exclusive and a - shared lock, two unlock operations are necessary to unlock the region; the - first unlock operation unlocks the exclusive lock, the second unlock - operation unlocks the shared lock. - - Since we are "downgrading" we no longer care to have an exclusive lock - so we just give it up +def _safe_downgrade(lock: "Lock"): + """Downgrade an exclusive lock to a shared lock. + + On POSIX, fcntl.lockf(LOCK_SH) atomically replaces the exclusive lock with a shared + lock in a single syscall; no _release_lock() is needed afterward. + + On Windows, LockFileEx permits overlapping locks on the same file handle (exclusive + then shared). However, LOCKFILE_FAIL_IMMEDIATELY rejects the overlapping request even + from the same handle, so a blocking LockFileEx call (without LOCKFILE_FAIL_IMMEDIATELY) + is required to stack the shared lock. UnlockFileEx then removes the exclusive lock + first (FIFO order), leaving only the shared lock. The blocking call always returns + immediately here because the same handle already holds the exclusive lock. """ - timeout = timeout or lock.default_timeout - # yield first, this allows the calling context to take the - # "downgraded lock". On Windows, we don't downgrade atomically, we - # just take an additional lock and then release the exclusive. yield - # From the docstring: - # first unlock operation unlocks the exclusive lock, the second unlock - # operation unlocks the shared lock. - # - # This will remove the exclusive lock, unlockfile works in a fifo and the - # only possible ordering where we have a shared and exclusive lock - # is when the exclusive lock is acquired first - try: + assert lock._file_ref is not None, "_safe_downgrade called without an open file handle" + fh = lock._file_ref.fh.fileno() + if IS_WINDOWS: + overlapped = _setup_overlapped(lock._start) + range_low, range_high = _low_high(lock._length) + hfile = win32file._get_osfhandle(fh) + win32file.LockFileEx(hfile, LockType.LOCK_SH, range_low, range_high, overlapped) lock._release_lock() - except LockType.LOCK_CATCH: - # any error when releasing is an issue - raise + else: + fcntl.lockf(fh, LockType.LOCK_SH, lock._length, lock._start, os.SEEK_SET) class LockType: @@ -476,6 +476,22 @@ def __setstate__(self, state): self._reads = 0 self._writes = 0 + def __del__(self) -> None: + # On Windows, os.unlink() fails (WinError 32) if any process has the file open, + # even without a byte-range lock held. When a Lock is dropped without an explicit + # release (e.g. a test that raises before cleanup), CPython's reference counting + # calls __del__ immediately, which lets us close the handle before the filesystem + # tries to delete the file. On POSIX the tracker intentionally keeps handles open + # (see OpenFileTracker); guard so we never change POSIX behaviour. + if not IS_WINDOWS or self._file_ref is None: + return + try: + FILE_TRACKER.release(self._file_ref) + except Exception: + pass + self._file_ref = None + self._cached_key = None + def _lock(self, op: int, timeout: Optional[float] = None) -> Tuple[float, int]: """This takes a lock using POSIX locks (``fcntl.lockf``). @@ -533,10 +549,10 @@ def _poll_lock(self, op: int) -> bool: hfile = win32file._get_osfhandle(fh) win32file.LockFileEx( hfile, - module_op | LockType.LOCK_NB, + module_op | LockType.LOCK_NB, # flags range_low, range_high, - overlapped, # flags + overlapped, ) else: # Try to get the lock (will raise if not available.) @@ -616,7 +632,7 @@ def _release_lock(self): ) def _unlock(self): - """Releases a lock using POSIX locks (``fcntl.lockf``) + """Release the OS-level lock and reset all lock state. Releases the lock regardless of mode. Note that read locks may be masquerading as write locks, but this removes either. @@ -627,6 +643,16 @@ def _unlock(self): self._release_lock() + # On Windows, os.unlink() fails (WinError 32) if any process has the file open. + # Release our file-tracker reference here so the handle closes and the file + # can be deleted. On POSIX the handle is kept alive for performance (the next + # lock operation skips reopening it) and because closing it would drop all + # fcntl locks this process holds on the inode — neither concern applies on Windows. + if IS_WINDOWS: + FILE_TRACKER.release(self._file_ref) + self._file_ref = None + self._cached_key = None + self._reads = 0 self._writes = 0 @@ -725,24 +751,52 @@ def try_acquire_write(self) -> bool: """Non-blocking attempt to acquire an exclusive write lock. Returns True if the lock was acquired, False if it would block. + Handles three cases: no lock held (fresh acquire), read held (upgrade), + or write already held (nested). """ - if self._writes == 0 and self._reads == 0: - with _safe_exclusion(self): - fh = self._ensure_valid_handle() - if LockType.to_module(LockType.WRITE) == LockType.LOCK_EX and fh.mode == "rb": - raise LockROFileError(self.path) - if not self._poll_lock(LockType.WRITE): - return False - self._writes += 1 - self._log_acquired("WRITE LOCK", 0, 1) - return True - elif self._reads > 0: - return True - else: + if self._writes > 0: + # Already hold exclusive — just nest self._reaffirm_lock() self._writes += 1 return True + fh = self._ensure_valid_handle() + if LockType.to_module(LockType.WRITE) == LockType.LOCK_EX and fh.mode == "rb": + raise LockROFileError(self.path) + + if not IS_WINDOWS: + # POSIX: fcntl atomically replaces shared with exclusive, or acquires fresh + if not self._poll_lock(LockType.WRITE): + return False + self._reads = 0 + self._writes = 1 + self._log_acquired("WRITE LOCK", 0, 1) + return True + + # Windows: gate coordination prevents races during acquisition and upgrade. + # Both the gate and the primary lock are attempted non-blockingly so that + # this method never spins or sleeps. + gate_lk = Lock(self.path + ".gate_lock", start=self._start, length=self._length) + gate_lk._ensure_valid_handle() + if not gate_lk._poll_lock(LockType.WRITE): + return False + had_read = self._reads > 0 + try: + if had_read: + self._release_lock() # drop shared before acquiring exclusive + if not self._poll_lock(LockType.WRITE): + if had_read: + # Unreachable in practice: holding the gate means no competing + # exclusive writer can exist while we attempt to upgrade. + self._poll_lock(LockType.READ) + return False + finally: + gate_lk._release_lock() + self._reads = 0 + self._writes = 1 + self._log_acquired("WRITE LOCK", 0, 1) + return True + def is_write_locked(self) -> bool: """Returns ``True`` if the path is write locked, otherwise, ``False``""" try: @@ -765,13 +819,16 @@ def downgrade_write_to_read(self, timeout: Optional[float] = None) -> None: timeout = timeout or self.default_timeout if self._writes == 1: - with _safe_downgrade(self, timeout=timeout): - self._log_downgrading() - # can raise LockError. - wait_time, nattempts = self._lock(LockType.READ, timeout=timeout) + self._log_downgrading() + start_time = time.monotonic() + with _safe_downgrade(self): + # Update state inside the yield body, before the post-yield + # OS-level transition in _safe_downgrade. The transient + # inconsistency (_reads=1 while exclusive is still held) is + # harmless because Lock is not thread-safe. self._reads = 1 self._writes = 0 - self._log_downgraded(wait_time, nattempts) + self._log_downgraded(time.monotonic() - start_time, 1) else: raise LockDowngradeError(self.path) @@ -853,7 +910,8 @@ def release_write(self, release_fn: ReleaseFnType = None) -> bool: result = release_fn() if self._reads > 0: - self._lock(LockType.READ) + with _safe_downgrade(self): + pass else: self._unlock() # can raise LockError. diff --git a/lib/spack/spack/test/llnl/util/lock.py b/lib/spack/spack/test/llnl/util/lock.py index 1d510471a6145c..894ad28dfb7504 100644 --- a/lib/spack/spack/test/llnl/util/lock.py +++ b/lib/spack/spack/test/llnl/util/lock.py @@ -52,7 +52,6 @@ else: import win32file -FULL_FILE_LOCK = 0 if sys.platform != "win32" else 0xFFFFFFFF # # This test can be run with MPI. MPI is "enabled" if we can import @@ -649,7 +648,10 @@ def test_upgrade_read_to_write(private_lock_path): lock.release_read() assert lock._reads == 0 assert lock._writes == 0 - assert not lock._file_ref.fh.closed # recycle the file handle for next lock + # On Windows, _unlock() closes the file handle so the file can be deleted + # (Windows raises WinError 32 on unlink if any process has the file open). + if not lk.IS_WINDOWS: + assert not lock._file_ref.fh.closed # recycle the file handle for next lock def test_release_write_downgrades_to_shared(private_lock_path): @@ -1419,6 +1421,30 @@ def test_try_acquire_write(tmp_path: pathlib.Path): lock.release_read() +def test_try_acquire_write_from_read(tmp_path: pathlib.Path): + """try_acquire_write must properly upgrade an existing read to a write.""" + lock = lk.Lock(str(tmp_path / "lockfile")) + + # Acquire a read, then non-blockingly upgrade to write + lock.acquire_read() + assert lock._reads == 1 + assert lock._writes == 0 + + assert lock.try_acquire_write() is True + assert lock._reads == 0 + assert lock._writes == 1 + + # Nested try_acquire_write while holding write + assert lock.try_acquire_write() is True + assert lock._writes == 2 + + lock.release_write() + assert lock._writes == 1 + lock.release_write() + assert lock._reads == 0 + assert lock._writes == 0 + + def _child_fails_to_acquire_read(_lock: lk.Lock): try: _lock.acquire_read(timeout=1e-9)