Coverage for cuda/core/utils/_program_cache/_file_stream.py: 89.08%

284 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-13 01:38 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4 

5"""On-disk bytes-in / bytes-out program cache. 

6 

7Atomic writes via :func:`os.replace`. Concurrent readers see either the 

8old entry or the new one, never a partial file. Each entry is the raw 

9compiled binary so files are directly consumable by external NVIDIA 

10tools (``cuobjdump``, ``nvdisasm``, ``cuda-gdb``). 

11""" 

12 

13from __future__ import annotations 

14 

15import contextlib 

16import errno 

17import hashlib 

18import os 

19import tempfile 

20import threading 

21import time 

22from pathlib import Path 

23from typing import Any, Callable, Iterable 

24 

25from cuda.core._module import ObjectCode 

26 

27from ._abc import ProgramCacheResource, _as_key_bytes, _extract_bytes 

28 

29_ENTRIES_SUBDIR = "entries" 

30_TMP_SUBDIR = "tmp" 

31# Temp files older than this are assumed to belong to a crashed writer and 

32# are eligible for cleanup. Picked large enough that no real ``os.replace`` 

33# write should still be in flight (writes are bounded by mkstemp + write + 

34# fsync + replace, all fast on healthy disks). 

35_TMP_STALE_AGE_SECONDS = 3600 

36 

37 

38_SHARING_VIOLATION_WINERRORS = (5, 32, 33) # ERROR_ACCESS_DENIED, ERROR_SHARING_VIOLATION, ERROR_LOCK_VIOLATION 

39_REPLACE_RETRY_DELAYS = (0.0, 0.005, 0.010, 0.020, 0.050, 0.100) # ~185ms budget 

40 

41 

42# Exposed as a module-level flag so tests can toggle it without monkeypatching 

43# ``os.name`` itself (pathlib reads ``os.name`` at instantiation time). 

44_IS_WINDOWS = os.name == "nt" 

45 

46 

47def _stat_key(st: os.stat_result) -> tuple[int, int, int]: 

48 """Stat fingerprint used by every stat-guarded path. 

49 

50 ``(st_ino, st_size, st_mtime_ns)`` is the smallest triple that 

51 distinguishes "same file" from "file replaced under us": ``st_ino`` 

52 catches replacement, ``st_size`` and ``st_mtime_ns`` catch a write 

53 that happens to land on the same inode (e.g. truncate-and-write in 

54 place). Centralised so all four readers compare the same fields. 

55 """ 

56 return (st.st_ino, st.st_size, st.st_mtime_ns) 1rvwxybopmqfezAnBWClDEacPgdFhjGM23st

57 

58 

def _default_cache_dir() -> Path:
    """OS-conventional default location for the file-stream cache.

    Resolves to the user-cache root for the calling user, with a
    ``program-cache`` leaf so future tooling can place sibling caches
    under the same ``cuda-python`` vendor directory:

    * Linux: ``$XDG_CACHE_HOME/cuda-python/program-cache``
      (default ``~/.cache/cuda-python/program-cache`` per the XDG Base
      Directory spec). A *relative* ``$XDG_CACHE_HOME`` is ignored and
      the default is used instead: the spec declares relative values
      invalid, and honouring one would make the cache location depend
      on the process's current working directory.
    * Windows: ``%LOCALAPPDATA%\\cuda-python\\program-cache``
      (Windows uses local AppData -- caches don't roam; falls back to
      ``~/AppData/Local`` if the env var is unset).

    CUDA does not support macOS, so no macOS branch is provided.

    Returns
    -------
    Path
        The cache directory path. Nothing is created on disk here.
    """
    if _IS_WINDOWS:
        local_app_data = os.environ.get("LOCALAPPDATA")
        root = Path(local_app_data) if local_app_data else Path.home() / "AppData" / "Local"
    else:
        xdg = os.environ.get("XDG_CACHE_HOME")
        # Unset, empty, and relative values all fall back to ``~/.cache``.
        if xdg and Path(xdg).is_absolute():
            root = Path(xdg)
        else:
            root = Path.home() / ".cache"
    return root / "cuda-python" / "program-cache"

82 

83 

def _with_sharing_retry(
    op: Callable[..., Any], *args: Any, on_exhausted: Callable[..., Any] | None = None, **kwargs: Any
) -> Any:
    """Call ``op(*args, **kwargs)``, retrying transient Windows sharing
    violations within the bounded ``_REPLACE_RETRY_DELAYS`` schedule.

    On Windows, ``os.replace``/``read_bytes``/``unlink`` can fail with
    winerror 5/32/33 (or a bare EACCES, as classified by
    ``_is_windows_sharing_violation``) while another process briefly
    holds the file open without share-delete rights. Those failures are
    retried; every other ``PermissionError`` (real ACLs, unexpected
    winerror) is re-raised on the spot.

    A successful call returns ``op``'s result unchanged, and any
    exception that is not a ``PermissionError`` (``FileNotFoundError``
    included) propagates untouched. Once every attempt has failed with
    a sharing violation, the result of ``on_exhausted(last_exc)`` is
    returned when a handler was supplied; otherwise the last
    sharing-violation exception is raised.
    """
    failure: PermissionError | None = None
    for pause in _REPLACE_RETRY_DELAYS:
        # A zero entry means "try right away" -- no sleep before that attempt.
        if pause:
            time.sleep(pause)
        try:
            outcome = op(*args, **kwargs)
        except PermissionError as err:
            if _is_windows_sharing_violation(err):
                failure = err
                continue
            raise
        else:
            return outcome

    if on_exhausted is None:
        assert failure is not None  # at least one iteration ran and caught a PermissionError
        raise failure
    return on_exhausted(failure)

115 

116 

def _replace_with_sharing_retry(tmp_path: Path, target: Path) -> bool:
    """Atomically rename ``tmp_path`` onto ``target``, retrying Windows
    sharing/lock violations.

    Returns ``True`` once the rename succeeds. Returns ``False`` only
    when the retry budget ran out on a genuine Windows sharing
    violation, in which case the caller treats the cache write as
    dropped. Every other ``PermissionError`` (ACLs, read-only
    directory, unexpected winerror, or any POSIX failure) propagates.

    ``ERROR_ACCESS_DENIED`` (winerror 5) counts as a sharing violation
    because Windows reports it when a file is held open without
    ``FILE_SHARE_WRITE`` (Python's default for ``open(p, "wb")``) or
    while an earlier unlink is still ``PENDING_DELETE`` -- both
    conditions are transient.
    """
    # ``os.replace`` returns ``None`` on success, so a private sentinel is
    # used to tell "budget exhausted" apart from "renamed".
    gave_up = object()
    outcome = _with_sharing_retry(os.replace, tmp_path, target, on_exhausted=lambda _exc: gave_up)
    return outcome is not gave_up

136 

137 

def _stat_and_read_with_sharing_retry(path: Path) -> tuple[os.stat_result, bytes]:
    """Return ``(path.stat(), path.read_bytes())``, briefly retrying
    transient Windows sharing-violation ``PermissionError``s.

    A read can race a rewriter's ``os.replace``: on Windows the
    destination may be momentarily inaccessible (winerror 5/32/33)
    while the rename completes. Sharing the retry budget used for
    replaces keeps that contention from being reported as a real read
    failure.

    Raises ``FileNotFoundError`` when the file is missing, and also
    when the Windows sharing-retry budget is exhausted (chained from
    the last ``PermissionError``). A non-Windows ``PermissionError``
    propagates.

    On Windows a bare EACCES (errno 13, no ``winerror`` attribute) is
    treated as transient as well: ``io.open`` sometimes reports a
    pending-delete or share-mode mismatch that way, and it cannot be
    told apart from a true sharing violation here. Genuine ACL problems
    on a path the cache owns would fail consistently, and the bounded
    budget keeps the cost of retrying them negligible.
    """

    def _give_up(cause: PermissionError) -> None:
        # Surface an exhausted budget as a plain cache miss.
        raise FileNotFoundError(path) from cause

    snapshot = _with_sharing_retry(
        lambda: (path.stat(), path.read_bytes()),
        on_exhausted=_give_up,
    )
    return snapshot  # type: ignore[no-any-return]

165 

166 

# True when ``os.utime`` accepts an open file descriptor on this platform.
_UTIME_SUPPORTS_FD = os.utime in os.supports_fd


def _touch_atime(path: Path, st_before: os.stat_result) -> None:
    """Set ``path``'s atime to "now" while keeping its mtime, but only
    if the file still has the stat fingerprint of ``st_before``.

    Eviction orders entries by ``st_atime``, so a read has to refresh
    atime no matter how the OS or filesystem is configured:

    * Linux ``relatime`` (the default) only updates atime when the
      existing atime is older than mtime, which would skew LRU after
      the first read of an entry.
    * NTFS on Windows Vista+ ships with last-access updates disabled
      (``NtfsDisableLastAccessUpdate``) and most installations leave
      them disabled, so a bare read never moves atime.
    * ``noatime`` mounts disable updates entirely.

    ``os.utime`` with explicit times sidesteps all of these and writes
    atime directly. The stat guard matters: if another process
    ``os.replace``-d a fresh entry onto ``path`` between the read and
    this touch, writing ``st_before.st_mtime_ns`` unconditionally would
    roll the new entry's mtime back and could mislead the eviction
    stat guard (which compares ``(ino, size, mtime_ns)``) into deleting
    a freshly committed file.

    Where ``os.utime`` accepts file descriptors, the fstat and the
    utime both go through one open fd, so the comparison and the update
    target the same file even if the path is replaced in between.

    Otherwise (Windows) the fallback re-stats the path and accepts a
    small window between that stat and the utime. The worst case is
    the racing writer's mtime being rolled back slightly; the eviction
    stat guard then declines to evict that entry, which costs a cache
    miss rather than a wrong eviction.

    Best-effort: every ``OSError`` (read-only mount, restrictive ACLs,
    ...) is swallowed. The size cap still bounds the cache, but
    eviction degrades toward FIFO.
    """
    now_ns = time.time_ns()

    if not _UTIME_SUPPORTS_FD:
        # Path-based fallback (Windows); residual TOCTOU window as
        # described in the docstring.
        try:
            current = path.stat()
        except OSError:
            return
        if _stat_key(current) == _stat_key(st_before):
            with contextlib.suppress(OSError):
                os.utime(path, ns=(now_ns, st_before.st_mtime_ns))
        return

    try:
        handle = os.open(path, os.O_RDONLY)
    except OSError:
        return
    try:
        try:
            current = os.fstat(handle)
        except OSError:
            return
        if _stat_key(current) == _stat_key(st_before):
            with contextlib.suppress(OSError):
                os.utime(handle, ns=(now_ns, st_before.st_mtime_ns))
    finally:
        # Always release the descriptor, including on the early returns.
        os.close(handle)

241 

242 

def _is_windows_sharing_violation(exc: BaseException) -> bool:
    """Tell whether ``exc`` is a Windows sharing/lock violation, i.e.
    one that :func:`_unlink_with_sharing_retry` would have retried.

    Best-effort callers use this to recognise the exhausted-retry case
    while still letting other ``PermissionError`` instances (POSIX ACL
    issues, Windows non-sharing winerrors) propagate -- those indicate
    real configuration problems, not transient contention.

    The ``EACCES`` fallback applies only when ``winerror`` is absent: a
    bare ``EACCES`` is how ``io.open`` reports a pending-delete or
    share-mode mismatch on Windows. When ``winerror`` IS set but is not
    one of the sharing codes, the OS has said exactly what failed and
    it is not a sharing violation; treating it as transient would
    silently swallow real errors such as a corrupt ACL.

    Always ``False`` off Windows and for non-``PermissionError``s.
    """
    if not (_IS_WINDOWS and isinstance(exc, PermissionError)):
        return False
    code = getattr(exc, "winerror", None)
    if code is None:
        # No winerror attached: only a bare EACCES counts as transient.
        return exc.errno == errno.EACCES
    return code in _SHARING_VIOLATION_WINERRORS

268 

269 

def _unlink_with_sharing_retry(path: Path) -> None:
    """Remove ``path``, retrying Windows sharing/lock violations.

    On Windows, ``Path.unlink`` raises ``PermissionError`` (winerror 5,
    32, or 33; sometimes a bare ``EACCES``) while another process holds
    the file open without ``FILE_SHARE_DELETE``. Python's default
    ``open(p, "rb")`` does not pass that flag, so a reader in another
    process briefly blocks the unlink. The same backoff budget as
    :func:`_replace_with_sharing_retry` is applied so that transient
    contention does not become a propagated error.

    Raises ``FileNotFoundError`` if the file is absent, the last
    ``PermissionError`` if the Windows retry budget is exhausted, and
    any non-sharing (or non-Windows) ``PermissionError`` immediately.
    Best-effort callers should use
    :func:`_is_windows_sharing_violation` to filter out the
    exhausted-retry case and re-raise everything else.
    """
    remove = path.unlink
    _with_sharing_retry(remove)

289 

290 

def _prune_if_stat_unchanged(path: Path, st_before: os.stat_result) -> None:
    """Delete ``path`` only if its stat fingerprint still equals that
    of ``st_before``.

    This protects against a cross-process race in which a writer
    atomically replaces the file (via ``os.replace``) after the caller
    took its snapshot but before it prunes. A differing
    ``(ino, size, mtime_ns)`` means someone else wrote a new file, and
    their work must not be deleted. A narrow window remains between the
    re-stat and the unlink; in the worst case a very recently written
    entry is removed and the next read recompiles.

    Best-effort: a missing file is ignored, and a Windows sharing
    violation that outlives the retry budget leaves the file in place
    for a later pass. Any other ``PermissionError`` is re-raised.
    """
    try:
        current = path.stat()
    except FileNotFoundError:
        return

    if _stat_key(st_before) == _stat_key(current):
        try:
            _unlink_with_sharing_retry(path)
        except FileNotFoundError:
            return
        except PermissionError as err:
            # Only the exhausted-Windows-sharing case is swallowed. POSIX
            # ACL errors and Windows non-sharing winerrors are real
            # configuration problems and must surface.
            if _is_windows_sharing_violation(err):
                return
            raise

323 

324 

325class FileStreamProgramCache(ProgramCacheResource): 

326 """Persistent program cache backed by a directory of atomic files. 

327 

328 Designed for multi-process use: writes stage a temporary file and then 

329 :func:`os.replace` it into place, so concurrent readers never observe a 

330 partially-written entry. Each entry on disk is the raw compiled binary 

331 -- cubin / PTX / LTO-IR -- with no header, framing, or pickle wrapper, 

332 so the files are directly consumable by external NVIDIA tools 

333 (``cuobjdump``, ``nvdisasm``, ``cuda-gdb``). 

334 

335 Eviction is by least-recently-*read* time: every successful read bumps 

336 the entry's ``atime``, and the size enforcer evicts oldest atime 

337 first. 

338 

339 .. note:: **Best-effort writes.** 

340 

341 On Windows, ``os.replace`` raises ``PermissionError`` (winerror 

342 32 / 33) when another process holds the target file open. This 

343 backend retries with bounded backoff (~185 ms) and, if still 

344 failing, drops the cache write silently and returns success-shaped 

345 control flow. The next call will see no entry and recompile. POSIX 

346 and other ``PermissionError`` codes propagate. 

347 

348 .. note:: **Atomic for readers, not crash-durable.** 

349 

350 Each entry's temp file is ``fsync``-ed before ``os.replace``, but 

351 the containing directory is **not** ``fsync``-ed. A host crash 

352 between write and the next directory commit may lose recently 

353 added entries; surviving entries remain consistent. 

354 

355 .. note:: **Cross-version sharing.** 

356 

357 The cache is safe to share across ``cuda.core`` patch releases: 

358 every key produced by :func:`make_program_cache_key` encodes the 

359 relevant backend/compiler/runtime fingerprints for its 

360 compilation path (NVRTC entries pin the NVRTC version, NVVM 

361 entries pin the libNVVM library and IR versions, PTX/linker 

362 entries pin the chosen linker backend and its version -- and, 

363 when the cuLink/driver backend is selected, the driver version 

364 too; nvJitLink-backed PTX entries are deliberately 

365 driver-version independent). Bumping ``_KEY_SCHEMA_VERSION`` 

366 (mixed into the digest by ``make_program_cache_key``) produces 

367 new keys that don't collide with old entries: post-bump 

368 lookups miss the old on-disk paths, and the orphaned files 

369 are reaped on the next size-cap eviction pass. Entries are 

370 stored verbatim as the compiled binary, so cross-patch sharing 

371 only requires that the compiler-pinning surface above stays 

372 stable -- there is no Python-pickle compatibility involved. 

373 

374 Parameters 

375 ---------- 

376 path: 

377 Directory that owns the cache. Created if missing. If omitted, 

378 the OS-conventional user cache directory is used: 

379 ``$XDG_CACHE_HOME/cuda-python/program-cache`` (Linux, defaulting 

380 to ``~/.cache/cuda-python/program-cache``) or 

381 ``%LOCALAPPDATA%\\cuda-python\\program-cache`` (Windows). 

382 max_size_bytes: 

383 Optional soft cap on total on-disk size. Enforced opportunistically 

384 on writes; concurrent writers may briefly exceed it. Eviction is by 

385 least-recently-read time (oldest ``st_atime`` first). 

386 """ 

387 

def __init__(
    self,
    path: str | os.PathLike[str] | None = None,
    *,
    max_size_bytes: int | None = None,
) -> None:
    """Open (creating if needed) the cache rooted at ``path``.

    ``path`` falls back to ``_default_cache_dir()`` when omitted.
    ``max_size_bytes`` is an optional size cap; ``None`` disables size
    tracking. Raises ``ValueError`` for a cap that is zero or negative.
    """
    capped = max_size_bytes is not None
    if capped and max_size_bytes <= 0:
        raise ValueError("max_size_bytes must be positive or None (0 would evict every write)")

    root = _default_cache_dir() if path is None else Path(path)
    self._root = root
    self._entries = root / _ENTRIES_SUBDIR
    self._tmp = root / _TMP_SUBDIR
    self._max_size_bytes = max_size_bytes

    root.mkdir(parents=True, exist_ok=True)
    for subdir in (self._entries, self._tmp):
        subdir.mkdir(exist_ok=True)

    # Opportunistic startup sweep of orphaned temp files left behind by
    # crashed writers. It is age-based, so in-flight writes from other
    # processes are preserved.
    self._sweep_stale_tmp_files()

    # Incremental size tracker. Without it, every ``__setitem__`` would
    # have to walk ``entries/`` + ``tmp/`` to compute the total. With it,
    # a write adjusts the tracker by its net delta and the walk happens
    # only on eviction. The tracker is seeded by one full scan here and
    # refreshed on every eviction pass, so drift caused by other
    # processes corrects itself the next time eviction runs. The lock
    # keeps threads of this process from interleaving the
    # read-modify-write on the int. With no cap the tracker is dead
    # weight, so the seeding scan is skipped.
    self._size_lock = threading.Lock()
    self._tracked_size_bytes = self._compute_total_size() if capped else 0

420 

421 # -- key-to-path helpers ------------------------------------------------- 

422 

def _path_for_key(self, key: object) -> Path:
    """Map ``key`` to its entry path, ``entries/<2 hex>/<62 hex>``."""
    raw_key = _as_key_bytes(key)
    # The key is hashed to a fixed-length identifier so that keys of any
    # length stay within per-component filename limits (typically 255 on
    # ext4 / NTFS).
    #
    # FIPS: the hash must be a FIPS-approved algorithm. FIPS-enforcing
    # systems can disable non-approved hashlib algorithms (for example
    # blake2b) at the OpenSSL level. See #2043.
    #
    # With a 256-bit SHA-256 digest the cache relies on collision
    # resistance for key uniqueness -- two distinct keys landing on the
    # same path is astronomically unlikely (~2^128 practical collision
    # work).
    hex_digest = hashlib.sha256(raw_key, usedforsecurity=False).hexdigest()
    shard, leaf = hex_digest[:2], hex_digest[2:]
    return self._entries / shard / leaf

439 

440 # -- mapping API --------------------------------------------------------- 

441 

def __getitem__(self, key: object) -> bytes:
    """Return the stored bytes for ``key``; raise ``KeyError`` on a miss."""
    entry = self._path_for_key(key)
    try:
        # The helper retries transient Windows sharing-violation
        # PermissionErrors, so a racing rewriter does not turn a hit into
        # a spurious propagated error.
        snapshot, payload = _stat_and_read_with_sharing_retry(entry)
    except FileNotFoundError:
        raise KeyError(key) from None
    else:
        # Move atime to "now" so that eviction (ordered by st_atime) sees
        # this read as the entry's latest use. Best-effort: a ``noatime``
        # mount or restrictive ACLs may refuse, in which case the cap
        # still bounds size but eviction degrades toward FIFO rather than
        # true LRU.
        _touch_atime(entry, snapshot)
        return payload

458 

def __setitem__(self, key: object, value: bytes | bytearray | memoryview | ObjectCode) -> None:
    """Store ``value`` under ``key`` via a staged temp file + atomic rename.

    The bytes are written to a fresh file under ``tmp/``, flushed and
    ``fsync``-ed, then moved onto the entry path with ``os.replace``.
    If the rename keeps hitting a Windows sharing violation until the
    retry budget runs out, the write is dropped silently (the temp
    file is removed and the method returns normally). Any other
    failure removes the temp file and propagates.

    When a size cap is configured, the size tracker is adjusted by the
    net change and ``_enforce_size_cap`` runs only if the tracked total
    then exceeds the cap.
    """
    data = _extract_bytes(value)
    target = self._path_for_key(key)
    target.parent.mkdir(parents=True, exist_ok=True)
    # Re-create ``tmp/`` if something deleted it after ``__init__``
    # (operators clearing the cache by hand, ``rm -rf cache_dir/tmp``,
    # another process's overzealous wipe). Cheap and idempotent;
    # without it, every subsequent write would crash with
    # FileNotFoundError even though we could trivially recover.
    self._tmp.mkdir(parents=True, exist_ok=True)

    # Stat the existing entry (if any) BEFORE the replace so we can
    # update the tracker by the net delta. A racing writer that lands
    # an ``os.replace`` between this stat and our own makes ``old_size``
    # slightly off; the next ``_enforce_size_cap`` reconciles by
    # re-scanning. Skipped when ``max_size_bytes is None`` (no tracker).
    old_size = 0
    if self._max_size_bytes is not None:
        try:
            old_size = target.stat().st_size
        except FileNotFoundError:
            old_size = 0

    fd, tmp_name = tempfile.mkstemp(prefix="entry-", dir=self._tmp)
    tmp_path = Path(tmp_name)
    try:
        with os.fdopen(fd, "wb") as fh:
            fh.write(data)
            fh.flush()
            os.fsync(fh.fileno())
        # Retry os.replace under Windows sharing/lock violations; only
        # give up (and drop the cache write) after a bounded backoff, so
        # transient contention is not turned into a silent miss.
        # Non-sharing PermissionErrors and all POSIX PermissionErrors
        # propagate immediately (real config problem).
        if not _replace_with_sharing_retry(tmp_path, target):
            with contextlib.suppress(FileNotFoundError):
                tmp_path.unlink()
            return
    except BaseException:
        # Never leave the staged file behind, whatever interrupted us.
        with contextlib.suppress(FileNotFoundError):
            tmp_path.unlink()
        raise

    if self._max_size_bytes is None:
        return

    # O(1) tracker update. Only run the scan-heavy ``_enforce_size_cap``
    # when this write actually pushes the running total above the cap.
    new_size = len(data)
    with self._size_lock:
        # Clamp at zero, matching ``__delitem__``. ``old_size`` comes from
        # the disk while the tracker is this process's running estimate,
        # so overwriting a large entry that the tracker never accounted
        # for (written by another process after our seed, or dropped from
        # the total by a racing re-seed) would otherwise push the tracker
        # negative and delay the ``tracker > cap`` check that triggers
        # eviction. The real on-disk total is never negative, so the
        # clamped value is always at least as close to it.
        self._tracked_size_bytes = max(0, self._tracked_size_bytes + new_size - old_size)
        over_cap = self._tracked_size_bytes > self._max_size_bytes
    if over_cap:
        self._enforce_size_cap()

514 

def __delitem__(self, key: object) -> None:
    """Remove the entry for ``key``; raise ``KeyError`` if it is absent."""
    entry = self._path_for_key(key)
    tracking = self._max_size_bytes is not None
    try:
        # Stat before unlinking so the tracker can be reduced by the real
        # on-disk size. If the file disappears between the stat and the
        # unlink (concurrent eviction), the delete is reported as a miss
        # -- the ``KeyError`` callers expect -- and the tracker is left
        # alone, since the racing eviction already accounted for it.
        freed = entry.stat().st_size if tracking else 0
        _unlink_with_sharing_retry(entry)
    except FileNotFoundError:
        raise KeyError(key) from None

    if tracking:
        with self._size_lock:
            # Clamp at zero. A racing ``_enforce_size_cap`` can re-seed
            # the tracker between our stat and this subtraction; if its
            # scan ran AFTER the unlink, the re-seeded value already
            # excludes ``freed`` and subtracting it again would undercount
            # by that amount. Repeated under contention, an unclamped
            # subtraction would walk the tracker negative, and a negative
            # tracker keeps the ``tracker > cap`` gate in front of
            # ``_enforce_size_cap`` from firing -- which is also the only
            # place the tracker gets re-seeded. Clamping limits the damage
            # to an undercount that the next re-seed corrects.
            remaining = self._tracked_size_bytes - freed
            self._tracked_size_bytes = remaining if remaining > 0 else 0

547 

548 def __len__(self) -> int: 

549 """Return the number of files currently in ``entries/``. 

550 

551 This is a count of on-disk files, not of keys reachable through 

552 ``make_program_cache_key``. After a ``_KEY_SCHEMA_VERSION`` bump 

553 old entries become unreachable by lookup but remain on disk 

554 until eviction reaps them; ``__len__`` keeps counting them 

555 until then. The same is true for entries written by callers 

556 using arbitrary user keys -- the backend has no way to tell a 

557 live entry from an orphan without knowing the caller's keying 

558 scheme. 

559 """ 

560 # ``_iter_entry_paths`` already filters with ``entry.is_file()``, 

561 # so don't stat each path a second time here. 

562 return sum(1 for _ in self._iter_entry_paths()) 1q6XnYjS1V

563 

def clear(self) -> None:
    """Remove every entry that has not changed since it was scanned.

    Stale temp files are swept as well, emptied shard directories are
    removed on a best-effort basis, and the size tracker (when a cap
    is configured) is re-derived from what remains on disk.
    """
    # Snapshot stat alongside path so we can refuse to unlink an entry
    # that was concurrently replaced by another process between the
    # snapshot scan and the unlink. Same stat-guard contract as
    # ``_prune_if_stat_unchanged`` and ``_enforce_size_cap``.
    snapshot = []
    for path in self._iter_entry_paths():
        try:
            snapshot.append((path, path.stat()))
        except FileNotFoundError:
            continue
    for path, st_before in snapshot:
        _prune_if_stat_unchanged(path, st_before)
    # Sweep ONLY stale temp files. Deleting a young temp would race with
    # another process between ``mkstemp`` and ``os.replace`` and turn its
    # write into ``FileNotFoundError`` instead of a successful commit.
    self._sweep_stale_tmp_files()
    # Remove empty subdirs (best-effort; concurrent writers may re-create).
    # List the directory directly instead of checking ``exists()`` first:
    # another process can remove ``entries/`` between a check and the
    # listing, and a cache that is already gone must not make ``clear()``
    # raise.
    try:
        subdirs = sorted(self._entries.iterdir(), reverse=True)
    except (FileNotFoundError, NotADirectoryError):
        subdirs = []
    for sub in subdirs:
        if sub.is_dir():
            # ``rmdir`` fails on a non-empty directory; that is expected
            # when a concurrent writer has just added an entry.
            with contextlib.suppress(OSError):
                sub.rmdir()
    # The directory is now (almost) empty -- but a concurrent writer may
    # have landed a fresh entry between the snapshot and the unlink, and
    # young temp files were intentionally preserved. Re-derive the
    # tracker from the post-clear state instead of zeroing blindly.
    if self._max_size_bytes is not None:
        actual = self._compute_total_size()
        with self._size_lock:
            self._tracked_size_bytes = actual

595 

596 # -- internals ----------------------------------------------------------- 

597 

def _iter_entry_paths(self) -> Iterable[Path]:
    """Yield a ``Path`` for each file stored one level below ``entries/``.

    Scans ``self._entries`` and, for every immediate subdirectory, yields
    the files that subdirectory contains. Neither the directory test nor
    the file test follows symlinks. A missing ``entries/`` root simply
    ends the iteration, and a subdirectory that disappears mid-walk is
    skipped.
    """
    # ``os.scandir`` is used directly (rather than ``Path.iterdir``) so the
    # ``DirEntry`` type checks can reuse the dirent type cached by the
    # directory read on filesystems that report it, instead of paying one
    # ``stat`` per entry. Both scans live inside ``with`` so the directory
    # handles are closed deterministically even when the consumer stops
    # early -- a leaked handle blocks deletes/renames on Windows until GC.
    try:
        with os.scandir(self._entries) as shards:
            for shard in shards:
                if shard.is_dir(follow_symlinks=False):
                    try:
                        with os.scandir(shard.path) as children:
                            for child in children:
                                if child.is_file(follow_symlinks=False):
                                    yield Path(child.path)
                    except FileNotFoundError:
                        # The subdirectory went away under us; keep walking.
                        pass
    except FileNotFoundError:
        # No ``entries/`` directory at all: nothing to yield.
        return

620 

def _compute_total_size(self) -> int:
    """Return the on-disk byte total of ``entries/`` plus ``tmp/``.

    Stats every path produced by ``_iter_entry_paths`` and adds the value
    returned by ``_sum_tmp_sizes``. The walk is best-effort: a path that
    vanishes before it can be stat'ed (for example because of a
    concurrent eviction) contributes nothing, so the result may briefly
    differ from the true disk state under cross-process contention.
    """
    entry_bytes = 0
    for entry_path in self._iter_entry_paths():
        try:
            size = entry_path.stat().st_size
        except FileNotFoundError:
            # Removed between listing and stat; ignore it.
            continue
        entry_bytes += size
    return entry_bytes + self._sum_tmp_sizes()

637 

def _iter_tmp_entries(self) -> Iterable[os.DirEntry[str]]:
    """Yield the ``DirEntry`` of every file directly inside ``tmp/``.

    Entries are filtered with ``is_file(follow_symlinks=False)``, so
    directories and symlinks are not yielded. ``DirEntry`` objects (not
    ``Path``) are produced so callers can use ``entry.stat`` and
    ``entry.path`` without re-wrapping. A missing ``tmp/`` directory
    yields nothing.
    """
    # The ``with`` block closes the directory handle deterministically,
    # including when the consumer abandons the generator early.
    try:
        with os.scandir(self._tmp) as listing:
            for candidate in listing:
                if candidate.is_file(follow_symlinks=False):
                    yield candidate
    except FileNotFoundError:
        return

648 

def _sum_tmp_sizes(self) -> int:
    """Return the combined size in bytes of the files in ``tmp/``.

    Temp files occupy disk space too, so size accounting includes them;
    leaving them out would let bursts of in-flight writes silently push
    the cache past ``max_size_bytes``. An entry that vanishes before it
    can be stat'ed is skipped.
    """
    running = 0
    for tmp_entry in self._iter_tmp_entries():
        # A vanished temp file simply contributes nothing to the sum.
        with contextlib.suppress(FileNotFoundError):
            running += tmp_entry.stat(follow_symlinks=False).st_size
    return running

664 

def _sweep_stale_tmp_files(self) -> None:
    """Delete temp files older than ``_TMP_STALE_AGE_SECONDS``.

    Such files are assumed to be leftovers from crashed writers. The age
    threshold is deliberately conservative so that a write still in
    flight in another process is not interrupted. Best-effort: a file
    that is already gone, or one that cannot be removed because of a
    permission failure, is silently skipped.
    """
    stale_before = time.time() - _TMP_STALE_AGE_SECONDS
    for tmp_entry in self._iter_tmp_entries():
        # Both the stat and the unlink may race with other processes.
        with contextlib.suppress(FileNotFoundError, PermissionError):
            modified_at = tmp_entry.stat(follow_symlinks=False).st_mtime
            if modified_at < stale_before:
                os.unlink(tmp_entry.path)

679 

def _enforce_size_cap(self) -> None:
    """Evict entries, oldest ``st_atime`` first, until the cache fits the cap.

    Does nothing when ``self._max_size_bytes`` is ``None``. Otherwise the
    method sweeps stale temp files, measures committed entries plus the
    surviving temp files, and -- if the total exceeds the cap -- unlinks
    entries in ascending ``st_atime`` order until the total is at or
    below the cap (or no candidates remain). In every capped path the
    size tracker is re-seeded from the total observed by this pass.

    Raises:
        PermissionError: if an unlink fails for a reason other than a
            Windows sharing violation.
    """
    if self._max_size_bytes is None:
        return
    # Sweep stale temp files first so a long-dead writer's leftovers
    # don't drag the apparent size up and force needless eviction.
    self._sweep_stale_tmp_files()
    entries = []
    total = 0
    # Count both committed entries AND surviving temp files: temp files
    # occupy disk too, even if they're young. Without this the soft cap
    # silently undercounts in-flight writes.
    #
    # Trade-off under burst concurrency: many young temp files (each
    # below the stale-sweep threshold) can push ``total`` above
    # ``max_size_bytes`` with only committed entries left to evict.
    # That can over-evict committed entries during the burst; once
    # the burst subsides and the temps land via ``os.replace`` (or
    # are reaped by a later sweep), the cap re-stabilises. This is
    # consistent with the documented soft-cap contract -- callers
    # that need a hard bound should leave the cap None and prune
    # externally.
    for path in self._iter_entry_paths():
        try:
            st = path.stat()
        except FileNotFoundError:
            continue
        # Carry the full stat so eviction can guard against a concurrent
        # os.replace that swapped a fresh entry into this path between
        # snapshot and unlink. Eviction below sorts by ``st_atime`` so
        # entries that callers actually read recently survive
        # write-only churn.
        entries.append((st.st_atime, st.st_size, path, st))
        total += st.st_size
    total += self._sum_tmp_sizes()
    if total <= self._max_size_bytes:
        # Re-seed the tracker from the scan: catches drift from
        # cross-process writers/deleters that the per-write delta
        # accounting wouldn't have observed.
        with self._size_lock:
            self._tracked_size_bytes = total
        return
    entries.sort(key=lambda e: e[0])  # oldest atime first
    for _atime, size, path, st_before in entries:
        if total <= self._max_size_bytes:
            break
        # Re-stat and compare against the snapshot: if a writer replaced
        # the file in the meantime, eviction must not silently delete a
        # freshly-committed entry from another process.
        try:
            stat_now = path.stat()
        except FileNotFoundError:
            # Already gone: its bytes no longer count.
            total -= size
            continue
        if _stat_key(stat_now) != _stat_key(st_before):
            # File was replaced -- don't unlink, but update ``total`` to
            # reflect the replacement's actual size or the cap check
            # above could declare us done while still over the limit.
            total += stat_now.st_size - size
            continue
        # Tolerate Windows sharing violations during eviction: another
        # process may briefly hold the file open for a read. Skip this
        # entry; a later eviction pass will retry. Other PermissionErrors
        # (POSIX ACL, Windows non-sharing winerrors) are real config
        # problems -- surface them rather than silently exceed the cap.
        try:
            _unlink_with_sharing_retry(path)
        except FileNotFoundError:
            # A peer removed the file between our re-stat and our unlink.
            # The bytes are off disk either way, so fall through and
            # account for them exactly like the stat-time miss above;
            # otherwise ``total`` over-counts and we over-evict.
            pass
        except PermissionError as exc:
            if not _is_windows_sharing_violation(exc):
                raise
            # Sharing violation: the file is still on disk, so leave
            # ``total`` untouched and move to the next candidate.
            continue
        total -= size
    # Reconcile: after the eviction pass, ``total`` reflects what we
    # believe the disk now holds. Re-seed the tracker so the next write
    # accumulates from a fresh baseline.
    with self._size_lock:
        self._tracked_size_bytes = total