Coverage for cuda / core / utils / _program_cache / _in_memory.py: 100.00%

54 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-22 01:37 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4 

5"""In-memory bytes-in / bytes-out program cache.""" 

6 

7from __future__ import annotations 

8 

9import collections 

10import threading 

11 

12from cuda.core._module import ObjectCode 

13 

14from ._abc import ProgramCacheResource, _as_key_bytes, _extract_bytes 

15 

16 

class InMemoryProgramCache(ProgramCacheResource):
    """Process-local program cache with least-recently-used eviction.

    Intended for single-process workflows that compile kernels once and
    look them up repeatedly without touching the disk. Nothing is
    persisted: entries disappear with the process. Use
    :class:`FileStreamProgramCache` when the cache has to survive
    across runs.

    The backend is bytes-in / bytes-out, like
    :class:`FileStreamProgramCache`. ``__setitem__`` accepts ``bytes``,
    ``bytearray``, ``memoryview``, or any
    :class:`~cuda.core.ObjectCode` (including path-backed ones -- the
    file is read at write time, so the entry stores binary content
    rather than a path). ``__getitem__`` returns ``bytes``.

    Parameters
    ----------
    max_size_bytes:
        Optional upper bound on the summed payload sizes. Whenever a
        write pushes the total above it, least-recently-used entries
        are dropped until the total fits again. ``None`` disables the
        bound. The size-only bound mirrors
        :class:`FileStreamProgramCache`.

    Raises
    ------
    ValueError
        If ``max_size_bytes`` is given and is zero or negative.

    Notes
    -----
    :meth:`__getitem__` refreshes an entry's recency. ``get`` is the
    recommended lookup because the cache deliberately omits
    ``__contains__`` (the ``if key in cache: ...`` idiom is racy
    across processes; see :class:`ProgramCacheResource`).

    Thread safety: every public method runs under a single
    :class:`threading.RLock`, so one instance can be shared between
    threads without external locking.
    """

    def __init__(
        self,
        *,
        max_size_bytes: int | None = None,
    ) -> None:
        if max_size_bytes is not None and max_size_bytes <= 0:
            raise ValueError("max_size_bytes must be positive or None (0 would evict every write)")
        # An RLock (rather than a plain Lock) lets helpers that acquire
        # the lock themselves be called from code already holding it.
        self._lock = threading.RLock()
        self._max_size_bytes = max_size_bytes
        # Running sum of the stored payload sizes.
        self._total_bytes = 0
        # Iteration order doubles as LRU order: the front is the coldest
        # entry, the back the most recently used one. Values are
        # ``(payload_bytes, payload_size)`` so eviction never has to
        # call ``len`` on a payload again.
        self._entries: collections.OrderedDict[bytes, tuple[bytes, int]] = collections.OrderedDict()

    def __getitem__(self, key: object) -> bytes:
        """Return the payload stored under ``key`` and mark it most recent.

        Raises ``KeyError`` carrying the caller's original ``key`` when
        there is no such entry.
        """
        normalized = _as_key_bytes(key)
        with self._lock:
            # ``move_to_end`` both checks for presence and performs the
            # LRU touch; a miss leaves the ordering untouched.
            try:
                self._entries.move_to_end(normalized)
            except KeyError:
                raise KeyError(key) from None
            return self._entries[normalized][0]

    def __setitem__(self, key: object, value: bytes | bytearray | memoryview | ObjectCode) -> None:
        """Store ``value`` under ``key`` as the most recent entry, then evict.

        An existing entry for the same key is replaced and loses its
        old position in the LRU order.
        """
        # Payload conversion happens before key conversion and outside
        # the lock, so a failure in either leaves the cache untouched.
        payload = _extract_bytes(value)
        payload_size = len(payload)
        normalized = _as_key_bytes(key)
        with self._lock:
            # Popping first guarantees the re-inserted key lands at the
            # "newest" end even when it was already present.
            previous = self._entries.pop(normalized, None)
            replaced_size = 0 if previous is None else previous[1]
            self._entries[normalized] = (payload, payload_size)
            self._total_bytes += payload_size - replaced_size
            self._evict_to_caps()

    def __delitem__(self, key: object) -> None:
        """Remove the entry for ``key``; raise ``KeyError(key)`` if absent."""
        normalized = _as_key_bytes(key)
        with self._lock:
            try:
                removed = self._entries.pop(normalized)
            except KeyError:
                raise KeyError(key) from None
            self._total_bytes -= removed[1]

    def __len__(self) -> int:
        """Return the number of entries currently held."""
        with self._lock:
            count = len(self._entries)
        return count

    def clear(self) -> None:
        """Drop every entry and reset the byte counter to zero."""
        with self._lock:
            self._total_bytes = 0
            self._entries.clear()

    # -- eviction ------------------------------------------------------------

    def _evict_to_caps(self) -> None:
        """Drop the coldest entries until the total fits the size cap.

        Invoked by ``__setitem__`` right after an insert or update.
        Entries are removed from the front of the OrderedDict (oldest
        first). A freshly written entry that is larger than
        ``max_size_bytes`` by itself is removed as well, mirroring
        :class:`FileStreamProgramCache` (a write that cannot fit does
        not survive its own size-cap pass).
        """
        limit = self._max_size_bytes
        if limit is None:
            # Unbounded cache: nothing is ever evicted.
            return
        entries = self._entries
        while self._total_bytes > limit and entries:
            _evicted_key, (_payload, freed) = entries.popitem(last=False)
            self._total_bytes -= freed

125 while self._entries and self._total_bytes > self._max_size_bytes: 1bdgef

126 _k, (_data, size) = self._entries.popitem(last=False) 1bdef

127 self._total_bytes -= size 1bdef