Coverage for cuda/core/_memory/_managed_memory_resource.pyx: 94.95%

99 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-25 01:07 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5from __future__ import annotations 

6  

7from cuda.bindings cimport cydriver 

8  

9from cuda.core._memory._memory_pool cimport _MemPool, MP_init_create_pool, MP_init_current_pool 

10from cuda.core._utils.cuda_utils cimport ( 

11 HANDLE_RETURN, 

12 check_or_create_options, 

13) 

14  

15from dataclasses import dataclass 

16import threading 

17import warnings 

18  

19__all__ = ['ManagedMemoryResource', 'ManagedMemoryResourceOptions'] 

20  

21  

@dataclass
cdef class ManagedMemoryResourceOptions:
    """Customizable :obj:`~_memory.ManagedMemoryResource` options.

    Attributes
    ----------
    preferred_location : int | None, optional
        A location identifier — a device ordinal or a NUMA node ID —
        whose meaning is governed by ``preferred_location_type``.
        (Default to ``None``)

    preferred_location_type : ``"device"`` | ``"host"`` | ``"host_numa"`` | None, optional
        Selects how ``preferred_location`` is interpreted.

        Left as ``None`` (the default), the legacy interpretation
        applies: ``preferred_location`` is read as a device ordinal,
        as ``-1`` meaning host, or as ``None`` meaning no preference.

        Given explicitly, the type fixes both the kind of preferred
        location and which ``preferred_location`` values are accepted:

        - ``"device"``: prefer a particular GPU; ``preferred_location``
          must be a device ordinal (``>= 0``).
        - ``"host"``: prefer host memory (OS-managed NUMA placement);
          ``preferred_location`` must be ``None``.
        - ``"host_numa"``: prefer a particular host NUMA node;
          ``preferred_location`` must be a NUMA node ID (``>= 0``),
          or ``None`` to take the NUMA node from the current CUDA
          device's ``host_numa_id`` attribute (requires an active
          CUDA context).

        (Default to ``None``)
    """
    preferred_location: int | None = None
    preferred_location_type: str | None = None

58  

59  

cdef class ManagedMemoryResource(_MemPool):
    """
    A managed memory resource managing a stream-ordered memory pool.

    Managed memory is accessible from both the host and device, with automatic
    migration between them as needed.

    Parameters
    ----------
    options : ManagedMemoryResourceOptions
        Memory resource creation options.

        If set to `None`, the memory resource uses the driver's current
        stream-ordered memory pool. If no memory pool is set as current,
        the driver's default memory pool is used.

        If not set to `None`, a new memory pool is created, which is owned by
        the memory resource.

        When using an existing (current or default) memory pool, the returned
        managed memory resource does not own the pool (`is_handle_owned` is
        `False`), and closing the resource has no effect.

    Notes
    -----
    IPC (Inter-Process Communication) is not currently supported for managed
    memory pools.
    """

    def __init__(self, options=None):
        # All real work (option resolution, pool creation/adoption, and the
        # one-time concurrent-access check) lives in the module-level helper.
        _MMR_init(self, options)

    @property
    def device_id(self) -> int:
        """The preferred device ordinal, or -1 if the preferred location is not a device."""
        return self._pref_loc_id if self._pref_loc_type == "device" else -1

    @property
    def preferred_location(self) -> tuple | None:
        """The preferred location for managed memory allocations.

        Returns ``None`` if no preferred location is set (driver decides),
        or a tuple ``(type, id)`` where *type* is one of ``"device"``,
        ``"host"``, or ``"host_numa"``, and *id* is the device ordinal,
        ``None`` (for ``"host"``), or the NUMA node ID, respectively.
        """
        kind = self._pref_loc_type
        if kind is None:
            return None
        # Plain "host" carries no meaningful ID, so it is reported as None.
        return (kind, None) if kind == "host" else (kind, self._pref_loc_id)

    @property
    def is_device_accessible(self) -> bool:
        """Return True. This memory resource provides device-accessible buffers."""
        return True

    @property
    def is_host_accessible(self) -> bool:
        """Return True. This memory resource provides host-accessible buffers."""
        return True

123  

124  

IF CUDA_CORE_BUILD_MAJOR >= 13:
    cdef tuple _VALID_LOCATION_TYPES = ("device", "host", "host_numa")


    cdef _resolve_preferred_location(ManagedMemoryResourceOptions opts):
        """Resolve preferred location options into driver and stored values.

        Returns a 4-tuple:
            (CUmemLocationType, loc_id, pref_loc_type_str, pref_loc_id)
        """
        cdef object loc = opts.preferred_location if opts is not None else None
        cdef object kind = opts.preferred_location_type if opts is not None else None

        if kind is not None and kind not in _VALID_LOCATION_TYPES:
            raise ValueError(
                f"preferred_location_type must be one of {_VALID_LOCATION_TYPES!r} "
                f"or None, got {kind!r}"
            )

        if kind is None:
            # Legacy behavior: ordinal, -1 for host, None for no preference.
            if loc is None:
                return (
                    cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_NONE,
                    -1, None, -1,
                )
            if loc == -1:
                return (
                    cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST,
                    -1, "host", -1,
                )
            if loc < 0:
                raise ValueError(
                    f"preferred_location must be a device ordinal (>= 0), -1 for "
                    f"host, or None for no preference, got {loc}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
                loc, "device", loc,
            )

        if kind == "device":
            if loc is None or loc < 0:
                raise ValueError(
                    f"preferred_location must be a device ordinal (>= 0) when "
                    f"preferred_location_type is 'device', got {loc!r}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE,
                loc, "device", loc,
            )

        if kind == "host":
            if loc is not None:
                raise ValueError(
                    f"preferred_location must be None when "
                    f"preferred_location_type is 'host', got {loc!r}"
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST,
                -1, "host", -1,
            )

        # Only "host_numa" remains after the validity check above.
        if loc is None:
            # Derive the NUMA node from the current device (needs a context).
            from .._device import Device
            numa_id = Device().properties.host_numa_id
            if numa_id < 0:
                raise RuntimeError(
                    "Cannot determine host NUMA ID for the current CUDA device. "
                    "The system may not support NUMA, or no CUDA context is "
                    "active. Set preferred_location to an explicit NUMA node ID "
                    "or call Device.set_current() first."
                )
            return (
                cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA,
                numa_id, "host_numa", numa_id,
            )
        if loc < 0:
            raise ValueError(
                f"preferred_location must be a NUMA node ID (>= 0) or None "
                f"when preferred_location_type is 'host_numa', got {loc}"
            )
        return (
            cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA,
            loc, "host_numa", loc,
        )

213  

214  

cdef inline _MMR_init(ManagedMemoryResource self, options):
    IF CUDA_CORE_BUILD_MAJOR >= 13:
        cdef cydriver.CUmemLocationType loc_type
        cdef int loc_id
        cdef ManagedMemoryResourceOptions opts = check_or_create_options(
            ManagedMemoryResourceOptions, options, "ManagedMemoryResource options",
            keep_none=True
        )

        # Resolve both the driver-facing location and the values cached on
        # the resource for the device_id / preferred_location properties.
        loc_type, loc_id, self._pref_loc_type, self._pref_loc_id = (
            _resolve_preferred_location(opts)
        )

        if opts is not None:
            # Explicit options: create a new pool owned by this resource.
            MP_init_create_pool(
                self,
                loc_type,
                loc_id,
                cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
                False,
                0,
            )
        else:
            # No options: adopt the driver's current (or default) pool,
            # which this resource does not own.
            MP_init_current_pool(
                self,
                loc_type,
                loc_id,
                cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_MANAGED,
            )

        # Warn (once per process) on platforms without concurrent access.
        _check_concurrent_managed_access()
    ELSE:
        raise RuntimeError("ManagedMemoryResource requires CUDA 13.0 or later")

248  

249  

# One-shot state for the "no concurrent managed access" warning emitted by
# _check_concurrent_managed_access(); the flag is double-checked under the
# lock so the warning fires at most once per process.
cdef bint _concurrent_access_warned = False
cdef object _concurrent_access_lock = threading.Lock()

252  

253  

cdef inline _check_concurrent_managed_access():
    """Warn once if the platform lacks concurrent managed memory access."""
    global _concurrent_access_warned
    # Lock-free fast path once the check has already run.
    if _concurrent_access_warned:
        return

    cdef int supported = 0
    with _concurrent_access_lock:
        # Double-checked: another thread may have completed the probe
        # while this one was waiting on the lock.
        if _concurrent_access_warned:
            return

        # concurrent_managed_access is a system-level attribute for sm_60 and
        # later, so any device will do.
        with nogil:
            HANDLE_RETURN(cydriver.cuDeviceGetAttribute(
                &supported,
                cydriver.CUdevice_attribute.CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS,
                0))
        if not supported:
            warnings.warn(
                "This platform does not support concurrent managed memory access "
                "(Device.properties.concurrent_managed_access is False). Host access to any managed "
                "allocation is forbidden while any GPU kernel is in flight, even "
                "if the kernel does not touch that allocation. Failing to "
                "synchronize before host access will cause a segfault. "
                "See: https://docs.nvidia.com/cuda/cuda-c-programming-guide/"
                "index.html#gpu-exclusive-access-to-managed-memory",
                UserWarning,
                stacklevel=3
            )

        # Set while still holding the lock so at most one thread warns.
        _concurrent_access_warned = True

286  

287  

def reset_concurrent_access_warning():
    """Reset the concurrent access warning flag for testing purposes.

    Clears the module-level "warned once" flag so the next call to
    _check_concurrent_managed_access() re-queries the driver attribute and
    may emit the warning again.
    """
    global _concurrent_access_warned
    _concurrent_access_warned = False