Coverage for cuda / core / _memory / _pinned_memory_resource.pyx: 71.26%

87 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-08 01:07 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5from __future__ import annotations 

6  

7from cuda.bindings cimport cydriver 

8from cuda.core._memory._memory_pool cimport _MemPool, _MemPoolOptions 

9from cuda.core._memory cimport _ipc 

10from cuda.core._memory._ipc cimport IPCAllocationHandle 

11from cuda.core._utils.cuda_utils cimport ( 

12 check_or_create_options, 

13 HANDLE_RETURN, 

14) 

15  

16from dataclasses import dataclass 

17import multiprocessing 

18import os 

19import platform # no-cython-lint 

20import subprocess 

21import threading 

22import uuid 

23import warnings 

24  

25from cuda.core._utils.cuda_utils import check_multiprocessing_start_method 

26  

27  

# Cache to ensure NUMA warning is only raised once per process
cdef bint _numa_warning_shown = False
# Guards the check-then-set of _numa_warning_shown across threads.
cdef object _lock = threading.Lock()

31  

32  

def _check_numa_nodes():
    """Check if system has multiple NUMA nodes and warn if so.

    Emits a ``UserWarning`` at most once per process when more than one NUMA
    node is detected on Linux; a no-op on other platforms and on repeat calls.
    """
    global _numa_warning_shown
    # Fast path: skip the lock once the check has already run.
    if _numa_warning_shown:
        return

    with _lock:
        # Double-checked locking: another thread may have finished first.
        if _numa_warning_shown:
            return

        if platform.system() != "Linux":
            _numa_warning_shown = True
            return

        numa_count = None

        # Try /sys filesystem first (most reliable and doesn't require external tools)
        try:
            node_path = "/sys/devices/system/node"
            if os.path.exists(node_path):
                # Count directories named "node[0-9]+"
                nodes = [d for d in os.listdir(node_path) if d.startswith("node") and d[4:].isdigit()]
                numa_count = len(nodes)
        except OSError:
            # PermissionError is a subclass of OSError, so one clause covers both.
            pass

        # Fallback to lscpu if /sys check didn't work
        if numa_count is None:
            try:
                result = subprocess.run(
                    ["lscpu"],
                    capture_output=True,
                    text=True,
                    timeout=1
                )
                for line in result.stdout.splitlines():
                    if line.startswith("NUMA node(s):"):
                        numa_count = int(line.split(":")[1].strip())
                        break
            except (subprocess.SubprocessError, ValueError, FileNotFoundError):
                # SubprocessError covers TimeoutExpired; ValueError covers a
                # malformed count; FileNotFoundError covers a missing lscpu.
                pass

        # Warn if multiple NUMA nodes detected
        if numa_count is not None and numa_count > 1:
            warnings.warn(
                f"System has {numa_count} NUMA nodes. IPC-enabled pinned memory "
                "uses location ID 0, which may not work correctly with multiple "
                "NUMA nodes.",
                UserWarning,
                stacklevel=3
            )

        _numa_warning_shown = True

86  

87  

# Public API of this module.
__all__ = ['PinnedMemoryResource', 'PinnedMemoryResourceOptions']

89  

90  

@dataclass
cdef class PinnedMemoryResourceOptions:
    """Customizable :obj:`~_memory.PinnedMemoryResource` options.

    Attributes
    ----------
    ipc_enabled : bool, optional
        Specifies whether to create an IPC-enabled memory pool. When set to
        True, the memory pool and its allocations can be shared with other
        processes. (Default to False)

    max_size : int, optional
        Maximum pool size. When set to 0, defaults to a system-dependent value.
        (Default to 0)
    """
    ipc_enabled : bool = False
    max_size : int = 0

108  

109  

cdef class PinnedMemoryResource(_MemPool):
    """
    A host-pinned memory resource managing a stream-ordered memory pool.

    Parameters
    ----------
    options : PinnedMemoryResourceOptions
        Memory resource creation options.

        If set to `None`, the memory resource uses the driver's current
        stream-ordered memory pool. If no memory
        pool is set as current, the driver's default memory pool
        is used.

        If not set to `None`, a new memory pool is created, which is owned by
        the memory resource.

        When using an existing (current or default) memory pool, the returned
        host-pinned memory resource does not own the pool (`is_handle_owned` is
        `False`), and closing the resource has no effect.

    Notes
    -----
    To create an IPC-Enabled memory resource (MR) that is capable of sharing
    allocations between processes, specify ``ipc_enabled=True`` in the initializer
    option. When IPC is enabled, the location type is automatically set to
    CU_MEM_LOCATION_TYPE_HOST_NUMA instead of CU_MEM_LOCATION_TYPE_HOST,
    with location ID 0.

    Note: IPC support for pinned memory requires a single NUMA node. A warning
    is issued if multiple NUMA nodes are detected.

    See :class:`DeviceMemoryResource` for more details on IPC usage patterns.
    """

    def __init__(self, options=None):
        cdef PinnedMemoryResourceOptions opts = check_or_create_options(
            PinnedMemoryResourceOptions, options, "PinnedMemoryResource options",
            keep_none=True
        )
        cdef _MemPoolOptions opts_base = _MemPoolOptions()

        cdef bint ipc_enabled = False
        if opts:
            ipc_enabled = opts.ipc_enabled
            if ipc_enabled and not _ipc.is_supported():
                raise RuntimeError(f"IPC is not available on {platform.system()}")
            if ipc_enabled:
                # Check for multiple NUMA nodes on Linux
                _check_numa_nodes()
            opts_base._max_size = opts.max_size
            # Explicit options always create a new pool rather than adopting
            # the driver's current one.
            opts_base._use_current = False
        opts_base._ipc_enabled = ipc_enabled
        if ipc_enabled:
            # IPC-shareable pinned pools require the host-NUMA location type
            # (see class Notes above).
            opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST_NUMA
        else:
            opts_base._location = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_HOST
        opts_base._type = cydriver.CUmemAllocationType.CU_MEM_ALLOCATION_TYPE_PINNED

        # First argument: location ID 0 (NUMA node 0) for IPC pools, -1 otherwise.
        super().__init__(0 if ipc_enabled else -1, opts_base)

    def __reduce__(self):
        # Standard pickle support: reconstruct by looking up the per-process
        # registry via UUID. (multiprocessing uses the deep-reduce hook
        # registered at module bottom instead.)
        return PinnedMemoryResource.from_registry, (self.uuid,)

    @staticmethod
    def from_registry(uuid: uuid.UUID) -> PinnedMemoryResource:  # no-cython-lint
        """
        Obtain a registered pinned memory resource.

        Raises
        ------
        RuntimeError
            If no pinned memory resource is found in the registry.
        """
        return <PinnedMemoryResource>(_ipc.MP_from_registry(uuid))

    def register(self, uuid: uuid.UUID) -> PinnedMemoryResource:  # no-cython-lint
        """
        Register a pinned memory resource.

        Returns
        -------
        The registered pinned memory resource. If one was previously registered
        with the given key, it is returned.
        """
        return <PinnedMemoryResource>(_ipc.MP_register(self, uuid))

    @classmethod
    def from_allocation_handle(
        cls, alloc_handle: int | IPCAllocationHandle
    ) -> PinnedMemoryResource:
        """Create a host-pinned memory resource from an allocation handle.

        Construct a new `PinnedMemoryResource` instance that imports a memory
        pool from a shareable handle. The memory pool is marked as owned.

        Parameters
        ----------
        alloc_handle : int | IPCAllocationHandle
            The shareable handle of the host-pinned memory resource to import. If an
            integer is supplied, it must represent a valid platform-specific
            handle. It is the caller's responsibility to close that handle.

        Returns
        -------
        A new host-pinned memory resource instance with the imported handle.
        """
        # cuMemPoolImportFromShareableHandle requires CUDA to be initialized, but in
        # a child process CUDA may not be initialized yet. For DeviceMemoryResource,
        # this is not a concern because most likely when retrieving the device_id the
        # user would have already initialized CUDA. But since PinnedMemoryResource is
        # not device-specific it is unlikely the case.
        HANDLE_RETURN(cydriver.cuInit(0))

        cdef PinnedMemoryResource mr = <PinnedMemoryResource>(
            _ipc.MP_from_allocation_handle(cls, alloc_handle))
        return mr

    def get_allocation_handle(self) -> IPCAllocationHandle:
        """Export the memory pool handle to be shared (requires IPC).

        The handle can be used to share the memory pool with other processes.
        The handle is cached in this `MemoryResource` and owned by it.

        Returns
        -------
        The shareable handle for the memory pool.

        Raises
        ------
        RuntimeError
            If this memory resource was not created with ``ipc_enabled=True``.
        """
        if not self.is_ipc_enabled:
            raise RuntimeError("Memory resource is not IPC-enabled")
        return self._ipc_data._alloc_handle

    @property
    def is_device_accessible(self) -> bool:
        """Return True. This memory resource provides device-accessible buffers."""
        return True

    @property
    def is_host_accessible(self) -> bool:
        """Return True. This memory resource provides host-accessible buffers."""
        return True

251  

252  

def _deep_reduce_pinned_memory_resource(mr):
    """Deep-reduce hook for multiprocessing: ship the pool's IPC handle.

    Verifies the multiprocessing start method is compatible, then returns a
    (callable, args) pair that rebuilds the resource in the child process by
    importing the exported allocation handle.
    """
    check_multiprocessing_start_method()
    handle = mr.get_allocation_handle()
    return mr.from_allocation_handle, (handle,)

257  

258  

# When a PinnedMemoryResource crosses a multiprocessing boundary, pickle it
# via its exported IPC allocation handle instead of the default __reduce__.
multiprocessing.reduction.register(PinnedMemoryResource, _deep_reduce_pinned_memory_resource)