Coverage for cuda / core / _resource_handles.pyx: 100.00%

51 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-25 01:07 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5# This module compiles _cpp/resource_handles.cpp into a shared library. 

6# Consumer modules cimport the functions declared in _resource_handles.pxd. 

7# Since there is only one copy of the C++ code (in this .so), all static and 

8# thread-local state is shared correctly across all consumer modules. 

9# 

10# The cdef extern from declarations below satisfy the .pxd declarations directly, 

11# without needing separate wrapper functions. 

12  

13from cpython.pycapsule cimport PyCapsule_GetName, PyCapsule_GetPointer 

14from libc.stddef cimport size_t 

15  

16from cuda.bindings cimport cydriver 

17from cuda.bindings cimport cynvrtc 

18from cuda.bindings cimport cynvvm 

19from cuda.bindings cimport cynvjitlink 

20  

21from ._resource_handles cimport ( 

22 ContextHandle, 

23 StreamHandle, 

24 EventHandle, 

25 MemoryPoolHandle, 

26 DevicePtrHandle, 

27 LibraryHandle, 

28 KernelHandle, 

29 GraphHandle, 

30 GraphicsResourceHandle, 

31 NvrtcProgramHandle, 

32 NvvmProgramHandle, 

33 NvJitLinkHandle, 

34 CuLinkHandle, 

35) 

36  

37import cuda.bindings.cydriver as cydriver 

38import cuda.bindings.cynvrtc as cynvrtc 

39import cuda.bindings.cynvvm as cynvvm 

40import cuda.bindings.cynvjitlink as cynvjitlink 

41  

42# ============================================================================= 

43# C++ function declarations (non-inline, implemented in resource_handles.cpp) 

44# 

45# These declarations satisfy the cdef function declarations in _resource_handles.pxd. 

46# Consumer modules cimport these functions and calls go through this .so. 

47# ============================================================================= 

48  

# Declarations of the functions implemented in C++ and exposed through
# _cpp/resource_handles.hpp. Every entry carries an explicit C name string
# ("cuda_core::...") that is emitted verbatim in the generated C++ code.
#
# Exception specifications used below:
#   except+   -> a C++ exception thrown by the callee is translated by Cython
#                into a Python exception.
#   noexcept  -> the call does not propagate an exception to the caller.
# `nogil` marks a function as callable without holding the GIL.
cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
    # Thread-local error handling
    cydriver.CUresult get_last_error "cuda_core::get_last_error" () noexcept nogil
    cydriver.CUresult peek_last_error "cuda_core::peek_last_error" () noexcept nogil
    void clear_last_error "cuda_core::clear_last_error" () noexcept nogil

    # Context handles
    ContextHandle create_context_handle_ref "cuda_core::create_context_handle_ref" (
        cydriver.CUcontext ctx) except+ nogil
    ContextHandle get_primary_context "cuda_core::get_primary_context" (
        int device_id) except+ nogil
    ContextHandle get_current_context "cuda_core::get_current_context" () except+ nogil

    # Stream handles
    StreamHandle create_stream_handle "cuda_core::create_stream_handle" (
        const ContextHandle& h_ctx, unsigned int flags, int priority) except+ nogil
    StreamHandle create_stream_handle_ref "cuda_core::create_stream_handle_ref" (
        cydriver.CUstream stream) except+ nogil
    # NOTE(review): takes a Python `object` while being declared nogil --
    # presumably the C++ side deals with reference counting / the GIL; confirm.
    StreamHandle create_stream_handle_with_owner "cuda_core::create_stream_handle_with_owner" (
        cydriver.CUstream stream, object owner) except+ nogil
    StreamHandle get_legacy_stream "cuda_core::get_legacy_stream" () except+ nogil
    StreamHandle get_per_thread_stream "cuda_core::get_per_thread_stream" () except+ nogil

    # Event handles (note: _create_event_handle* are internal due to C++ overloading)
    EventHandle create_event_handle "cuda_core::create_event_handle" (
        const ContextHandle& h_ctx, unsigned int flags,
        bint timing_disabled, bint busy_waited,
        bint ipc_enabled, int device_id) except+ nogil
    EventHandle create_event_handle_noctx "cuda_core::create_event_handle_noctx" (
        unsigned int flags) except+ nogil
    EventHandle create_event_handle_ref "cuda_core::create_event_handle_ref" (
        cydriver.CUevent event) except+ nogil
    EventHandle create_event_handle_ipc "cuda_core::create_event_handle_ipc" (
        const cydriver.CUipcEventHandle& ipc_handle, bint busy_waited) except+ nogil

    # Event metadata getters (all take an existing EventHandle and are noexcept)
    bint get_event_timing_disabled "cuda_core::get_event_timing_disabled" (
        const EventHandle& h) noexcept nogil
    bint get_event_busy_waited "cuda_core::get_event_busy_waited" (
        const EventHandle& h) noexcept nogil
    bint get_event_ipc_enabled "cuda_core::get_event_ipc_enabled" (
        const EventHandle& h) noexcept nogil
    int get_event_device_id "cuda_core::get_event_device_id" (
        const EventHandle& h) noexcept nogil
    ContextHandle get_event_context "cuda_core::get_event_context" (
        const EventHandle& h) noexcept nogil

    # Memory pool handles
    MemoryPoolHandle create_mempool_handle "cuda_core::create_mempool_handle" (
        const cydriver.CUmemPoolProps& props) except+ nogil
    MemoryPoolHandle create_mempool_handle_ref "cuda_core::create_mempool_handle_ref" (
        cydriver.CUmemoryPool pool) except+ nogil
    MemoryPoolHandle get_device_mempool "cuda_core::get_device_mempool" (
        int device_id) except+ nogil
    MemoryPoolHandle create_mempool_handle_ipc "cuda_core::create_mempool_handle_ipc" (
        int fd, cydriver.CUmemAllocationHandleType handle_type) except+ nogil

    # Device pointer handles
    DevicePtrHandle deviceptr_alloc_from_pool "cuda_core::deviceptr_alloc_from_pool" (
        size_t size, const MemoryPoolHandle& h_pool, const StreamHandle& h_stream) except+ nogil
    DevicePtrHandle deviceptr_alloc_async "cuda_core::deviceptr_alloc_async" (
        size_t size, const StreamHandle& h_stream) except+ nogil
    DevicePtrHandle deviceptr_alloc "cuda_core::deviceptr_alloc" (size_t size) except+ nogil
    DevicePtrHandle deviceptr_alloc_host "cuda_core::deviceptr_alloc_host" (size_t size) except+ nogil
    DevicePtrHandle deviceptr_create_ref "cuda_core::deviceptr_create_ref" (
        cydriver.CUdeviceptr ptr) except+ nogil
    # NOTE(review): `object owner` under nogil, same question as for
    # create_stream_handle_with_owner -- confirm.
    DevicePtrHandle deviceptr_create_with_owner "cuda_core::deviceptr_create_with_owner" (
        cydriver.CUdeviceptr ptr, object owner) except+ nogil
    DevicePtrHandle deviceptr_create_mapped_graphics "cuda_core::deviceptr_create_mapped_graphics" (
        cydriver.CUdeviceptr ptr,
        const GraphicsResourceHandle& h_resource,
        const StreamHandle& h_stream) except+ nogil

    # MR deallocation callback
    # Callback signature: receives a Python object, a device pointer, a size
    # and a stream handle. Neither the typedef nor the registration function
    # is declared nogil.
    ctypedef void (*MRDeallocCallback)(
        object mr, cydriver.CUdeviceptr ptr, size_t size,
        const StreamHandle& stream) noexcept
    void register_mr_dealloc_callback "cuda_core::register_mr_dealloc_callback" (
        MRDeallocCallback cb) noexcept
    # NOTE(review): `object mr` under nogil -- confirm, as above.
    DevicePtrHandle deviceptr_create_with_mr "cuda_core::deviceptr_create_with_mr" (
        cydriver.CUdeviceptr ptr, size_t size, object mr) except+ nogil

    DevicePtrHandle deviceptr_import_ipc "cuda_core::deviceptr_import_ipc" (
        const MemoryPoolHandle& h_pool, const void* export_data, const StreamHandle& h_stream) except+ nogil
    StreamHandle deallocation_stream "cuda_core::deallocation_stream" (
        const DevicePtrHandle& h) noexcept nogil
    void set_deallocation_stream "cuda_core::set_deallocation_stream" (
        const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept nogil

    # Library handles
    LibraryHandle create_library_handle_from_file "cuda_core::create_library_handle_from_file" (
        const char* path) except+ nogil
    LibraryHandle create_library_handle_from_data "cuda_core::create_library_handle_from_data" (
        const void* data) except+ nogil
    LibraryHandle create_library_handle_ref "cuda_core::create_library_handle_ref" (
        cydriver.CUlibrary library) except+ nogil

    # Kernel handles
    KernelHandle create_kernel_handle "cuda_core::create_kernel_handle" (
        const LibraryHandle& h_library, const char* name) except+ nogil
    KernelHandle create_kernel_handle_ref "cuda_core::create_kernel_handle_ref" (
        cydriver.CUkernel kernel) except+ nogil
    LibraryHandle get_kernel_library "cuda_core::get_kernel_library" (
        const KernelHandle& h) noexcept nogil

    # Graph handles
    GraphHandle create_graph_handle "cuda_core::create_graph_handle" (
        cydriver.CUgraph graph) except+ nogil
    GraphHandle create_graph_handle_ref "cuda_core::create_graph_handle_ref" (
        cydriver.CUgraph graph, const GraphHandle& h_parent) except+ nogil

    # Graph node handles
    # NOTE(review): GraphNodeHandle is not named in the explicit
    # `from ._resource_handles cimport (...)` list at the top of this file --
    # presumably it is resolved through the module's own .pxd; confirm.
    GraphNodeHandle create_graph_node_handle "cuda_core::create_graph_node_handle" (
        cydriver.CUgraphNode node, const GraphHandle& h_graph) except+ nogil
    GraphHandle graph_node_get_graph "cuda_core::graph_node_get_graph" (
        const GraphNodeHandle& h) noexcept nogil

    # Graphics resource handles
    GraphicsResourceHandle create_graphics_resource_handle "cuda_core::create_graphics_resource_handle" (
        cydriver.CUgraphicsResource resource) except+ nogil

    # NVRTC Program handles
    NvrtcProgramHandle create_nvrtc_program_handle "cuda_core::create_nvrtc_program_handle" (
        cynvrtc.nvrtcProgram prog) except+ nogil
    NvrtcProgramHandle create_nvrtc_program_handle_ref "cuda_core::create_nvrtc_program_handle_ref" (
        cynvrtc.nvrtcProgram prog) except+ nogil

    # NVVM Program handles
    NvvmProgramHandle create_nvvm_program_handle "cuda_core::create_nvvm_program_handle" (
        cynvvm.nvvmProgram prog) except+ nogil
    NvvmProgramHandle create_nvvm_program_handle_ref "cuda_core::create_nvvm_program_handle_ref" (
        cynvvm.nvvmProgram prog) except+ nogil

    # nvJitLink handles
    NvJitLinkHandle create_nvjitlink_handle "cuda_core::create_nvjitlink_handle" (
        cynvjitlink.nvJitLinkHandle handle) except+ nogil
    NvJitLinkHandle create_nvjitlink_handle_ref "cuda_core::create_nvjitlink_handle_ref" (
        cynvjitlink.nvJitLinkHandle handle) except+ nogil

    # cuLink handles
    CuLinkHandle create_culink_handle "cuda_core::create_culink_handle" (
        cydriver.CUlinkState state) except+ nogil
    CuLinkHandle create_culink_handle_ref "cuda_core::create_culink_handle_ref" (
        cydriver.CUlinkState state) except+ nogil

193  

194  

195# ============================================================================= 

196# CUDA Driver API capsule 

197# 

198# This provides resolved CUDA driver function pointers to the C++ code. 

199# ============================================================================= 

200  

# Module-level C string constant belonging to the "CUDA Driver API capsule"
# section.
# NOTE(review): nothing in the visible part of this file reads this constant --
# presumably it is consumed elsewhere or is a leftover; confirm it is still needed.
cdef const char* _CUDA_DRIVER_API_V1_NAME = b"cuda.core._resource_handles._CUDA_DRIVER_API_V1"

202  

203  

204# ============================================================================= 

205# CUDA driver function pointer initialization 

206# 

207# The C++ code declares extern function pointers (p_cuXxx) that need to be 

208# populated before any handle creation functions are called. We extract these 

209# from cuda.bindings.cydriver.__pyx_capi__ at module import time. 

210# 

211# The Cython string substitution (e.g., "reinterpret_cast<void*&>(...)") 

212# allows us to assign void* values to typed function pointer variables. 

213# ============================================================================= 

214  

215# Declare extern variables with reinterpret_cast to allow void* assignment 

# Declare extern variables with reinterpret_cast to allow void* assignment.
#
# On the Cython side every variable below has type `void*`. Its C name is the
# expression `reinterpret_cast<void*&>(cuda_core::p_xxx)`, so an assignment
# written in Cython is emitted as an assignment through that reference, i.e.
# it stores into the `cuda_core::p_xxx` variable declared by the header.
cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
    # Context
    void* p_cuDevicePrimaryCtxRetain "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRetain)"
    void* p_cuDevicePrimaryCtxRelease "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRelease)"
    void* p_cuCtxGetCurrent "reinterpret_cast<void*&>(cuda_core::p_cuCtxGetCurrent)"

    # Stream
    void* p_cuStreamCreateWithPriority "reinterpret_cast<void*&>(cuda_core::p_cuStreamCreateWithPriority)"
    void* p_cuStreamDestroy "reinterpret_cast<void*&>(cuda_core::p_cuStreamDestroy)"

    # Event
    void* p_cuEventCreate "reinterpret_cast<void*&>(cuda_core::p_cuEventCreate)"
    void* p_cuEventDestroy "reinterpret_cast<void*&>(cuda_core::p_cuEventDestroy)"
    void* p_cuIpcOpenEventHandle "reinterpret_cast<void*&>(cuda_core::p_cuIpcOpenEventHandle)"

    # Device
    void* p_cuDeviceGetCount "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetCount)"

    # Memory pool
    void* p_cuMemPoolSetAccess "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolSetAccess)"
    void* p_cuMemPoolDestroy "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolDestroy)"
    void* p_cuMemPoolCreate "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolCreate)"
    void* p_cuDeviceGetMemPool "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetMemPool)"
    void* p_cuMemPoolImportFromShareableHandle "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportFromShareableHandle)"

    # Memory allocation
    void* p_cuMemAllocFromPoolAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocFromPoolAsync)"
    void* p_cuMemAllocAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocAsync)"
    void* p_cuMemAlloc "reinterpret_cast<void*&>(cuda_core::p_cuMemAlloc)"
    void* p_cuMemAllocHost "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocHost)"

    # Memory deallocation
    void* p_cuMemFreeAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeAsync)"
    void* p_cuMemFree "reinterpret_cast<void*&>(cuda_core::p_cuMemFree)"
    void* p_cuMemFreeHost "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeHost)"

    # IPC
    void* p_cuMemPoolImportPointer "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportPointer)"

    # Library
    void* p_cuLibraryLoadFromFile "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadFromFile)"
    void* p_cuLibraryLoadData "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadData)"
    void* p_cuLibraryUnload "reinterpret_cast<void*&>(cuda_core::p_cuLibraryUnload)"
    void* p_cuLibraryGetKernel "reinterpret_cast<void*&>(cuda_core::p_cuLibraryGetKernel)"

    # Graph
    void* p_cuGraphDestroy "reinterpret_cast<void*&>(cuda_core::p_cuGraphDestroy)"

    # Linker
    void* p_cuLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_cuLinkDestroy)"

    # Graphics interop
    void* p_cuGraphicsUnmapResources "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnmapResources)"
    void* p_cuGraphicsUnregisterResource "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnregisterResource)"

    # NVRTC (assigned further down from cynvrtc.__pyx_capi__)
    void* p_nvrtcDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvrtcDestroyProgram)"

    # NVVM (assigned further down from cynvvm.__pyx_capi__)
    void* p_nvvmDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvvmDestroyProgram)"

    # nvJitLink (assigned further down from cynvjitlink.__pyx_capi__)
    void* p_nvJitLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_nvJitLinkDestroy)"

279  

280  

281# Initialize driver function pointers from cydriver.__pyx_capi__ at module load 

# Look up one entry of cydriver.__pyx_capi__ and return the raw pointer held
# by the capsule stored there.
#
#   name: key into cydriver.__pyx_capi__ (raises if the key is absent).
#
# The capsule is unwrapped with its own name, so the name check performed by
# PyCapsule_GetPointer always matches.
cdef void* _get_driver_fn(str name):
    cdef object capi_entry = cydriver.__pyx_capi__[name]
    cdef const char* capsule_name = PyCapsule_GetName(capi_entry)
    return PyCapsule_GetPointer(capi_entry, capsule_name)


# ---- Populate the driver function pointers (executed at module import) ----

# Primary-context retain/release and current-context query
p_cuDevicePrimaryCtxRetain = _get_driver_fn("cuDevicePrimaryCtxRetain")
p_cuDevicePrimaryCtxRelease = _get_driver_fn("cuDevicePrimaryCtxRelease")
p_cuCtxGetCurrent = _get_driver_fn("cuCtxGetCurrent")

# Stream creation / destruction
p_cuStreamCreateWithPriority = _get_driver_fn("cuStreamCreateWithPriority")
p_cuStreamDestroy = _get_driver_fn("cuStreamDestroy")

# Event creation / destruction and IPC event import
p_cuEventCreate = _get_driver_fn("cuEventCreate")
p_cuEventDestroy = _get_driver_fn("cuEventDestroy")
p_cuIpcOpenEventHandle = _get_driver_fn("cuIpcOpenEventHandle")

# Device enumeration
p_cuDeviceGetCount = _get_driver_fn("cuDeviceGetCount")

# Memory pools
p_cuMemPoolSetAccess = _get_driver_fn("cuMemPoolSetAccess")
p_cuMemPoolDestroy = _get_driver_fn("cuMemPoolDestroy")
p_cuMemPoolCreate = _get_driver_fn("cuMemPoolCreate")
p_cuDeviceGetMemPool = _get_driver_fn("cuDeviceGetMemPool")
p_cuMemPoolImportFromShareableHandle = _get_driver_fn("cuMemPoolImportFromShareableHandle")

# Allocation entry points
p_cuMemAllocFromPoolAsync = _get_driver_fn("cuMemAllocFromPoolAsync")
p_cuMemAllocAsync = _get_driver_fn("cuMemAllocAsync")
p_cuMemAlloc = _get_driver_fn("cuMemAlloc")
p_cuMemAllocHost = _get_driver_fn("cuMemAllocHost")

# Matching deallocation entry points
p_cuMemFreeAsync = _get_driver_fn("cuMemFreeAsync")
p_cuMemFree = _get_driver_fn("cuMemFree")
p_cuMemFreeHost = _get_driver_fn("cuMemFreeHost")

# IPC pointer import
p_cuMemPoolImportPointer = _get_driver_fn("cuMemPoolImportPointer")

# Library loading and kernel lookup
p_cuLibraryLoadFromFile = _get_driver_fn("cuLibraryLoadFromFile")
p_cuLibraryLoadData = _get_driver_fn("cuLibraryLoadData")
p_cuLibraryUnload = _get_driver_fn("cuLibraryUnload")
p_cuLibraryGetKernel = _get_driver_fn("cuLibraryGetKernel")

# Graph destruction
p_cuGraphDestroy = _get_driver_fn("cuGraphDestroy")

# Linker state destruction
p_cuLinkDestroy = _get_driver_fn("cuLinkDestroy")

# Graphics interop teardown
p_cuGraphicsUnmapResources = _get_driver_fn("cuGraphicsUnmapResources")
p_cuGraphicsUnregisterResource = _get_driver_fn("cuGraphicsUnregisterResource")

339  

340# ============================================================================= 

341# NVRTC function pointer initialization 

342# ============================================================================= 

343  

# Look up one entry of cynvrtc.__pyx_capi__ and return the raw pointer held by
# the capsule stored there.
#
#   name: key into cynvrtc.__pyx_capi__ (raises if the key is absent).
cdef void* _get_nvrtc_fn(str name):
    cdef object capi_entry = cynvrtc.__pyx_capi__[name]
    cdef const char* capsule_name = PyCapsule_GetName(capi_entry)
    return PyCapsule_GetPointer(capi_entry, capsule_name)


# Executed at module import: hand the NVRTC program destructor to the C++ side.
p_nvrtcDestroyProgram = _get_nvrtc_fn("nvrtcDestroyProgram")

349  

350# ============================================================================= 

351# NVVM function pointer initialization 

352# 

353# NVVM may not be available at runtime, so we handle missing function pointers 

354# gracefully. The C++ deleter checks for null before calling. 

355# ============================================================================= 

356  

# Look up one entry of cynvvm.__pyx_capi__ and return the raw pointer held by
# the capsule stored there.
#
#   name: key into cynvvm.__pyx_capi__.
#
# NOTE(review): the section header says missing function pointers are handled
# gracefully, but this lookup raises when `name` is absent and nothing here
# catches it -- confirm that is the intended behaviour.
cdef void* _get_nvvm_fn(str name):
    cdef object capi_entry = cynvvm.__pyx_capi__[name]
    cdef const char* capsule_name = PyCapsule_GetName(capi_entry)
    return PyCapsule_GetPointer(capi_entry, capsule_name)


# Executed at module import: hand the NVVM program destructor to the C++ side.
p_nvvmDestroyProgram = _get_nvvm_fn("nvvmDestroyProgram")

362  

363# ============================================================================= 

364# nvJitLink function pointer initialization 

365# 

366# nvJitLink may not be available at runtime, so we handle missing function 

367# pointers gracefully. The C++ deleter checks for null before calling. 

368# ============================================================================= 

369  

# Look up one entry of cynvjitlink.__pyx_capi__ and return the raw pointer
# held by the capsule stored there.
#
#   name: key into cynvjitlink.__pyx_capi__.
#
# NOTE(review): the section header says missing function pointers are handled
# gracefully, but this lookup raises when `name` is absent and nothing here
# catches it -- confirm that is the intended behaviour.
cdef void* _get_nvjitlink_fn(str name):
    cdef object capi_entry = cynvjitlink.__pyx_capi__[name]
    cdef const char* capsule_name = PyCapsule_GetName(capi_entry)
    return PyCapsule_GetPointer(capi_entry, capsule_name)


# Executed at module import: hand the nvJitLink destructor to the C++ side.
p_nvJitLinkDestroy = _get_nvjitlink_fn("nvJitLinkDestroy")