Coverage for cuda / core / _resource_handles.pyx: 100.00%

51 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-29 01:27 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5# This module compiles _cpp/resource_handles.cpp into a shared library. 

6# Consumer modules cimport the functions declared in _resource_handles.pxd. 

7# Since there is only one copy of the C++ code (in this .so), all static and 

8# thread-local state is shared correctly across all consumer modules. 

9# 

10# The cdef extern from declarations below satisfy the .pxd declarations directly, 

11# without needing separate wrapper functions. 

12  

13from cpython.pycapsule cimport PyCapsule_GetName, PyCapsule_GetPointer 

14from libc.stddef cimport size_t 

15  

16from cuda.bindings cimport cydriver 

17from cuda.bindings cimport cynvrtc 

18from cuda.bindings cimport cynvvm 

19from cuda.bindings cimport cynvjitlink 

20  

21from ._resource_handles cimport ( 

22 ContextHandle, 

23 StreamHandle, 

24 EventHandle, 

25 MemoryPoolHandle, 

26 DevicePtrHandle, 

27 LibraryHandle, 

28 KernelHandle, 

29 GraphHandle, 

30 GraphicsResourceHandle, 

31 NvrtcProgramHandle, 

32 NvvmProgramHandle, 

33 NvJitLinkHandle, 

34 CuLinkHandle, 

35) 

36  

37import cuda.bindings.cydriver as cydriver 

38import cuda.bindings.cynvrtc as cynvrtc 

39import cuda.bindings.cynvvm as cynvvm 

40import cuda.bindings.cynvjitlink as cynvjitlink 

41  

42# ============================================================================= 

43# C++ function declarations (non-inline, implemented in resource_handles.cpp) 

44# 

45# These declarations satisfy the cdef function declarations in _resource_handles.pxd. 

46# Consumer modules cimport these functions and calls go through this .so. 

47# ============================================================================= 

48  

# Declarations of the C++ entry points found in _cpp/resource_handles.hpp.
# Each entry pairs the Cython-visible name with the quoted, fully qualified
# C++ symbol (the cname string that follows the Cython name).
#
# Specifiers used throughout this block:
#   except+   -- a C++ exception thrown by the call is translated by Cython
#                into a Python exception.
#   noexcept  -- the call is declared as never propagating an exception.
#   nogil     -- the function may be called without holding the GIL.
#
# NOTE(review): GraphNodeHandle, FileDescriptorHandle and MRDeallocCallback are
# used below but do not appear in the explicit
# `from ._resource_handles cimport (...)` list earlier in this module --
# presumably they are supplied by the same-named .pxd; confirm.
49cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core": 

50 # Thread-local error handling 

51 cydriver.CUresult get_last_error "cuda_core::get_last_error" () noexcept nogil 

52 cydriver.CUresult peek_last_error "cuda_core::peek_last_error" () noexcept nogil 

53 void clear_last_error "cuda_core::clear_last_error" () noexcept nogil 

54  

55 # Context handles 

56 ContextHandle create_context_handle_ref "cuda_core::create_context_handle_ref" ( 

57 cydriver.CUcontext ctx) except+ nogil 

58 ContextHandle get_primary_context "cuda_core::get_primary_context" ( 

59 int device_id) except+ nogil 

60 ContextHandle get_current_context "cuda_core::get_current_context" () except+ nogil 

61  

62 # Stream handles 

63 StreamHandle create_stream_handle "cuda_core::create_stream_handle" ( 

64 const ContextHandle& h_ctx, unsigned int flags, int priority) except+ nogil 

65 StreamHandle create_stream_handle_ref "cuda_core::create_stream_handle_ref" ( 

66 cydriver.CUstream stream) except+ nogil 

67 StreamHandle create_stream_handle_with_owner "cuda_core::create_stream_handle_with_owner" ( 

68 cydriver.CUstream stream, object owner) except+ nogil 

69 StreamHandle get_legacy_stream "cuda_core::get_legacy_stream" () except+ nogil 

70 StreamHandle get_per_thread_stream "cuda_core::get_per_thread_stream" () except+ nogil 

71  

72 # Event handles (note: _create_event_handle* are internal due to C++ overloading) 

73 EventHandle create_event_handle "cuda_core::create_event_handle" ( 

74 const ContextHandle& h_ctx, unsigned int flags, 

75 bint timing_disabled, bint busy_waited, 

76 bint ipc_enabled, int device_id) except+ nogil 

77 EventHandle create_event_handle_noctx "cuda_core::create_event_handle_noctx" ( 

78 unsigned int flags) except+ nogil 

79 EventHandle create_event_handle_ref "cuda_core::create_event_handle_ref" ( 

80 cydriver.CUevent event) except+ nogil 

81 EventHandle create_event_handle_ipc "cuda_core::create_event_handle_ipc" ( 

82 const cydriver.CUipcEventHandle& ipc_handle, bint busy_waited) except+ nogil 

83  

84 # Event metadata getters 

85 bint get_event_timing_disabled "cuda_core::get_event_timing_disabled" ( 

86 const EventHandle& h) noexcept nogil 

87 bint get_event_busy_waited "cuda_core::get_event_busy_waited" ( 

88 const EventHandle& h) noexcept nogil 

89 bint get_event_ipc_enabled "cuda_core::get_event_ipc_enabled" ( 

90 const EventHandle& h) noexcept nogil 

91 int get_event_device_id "cuda_core::get_event_device_id" ( 

92 const EventHandle& h) noexcept nogil 

93 ContextHandle get_event_context "cuda_core::get_event_context" ( 

94 const EventHandle& h) noexcept nogil 

95  

96 # Memory pool handles 

97 MemoryPoolHandle create_mempool_handle "cuda_core::create_mempool_handle" ( 

98 const cydriver.CUmemPoolProps& props) except+ nogil 

99 MemoryPoolHandle create_mempool_handle_ref "cuda_core::create_mempool_handle_ref" ( 

100 cydriver.CUmemoryPool pool) except+ nogil 

101 MemoryPoolHandle get_device_mempool "cuda_core::get_device_mempool" ( 

102 int device_id) except+ nogil 

103 MemoryPoolHandle create_mempool_handle_ipc "cuda_core::create_mempool_handle_ipc" ( 

104 int fd, cydriver.CUmemAllocationHandleType handle_type) except+ nogil 

105  

106 # Device pointer handles 

107 DevicePtrHandle deviceptr_alloc_from_pool "cuda_core::deviceptr_alloc_from_pool" ( 

108 size_t size, const MemoryPoolHandle& h_pool, const StreamHandle& h_stream) except+ nogil 

109 DevicePtrHandle deviceptr_alloc_async "cuda_core::deviceptr_alloc_async" ( 

110 size_t size, const StreamHandle& h_stream) except+ nogil 

111 DevicePtrHandle deviceptr_alloc "cuda_core::deviceptr_alloc" (size_t size) except+ nogil 

112 DevicePtrHandle deviceptr_alloc_host "cuda_core::deviceptr_alloc_host" (size_t size) except+ nogil 

113 DevicePtrHandle deviceptr_create_ref "cuda_core::deviceptr_create_ref" ( 

114 cydriver.CUdeviceptr ptr) except+ nogil 

115 DevicePtrHandle deviceptr_create_with_owner "cuda_core::deviceptr_create_with_owner" ( 

116 cydriver.CUdeviceptr ptr, object owner) except+ nogil 

117 DevicePtrHandle deviceptr_create_mapped_graphics "cuda_core::deviceptr_create_mapped_graphics" ( 

118 cydriver.CUdeviceptr ptr, 

119 const GraphicsResourceHandle& h_resource, 

120 const StreamHandle& h_stream) except+ nogil 

121  

122 # MR deallocation callback 

# NOTE(review): register_mr_dealloc_callback is the only declaration in this
# block that carries no `nogil` marker -- confirm that this is intentional.
123 void register_mr_dealloc_callback "cuda_core::register_mr_dealloc_callback" ( 

124 MRDeallocCallback cb) noexcept 

125 DevicePtrHandle deviceptr_create_with_mr "cuda_core::deviceptr_create_with_mr" ( 

126 cydriver.CUdeviceptr ptr, size_t size, object mr) except+ nogil 

127  

128 DevicePtrHandle deviceptr_import_ipc "cuda_core::deviceptr_import_ipc" ( 

129 const MemoryPoolHandle& h_pool, const void* export_data, const StreamHandle& h_stream) except+ nogil 

130 StreamHandle deallocation_stream "cuda_core::deallocation_stream" ( 

131 const DevicePtrHandle& h) noexcept nogil 

132 void set_deallocation_stream "cuda_core::set_deallocation_stream" ( 

133 const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept nogil 

134  

135 # Library handles 

136 LibraryHandle create_library_handle_from_file "cuda_core::create_library_handle_from_file" ( 

137 const char* path) except+ nogil 

138 LibraryHandle create_library_handle_from_data "cuda_core::create_library_handle_from_data" ( 

139 const void* data) except+ nogil 

140 LibraryHandle create_library_handle_ref "cuda_core::create_library_handle_ref" ( 

141 cydriver.CUlibrary library) except+ nogil 

142  

143 # Kernel handles 

144 KernelHandle create_kernel_handle "cuda_core::create_kernel_handle" ( 

145 const LibraryHandle& h_library, const char* name) except+ nogil 

146 KernelHandle create_kernel_handle_ref "cuda_core::create_kernel_handle_ref" ( 

147 cydriver.CUkernel kernel) except+ nogil 

148 LibraryHandle get_kernel_library "cuda_core::get_kernel_library" ( 

149 const KernelHandle& h) noexcept nogil 

150  

151 # Graph handles 

152 GraphHandle create_graph_handle "cuda_core::create_graph_handle" ( 

153 cydriver.CUgraph graph) except+ nogil 

154 GraphHandle create_graph_handle_ref "cuda_core::create_graph_handle_ref" ( 

155 cydriver.CUgraph graph, const GraphHandle& h_parent) except+ nogil 

156  

157 # Graph node handles 

158 GraphNodeHandle create_graph_node_handle "cuda_core::create_graph_node_handle" ( 

159 cydriver.CUgraphNode node, const GraphHandle& h_graph) except+ nogil 

160 GraphHandle graph_node_get_graph "cuda_core::graph_node_get_graph" ( 

161 const GraphNodeHandle& h) noexcept nogil 

162 void invalidate_graph_node "cuda_core::invalidate_graph_node" ( 

163 const GraphNodeHandle& h) noexcept nogil 

164  

165 # Graphics resource handles 

166 GraphicsResourceHandle create_graphics_resource_handle "cuda_core::create_graphics_resource_handle" ( 

167 cydriver.CUgraphicsResource resource) except+ nogil 

168  

169 # NVRTC Program handles 

170 NvrtcProgramHandle create_nvrtc_program_handle "cuda_core::create_nvrtc_program_handle" ( 

171 cynvrtc.nvrtcProgram prog) except+ nogil 

172 NvrtcProgramHandle create_nvrtc_program_handle_ref "cuda_core::create_nvrtc_program_handle_ref" ( 

173 cynvrtc.nvrtcProgram prog) except+ nogil 

174  

175 # NVVM Program handles 

176 NvvmProgramHandle create_nvvm_program_handle "cuda_core::create_nvvm_program_handle" ( 

177 cynvvm.nvvmProgram prog) except+ nogil 

178 NvvmProgramHandle create_nvvm_program_handle_ref "cuda_core::create_nvvm_program_handle_ref" ( 

179 cynvvm.nvvmProgram prog) except+ nogil 

180  

181 # nvJitLink handles 

182 NvJitLinkHandle create_nvjitlink_handle "cuda_core::create_nvjitlink_handle" ( 

183 cynvjitlink.nvJitLinkHandle handle) except+ nogil 

184 NvJitLinkHandle create_nvjitlink_handle_ref "cuda_core::create_nvjitlink_handle_ref" ( 

185 cynvjitlink.nvJitLinkHandle handle) except+ nogil 

186  

187 # cuLink handles 

188 CuLinkHandle create_culink_handle "cuda_core::create_culink_handle" ( 

189 cydriver.CUlinkState state) except+ nogil 

190 CuLinkHandle create_culink_handle_ref "cuda_core::create_culink_handle_ref" ( 

191 cydriver.CUlinkState state) except+ nogil 

192  

193 # File descriptor handles 

194 FileDescriptorHandle create_fd_handle "cuda_core::create_fd_handle" ( 

195 int fd) except+ nogil 

196 FileDescriptorHandle create_fd_handle_ref "cuda_core::create_fd_handle_ref" ( 

197 int fd) except+ nogil 

198  

199  

200# ============================================================================= 

201# CUDA Driver API capsule 

202# 

203# This provides resolved CUDA driver function pointers to the C++ code. 

204# ============================================================================= 

205  

# NOTE(review): this name constant is not referenced anywhere else in the
# visible part of this module -- presumably it is a capsule name kept for an
# earlier or external capsule-based hand-off; confirm before relying on it.
206cdef const char* _CUDA_DRIVER_API_V1_NAME = b"cuda.core._resource_handles._CUDA_DRIVER_API_V1" 

207  

208  

209# ============================================================================= 

210# CUDA driver function pointer initialization 

211# 

212# The C++ code declares extern function pointers (p_cuXxx) that need to be 

213# populated before any handle creation functions are called. We extract these 

214# from cuda.bindings.cydriver.__pyx_capi__ at module import time. 

215# 

216# The Cython string substitution (e.g., "reinterpret_cast<void*&>(...)") 

217# allows us to assign void* values to typed function pointer variables. 

218# ============================================================================= 

219  

220# Declare extern variables with reinterpret_cast to allow void* assignment 

# Extern variables for the C++ function-pointer slots (cuda_core::p_*).
# On the Cython side every slot is typed `void*`; the quoted cname expression
# makes the generated C++ refer to the slot through reinterpret_cast<void*&>,
# which is what allows a plain void* to be stored into it.
# Every name declared here is assigned exactly once further down in this
# module, from the matching __pyx_capi__ lookup helper.
221cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core": 

222 # Context 

223 void* p_cuDevicePrimaryCtxRetain "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRetain)" 

224 void* p_cuDevicePrimaryCtxRelease "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRelease)" 

225 void* p_cuCtxGetCurrent "reinterpret_cast<void*&>(cuda_core::p_cuCtxGetCurrent)" 

226  

227 # Stream 

228 void* p_cuStreamCreateWithPriority "reinterpret_cast<void*&>(cuda_core::p_cuStreamCreateWithPriority)" 

229 void* p_cuStreamDestroy "reinterpret_cast<void*&>(cuda_core::p_cuStreamDestroy)" 

230  

231 # Event 

232 void* p_cuEventCreate "reinterpret_cast<void*&>(cuda_core::p_cuEventCreate)" 

233 void* p_cuEventDestroy "reinterpret_cast<void*&>(cuda_core::p_cuEventDestroy)" 

234 void* p_cuIpcOpenEventHandle "reinterpret_cast<void*&>(cuda_core::p_cuIpcOpenEventHandle)" 

235  

236 # Device 

237 void* p_cuDeviceGetCount "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetCount)" 

238  

239 # Memory pool 

240 void* p_cuMemPoolSetAccess "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolSetAccess)" 

241 void* p_cuMemPoolDestroy "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolDestroy)" 

242 void* p_cuMemPoolCreate "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolCreate)" 

243 void* p_cuDeviceGetMemPool "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetMemPool)" 

244 void* p_cuMemPoolImportFromShareableHandle "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportFromShareableHandle)" 

245  

246 # Memory allocation 

247 void* p_cuMemAllocFromPoolAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocFromPoolAsync)" 

248 void* p_cuMemAllocAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocAsync)" 

249 void* p_cuMemAlloc "reinterpret_cast<void*&>(cuda_core::p_cuMemAlloc)" 

250 void* p_cuMemAllocHost "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocHost)" 

251  

252 # Memory deallocation 

253 void* p_cuMemFreeAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeAsync)" 

254 void* p_cuMemFree "reinterpret_cast<void*&>(cuda_core::p_cuMemFree)" 

255 void* p_cuMemFreeHost "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeHost)" 

256  

257 # IPC 

258 void* p_cuMemPoolImportPointer "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportPointer)" 

259  

260 # Library 

261 void* p_cuLibraryLoadFromFile "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadFromFile)" 

262 void* p_cuLibraryLoadData "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadData)" 

263 void* p_cuLibraryUnload "reinterpret_cast<void*&>(cuda_core::p_cuLibraryUnload)" 

264 void* p_cuLibraryGetKernel "reinterpret_cast<void*&>(cuda_core::p_cuLibraryGetKernel)" 

265  

266 # Graph 

267 void* p_cuGraphDestroy "reinterpret_cast<void*&>(cuda_core::p_cuGraphDestroy)" 

268  

269 # Linker 

270 void* p_cuLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_cuLinkDestroy)" 

271  

272 # Graphics interop 

273 void* p_cuGraphicsUnmapResources "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnmapResources)" 

274 void* p_cuGraphicsUnregisterResource "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnregisterResource)" 

275  

276 # NVRTC 

277 void* p_nvrtcDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvrtcDestroyProgram)" 

278  

279 # NVVM 

280 void* p_nvvmDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvvmDestroyProgram)" 

281  

282 # nvJitLink 

283 void* p_nvJitLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_nvJitLinkDestroy)" 

284  

285  

286# Initialize driver function pointers from cydriver.__pyx_capi__ at module load 

cdef void* _get_driver_fn(str name):
    # Fetch the capsule that cydriver publishes under `name` in its
    # __pyx_capi__ table and return the raw pointer stored inside it.
    # The capsule's own recorded name is handed back to
    # PyCapsule_GetPointer as the name to match against.
    cdef const char* entry_name
    entry = cydriver.__pyx_capi__[name]
    entry_name = PyCapsule_GetName(entry)
    return PyCapsule_GetPointer(entry, entry_name)


# --- Context ---
p_cuDevicePrimaryCtxRetain = _get_driver_fn("cuDevicePrimaryCtxRetain")
p_cuDevicePrimaryCtxRelease = _get_driver_fn("cuDevicePrimaryCtxRelease")
p_cuCtxGetCurrent = _get_driver_fn("cuCtxGetCurrent")

# --- Stream ---
p_cuStreamCreateWithPriority = _get_driver_fn("cuStreamCreateWithPriority")
p_cuStreamDestroy = _get_driver_fn("cuStreamDestroy")

# --- Event ---
p_cuEventCreate = _get_driver_fn("cuEventCreate")
p_cuEventDestroy = _get_driver_fn("cuEventDestroy")
p_cuIpcOpenEventHandle = _get_driver_fn("cuIpcOpenEventHandle")

# --- Device ---
p_cuDeviceGetCount = _get_driver_fn("cuDeviceGetCount")

# --- Memory pool ---
p_cuMemPoolSetAccess = _get_driver_fn("cuMemPoolSetAccess")
p_cuMemPoolDestroy = _get_driver_fn("cuMemPoolDestroy")
p_cuMemPoolCreate = _get_driver_fn("cuMemPoolCreate")
p_cuDeviceGetMemPool = _get_driver_fn("cuDeviceGetMemPool")
p_cuMemPoolImportFromShareableHandle = _get_driver_fn("cuMemPoolImportFromShareableHandle")

# --- Memory allocation ---
p_cuMemAllocFromPoolAsync = _get_driver_fn("cuMemAllocFromPoolAsync")
p_cuMemAllocAsync = _get_driver_fn("cuMemAllocAsync")
p_cuMemAlloc = _get_driver_fn("cuMemAlloc")
p_cuMemAllocHost = _get_driver_fn("cuMemAllocHost")

# --- Memory deallocation ---
p_cuMemFreeAsync = _get_driver_fn("cuMemFreeAsync")
p_cuMemFree = _get_driver_fn("cuMemFree")
p_cuMemFreeHost = _get_driver_fn("cuMemFreeHost")

# --- IPC ---
p_cuMemPoolImportPointer = _get_driver_fn("cuMemPoolImportPointer")

# --- Library ---
p_cuLibraryLoadFromFile = _get_driver_fn("cuLibraryLoadFromFile")
p_cuLibraryLoadData = _get_driver_fn("cuLibraryLoadData")
p_cuLibraryUnload = _get_driver_fn("cuLibraryUnload")
p_cuLibraryGetKernel = _get_driver_fn("cuLibraryGetKernel")

# --- Graph ---
p_cuGraphDestroy = _get_driver_fn("cuGraphDestroy")

# --- Linker ---
p_cuLinkDestroy = _get_driver_fn("cuLinkDestroy")

# --- Graphics interop ---
p_cuGraphicsUnmapResources = _get_driver_fn("cuGraphicsUnmapResources")
p_cuGraphicsUnregisterResource = _get_driver_fn("cuGraphicsUnregisterResource")

344  

345# ============================================================================= 

346# NVRTC function pointer initialization 

347# ============================================================================= 

348  

cdef void* _get_nvrtc_fn(str name):
    # Fetch the capsule that cynvrtc publishes under `name` in its
    # __pyx_capi__ table and return the raw pointer stored inside it,
    # using the capsule's own recorded name for the lookup.
    cdef const char* entry_name
    entry = cynvrtc.__pyx_capi__[name]
    entry_name = PyCapsule_GetName(entry)
    return PyCapsule_GetPointer(entry, entry_name)


# Fill the C++ slot for the NVRTC program destructor.
p_nvrtcDestroyProgram = _get_nvrtc_fn("nvrtcDestroyProgram")

354  

355# ============================================================================= 

356# NVVM function pointer initialization 

357# 

358# NVVM may not be available at runtime, so we handle missing function pointers 

359# gracefully. The C++ deleter checks for null before calling. 

360# ============================================================================= 

361  

cdef void* _get_nvvm_fn(str name):
    # Fetch the capsule that cynvvm publishes under `name` in its
    # __pyx_capi__ table and return the raw pointer stored inside it,
    # using the capsule's own recorded name for the lookup.
    # NOTE(review): the subscript lookup is unconditional, so this helper
    # does not itself map a missing name to a null pointer; the graceful
    # handling mentioned in the section header presumably lives elsewhere
    # (e.g. in the C++ deleter) -- confirm.
    cdef const char* entry_name
    entry = cynvvm.__pyx_capi__[name]
    entry_name = PyCapsule_GetName(entry)
    return PyCapsule_GetPointer(entry, entry_name)


# Fill the C++ slot for the NVVM program destructor.
p_nvvmDestroyProgram = _get_nvvm_fn("nvvmDestroyProgram")

367  

368# ============================================================================= 

369# nvJitLink function pointer initialization 

370# 

371# nvJitLink may not be available at runtime, so we handle missing function 

372# pointers gracefully. The C++ deleter checks for null before calling. 

373# ============================================================================= 

374  

375cdef void* _get_nvjitlink_fn(str name): 

376 capsule = cynvjitlink.__pyx_capi__[name] 

377 return PyCapsule_GetPointer(capsule, PyCapsule_GetName(capsule)) 

378  

379p_nvJitLinkDestroy = _get_nvjitlink_fn("nvJitLinkDestroy")