Coverage for cuda / core / _resource_handles.pyx: 100.00%

49 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-08 01:07 +0000

1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 

2# 

3# SPDX-License-Identifier: Apache-2.0 

4  

5# This module compiles _cpp/resource_handles.cpp into a shared library. 

6# Consumer modules cimport the functions declared in _resource_handles.pxd. 

7# Since there is only one copy of the C++ code (in this .so), all static and 

8# thread-local state is shared correctly across all consumer modules. 

9# 

10# The cdef extern from declarations below satisfy the .pxd declarations directly, 

11# without needing separate wrapper functions. 

12  

13from cpython.pycapsule cimport PyCapsule_GetName, PyCapsule_GetPointer 

14from libc.stddef cimport size_t 

15  

16from cuda.bindings cimport cydriver 

17from cuda.bindings cimport cynvrtc 

18from cuda.bindings cimport cynvvm 

19from cuda.bindings cimport cynvjitlink 

20  

21from ._resource_handles cimport ( 

22 ContextHandle, 

23 StreamHandle, 

24 EventHandle, 

25 MemoryPoolHandle, 

26 DevicePtrHandle, 

27 LibraryHandle, 

28 KernelHandle, 

29 GraphicsResourceHandle, 

30 NvrtcProgramHandle, 

31 NvvmProgramHandle, 

32 NvJitLinkHandle, 

33 CuLinkHandle, 

34) 

35  

36import cuda.bindings.cydriver as cydriver 

37import cuda.bindings.cynvrtc as cynvrtc 

38import cuda.bindings.cynvvm as cynvvm 

39import cuda.bindings.cynvjitlink as cynvjitlink 

40  

41# ============================================================================= 

42# C++ function declarations (non-inline, implemented in resource_handles.cpp) 

43# 

44# These declarations satisfy the cdef function declarations in _resource_handles.pxd. 

45# Consumer modules cimport these functions and calls go through this .so. 

46# ============================================================================= 

47  

cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
    # Every entry below pairs the Cython-visible name with an explicit,
    # fully-qualified C name string ("cuda_core::...").
    #
    # Clause legend:
    #   except+   C++ exceptions are translated into Python exceptions
    #   noexcept  no exception translation is generated for the call
    #   nogil     the declaration is usable without holding the GIL

    # --- Thread-local error handling -------------------------------------
    cydriver.CUresult get_last_error "cuda_core::get_last_error" () noexcept nogil
    cydriver.CUresult peek_last_error "cuda_core::peek_last_error" () noexcept nogil
    void clear_last_error "cuda_core::clear_last_error" () noexcept nogil

    # --- Context handles --------------------------------------------------
    ContextHandle create_context_handle_ref "cuda_core::create_context_handle_ref" (
        cydriver.CUcontext ctx
    ) except+ nogil
    ContextHandle get_primary_context "cuda_core::get_primary_context" (
        int device_id
    ) except+ nogil
    ContextHandle get_current_context "cuda_core::get_current_context" () except+ nogil

    # --- Stream handles ---------------------------------------------------
    StreamHandle create_stream_handle "cuda_core::create_stream_handle" (
        const ContextHandle& h_ctx,
        unsigned int flags,
        int priority
    ) except+ nogil
    StreamHandle create_stream_handle_ref "cuda_core::create_stream_handle_ref" (
        cydriver.CUstream stream
    ) except+ nogil
    StreamHandle create_stream_handle_with_owner "cuda_core::create_stream_handle_with_owner" (
        cydriver.CUstream stream,
        object owner
    ) except+ nogil
    StreamHandle get_legacy_stream "cuda_core::get_legacy_stream" () except+ nogil
    StreamHandle get_per_thread_stream "cuda_core::get_per_thread_stream" () except+ nogil

    # --- Event handles ----------------------------------------------------
    # (note: _create_event_handle* are internal due to C++ overloading)
    EventHandle create_event_handle "cuda_core::create_event_handle" (
        const ContextHandle& h_ctx,
        unsigned int flags
    ) except+ nogil
    EventHandle create_event_handle_noctx "cuda_core::create_event_handle_noctx" (
        unsigned int flags
    ) except+ nogil
    EventHandle create_event_handle_ipc "cuda_core::create_event_handle_ipc" (
        const cydriver.CUipcEventHandle& ipc_handle
    ) except+ nogil

    # --- Memory pool handles ----------------------------------------------
    MemoryPoolHandle create_mempool_handle "cuda_core::create_mempool_handle" (
        const cydriver.CUmemPoolProps& props
    ) except+ nogil
    MemoryPoolHandle create_mempool_handle_ref "cuda_core::create_mempool_handle_ref" (
        cydriver.CUmemoryPool pool
    ) except+ nogil
    MemoryPoolHandle get_device_mempool "cuda_core::get_device_mempool" (
        int device_id
    ) except+ nogil
    MemoryPoolHandle create_mempool_handle_ipc "cuda_core::create_mempool_handle_ipc" (
        int fd,
        cydriver.CUmemAllocationHandleType handle_type
    ) except+ nogil

    # --- Device pointer handles -------------------------------------------
    DevicePtrHandle deviceptr_alloc_from_pool "cuda_core::deviceptr_alloc_from_pool" (
        size_t size,
        const MemoryPoolHandle& h_pool,
        const StreamHandle& h_stream
    ) except+ nogil
    DevicePtrHandle deviceptr_alloc_async "cuda_core::deviceptr_alloc_async" (
        size_t size,
        const StreamHandle& h_stream
    ) except+ nogil
    DevicePtrHandle deviceptr_alloc "cuda_core::deviceptr_alloc" (size_t size) except+ nogil
    DevicePtrHandle deviceptr_alloc_host "cuda_core::deviceptr_alloc_host" (size_t size) except+ nogil
    DevicePtrHandle deviceptr_create_ref "cuda_core::deviceptr_create_ref" (
        cydriver.CUdeviceptr ptr
    ) except+ nogil
    DevicePtrHandle deviceptr_create_with_owner "cuda_core::deviceptr_create_with_owner" (
        cydriver.CUdeviceptr ptr,
        object owner
    ) except+ nogil

    # --- MR deallocation callback -----------------------------------------
    # Callback signature accepted by register_mr_dealloc_callback.
    ctypedef void (*MRDeallocCallback)(
        object mr,
        cydriver.CUdeviceptr ptr,
        size_t size,
        const StreamHandle& stream
    ) noexcept
    void register_mr_dealloc_callback "cuda_core::register_mr_dealloc_callback" (
        MRDeallocCallback cb
    ) noexcept
    DevicePtrHandle deviceptr_create_with_mr "cuda_core::deviceptr_create_with_mr" (
        cydriver.CUdeviceptr ptr,
        size_t size,
        object mr
    ) except+ nogil

    # --- IPC import and per-pointer deallocation stream -------------------
    DevicePtrHandle deviceptr_import_ipc "cuda_core::deviceptr_import_ipc" (
        const MemoryPoolHandle& h_pool,
        const void* export_data,
        const StreamHandle& h_stream
    ) except+ nogil
    StreamHandle deallocation_stream "cuda_core::deallocation_stream" (
        const DevicePtrHandle& h
    ) noexcept nogil
    void set_deallocation_stream "cuda_core::set_deallocation_stream" (
        const DevicePtrHandle& h,
        const StreamHandle& h_stream
    ) noexcept nogil

    # --- Library handles --------------------------------------------------
    LibraryHandle create_library_handle_from_file "cuda_core::create_library_handle_from_file" (
        const char* path
    ) except+ nogil
    LibraryHandle create_library_handle_from_data "cuda_core::create_library_handle_from_data" (
        const void* data
    ) except+ nogil
    LibraryHandle create_library_handle_ref "cuda_core::create_library_handle_ref" (
        cydriver.CUlibrary library
    ) except+ nogil

    # --- Kernel handles ---------------------------------------------------
    KernelHandle create_kernel_handle "cuda_core::create_kernel_handle" (
        const LibraryHandle& h_library,
        const char* name
    ) except+ nogil
    KernelHandle create_kernel_handle_ref "cuda_core::create_kernel_handle_ref" (
        cydriver.CUkernel kernel,
        const LibraryHandle& h_library
    ) except+ nogil

    # --- Graphics resource handles ----------------------------------------
    GraphicsResourceHandle create_graphics_resource_handle "cuda_core::create_graphics_resource_handle" (
        cydriver.CUgraphicsResource resource
    ) except+ nogil

    # --- NVRTC program handles --------------------------------------------
    NvrtcProgramHandle create_nvrtc_program_handle "cuda_core::create_nvrtc_program_handle" (
        cynvrtc.nvrtcProgram prog
    ) except+ nogil
    NvrtcProgramHandle create_nvrtc_program_handle_ref "cuda_core::create_nvrtc_program_handle_ref" (
        cynvrtc.nvrtcProgram prog
    ) except+ nogil

    # --- NVVM program handles ---------------------------------------------
    NvvmProgramHandle create_nvvm_program_handle "cuda_core::create_nvvm_program_handle" (
        cynvvm.nvvmProgram prog
    ) except+ nogil
    NvvmProgramHandle create_nvvm_program_handle_ref "cuda_core::create_nvvm_program_handle_ref" (
        cynvvm.nvvmProgram prog
    ) except+ nogil

    # --- nvJitLink handles ------------------------------------------------
    NvJitLinkHandle create_nvjitlink_handle "cuda_core::create_nvjitlink_handle" (
        cynvjitlink.nvJitLinkHandle handle
    ) except+ nogil
    NvJitLinkHandle create_nvjitlink_handle_ref "cuda_core::create_nvjitlink_handle_ref" (
        cynvjitlink.nvJitLinkHandle handle
    ) except+ nogil

    # --- cuLink handles ---------------------------------------------------
    CuLinkHandle create_culink_handle "cuda_core::create_culink_handle" (
        cydriver.CUlinkState state
    ) except+ nogil
    CuLinkHandle create_culink_handle_ref "cuda_core::create_culink_handle_ref" (
        cydriver.CUlinkState state
    ) except+ nogil

158  

159  

160# ============================================================================= 

161# CUDA Driver API capsule 

162# 

163# This provides resolved CUDA driver function pointers to the C++ code. 

164# ============================================================================= 

165  

# Name string for the driver-API capsule described in the banner above.
# NOTE(review): nothing in the visible part of this module references this
# constant — confirm it is still used elsewhere before relying on it.
cdef const char* _CUDA_DRIVER_API_V1_NAME = b"cuda.core._resource_handles._CUDA_DRIVER_API_V1"

167  

168  

169# ============================================================================= 

170# CUDA driver function pointer initialization 

171# 

172# The C++ code declares extern function pointers (p_cuXxx) that need to be 

173# populated before any handle creation functions are called. We extract these 

174# from cuda.bindings.cydriver.__pyx_capi__ at module import time. 

175# 

176# The Cython string substitution (e.g., "reinterpret_cast<void*&>(...)") 

177# allows us to assign void* values to typed function pointer variables. 

178# ============================================================================= 

179  

180# Declare extern variables with reinterpret_cast to allow void* assignment 

cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
    # Each variable is declared to Cython as a plain void*, while its C name
    # string expands to reinterpret_cast<void*&>(cuda_core::p_xxx). Assigning
    # to the Cython name therefore writes through that reference into the
    # pointer variable named inside the cast.
    #
    # The groups follow the same order as the assignments performed later in
    # this module.

    # Context
    void* p_cuDevicePrimaryCtxRetain "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRetain)"
    void* p_cuDevicePrimaryCtxRelease "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRelease)"
    void* p_cuCtxGetCurrent "reinterpret_cast<void*&>(cuda_core::p_cuCtxGetCurrent)"

    # Stream
    void* p_cuStreamCreateWithPriority "reinterpret_cast<void*&>(cuda_core::p_cuStreamCreateWithPriority)"
    void* p_cuStreamDestroy "reinterpret_cast<void*&>(cuda_core::p_cuStreamDestroy)"

    # Event
    void* p_cuEventCreate "reinterpret_cast<void*&>(cuda_core::p_cuEventCreate)"
    void* p_cuEventDestroy "reinterpret_cast<void*&>(cuda_core::p_cuEventDestroy)"
    void* p_cuIpcOpenEventHandle "reinterpret_cast<void*&>(cuda_core::p_cuIpcOpenEventHandle)"

    # Device
    void* p_cuDeviceGetCount "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetCount)"

    # Memory pool
    void* p_cuMemPoolSetAccess "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolSetAccess)"
    void* p_cuMemPoolDestroy "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolDestroy)"
    void* p_cuMemPoolCreate "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolCreate)"
    void* p_cuDeviceGetMemPool "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetMemPool)"
    void* p_cuMemPoolImportFromShareableHandle "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportFromShareableHandle)"

    # Memory allocation
    void* p_cuMemAllocFromPoolAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocFromPoolAsync)"
    void* p_cuMemAllocAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocAsync)"
    void* p_cuMemAlloc "reinterpret_cast<void*&>(cuda_core::p_cuMemAlloc)"
    void* p_cuMemAllocHost "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocHost)"

    # Memory deallocation
    void* p_cuMemFreeAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeAsync)"
    void* p_cuMemFree "reinterpret_cast<void*&>(cuda_core::p_cuMemFree)"
    void* p_cuMemFreeHost "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeHost)"

    # IPC
    void* p_cuMemPoolImportPointer "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportPointer)"

    # Library
    void* p_cuLibraryLoadFromFile "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadFromFile)"
    void* p_cuLibraryLoadData "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadData)"
    void* p_cuLibraryUnload "reinterpret_cast<void*&>(cuda_core::p_cuLibraryUnload)"
    void* p_cuLibraryGetKernel "reinterpret_cast<void*&>(cuda_core::p_cuLibraryGetKernel)"

    # Linker
    void* p_cuLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_cuLinkDestroy)"

    # Graphics interop
    void* p_cuGraphicsUnregisterResource "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnregisterResource)"

    # NVRTC
    void* p_nvrtcDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvrtcDestroyProgram)"

    # NVVM
    void* p_nvvmDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvvmDestroyProgram)"

    # nvJitLink
    void* p_nvJitLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_nvJitLinkDestroy)"

240  

241  

242# Initialize driver function pointers from cydriver.__pyx_capi__ at module load 

# Look up `name` in cydriver.__pyx_capi__ and return the raw pointer held by
# that capsule. The capsule's own name is read first and handed back to
# PyCapsule_GetPointer, so the name check done by that call uses the capsule's
# stored name. A `name` missing from __pyx_capi__ raises from the subscript.
cdef void* _get_driver_fn(str name):
    cdef const char* entry_sig
    entry = cydriver.__pyx_capi__[name]
    entry_sig = PyCapsule_GetName(entry)
    return PyCapsule_GetPointer(entry, entry_sig)


# Populate the driver pointers at module import. The order mirrors the
# extern declarations of the p_cu* variables.

# -- Context
p_cuDevicePrimaryCtxRetain = _get_driver_fn("cuDevicePrimaryCtxRetain")
p_cuDevicePrimaryCtxRelease = _get_driver_fn("cuDevicePrimaryCtxRelease")
p_cuCtxGetCurrent = _get_driver_fn("cuCtxGetCurrent")

# -- Stream
p_cuStreamCreateWithPriority = _get_driver_fn("cuStreamCreateWithPriority")
p_cuStreamDestroy = _get_driver_fn("cuStreamDestroy")

# -- Event
p_cuEventCreate = _get_driver_fn("cuEventCreate")
p_cuEventDestroy = _get_driver_fn("cuEventDestroy")
p_cuIpcOpenEventHandle = _get_driver_fn("cuIpcOpenEventHandle")

# -- Device
p_cuDeviceGetCount = _get_driver_fn("cuDeviceGetCount")

# -- Memory pool
p_cuMemPoolSetAccess = _get_driver_fn("cuMemPoolSetAccess")
p_cuMemPoolDestroy = _get_driver_fn("cuMemPoolDestroy")
p_cuMemPoolCreate = _get_driver_fn("cuMemPoolCreate")
p_cuDeviceGetMemPool = _get_driver_fn("cuDeviceGetMemPool")
p_cuMemPoolImportFromShareableHandle = _get_driver_fn("cuMemPoolImportFromShareableHandle")

# -- Memory allocation
p_cuMemAllocFromPoolAsync = _get_driver_fn("cuMemAllocFromPoolAsync")
p_cuMemAllocAsync = _get_driver_fn("cuMemAllocAsync")
p_cuMemAlloc = _get_driver_fn("cuMemAlloc")
p_cuMemAllocHost = _get_driver_fn("cuMemAllocHost")

# -- Memory deallocation
p_cuMemFreeAsync = _get_driver_fn("cuMemFreeAsync")
p_cuMemFree = _get_driver_fn("cuMemFree")
p_cuMemFreeHost = _get_driver_fn("cuMemFreeHost")

# -- IPC
p_cuMemPoolImportPointer = _get_driver_fn("cuMemPoolImportPointer")

# -- Library
p_cuLibraryLoadFromFile = _get_driver_fn("cuLibraryLoadFromFile")
p_cuLibraryLoadData = _get_driver_fn("cuLibraryLoadData")
p_cuLibraryUnload = _get_driver_fn("cuLibraryUnload")
p_cuLibraryGetKernel = _get_driver_fn("cuLibraryGetKernel")

# -- Linker
p_cuLinkDestroy = _get_driver_fn("cuLinkDestroy")

# -- Graphics interop
p_cuGraphicsUnregisterResource = _get_driver_fn("cuGraphicsUnregisterResource")

296  

297# ============================================================================= 

298# NVRTC function pointer initialization 

299# ============================================================================= 

300  

# Look up `name` in cynvrtc.__pyx_capi__ and return the raw pointer held by
# that capsule. The capsule's own name is passed back to PyCapsule_GetPointer
# so the name check done by that call uses the capsule's stored name.
cdef void* _get_nvrtc_fn(str name):
    cdef const char* entry_sig
    entry = cynvrtc.__pyx_capi__[name]
    entry_sig = PyCapsule_GetName(entry)
    return PyCapsule_GetPointer(entry, entry_sig)


# Resolved once, at module import.
p_nvrtcDestroyProgram = _get_nvrtc_fn("nvrtcDestroyProgram")

306  

307# ============================================================================= 

308# NVVM function pointer initialization 

309# 

310# NVVM may not be available at runtime, so we handle missing function pointers 

311# gracefully. The C++ deleter checks for null before calling. 

312# ============================================================================= 

313  

# Look up `name` in cynvvm.__pyx_capi__ and return the raw pointer held by
# that capsule. The capsule's own name is passed back to PyCapsule_GetPointer
# so the name check done by that call uses the capsule's stored name.
#
# NOTE(review): an entry missing from __pyx_capi__ surfaces as an exception
# from the subscript below (assuming __pyx_capi__ is a plain dict); this
# helper does not turn a missing entry into a NULL pointer.
cdef void* _get_nvvm_fn(str name):
    cdef const char* entry_sig
    entry = cynvvm.__pyx_capi__[name]
    entry_sig = PyCapsule_GetName(entry)
    return PyCapsule_GetPointer(entry, entry_sig)


# Resolved once, at module import.
p_nvvmDestroyProgram = _get_nvvm_fn("nvvmDestroyProgram")

319  

320# ============================================================================= 

321# nvJitLink function pointer initialization 

322# 

323# nvJitLink may not be available at runtime, so we handle missing function 

324# pointers gracefully. The C++ deleter checks for null before calling. 

325# ============================================================================= 

326  

# Look up `name` in cynvjitlink.__pyx_capi__ and return the raw pointer held
# by that capsule. The capsule's own name is passed back to
# PyCapsule_GetPointer so the name check done by that call uses the capsule's
# stored name.
#
# NOTE(review): an entry missing from __pyx_capi__ surfaces as an exception
# from the subscript below (assuming __pyx_capi__ is a plain dict); this
# helper does not turn a missing entry into a NULL pointer.
cdef void* _get_nvjitlink_fn(str name):
    cdef const char* entry_sig
    entry = cynvjitlink.__pyx_capi__[name]
    entry_sig = PyCapsule_GetName(entry)
    return PyCapsule_GetPointer(entry, entry_sig)


# Resolved once, at module import.
p_nvJitLinkDestroy = _get_nvjitlink_fn("nvJitLinkDestroy")