Coverage for cuda / core / _resource_handles.pyx: 96.83%
63 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-22 01:37 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5# This module compiles _cpp/resource_handles.cpp into a shared library.
6# Consumer modules cimport the functions declared in _resource_handles.pxd.
7# Since there is only one copy of the C++ code (in this .so), all static and
8# thread-local state is shared correctly across all consumer modules.
9#
10# The cdef extern from declarations below satisfy the .pxd declarations directly,
11# without needing separate wrapper functions.
13from cpython.pycapsule cimport PyCapsule_GetName, PyCapsule_GetPointer
14from libc.stddef cimport size_t
16from cuda.bindings cimport cydriver
17from cuda.bindings cimport cynvrtc
18from cuda.bindings cimport cynvvm
19from cuda.bindings cimport cynvjitlink
21from ._resource_handles cimport (
22 ContextHandle,
23 GreenCtxHandle,
24 StreamHandle,
25 EventHandle,
26 MemoryPoolHandle,
27 DevicePtrHandle,
28 LibraryHandle,
29 KernelHandle,
30 GraphHandle,
31 GraphicsResourceHandle,
32 NvrtcProgramHandle,
33 NvvmProgramHandle,
34 NvJitLinkHandle,
35 CuLinkHandle,
36)
38import cuda.bindings.cydriver as cydriver
39import cuda.bindings.cynvrtc as cynvrtc
40import cuda.bindings.cynvvm as cynvvm
41import cuda.bindings.cynvjitlink as cynvjitlink
43# =============================================================================
44# C++ function declarations (non-inline, implemented in resource_handles.cpp)
45#
46# These declarations satisfy the cdef function declarations in _resource_handles.pxd.
47# Consumer modules cimport these functions and calls go through this .so.
48# =============================================================================
# Declarations of the cuda_core C++ functions from _cpp/resource_handles.hpp.
#
# Exception specifications used in this block:
#   - `except+`  : a C++ exception thrown by the call is translated by Cython
#                  into a Python exception.
#   - `noexcept` : the call is declared as never raising.
# Every declaration except register_mr_dealloc_callback is marked `nogil`.
#
# NOTE(review): sm_resource_split is the only declaration here without an
# explicit exception specification — confirm that this is intentional and
# matches the declaration in the .pxd.
# NOTE(review): GraphNodeHandle, FileDescriptorHandle and MRDeallocCallback are
# used below but do not appear in the cimport list visible in this file;
# presumably they come from this module's own _resource_handles.pxd — verify.
50cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
51 # Thread-local error handling
52 cydriver.CUresult get_last_error "cuda_core::get_last_error" () noexcept nogil
53 cydriver.CUresult peek_last_error "cuda_core::peek_last_error" () noexcept nogil
54 void clear_last_error "cuda_core::clear_last_error" () noexcept nogil
56 # Context handles
57 ContextHandle create_context_handle_ref "cuda_core::create_context_handle_ref" (
58 cydriver.CUcontext ctx) except+ nogil
59 ContextHandle create_context_handle_from_green_ctx "cuda_core::create_context_handle_from_green_ctx" (
60 const GreenCtxHandle& h_green_ctx) except+ nogil
61 GreenCtxHandle get_context_green_ctx "cuda_core::get_context_green_ctx" (
62 const ContextHandle& h) noexcept nogil
63 GreenCtxHandle create_green_ctx_handle "cuda_core::create_green_ctx_handle" (
64 cydriver.CUdevResource* resources, unsigned int nbResources,
65 cydriver.CUdevice dev, unsigned int flags) except+ nogil
66 GreenCtxHandle create_green_ctx_handle_ref "cuda_core::create_green_ctx_handle_ref" (
67 cydriver.CUgreenCtx ctx) except+ nogil
68 ContextHandle get_primary_context "cuda_core::get_primary_context" (
69 int device_id) except+ nogil
70 ContextHandle get_current_context "cuda_core::get_current_context" () except+ nogil
72 # Stream handles
73 StreamHandle create_stream_handle "cuda_core::create_stream_handle" (
74 const ContextHandle& h_ctx, unsigned int flags, int priority) except+ nogil
75 StreamHandle create_stream_handle_ref "cuda_core::create_stream_handle_ref" (
76 cydriver.CUstream stream) except+ nogil
77 StreamHandle create_stream_handle_with_owner "cuda_core::create_stream_handle_with_owner" (
78 cydriver.CUstream stream, object owner) except+ nogil
79 ContextHandle get_stream_context "cuda_core::get_stream_context" (
80 const StreamHandle& h) noexcept nogil
81 StreamHandle get_legacy_stream "cuda_core::get_legacy_stream" () except+ nogil
82 StreamHandle get_per_thread_stream "cuda_core::get_per_thread_stream" () except+ nogil
84 # Event handles (note: _create_event_handle* are internal due to C++ overloading)
85 EventHandle create_event_handle "cuda_core::create_event_handle" (
86 const ContextHandle& h_ctx, unsigned int flags,
87 bint timing_enabled, bint is_blocking_sync,
88 bint ipc_enabled, int device_id) except+ nogil
89 EventHandle create_event_handle_noctx "cuda_core::create_event_handle_noctx" (
90 unsigned int flags) except+ nogil
91 EventHandle create_event_handle_ref "cuda_core::create_event_handle_ref" (
92 cydriver.CUevent event) except+ nogil
93 EventHandle create_event_handle_ipc "cuda_core::create_event_handle_ipc" (
94 const cydriver.CUipcEventHandle& ipc_handle, bint is_blocking_sync) except+ nogil
96 # Event metadata getters
97 bint get_event_timing_enabled "cuda_core::get_event_timing_enabled" (
98 const EventHandle& h) noexcept nogil
99 bint get_event_is_blocking_sync "cuda_core::get_event_is_blocking_sync" (
100 const EventHandle& h) noexcept nogil
101 bint get_event_ipc_enabled "cuda_core::get_event_ipc_enabled" (
102 const EventHandle& h) noexcept nogil
103 int get_event_device_id "cuda_core::get_event_device_id" (
104 const EventHandle& h) noexcept nogil
105 ContextHandle get_event_context "cuda_core::get_event_context" (
106 const EventHandle& h) noexcept nogil
108 # Memory pool handles
109 MemoryPoolHandle create_mempool_handle "cuda_core::create_mempool_handle" (
110 const cydriver.CUmemPoolProps& props) except+ nogil
111 MemoryPoolHandle create_mempool_handle_ref "cuda_core::create_mempool_handle_ref" (
112 cydriver.CUmemoryPool pool) except+ nogil
113 MemoryPoolHandle get_device_mempool "cuda_core::get_device_mempool" (
114 int device_id) except+ nogil
115 MemoryPoolHandle create_mempool_handle_ipc "cuda_core::create_mempool_handle_ipc" (
116 int fd, cydriver.CUmemAllocationHandleType handle_type) except+ nogil
118 # Device pointer handles
119 DevicePtrHandle deviceptr_alloc_from_pool "cuda_core::deviceptr_alloc_from_pool" (
120 size_t size, const MemoryPoolHandle& h_pool, const StreamHandle& h_stream) except+ nogil
121 DevicePtrHandle deviceptr_alloc_async "cuda_core::deviceptr_alloc_async" (
122 size_t size, const StreamHandle& h_stream) except+ nogil
123 DevicePtrHandle deviceptr_alloc "cuda_core::deviceptr_alloc" (size_t size) except+ nogil
124 DevicePtrHandle deviceptr_alloc_host "cuda_core::deviceptr_alloc_host" (size_t size) except+ nogil
125 DevicePtrHandle deviceptr_create_ref "cuda_core::deviceptr_create_ref" (
126 cydriver.CUdeviceptr ptr) except+ nogil
127 DevicePtrHandle deviceptr_create_with_owner "cuda_core::deviceptr_create_with_owner" (
128 cydriver.CUdeviceptr ptr, object owner) except+ nogil
129 DevicePtrHandle deviceptr_create_mapped_graphics "cuda_core::deviceptr_create_mapped_graphics" (
130 cydriver.CUdeviceptr ptr,
131 const GraphicsResourceHandle& h_resource,
132 const StreamHandle& h_stream) except+ nogil
134 # MR deallocation callback
135 void register_mr_dealloc_callback "cuda_core::register_mr_dealloc_callback" (
136 MRDeallocCallback cb) noexcept
137 DevicePtrHandle deviceptr_create_with_mr "cuda_core::deviceptr_create_with_mr" (
138 cydriver.CUdeviceptr ptr, size_t size, object mr) except+ nogil
140 DevicePtrHandle deviceptr_import_ipc "cuda_core::deviceptr_import_ipc" (
141 const MemoryPoolHandle& h_pool, const void* export_data, const StreamHandle& h_stream) except+ nogil
142 StreamHandle deallocation_stream "cuda_core::deallocation_stream" (
143 const DevicePtrHandle& h) noexcept nogil
144 void set_deallocation_stream "cuda_core::set_deallocation_stream" (
145 const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept nogil
147 # Library handles
148 LibraryHandle create_library_handle_from_file "cuda_core::create_library_handle_from_file" (
149 const char* path) except+ nogil
150 LibraryHandle create_library_handle_from_data "cuda_core::create_library_handle_from_data" (
151 const void* data) except+ nogil
152 LibraryHandle create_library_handle_ref "cuda_core::create_library_handle_ref" (
153 cydriver.CUlibrary library) except+ nogil
155 # Kernel handles
156 KernelHandle create_kernel_handle "cuda_core::create_kernel_handle" (
157 const LibraryHandle& h_library, const char* name) except+ nogil
158 KernelHandle create_kernel_handle_ref "cuda_core::create_kernel_handle_ref" (
159 cydriver.CUkernel kernel) except+ nogil
160 LibraryHandle get_kernel_library "cuda_core::get_kernel_library" (
161 const KernelHandle& h) noexcept nogil
163 # Graph handles
164 GraphHandle create_graph_handle "cuda_core::create_graph_handle" (
165 cydriver.CUgraph graph) except+ nogil
166 GraphHandle create_graph_handle_ref "cuda_core::create_graph_handle_ref" (
167 cydriver.CUgraph graph, const GraphHandle& h_parent) except+ nogil
169 # Graph node handles
170 GraphNodeHandle create_graph_node_handle "cuda_core::create_graph_node_handle" (
171 cydriver.CUgraphNode node, const GraphHandle& h_graph) except+ nogil
172 GraphHandle graph_node_get_graph "cuda_core::graph_node_get_graph" (
173 const GraphNodeHandle& h) noexcept nogil
174 void invalidate_graph_node "cuda_core::invalidate_graph_node" (
175 const GraphNodeHandle& h) noexcept nogil
177 # Graphics resource handles
178 GraphicsResourceHandle create_graphics_resource_handle "cuda_core::create_graphics_resource_handle" (
179 cydriver.CUgraphicsResource resource) except+ nogil
181 # NVRTC Program handles
182 NvrtcProgramHandle create_nvrtc_program_handle "cuda_core::create_nvrtc_program_handle" (
183 cynvrtc.nvrtcProgram prog) except+ nogil
184 NvrtcProgramHandle create_nvrtc_program_handle_ref "cuda_core::create_nvrtc_program_handle_ref" (
185 cynvrtc.nvrtcProgram prog) except+ nogil
187 # NVVM Program handles
188 NvvmProgramHandle create_nvvm_program_handle "cuda_core::create_nvvm_program_handle" (
189 cynvvm.nvvmProgram prog) except+ nogil
190 NvvmProgramHandle create_nvvm_program_handle_ref "cuda_core::create_nvvm_program_handle_ref" (
191 cynvvm.nvvmProgram prog) except+ nogil
193 # nvJitLink handles
194 NvJitLinkHandle create_nvjitlink_handle "cuda_core::create_nvjitlink_handle" (
195 cynvjitlink.nvJitLinkHandle handle) except+ nogil
196 NvJitLinkHandle create_nvjitlink_handle_ref "cuda_core::create_nvjitlink_handle_ref" (
197 cynvjitlink.nvJitLinkHandle handle) except+ nogil
199 # cuLink handles
200 CuLinkHandle create_culink_handle "cuda_core::create_culink_handle" (
201 cydriver.CUlinkState state) except+ nogil
202 CuLinkHandle create_culink_handle_ref "cuda_core::create_culink_handle_ref" (
203 cydriver.CUlinkState state) except+ nogil
205 # File descriptor handles
206 FileDescriptorHandle create_fd_handle "cuda_core::create_fd_handle" (
207 int fd) except+ nogil
208 FileDescriptorHandle create_fd_handle_ref "cuda_core::create_fd_handle_ref" (
209 int fd) except+ nogil
211 # SM resource split (13.1+ wrapper — avoids direct cydriver cimport)
212 # groupParams is void* to avoid referencing CU_DEV_SM_RESOURCE_GROUP_PARAMS
213 # (which doesn't exist in cuda-bindings 13.0 .pxd). The C++ side casts it.
214 cydriver.CUresult sm_resource_split "cuda_core::sm_resource_split" (
215 cydriver.CUdevResource* result, unsigned int nbGroups,
216 const cydriver.CUdevResource* input, cydriver.CUdevResource* remainder,
217 unsigned int flags, void* groupParams) nogil
218 bint has_sm_resource_split "cuda_core::has_sm_resource_split" () noexcept nogil
221# =============================================================================
222# CUDA Driver API capsule
223#
224# This provides resolved CUDA driver function pointers to the C++ code.
225# =============================================================================
# Name of the CUDA driver API capsule (version 1), stored as a C string.
# NOTE(review): nothing in the visible part of this file reads this constant —
# confirm it is still needed.
227cdef const char* _CUDA_DRIVER_API_V1_NAME = b"cuda.core._resource_handles._CUDA_DRIVER_API_V1"
230# =============================================================================
231# CUDA driver function pointer initialization
232#
233# The C++ code declares extern function pointers (p_cuXxx) that need to be
234# populated before any handle creation functions are called. We extract these
235# from cuda.bindings.cydriver.__pyx_capi__ at module import time.
236#
237# The Cython string substitution (e.g., "reinterpret_cast<void*&>(...)")
238# allows us to assign void* values to typed function pointer variables.
239# =============================================================================
241# Declare extern variables with reinterpret_cast to allow void* assignment
# Each name declared below is a void* view of a cuda_core::p_<function>
# variable. The quoted C name wraps the variable in
# reinterpret_cast<void*&>(...), so assigning to the Cython name stores a raw
# address into that C++ variable. Entries are grouped by the API area they
# belong to.
242cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
243 # Context
244 void* p_cuDevicePrimaryCtxRetain "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRetain)"
245 void* p_cuDevicePrimaryCtxRelease "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRelease)"
246 void* p_cuCtxGetCurrent "reinterpret_cast<void*&>(cuda_core::p_cuCtxGetCurrent)"
247 void* p_cuGreenCtxCreate "reinterpret_cast<void*&>(cuda_core::p_cuGreenCtxCreate)"
248 void* p_cuGreenCtxDestroy "reinterpret_cast<void*&>(cuda_core::p_cuGreenCtxDestroy)"
249 void* p_cuCtxFromGreenCtx "reinterpret_cast<void*&>(cuda_core::p_cuCtxFromGreenCtx)"
250 void* p_cuDevResourceGenerateDesc "reinterpret_cast<void*&>(cuda_core::p_cuDevResourceGenerateDesc)"
251 void* p_cuGreenCtxStreamCreate "reinterpret_cast<void*&>(cuda_core::p_cuGreenCtxStreamCreate)"
253 # Stream
254 void* p_cuStreamCreateWithPriority "reinterpret_cast<void*&>(cuda_core::p_cuStreamCreateWithPriority)"
255 void* p_cuStreamDestroy "reinterpret_cast<void*&>(cuda_core::p_cuStreamDestroy)"
257 # Event
258 void* p_cuEventCreate "reinterpret_cast<void*&>(cuda_core::p_cuEventCreate)"
259 void* p_cuEventDestroy "reinterpret_cast<void*&>(cuda_core::p_cuEventDestroy)"
260 void* p_cuIpcOpenEventHandle "reinterpret_cast<void*&>(cuda_core::p_cuIpcOpenEventHandle)"
262 # Device
263 void* p_cuDeviceGetCount "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetCount)"
265 # Memory pool
266 void* p_cuMemPoolSetAccess "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolSetAccess)"
267 void* p_cuMemPoolDestroy "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolDestroy)"
268 void* p_cuMemPoolCreate "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolCreate)"
269 void* p_cuDeviceGetMemPool "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetMemPool)"
270 void* p_cuMemPoolImportFromShareableHandle "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportFromShareableHandle)"
272 # Memory allocation
273 void* p_cuMemAllocFromPoolAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocFromPoolAsync)"
274 void* p_cuMemAllocAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocAsync)"
275 void* p_cuMemAlloc "reinterpret_cast<void*&>(cuda_core::p_cuMemAlloc)"
276 void* p_cuMemAllocHost "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocHost)"
278 # Memory deallocation
279 void* p_cuMemFreeAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeAsync)"
280 void* p_cuMemFree "reinterpret_cast<void*&>(cuda_core::p_cuMemFree)"
281 void* p_cuMemFreeHost "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeHost)"
283 # IPC
284 void* p_cuMemPoolImportPointer "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportPointer)"
286 # Library
287 void* p_cuLibraryLoadFromFile "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadFromFile)"
288 void* p_cuLibraryLoadData "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadData)"
289 void* p_cuLibraryUnload "reinterpret_cast<void*&>(cuda_core::p_cuLibraryUnload)"
290 void* p_cuLibraryGetKernel "reinterpret_cast<void*&>(cuda_core::p_cuLibraryGetKernel)"
292 # Graph
293 void* p_cuGraphDestroy "reinterpret_cast<void*&>(cuda_core::p_cuGraphDestroy)"
295 # Linker
296 void* p_cuLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_cuLinkDestroy)"
298 # Graphics interop
299 void* p_cuGraphicsUnmapResources "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnmapResources)"
300 void* p_cuGraphicsUnregisterResource "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnregisterResource)"
302 # SM resource split (13.1+)
303 void* p_cuDevSmResourceSplit "reinterpret_cast<void*&>(cuda_core::p_cuDevSmResourceSplit)"
305 # NVRTC
306 void* p_nvrtcDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvrtcDestroyProgram)"
308 # NVVM
309 void* p_nvvmDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvvmDestroyProgram)"
311 # nvJitLink
312 void* p_nvJitLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_nvJitLinkDestroy)"
315# Initialize driver function pointers from cydriver.__pyx_capi__ at module load
cdef void* _get_driver_fn(str name):
    # Resolve the raw C address that cydriver exports under `name`.
    #
    # The capsule is fetched from cydriver.__pyx_capi__ by direct indexing, so
    # an unknown name raises here rather than yielding NULL. The capsule's own
    # stored name is passed back to PyCapsule_GetPointer for the lookup.
    cdef object driver_capsule = cydriver.__pyx_capi__[name]
    cdef const char* capsule_name = PyCapsule_GetName(driver_capsule)
    return PyCapsule_GetPointer(driver_capsule, capsule_name)
cdef void* _get_optional_driver_fn(str name):
    # Resolve the raw C address that cydriver exports under `name`, or return
    # NULL when cydriver.__pyx_capi__ has no entry for it.
    #
    # Only the KeyError from the table lookup is treated as "not available";
    # errors from unpacking the capsule still propagate.
    cdef void* fn_addr = NULL
    try:
        driver_capsule = cydriver.__pyx_capi__[name]
    except KeyError:
        # Missing entry is expected for optional functions: keep NULL.
        pass
    else:
        fn_addr = PyCapsule_GetPointer(
            driver_capsule, PyCapsule_GetName(driver_capsule))
    return fn_addr
# Populate the C++ driver function pointers when this module is imported.
# Names fetched with _get_driver_fn are looked up by direct indexing into
# cydriver.__pyx_capi__. Names fetched with _get_optional_driver_fn (the
# green-context entries and cuDevSmResourceSplit) are set to NULL when
# cydriver does not export them.
328# Context
329p_cuDevicePrimaryCtxRetain = _get_driver_fn("cuDevicePrimaryCtxRetain")
330p_cuDevicePrimaryCtxRelease = _get_driver_fn("cuDevicePrimaryCtxRelease")
331p_cuCtxGetCurrent = _get_driver_fn("cuCtxGetCurrent")
332p_cuGreenCtxCreate = _get_optional_driver_fn("cuGreenCtxCreate")
333p_cuGreenCtxDestroy = _get_optional_driver_fn("cuGreenCtxDestroy")
334p_cuCtxFromGreenCtx = _get_optional_driver_fn("cuCtxFromGreenCtx")
335p_cuDevResourceGenerateDesc = _get_optional_driver_fn("cuDevResourceGenerateDesc")
336p_cuGreenCtxStreamCreate = _get_optional_driver_fn("cuGreenCtxStreamCreate")
338# Stream
339p_cuStreamCreateWithPriority = _get_driver_fn("cuStreamCreateWithPriority")
340p_cuStreamDestroy = _get_driver_fn("cuStreamDestroy")
342# Event
343p_cuEventCreate = _get_driver_fn("cuEventCreate")
344p_cuEventDestroy = _get_driver_fn("cuEventDestroy")
345p_cuIpcOpenEventHandle = _get_driver_fn("cuIpcOpenEventHandle")
347# Device
348p_cuDeviceGetCount = _get_driver_fn("cuDeviceGetCount")
350# Memory pool
351p_cuMemPoolSetAccess = _get_driver_fn("cuMemPoolSetAccess")
352p_cuMemPoolDestroy = _get_driver_fn("cuMemPoolDestroy")
353p_cuMemPoolCreate = _get_driver_fn("cuMemPoolCreate")
354p_cuDeviceGetMemPool = _get_driver_fn("cuDeviceGetMemPool")
355p_cuMemPoolImportFromShareableHandle = _get_driver_fn("cuMemPoolImportFromShareableHandle")
357# Memory allocation
358p_cuMemAllocFromPoolAsync = _get_driver_fn("cuMemAllocFromPoolAsync")
359p_cuMemAllocAsync = _get_driver_fn("cuMemAllocAsync")
360p_cuMemAlloc = _get_driver_fn("cuMemAlloc")
361p_cuMemAllocHost = _get_driver_fn("cuMemAllocHost")
363# Memory deallocation
364p_cuMemFreeAsync = _get_driver_fn("cuMemFreeAsync")
365p_cuMemFree = _get_driver_fn("cuMemFree")
366p_cuMemFreeHost = _get_driver_fn("cuMemFreeHost")
368# IPC
369p_cuMemPoolImportPointer = _get_driver_fn("cuMemPoolImportPointer")
371# Library
372p_cuLibraryLoadFromFile = _get_driver_fn("cuLibraryLoadFromFile")
373p_cuLibraryLoadData = _get_driver_fn("cuLibraryLoadData")
374p_cuLibraryUnload = _get_driver_fn("cuLibraryUnload")
375p_cuLibraryGetKernel = _get_driver_fn("cuLibraryGetKernel")
377# Graph
378p_cuGraphDestroy = _get_driver_fn("cuGraphDestroy")
380# Linker
381p_cuLinkDestroy = _get_driver_fn("cuLinkDestroy")
383# Graphics interop
384p_cuGraphicsUnmapResources = _get_driver_fn("cuGraphicsUnmapResources")
385p_cuGraphicsUnregisterResource = _get_driver_fn("cuGraphicsUnregisterResource")
387# SM resource split (13.1+ — may not exist in older cuda-bindings)
388p_cuDevSmResourceSplit = _get_optional_driver_fn("cuDevSmResourceSplit")
390# =============================================================================
391# NVRTC function pointer initialization
392# =============================================================================
cdef void* _get_nvrtc_fn(str name):
    # Resolve the raw C address that cynvrtc exports under `name`.
    # The lookup indexes cynvrtc.__pyx_capi__ directly, so an unknown name
    # raises rather than yielding NULL.
    cdef object nvrtc_capsule = cynvrtc.__pyx_capi__[name]
    cdef const char* capsule_name = PyCapsule_GetName(nvrtc_capsule)
    return PyCapsule_GetPointer(nvrtc_capsule, capsule_name)


# Resolved once, when this module is imported.
p_nvrtcDestroyProgram = _get_nvrtc_fn("nvrtcDestroyProgram")
400# =============================================================================
401# NVVM function pointer initialization
402#
403# NVVM may not be available at runtime, so we handle missing function pointers
404# gracefully. The C++ deleter checks for null before calling.
405# =============================================================================
cdef void* _get_nvvm_fn(str name):
    # Resolve the raw C address that cynvvm exports under `name`.
    # NOTE(review): the lookup indexes cynvvm.__pyx_capi__ directly, so an
    # unknown name raises here; there is no NULL fallback at this level.
    # Confirm that this is the intended behavior when NVVM is unavailable.
    cdef object nvvm_capsule = cynvvm.__pyx_capi__[name]
    cdef const char* capsule_name = PyCapsule_GetName(nvvm_capsule)
    return PyCapsule_GetPointer(nvvm_capsule, capsule_name)


# Resolved once, when this module is imported.
p_nvvmDestroyProgram = _get_nvvm_fn("nvvmDestroyProgram")
413# =============================================================================
414# nvJitLink function pointer initialization
415#
416# nvJitLink may not be available at runtime, so we handle missing function
417# pointers gracefully. The C++ deleter checks for null before calling.
418# =============================================================================
cdef void* _get_nvjitlink_fn(str name):
    # Resolve the raw C address that cynvjitlink exports under `name`.
    # NOTE(review): the lookup indexes cynvjitlink.__pyx_capi__ directly, so
    # an unknown name raises here; there is no NULL fallback at this level.
    # Confirm that this is the intended behavior when nvJitLink is unavailable.
    cdef object nvjitlink_capsule = cynvjitlink.__pyx_capi__[name]
    cdef const char* capsule_name = PyCapsule_GetName(nvjitlink_capsule)
    return PyCapsule_GetPointer(nvjitlink_capsule, capsule_name)


# Resolved once, when this module is imported.
p_nvJitLinkDestroy = _get_nvjitlink_fn("nvJitLinkDestroy")