Coverage for cuda / core / _resource_handles.pyx: 100.00%
51 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-29 01:27 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-29 01:27 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5# This module compiles _cpp/resource_handles.cpp into a shared library.
6# Consumer modules cimport the functions declared in _resource_handles.pxd.
7# Since there is only one copy of the C++ code (in this .so), all static and
8# thread-local state is shared correctly across all consumer modules.
9#
10# The cdef extern from declarations below satisfy the .pxd declarations directly,
11# without needing separate wrapper functions.
13from cpython.pycapsule cimport PyCapsule_GetName, PyCapsule_GetPointer
14from libc.stddef cimport size_t
16from cuda.bindings cimport cydriver
17from cuda.bindings cimport cynvrtc
18from cuda.bindings cimport cynvvm
19from cuda.bindings cimport cynvjitlink
21from ._resource_handles cimport (
22 ContextHandle,
23 StreamHandle,
24 EventHandle,
25 MemoryPoolHandle,
26 DevicePtrHandle,
27 LibraryHandle,
28 KernelHandle,
29 GraphHandle,
30 GraphicsResourceHandle,
31 NvrtcProgramHandle,
32 NvvmProgramHandle,
33 NvJitLinkHandle,
34 CuLinkHandle,
35)
37import cuda.bindings.cydriver as cydriver
38import cuda.bindings.cynvrtc as cynvrtc
39import cuda.bindings.cynvvm as cynvvm
40import cuda.bindings.cynvjitlink as cynvjitlink
42# =============================================================================
43# C++ function declarations (non-inline, implemented in resource_handles.cpp)
44#
45# These declarations satisfy the cdef function declarations in _resource_handles.pxd.
46# Consumer modules cimport these functions and calls go through this .so.
47# =============================================================================
# Non-inline C++ entry points declared in resource_handles.hpp.
#
# Every declaration spells out its fully qualified C name. Declarations marked
# ``except+`` have C++ exceptions translated into Python exceptions by Cython;
# declarations marked ``noexcept`` are treated as never raising.
cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
    # ---- Thread-local error handling ----------------------------------------
    cydriver.CUresult get_last_error "cuda_core::get_last_error" () noexcept nogil
    cydriver.CUresult peek_last_error "cuda_core::peek_last_error" () noexcept nogil
    void clear_last_error "cuda_core::clear_last_error" () noexcept nogil

    # ---- Context handles ----------------------------------------------------
    ContextHandle create_context_handle_ref "cuda_core::create_context_handle_ref" (
            cydriver.CUcontext ctx) except+ nogil
    ContextHandle get_primary_context "cuda_core::get_primary_context" (
            int device_id) except+ nogil
    ContextHandle get_current_context "cuda_core::get_current_context" () except+ nogil

    # ---- Stream handles -----------------------------------------------------
    StreamHandle create_stream_handle "cuda_core::create_stream_handle" (
        const ContextHandle& h_ctx,
        unsigned int flags,
        int priority
    ) except+ nogil
    StreamHandle create_stream_handle_ref "cuda_core::create_stream_handle_ref" (
            cydriver.CUstream stream) except+ nogil
    StreamHandle create_stream_handle_with_owner "cuda_core::create_stream_handle_with_owner" (
        cydriver.CUstream stream,
        object owner
    ) except+ nogil
    StreamHandle get_legacy_stream "cuda_core::get_legacy_stream" () except+ nogil
    StreamHandle get_per_thread_stream "cuda_core::get_per_thread_stream" () except+ nogil

    # ---- Event handles ------------------------------------------------------
    # (note: _create_event_handle* are internal due to C++ overloading)
    EventHandle create_event_handle "cuda_core::create_event_handle" (
        const ContextHandle& h_ctx,
        unsigned int flags,
        bint timing_disabled,
        bint busy_waited,
        bint ipc_enabled,
        int device_id
    ) except+ nogil
    EventHandle create_event_handle_noctx "cuda_core::create_event_handle_noctx" (
            unsigned int flags) except+ nogil
    EventHandle create_event_handle_ref "cuda_core::create_event_handle_ref" (
            cydriver.CUevent event) except+ nogil
    EventHandle create_event_handle_ipc "cuda_core::create_event_handle_ipc" (
        const cydriver.CUipcEventHandle& ipc_handle,
        bint busy_waited
    ) except+ nogil

    # ---- Event metadata getters ---------------------------------------------
    bint get_event_timing_disabled "cuda_core::get_event_timing_disabled" (
            const EventHandle& h) noexcept nogil
    bint get_event_busy_waited "cuda_core::get_event_busy_waited" (
            const EventHandle& h) noexcept nogil
    bint get_event_ipc_enabled "cuda_core::get_event_ipc_enabled" (
            const EventHandle& h) noexcept nogil
    int get_event_device_id "cuda_core::get_event_device_id" (
            const EventHandle& h) noexcept nogil
    ContextHandle get_event_context "cuda_core::get_event_context" (
            const EventHandle& h) noexcept nogil

    # ---- Memory pool handles ------------------------------------------------
    MemoryPoolHandle create_mempool_handle "cuda_core::create_mempool_handle" (
            const cydriver.CUmemPoolProps& props) except+ nogil
    MemoryPoolHandle create_mempool_handle_ref "cuda_core::create_mempool_handle_ref" (
            cydriver.CUmemoryPool pool) except+ nogil
    MemoryPoolHandle get_device_mempool "cuda_core::get_device_mempool" (
            int device_id) except+ nogil
    MemoryPoolHandle create_mempool_handle_ipc "cuda_core::create_mempool_handle_ipc" (
        int fd,
        cydriver.CUmemAllocationHandleType handle_type
    ) except+ nogil

    # ---- Device pointer handles ---------------------------------------------
    DevicePtrHandle deviceptr_alloc_from_pool "cuda_core::deviceptr_alloc_from_pool" (
        size_t size,
        const MemoryPoolHandle& h_pool,
        const StreamHandle& h_stream
    ) except+ nogil
    DevicePtrHandle deviceptr_alloc_async "cuda_core::deviceptr_alloc_async" (
        size_t size,
        const StreamHandle& h_stream
    ) except+ nogil
    DevicePtrHandle deviceptr_alloc "cuda_core::deviceptr_alloc" (size_t size) except+ nogil
    DevicePtrHandle deviceptr_alloc_host "cuda_core::deviceptr_alloc_host" (size_t size) except+ nogil
    DevicePtrHandle deviceptr_create_ref "cuda_core::deviceptr_create_ref" (
            cydriver.CUdeviceptr ptr) except+ nogil
    DevicePtrHandle deviceptr_create_with_owner "cuda_core::deviceptr_create_with_owner" (
        cydriver.CUdeviceptr ptr,
        object owner
    ) except+ nogil
    DevicePtrHandle deviceptr_create_mapped_graphics "cuda_core::deviceptr_create_mapped_graphics" (
        cydriver.CUdeviceptr ptr,
        const GraphicsResourceHandle& h_resource,
        const StreamHandle& h_stream
    ) except+ nogil

    # ---- MR deallocation callback -------------------------------------------
    # register_mr_dealloc_callback is the only declaration here without nogil.
    void register_mr_dealloc_callback "cuda_core::register_mr_dealloc_callback" (
            MRDeallocCallback cb) noexcept
    DevicePtrHandle deviceptr_create_with_mr "cuda_core::deviceptr_create_with_mr" (
        cydriver.CUdeviceptr ptr,
        size_t size,
        object mr
    ) except+ nogil

    DevicePtrHandle deviceptr_import_ipc "cuda_core::deviceptr_import_ipc" (
        const MemoryPoolHandle& h_pool,
        const void* export_data,
        const StreamHandle& h_stream
    ) except+ nogil
    StreamHandle deallocation_stream "cuda_core::deallocation_stream" (
            const DevicePtrHandle& h) noexcept nogil
    void set_deallocation_stream "cuda_core::set_deallocation_stream" (
        const DevicePtrHandle& h,
        const StreamHandle& h_stream
    ) noexcept nogil

    # ---- Library handles ----------------------------------------------------
    LibraryHandle create_library_handle_from_file "cuda_core::create_library_handle_from_file" (
            const char* path) except+ nogil
    LibraryHandle create_library_handle_from_data "cuda_core::create_library_handle_from_data" (
            const void* data) except+ nogil
    LibraryHandle create_library_handle_ref "cuda_core::create_library_handle_ref" (
            cydriver.CUlibrary library) except+ nogil

    # ---- Kernel handles -----------------------------------------------------
    KernelHandle create_kernel_handle "cuda_core::create_kernel_handle" (
        const LibraryHandle& h_library,
        const char* name
    ) except+ nogil
    KernelHandle create_kernel_handle_ref "cuda_core::create_kernel_handle_ref" (
            cydriver.CUkernel kernel) except+ nogil
    LibraryHandle get_kernel_library "cuda_core::get_kernel_library" (
            const KernelHandle& h) noexcept nogil

    # ---- Graph handles ------------------------------------------------------
    GraphHandle create_graph_handle "cuda_core::create_graph_handle" (
            cydriver.CUgraph graph) except+ nogil
    GraphHandle create_graph_handle_ref "cuda_core::create_graph_handle_ref" (
        cydriver.CUgraph graph,
        const GraphHandle& h_parent
    ) except+ nogil

    # ---- Graph node handles -------------------------------------------------
    GraphNodeHandle create_graph_node_handle "cuda_core::create_graph_node_handle" (
        cydriver.CUgraphNode node,
        const GraphHandle& h_graph
    ) except+ nogil
    GraphHandle graph_node_get_graph "cuda_core::graph_node_get_graph" (
            const GraphNodeHandle& h) noexcept nogil
    void invalidate_graph_node "cuda_core::invalidate_graph_node" (
            const GraphNodeHandle& h) noexcept nogil

    # ---- Graphics resource handles ------------------------------------------
    GraphicsResourceHandle create_graphics_resource_handle "cuda_core::create_graphics_resource_handle" (
            cydriver.CUgraphicsResource resource) except+ nogil

    # ---- NVRTC program handles ----------------------------------------------
    NvrtcProgramHandle create_nvrtc_program_handle "cuda_core::create_nvrtc_program_handle" (
            cynvrtc.nvrtcProgram prog) except+ nogil
    NvrtcProgramHandle create_nvrtc_program_handle_ref "cuda_core::create_nvrtc_program_handle_ref" (
            cynvrtc.nvrtcProgram prog) except+ nogil

    # ---- NVVM program handles -----------------------------------------------
    NvvmProgramHandle create_nvvm_program_handle "cuda_core::create_nvvm_program_handle" (
            cynvvm.nvvmProgram prog) except+ nogil
    NvvmProgramHandle create_nvvm_program_handle_ref "cuda_core::create_nvvm_program_handle_ref" (
            cynvvm.nvvmProgram prog) except+ nogil

    # ---- nvJitLink handles --------------------------------------------------
    NvJitLinkHandle create_nvjitlink_handle "cuda_core::create_nvjitlink_handle" (
            cynvjitlink.nvJitLinkHandle handle) except+ nogil
    NvJitLinkHandle create_nvjitlink_handle_ref "cuda_core::create_nvjitlink_handle_ref" (
            cynvjitlink.nvJitLinkHandle handle) except+ nogil

    # ---- cuLink handles -----------------------------------------------------
    CuLinkHandle create_culink_handle "cuda_core::create_culink_handle" (
            cydriver.CUlinkState state) except+ nogil
    CuLinkHandle create_culink_handle_ref "cuda_core::create_culink_handle_ref" (
            cydriver.CUlinkState state) except+ nogil

    # ---- File descriptor handles --------------------------------------------
    FileDescriptorHandle create_fd_handle "cuda_core::create_fd_handle" (
            int fd) except+ nogil
    FileDescriptorHandle create_fd_handle_ref "cuda_core::create_fd_handle_ref" (
            int fd) except+ nogil
200# =============================================================================
201# CUDA Driver API capsule
202#
203# This provides resolved CUDA driver function pointers to the C++ code.
204# =============================================================================
# Name string for the CUDA driver API capsule (version 1). It is not referenced
# anywhere else in the visible part of this module.
cdef const char* _CUDA_DRIVER_API_V1_NAME = b"cuda.core._resource_handles._CUDA_DRIVER_API_V1"
209# =============================================================================
210# CUDA driver function pointer initialization
211#
212# The C++ code declares extern function pointers (p_cuXxx) that need to be
213# populated before any handle creation functions are called. We extract these
214# from cuda.bindings.cydriver.__pyx_capi__ at module import time.
215#
216# The Cython string substitution (e.g., "reinterpret_cast<void*&>(...)")
217# allows us to assign void* values to typed function pointer variables.
218# =============================================================================
220# Declare extern variables with reinterpret_cast to allow void* assignment
# Writable views of the C++ function pointer variables.
#
# Each p_* name is bound to a ``reinterpret_cast<void*&>(...)`` expression over
# the corresponding cuda_core:: variable, so Cython sees a plain ``void*`` that
# can be assigned from the capsule lookups performed at module import.
cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
    # ---- Context ------------------------------------------------------------
    void* p_cuDevicePrimaryCtxRetain "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRetain)"
    void* p_cuDevicePrimaryCtxRelease "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRelease)"
    void* p_cuCtxGetCurrent "reinterpret_cast<void*&>(cuda_core::p_cuCtxGetCurrent)"

    # ---- Stream -------------------------------------------------------------
    void* p_cuStreamCreateWithPriority "reinterpret_cast<void*&>(cuda_core::p_cuStreamCreateWithPriority)"
    void* p_cuStreamDestroy "reinterpret_cast<void*&>(cuda_core::p_cuStreamDestroy)"

    # ---- Event --------------------------------------------------------------
    void* p_cuEventCreate "reinterpret_cast<void*&>(cuda_core::p_cuEventCreate)"
    void* p_cuEventDestroy "reinterpret_cast<void*&>(cuda_core::p_cuEventDestroy)"
    void* p_cuIpcOpenEventHandle "reinterpret_cast<void*&>(cuda_core::p_cuIpcOpenEventHandle)"

    # ---- Device -------------------------------------------------------------
    void* p_cuDeviceGetCount "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetCount)"

    # ---- Memory pool --------------------------------------------------------
    void* p_cuMemPoolSetAccess "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolSetAccess)"
    void* p_cuMemPoolDestroy "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolDestroy)"
    void* p_cuMemPoolCreate "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolCreate)"
    void* p_cuDeviceGetMemPool "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetMemPool)"
    void* p_cuMemPoolImportFromShareableHandle "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportFromShareableHandle)"

    # ---- Memory allocation --------------------------------------------------
    void* p_cuMemAllocFromPoolAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocFromPoolAsync)"
    void* p_cuMemAllocAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocAsync)"
    void* p_cuMemAlloc "reinterpret_cast<void*&>(cuda_core::p_cuMemAlloc)"
    void* p_cuMemAllocHost "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocHost)"

    # ---- Memory deallocation ------------------------------------------------
    void* p_cuMemFreeAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeAsync)"
    void* p_cuMemFree "reinterpret_cast<void*&>(cuda_core::p_cuMemFree)"
    void* p_cuMemFreeHost "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeHost)"

    # ---- IPC ----------------------------------------------------------------
    void* p_cuMemPoolImportPointer "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportPointer)"

    # ---- Library ------------------------------------------------------------
    void* p_cuLibraryLoadFromFile "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadFromFile)"
    void* p_cuLibraryLoadData "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadData)"
    void* p_cuLibraryUnload "reinterpret_cast<void*&>(cuda_core::p_cuLibraryUnload)"
    void* p_cuLibraryGetKernel "reinterpret_cast<void*&>(cuda_core::p_cuLibraryGetKernel)"

    # ---- Graph --------------------------------------------------------------
    void* p_cuGraphDestroy "reinterpret_cast<void*&>(cuda_core::p_cuGraphDestroy)"

    # ---- Linker -------------------------------------------------------------
    void* p_cuLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_cuLinkDestroy)"

    # ---- Graphics interop ---------------------------------------------------
    void* p_cuGraphicsUnmapResources "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnmapResources)"
    void* p_cuGraphicsUnregisterResource "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnregisterResource)"

    # ---- NVRTC --------------------------------------------------------------
    void* p_nvrtcDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvrtcDestroyProgram)"

    # ---- NVVM ---------------------------------------------------------------
    void* p_nvvmDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvvmDestroyProgram)"

    # ---- nvJitLink ----------------------------------------------------------
    void* p_nvJitLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_nvJitLinkDestroy)"
286# Initialize driver function pointers from cydriver.__pyx_capi__ at module load
# Return the raw pointer stored in the capsule that cydriver publishes under
# `name` in its __pyx_capi__ table. A name that is not in the table raises
# KeyError from the lookup.
cdef void* _get_driver_fn(str name):
    cdef object fn_capsule = cydriver.__pyx_capi__[name]
    # PyCapsule_GetPointer requires the expected capsule name; pass back the
    # name the capsule itself reports.
    cdef const char* capsule_name = PyCapsule_GetName(fn_capsule)
    return PyCapsule_GetPointer(fn_capsule, capsule_name)
291# Context
# Populate the C++ driver function pointers when this module is imported.
# The targets are C variables, so each one is assigned by an explicit statement.

# ---- Context ----------------------------------------------------------------
p_cuDevicePrimaryCtxRetain = _get_driver_fn("cuDevicePrimaryCtxRetain")
p_cuDevicePrimaryCtxRelease = _get_driver_fn("cuDevicePrimaryCtxRelease")
p_cuCtxGetCurrent = _get_driver_fn("cuCtxGetCurrent")

# ---- Stream -----------------------------------------------------------------
p_cuStreamCreateWithPriority = _get_driver_fn("cuStreamCreateWithPriority")
p_cuStreamDestroy = _get_driver_fn("cuStreamDestroy")

# ---- Event ------------------------------------------------------------------
p_cuEventCreate = _get_driver_fn("cuEventCreate")
p_cuEventDestroy = _get_driver_fn("cuEventDestroy")
p_cuIpcOpenEventHandle = _get_driver_fn("cuIpcOpenEventHandle")

# ---- Device -----------------------------------------------------------------
p_cuDeviceGetCount = _get_driver_fn("cuDeviceGetCount")

# ---- Memory pool ------------------------------------------------------------
p_cuMemPoolSetAccess = _get_driver_fn("cuMemPoolSetAccess")
p_cuMemPoolDestroy = _get_driver_fn("cuMemPoolDestroy")
p_cuMemPoolCreate = _get_driver_fn("cuMemPoolCreate")
p_cuDeviceGetMemPool = _get_driver_fn("cuDeviceGetMemPool")
p_cuMemPoolImportFromShareableHandle = _get_driver_fn("cuMemPoolImportFromShareableHandle")

# ---- Memory allocation ------------------------------------------------------
p_cuMemAllocFromPoolAsync = _get_driver_fn("cuMemAllocFromPoolAsync")
p_cuMemAllocAsync = _get_driver_fn("cuMemAllocAsync")
p_cuMemAlloc = _get_driver_fn("cuMemAlloc")
p_cuMemAllocHost = _get_driver_fn("cuMemAllocHost")

# ---- Memory deallocation ----------------------------------------------------
p_cuMemFreeAsync = _get_driver_fn("cuMemFreeAsync")
p_cuMemFree = _get_driver_fn("cuMemFree")
p_cuMemFreeHost = _get_driver_fn("cuMemFreeHost")

# ---- IPC --------------------------------------------------------------------
p_cuMemPoolImportPointer = _get_driver_fn("cuMemPoolImportPointer")

# ---- Library ----------------------------------------------------------------
p_cuLibraryLoadFromFile = _get_driver_fn("cuLibraryLoadFromFile")
p_cuLibraryLoadData = _get_driver_fn("cuLibraryLoadData")
p_cuLibraryUnload = _get_driver_fn("cuLibraryUnload")
p_cuLibraryGetKernel = _get_driver_fn("cuLibraryGetKernel")

# ---- Graph ------------------------------------------------------------------
p_cuGraphDestroy = _get_driver_fn("cuGraphDestroy")

# ---- Linker -----------------------------------------------------------------
p_cuLinkDestroy = _get_driver_fn("cuLinkDestroy")

# ---- Graphics interop -------------------------------------------------------
p_cuGraphicsUnmapResources = _get_driver_fn("cuGraphicsUnmapResources")
p_cuGraphicsUnregisterResource = _get_driver_fn("cuGraphicsUnregisterResource")
345# =============================================================================
346# NVRTC function pointer initialization
347# =============================================================================
# Return the raw pointer stored in the capsule that cynvrtc publishes under
# `name` in its __pyx_capi__ table. A name that is not in the table raises
# KeyError from the lookup.
cdef void* _get_nvrtc_fn(str name):
    cdef object fn_capsule = cynvrtc.__pyx_capi__[name]
    # The capsule's own name is passed back as the expected name.
    cdef const char* capsule_name = PyCapsule_GetName(fn_capsule)
    return PyCapsule_GetPointer(fn_capsule, capsule_name)

# Resolved once, at module import.
p_nvrtcDestroyProgram = _get_nvrtc_fn("nvrtcDestroyProgram")
# =============================================================================
# NVVM function pointer initialization
#
# NVVM may not be available at runtime; the C++ deleter checks for null before
# calling. NOTE(review): the lookup below indexes cynvvm.__pyx_capi__ directly,
# so a missing entry raises KeyError at import instead of leaving the pointer
# null -- confirm whether that is intended.
# =============================================================================
# Return the raw pointer stored in the capsule that cynvvm publishes under
# `name` in its __pyx_capi__ table. A name that is not in the table raises
# KeyError from the lookup.
cdef void* _get_nvvm_fn(str name):
    cdef object fn_capsule = cynvvm.__pyx_capi__[name]
    # The capsule's own name is passed back as the expected name.
    cdef const char* capsule_name = PyCapsule_GetName(fn_capsule)
    return PyCapsule_GetPointer(fn_capsule, capsule_name)

# Resolved once, at module import.
p_nvvmDestroyProgram = _get_nvvm_fn("nvvmDestroyProgram")
# =============================================================================
# nvJitLink function pointer initialization
#
# nvJitLink may not be available at runtime; the C++ deleter checks for null
# before calling. NOTE(review): the lookup below indexes
# cynvjitlink.__pyx_capi__ directly, so a missing entry raises KeyError at
# import instead of leaving the pointer null -- confirm whether that is intended.
# =============================================================================
# Return the raw pointer stored in the capsule that cynvjitlink publishes under
# `name` in its __pyx_capi__ table. A name that is not in the table raises
# KeyError from the lookup.
cdef void* _get_nvjitlink_fn(str name):
    cdef object fn_capsule = cynvjitlink.__pyx_capi__[name]
    # The capsule's own name is passed back as the expected name.
    cdef const char* capsule_name = PyCapsule_GetName(fn_capsule)
    return PyCapsule_GetPointer(fn_capsule, capsule_name)

# Resolved once, at module import.
p_nvJitLinkDestroy = _get_nvjitlink_fn("nvJitLinkDestroy")