Coverage for cuda / core / _resource_handles.pyx: 100.00%
51 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-25 01:07 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-25 01:07 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5# This module compiles _cpp/resource_handles.cpp into a shared library.
6# Consumer modules cimport the functions declared in _resource_handles.pxd.
7# Since there is only one copy of the C++ code (in this .so), all static and
8# thread-local state is shared correctly across all consumer modules.
9#
10# The cdef extern from declarations below satisfy the .pxd declarations directly,
11# without needing separate wrapper functions.
13from cpython.pycapsule cimport PyCapsule_GetName, PyCapsule_GetPointer
14from libc.stddef cimport size_t
16from cuda.bindings cimport cydriver
17from cuda.bindings cimport cynvrtc
18from cuda.bindings cimport cynvvm
19from cuda.bindings cimport cynvjitlink
21from ._resource_handles cimport (
22 ContextHandle,
23 StreamHandle,
24 EventHandle,
25 MemoryPoolHandle,
26 DevicePtrHandle,
27 LibraryHandle,
28 KernelHandle,
29 GraphHandle,
30 GraphicsResourceHandle,
31 NvrtcProgramHandle,
32 NvvmProgramHandle,
33 NvJitLinkHandle,
34 CuLinkHandle,
35)
37import cuda.bindings.cydriver as cydriver
38import cuda.bindings.cynvrtc as cynvrtc
39import cuda.bindings.cynvvm as cynvvm
40import cuda.bindings.cynvjitlink as cynvjitlink
42# =============================================================================
43# C++ function declarations (non-inline, implemented in resource_handles.cpp)
44#
45# These declarations satisfy the cdef function declarations in _resource_handles.pxd.
46# Consumer modules cimport these functions and calls go through this .so.
47# =============================================================================
cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
    # Reading guide for the declarations in this block:
    #   * The quoted string after each Cython name is the C++ name Cython
    #     emits verbatim in the generated code.
    #   * `except+`  : a C++ exception thrown by the call is translated by
    #                  Cython into a Python exception.
    #   * `noexcept` : Cython performs no exception check after the call.
    #   * `nogil`    : the call is permitted without holding the GIL.

    # ---- Error state (labelled thread-local in the original source) ----
    cydriver.CUresult get_last_error "cuda_core::get_last_error" () noexcept nogil
    cydriver.CUresult peek_last_error "cuda_core::peek_last_error" () noexcept nogil
    void clear_last_error "cuda_core::clear_last_error" () noexcept nogil

    # ---- Contexts ----
    ContextHandle create_context_handle_ref "cuda_core::create_context_handle_ref" (
        cydriver.CUcontext ctx
    ) except+ nogil
    ContextHandle get_primary_context "cuda_core::get_primary_context" (
        int device_id
    ) except+ nogil
    ContextHandle get_current_context "cuda_core::get_current_context" () except+ nogil

    # ---- Streams ----
    StreamHandle create_stream_handle "cuda_core::create_stream_handle" (
        const ContextHandle& h_ctx,
        unsigned int flags,
        int priority
    ) except+ nogil
    StreamHandle create_stream_handle_ref "cuda_core::create_stream_handle_ref" (
        cydriver.CUstream stream
    ) except+ nogil
    StreamHandle create_stream_handle_with_owner "cuda_core::create_stream_handle_with_owner" (
        cydriver.CUstream stream,
        object owner
    ) except+ nogil
    StreamHandle get_legacy_stream "cuda_core::get_legacy_stream" () except+ nogil
    StreamHandle get_per_thread_stream "cuda_core::get_per_thread_stream" () except+ nogil

    # ---- Events ----
    # Note carried over from the original source: the `_create_event_handle*`
    # names are internal because of C++ overloading.
    EventHandle create_event_handle "cuda_core::create_event_handle" (
        const ContextHandle& h_ctx,
        unsigned int flags,
        bint timing_disabled,
        bint busy_waited,
        bint ipc_enabled,
        int device_id
    ) except+ nogil
    EventHandle create_event_handle_noctx "cuda_core::create_event_handle_noctx" (
        unsigned int flags
    ) except+ nogil
    EventHandle create_event_handle_ref "cuda_core::create_event_handle_ref" (
        cydriver.CUevent event
    ) except+ nogil
    EventHandle create_event_handle_ipc "cuda_core::create_event_handle_ipc" (
        const cydriver.CUipcEventHandle& ipc_handle,
        bint busy_waited
    ) except+ nogil

    # ---- Event metadata accessors (all `noexcept nogil`) ----
    bint get_event_timing_disabled "cuda_core::get_event_timing_disabled" (
        const EventHandle& h
    ) noexcept nogil
    bint get_event_busy_waited "cuda_core::get_event_busy_waited" (
        const EventHandle& h
    ) noexcept nogil
    bint get_event_ipc_enabled "cuda_core::get_event_ipc_enabled" (
        const EventHandle& h
    ) noexcept nogil
    int get_event_device_id "cuda_core::get_event_device_id" (
        const EventHandle& h
    ) noexcept nogil
    ContextHandle get_event_context "cuda_core::get_event_context" (
        const EventHandle& h
    ) noexcept nogil

    # ---- Memory pools ----
    MemoryPoolHandle create_mempool_handle "cuda_core::create_mempool_handle" (
        const cydriver.CUmemPoolProps& props
    ) except+ nogil
    MemoryPoolHandle create_mempool_handle_ref "cuda_core::create_mempool_handle_ref" (
        cydriver.CUmemoryPool pool
    ) except+ nogil
    MemoryPoolHandle get_device_mempool "cuda_core::get_device_mempool" (
        int device_id
    ) except+ nogil
    MemoryPoolHandle create_mempool_handle_ipc "cuda_core::create_mempool_handle_ipc" (
        int fd,
        cydriver.CUmemAllocationHandleType handle_type
    ) except+ nogil

    # ---- Device pointers ----
    DevicePtrHandle deviceptr_alloc_from_pool "cuda_core::deviceptr_alloc_from_pool" (
        size_t size,
        const MemoryPoolHandle& h_pool,
        const StreamHandle& h_stream
    ) except+ nogil
    DevicePtrHandle deviceptr_alloc_async "cuda_core::deviceptr_alloc_async" (
        size_t size,
        const StreamHandle& h_stream
    ) except+ nogil
    DevicePtrHandle deviceptr_alloc "cuda_core::deviceptr_alloc" (
        size_t size
    ) except+ nogil
    DevicePtrHandle deviceptr_alloc_host "cuda_core::deviceptr_alloc_host" (
        size_t size
    ) except+ nogil
    DevicePtrHandle deviceptr_create_ref "cuda_core::deviceptr_create_ref" (
        cydriver.CUdeviceptr ptr
    ) except+ nogil
    DevicePtrHandle deviceptr_create_with_owner "cuda_core::deviceptr_create_with_owner" (
        cydriver.CUdeviceptr ptr,
        object owner
    ) except+ nogil
    DevicePtrHandle deviceptr_create_mapped_graphics "cuda_core::deviceptr_create_mapped_graphics" (
        cydriver.CUdeviceptr ptr,
        const GraphicsResourceHandle& h_resource,
        const StreamHandle& h_stream
    ) except+ nogil

    # ---- MR deallocation callback ----
    # The callback type and its registration function are declared
    # `noexcept` only (no `nogil`), unlike most declarations in this block.
    ctypedef void (*MRDeallocCallback)(
        object mr,
        cydriver.CUdeviceptr ptr,
        size_t size,
        const StreamHandle& stream
    ) noexcept
    void register_mr_dealloc_callback "cuda_core::register_mr_dealloc_callback" (
        MRDeallocCallback cb
    ) noexcept
    DevicePtrHandle deviceptr_create_with_mr "cuda_core::deviceptr_create_with_mr" (
        cydriver.CUdeviceptr ptr,
        size_t size,
        object mr
    ) except+ nogil

    # ---- IPC import and per-pointer deallocation stream ----
    DevicePtrHandle deviceptr_import_ipc "cuda_core::deviceptr_import_ipc" (
        const MemoryPoolHandle& h_pool,
        const void* export_data,
        const StreamHandle& h_stream
    ) except+ nogil
    StreamHandle deallocation_stream "cuda_core::deallocation_stream" (
        const DevicePtrHandle& h
    ) noexcept nogil
    void set_deallocation_stream "cuda_core::set_deallocation_stream" (
        const DevicePtrHandle& h,
        const StreamHandle& h_stream
    ) noexcept nogil

    # ---- Libraries ----
    LibraryHandle create_library_handle_from_file "cuda_core::create_library_handle_from_file" (
        const char* path
    ) except+ nogil
    LibraryHandle create_library_handle_from_data "cuda_core::create_library_handle_from_data" (
        const void* data
    ) except+ nogil
    LibraryHandle create_library_handle_ref "cuda_core::create_library_handle_ref" (
        cydriver.CUlibrary library
    ) except+ nogil

    # ---- Kernels ----
    KernelHandle create_kernel_handle "cuda_core::create_kernel_handle" (
        const LibraryHandle& h_library,
        const char* name
    ) except+ nogil
    KernelHandle create_kernel_handle_ref "cuda_core::create_kernel_handle_ref" (
        cydriver.CUkernel kernel
    ) except+ nogil
    LibraryHandle get_kernel_library "cuda_core::get_kernel_library" (
        const KernelHandle& h
    ) noexcept nogil

    # ---- Graphs ----
    GraphHandle create_graph_handle "cuda_core::create_graph_handle" (
        cydriver.CUgraph graph
    ) except+ nogil
    GraphHandle create_graph_handle_ref "cuda_core::create_graph_handle_ref" (
        cydriver.CUgraph graph,
        const GraphHandle& h_parent
    ) except+ nogil

    # ---- Graph nodes ----
    # NOTE(review): GraphNodeHandle does not appear in this file's explicit
    # `from ._resource_handles cimport (...)` list; presumably it resolves
    # through the module's own _resource_handles.pxd -- confirm.
    GraphNodeHandle create_graph_node_handle "cuda_core::create_graph_node_handle" (
        cydriver.CUgraphNode node,
        const GraphHandle& h_graph
    ) except+ nogil
    GraphHandle graph_node_get_graph "cuda_core::graph_node_get_graph" (
        const GraphNodeHandle& h
    ) noexcept nogil

    # ---- Graphics resources ----
    GraphicsResourceHandle create_graphics_resource_handle "cuda_core::create_graphics_resource_handle" (
        cydriver.CUgraphicsResource resource
    ) except+ nogil

    # ---- NVRTC programs ----
    NvrtcProgramHandle create_nvrtc_program_handle "cuda_core::create_nvrtc_program_handle" (
        cynvrtc.nvrtcProgram prog
    ) except+ nogil
    NvrtcProgramHandle create_nvrtc_program_handle_ref "cuda_core::create_nvrtc_program_handle_ref" (
        cynvrtc.nvrtcProgram prog
    ) except+ nogil

    # ---- NVVM programs ----
    NvvmProgramHandle create_nvvm_program_handle "cuda_core::create_nvvm_program_handle" (
        cynvvm.nvvmProgram prog
    ) except+ nogil
    NvvmProgramHandle create_nvvm_program_handle_ref "cuda_core::create_nvvm_program_handle_ref" (
        cynvvm.nvvmProgram prog
    ) except+ nogil

    # ---- nvJitLink ----
    NvJitLinkHandle create_nvjitlink_handle "cuda_core::create_nvjitlink_handle" (
        cynvjitlink.nvJitLinkHandle handle
    ) except+ nogil
    NvJitLinkHandle create_nvjitlink_handle_ref "cuda_core::create_nvjitlink_handle_ref" (
        cynvjitlink.nvJitLinkHandle handle
    ) except+ nogil

    # ---- cuLink ----
    CuLinkHandle create_culink_handle "cuda_core::create_culink_handle" (
        cydriver.CUlinkState state
    ) except+ nogil
    CuLinkHandle create_culink_handle_ref "cuda_core::create_culink_handle_ref" (
        cydriver.CUlinkState state
    ) except+ nogil
195# =============================================================================
196# CUDA Driver API capsule
197#
198# This provides resolved CUDA driver function pointers to the C++ code.
199# =============================================================================
# Name string for the CUDA driver API capsule (version 1).
# NOTE(review): no use of this constant is visible in this part of the file;
# presumably it is consumed elsewhere -- confirm before removing.
cdef const char* _CUDA_DRIVER_API_V1_NAME = b"cuda.core._resource_handles._CUDA_DRIVER_API_V1"
204# =============================================================================
205# CUDA driver function pointer initialization
206#
207# The C++ code declares extern function pointers (p_cuXxx) that need to be
208# populated before any handle creation functions are called. We extract these
209# from cuda.bindings.cydriver.__pyx_capi__ at module import time.
210#
211# The Cython string substitution (e.g., "reinterpret_cast<void*&>(...)")
212# allows us to assign void* values to typed function pointer variables.
213# =============================================================================
# Each name below is bound, through its quoted C-name string, to the expression
# `reinterpret_cast<void*&>(cuda_core::p_xxx)`.  Cython therefore treats the
# slot as a plain `void*` variable, which lets this module assign `void*`
# values to the typed function-pointer variables declared by the C++ header.
cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
    # -- Context --
    void* p_cuDevicePrimaryCtxRetain "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRetain)"
    void* p_cuDevicePrimaryCtxRelease "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRelease)"
    void* p_cuCtxGetCurrent "reinterpret_cast<void*&>(cuda_core::p_cuCtxGetCurrent)"

    # -- Stream --
    void* p_cuStreamCreateWithPriority "reinterpret_cast<void*&>(cuda_core::p_cuStreamCreateWithPriority)"
    void* p_cuStreamDestroy "reinterpret_cast<void*&>(cuda_core::p_cuStreamDestroy)"

    # -- Event --
    void* p_cuEventCreate "reinterpret_cast<void*&>(cuda_core::p_cuEventCreate)"
    void* p_cuEventDestroy "reinterpret_cast<void*&>(cuda_core::p_cuEventDestroy)"
    void* p_cuIpcOpenEventHandle "reinterpret_cast<void*&>(cuda_core::p_cuIpcOpenEventHandle)"

    # -- Device --
    void* p_cuDeviceGetCount "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetCount)"

    # -- Memory pool --
    void* p_cuMemPoolSetAccess "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolSetAccess)"
    void* p_cuMemPoolDestroy "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolDestroy)"
    void* p_cuMemPoolCreate "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolCreate)"
    void* p_cuDeviceGetMemPool "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetMemPool)"
    void* p_cuMemPoolImportFromShareableHandle "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportFromShareableHandle)"

    # -- Memory allocation --
    void* p_cuMemAllocFromPoolAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocFromPoolAsync)"
    void* p_cuMemAllocAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocAsync)"
    void* p_cuMemAlloc "reinterpret_cast<void*&>(cuda_core::p_cuMemAlloc)"
    void* p_cuMemAllocHost "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocHost)"

    # -- Memory deallocation --
    void* p_cuMemFreeAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeAsync)"
    void* p_cuMemFree "reinterpret_cast<void*&>(cuda_core::p_cuMemFree)"
    void* p_cuMemFreeHost "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeHost)"

    # -- IPC --
    void* p_cuMemPoolImportPointer "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportPointer)"

    # -- Library --
    void* p_cuLibraryLoadFromFile "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadFromFile)"
    void* p_cuLibraryLoadData "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadData)"
    void* p_cuLibraryUnload "reinterpret_cast<void*&>(cuda_core::p_cuLibraryUnload)"
    void* p_cuLibraryGetKernel "reinterpret_cast<void*&>(cuda_core::p_cuLibraryGetKernel)"

    # -- Graph --
    void* p_cuGraphDestroy "reinterpret_cast<void*&>(cuda_core::p_cuGraphDestroy)"

    # -- Linker --
    void* p_cuLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_cuLinkDestroy)"

    # -- Graphics interop --
    void* p_cuGraphicsUnmapResources "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnmapResources)"
    void* p_cuGraphicsUnregisterResource "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnregisterResource)"

    # -- NVRTC --
    void* p_nvrtcDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvrtcDestroyProgram)"

    # -- NVVM --
    void* p_nvvmDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvvmDestroyProgram)"

    # -- nvJitLink --
    void* p_nvJitLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_nvJitLinkDestroy)"
# Fetch the capsule stored under `name` in cydriver.__pyx_capi__ and return
# the raw pointer it wraps.  A missing `name` raises KeyError from the
# mapping lookup.
cdef void* _get_driver_fn(str name):
    cdef const char* capsule_name
    fn_capsule = cydriver.__pyx_capi__[name]
    # The capsule's own name is handed back as the expected name, so the
    # name comparison done by PyCapsule_GetPointer is against itself.
    capsule_name = PyCapsule_GetName(fn_capsule)
    return PyCapsule_GetPointer(fn_capsule, capsule_name)
# Fill every CUDA driver pointer slot from cydriver's exported capsules.
# These are module-level statements, so they run when this module is imported.

# -- Context --
p_cuDevicePrimaryCtxRetain = _get_driver_fn("cuDevicePrimaryCtxRetain")
p_cuDevicePrimaryCtxRelease = _get_driver_fn("cuDevicePrimaryCtxRelease")
p_cuCtxGetCurrent = _get_driver_fn("cuCtxGetCurrent")

# -- Stream --
p_cuStreamCreateWithPriority = _get_driver_fn("cuStreamCreateWithPriority")
p_cuStreamDestroy = _get_driver_fn("cuStreamDestroy")

# -- Event --
p_cuEventCreate = _get_driver_fn("cuEventCreate")
p_cuEventDestroy = _get_driver_fn("cuEventDestroy")
p_cuIpcOpenEventHandle = _get_driver_fn("cuIpcOpenEventHandle")

# -- Device --
p_cuDeviceGetCount = _get_driver_fn("cuDeviceGetCount")

# -- Memory pool --
p_cuMemPoolSetAccess = _get_driver_fn("cuMemPoolSetAccess")
p_cuMemPoolDestroy = _get_driver_fn("cuMemPoolDestroy")
p_cuMemPoolCreate = _get_driver_fn("cuMemPoolCreate")
p_cuDeviceGetMemPool = _get_driver_fn("cuDeviceGetMemPool")
p_cuMemPoolImportFromShareableHandle = _get_driver_fn("cuMemPoolImportFromShareableHandle")

# -- Memory allocation --
p_cuMemAllocFromPoolAsync = _get_driver_fn("cuMemAllocFromPoolAsync")
p_cuMemAllocAsync = _get_driver_fn("cuMemAllocAsync")
p_cuMemAlloc = _get_driver_fn("cuMemAlloc")
p_cuMemAllocHost = _get_driver_fn("cuMemAllocHost")

# -- Memory deallocation --
p_cuMemFreeAsync = _get_driver_fn("cuMemFreeAsync")
p_cuMemFree = _get_driver_fn("cuMemFree")
p_cuMemFreeHost = _get_driver_fn("cuMemFreeHost")

# -- IPC --
p_cuMemPoolImportPointer = _get_driver_fn("cuMemPoolImportPointer")

# -- Library --
p_cuLibraryLoadFromFile = _get_driver_fn("cuLibraryLoadFromFile")
p_cuLibraryLoadData = _get_driver_fn("cuLibraryLoadData")
p_cuLibraryUnload = _get_driver_fn("cuLibraryUnload")
p_cuLibraryGetKernel = _get_driver_fn("cuLibraryGetKernel")

# -- Graph --
p_cuGraphDestroy = _get_driver_fn("cuGraphDestroy")

# -- Linker --
p_cuLinkDestroy = _get_driver_fn("cuLinkDestroy")

# -- Graphics interop --
p_cuGraphicsUnmapResources = _get_driver_fn("cuGraphicsUnmapResources")
p_cuGraphicsUnregisterResource = _get_driver_fn("cuGraphicsUnregisterResource")
340# =============================================================================
341# NVRTC function pointer initialization
342# =============================================================================
# Fetch the capsule stored under `name` in cynvrtc.__pyx_capi__ and return
# the raw pointer it wraps.
cdef void* _get_nvrtc_fn(str name):
    cdef const char* capsule_name
    fn_capsule = cynvrtc.__pyx_capi__[name]
    # Use the capsule's own name as the expected name for the pointer lookup.
    capsule_name = PyCapsule_GetName(fn_capsule)
    return PyCapsule_GetPointer(fn_capsule, capsule_name)


# Module-level statement: resolved when this module is imported.
p_nvrtcDestroyProgram = _get_nvrtc_fn("nvrtcDestroyProgram")
350# =============================================================================
351# NVVM function pointer initialization
352#
353# NVVM may not be available at runtime, so we handle missing function pointers
354# gracefully. The C++ deleter checks for null before calling.
355# =============================================================================
# Fetch the capsule stored under `name` in cynvvm.__pyx_capi__ and return
# the raw pointer it wraps.
cdef void* _get_nvvm_fn(str name):
    cdef const char* capsule_name
    fn_capsule = cynvvm.__pyx_capi__[name]
    # Use the capsule's own name as the expected name for the pointer lookup.
    capsule_name = PyCapsule_GetName(fn_capsule)
    return PyCapsule_GetPointer(fn_capsule, capsule_name)


# Module-level statement: resolved when this module is imported.
p_nvvmDestroyProgram = _get_nvvm_fn("nvvmDestroyProgram")
363# =============================================================================
364# nvJitLink function pointer initialization
365#
366# nvJitLink may not be available at runtime, so we handle missing function
367# pointers gracefully. The C++ deleter checks for null before calling.
368# =============================================================================
# Fetch the capsule stored under `name` in cynvjitlink.__pyx_capi__ and
# return the raw pointer it wraps.
cdef void* _get_nvjitlink_fn(str name):
    cdef const char* capsule_name
    fn_capsule = cynvjitlink.__pyx_capi__[name]
    # Use the capsule's own name as the expected name for the pointer lookup.
    capsule_name = PyCapsule_GetName(fn_capsule)
    return PyCapsule_GetPointer(fn_capsule, capsule_name)


# Module-level statement: resolved when this module is imported.
p_nvJitLinkDestroy = _get_nvjitlink_fn("nvJitLinkDestroy")