Coverage for cuda / core / _resource_handles.pyx: 100.00%
49 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-08 01:07 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-08 01:07 +0000
1# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2#
3# SPDX-License-Identifier: Apache-2.0
5# This module compiles _cpp/resource_handles.cpp into a shared library.
6# Consumer modules cimport the functions declared in _resource_handles.pxd.
7# Since there is only one copy of the C++ code (in this .so), all static and
8# thread-local state is shared correctly across all consumer modules.
9#
10# The cdef extern from declarations below satisfy the .pxd declarations directly,
11# without needing separate wrapper functions.
13from cpython.pycapsule cimport PyCapsule_GetName, PyCapsule_GetPointer
14from libc.stddef cimport size_t
16from cuda.bindings cimport cydriver
17from cuda.bindings cimport cynvrtc
18from cuda.bindings cimport cynvvm
19from cuda.bindings cimport cynvjitlink
21from ._resource_handles cimport (
22 ContextHandle,
23 StreamHandle,
24 EventHandle,
25 MemoryPoolHandle,
26 DevicePtrHandle,
27 LibraryHandle,
28 KernelHandle,
29 GraphicsResourceHandle,
30 NvrtcProgramHandle,
31 NvvmProgramHandle,
32 NvJitLinkHandle,
33 CuLinkHandle,
34)
36import cuda.bindings.cydriver as cydriver
37import cuda.bindings.cynvrtc as cynvrtc
38import cuda.bindings.cynvvm as cynvvm
39import cuda.bindings.cynvjitlink as cynvjitlink
41# =============================================================================
42# C++ function declarations (non-inline, implemented in resource_handles.cpp)
43#
44# These declarations satisfy the cdef function declarations in _resource_handles.pxd.
45# Consumer modules cimport these functions and calls go through this .so.
46# =============================================================================
# Binds Cython-level names to the C++ functions declared in
# _cpp/resource_handles.hpp. The quoted string after each name is the C++
# symbol emitted in the generated code. `except+` asks Cython to translate a
# C++ exception into a Python exception; `noexcept` declares that the call does
# not propagate one. `nogil` marks the function as callable without the GIL.
48cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
49 # Thread-local error handling
50 cydriver.CUresult get_last_error "cuda_core::get_last_error" () noexcept nogil
51 cydriver.CUresult peek_last_error "cuda_core::peek_last_error" () noexcept nogil
52 void clear_last_error "cuda_core::clear_last_error" () noexcept nogil
54 # Context handles
55 ContextHandle create_context_handle_ref "cuda_core::create_context_handle_ref" (
56 cydriver.CUcontext ctx) except+ nogil
57 ContextHandle get_primary_context "cuda_core::get_primary_context" (
58 int device_id) except+ nogil
59 ContextHandle get_current_context "cuda_core::get_current_context" () except+ nogil
61 # Stream handles
62 StreamHandle create_stream_handle "cuda_core::create_stream_handle" (
63 const ContextHandle& h_ctx, unsigned int flags, int priority) except+ nogil
64 StreamHandle create_stream_handle_ref "cuda_core::create_stream_handle_ref" (
65 cydriver.CUstream stream) except+ nogil
 # NOTE(review): this declaration (like deviceptr_create_with_owner and
 # deviceptr_create_with_mr below) takes a Python `object` yet is marked nogil;
 # presumably callers hold the GIL when passing the object -- confirm.
66 StreamHandle create_stream_handle_with_owner "cuda_core::create_stream_handle_with_owner" (
67 cydriver.CUstream stream, object owner) except+ nogil
68 StreamHandle get_legacy_stream "cuda_core::get_legacy_stream" () except+ nogil
69 StreamHandle get_per_thread_stream "cuda_core::get_per_thread_stream" () except+ nogil
71 # Event handles (note: _create_event_handle* are internal due to C++ overloading)
72 EventHandle create_event_handle "cuda_core::create_event_handle" (
73 const ContextHandle& h_ctx, unsigned int flags) except+ nogil
74 EventHandle create_event_handle_noctx "cuda_core::create_event_handle_noctx" (
75 unsigned int flags) except+ nogil
76 EventHandle create_event_handle_ipc "cuda_core::create_event_handle_ipc" (
77 const cydriver.CUipcEventHandle& ipc_handle) except+ nogil
79 # Memory pool handles
80 MemoryPoolHandle create_mempool_handle "cuda_core::create_mempool_handle" (
81 const cydriver.CUmemPoolProps& props) except+ nogil
82 MemoryPoolHandle create_mempool_handle_ref "cuda_core::create_mempool_handle_ref" (
83 cydriver.CUmemoryPool pool) except+ nogil
84 MemoryPoolHandle get_device_mempool "cuda_core::get_device_mempool" (
85 int device_id) except+ nogil
86 MemoryPoolHandle create_mempool_handle_ipc "cuda_core::create_mempool_handle_ipc" (
87 int fd, cydriver.CUmemAllocationHandleType handle_type) except+ nogil
89 # Device pointer handles
90 DevicePtrHandle deviceptr_alloc_from_pool "cuda_core::deviceptr_alloc_from_pool" (
91 size_t size, const MemoryPoolHandle& h_pool, const StreamHandle& h_stream) except+ nogil
92 DevicePtrHandle deviceptr_alloc_async "cuda_core::deviceptr_alloc_async" (
93 size_t size, const StreamHandle& h_stream) except+ nogil
94 DevicePtrHandle deviceptr_alloc "cuda_core::deviceptr_alloc" (size_t size) except+ nogil
95 DevicePtrHandle deviceptr_alloc_host "cuda_core::deviceptr_alloc_host" (size_t size) except+ nogil
96 DevicePtrHandle deviceptr_create_ref "cuda_core::deviceptr_create_ref" (
97 cydriver.CUdeviceptr ptr) except+ nogil
98 DevicePtrHandle deviceptr_create_with_owner "cuda_core::deviceptr_create_with_owner" (
99 cydriver.CUdeviceptr ptr, object owner) except+ nogil
101 # MR deallocation callback
 # The callback type and its registration function are declared `noexcept`
 # without `nogil`, unlike the other declarations in this block.
102 ctypedef void (*MRDeallocCallback)(
103 object mr, cydriver.CUdeviceptr ptr, size_t size,
104 const StreamHandle& stream) noexcept
105 void register_mr_dealloc_callback "cuda_core::register_mr_dealloc_callback" (
106 MRDeallocCallback cb) noexcept
107 DevicePtrHandle deviceptr_create_with_mr "cuda_core::deviceptr_create_with_mr" (
108 cydriver.CUdeviceptr ptr, size_t size, object mr) except+ nogil
110 DevicePtrHandle deviceptr_import_ipc "cuda_core::deviceptr_import_ipc" (
111 const MemoryPoolHandle& h_pool, const void* export_data, const StreamHandle& h_stream) except+ nogil
112 StreamHandle deallocation_stream "cuda_core::deallocation_stream" (
113 const DevicePtrHandle& h) noexcept nogil
114 void set_deallocation_stream "cuda_core::set_deallocation_stream" (
115 const DevicePtrHandle& h, const StreamHandle& h_stream) noexcept nogil
117 # Library handles
118 LibraryHandle create_library_handle_from_file "cuda_core::create_library_handle_from_file" (
119 const char* path) except+ nogil
120 LibraryHandle create_library_handle_from_data "cuda_core::create_library_handle_from_data" (
121 const void* data) except+ nogil
122 LibraryHandle create_library_handle_ref "cuda_core::create_library_handle_ref" (
123 cydriver.CUlibrary library) except+ nogil
125 # Kernel handles
126 KernelHandle create_kernel_handle "cuda_core::create_kernel_handle" (
127 const LibraryHandle& h_library, const char* name) except+ nogil
128 KernelHandle create_kernel_handle_ref "cuda_core::create_kernel_handle_ref" (
129 cydriver.CUkernel kernel, const LibraryHandle& h_library) except+ nogil
131 # Graphics resource handles
132 GraphicsResourceHandle create_graphics_resource_handle "cuda_core::create_graphics_resource_handle" (
133 cydriver.CUgraphicsResource resource) except+ nogil
135 # NVRTC Program handles
136 NvrtcProgramHandle create_nvrtc_program_handle "cuda_core::create_nvrtc_program_handle" (
137 cynvrtc.nvrtcProgram prog) except+ nogil
138 NvrtcProgramHandle create_nvrtc_program_handle_ref "cuda_core::create_nvrtc_program_handle_ref" (
139 cynvrtc.nvrtcProgram prog) except+ nogil
141 # NVVM Program handles
142 NvvmProgramHandle create_nvvm_program_handle "cuda_core::create_nvvm_program_handle" (
143 cynvvm.nvvmProgram prog) except+ nogil
144 NvvmProgramHandle create_nvvm_program_handle_ref "cuda_core::create_nvvm_program_handle_ref" (
145 cynvvm.nvvmProgram prog) except+ nogil
147 # nvJitLink handles
148 NvJitLinkHandle create_nvjitlink_handle "cuda_core::create_nvjitlink_handle" (
149 cynvjitlink.nvJitLinkHandle handle) except+ nogil
150 NvJitLinkHandle create_nvjitlink_handle_ref "cuda_core::create_nvjitlink_handle_ref" (
151 cynvjitlink.nvJitLinkHandle handle) except+ nogil
153 # cuLink handles
154 CuLinkHandle create_culink_handle "cuda_core::create_culink_handle" (
155 cydriver.CUlinkState state) except+ nogil
156 CuLinkHandle create_culink_handle_ref "cuda_core::create_culink_handle_ref" (
157 cydriver.CUlinkState state) except+ nogil
160# =============================================================================
161# CUDA Driver API capsule
162#
163# This provides resolved CUDA driver function pointers to the C++ code.
164# =============================================================================
# Presumably the PyCapsule name for the driver API table described in the
# banner above. NOTE(review): nothing in this listing references this
# constant -- confirm whether it is still needed.
166cdef const char* _CUDA_DRIVER_API_V1_NAME = b"cuda.core._resource_handles._CUDA_DRIVER_API_V1"
169# =============================================================================
170# CUDA driver function pointer initialization
171#
172# The C++ code declares extern function pointers (p_cuXxx) that need to be
173# populated before any handle creation functions are called. We extract these
174# from cuda.bindings.cydriver.__pyx_capi__ at module import time.
175#
176# The Cython string substitution (e.g., "reinterpret_cast<void*&>(...)")
177# allows us to assign void* values to typed function pointer variables.
178# =============================================================================
180# Declare extern variables with reinterpret_cast to allow void* assignment
# Each entry declares a Cython-visible `void*` variable whose C name is a
# reinterpret_cast<void*&> of the corresponding cuda_core::p_* variable, so
# that a plain `void*` can be assigned to it from Cython code.
181cdef extern from "_cpp/resource_handles.hpp" namespace "cuda_core":
182 # Context
183 void* p_cuDevicePrimaryCtxRetain "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRetain)"
184 void* p_cuDevicePrimaryCtxRelease "reinterpret_cast<void*&>(cuda_core::p_cuDevicePrimaryCtxRelease)"
185 void* p_cuCtxGetCurrent "reinterpret_cast<void*&>(cuda_core::p_cuCtxGetCurrent)"
187 # Stream
188 void* p_cuStreamCreateWithPriority "reinterpret_cast<void*&>(cuda_core::p_cuStreamCreateWithPriority)"
189 void* p_cuStreamDestroy "reinterpret_cast<void*&>(cuda_core::p_cuStreamDestroy)"
191 # Event
192 void* p_cuEventCreate "reinterpret_cast<void*&>(cuda_core::p_cuEventCreate)"
193 void* p_cuEventDestroy "reinterpret_cast<void*&>(cuda_core::p_cuEventDestroy)"
194 void* p_cuIpcOpenEventHandle "reinterpret_cast<void*&>(cuda_core::p_cuIpcOpenEventHandle)"
196 # Device
197 void* p_cuDeviceGetCount "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetCount)"
199 # Memory pool
200 void* p_cuMemPoolSetAccess "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolSetAccess)"
201 void* p_cuMemPoolDestroy "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolDestroy)"
202 void* p_cuMemPoolCreate "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolCreate)"
203 void* p_cuDeviceGetMemPool "reinterpret_cast<void*&>(cuda_core::p_cuDeviceGetMemPool)"
204 void* p_cuMemPoolImportFromShareableHandle "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportFromShareableHandle)"
206 # Memory allocation
207 void* p_cuMemAllocFromPoolAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocFromPoolAsync)"
208 void* p_cuMemAllocAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocAsync)"
209 void* p_cuMemAlloc "reinterpret_cast<void*&>(cuda_core::p_cuMemAlloc)"
210 void* p_cuMemAllocHost "reinterpret_cast<void*&>(cuda_core::p_cuMemAllocHost)"
212 # Memory deallocation
213 void* p_cuMemFreeAsync "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeAsync)"
214 void* p_cuMemFree "reinterpret_cast<void*&>(cuda_core::p_cuMemFree)"
215 void* p_cuMemFreeHost "reinterpret_cast<void*&>(cuda_core::p_cuMemFreeHost)"
217 # IPC
218 void* p_cuMemPoolImportPointer "reinterpret_cast<void*&>(cuda_core::p_cuMemPoolImportPointer)"
220 # Library
221 void* p_cuLibraryLoadFromFile "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadFromFile)"
222 void* p_cuLibraryLoadData "reinterpret_cast<void*&>(cuda_core::p_cuLibraryLoadData)"
223 void* p_cuLibraryUnload "reinterpret_cast<void*&>(cuda_core::p_cuLibraryUnload)"
224 void* p_cuLibraryGetKernel "reinterpret_cast<void*&>(cuda_core::p_cuLibraryGetKernel)"
226 # Linker
227 void* p_cuLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_cuLinkDestroy)"
229 # Graphics interop
230 void* p_cuGraphicsUnregisterResource "reinterpret_cast<void*&>(cuda_core::p_cuGraphicsUnregisterResource)"
232 # NVRTC
233 void* p_nvrtcDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvrtcDestroyProgram)"
235 # NVVM
236 void* p_nvvmDestroyProgram "reinterpret_cast<void*&>(cuda_core::p_nvvmDestroyProgram)"
238 # nvJitLink
239 void* p_nvJitLinkDestroy "reinterpret_cast<void*&>(cuda_core::p_nvJitLinkDestroy)"
242# Initialize driver function pointers from cydriver.__pyx_capi__ at module load
cdef void* _get_driver_fn(str name):
    # Fetch the capsule that cuda.bindings.cydriver exports under `name` in its
    # __pyx_capi__ mapping.
    cdef object exported = cydriver.__pyx_capi__[name]
    # PyCapsule_GetPointer requires the capsule's own stored name, so read it
    # back from the capsule instead of reconstructing it here.
    cdef const char* capsule_name = PyCapsule_GetName(exported)
    return PyCapsule_GetPointer(exported, capsule_name)
# Each assignment below runs at module import and stores the address returned
# by _get_driver_fn(<driver symbol name>) into the matching p_* variable.
247# Context
248p_cuDevicePrimaryCtxRetain = _get_driver_fn("cuDevicePrimaryCtxRetain")
249p_cuDevicePrimaryCtxRelease = _get_driver_fn("cuDevicePrimaryCtxRelease")
250p_cuCtxGetCurrent = _get_driver_fn("cuCtxGetCurrent")
252# Stream
253p_cuStreamCreateWithPriority = _get_driver_fn("cuStreamCreateWithPriority")
254p_cuStreamDestroy = _get_driver_fn("cuStreamDestroy")
256# Event
257p_cuEventCreate = _get_driver_fn("cuEventCreate")
258p_cuEventDestroy = _get_driver_fn("cuEventDestroy")
259p_cuIpcOpenEventHandle = _get_driver_fn("cuIpcOpenEventHandle")
261# Device
262p_cuDeviceGetCount = _get_driver_fn("cuDeviceGetCount")
264# Memory pool
265p_cuMemPoolSetAccess = _get_driver_fn("cuMemPoolSetAccess")
266p_cuMemPoolDestroy = _get_driver_fn("cuMemPoolDestroy")
267p_cuMemPoolCreate = _get_driver_fn("cuMemPoolCreate")
268p_cuDeviceGetMemPool = _get_driver_fn("cuDeviceGetMemPool")
269p_cuMemPoolImportFromShareableHandle = _get_driver_fn("cuMemPoolImportFromShareableHandle")
271# Memory allocation
272p_cuMemAllocFromPoolAsync = _get_driver_fn("cuMemAllocFromPoolAsync")
273p_cuMemAllocAsync = _get_driver_fn("cuMemAllocAsync")
274p_cuMemAlloc = _get_driver_fn("cuMemAlloc")
275p_cuMemAllocHost = _get_driver_fn("cuMemAllocHost")
277# Memory deallocation
278p_cuMemFreeAsync = _get_driver_fn("cuMemFreeAsync")
279p_cuMemFree = _get_driver_fn("cuMemFree")
280p_cuMemFreeHost = _get_driver_fn("cuMemFreeHost")
282# IPC
283p_cuMemPoolImportPointer = _get_driver_fn("cuMemPoolImportPointer")
285# Library
286p_cuLibraryLoadFromFile = _get_driver_fn("cuLibraryLoadFromFile")
287p_cuLibraryLoadData = _get_driver_fn("cuLibraryLoadData")
288p_cuLibraryUnload = _get_driver_fn("cuLibraryUnload")
289p_cuLibraryGetKernel = _get_driver_fn("cuLibraryGetKernel")
291# Linker
292p_cuLinkDestroy = _get_driver_fn("cuLinkDestroy")
294# Graphics interop
295p_cuGraphicsUnregisterResource = _get_driver_fn("cuGraphicsUnregisterResource")
297# =============================================================================
298# NVRTC function pointer initialization
299# =============================================================================
cdef void* _get_nvrtc_fn(str name):
    # Fetch the capsule that cuda.bindings.cynvrtc exports under `name` in its
    # __pyx_capi__ mapping.
    cdef object exported = cynvrtc.__pyx_capi__[name]
    # PyCapsule_GetPointer requires the capsule's own stored name, so read it
    # back from the capsule instead of reconstructing it here.
    cdef const char* capsule_name = PyCapsule_GetName(exported)
    return PyCapsule_GetPointer(exported, capsule_name)

p_nvrtcDestroyProgram = _get_nvrtc_fn("nvrtcDestroyProgram")
307# =============================================================================
308# NVVM function pointer initialization
309#
310# NVVM may not be available at runtime; the C++ deleter checks for null before
311# calling. The lookup below has no fallback for a missing __pyx_capi__ entry.
312# =============================================================================
cdef void* _get_nvvm_fn(str name):
    # Fetch the capsule that cuda.bindings.cynvvm exports under `name` in its
    # __pyx_capi__ mapping.
    cdef object exported = cynvvm.__pyx_capi__[name]
    # PyCapsule_GetPointer requires the capsule's own stored name, so read it
    # back from the capsule instead of reconstructing it here.
    cdef const char* capsule_name = PyCapsule_GetName(exported)
    return PyCapsule_GetPointer(exported, capsule_name)

p_nvvmDestroyProgram = _get_nvvm_fn("nvvmDestroyProgram")
320# =============================================================================
321# nvJitLink function pointer initialization
322#
323# nvJitLink may not be available at runtime; the C++ deleter checks for null
324# before calling. The lookup below has no fallback for a missing __pyx_capi__ entry.
325# =============================================================================
cdef void* _get_nvjitlink_fn(str name):
    # Fetch the capsule that cuda.bindings.cynvjitlink exports under `name` in
    # its __pyx_capi__ mapping.
    cdef object exported = cynvjitlink.__pyx_capi__[name]
    # PyCapsule_GetPointer requires the capsule's own stored name, so read it
    # back from the capsule instead of reconstructing it here.
    cdef const char* capsule_name = PyCapsule_GetName(exported)
    return PyCapsule_GetPointer(exported, capsule_name)

p_nvJitLinkDestroy = _get_nvjitlink_fn("nvJitLinkDestroy")