# SPDX-FileCopyrightText: Copyright (c) 2021-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE

# This code was automatically generated with version 13.1.0. Do not modify it directly.
from typing import Any, Optional
from enum import IntEnum
import cython
import ctypes
from libc.stdlib cimport calloc, malloc, free
from libc cimport string
from libc.stdint cimport int32_t, uint32_t, int64_t, uint64_t, uintptr_t
from libc.stddef cimport wchar_t
from libc.limits cimport CHAR_MIN
from libcpp.vector cimport vector
from cpython.buffer cimport PyObject_CheckBuffer, PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, PyBUF_ANY_CONTIGUOUS
from cpython.bytes cimport PyBytes_FromStringAndSize
import cuda.bindings.driver
from libcpp.map cimport map

import cuda.bindings.driver as _driver
_driver = _driver.__dict__
include "_lib/utils.pxi"

ctypedef unsigned long long signed_char_ptr
ctypedef unsigned long long unsigned_char_ptr
ctypedef unsigned long long char_ptr
ctypedef unsigned long long short_ptr
ctypedef unsigned long long unsigned_short_ptr
ctypedef unsigned long long int_ptr
ctypedef unsigned long long long_int_ptr
ctypedef unsigned long long long_long_int_ptr
ctypedef unsigned long long unsigned_int_ptr
ctypedef unsigned long long unsigned_long_int_ptr
ctypedef unsigned long long unsigned_long_long_int_ptr
ctypedef unsigned long long uint32_t_ptr
ctypedef unsigned long long uint64_t_ptr
ctypedef unsigned long long int32_t_ptr
ctypedef unsigned long long int64_t_ptr
ctypedef unsigned long long unsigned_ptr
ctypedef unsigned long long unsigned_long_long_ptr
ctypedef unsigned long long long_long_ptr
ctypedef unsigned long long size_t_ptr
ctypedef unsigned long long long_ptr
ctypedef unsigned long long float_ptr
ctypedef unsigned long long double_ptr
ctypedef unsigned long long void_ptr

#: Default page-locked allocation flag
cudaHostAllocDefault = cyruntime.cudaHostAllocDefault

#: Pinned memory accessible by all CUDA contexts
cudaHostAllocPortable = cyruntime.cudaHostAllocPortable

#: Map allocation into device space
cudaHostAllocMapped = cyruntime.cudaHostAllocMapped

#: Write-combined memory
cudaHostAllocWriteCombined = cyruntime.cudaHostAllocWriteCombined

#: Default host memory registration flag
cudaHostRegisterDefault = cyruntime.cudaHostRegisterDefault

#: Pinned memory accessible by all CUDA contexts
cudaHostRegisterPortable = cyruntime.cudaHostRegisterPortable

#: Map registered memory into device space
cudaHostRegisterMapped = cyruntime.cudaHostRegisterMapped

#: Memory-mapped I/O space
cudaHostRegisterIoMemory = cyruntime.cudaHostRegisterIoMemory

#: Memory-mapped read-only
cudaHostRegisterReadOnly = cyruntime.cudaHostRegisterReadOnly
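
# Editor's note: a minimal usage sketch, not part of the generated file. It
# assumes the cuda.bindings.runtime convention that every call returns a
# tuple beginning with a cudaError_t. Allocating portable, mapped pinned
# host memory with the flags above:
#
#   err, ptr = cudaHostAlloc(1 << 20, cudaHostAllocPortable | cudaHostAllocMapped)
#   assert err == cudaError_t.cudaSuccess
#   err, = cudaFreeHost(ptr)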

#: Default peer addressing enable flag
cudaPeerAccessDefault = cyruntime.cudaPeerAccessDefault

#: Default stream flag
cudaStreamDefault = cyruntime.cudaStreamDefault

#: Stream does not synchronize with stream 0 (the NULL stream)
cudaStreamNonBlocking = cyruntime.cudaStreamNonBlocking

#: Legacy stream handle
#:
#: Stream handle that can be passed as a cudaStream_t to use an implicit
#: stream with legacy synchronization behavior.
#:
#: See the details of the default stream synchronization behavior.
cudaStreamLegacy = cyruntime.cudaStreamLegacy

#: Per-thread stream handle
#:
#: Stream handle that can be passed as a cudaStream_t to use an implicit
#: stream with per-thread synchronization behavior.
#:
#: See the details of the default stream synchronization behavior.
cudaStreamPerThread = cyruntime.cudaStreamPerThread
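
# Editor's note: illustrative sketch only. Creating a stream that does not
# synchronize with the NULL stream (assumes the (err, result) tuple-return
# convention of cuda.bindings.runtime):
#
#   err, stream = cudaStreamCreateWithFlags(cudaStreamNonBlocking)
#   assert err == cudaError_t.cudaSuccess
#   err, = cudaStreamDestroy(stream)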

#: Default event flag
cudaEventDefault = cyruntime.cudaEventDefault

#: Event uses blocking synchronization
cudaEventBlockingSync = cyruntime.cudaEventBlockingSync

#: Event will not record timing data
cudaEventDisableTiming = cyruntime.cudaEventDisableTiming

#: Event is suitable for interprocess use. cudaEventDisableTiming must be
#: set
cudaEventInterprocess = cyruntime.cudaEventInterprocess

#: Default event record flag
cudaEventRecordDefault = cyruntime.cudaEventRecordDefault

#: Event is captured in the graph as an external event node when performing
#: stream capture
cudaEventRecordExternal = cyruntime.cudaEventRecordExternal

#: Default event wait flag
cudaEventWaitDefault = cyruntime.cudaEventWaitDefault

#: Event is captured in the graph as an external event node when performing
#: stream capture
cudaEventWaitExternal = cyruntime.cudaEventWaitExternal
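
# Editor's note: illustrative sketch only. As documented above, an
# interprocess-capable event must also disable timing (assumes the
# (err, result) tuple-return convention):
#
#   err, event = cudaEventCreateWithFlags(cudaEventDisableTiming | cudaEventInterprocess)
#   assert err == cudaError_t.cudaSuccess
#   err, = cudaEventDestroy(event)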

#: Device flag - Automatic scheduling
cudaDeviceScheduleAuto = cyruntime.cudaDeviceScheduleAuto

#: Device flag - Spin default scheduling
cudaDeviceScheduleSpin = cyruntime.cudaDeviceScheduleSpin

#: Device flag - Yield default scheduling
cudaDeviceScheduleYield = cyruntime.cudaDeviceScheduleYield

#: Device flag - Use blocking synchronization
cudaDeviceScheduleBlockingSync = cyruntime.cudaDeviceScheduleBlockingSync

#: Device flag - Use blocking synchronization [Deprecated]
cudaDeviceBlockingSync = cyruntime.cudaDeviceBlockingSync

#: Device schedule flags mask
cudaDeviceScheduleMask = cyruntime.cudaDeviceScheduleMask

#: Device flag - Support mapped pinned allocations
cudaDeviceMapHost = cyruntime.cudaDeviceMapHost

#: Device flag - Keep local memory allocation after launch
cudaDeviceLmemResizeToMax = cyruntime.cudaDeviceLmemResizeToMax

#: Device flag - Ensure synchronous memory operations on this context will
#: synchronize
cudaDeviceSyncMemops = cyruntime.cudaDeviceSyncMemops

#: Device flags mask
cudaDeviceMask = cyruntime.cudaDeviceMask
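
# Editor's note: illustrative sketch only. Device flags are combined with
# bitwise OR and applied per device (assumes the (err,) tuple-return
# convention):
#
#   err, = cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync | cudaDeviceMapHost)
#   assert err == cudaError_t.cudaSuccess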

#: Default CUDA array allocation flag
cudaArrayDefault = cyruntime.cudaArrayDefault

#: Must be set in cudaMalloc3DArray to create a layered CUDA array
cudaArrayLayered = cyruntime.cudaArrayLayered

#: Must be set in cudaMallocArray or cudaMalloc3DArray in order to bind
#: surfaces to the CUDA array
cudaArraySurfaceLoadStore = cyruntime.cudaArraySurfaceLoadStore

#: Must be set in cudaMalloc3DArray to create a cubemap CUDA array
cudaArrayCubemap = cyruntime.cudaArrayCubemap

#: Must be set in cudaMallocArray or cudaMalloc3DArray in order to perform
#: texture gather operations on the CUDA array
cudaArrayTextureGather = cyruntime.cudaArrayTextureGather

#: Must be set in cudaExternalMemoryGetMappedMipmappedArray if the
#: mipmapped array is used as a color target in a graphics API
cudaArrayColorAttachment = cyruntime.cudaArrayColorAttachment

#: Must be set in cudaMallocArray, cudaMalloc3DArray or
#: cudaMallocMipmappedArray in order to create a sparse CUDA array or CUDA
#: mipmapped array
cudaArraySparse = cyruntime.cudaArraySparse

#: Must be set in cudaMallocArray, cudaMalloc3DArray or
#: cudaMallocMipmappedArray in order to create a deferred mapping CUDA
#: array or CUDA mipmapped array
cudaArrayDeferredMapping = cyruntime.cudaArrayDeferredMapping

#: Automatically enable peer access between remote devices as needed
cudaIpcMemLazyEnablePeerAccess = cyruntime.cudaIpcMemLazyEnablePeerAccess

#: Memory can be accessed by any stream on any device
cudaMemAttachGlobal = cyruntime.cudaMemAttachGlobal

#: Memory cannot be accessed by any stream on any device
cudaMemAttachHost = cyruntime.cudaMemAttachHost

#: Memory can only be accessed by a single stream on the associated device
cudaMemAttachSingle = cyruntime.cudaMemAttachSingle
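
# Editor's note: illustrative sketch only. The cudaMemAttach* flags select
# the initial visibility of a managed allocation (assumes the (err, result)
# tuple-return convention):
#
#   err, ptr = cudaMallocManaged(1 << 20, cudaMemAttachGlobal)
#   assert err == cudaError_t.cudaSuccess
#   err, = cudaFree(ptr)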

#: Default behavior
cudaOccupancyDefault = cyruntime.cudaOccupancyDefault

#: Assume global caching is enabled and cannot be automatically turned off
cudaOccupancyDisableCachingOverride = cyruntime.cudaOccupancyDisableCachingOverride

#: Device id that represents the CPU
cudaCpuDeviceId = cyruntime.cudaCpuDeviceId

#: Device id that represents an invalid device
cudaInvalidDeviceId = cyruntime.cudaInvalidDeviceId

#: Tell the CUDA runtime that DeviceFlags is being set in cudaInitDevice
#: call
cudaInitDeviceFlagsAreValid = cyruntime.cudaInitDeviceFlagsAreValid

#: Indicates that the layered sparse CUDA array or CUDA mipmapped array has
#: a single mip tail region for all layers
cudaArraySparsePropertiesSingleMipTail = cyruntime.cudaArraySparsePropertiesSingleMipTail

#: This flag, if set, indicates that the memory will be used as a buffer
#: for hardware accelerated decompression.
cudaMemPoolCreateUsageHwDecompress = cyruntime.cudaMemPoolCreateUsageHwDecompress

#: CUDA IPC Handle Size
CUDA_IPC_HANDLE_SIZE = cyruntime.CUDA_IPC_HANDLE_SIZE

#: Indicates that the external memory object is a dedicated resource
cudaExternalMemoryDedicated = cyruntime.cudaExternalMemoryDedicated

#: When the flags parameter of
#: :py:obj:`~.cudaExternalSemaphoreSignalParams` contains this flag, it
#: indicates that signaling an external semaphore object should skip
#: performing appropriate memory synchronization operations over all the
#: external memory objects that are imported as
#: :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, which otherwise are
#: performed by default to ensure data coherency with other importers of
#: the same NvSciBuf memory objects.
cudaExternalSemaphoreSignalSkipNvSciBufMemSync = cyruntime.cudaExternalSemaphoreSignalSkipNvSciBufMemSync

#: When the flags parameter of
#: :py:obj:`~.cudaExternalSemaphoreWaitParams` contains this flag, it
#: indicates that waiting on an external semaphore object should skip
#: performing appropriate memory synchronization operations over all the
#: external memory objects that are imported as
#: :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, which otherwise are
#: performed by default to ensure data coherency with other importers of
#: the same NvSciBuf memory objects.
cudaExternalSemaphoreWaitSkipNvSciBufMemSync = cyruntime.cudaExternalSemaphoreWaitSkipNvSciBufMemSync

#: When the flags parameter of :py:obj:`~.cudaDeviceGetNvSciSyncAttributes`
#: is set to this, it indicates that the application needs signaler-specific
#: NvSciSyncAttr to be filled by :py:obj:`~.cudaDeviceGetNvSciSyncAttributes`.
cudaNvSciSyncAttrSignal = cyruntime.cudaNvSciSyncAttrSignal

#: When the flags parameter of :py:obj:`~.cudaDeviceGetNvSciSyncAttributes`
#: is set to this, it indicates that the application needs waiter-specific
#: NvSciSyncAttr to be filled by :py:obj:`~.cudaDeviceGetNvSciSyncAttributes`.
cudaNvSciSyncAttrWait = cyruntime.cudaNvSciSyncAttrWait

#: This port activates when the kernel has finished executing.
cudaGraphKernelNodePortDefault = cyruntime.cudaGraphKernelNodePortDefault

#: This port activates when all blocks of the kernel have performed
#: cudaTriggerProgrammaticLaunchCompletion() or have terminated. It must be
#: used with edge type :py:obj:`~.cudaGraphDependencyTypeProgrammatic`. See
#: also :py:obj:`~.cudaLaunchAttributeProgrammaticEvent`.
cudaGraphKernelNodePortProgrammatic = cyruntime.cudaGraphKernelNodePortProgrammatic

#: This port activates when all blocks of the kernel have begun execution.
#: See also :py:obj:`~.cudaLaunchAttributeLaunchCompletionEvent`.
cudaGraphKernelNodePortLaunchCompletion = cyruntime.cudaGraphKernelNodePortLaunchCompletion

cudaStreamAttributeAccessPolicyWindow = cyruntime.cudaStreamAttributeAccessPolicyWindow

cudaStreamAttributeSynchronizationPolicy = cyruntime.cudaStreamAttributeSynchronizationPolicy

cudaStreamAttributeMemSyncDomainMap = cyruntime.cudaStreamAttributeMemSyncDomainMap

cudaStreamAttributeMemSyncDomain = cyruntime.cudaStreamAttributeMemSyncDomain

cudaStreamAttributePriority = cyruntime.cudaStreamAttributePriority

cudaKernelNodeAttributeAccessPolicyWindow = cyruntime.cudaKernelNodeAttributeAccessPolicyWindow

cudaKernelNodeAttributeCooperative = cyruntime.cudaKernelNodeAttributeCooperative

cudaKernelNodeAttributePriority = cyruntime.cudaKernelNodeAttributePriority

cudaKernelNodeAttributeClusterDimension = cyruntime.cudaKernelNodeAttributeClusterDimension

cudaKernelNodeAttributeClusterSchedulingPolicyPreference = cyruntime.cudaKernelNodeAttributeClusterSchedulingPolicyPreference

cudaKernelNodeAttributeMemSyncDomainMap = cyruntime.cudaKernelNodeAttributeMemSyncDomainMap

cudaKernelNodeAttributeMemSyncDomain = cyruntime.cudaKernelNodeAttributeMemSyncDomain

cudaKernelNodeAttributePreferredSharedMemoryCarveout = cyruntime.cudaKernelNodeAttributePreferredSharedMemoryCarveout

cudaKernelNodeAttributeDeviceUpdatableKernelNode = cyruntime.cudaKernelNodeAttributeDeviceUpdatableKernelNode

cudaKernelNodeAttributeNvlinkUtilCentricScheduling = cyruntime.cudaKernelNodeAttributeNvlinkUtilCentricScheduling

cudaSurfaceType1D = cyruntime.cudaSurfaceType1D

cudaSurfaceType2D = cyruntime.cudaSurfaceType2D

cudaSurfaceType3D = cyruntime.cudaSurfaceType3D

cudaSurfaceTypeCubemap = cyruntime.cudaSurfaceTypeCubemap

cudaSurfaceType1DLayered = cyruntime.cudaSurfaceType1DLayered

cudaSurfaceType2DLayered = cyruntime.cudaSurfaceType2DLayered

cudaSurfaceTypeCubemapLayered = cyruntime.cudaSurfaceTypeCubemapLayered

cudaTextureType1D = cyruntime.cudaTextureType1D

cudaTextureType2D = cyruntime.cudaTextureType2D

cudaTextureType3D = cyruntime.cudaTextureType3D

cudaTextureTypeCubemap = cyruntime.cudaTextureTypeCubemap

cudaTextureType1DLayered = cyruntime.cudaTextureType1DLayered

cudaTextureType2DLayered = cyruntime.cudaTextureType2DLayered

cudaTextureTypeCubemapLayered = cyruntime.cudaTextureTypeCubemapLayered

#: CUDA Runtime API Version
CUDART_VERSION = cyruntime.CUDART_VERSION

__CUDART_API_VERSION = cyruntime.__CUDART_API_VERSION

#: Maximum number of planes per frame
CUDA_EGL_MAX_PLANES = cyruntime.CUDA_EGL_MAX_PLANES
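
# Editor's note: illustrative sketch only. CUDART_VERSION is the
# compile-time version of the runtime headers; the runtime and driver
# versions can also be queried at run time (assumes the (err, result)
# tuple-return convention):
#
#   err, runtime_version = cudaRuntimeGetVersion()
#   err, driver_version = cudaDriverGetVersion()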

class cudaError_t(IntEnum):
    """
    CUDA error types
    """

    #: The API call returned with no errors. In the case of query calls,
    #: this also means that the operation being queried is complete (see
    #: :py:obj:`~.cudaEventQuery()` and :py:obj:`~.cudaStreamQuery()`).
    cudaSuccess = cyruntime.cudaError.cudaSuccess

    #: This indicates that one or more of the parameters passed to the API
    #: call is not within an acceptable range of values.
    cudaErrorInvalidValue = cyruntime.cudaError.cudaErrorInvalidValue

    #: The API call failed because it was unable to allocate enough memory
    #: or other resources to perform the requested operation.
    cudaErrorMemoryAllocation = cyruntime.cudaError.cudaErrorMemoryAllocation

    #: The API call failed because the CUDA driver and runtime could not be
    #: initialized.
    cudaErrorInitializationError = cyruntime.cudaError.cudaErrorInitializationError

    #: This indicates that a CUDA Runtime API call cannot be executed
    #: because it is being called during process shut down, at a point in
    #: time after CUDA driver has been unloaded.
    cudaErrorCudartUnloading = cyruntime.cudaError.cudaErrorCudartUnloading

    #: This indicates the profiler is not initialized for this run. This
    #: can happen when the application is running with external profiling
    #: tools like the Visual Profiler.
    cudaErrorProfilerDisabled = cyruntime.cudaError.cudaErrorProfilerDisabled

    #: [Deprecated]
    cudaErrorProfilerNotInitialized = cyruntime.cudaError.cudaErrorProfilerNotInitialized

    #: [Deprecated]
    cudaErrorProfilerAlreadyStarted = cyruntime.cudaError.cudaErrorProfilerAlreadyStarted

    #: [Deprecated]
    cudaErrorProfilerAlreadyStopped = cyruntime.cudaError.cudaErrorProfilerAlreadyStopped

    #: This indicates that a kernel launch is requesting resources that can
    #: never be satisfied by the current device. Requesting more shared
    #: memory per block than the device supports will trigger this error,
    #: as will requesting too many threads or blocks. See
    #: :py:obj:`~.cudaDeviceProp` for more device limitations.
    cudaErrorInvalidConfiguration = cyruntime.cudaError.cudaErrorInvalidConfiguration

    #: This indicates that one or more of the pitch-related parameters
    #: passed to the API call is not within the acceptable range for pitch.
    cudaErrorInvalidPitchValue = cyruntime.cudaError.cudaErrorInvalidPitchValue

    #: This indicates that the symbol name/identifier passed to the API
    #: call is not a valid name or identifier.
    cudaErrorInvalidSymbol = cyruntime.cudaError.cudaErrorInvalidSymbol

    #: This indicates that at least one host pointer passed to the API call
    #: is not a valid host pointer. [Deprecated]
    cudaErrorInvalidHostPointer = cyruntime.cudaError.cudaErrorInvalidHostPointer

    #: This indicates that at least one device pointer passed to the API
    #: call is not a valid device pointer. [Deprecated]
    cudaErrorInvalidDevicePointer = cyruntime.cudaError.cudaErrorInvalidDevicePointer

    #: This indicates that the texture passed to the API call is not a
    #: valid texture.
    cudaErrorInvalidTexture = cyruntime.cudaError.cudaErrorInvalidTexture

    #: This indicates that the texture binding is not valid. This occurs if
    #: you call :py:obj:`~.cudaGetTextureAlignmentOffset()` with an unbound
    #: texture.
    cudaErrorInvalidTextureBinding = cyruntime.cudaError.cudaErrorInvalidTextureBinding

    #: This indicates that the channel descriptor passed to the API call is
    #: not valid. This occurs if the format is not one of the formats
    #: specified by :py:obj:`~.cudaChannelFormatKind`, or if one of the
    #: dimensions is invalid.
    cudaErrorInvalidChannelDescriptor = cyruntime.cudaError.cudaErrorInvalidChannelDescriptor

    #: This indicates that the direction of the memcpy passed to the API
    #: call is not one of the types specified by
    #: :py:obj:`~.cudaMemcpyKind`.
    cudaErrorInvalidMemcpyDirection = cyruntime.cudaError.cudaErrorInvalidMemcpyDirection

    #: This indicated that the user has taken the address of a constant
    #: variable, which was forbidden up until the CUDA 3.1 release.
    #: [Deprecated]
    cudaErrorAddressOfConstant = cyruntime.cudaError.cudaErrorAddressOfConstant

    #: This indicated that a texture fetch was not able to be performed.
    #: This was previously used for device emulation of texture operations.
    #: [Deprecated]
    cudaErrorTextureFetchFailed = cyruntime.cudaError.cudaErrorTextureFetchFailed

    #: This indicated that a texture was not bound for access. This was
    #: previously used for device emulation of texture operations.
    #: [Deprecated]
    cudaErrorTextureNotBound = cyruntime.cudaError.cudaErrorTextureNotBound

    #: This indicated that a synchronization operation had failed. This was
    #: previously used for some device emulation functions. [Deprecated]
    cudaErrorSynchronizationError = cyruntime.cudaError.cudaErrorSynchronizationError

    #: This indicates that a non-float texture was being accessed with
    #: linear filtering. This is not supported by CUDA.
    cudaErrorInvalidFilterSetting = cyruntime.cudaError.cudaErrorInvalidFilterSetting

    #: This indicates that an attempt was made to read an unsupported data
    #: type as a normalized float. This is not supported by CUDA.
    cudaErrorInvalidNormSetting = cyruntime.cudaError.cudaErrorInvalidNormSetting

    #: Mixing of device and device emulation code was not allowed.
    #: [Deprecated]
    cudaErrorMixedDeviceExecution = cyruntime.cudaError.cudaErrorMixedDeviceExecution

    #: This indicates that the API call is not yet implemented. Production
    #: releases of CUDA will never return this error. [Deprecated]
    cudaErrorNotYetImplemented = cyruntime.cudaError.cudaErrorNotYetImplemented

    #: This indicated that an emulated device pointer exceeded the 32-bit
    #: address range. [Deprecated]
    cudaErrorMemoryValueTooLarge = cyruntime.cudaError.cudaErrorMemoryValueTooLarge

    #: This indicates that the CUDA driver that the application has loaded
    #: is a stub library. Applications that run with the stub rather than a
    #: real driver loaded will result in CUDA API returning this error.
    cudaErrorStubLibrary = cyruntime.cudaError.cudaErrorStubLibrary

    #: This indicates that the installed NVIDIA CUDA driver is older than
    #: the CUDA runtime library. This is not a supported configuration.
    #: Users should install an updated NVIDIA display driver to allow the
    #: application to run.
    cudaErrorInsufficientDriver = cyruntime.cudaError.cudaErrorInsufficientDriver

    #: This indicates that the API call requires a newer CUDA driver than
    #: the one currently installed. Users should install an updated NVIDIA
    #: CUDA driver to allow the API call to succeed.
    cudaErrorCallRequiresNewerDriver = cyruntime.cudaError.cudaErrorCallRequiresNewerDriver

    #: This indicates that the surface passed to the API call is not a
    #: valid surface.
    cudaErrorInvalidSurface = cyruntime.cudaError.cudaErrorInvalidSurface

    #: This indicates that multiple global or constant variables (across
    #: separate CUDA source files in the application) share the same string
    #: name.
    cudaErrorDuplicateVariableName = cyruntime.cudaError.cudaErrorDuplicateVariableName

    #: This indicates that multiple textures (across separate CUDA source
    #: files in the application) share the same string name.
    cudaErrorDuplicateTextureName = cyruntime.cudaError.cudaErrorDuplicateTextureName

    #: This indicates that multiple surfaces (across separate CUDA source
    #: files in the application) share the same string name.
    cudaErrorDuplicateSurfaceName = cyruntime.cudaError.cudaErrorDuplicateSurfaceName

    #: This indicates that all CUDA devices are busy or unavailable at the
    #: current time. Devices are often busy/unavailable due to use of
    #: :py:obj:`~.cudaComputeModeProhibited`,
    #: :py:obj:`~.cudaComputeModeExclusiveProcess`, or when long running
    #: CUDA kernels have filled up the GPU and are blocking new work from
    #: starting. They can also be unavailable due to memory constraints on
    #: a device that already has active CUDA work being performed.
    cudaErrorDevicesUnavailable = cyruntime.cudaError.cudaErrorDevicesUnavailable

    #: This indicates that the current context is not compatible with the
    #: CUDA Runtime. This can only occur if you are using CUDA
    #: Runtime/Driver interoperability and have created an existing Driver
    #: context using the driver API. The Driver context may be incompatible
    #: either because the Driver context was created using an older version
    #: of the API, because the Runtime API call expects a primary driver
    #: context and the Driver context is not primary, or because the Driver
    #: context has been destroyed. Please see "Interactions with the CUDA
    #: Driver API" for more information.
    cudaErrorIncompatibleDriverContext = cyruntime.cudaError.cudaErrorIncompatibleDriverContext

    #: The device function being invoked (usually via
    #: :py:obj:`~.cudaLaunchKernel()`) was not previously configured via
    #: the :py:obj:`~.cudaConfigureCall()` function.
    cudaErrorMissingConfiguration = cyruntime.cudaError.cudaErrorMissingConfiguration

    #: This indicated that a previous kernel launch failed. This was
    #: previously used for device emulation of kernel launches.
    #: [Deprecated]
    cudaErrorPriorLaunchFailure = cyruntime.cudaError.cudaErrorPriorLaunchFailure

    #: This error indicates that a device runtime grid launch did not occur
    #: because the depth of the child grid would exceed the maximum
    #: supported number of nested grid launches.
    cudaErrorLaunchMaxDepthExceeded = cyruntime.cudaError.cudaErrorLaunchMaxDepthExceeded

    #: This error indicates that a grid launch did not occur because the
    #: kernel uses file-scoped textures which are unsupported by the device
    #: runtime. Kernels launched via the device runtime only support
    #: textures created with the Texture Object APIs.
    cudaErrorLaunchFileScopedTex = cyruntime.cudaError.cudaErrorLaunchFileScopedTex

    #: This error indicates that a grid launch did not occur because the
    #: kernel uses file-scoped surfaces which are unsupported by the device
    #: runtime. Kernels launched via the device runtime only support
    #: surfaces created with the Surface Object APIs.
    cudaErrorLaunchFileScopedSurf = cyruntime.cudaError.cudaErrorLaunchFileScopedSurf

    #: This error indicates that a call to
    #: :py:obj:`~.cudaDeviceSynchronize` made from the device runtime
    #: failed because the call was made at grid depth greater than
    #: either the default (2 levels of grids) or user specified device
    #: limit :py:obj:`~.cudaLimitDevRuntimeSyncDepth`. To be able to
    #: synchronize on launched grids at a greater depth successfully, the
    #: maximum nested depth at which :py:obj:`~.cudaDeviceSynchronize` will
    #: be called must be specified with the
    #: :py:obj:`~.cudaLimitDevRuntimeSyncDepth` limit to the
    #: :py:obj:`~.cudaDeviceSetLimit` API before the host-side launch of a
    #: kernel using the device runtime. Keep in mind that additional levels
    #: of sync depth require the runtime to reserve large amounts of device
    #: memory that cannot be used for user allocations. Note that
    #: :py:obj:`~.cudaDeviceSynchronize` made from device runtime is only
    #: supported on devices of compute capability < 9.0.
    cudaErrorSyncDepthExceeded = cyruntime.cudaError.cudaErrorSyncDepthExceeded

    #: This error indicates that a device runtime grid launch failed
    #: because the launch would exceed the limit
    #: :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount`. For this launch
    #: to proceed successfully, :py:obj:`~.cudaDeviceSetLimit` must be
    #: called to set the :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount`
    #: to be higher than the upper bound of outstanding launches that can
    #: be issued to the device runtime. Keep in mind that raising the limit
    #: of pending device runtime launches will require the runtime to
    #: reserve device memory that cannot be used for user allocations.
    cudaErrorLaunchPendingCountExceeded = cyruntime.cudaError.cudaErrorLaunchPendingCountExceeded

    #: The requested device function does not exist or is not compiled for
    #: the proper device architecture.
    cudaErrorInvalidDeviceFunction = cyruntime.cudaError.cudaErrorInvalidDeviceFunction

    #: This indicates that no CUDA-capable devices were detected by the
    #: installed CUDA driver.
    cudaErrorNoDevice = cyruntime.cudaError.cudaErrorNoDevice

    #: This indicates that the device ordinal supplied by the user does not
    #: correspond to a valid CUDA device or that the action requested is
    #: invalid for the specified device.
    cudaErrorInvalidDevice = cyruntime.cudaError.cudaErrorInvalidDevice

    #: This indicates that the device doesn't have a valid Grid License.
    cudaErrorDeviceNotLicensed = cyruntime.cudaError.cudaErrorDeviceNotLicensed

    #: By default, the CUDA runtime may perform a minimal set of self-
    #: tests, as well as CUDA driver tests, to establish the validity of
    #: both. Introduced in CUDA 11.2, this error return indicates that at
    #: least one of these tests has failed and the validity of either the
    #: runtime or the driver could not be established.
    cudaErrorSoftwareValidityNotEstablished = cyruntime.cudaError.cudaErrorSoftwareValidityNotEstablished

    #: This indicates an internal startup failure in the CUDA runtime.
    cudaErrorStartupFailure = cyruntime.cudaError.cudaErrorStartupFailure

    #: This indicates that the device kernel image is invalid.
    cudaErrorInvalidKernelImage = cyruntime.cudaError.cudaErrorInvalidKernelImage

    #: This most frequently indicates that there is no context bound to the
    #: current thread. This can also be returned if the context passed to
    #: an API call is not a valid handle (such as a context that has had
    #: :py:obj:`~.cuCtxDestroy()` invoked on it). This can also be returned
    #: if a user mixes different API versions (i.e. 3010 context with 3020
    #: API calls). See :py:obj:`~.cuCtxGetApiVersion()` for more details.
    cudaErrorDeviceUninitialized = cyruntime.cudaError.cudaErrorDeviceUninitialized

    #: This indicates that the buffer object could not be mapped.
    cudaErrorMapBufferObjectFailed = cyruntime.cudaError.cudaErrorMapBufferObjectFailed

    #: This indicates that the buffer object could not be unmapped.
    cudaErrorUnmapBufferObjectFailed = cyruntime.cudaError.cudaErrorUnmapBufferObjectFailed

    #: This indicates that the specified array is currently mapped and thus
    #: cannot be destroyed.
    cudaErrorArrayIsMapped = cyruntime.cudaError.cudaErrorArrayIsMapped

    #: This indicates that the resource is already mapped.
    cudaErrorAlreadyMapped = cyruntime.cudaError.cudaErrorAlreadyMapped

    #: This indicates that there is no kernel image available that is
    #: suitable for the device. This can occur when a user specifies code
    #: generation options for a particular CUDA source file that do not
    #: include the corresponding device configuration.
    cudaErrorNoKernelImageForDevice = cyruntime.cudaError.cudaErrorNoKernelImageForDevice

    #: This indicates that a resource has already been acquired.
    cudaErrorAlreadyAcquired = cyruntime.cudaError.cudaErrorAlreadyAcquired

    #: This indicates that a resource is not mapped.
    cudaErrorNotMapped = cyruntime.cudaError.cudaErrorNotMapped

    #: This indicates that a mapped resource is not available for access as
    #: an array.
    cudaErrorNotMappedAsArray = cyruntime.cudaError.cudaErrorNotMappedAsArray

    #: This indicates that a mapped resource is not available for access as
    #: a pointer.
    cudaErrorNotMappedAsPointer = cyruntime.cudaError.cudaErrorNotMappedAsPointer

    #: This indicates that an uncorrectable ECC error was detected during
    #: execution.
    cudaErrorECCUncorrectable = cyruntime.cudaError.cudaErrorECCUncorrectable

    #: This indicates that the :py:obj:`~.cudaLimit` passed to the API call
    #: is not supported by the active device.
    cudaErrorUnsupportedLimit = cyruntime.cudaError.cudaErrorUnsupportedLimit

    #: This indicates that a call tried to access an exclusive-thread
    #: device that is already in use by a different thread.
    cudaErrorDeviceAlreadyInUse = cyruntime.cudaError.cudaErrorDeviceAlreadyInUse

    #: This error indicates that P2P access is not supported across the
    #: given devices.
    cudaErrorPeerAccessUnsupported = cyruntime.cudaError.cudaErrorPeerAccessUnsupported

    #: A PTX compilation failed. The runtime may fall back to compiling PTX
    #: if an application does not contain a suitable binary for the current
    #: device.
    cudaErrorInvalidPtx = cyruntime.cudaError.cudaErrorInvalidPtx

    #: This indicates an error with the OpenGL or DirectX context.
    cudaErrorInvalidGraphicsContext = cyruntime.cudaError.cudaErrorInvalidGraphicsContext

    #: This indicates that an uncorrectable NVLink error was detected
    #: during the execution.
    cudaErrorNvlinkUncorrectable = cyruntime.cudaError.cudaErrorNvlinkUncorrectable

    #: This indicates that the PTX JIT compiler library was not found. The
    #: JIT Compiler library is used for PTX compilation. The runtime may
    #: fall back to compiling PTX if an application does not contain a
    #: suitable binary for the current device.
    cudaErrorJitCompilerNotFound = cyruntime.cudaError.cudaErrorJitCompilerNotFound

    #: This indicates that the provided PTX was compiled with an
    #: unsupported toolchain. The most common reason for this is that the
    #: PTX was generated by a compiler newer than what is supported by the
    #: CUDA driver and PTX JIT compiler.
    cudaErrorUnsupportedPtxVersion = cyruntime.cudaError.cudaErrorUnsupportedPtxVersion

    #: This indicates that the JIT compilation was disabled. The JIT
    #: compilation compiles PTX. The runtime may fall back to compiling PTX
    #: if an application does not contain a suitable binary for the current
    #: device.
    cudaErrorJitCompilationDisabled = cyruntime.cudaError.cudaErrorJitCompilationDisabled

    #: This indicates that the provided execution affinity is not supported
    #: by the device.
    cudaErrorUnsupportedExecAffinity = cyruntime.cudaError.cudaErrorUnsupportedExecAffinity

    #: This indicates that the code to be compiled by the PTX JIT contains
    #: an unsupported call to cudaDeviceSynchronize.
    cudaErrorUnsupportedDevSideSync = cyruntime.cudaError.cudaErrorUnsupportedDevSideSync

    #: This indicates that an exception occurred on the device that is now
    #: contained by the GPU's error containment capability. Common causes
    #: are (a) certain types of invalid accesses of peer GPU memory over
    #: NVLink, and (b) certain classes of hardware errors. This leaves the
    #: process in an inconsistent state and any further CUDA work will
    #: return the same error. To continue using CUDA, the process must be
    #: terminated and relaunched.
    cudaErrorContained = cyruntime.cudaError.cudaErrorContained

    #: This indicates that the device kernel source is invalid.
    cudaErrorInvalidSource = cyruntime.cudaError.cudaErrorInvalidSource

    #: This indicates that the file specified was not found.
    cudaErrorFileNotFound = cyruntime.cudaError.cudaErrorFileNotFound

    #: This indicates that a link to a shared object failed to resolve.
    cudaErrorSharedObjectSymbolNotFound = cyruntime.cudaError.cudaErrorSharedObjectSymbolNotFound

    #: This indicates that initialization of a shared object failed.
    cudaErrorSharedObjectInitFailed = cyruntime.cudaError.cudaErrorSharedObjectInitFailed

    #: This error indicates that an OS call failed.
    cudaErrorOperatingSystem = cyruntime.cudaError.cudaErrorOperatingSystem

    #: This indicates that a resource handle passed to the API call was not
    #: valid. Resource handles are opaque types like
    #: :py:obj:`~.cudaStream_t` and :py:obj:`~.cudaEvent_t`.
    cudaErrorInvalidResourceHandle = cyruntime.cudaError.cudaErrorInvalidResourceHandle

    #: This indicates that a resource required by the API call is not in a
    #: valid state to perform the requested operation.
    cudaErrorIllegalState = cyruntime.cudaError.cudaErrorIllegalState

    #: This indicates an attempt was made to introspect an object in a way
    #: that would discard semantically important information. This is
    #: either due to the object using functionality newer than the API
    #: version used to introspect it or omission of optional return
    #: arguments.
    cudaErrorLossyQuery = cyruntime.cudaError.cudaErrorLossyQuery

    #: This indicates that a named symbol was not found. Examples of
    #: symbols are global/constant variable names, driver function names,
    #: texture names, and surface names.
    cudaErrorSymbolNotFound = cyruntime.cudaError.cudaErrorSymbolNotFound

    #: This indicates that asynchronous operations issued previously have
    #: not completed yet. This result is not actually an error, but must be
    #: indicated differently than :py:obj:`~.cudaSuccess` (which indicates
    #: completion). Calls that may return this value include
    #: :py:obj:`~.cudaEventQuery()` and :py:obj:`~.cudaStreamQuery()`.
    cudaErrorNotReady = cyruntime.cudaError.cudaErrorNotReady

    #: The device encountered a load or store instruction on an invalid
    #: memory address. This leaves the process in an inconsistent state and
    #: any further CUDA work will return the same error. To continue using
    #: CUDA, the process must be terminated and relaunched.
    cudaErrorIllegalAddress = cyruntime.cudaError.cudaErrorIllegalAddress

    #: This indicates that a launch did not occur because it did not have
    #: appropriate resources. Although this error is similar to
    #: :py:obj:`~.cudaErrorInvalidConfiguration`, this error usually
    #: indicates that the user has attempted to pass too many arguments to
    #: the device kernel, or the kernel launch specifies too many threads
    #: for the kernel's register count.
    cudaErrorLaunchOutOfResources = cyruntime.cudaError.cudaErrorLaunchOutOfResources

    #: This indicates that the device kernel took too long to execute. This
    #: can only occur if timeouts are enabled - see the device attribute
    #: :py:obj:`~.cudaDevAttrKernelExecTimeout` for more information. This
    #: leaves the process in an inconsistent state and any further CUDA
    #: work will return the same error. To continue using CUDA, the process
    #: must be terminated and relaunched.
    cudaErrorLaunchTimeout = cyruntime.cudaError.cudaErrorLaunchTimeout

    #: This error indicates a kernel launch that uses an incompatible
    #: texturing mode.
    cudaErrorLaunchIncompatibleTexturing = cyruntime.cudaError.cudaErrorLaunchIncompatibleTexturing

    #: This error indicates that a call to
    #: :py:obj:`~.cudaDeviceEnablePeerAccess()` is trying to re-enable peer
    #: addressing from a context which has already had peer addressing
    #: enabled.
    cudaErrorPeerAccessAlreadyEnabled = cyruntime.cudaError.cudaErrorPeerAccessAlreadyEnabled

    #: This error indicates that :py:obj:`~.cudaDeviceDisablePeerAccess()`
    #: is trying to disable peer addressing which has not been enabled yet
    #: via :py:obj:`~.cudaDeviceEnablePeerAccess()`.
    cudaErrorPeerAccessNotEnabled = cyruntime.cudaError.cudaErrorPeerAccessNotEnabled

    #: This indicates that the user has called
    #: :py:obj:`~.cudaSetValidDevices()`, :py:obj:`~.cudaSetDeviceFlags()`,
    #: :py:obj:`~.cudaD3D9SetDirect3DDevice()`,
    #: :py:obj:`~.cudaD3D10SetDirect3DDevice`,
    #: :py:obj:`~.cudaD3D11SetDirect3DDevice()`, or
    #: :py:obj:`~.cudaVDPAUSetVDPAUDevice()` after initializing the CUDA
    #: runtime by calling non-device management operations (allocating
    #: memory and launching kernels are examples of non-device management
    #: operations). This error can also be returned if using runtime/driver
    #: interoperability and there is an existing :py:obj:`~.CUcontext`
    #: active on the host thread.
    cudaErrorSetOnActiveProcess = cyruntime.cudaError.cudaErrorSetOnActiveProcess

    #: This error indicates that the context current to the calling thread
    #: has been destroyed using :py:obj:`~.cuCtxDestroy`, or is a primary
    #: context which has not yet been initialized.
    cudaErrorContextIsDestroyed = cyruntime.cudaError.cudaErrorContextIsDestroyed

    #: An assert triggered in device code during kernel execution. The
    #: device cannot be used again. All existing allocations are invalid.
    #: To continue using CUDA, the process must be terminated and
    #: relaunched.
    cudaErrorAssert = cyruntime.cudaError.cudaErrorAssert

    #: This error indicates that the hardware resources required to enable
    #: peer access have been exhausted for one or more of the devices
    #: passed to :py:obj:`~.cudaEnablePeerAccess()`.
    cudaErrorTooManyPeers = cyruntime.cudaError.cudaErrorTooManyPeers

    #: This error indicates that the memory range passed to
    #: :py:obj:`~.cudaHostRegister()` has already been registered.
    cudaErrorHostMemoryAlreadyRegistered = cyruntime.cudaError.cudaErrorHostMemoryAlreadyRegistered

    #: This error indicates that the pointer passed to
    #: :py:obj:`~.cudaHostUnregister()` does not correspond to any
    #: currently registered memory region.
    cudaErrorHostMemoryNotRegistered = cyruntime.cudaError.cudaErrorHostMemoryNotRegistered

    #: Device encountered an error in the call stack during kernel
    #: execution, possibly due to stack corruption or exceeding the stack
    #: size limit. This leaves the process in an inconsistent state and any
    #: further CUDA work will return the same error. To continue using
    #: CUDA, the process must be terminated and relaunched.
    cudaErrorHardwareStackError = cyruntime.cudaError.cudaErrorHardwareStackError

    #: The device encountered an illegal instruction during kernel
    #: execution. This leaves the process in an inconsistent state and any
    #: further CUDA work will return the same error. To continue using
    #: CUDA, the process must be terminated and relaunched.
    cudaErrorIllegalInstruction = cyruntime.cudaError.cudaErrorIllegalInstruction

    #: The device encountered a load or store instruction on a memory
    #: address which is not aligned. This leaves the process in an
    #: inconsistent state and any further CUDA work will return the same
    #: error. To continue using CUDA, the process must be terminated and
    #: relaunched.
    cudaErrorMisalignedAddress = cyruntime.cudaError.cudaErrorMisalignedAddress

    #: While executing a kernel, the device encountered an instruction
    #: which can only operate on memory locations in certain address spaces
    #: (global, shared, or local), but was supplied a memory address not
    #: belonging to an allowed address space. This leaves the process in an
    #: inconsistent state and any further CUDA work will return the same
    #: error. To continue using CUDA, the process must be terminated and
    #: relaunched.
    cudaErrorInvalidAddressSpace = cyruntime.cudaError.cudaErrorInvalidAddressSpace

    #: The device encountered an invalid program counter. This leaves the
    #: process in an inconsistent state and any further CUDA work will
    #: return the same error. To continue using CUDA, the process must be
    #: terminated and relaunched.
    cudaErrorInvalidPc = cyruntime.cudaError.cudaErrorInvalidPc

    #: An exception occurred on the device while executing a kernel. Common
    #: causes include dereferencing an invalid device pointer and accessing
    #: out of bounds shared memory. Less common cases can be system
    #: specific - more information about these cases can be found in the
    #: system specific user guide. This leaves the process in an
    #: inconsistent state and any further CUDA work will return the same
    #: error. To continue using CUDA, the process must be terminated and
    #: relaunched.
    cudaErrorLaunchFailure = cyruntime.cudaError.cudaErrorLaunchFailure

    #: This error indicates that the number of blocks launched per grid for
    #: a kernel that was launched via
    #: :py:obj:`~.cudaLaunchCooperativeKernel` exceeds the maximum number
    #: of blocks as allowed by
    #: :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessor` or
    #: :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`
    #: times the number of multiprocessors as specified by the device
    #: attribute :py:obj:`~.cudaDevAttrMultiProcessorCount`.
    cudaErrorCooperativeLaunchTooLarge = cyruntime.cudaError.cudaErrorCooperativeLaunchTooLarge

    #: An exception occurred on the device while exiting a kernel using
    #: tensor memory: the tensor memory was not completely deallocated.
    #: This leaves the process in an inconsistent state and any further
    #: CUDA work will return the same error. To continue using CUDA, the
    #: process must be terminated and relaunched.
    cudaErrorTensorMemoryLeak = cyruntime.cudaError.cudaErrorTensorMemoryLeak

    #: This error indicates the attempted operation is not permitted.
    cudaErrorNotPermitted = cyruntime.cudaError.cudaErrorNotPermitted

    #: This error indicates the attempted operation is not supported on the
    #: current system or device.
    cudaErrorNotSupported = cyruntime.cudaError.cudaErrorNotSupported

    #: This error indicates that the system is not yet ready to start any
    #: CUDA work. To continue using CUDA, verify the system configuration
    #: is in a valid state and all required driver daemons are actively
    #: running. More information about this error can be found in the
    #: system specific user guide.
    cudaErrorSystemNotReady = cyruntime.cudaError.cudaErrorSystemNotReady

    #: This error indicates that there is a mismatch between the versions
    #: of the display driver and the CUDA driver. Refer to the
    #: compatibility documentation for supported versions.
    cudaErrorSystemDriverMismatch = cyruntime.cudaError.cudaErrorSystemDriverMismatch

    #: This error indicates that the system was upgraded to run with
    #: forward compatibility but the visible hardware detected by CUDA does
    #: not support this configuration. Refer to the compatibility
    #: documentation for the supported hardware matrix or ensure that only
    #: supported hardware is visible during initialization via the
    #: CUDA_VISIBLE_DEVICES environment variable.
    cudaErrorCompatNotSupportedOnDevice = cyruntime.cudaError.cudaErrorCompatNotSupportedOnDevice

    #: This error indicates that the MPS client failed to connect to the
    #: MPS control daemon or the MPS server.
    cudaErrorMpsConnectionFailed = cyruntime.cudaError.cudaErrorMpsConnectionFailed

    #: This error indicates that the remote procedural call between the MPS
    #: server and the MPS client failed.
    cudaErrorMpsRpcFailure = cyruntime.cudaError.cudaErrorMpsRpcFailure

    #: This error indicates that the MPS server is not ready to accept new
    #: MPS client requests. This error can be returned when the MPS server
    #: is in the process of recovering from a fatal failure.
    cudaErrorMpsServerNotReady = cyruntime.cudaError.cudaErrorMpsServerNotReady

    #: This error indicates that the hardware resources required to create
    #: MPS client have been exhausted.
    cudaErrorMpsMaxClientsReached = cyruntime.cudaError.cudaErrorMpsMaxClientsReached

    #: This error indicates that the hardware resources required to support
    #: device connections have been exhausted.
    cudaErrorMpsMaxConnectionsReached = cyruntime.cudaError.cudaErrorMpsMaxConnectionsReached

    #: This error indicates that the MPS client has been terminated by the
    #: server. To continue using CUDA, the process must be terminated and
    #: relaunched.
    cudaErrorMpsClientTerminated = cyruntime.cudaError.cudaErrorMpsClientTerminated

    #: This error indicates that the program is using CUDA Dynamic
    #: Parallelism, but the current configuration, like MPS, does not
    #: support it.
    cudaErrorCdpNotSupported = cyruntime.cudaError.cudaErrorCdpNotSupported

    #: This error indicates that the program contains an unsupported
    #: interaction between different versions of CUDA Dynamic Parallelism.
    cudaErrorCdpVersionMismatch = cyruntime.cudaError.cudaErrorCdpVersionMismatch

    #: The operation is not permitted when the stream is capturing.
    cudaErrorStreamCaptureUnsupported = cyruntime.cudaError.cudaErrorStreamCaptureUnsupported

    #: The current capture sequence on the stream has been invalidated due
    #: to a previous error.
    cudaErrorStreamCaptureInvalidated = cyruntime.cudaError.cudaErrorStreamCaptureInvalidated

    #: The operation would have resulted in a merge of two independent
    #: capture sequences.
    cudaErrorStreamCaptureMerge = cyruntime.cudaError.cudaErrorStreamCaptureMerge

    #: The capture was not initiated in this stream.
    cudaErrorStreamCaptureUnmatched = cyruntime.cudaError.cudaErrorStreamCaptureUnmatched

    #: The capture sequence contains a fork that was not joined to the
    #: primary stream.
    cudaErrorStreamCaptureUnjoined = cyruntime.cudaError.cudaErrorStreamCaptureUnjoined

    #: A dependency would have been created which crosses the capture
    #: sequence boundary. Only implicit in-stream ordering dependencies are
    #: allowed to cross the boundary.
    cudaErrorStreamCaptureIsolation = cyruntime.cudaError.cudaErrorStreamCaptureIsolation

    #: The operation would have resulted in a disallowed implicit
    #: dependency on a current capture sequence from cudaStreamLegacy.
    cudaErrorStreamCaptureImplicit = cyruntime.cudaError.cudaErrorStreamCaptureImplicit

    #: The operation is not permitted on an event which was last recorded
    #: in a capturing stream.
    cudaErrorCapturedEvent = cyruntime.cudaError.cudaErrorCapturedEvent

    #: A stream capture sequence not initiated with the
    #: :py:obj:`~.cudaStreamCaptureModeRelaxed` argument to
    #: :py:obj:`~.cudaStreamBeginCapture` was passed to
    #: :py:obj:`~.cudaStreamEndCapture` in a different thread.
    cudaErrorStreamCaptureWrongThread = cyruntime.cudaError.cudaErrorStreamCaptureWrongThread

    #: This indicates that the wait operation has timed out.
    cudaErrorTimeout = cyruntime.cudaError.cudaErrorTimeout

    #: This error indicates that the graph update was not performed because
    #: it included changes which violated constraints specific to
    #: instantiated graph update.
    cudaErrorGraphExecUpdateFailure = cyruntime.cudaError.cudaErrorGraphExecUpdateFailure

    #: This indicates that an async error has occurred in a device outside
    #: of CUDA. If CUDA was waiting for an external device's signal before
    #: consuming shared data, the external device signaled an error
    #: indicating that the data is not valid for consumption. This leaves
    #: the process in an inconsistent state and any further CUDA work will
    #: return the same error. To continue using CUDA, the process must be
    #: terminated and relaunched.
    cudaErrorExternalDevice = cyruntime.cudaError.cudaErrorExternalDevice

    #: This indicates that a kernel launch error has occurred due to
    #: cluster misconfiguration.
    cudaErrorInvalidClusterSize = cyruntime.cudaError.cudaErrorInvalidClusterSize

    #: Indicates a function handle is not loaded when calling an API that
    #: requires a loaded function.
    cudaErrorFunctionNotLoaded = cyruntime.cudaError.cudaErrorFunctionNotLoaded

    #: This error indicates one or more resources passed in are not valid
    #: resource types for the operation.
    cudaErrorInvalidResourceType = cyruntime.cudaError.cudaErrorInvalidResourceType

    #: This error indicates one or more resources are insufficient or non-
    #: applicable for the operation.
    cudaErrorInvalidResourceConfiguration = cyruntime.cudaError.cudaErrorInvalidResourceConfiguration

    #: This error indicates that the requested operation is not permitted
    #: because the stream is in a detached state. This can occur if the
    #: green context associated with the stream has been destroyed,
    #: limiting the stream's operational capabilities.
    cudaErrorStreamDetached = cyruntime.cudaError.cudaErrorStreamDetached

    #: This indicates that an unknown internal error has occurred.
    cudaErrorUnknown = cyruntime.cudaError.cudaErrorUnknown
    cudaErrorApiFailureBase = cyruntime.cudaError.cudaErrorApiFailureBase
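
# Editor's note: illustrative sketch only. A common pattern for consuming
# the (err, ...) tuples returned by the bindings in this module; the
# _check helper is hypothetical, not part of the generated API:
#
#   def _check(err):
#       if err != cudaError_t.cudaSuccess:
#           raise RuntimeError(f"CUDA runtime error: {err.name}")
#
#   err, count = cudaGetDeviceCount()
#   _check(err)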

_dict_cudaError_t = dict(((int(v), v) for k, v in cudaError_t.__members__.items()))

class cudaGraphDependencyType(IntEnum):
    """
    Type annotations that can be applied to graph edges as part of
    :py:obj:`~.cudaGraphEdgeData`.
    """

    #: This is an ordinary dependency.
    cudaGraphDependencyTypeDefault = cyruntime.cudaGraphDependencyType_enum.cudaGraphDependencyTypeDefault

    #: This dependency type allows the downstream node to use
    #: `cudaGridDependencySynchronize()`. It may only be used between
    #: kernel nodes, and must be used with either the
    #: :py:obj:`~.cudaGraphKernelNodePortProgrammatic` or
    #: :py:obj:`~.cudaGraphKernelNodePortLaunchCompletion` outgoing port.
    cudaGraphDependencyTypeProgrammatic = cyruntime.cudaGraphDependencyType_enum.cudaGraphDependencyTypeProgrammatic

_dict_cudaGraphDependencyType = dict(((int(v), v) for k, v in cudaGraphDependencyType.__members__.items()))

class cudaGraphInstantiateResult(IntEnum):
    """
    Graph instantiation results
    """

    #: Instantiation succeeded
    cudaGraphInstantiateSuccess = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateSuccess

    #: Instantiation failed for an unexpected reason which is described in
    #: the return value of the function
    cudaGraphInstantiateError = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateError

    #: Instantiation failed due to invalid structure, such as cycles
    cudaGraphInstantiateInvalidStructure = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateInvalidStructure

    #: Instantiation for device launch failed because the graph contained
    #: an unsupported operation
    cudaGraphInstantiateNodeOperationNotSupported = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateNodeOperationNotSupported

    #: Instantiation for device launch failed due to the nodes belonging to
    #: different contexts
    cudaGraphInstantiateMultipleDevicesNotSupported = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateMultipleDevicesNotSupported

    #: One or more conditional handles are not associated with conditional
    #: nodes
    cudaGraphInstantiateConditionalHandleUnused = cyruntime.cudaGraphInstantiateResult.cudaGraphInstantiateConditionalHandleUnused

_dict_cudaGraphInstantiateResult = dict(((int(v), v) for k, v in cudaGraphInstantiateResult.__members__.items()))

class cudaLaunchMemSyncDomain(IntEnum):
    """
    Memory Synchronization Domain. A kernel can be launched in a
    specified memory synchronization domain that affects all memory
    operations issued by that kernel. A memory barrier issued in one
    domain will only order memory operations in that domain, thus
    eliminating latency increase from memory barriers ordering
    unrelated traffic. By default, kernels are launched in domain 0.
    Kernels launched with :py:obj:`~.cudaLaunchMemSyncDomainRemote` will
    have a different domain ID. Users may also alter the domain ID with
    :py:obj:`~.cudaLaunchMemSyncDomainMap` for a specific stream /
    graph node / kernel launch. See
    :py:obj:`~.cudaLaunchAttributeMemSyncDomain`,
    :py:obj:`~.cudaStreamSetAttribute`, :py:obj:`~.cudaLaunchKernelEx`,
    :py:obj:`~.cudaGraphKernelNodeSetAttribute`. Memory operations
    done in kernels launched in different domains are considered
    system-scope distanced. In other words, a GPU scoped memory
    synchronization is not sufficient for memory order to be observed
    by kernels in another memory synchronization domain even if they
    are on the same GPU.
    """

    #: Launch kernels in the default domain
    cudaLaunchMemSyncDomainDefault = cyruntime.cudaLaunchMemSyncDomain.cudaLaunchMemSyncDomainDefault

    #: Launch kernels in the remote domain
    cudaLaunchMemSyncDomainRemote = cyruntime.cudaLaunchMemSyncDomain.cudaLaunchMemSyncDomainRemote

_dict_cudaLaunchMemSyncDomain = dict(((int(v), v) for k, v in cudaLaunchMemSyncDomain.__members__.items()))

class cudaLaunchAttributeID(IntEnum):
    """
    Launch attributes enum; used as id field of
    :py:obj:`~.cudaLaunchAttribute`
    """

    #: Ignored entry, for convenient composition
    cudaLaunchAttributeIgnore = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeIgnore

    #: Valid for streams, graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.accessPolicyWindow`.
    cudaLaunchAttributeAccessPolicyWindow = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeAccessPolicyWindow

    #: Valid for graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.cooperative`.
    cudaLaunchAttributeCooperative = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeCooperative

    #: Valid for streams. See
    #: :py:obj:`~.cudaLaunchAttributeValue.syncPolicy`.
    cudaLaunchAttributeSynchronizationPolicy = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeSynchronizationPolicy

    #: Valid for graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.clusterDim`.
    cudaLaunchAttributeClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterDimension

    #: Valid for graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.clusterSchedulingPolicyPreference`.
    cudaLaunchAttributeClusterSchedulingPolicyPreference = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterSchedulingPolicyPreference

    #: Valid for launches. Setting
    #: :py:obj:`~.cudaLaunchAttributeValue.programmaticStreamSerializationAllowed`
    #: to non-0 signals that the kernel will use programmatic means to
    #: resolve its stream dependency, so that the CUDA runtime should
    #: opportunistically allow the grid's execution to overlap with the
    #: previous kernel in the stream, if that kernel requests the overlap.
    #: The dependent launches can choose to wait on the dependency using
    #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
    #: PTX instructions).
    cudaLaunchAttributeProgrammaticStreamSerialization = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticStreamSerialization

    #: Valid for launches. Set
    #: :py:obj:`~.cudaLaunchAttributeValue.programmaticEvent` to record the
    #: event. An event recorded through this launch attribute is guaranteed
    #: to only trigger after all blocks in the associated kernel trigger
    #: the event. A block can trigger the event programmatically in a
    #: future CUDA release. A trigger can also be inserted at the beginning
    #: of each block's execution if triggerAtBlockStart is set to non-0.
    #: The dependent launches can choose to wait on the dependency using
    #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
    #: PTX instructions). Note that dependents (including the CPU thread
    #: calling :py:obj:`~.cudaEventSynchronize()`) are not guaranteed to
    #: observe the release precisely when it is released. For example,
    #: :py:obj:`~.cudaEventSynchronize()` may only observe the event
    #: trigger long after the associated kernel has completed. This
    #: recording type is primarily meant for establishing programmatic
    #: dependency between device tasks. Note also that this type of
    #: dependency allows, but does not guarantee, concurrent execution of
    #: tasks.
    #: The event supplied must not be an interprocess or interop event.
    #: The event must disable timing (i.e. must be created with the
    #: :py:obj:`~.cudaEventDisableTiming` flag set).
    cudaLaunchAttributeProgrammaticEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticEvent

    #: Valid for streams, graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.priority`.
    cudaLaunchAttributePriority = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePriority

    #: Valid for streams, graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomainMap`.
    cudaLaunchAttributeMemSyncDomainMap = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomainMap

    #: Valid for streams, graph nodes, launches. See
    #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomain`.
    cudaLaunchAttributeMemSyncDomain = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomain

    #: Valid for graph nodes and launches. Set
    #: :py:obj:`~.cudaLaunchAttributeValue.preferredClusterDim` to allow
    #: the kernel launch to specify a preferred substitute cluster
    #: dimension. Blocks may be grouped according to either the dimensions
    #: specified with this attribute (grouped into a "preferred substitute
    #: cluster"), or the one specified with the
    #: :py:obj:`~.cudaLaunchAttributeClusterDimension` attribute (grouped
    #: into a "regular cluster"). The cluster dimensions of a "preferred
    #: substitute cluster" shall be an integer multiple greater than zero
    #: of the regular cluster dimensions. The device will attempt - on a
    #: best-effort basis - to group thread blocks into preferred clusters
    #: over grouping them into regular clusters. When it deems necessary
    #: (primarily when the device temporarily runs out of physical
    #: resources to launch the larger preferred clusters), the device may
    #: switch to launching the regular clusters instead, to utilize as
    #: much of the physical device resources as possible.
    #: Each type of cluster will have its enumeration / coordinate setup
    #: as if the grid consisted solely of its type of cluster. For example,
    #: if the preferred substitute cluster dimensions double the regular
    #: cluster dimensions, there might be simultaneously a regular cluster
    #: indexed at (1,0,0), and a preferred cluster indexed at (1,0,0). In
    #: this example, the preferred substitute cluster (1,0,0) replaces
    #: regular clusters (2,0,0) and (3,0,0) and groups their blocks.
    #: This attribute will only take effect when a regular cluster
    #: dimension has been specified. The preferred substitute cluster
    #: dimension must be an integer multiple greater than zero of the
    #: regular cluster dimension and must divide the grid. It must also be
    #: no more than `maxBlocksPerCluster`, if that is set in the kernel's
    #: `__launch_bounds__`; otherwise it must be less than the maximum
    #: value the driver can support. Beyond that, setting this attribute
    #: to a value physically unable to fit on any particular device is
    #: permitted.
    cudaLaunchAttributePreferredClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredClusterDimension

    #: Valid for launches. Set
    #: :py:obj:`~.cudaLaunchAttributeValue.launchCompletionEvent` to record
    #: the event.
    #: Nominally, the event is triggered once all blocks of the kernel
    #: have begun execution. Currently this is a best effort. If a kernel B
    #: has a launch completion dependency on a kernel A, B may wait until A
    #: is complete. Alternatively, blocks of B may begin before all blocks
    #: of A have begun, for example if B can claim execution resources
    #: unavailable to A (e.g. they run on different GPUs) or if B is
    #: higher priority than A. Exercise caution if such an ordering
    #: inversion could lead to deadlock.
    #: A launch completion event is nominally similar to a programmatic
    #: event with `triggerAtBlockStart` set, except that it is not visible
    #: to `cudaGridDependencySynchronize()` and can be used with compute
    #: capability less than 9.0.
    #: The event supplied must not be an interprocess or interop event.
    #: The event must disable timing (i.e. must be created with the
    #: :py:obj:`~.cudaEventDisableTiming` flag set).
    cudaLaunchAttributeLaunchCompletionEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeLaunchCompletionEvent

    #: Valid for graph nodes, launches. This attribute is graphs-only, and
    #: passing it to a launch in a non-capturing stream will result in an
    #: error.
    #: :py:obj:`~.cudaLaunchAttributeValue.deviceUpdatableKernelNode.deviceUpdatable`
    #: can only be set to 0 or 1. Setting the field to 1 indicates that the
    #: corresponding kernel node should be device-updatable. On success, a
    #: handle will be returned via
    #: :py:obj:`~.cudaLaunchAttributeValue.deviceUpdatableKernelNode.devNode`
    #: which can be passed to the various device-side update functions to
    #: update the node's kernel parameters from within another kernel. For
    #: more information on the types of device updates that can be made, as
    #: well as the relevant limitations thereof, see
    #: :py:obj:`~.cudaGraphKernelNodeUpdatesApply`.
    #: Nodes which are device-updatable have additional restrictions
    #: compared to regular kernel nodes. Firstly, device-updatable nodes
    #: cannot be removed from their graph via
    #: :py:obj:`~.cudaGraphDestroyNode`. Additionally, once opted in to
    #: this functionality, a node cannot opt out, and any attempt to set
    #: the deviceUpdatable attribute to 0 will result in an error. Device-
    #: updatable kernel nodes also cannot have their attributes copied
    #: to/from another kernel node via
    #: :py:obj:`~.cudaGraphKernelNodeCopyAttributes`. Graphs containing one
    #: or more device-updatable nodes also do not allow multiple
    #: instantiation, and neither the graph nor its instantiated version
    #: can be passed to :py:obj:`~.cudaGraphExecUpdate`.
    #: If a graph contains device-updatable nodes and updates those nodes
    #: from the device from within the graph, the graph must be uploaded
    #: with :py:obj:`~.cudaGraphUpload` before it is launched. For such a
    #: graph, if host-side executable graph updates are made to the device-
    #: updatable nodes, the graph must be uploaded before it is launched
    #: again.
    cudaLaunchAttributeDeviceUpdatableKernelNode = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeDeviceUpdatableKernelNode

    #: Valid for launches. On devices where the L1 cache and shared memory
    #: use the same hardware resources, setting
    #: :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a
    #: percentage between 0 and 100 sets the shared memory carveout
    #: preference, in percent of the total shared memory, for that kernel
    #: launch. This attribute takes precedence over
    #: :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is
    #: only a hint, and the driver can choose a different configuration if
    #: required for the launch.
    cudaLaunchAttributePreferredSharedMemoryCarveout = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout

    #: Valid for streams, graph nodes, launches. This attribute is a hint
    #: to the CUDA runtime that the launch should attempt to make the
    #: kernel maximize its NVLINK utilization.
    #:
    #: When it is possible to honor this hint, CUDA will assume each block
    #: in the grid launch will carry out an even amount of NVLINK traffic,
    #: and make a best-effort attempt to adjust the kernel launch based on
    #: that assumption.
    #: This attribute is a hint only. CUDA makes no functional or
    #: performance guarantee. Its applicability can be affected by many
    #: different factors, including the driver version (CUDA doesn't
    #: guarantee that performance characteristics will be maintained
    #: between driver versions; a driver update could alter or regress
    #: previously observed performance characteristics). It also doesn't
    #: guarantee a successful result, i.e. applying the attribute may not
    #: improve the performance of either the targeted kernel or the
    #: encapsulating application.
    #: Valid values for
    #: :py:obj:`~.cudaLaunchAttributeValue.nvlinkUtilCentricScheduling` are
    #: 0 (disabled) and 1 (enabled).
    cudaLaunchAttributeNvlinkUtilCentricScheduling = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeNvlinkUtilCentricScheduling

_dict_cudaLaunchAttributeID = dict(((int(v), v) for k, v in cudaLaunchAttributeID.__members__.items()))
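
# --- Illustrative sketch (not part of the generated bindings) -----------------
# The module-level `_dict_<Enum>` tables built above map raw integers coming back
# from the C layer onto their Python enum members. A minimal example of that
# round-trip for launch attribute ids:
def _example_decode_launch_attribute_id(raw_id):
    """Illustrative only: recover the enum member for a raw attribute id (KeyError if unknown)."""
    return _dict_cudaLaunchAttributeID[int(raw_id)]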

class cudaAsyncNotificationType(IntEnum):
    """
    Types of async notification that can occur
    """

    #: Sent when the process has exceeded its device memory budget
    cudaAsyncNotificationTypeOverBudget = cyruntime.cudaAsyncNotificationType_enum.cudaAsyncNotificationTypeOverBudget

_dict_cudaAsyncNotificationType = dict(((int(v), v) for k, v in cudaAsyncNotificationType.__members__.items()))

class cudaLogLevel(IntEnum):
    """
    """
    cudaLogLevelError = cyruntime.CUDAlogLevel_enum.cudaLogLevelError
    cudaLogLevelWarning = cyruntime.CUDAlogLevel_enum.cudaLogLevelWarning

_dict_cudaLogLevel = dict(((int(v), v) for k, v in cudaLogLevel.__members__.items()))

class cudaDataType(IntEnum):
    """"""
    CUDA_R_32F = cyruntime.cudaDataType_t.CUDA_R_32F
    CUDA_R_64F = cyruntime.cudaDataType_t.CUDA_R_64F
    CUDA_R_16F = cyruntime.cudaDataType_t.CUDA_R_16F
    CUDA_R_8I = cyruntime.cudaDataType_t.CUDA_R_8I
    CUDA_C_32F = cyruntime.cudaDataType_t.CUDA_C_32F
    CUDA_C_64F = cyruntime.cudaDataType_t.CUDA_C_64F
    CUDA_C_16F = cyruntime.cudaDataType_t.CUDA_C_16F
    CUDA_C_8I = cyruntime.cudaDataType_t.CUDA_C_8I
    CUDA_R_8U = cyruntime.cudaDataType_t.CUDA_R_8U
    CUDA_C_8U = cyruntime.cudaDataType_t.CUDA_C_8U
    CUDA_R_32I = cyruntime.cudaDataType_t.CUDA_R_32I
    CUDA_C_32I = cyruntime.cudaDataType_t.CUDA_C_32I
    CUDA_R_32U = cyruntime.cudaDataType_t.CUDA_R_32U
    CUDA_C_32U = cyruntime.cudaDataType_t.CUDA_C_32U
    CUDA_R_16BF = cyruntime.cudaDataType_t.CUDA_R_16BF
    CUDA_C_16BF = cyruntime.cudaDataType_t.CUDA_C_16BF
    CUDA_R_4I = cyruntime.cudaDataType_t.CUDA_R_4I
    CUDA_C_4I = cyruntime.cudaDataType_t.CUDA_C_4I
    CUDA_R_4U = cyruntime.cudaDataType_t.CUDA_R_4U
    CUDA_C_4U = cyruntime.cudaDataType_t.CUDA_C_4U
    CUDA_R_16I = cyruntime.cudaDataType_t.CUDA_R_16I
    CUDA_C_16I = cyruntime.cudaDataType_t.CUDA_C_16I
    CUDA_R_16U = cyruntime.cudaDataType_t.CUDA_R_16U
    CUDA_C_16U = cyruntime.cudaDataType_t.CUDA_C_16U
    CUDA_R_64I = cyruntime.cudaDataType_t.CUDA_R_64I
    CUDA_C_64I = cyruntime.cudaDataType_t.CUDA_C_64I
    CUDA_R_64U = cyruntime.cudaDataType_t.CUDA_R_64U
    CUDA_C_64U = cyruntime.cudaDataType_t.CUDA_C_64U
    CUDA_R_8F_E4M3 = cyruntime.cudaDataType_t.CUDA_R_8F_E4M3
    CUDA_R_8F_UE4M3 = cyruntime.cudaDataType_t.CUDA_R_8F_UE4M3
    CUDA_R_8F_E5M2 = cyruntime.cudaDataType_t.CUDA_R_8F_E5M2
    CUDA_R_8F_UE8M0 = cyruntime.cudaDataType_t.CUDA_R_8F_UE8M0
    CUDA_R_6F_E2M3 = cyruntime.cudaDataType_t.CUDA_R_6F_E2M3
    CUDA_R_6F_E3M2 = cyruntime.cudaDataType_t.CUDA_R_6F_E3M2
    CUDA_R_4F_E2M1 = cyruntime.cudaDataType_t.CUDA_R_4F_E2M1

_dict_cudaDataType = dict(((int(v), v) for k, v in cudaDataType.__members__.items()))
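
# --- Illustrative sketch (not part of the generated bindings) -----------------
# cudaDataType is consumed by math libraries (cuBLAS, cuSPARSE, ...) to describe
# buffer element types. Mapping host-side NumPy dtypes onto these members is a
# common pattern; the helper below is a hypothetical, non-exhaustive sketch.
def _example_cuda_dtype_for_numpy(np_dtype):
    """Illustrative only: map a few NumPy dtypes onto cudaDataType members."""
    import numpy as np
    table = {
        np.dtype(np.float16): cudaDataType.CUDA_R_16F,
        np.dtype(np.float32): cudaDataType.CUDA_R_32F,
        np.dtype(np.float64): cudaDataType.CUDA_R_64F,
        np.dtype(np.int8): cudaDataType.CUDA_R_8I,
        np.dtype(np.complex64): cudaDataType.CUDA_C_32F,
        np.dtype(np.complex128): cudaDataType.CUDA_C_64F,
    }
    return table[np.dtype(np_dtype)]  # KeyError for unmapped dtypes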

class cudaEmulationStrategy(IntEnum):
    """"""
    CUDA_EMULATION_STRATEGY_DEFAULT = cyruntime.cudaEmulationStrategy_t.CUDA_EMULATION_STRATEGY_DEFAULT
    CUDA_EMULATION_STRATEGY_PERFORMANT = cyruntime.cudaEmulationStrategy_t.CUDA_EMULATION_STRATEGY_PERFORMANT
    CUDA_EMULATION_STRATEGY_EAGER = cyruntime.cudaEmulationStrategy_t.CUDA_EMULATION_STRATEGY_EAGER

_dict_cudaEmulationStrategy = dict(((int(v), v) for k, v in cudaEmulationStrategy.__members__.items()))

class cudaEmulationMantissaControl(IntEnum):
    """"""
    CUDA_EMULATION_MANTISSA_CONTROL_DYNAMIC = cyruntime.cudaEmulationMantissaControl_t.CUDA_EMULATION_MANTISSA_CONTROL_DYNAMIC
    CUDA_EMULATION_MANTISSA_CONTROL_FIXED = cyruntime.cudaEmulationMantissaControl_t.CUDA_EMULATION_MANTISSA_CONTROL_FIXED

_dict_cudaEmulationMantissaControl = dict(((int(v), v) for k, v in cudaEmulationMantissaControl.__members__.items()))

class cudaEmulationSpecialValuesSupport(IntEnum):
    """"""
    CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_NONE = cyruntime.cudaEmulationSpecialValuesSupport_t.CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_NONE
    CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_INFINITY = cyruntime.cudaEmulationSpecialValuesSupport_t.CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_INFINITY
    CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_NAN = cyruntime.cudaEmulationSpecialValuesSupport_t.CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_NAN
    CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_DEFAULT = cyruntime.cudaEmulationSpecialValuesSupport_t.CUDA_EMULATION_SPECIAL_VALUES_SUPPORT_DEFAULT

_dict_cudaEmulationSpecialValuesSupport = dict(((int(v), v) for k, v in cudaEmulationSpecialValuesSupport.__members__.items()))

class libraryPropertyType(IntEnum):
    """"""
    MAJOR_VERSION = cyruntime.libraryPropertyType_t.MAJOR_VERSION
    MINOR_VERSION = cyruntime.libraryPropertyType_t.MINOR_VERSION
    PATCH_LEVEL = cyruntime.libraryPropertyType_t.PATCH_LEVEL

_dict_libraryPropertyType = dict(((int(v), v) for k, v in libraryPropertyType.__members__.items()))

class cudaEglFrameType(IntEnum):
    """
    CUDA EglFrame type - array or pointer
    """

    #: Frame type CUDA array
    cudaEglFrameTypeArray = cyruntime.cudaEglFrameType_enum.cudaEglFrameTypeArray

    #: Frame type CUDA pointer
    cudaEglFrameTypePitch = cyruntime.cudaEglFrameType_enum.cudaEglFrameTypePitch

_dict_cudaEglFrameType = dict(((int(v), v) for k, v in cudaEglFrameType.__members__.items()))

class cudaEglResourceLocationFlags(IntEnum):
    """
    Resource location flags - sysmem or vidmem. For a CUDA context on
    an iGPU, video and system memory are equivalent, so these flags
    have no effect on execution. For a CUDA context on a dGPU,
    applications can use the flag
    :py:obj:`~.cudaEglResourceLocationFlags` to give a hint about the
    desired location: :py:obj:`~.cudaEglResourceLocationSysmem` makes
    the frame data resident in system memory for access by CUDA, while
    :py:obj:`~.cudaEglResourceLocationVidmem` makes it resident in
    dedicated video memory. There may be additional latency due to a
    new allocation and data migration if the frame is produced in a
    different memory.
    """

    #: Resource location sysmem
    cudaEglResourceLocationSysmem = cyruntime.cudaEglResourceLocationFlags_enum.cudaEglResourceLocationSysmem

    #: Resource location vidmem
    cudaEglResourceLocationVidmem = cyruntime.cudaEglResourceLocationFlags_enum.cudaEglResourceLocationVidmem

_dict_cudaEglResourceLocationFlags = dict(((int(v), v) for k, v in cudaEglResourceLocationFlags.__members__.items()))

class cudaEglColorFormat(IntEnum):
    """
    CUDA EGL Color Format - The different planar and multiplanar
    formats currently supported for CUDA_EGL interops.
    """

    #: Y, U, V in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420Planar

    #: Y, UV in two surfaces (UV as one surface) with VU byte ordering,
    #: width, height ratio same as YUV420Planar.
    cudaEglColorFormatYUV420SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420SemiPlanar

    #: Y, U, V each in a separate surface, U/V width = 1/2 Y width, U/V
    #: height = Y height.
    cudaEglColorFormatYUV422Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV422Planar

    #: Y, UV in two surfaces with VU byte ordering, width, height ratio
    #: same as YUV422Planar.
    cudaEglColorFormatYUV422SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV422SemiPlanar

    #: R/G/B/A four channels in one surface with BGRA byte ordering.
    cudaEglColorFormatARGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatARGB

    #: R/G/B/A four channels in one surface with ABGR byte ordering.
    cudaEglColorFormatRGBA = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatRGBA

    #: Single luminance channel in one surface.
    cudaEglColorFormatL = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatL

    #: Single color channel in one surface.
    cudaEglColorFormatR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatR

    #: Y, U, V in three surfaces, each in a separate surface, U/V width = Y
    #: width, U/V height = Y height.
    cudaEglColorFormatYUV444Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV444Planar

    #: Y, UV in two surfaces (UV as one surface) with VU byte ordering,
    #: width, height ratio same as YUV444Planar.
    cudaEglColorFormatYUV444SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV444SemiPlanar

    #: Y, U, V in one surface, interleaved as UYVY in one channel.
    cudaEglColorFormatYUYV422 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUYV422

    #: Y, U, V in one surface, interleaved as YUYV in one channel.
    cudaEglColorFormatUYVY422 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatUYVY422

    #: R/G/B/A four channels in one surface with RGBA byte ordering.
    cudaEglColorFormatABGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatABGR

    #: R/G/B/A four channels in one surface with ARGB byte ordering.
    cudaEglColorFormatBGRA = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBGRA

    #: Alpha color format - one channel in one surface.
    cudaEglColorFormatA = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatA

    #: R/G color format - two channels in one surface with GR byte ordering
    cudaEglColorFormatRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatRG

    #: Y, U, V, A four channels in one surface, interleaved as VUYA.
    cudaEglColorFormatAYUV = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatAYUV

    #: Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V
    #: width = Y width, U/V height = Y height.
    cudaEglColorFormatYVU444SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU444SemiPlanar

    #: Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V
    #: width = 1/2 Y width, U/V height = Y height.
    cudaEglColorFormatYVU422SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU422SemiPlanar

    #: Y, VU in two surfaces (VU as one surface) with UV byte ordering, U/V
    #: width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420SemiPlanar

    #: Y10, V10U10 in two surfaces (VU as one surface) with UV byte
    #: ordering, U/V width = Y width, U/V height = Y height.
    cudaEglColorFormatY10V10U10_444SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_444SemiPlanar

    #: Y10, V10U10 in two surfaces (VU as one surface) with UV byte
    #: ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatY10V10U10_420SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar

    #: Y12, V12U12 in two surfaces (VU as one surface) with UV byte
    #: ordering, U/V width = Y width, U/V height = Y height.
    cudaEglColorFormatY12V12U12_444SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_444SemiPlanar

    #: Y12, V12U12 in two surfaces (VU as one surface) with UV byte
    #: ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatY12V12U12_420SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_420SemiPlanar

    #: Extended Range Y, U, V in one surface, interleaved as YVYU in one
    #: channel.
    cudaEglColorFormatVYUY_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatVYUY_ER

    #: Extended Range Y, U, V in one surface, interleaved as YUYV in one
    #: channel.
    cudaEglColorFormatUYVY_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatUYVY_ER

    #: Extended Range Y, U, V in one surface, interleaved as UYVY in one
    #: channel.
    cudaEglColorFormatYUYV_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUYV_ER

    #: Extended Range Y, U, V in one surface, interleaved as VYUY in one
    #: channel.
    cudaEglColorFormatYVYU_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVYU_ER

    #: Extended Range Y, U, V, A four channels in one surface, interleaved
    #: as AVUY.
    cudaEglColorFormatYUVA_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUVA_ER

    #: Extended Range Y, U, V, A four channels in one surface, interleaved
    #: as VUYA.
    cudaEglColorFormatAYUV_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatAYUV_ER

    #: Extended Range Y, U, V in three surfaces, U/V width = Y width, U/V
    #: height = Y height.
    cudaEglColorFormatYUV444Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV444Planar_ER

    #: Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width,
    #: U/V height = Y height.
    cudaEglColorFormatYUV422Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV422Planar_ER

    #: Extended Range Y, U, V in three surfaces, U/V width = 1/2 Y width,
    #: U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420Planar_ER

    #: Extended Range Y, UV in two surfaces (UV as one surface) with VU
    #: byte ordering, U/V width = Y width, U/V height = Y height.
    cudaEglColorFormatYUV444SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV444SemiPlanar_ER

    #: Extended Range Y, UV in two surfaces (UV as one surface) with VU
    #: byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
    cudaEglColorFormatYUV422SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV422SemiPlanar_ER

    #: Extended Range Y, UV in two surfaces (UV as one surface) with VU
    #: byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420SemiPlanar_ER

    #: Extended Range Y, V, U in three surfaces, U/V width = Y width, U/V
    #: height = Y height.
    cudaEglColorFormatYVU444Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU444Planar_ER

    #: Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width,
    #: U/V height = Y height.
    cudaEglColorFormatYVU422Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU422Planar_ER

    #: Extended Range Y, V, U in three surfaces, U/V width = 1/2 Y width,
    #: U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420Planar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420Planar_ER

    #: Extended Range Y, VU in two surfaces (VU as one surface) with UV
    #: byte ordering, U/V width = Y width, U/V height = Y height.
    cudaEglColorFormatYVU444SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU444SemiPlanar_ER

    #: Extended Range Y, VU in two surfaces (VU as one surface) with UV
    #: byte ordering, U/V width = 1/2 Y width, U/V height = Y height.
    cudaEglColorFormatYVU422SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU422SemiPlanar_ER

    #: Extended Range Y, VU in two surfaces (VU as one surface) with UV
    #: byte ordering, U/V width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420SemiPlanar_ER

    #: Bayer format - one channel in one surface with interleaved RGGB
    #: ordering.
    cudaEglColorFormatBayerRGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerRGGB

    #: Bayer format - one channel in one surface with interleaved BGGR
    #: ordering.
    cudaEglColorFormatBayerBGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerBGGR

    #: Bayer format - one channel in one surface with interleaved GRBG
    #: ordering.
    cudaEglColorFormatBayerGRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerGRBG

    #: Bayer format - one channel in one surface with interleaved GBRG
    #: ordering.
    cudaEglColorFormatBayerGBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerGBRG

    #: Bayer10 format - one channel in one surface with interleaved RGGB
    #: ordering. Out of 16 bits, 10 bits are used and 6 bits are no-op.
    cudaEglColorFormatBayer10RGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10RGGB

    #: Bayer10 format - one channel in one surface with interleaved BGGR
    #: ordering. Out of 16 bits, 10 bits are used and 6 bits are no-op.
    cudaEglColorFormatBayer10BGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10BGGR

    #: Bayer10 format - one channel in one surface with interleaved GRBG
    #: ordering. Out of 16 bits, 10 bits are used and 6 bits are no-op.
    cudaEglColorFormatBayer10GRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10GRBG

    #: Bayer10 format - one channel in one surface with interleaved GBRG
    #: ordering. Out of 16 bits, 10 bits are used and 6 bits are no-op.
    cudaEglColorFormatBayer10GBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10GBRG

    #: Bayer12 format - one channel in one surface with interleaved RGGB
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12RGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12RGGB

    #: Bayer12 format - one channel in one surface with interleaved BGGR
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12BGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12BGGR

    #: Bayer12 format - one channel in one surface with interleaved GRBG
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12GRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12GRBG

    #: Bayer12 format - one channel in one surface with interleaved GBRG
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12GBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12GBRG

    #: Bayer14 format - one channel in one surface with interleaved RGGB
    #: ordering. Out of 16 bits, 14 bits are used and 2 bits are no-op.
    cudaEglColorFormatBayer14RGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer14RGGB

    #: Bayer14 format - one channel in one surface with interleaved BGGR
    #: ordering. Out of 16 bits, 14 bits are used and 2 bits are no-op.
    cudaEglColorFormatBayer14BGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer14BGGR

    #: Bayer14 format - one channel in one surface with interleaved GRBG
    #: ordering. Out of 16 bits, 14 bits are used and 2 bits are no-op.
    cudaEglColorFormatBayer14GRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer14GRBG

    #: Bayer14 format - one channel in one surface with interleaved GBRG
    #: ordering. Out of 16 bits, 14 bits are used and 2 bits are no-op.
    cudaEglColorFormatBayer14GBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer14GBRG

    #: Bayer20 format - one channel in one surface with interleaved RGGB
    #: ordering. Out of 32 bits, 20 bits are used and 12 bits are no-op.
    cudaEglColorFormatBayer20RGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer20RGGB

    #: Bayer20 format - one channel in one surface with interleaved BGGR
    #: ordering. Out of 32 bits, 20 bits are used and 12 bits are no-op.
    cudaEglColorFormatBayer20BGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer20BGGR

    #: Bayer20 format - one channel in one surface with interleaved GRBG
    #: ordering. Out of 32 bits, 20 bits are used and 12 bits are no-op.
    cudaEglColorFormatBayer20GRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer20GRBG

    #: Bayer20 format - one channel in one surface with interleaved GBRG
    #: ordering. Out of 32 bits, 20 bits are used and 12 bits are no-op.
    cudaEglColorFormatBayer20GBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer20GBRG

    #: Y, V, U in three surfaces, each in a separate surface, U/V width = Y
    #: width, U/V height = Y height.
    cudaEglColorFormatYVU444Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU444Planar

    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = Y height.
    cudaEglColorFormatYVU422Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU422Planar

    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420Planar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420Planar

    #: Nvidia proprietary Bayer ISP format - one channel in one surface
    #: with interleaved RGGB ordering and mapped to opaque integer
    #: datatype.
    cudaEglColorFormatBayerIspRGGB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerIspRGGB

    #: Nvidia proprietary Bayer ISP format - one channel in one surface
    #: with interleaved BGGR ordering and mapped to opaque integer
    #: datatype.
    cudaEglColorFormatBayerIspBGGR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerIspBGGR

    #: Nvidia proprietary Bayer ISP format - one channel in one surface
    #: with interleaved GRBG ordering and mapped to opaque integer
    #: datatype.
    cudaEglColorFormatBayerIspGRBG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerIspGRBG

    #: Nvidia proprietary Bayer ISP format - one channel in one surface
    #: with interleaved GBRG ordering and mapped to opaque integer
    #: datatype.
    cudaEglColorFormatBayerIspGBRG = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerIspGBRG

    #: Bayer format - one channel in one surface with interleaved BCCR
    #: ordering.
    cudaEglColorFormatBayerBCCR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerBCCR

    #: Bayer format - one channel in one surface with interleaved RCCB
    #: ordering.
    cudaEglColorFormatBayerRCCB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerRCCB

    #: Bayer format - one channel in one surface with interleaved CRBC
    #: ordering.
    cudaEglColorFormatBayerCRBC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerCRBC

    #: Bayer format - one channel in one surface with interleaved CBRC
    #: ordering.
    cudaEglColorFormatBayerCBRC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayerCBRC

    #: Bayer10 format - one channel in one surface with interleaved CCCC
    #: ordering. Out of 16 bits, 10 bits are used and 6 bits are no-op.
    cudaEglColorFormatBayer10CCCC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer10CCCC

    #: Bayer12 format - one channel in one surface with interleaved BCCR
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12BCCR = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12BCCR

    #: Bayer12 format - one channel in one surface with interleaved RCCB
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12RCCB = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12RCCB

    #: Bayer12 format - one channel in one surface with interleaved CRBC
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12CRBC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12CRBC

    #: Bayer12 format - one channel in one surface with interleaved CBRC
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12CBRC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12CBRC

    #: Bayer12 format - one channel in one surface with interleaved CCCC
    #: ordering. Out of 16 bits, 12 bits are used and 4 bits are no-op.
    cudaEglColorFormatBayer12CCCC = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatBayer12CCCC

    #: Color format for single Y plane.
    cudaEglColorFormatY = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY

    #: Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width,
    #: U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420SemiPlanar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420SemiPlanar_2020

    #: Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width,
    #: U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420SemiPlanar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420SemiPlanar_2020

    #: Y, U, V in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420Planar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420Planar_2020

    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420Planar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420Planar_2020

    #: Y, UV in two surfaces (UV as one surface) U/V width = 1/2 Y width,
    #: U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420SemiPlanar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420SemiPlanar_709

    #: Y, VU in two surfaces (VU as one surface) U/V width = 1/2 Y width,
    #: U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420SemiPlanar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420SemiPlanar_709

    #: Y, U, V in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYUV420Planar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUV420Planar_709

    #: Y, V, U in three surfaces, each in a separate surface, U/V width =
    #: 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatYVU420Planar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVU420Planar_709

    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
    #: width, U/V height = 1/2 Y height.
    cudaEglColorFormatY10V10U10_420SemiPlanar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar_709

    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
    #: width, U/V height = 1/2 Y height.
    cudaEglColorFormatY10V10U10_420SemiPlanar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar_2020

    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
    #: width, U/V height = Y height.
    cudaEglColorFormatY10V10U10_422SemiPlanar_2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_422SemiPlanar_2020

    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
    #: width, U/V height = Y height.
    cudaEglColorFormatY10V10U10_422SemiPlanar = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_422SemiPlanar

    #: Y10, V10U10 in two surfaces (VU as one surface) U/V width = 1/2 Y
    #: width, U/V height = Y height.
    cudaEglColorFormatY10V10U10_422SemiPlanar_709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_422SemiPlanar_709

    #: Extended Range Color format for single Y plane.
    cudaEglColorFormatY_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY_ER

    #: Extended Range Color format for single Y plane.
    cudaEglColorFormatY_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY_709_ER

    #: Extended Range Color format for single Y10 plane.
    cudaEglColorFormatY10_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10_ER

    #: Extended Range Color format for single Y10 plane.
    cudaEglColorFormatY10_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10_709_ER

    #: Extended Range Color format for single Y12 plane.
    cudaEglColorFormatY12_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12_ER

    #: Extended Range Color format for single Y12 plane.
    cudaEglColorFormatY12_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12_709_ER

    #: Y, U, V, A four channels in one surface, interleaved as AVUY.
    cudaEglColorFormatYUVA = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYUVA

    #: Y, U, V in one surface, interleaved as YVYU in one channel.
    cudaEglColorFormatYVYU = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatYVYU

    #: Y, U, V in one surface, interleaved as VYUY in one channel.
    cudaEglColorFormatVYUY = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatVYUY

    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
    #: width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatY10V10U10_420SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar_ER

    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
    #: width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatY10V10U10_420SemiPlanar_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_420SemiPlanar_709_ER

    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
    #: width = Y width, U/V height = Y height.
    cudaEglColorFormatY10V10U10_444SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_444SemiPlanar_ER

    #: Extended Range Y10, V10U10 in two surfaces (VU as one surface) U/V
    #: width = Y width, U/V height = Y height.
    cudaEglColorFormatY10V10U10_444SemiPlanar_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY10V10U10_444SemiPlanar_709_ER

    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
    #: width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatY12V12U12_420SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_420SemiPlanar_ER

    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
    #: width = 1/2 Y width, U/V height = 1/2 Y height.
    cudaEglColorFormatY12V12U12_420SemiPlanar_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_420SemiPlanar_709_ER

    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
    #: width = Y width, U/V height = Y height.
    cudaEglColorFormatY12V12U12_444SemiPlanar_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_444SemiPlanar_ER

    #: Extended Range Y12, V12U12 in two surfaces (VU as one surface) U/V
    #: width = Y width, U/V height = Y height.
    cudaEglColorFormatY12V12U12_444SemiPlanar_709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatY12V12U12_444SemiPlanar_709_ER

    #: Y, U, V in one surface, interleaved as UYVY in one channel.
    cudaEglColorFormatUYVY709 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatUYVY709

    #: Extended Range Y, U, V in one surface, interleaved as UYVY in one
    #: channel.
    cudaEglColorFormatUYVY709_ER = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatUYVY709_ER

    #: Y, U, V in one surface, interleaved as UYVY in one channel.
    cudaEglColorFormatUYVY2020 = cyruntime.cudaEglColorFormat_enum.cudaEglColorFormatUYVY2020

_dict_cudaEglColorFormat = dict(((int(v), v) for k, v in cudaEglColorFormat.__members__.items()))

class cudaChannelFormatKind(IntEnum):
    """
    Channel format kind
    """

    #: Signed channel format
    cudaChannelFormatKindSigned = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSigned

    #: Unsigned channel format
    cudaChannelFormatKindUnsigned = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsigned

    #: Float channel format
    cudaChannelFormatKindFloat = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindFloat

    #: No channel format
    cudaChannelFormatKindNone = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindNone

    #: Unsigned 8-bit integers, planar 4:2:0 YUV format
    cudaChannelFormatKindNV12 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindNV12

    #: 1 channel unsigned 8-bit normalized integer
    cudaChannelFormatKindUnsignedNormalized8X1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X1

    #: 2 channel unsigned 8-bit normalized integer
    cudaChannelFormatKindUnsignedNormalized8X2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X2

    #: 4 channel unsigned 8-bit normalized integer
    cudaChannelFormatKindUnsignedNormalized8X4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized8X4

    #: 1 channel unsigned 16-bit normalized integer
    cudaChannelFormatKindUnsignedNormalized16X1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X1

    #: 2 channel unsigned 16-bit normalized integer
    cudaChannelFormatKindUnsignedNormalized16X2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X2

    #: 4 channel unsigned 16-bit normalized integer
    cudaChannelFormatKindUnsignedNormalized16X4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized16X4

    #: 1 channel signed 8-bit normalized integer
    cudaChannelFormatKindSignedNormalized8X1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X1

    #: 2 channel signed 8-bit normalized integer
    cudaChannelFormatKindSignedNormalized8X2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X2

    #: 4 channel signed 8-bit normalized integer
    cudaChannelFormatKindSignedNormalized8X4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized8X4

    #: 1 channel signed 16-bit normalized integer
    cudaChannelFormatKindSignedNormalized16X1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X1

    #: 2 channel signed 16-bit normalized integer
    cudaChannelFormatKindSignedNormalized16X2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X2

    #: 4 channel signed 16-bit normalized integer
    cudaChannelFormatKindSignedNormalized16X4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedNormalized16X4

    #: 4 channel unsigned normalized block-compressed (BC1 compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed1 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1

    #: 4 channel unsigned normalized block-compressed (BC1 compression)
    #: format with sRGB encoding
    cudaChannelFormatKindUnsignedBlockCompressed1SRGB = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed1SRGB

    #: 4 channel unsigned normalized block-compressed (BC2 compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed2 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2

    #: 4 channel unsigned normalized block-compressed (BC2 compression)
    #: format with sRGB encoding
    cudaChannelFormatKindUnsignedBlockCompressed2SRGB = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed2SRGB

    #: 4 channel unsigned normalized block-compressed (BC3 compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed3 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3

    #: 4 channel unsigned normalized block-compressed (BC3 compression)
    #: format with sRGB encoding
    cudaChannelFormatKindUnsignedBlockCompressed3SRGB = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed3SRGB

    #: 1 channel unsigned normalized block-compressed (BC4 compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed4

    #: 1 channel signed normalized block-compressed (BC4 compression)
    #: format
    cudaChannelFormatKindSignedBlockCompressed4 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed4

    #: 2 channel unsigned normalized block-compressed (BC5 compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed5 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed5

    #: 2 channel signed normalized block-compressed (BC5 compression)
    #: format
    cudaChannelFormatKindSignedBlockCompressed5 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed5

    #: 3 channel unsigned half-float block-compressed (BC6H compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed6H = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed6H

    #: 3 channel signed half-float block-compressed (BC6H compression)
    #: format
    cudaChannelFormatKindSignedBlockCompressed6H = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindSignedBlockCompressed6H

    #: 4 channel unsigned normalized block-compressed (BC7 compression)
    #: format
    cudaChannelFormatKindUnsignedBlockCompressed7 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7

    #: 4 channel unsigned normalized block-compressed (BC7 compression)
    #: format with sRGB encoding
    cudaChannelFormatKindUnsignedBlockCompressed7SRGB = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedBlockCompressed7SRGB

    #: 4 channel unsigned normalized (10-bit, 10-bit, 10-bit, 2-bit) format
    cudaChannelFormatKindUnsignedNormalized1010102 = cyruntime.cudaChannelFormatKind.cudaChannelFormatKindUnsignedNormalized1010102

_dict_cudaChannelFormatKind = dict(((int(v), v) for k, v in cudaChannelFormatKind.__members__.items()))
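
# --- Illustrative sketch (not part of the generated bindings) -----------------
# cudaChannelFormatKind is consumed by cudaCreateChannelDesc() when describing CUDA
# array elements. A minimal sketch using the (err, value) tuple-return convention
# of the wrappers in this module:
def _example_float_channel_desc():
    """Illustrative only: a single 32-bit float channel, e.g. for a float texture."""
    err, desc = cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat)
    return err, desc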

class cudaMemoryType(IntEnum):
    """
    CUDA memory types
    """

    #: Unregistered memory
    cudaMemoryTypeUnregistered = cyruntime.cudaMemoryType.cudaMemoryTypeUnregistered

    #: Host memory
    cudaMemoryTypeHost = cyruntime.cudaMemoryType.cudaMemoryTypeHost

    #: Device memory
    cudaMemoryTypeDevice = cyruntime.cudaMemoryType.cudaMemoryTypeDevice

    #: Managed memory
    cudaMemoryTypeManaged = cyruntime.cudaMemoryType.cudaMemoryTypeManaged

_dict_cudaMemoryType = dict(((int(v), v) for k, v in cudaMemoryType.__members__.items()))
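
# --- Illustrative sketch (not part of the generated bindings) -----------------
# cudaMemoryType is reported back by cudaPointerGetAttributes(). The sketch below
# assumes the returned attributes object exposes a `type` field mirroring the C
# struct cudaPointerAttributes:
def _example_classify_pointer(ptr):
    """Illustrative only: classify `ptr` as unregistered/host/device/managed."""
    err, attrs = cudaPointerGetAttributes(ptr)
    if err != cudaError_t.cudaSuccess:
        return err, None
    return err, cudaMemoryType(int(attrs.type))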

class cudaMemcpyKind(IntEnum):
    """
    CUDA memory copy types
    """

    #: Host -> Host
    cudaMemcpyHostToHost = cyruntime.cudaMemcpyKind.cudaMemcpyHostToHost

    #: Host -> Device
    cudaMemcpyHostToDevice = cyruntime.cudaMemcpyKind.cudaMemcpyHostToDevice

    #: Device -> Host
    cudaMemcpyDeviceToHost = cyruntime.cudaMemcpyKind.cudaMemcpyDeviceToHost

    #: Device -> Device
    cudaMemcpyDeviceToDevice = cyruntime.cudaMemcpyKind.cudaMemcpyDeviceToDevice

    #: Direction of the transfer is inferred from the pointer values.
    #: Requires unified virtual addressing
    cudaMemcpyDefault = cyruntime.cudaMemcpyKind.cudaMemcpyDefault

_dict_cudaMemcpyKind = dict(((int(v), v) for k, v in cudaMemcpyKind.__members__.items()))
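
# --- Illustrative sketch (not part of the generated bindings) -----------------
# cudaMemcpyKind selects the direction of cudaMemcpy(); cudaMemcpyDefault lets the
# runtime infer it from the pointer values under unified virtual addressing. A
# minimal sketch of an explicit host-to-device copy:
def _example_copy_h2d(dst_device_ptr, src_host_ptr, nbytes):
    """Illustrative only: cudaMemcpyDefault would also work here under UVA."""
    err, = cudaMemcpy(dst_device_ptr, src_host_ptr, nbytes, cudaMemcpyKind.cudaMemcpyHostToDevice)
    return err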

class cudaAccessProperty(IntEnum):
    """
    Specifies a performance hint with :py:obj:`~.cudaAccessPolicyWindow`
    for the hitProp and missProp members.
    """

    #: Normal cache persistence.
    cudaAccessPropertyNormal = cyruntime.cudaAccessProperty.cudaAccessPropertyNormal

    #: Streaming access is less likely to persist in cache.
    cudaAccessPropertyStreaming = cyruntime.cudaAccessProperty.cudaAccessPropertyStreaming

    #: Persisting access is more likely to persist in cache.
    cudaAccessPropertyPersisting = cyruntime.cudaAccessProperty.cudaAccessPropertyPersisting

_dict_cudaAccessProperty = dict(((int(v), v) for k, v in cudaAccessProperty.__members__.items()))
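
# --- Illustrative sketch (not part of the generated bindings) -----------------
# cudaAccessProperty populates the hitProp/missProp members of a
# cudaAccessPolicyWindow. The sketch assumes cudaAccessPolicyWindow exposes the
# same field names as the C struct (base_ptr, num_bytes, hitRatio, hitProp,
# missProp):
def _example_access_policy_window(base_ptr, num_bytes):
    """Illustrative only: hint that ~60% of accesses in the window should persist in L2."""
    window = cudaAccessPolicyWindow()
    window.base_ptr = base_ptr
    window.num_bytes = num_bytes
    window.hitRatio = 0.6
    window.hitProp = cudaAccessProperty.cudaAccessPropertyPersisting
    window.missProp = cudaAccessProperty.cudaAccessPropertyStreaming
    return window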

class cudaStreamCaptureStatus(IntEnum):
    """
    Possible stream capture statuses returned by
    :py:obj:`~.cudaStreamIsCapturing`
    """

    #: Stream is not capturing
    cudaStreamCaptureStatusNone = cyruntime.cudaStreamCaptureStatus.cudaStreamCaptureStatusNone

    #: Stream is actively capturing
    cudaStreamCaptureStatusActive = cyruntime.cudaStreamCaptureStatus.cudaStreamCaptureStatusActive

    #: Stream is part of a capture sequence that has been invalidated, but
    #: not terminated
    cudaStreamCaptureStatusInvalidated = cyruntime.cudaStreamCaptureStatus.cudaStreamCaptureStatusInvalidated

_dict_cudaStreamCaptureStatus = dict(((int(v), v) for k, v in cudaStreamCaptureStatus.__members__.items()))
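
# --- Illustrative sketch (not part of the generated bindings) -----------------
# cudaStreamIsCapturing() reports one of the statuses above; sketch using the
# (err, value) tuple-return convention of this module:
def _example_is_actively_capturing(stream):
    """Illustrative only: True while `stream` is actively capturing a graph."""
    err, status = cudaStreamIsCapturing(stream)
    return err == cudaError_t.cudaSuccess and status == cudaStreamCaptureStatus.cudaStreamCaptureStatusActive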

class cudaStreamCaptureMode(IntEnum):
    """
    Possible modes for stream capture thread interactions. For more
    details see :py:obj:`~.cudaStreamBeginCapture` and
    :py:obj:`~.cudaThreadExchangeStreamCaptureMode`
    """
    cudaStreamCaptureModeGlobal = cyruntime.cudaStreamCaptureMode.cudaStreamCaptureModeGlobal
    cudaStreamCaptureModeThreadLocal = cyruntime.cudaStreamCaptureMode.cudaStreamCaptureModeThreadLocal
    cudaStreamCaptureModeRelaxed = cyruntime.cudaStreamCaptureMode.cudaStreamCaptureModeRelaxed

_dict_cudaStreamCaptureMode = dict(((int(v), v) for k, v in cudaStreamCaptureMode.__members__.items()))
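
# --- Illustrative sketch (not part of the generated bindings) -----------------
# A typical capture sequence brackets asynchronous work between
# cudaStreamBeginCapture() and cudaStreamEndCapture(); the work itself is elided:
def _example_capture_graph(stream):
    """Illustrative only: capture the (empty) work on `stream` into a graph."""
    err, = cudaStreamBeginCapture(stream, cudaStreamCaptureMode.cudaStreamCaptureModeGlobal)
    if err != cudaError_t.cudaSuccess:
        return err, None
    # ... enqueue async work on `stream` here ...
    err, graph = cudaStreamEndCapture(stream)
    return err, graph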

class cudaSynchronizationPolicy(IntEnum):
    """
    """
    cudaSyncPolicyAuto = cyruntime.cudaSynchronizationPolicy.cudaSyncPolicyAuto
    cudaSyncPolicySpin = cyruntime.cudaSynchronizationPolicy.cudaSyncPolicySpin
    cudaSyncPolicyYield = cyruntime.cudaSynchronizationPolicy.cudaSyncPolicyYield
    cudaSyncPolicyBlockingSync = cyruntime.cudaSynchronizationPolicy.cudaSyncPolicyBlockingSync

_dict_cudaSynchronizationPolicy = dict(((int(v), v) for k, v in cudaSynchronizationPolicy.__members__.items()))

class cudaClusterSchedulingPolicy(IntEnum):
    """
    Cluster scheduling policies. These may be passed to
    :py:obj:`~.cudaFuncSetAttribute`
    """

    #: The default policy
    cudaClusterSchedulingPolicyDefault = cyruntime.cudaClusterSchedulingPolicy.cudaClusterSchedulingPolicyDefault

    #: Spread the blocks within a cluster to the SMs
    cudaClusterSchedulingPolicySpread = cyruntime.cudaClusterSchedulingPolicy.cudaClusterSchedulingPolicySpread

    #: Allow the hardware to load-balance the blocks in a cluster to the
    #: SMs
    cudaClusterSchedulingPolicyLoadBalancing = cyruntime.cudaClusterSchedulingPolicy.cudaClusterSchedulingPolicyLoadBalancing

_dict_cudaClusterSchedulingPolicy = dict(((int(v), v) for k, v in cudaClusterSchedulingPolicy.__members__.items()))

class cudaStreamUpdateCaptureDependenciesFlags(IntEnum):
    """
    Flags for :py:obj:`~.cudaStreamUpdateCaptureDependencies`
    """

    #: Add new nodes to the dependency set
    cudaStreamAddCaptureDependencies = cyruntime.cudaStreamUpdateCaptureDependenciesFlags.cudaStreamAddCaptureDependencies

    #: Replace the dependency set with the new nodes
    cudaStreamSetCaptureDependencies = cyruntime.cudaStreamUpdateCaptureDependenciesFlags.cudaStreamSetCaptureDependencies

_dict_cudaStreamUpdateCaptureDependenciesFlags = dict(((int(v), v) for k, v in cudaStreamUpdateCaptureDependenciesFlags.__members__.items()))

class cudaUserObjectFlags(IntEnum):
    """
    Flags for user objects for graphs
    """

    #: Indicates the destructor execution is not synchronized by any CUDA
    #: handle.
    cudaUserObjectNoDestructorSync = cyruntime.cudaUserObjectFlags.cudaUserObjectNoDestructorSync

_dict_cudaUserObjectFlags = dict(((int(v), v) for k, v in cudaUserObjectFlags.__members__.items()))

class cudaUserObjectRetainFlags(IntEnum):
    """
    Flags for retaining user object references for graphs
    """

    #: Transfer references from the caller rather than creating new
    #: references.
    cudaGraphUserObjectMove = cyruntime.cudaUserObjectRetainFlags.cudaGraphUserObjectMove

_dict_cudaUserObjectRetainFlags = dict(((int(v), v) for k, v in cudaUserObjectRetainFlags.__members__.items()))

class cudaGraphicsRegisterFlags(IntEnum):
    """
    CUDA graphics interop register flags
    """

    #: Default
    cudaGraphicsRegisterFlagsNone = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsNone

    #: CUDA will not write to this resource
    cudaGraphicsRegisterFlagsReadOnly = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsReadOnly

    #: CUDA will only write to and will not read from this resource
    cudaGraphicsRegisterFlagsWriteDiscard = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsWriteDiscard

    #: CUDA will bind this resource to a surface reference
    cudaGraphicsRegisterFlagsSurfaceLoadStore = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsSurfaceLoadStore

    #: CUDA will perform texture gather operations on this resource
    cudaGraphicsRegisterFlagsTextureGather = cyruntime.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsTextureGather

_dict_cudaGraphicsRegisterFlags = dict(((int(v), v) for k, v in cudaGraphicsRegisterFlags.__members__.items()))

class cudaGraphicsMapFlags(IntEnum):
    """
    CUDA graphics interop map flags
    """

    #: Default; Assume resource can be read/written
    cudaGraphicsMapFlagsNone = cyruntime.cudaGraphicsMapFlags.cudaGraphicsMapFlagsNone

    #: CUDA will not write to this resource
    cudaGraphicsMapFlagsReadOnly = cyruntime.cudaGraphicsMapFlags.cudaGraphicsMapFlagsReadOnly

    #: CUDA will only write to and will not read from this resource
    cudaGraphicsMapFlagsWriteDiscard = cyruntime.cudaGraphicsMapFlags.cudaGraphicsMapFlagsWriteDiscard

_dict_cudaGraphicsMapFlags = dict(((int(v), v) for k, v in cudaGraphicsMapFlags.__members__.items()))

class cudaGraphicsCubeFace(IntEnum):
    """
    CUDA graphics interop array indices for cube maps
    """

    #: Positive X face of cubemap
    cudaGraphicsCubeFacePositiveX = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFacePositiveX

    #: Negative X face of cubemap
    cudaGraphicsCubeFaceNegativeX = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFaceNegativeX

    #: Positive Y face of cubemap
    cudaGraphicsCubeFacePositiveY = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFacePositiveY

    #: Negative Y face of cubemap
    cudaGraphicsCubeFaceNegativeY = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFaceNegativeY

    #: Positive Z face of cubemap
    cudaGraphicsCubeFacePositiveZ = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFacePositiveZ

    #: Negative Z face of cubemap
    cudaGraphicsCubeFaceNegativeZ = cyruntime.cudaGraphicsCubeFace.cudaGraphicsCubeFaceNegativeZ

_dict_cudaGraphicsCubeFace = dict(((int(v), v) for k, v in cudaGraphicsCubeFace.__members__.items()))

class cudaResourceType(IntEnum):
    """
    CUDA resource types
    """

    #: Array resource
    cudaResourceTypeArray = cyruntime.cudaResourceType.cudaResourceTypeArray

    #: Mipmapped array resource
    cudaResourceTypeMipmappedArray = cyruntime.cudaResourceType.cudaResourceTypeMipmappedArray

    #: Linear resource
    cudaResourceTypeLinear = cyruntime.cudaResourceType.cudaResourceTypeLinear

    #: Pitch 2D resource
    cudaResourceTypePitch2D = cyruntime.cudaResourceType.cudaResourceTypePitch2D

_dict_cudaResourceType = dict(((int(v), v) for k, v in cudaResourceType.__members__.items()))
2221class cudaResourceViewFormat(IntEnum):
2222 """
2223 CUDA texture resource view formats
2224 """
2226 #: No resource view format (use underlying resource format)
2227 cudaResViewFormatNone = cyruntime.cudaResourceViewFormat.cudaResViewFormatNone
2229 #: 1 channel unsigned 8-bit integers
2230 cudaResViewFormatUnsignedChar1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedChar1
2232 #: 2 channel unsigned 8-bit integers
2233 cudaResViewFormatUnsignedChar2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedChar2
2235 #: 4 channel unsigned 8-bit integers
2236 cudaResViewFormatUnsignedChar4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedChar4
2238 #: 1 channel signed 8-bit integers
2239 cudaResViewFormatSignedChar1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedChar1
2241 #: 2 channel signed 8-bit integers
2242 cudaResViewFormatSignedChar2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedChar2
2244 #: 4 channel signed 8-bit integers
2245 cudaResViewFormatSignedChar4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedChar4
2247 #: 1 channel unsigned 16-bit integers
2248 cudaResViewFormatUnsignedShort1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedShort1
2250 #: 2 channel unsigned 16-bit integers
2251 cudaResViewFormatUnsignedShort2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedShort2
2253 #: 4 channel unsigned 16-bit integers
2254 cudaResViewFormatUnsignedShort4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedShort4
2256 #: 1 channel signed 16-bit integers
2257 cudaResViewFormatSignedShort1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedShort1
2259 #: 2 channel signed 16-bit integers
2260 cudaResViewFormatSignedShort2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedShort2
2262 #: 4 channel signed 16-bit integers
2263 cudaResViewFormatSignedShort4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedShort4
2265 #: 1 channel unsigned 32-bit integers
2266 cudaResViewFormatUnsignedInt1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedInt1
2268 #: 2 channel unsigned 32-bit integers
2269 cudaResViewFormatUnsignedInt2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedInt2
2271 #: 4 channel unsigned 32-bit integers
2272 cudaResViewFormatUnsignedInt4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedInt4
2274 #: 1 channel signed 32-bit integers
2275 cudaResViewFormatSignedInt1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedInt1
2277 #: 2 channel signed 32-bit integers
2278 cudaResViewFormatSignedInt2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedInt2
2280 #: 4 channel signed 32-bit integers
2281 cudaResViewFormatSignedInt4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedInt4
2283 #: 1 channel 16-bit floating point
2284 cudaResViewFormatHalf1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatHalf1
2286 #: 2 channel 16-bit floating point
2287 cudaResViewFormatHalf2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatHalf2
2289 #: 4 channel 16-bit floating point
2290 cudaResViewFormatHalf4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatHalf4
2292 #: 1 channel 32-bit floating point
2293 cudaResViewFormatFloat1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatFloat1
2295 #: 2 channel 32-bit floating point
2296 cudaResViewFormatFloat2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatFloat2
2298 #: 4 channel 32-bit floating point
2299 cudaResViewFormatFloat4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatFloat4
2301 #: Block compressed 1
2302 cudaResViewFormatUnsignedBlockCompressed1 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed1
2304 #: Block compressed 2
2305 cudaResViewFormatUnsignedBlockCompressed2 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed2
2307 #: Block compressed 3
2308 cudaResViewFormatUnsignedBlockCompressed3 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed3
2310 #: Block compressed 4 unsigned
2311 cudaResViewFormatUnsignedBlockCompressed4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed4
2313 #: Block compressed 4 signed
2314 cudaResViewFormatSignedBlockCompressed4 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedBlockCompressed4
2316 #: Block compressed 5 unsigned
2317 cudaResViewFormatUnsignedBlockCompressed5 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed5
2319 #: Block compressed 5 signed
2320 cudaResViewFormatSignedBlockCompressed5 = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedBlockCompressed5
2322 #: Block compressed 6 unsigned half-float
2323 cudaResViewFormatUnsignedBlockCompressed6H = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed6H
2325 #: Block compressed 6 signed half-float
2326 cudaResViewFormatSignedBlockCompressed6H = cyruntime.cudaResourceViewFormat.cudaResViewFormatSignedBlockCompressed6H
2328 #: Block compressed 7
2329 cudaResViewFormatUnsignedBlockCompressed7 = cyruntime.cudaResourceViewFormat.cudaResViewFormatUnsignedBlockCompressed7
2331_dict_cudaResourceViewFormat = dict(((int(v), v) for k, v in cudaResourceViewFormat.__members__.items()))
2333class cudaFuncAttribute(IntEnum):
2334 """
2335 CUDA function attributes that can be set using
2336 :py:obj:`~.cudaFuncSetAttribute`
2337 """
2339 #: Maximum dynamic shared memory size
2340 cudaFuncAttributeMaxDynamicSharedMemorySize = cyruntime.cudaFuncAttribute.cudaFuncAttributeMaxDynamicSharedMemorySize
2342 #: Preferred shared memory-L1 cache split
2343 cudaFuncAttributePreferredSharedMemoryCarveout = cyruntime.cudaFuncAttribute.cudaFuncAttributePreferredSharedMemoryCarveout
2345 #: Indicator to enforce valid cluster dimension specification on kernel
2346 #: launch
2347 cudaFuncAttributeClusterDimMustBeSet = cyruntime.cudaFuncAttribute.cudaFuncAttributeClusterDimMustBeSet
2349 #: Required cluster width
2350 cudaFuncAttributeRequiredClusterWidth = cyruntime.cudaFuncAttribute.cudaFuncAttributeRequiredClusterWidth
2352 #: Required cluster height
2353 cudaFuncAttributeRequiredClusterHeight = cyruntime.cudaFuncAttribute.cudaFuncAttributeRequiredClusterHeight
2355 #: Required cluster depth
2356 cudaFuncAttributeRequiredClusterDepth = cyruntime.cudaFuncAttribute.cudaFuncAttributeRequiredClusterDepth
2358 #: Whether non-portable cluster scheduling policy is supported
2359 cudaFuncAttributeNonPortableClusterSizeAllowed = cyruntime.cudaFuncAttribute.cudaFuncAttributeNonPortableClusterSizeAllowed
2361 #: Required cluster scheduling policy preference
2362 cudaFuncAttributeClusterSchedulingPolicyPreference = cyruntime.cudaFuncAttribute.cudaFuncAttributeClusterSchedulingPolicyPreference
2363 cudaFuncAttributeMax = cyruntime.cudaFuncAttribute.cudaFuncAttributeMax
2365_dict_cudaFuncAttribute = dict(((int(v), v) for k, v in cudaFuncAttribute.__members__.items()))
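# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# These attributes are applied to a compiled kernel with cudaFuncSetAttribute().
# `kernel_func` below is a hypothetical device-function handle obtained
# elsewhere (e.g. from a compiled Cython/CUDA module).
def _example_opt_in_dynamic_smem(kernel_func):
    from cuda.bindings import runtime as cudart
    # Opt the kernel in to 64 KiB of dynamic shared memory per block.
    err, = cudart.cudaFuncSetAttribute(
        kernel_func,
        cudart.cudaFuncAttribute.cudaFuncAttributeMaxDynamicSharedMemorySize,
        64 * 1024)
    assert err == cudart.cudaError_t.cudaSuccess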
2367class cudaFuncCache(IntEnum):
2368 """
2369 CUDA function cache configurations
2370 """
2372 #: Default function cache configuration, no preference
2373 cudaFuncCachePreferNone = cyruntime.cudaFuncCache.cudaFuncCachePreferNone
2375 #: Prefer larger shared memory and smaller L1 cache
2376 cudaFuncCachePreferShared = cyruntime.cudaFuncCache.cudaFuncCachePreferShared
2378 #: Prefer larger L1 cache and smaller shared memory
2379 cudaFuncCachePreferL1 = cyruntime.cudaFuncCache.cudaFuncCachePreferL1
2381 #: Prefer equal size L1 cache and shared memory
2382 cudaFuncCachePreferEqual = cyruntime.cudaFuncCache.cudaFuncCachePreferEqual
2384_dict_cudaFuncCache = dict(((int(v), v) for k, v in cudaFuncCache.__members__.items()))
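# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# A device-wide cache preference is set with cudaDeviceSetCacheConfig(). This
# is only a hint; the driver may ignore it where the L1/shared split is fixed.
def _example_prefer_shared_memory():
    from cuda.bindings import runtime as cudart
    err, = cudart.cudaDeviceSetCacheConfig(
        cudart.cudaFuncCache.cudaFuncCachePreferShared)
    assert err == cudart.cudaError_t.cudaSuccess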
2386class cudaSharedMemConfig(IntEnum):
2387 """
2388 CUDA shared memory configuration [Deprecated]
2389 """
2390 cudaSharedMemBankSizeDefault = cyruntime.cudaSharedMemConfig.cudaSharedMemBankSizeDefault
2391 cudaSharedMemBankSizeFourByte = cyruntime.cudaSharedMemConfig.cudaSharedMemBankSizeFourByte
2392 cudaSharedMemBankSizeEightByte = cyruntime.cudaSharedMemConfig.cudaSharedMemBankSizeEightByte
2394_dict_cudaSharedMemConfig = dict(((int(v), v) for k, v in cudaSharedMemConfig.__members__.items()))
2396class cudaSharedCarveout(IntEnum):
2397 """
2398 Shared memory carveout configurations. These may be passed to
2399 cudaFuncSetAttribute
2400 """
2402 #: No preference for shared memory or L1 (default)
2403 cudaSharedmemCarveoutDefault = cyruntime.cudaSharedCarveout.cudaSharedmemCarveoutDefault
2405 #: Prefer maximum available L1 cache, minimum shared memory
2406 cudaSharedmemCarveoutMaxL1 = cyruntime.cudaSharedCarveout.cudaSharedmemCarveoutMaxL1
2408 #: Prefer maximum available shared memory, minimum L1 cache
2409 cudaSharedmemCarveoutMaxShared = cyruntime.cudaSharedCarveout.cudaSharedmemCarveoutMaxShared
2411_dict_cudaSharedCarveout = dict(((int(v), v) for k, v in cudaSharedCarveout.__members__.items()))
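# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# A carveout constant (or a plain percentage in [0, 100]) is passed as the
# value of cudaFuncAttributePreferredSharedMemoryCarveout. `kernel_func` is
# again a hypothetical device-function handle.
def _example_prefer_max_shared_carveout(kernel_func):
    from cuda.bindings import runtime as cudart
    err, = cudart.cudaFuncSetAttribute(
        kernel_func,
        cudart.cudaFuncAttribute.cudaFuncAttributePreferredSharedMemoryCarveout,
        int(cudart.cudaSharedCarveout.cudaSharedmemCarveoutMaxShared))
    assert err == cudart.cudaError_t.cudaSuccess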
2413class cudaComputeMode(IntEnum):
2414 """
2415 CUDA device compute modes
2416 """
2418 #: Default compute mode (Multiple threads can use
2419 #: :py:obj:`~.cudaSetDevice()` with this device)
2420 cudaComputeModeDefault = cyruntime.cudaComputeMode.cudaComputeModeDefault
2422 #: Compute-exclusive-thread mode (Only one thread in one process will
2423 #: be able to use :py:obj:`~.cudaSetDevice()` with this device)
2424 cudaComputeModeExclusive = cyruntime.cudaComputeMode.cudaComputeModeExclusive
2426 #: Compute-prohibited mode (No threads can use
2427 #: :py:obj:`~.cudaSetDevice()` with this device)
2428 cudaComputeModeProhibited = cyruntime.cudaComputeMode.cudaComputeModeProhibited
2430 #: Compute-exclusive-process mode (Many threads in one process will be
2431 #: able to use :py:obj:`~.cudaSetDevice()` with this device)
2432 cudaComputeModeExclusiveProcess = cyruntime.cudaComputeMode.cudaComputeModeExclusiveProcess
2434_dict_cudaComputeMode = dict(((int(v), v) for k, v in cudaComputeMode.__members__.items()))
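# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# The compute mode is read as an int via cudaDeviceGetAttribute() and can be
# mapped back onto the enum for readable comparisons.
def _example_device_is_shareable(device=0):
    from cuda.bindings import runtime as cudart
    err, mode = cudart.cudaDeviceGetAttribute(
        cudart.cudaDeviceAttr.cudaDevAttrComputeMode, device)
    assert err == cudart.cudaError_t.cudaSuccess
    return cudart.cudaComputeMode(mode) == cudart.cudaComputeMode.cudaComputeModeDefault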
2436class cudaLimit(IntEnum):
2437 """
2438 CUDA Limits
2439 """
2441 #: GPU thread stack size
2442 cudaLimitStackSize = cyruntime.cudaLimit.cudaLimitStackSize
2444 #: GPU printf FIFO size
2445 cudaLimitPrintfFifoSize = cyruntime.cudaLimit.cudaLimitPrintfFifoSize
2447 #: GPU malloc heap size
2448 cudaLimitMallocHeapSize = cyruntime.cudaLimit.cudaLimitMallocHeapSize
2450 #: GPU device runtime synchronize depth
2451 cudaLimitDevRuntimeSyncDepth = cyruntime.cudaLimit.cudaLimitDevRuntimeSyncDepth
2453 #: GPU device runtime pending launch count
2454 cudaLimitDevRuntimePendingLaunchCount = cyruntime.cudaLimit.cudaLimitDevRuntimePendingLaunchCount
2456 #: A value between 0 and 128 that indicates the maximum fetch
2457 #: granularity of L2 (in bytes). This is a hint.
2458 cudaLimitMaxL2FetchGranularity = cyruntime.cudaLimit.cudaLimitMaxL2FetchGranularity
2460 #: A size in bytes for L2 persisting lines cache size
2461 cudaLimitPersistingL2CacheSize = cyruntime.cudaLimit.cudaLimitPersistingL2CacheSize
2463_dict_cudaLimit = dict(((int(v), v) for k, v in cudaLimit.__members__.items()))
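# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# Limits are written with cudaDeviceSetLimit() and read back with
# cudaDeviceGetLimit(); the driver may round the requested value, so reading
# it back reveals what was actually applied.
def _example_grow_malloc_heap(nbytes=64 * 1024 * 1024):
    from cuda.bindings import runtime as cudart
    err, = cudart.cudaDeviceSetLimit(
        cudart.cudaLimit.cudaLimitMallocHeapSize, nbytes)
    assert err == cudart.cudaError_t.cudaSuccess
    err, actual = cudart.cudaDeviceGetLimit(
        cudart.cudaLimit.cudaLimitMallocHeapSize)
    assert err == cudart.cudaError_t.cudaSuccess
    return actual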
2465class cudaMemoryAdvise(IntEnum):
2466 """
2467 CUDA Memory Advise values
2468 """
2470 #: Data will mostly be read and only occasionally be written to
2471 cudaMemAdviseSetReadMostly = cyruntime.cudaMemoryAdvise.cudaMemAdviseSetReadMostly
2473 #: Undo the effect of :py:obj:`~.cudaMemAdviseSetReadMostly`
2474 cudaMemAdviseUnsetReadMostly = cyruntime.cudaMemoryAdvise.cudaMemAdviseUnsetReadMostly
2476 #: Set the preferred location for the data as the specified device
2477 cudaMemAdviseSetPreferredLocation = cyruntime.cudaMemoryAdvise.cudaMemAdviseSetPreferredLocation
2479 #: Clear the preferred location for the data
2480 cudaMemAdviseUnsetPreferredLocation = cyruntime.cudaMemoryAdvise.cudaMemAdviseUnsetPreferredLocation
2482 #: Data will be accessed by the specified device, so prevent page
2483 #: faults as much as possible
2484 cudaMemAdviseSetAccessedBy = cyruntime.cudaMemoryAdvise.cudaMemAdviseSetAccessedBy
2486 #: Let the Unified Memory subsystem decide on the page faulting policy
2487 #: for the specified device
2488 cudaMemAdviseUnsetAccessedBy = cyruntime.cudaMemoryAdvise.cudaMemAdviseUnsetAccessedBy
2490_dict_cudaMemoryAdvise = dict(((int(v), v) for k, v in cudaMemoryAdvise.__members__.items()))
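# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# Advice is applied to a managed allocation with cudaMemAdvise(). This sketch
# assumes the CUDA 13 signature taking a cudaMemLocation; older bindings take
# a bare device ordinal as the last argument instead.
def _example_mark_read_mostly(nbytes=1 << 20, device=0):
    from cuda.bindings import runtime as cudart
    err, ptr = cudart.cudaMallocManaged(nbytes, cudart.cudaMemAttachGlobal)
    assert err == cudart.cudaError_t.cudaSuccess
    loc = cudart.cudaMemLocation()
    loc.type = cudart.cudaMemLocationType.cudaMemLocationTypeDevice
    loc.id = device
    err, = cudart.cudaMemAdvise(
        ptr, nbytes, cudart.cudaMemoryAdvise.cudaMemAdviseSetReadMostly, loc)
    assert err == cudart.cudaError_t.cudaSuccess
    return ptr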
2492class cudaMemRangeAttribute(IntEnum):
2493 """
2494 CUDA range attributes
2495 """
2497 #: Whether the range will mostly be read and only occasionally be
2498 #: written to
2499 cudaMemRangeAttributeReadMostly = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeReadMostly
2501 #: The preferred location of the range
2502 cudaMemRangeAttributePreferredLocation = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocation
2504 #: Memory range has :py:obj:`~.cudaMemAdviseSetAccessedBy` set for
2505 #: specified device
2506 cudaMemRangeAttributeAccessedBy = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeAccessedBy
2508 #: The last location to which the range was prefetched
2509 cudaMemRangeAttributeLastPrefetchLocation = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeLastPrefetchLocation
2511 #: The preferred location type of the range
2512 cudaMemRangeAttributePreferredLocationType = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocationType
2514 #: The preferred location id of the range
2515 cudaMemRangeAttributePreferredLocationId = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocationId
2517 #: The last location type to which the range was prefetched
2518 cudaMemRangeAttributeLastPrefetchLocationType = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeLastPrefetchLocationType
2520 #: The last location id to which the range was prefetched
2521 cudaMemRangeAttributeLastPrefetchLocationId = cyruntime.cudaMemRangeAttribute.cudaMemRangeAttributeLastPrefetchLocationId
2523_dict_cudaMemRangeAttribute = dict(((int(v), v) for k, v in cudaMemRangeAttribute.__members__.items()))
2525class cudaFlushGPUDirectRDMAWritesOptions(IntEnum):
2526 """
2527 CUDA GPUDirect RDMA flush writes APIs supported on the device
2528 """
2530 #: :py:obj:`~.cudaDeviceFlushGPUDirectRDMAWrites()` and its CUDA Driver
2531 #: API counterpart are supported on the device.
2532 cudaFlushGPUDirectRDMAWritesOptionHost = cyruntime.cudaFlushGPUDirectRDMAWritesOptions.cudaFlushGPUDirectRDMAWritesOptionHost
2534 #: The :py:obj:`~.CU_STREAM_WAIT_VALUE_FLUSH` flag and the
2535 #: :py:obj:`~.CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES` MemOp are supported
2536 #: on the CUDA device.
2537 cudaFlushGPUDirectRDMAWritesOptionMemOps = cyruntime.cudaFlushGPUDirectRDMAWritesOptions.cudaFlushGPUDirectRDMAWritesOptionMemOps
2539_dict_cudaFlushGPUDirectRDMAWritesOptions = dict(((int(v), v) for k, v in cudaFlushGPUDirectRDMAWritesOptions.__members__.items()))
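# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# cudaDevAttrGPUDirectRDMAFlushWritesOptions returns a bitmask built from this
# enum, so individual capabilities are tested with bitwise AND.
def _example_host_flush_supported(device=0):
    from cuda.bindings import runtime as cudart
    err, opts = cudart.cudaDeviceGetAttribute(
        cudart.cudaDeviceAttr.cudaDevAttrGPUDirectRDMAFlushWritesOptions,
        device)
    assert err == cudart.cudaError_t.cudaSuccess
    host_bit = cudart.cudaFlushGPUDirectRDMAWritesOptions.cudaFlushGPUDirectRDMAWritesOptionHost
    return bool(opts & int(host_bit))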
2541class cudaGPUDirectRDMAWritesOrdering(IntEnum):
2542 """
2543 CUDA GPUDirect RDMA flush writes ordering features of the device
2544 """
2546 #: The device does not natively support ordering of GPUDirect RDMA
2547 #: writes. :py:obj:`~.cudaFlushGPUDirectRDMAWrites()` can be leveraged
2548 #: if supported.
2549 cudaGPUDirectRDMAWritesOrderingNone = cyruntime.cudaGPUDirectRDMAWritesOrdering.cudaGPUDirectRDMAWritesOrderingNone
2551 #: Natively, the device can consistently consume GPUDirect RDMA writes,
2552 #: although other CUDA devices may not.
2553 cudaGPUDirectRDMAWritesOrderingOwner = cyruntime.cudaGPUDirectRDMAWritesOrdering.cudaGPUDirectRDMAWritesOrderingOwner
2555 #: Any CUDA device in the system can consistently consume GPUDirect
2556 #: RDMA writes to this device.
2557 cudaGPUDirectRDMAWritesOrderingAllDevices = cyruntime.cudaGPUDirectRDMAWritesOrdering.cudaGPUDirectRDMAWritesOrderingAllDevices
2559_dict_cudaGPUDirectRDMAWritesOrdering = dict(((int(v), v) for k, v in cudaGPUDirectRDMAWritesOrdering.__members__.items()))
2561class cudaFlushGPUDirectRDMAWritesScope(IntEnum):
2562 """
2563 CUDA GPUDirect RDMA flush writes scopes
2564 """
2566 #: Blocks until remote writes are visible to the CUDA device context
2567 #: owning the data.
2568 cudaFlushGPUDirectRDMAWritesToOwner = cyruntime.cudaFlushGPUDirectRDMAWritesScope.cudaFlushGPUDirectRDMAWritesToOwner
2570 #: Blocks until remote writes are visible to all CUDA device contexts.
2571 cudaFlushGPUDirectRDMAWritesToAllDevices = cyruntime.cudaFlushGPUDirectRDMAWritesScope.cudaFlushGPUDirectRDMAWritesToAllDevices
2573_dict_cudaFlushGPUDirectRDMAWritesScope = dict(((int(v), v) for k, v in cudaFlushGPUDirectRDMAWritesScope.__members__.items()))
2575class cudaFlushGPUDirectRDMAWritesTarget(IntEnum):
2576 """
2577 CUDA GPUDirect RDMA flush writes targets
2578 """
2580 #: Sets the target for :py:obj:`~.cudaDeviceFlushGPUDirectRDMAWrites()`
2581 #: to the currently active CUDA device context.
2582 cudaFlushGPUDirectRDMAWritesTargetCurrentDevice = cyruntime.cudaFlushGPUDirectRDMAWritesTarget.cudaFlushGPUDirectRDMAWritesTargetCurrentDevice
2584_dict_cudaFlushGPUDirectRDMAWritesTarget = dict(((int(v), v) for k, v in cudaFlushGPUDirectRDMAWritesTarget.__members__.items()))
2586class cudaDeviceAttr(IntEnum):
2587 """
2588 CUDA device attributes
2589 """
2591 #: Maximum number of threads per block
2592 cudaDevAttrMaxThreadsPerBlock = cyruntime.cudaDeviceAttr.cudaDevAttrMaxThreadsPerBlock
2594 #: Maximum block dimension X
2595 cudaDevAttrMaxBlockDimX = cyruntime.cudaDeviceAttr.cudaDevAttrMaxBlockDimX
2597 #: Maximum block dimension Y
2598 cudaDevAttrMaxBlockDimY = cyruntime.cudaDeviceAttr.cudaDevAttrMaxBlockDimY
2600 #: Maximum block dimension Z
2601 cudaDevAttrMaxBlockDimZ = cyruntime.cudaDeviceAttr.cudaDevAttrMaxBlockDimZ
2603 #: Maximum grid dimension X
2604 cudaDevAttrMaxGridDimX = cyruntime.cudaDeviceAttr.cudaDevAttrMaxGridDimX
2606 #: Maximum grid dimension Y
2607 cudaDevAttrMaxGridDimY = cyruntime.cudaDeviceAttr.cudaDevAttrMaxGridDimY
2609 #: Maximum grid dimension Z
2610 cudaDevAttrMaxGridDimZ = cyruntime.cudaDeviceAttr.cudaDevAttrMaxGridDimZ
2612 #: Maximum shared memory available per block in bytes
2613 cudaDevAttrMaxSharedMemoryPerBlock = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSharedMemoryPerBlock
2615 #: Memory available on device for constant variables in a CUDA C kernel
2616 #: in bytes
2617 cudaDevAttrTotalConstantMemory = cyruntime.cudaDeviceAttr.cudaDevAttrTotalConstantMemory
2619 #: Warp size in threads
2620 cudaDevAttrWarpSize = cyruntime.cudaDeviceAttr.cudaDevAttrWarpSize
2622 #: Maximum pitch in bytes allowed by memory copies
2623 cudaDevAttrMaxPitch = cyruntime.cudaDeviceAttr.cudaDevAttrMaxPitch
2625 #: Maximum number of 32-bit registers available per block
2626 cudaDevAttrMaxRegistersPerBlock = cyruntime.cudaDeviceAttr.cudaDevAttrMaxRegistersPerBlock
2628 #: Peak clock frequency in kilohertz
2629 cudaDevAttrClockRate = cyruntime.cudaDeviceAttr.cudaDevAttrClockRate
2631 #: Alignment requirement for textures
2632 cudaDevAttrTextureAlignment = cyruntime.cudaDeviceAttr.cudaDevAttrTextureAlignment
2634 #: Device can possibly copy memory and execute a kernel concurrently
2635 cudaDevAttrGpuOverlap = cyruntime.cudaDeviceAttr.cudaDevAttrGpuOverlap
2637 #: Number of multiprocessors on device
2638 cudaDevAttrMultiProcessorCount = cyruntime.cudaDeviceAttr.cudaDevAttrMultiProcessorCount
2640 #: Specifies whether there is a run time limit on kernels
2641 cudaDevAttrKernelExecTimeout = cyruntime.cudaDeviceAttr.cudaDevAttrKernelExecTimeout
2643 #: Device is integrated with host memory
2644 cudaDevAttrIntegrated = cyruntime.cudaDeviceAttr.cudaDevAttrIntegrated
2646 #: Device can map host memory into CUDA address space
2647 cudaDevAttrCanMapHostMemory = cyruntime.cudaDeviceAttr.cudaDevAttrCanMapHostMemory
2649 #: Compute mode (See :py:obj:`~.cudaComputeMode` for details)
2650 cudaDevAttrComputeMode = cyruntime.cudaDeviceAttr.cudaDevAttrComputeMode
2652 #: Maximum 1D texture width
2653 cudaDevAttrMaxTexture1DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DWidth
2655 #: Maximum 2D texture width
2656 cudaDevAttrMaxTexture2DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DWidth
2658 #: Maximum 2D texture height
2659 cudaDevAttrMaxTexture2DHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DHeight
2661 #: Maximum 3D texture width
2662 cudaDevAttrMaxTexture3DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DWidth
2664 #: Maximum 3D texture height
2665 cudaDevAttrMaxTexture3DHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DHeight
2667 #: Maximum 3D texture depth
2668 cudaDevAttrMaxTexture3DDepth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DDepth
2670 #: Maximum 2D layered texture width
2671 cudaDevAttrMaxTexture2DLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLayeredWidth
2673 #: Maximum 2D layered texture height
2674 cudaDevAttrMaxTexture2DLayeredHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLayeredHeight
2676 #: Maximum layers in a 2D layered texture
2677 cudaDevAttrMaxTexture2DLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLayeredLayers
2679 #: Alignment requirement for surfaces
2680 cudaDevAttrSurfaceAlignment = cyruntime.cudaDeviceAttr.cudaDevAttrSurfaceAlignment
2682 #: Device can possibly execute multiple kernels concurrently
2683 cudaDevAttrConcurrentKernels = cyruntime.cudaDeviceAttr.cudaDevAttrConcurrentKernels
2685 #: Device has ECC support enabled
2686 cudaDevAttrEccEnabled = cyruntime.cudaDeviceAttr.cudaDevAttrEccEnabled
2688 #: PCI bus ID of the device
2689 cudaDevAttrPciBusId = cyruntime.cudaDeviceAttr.cudaDevAttrPciBusId
2691 #: PCI device ID of the device
2692 cudaDevAttrPciDeviceId = cyruntime.cudaDeviceAttr.cudaDevAttrPciDeviceId
2694 #: Device is using TCC driver model
2695 cudaDevAttrTccDriver = cyruntime.cudaDeviceAttr.cudaDevAttrTccDriver
2697 #: Peak memory clock frequency in kilohertz
2698 cudaDevAttrMemoryClockRate = cyruntime.cudaDeviceAttr.cudaDevAttrMemoryClockRate
2700 #: Global memory bus width in bits
2701 cudaDevAttrGlobalMemoryBusWidth = cyruntime.cudaDeviceAttr.cudaDevAttrGlobalMemoryBusWidth
2703 #: Size of L2 cache in bytes
2704 cudaDevAttrL2CacheSize = cyruntime.cudaDeviceAttr.cudaDevAttrL2CacheSize
2706 #: Maximum resident threads per multiprocessor
2707 cudaDevAttrMaxThreadsPerMultiProcessor = cyruntime.cudaDeviceAttr.cudaDevAttrMaxThreadsPerMultiProcessor
2709 #: Number of asynchronous engines
2710 cudaDevAttrAsyncEngineCount = cyruntime.cudaDeviceAttr.cudaDevAttrAsyncEngineCount
2712 #: Device shares a unified address space with the host
2713 cudaDevAttrUnifiedAddressing = cyruntime.cudaDeviceAttr.cudaDevAttrUnifiedAddressing
2715 #: Maximum 1D layered texture width
2716 cudaDevAttrMaxTexture1DLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DLayeredWidth
2718 #: Maximum layers in a 1D layered texture
2719 cudaDevAttrMaxTexture1DLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DLayeredLayers
2721 #: Maximum 2D texture width if cudaArrayTextureGather is set
2722 cudaDevAttrMaxTexture2DGatherWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DGatherWidth
2724 #: Maximum 2D texture height if cudaArrayTextureGather is set
2725 cudaDevAttrMaxTexture2DGatherHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DGatherHeight
2727 #: Alternate maximum 3D texture width
2728 cudaDevAttrMaxTexture3DWidthAlt = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DWidthAlt
2730 #: Alternate maximum 3D texture height
2731 cudaDevAttrMaxTexture3DHeightAlt = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DHeightAlt
2733 #: Alternate maximum 3D texture depth
2734 cudaDevAttrMaxTexture3DDepthAlt = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture3DDepthAlt
2736 #: PCI domain ID of the device
2737 cudaDevAttrPciDomainId = cyruntime.cudaDeviceAttr.cudaDevAttrPciDomainId
2739 #: Pitch alignment requirement for textures
2740 cudaDevAttrTexturePitchAlignment = cyruntime.cudaDeviceAttr.cudaDevAttrTexturePitchAlignment
2742 #: Maximum cubemap texture width/height
2743 cudaDevAttrMaxTextureCubemapWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTextureCubemapWidth
2745 #: Maximum cubemap layered texture width/height
2746 cudaDevAttrMaxTextureCubemapLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTextureCubemapLayeredWidth
2748 #: Maximum layers in a cubemap layered texture
2749 cudaDevAttrMaxTextureCubemapLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTextureCubemapLayeredLayers
2751 #: Maximum 1D surface width
2752 cudaDevAttrMaxSurface1DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface1DWidth
2754 #: Maximum 2D surface width
2755 cudaDevAttrMaxSurface2DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DWidth
2757 #: Maximum 2D surface height
2758 cudaDevAttrMaxSurface2DHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DHeight
2760 #: Maximum 3D surface width
2761 cudaDevAttrMaxSurface3DWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface3DWidth
2763 #: Maximum 3D surface height
2764 cudaDevAttrMaxSurface3DHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface3DHeight
2766 #: Maximum 3D surface depth
2767 cudaDevAttrMaxSurface3DDepth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface3DDepth
2769 #: Maximum 1D layered surface width
2770 cudaDevAttrMaxSurface1DLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface1DLayeredWidth
2772 #: Maximum layers in a 1D layered surface
2773 cudaDevAttrMaxSurface1DLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface1DLayeredLayers
2775 #: Maximum 2D layered surface width
2776 cudaDevAttrMaxSurface2DLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DLayeredWidth
2778 #: Maximum 2D layered surface height
2779 cudaDevAttrMaxSurface2DLayeredHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DLayeredHeight
2781 #: Maximum layers in a 2D layered surface
2782 cudaDevAttrMaxSurface2DLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurface2DLayeredLayers
2784 #: Maximum cubemap surface width
2785 cudaDevAttrMaxSurfaceCubemapWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurfaceCubemapWidth
2787 #: Maximum cubemap layered surface width
2788 cudaDevAttrMaxSurfaceCubemapLayeredWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurfaceCubemapLayeredWidth
2790 #: Maximum layers in a cubemap layered surface
2791 cudaDevAttrMaxSurfaceCubemapLayeredLayers = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSurfaceCubemapLayeredLayers
2793 #: Maximum 1D linear texture width
2794 cudaDevAttrMaxTexture1DLinearWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DLinearWidth
2796 #: Maximum 2D linear texture width
2797 cudaDevAttrMaxTexture2DLinearWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLinearWidth
2799 #: Maximum 2D linear texture height
2800 cudaDevAttrMaxTexture2DLinearHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLinearHeight
2802 #: Maximum 2D linear texture pitch in bytes
2803 cudaDevAttrMaxTexture2DLinearPitch = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DLinearPitch
2805 #: Maximum mipmapped 2D texture width
2806 cudaDevAttrMaxTexture2DMipmappedWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DMipmappedWidth
2808 #: Maximum mipmapped 2D texture height
2809 cudaDevAttrMaxTexture2DMipmappedHeight = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture2DMipmappedHeight
2811 #: Major compute capability version number
2812 cudaDevAttrComputeCapabilityMajor = cyruntime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMajor
2814 #: Minor compute capability version number
2815 cudaDevAttrComputeCapabilityMinor = cyruntime.cudaDeviceAttr.cudaDevAttrComputeCapabilityMinor
2817 #: Maximum mipmapped 1D texture width
2818 cudaDevAttrMaxTexture1DMipmappedWidth = cyruntime.cudaDeviceAttr.cudaDevAttrMaxTexture1DMipmappedWidth
2820 #: Device supports stream priorities
2821 cudaDevAttrStreamPrioritiesSupported = cyruntime.cudaDeviceAttr.cudaDevAttrStreamPrioritiesSupported
2823 #: Device supports caching globals in L1
2824 cudaDevAttrGlobalL1CacheSupported = cyruntime.cudaDeviceAttr.cudaDevAttrGlobalL1CacheSupported
2826 #: Device supports caching locals in L1
2827 cudaDevAttrLocalL1CacheSupported = cyruntime.cudaDeviceAttr.cudaDevAttrLocalL1CacheSupported
2829 #: Maximum shared memory available per multiprocessor in bytes
2830 cudaDevAttrMaxSharedMemoryPerMultiprocessor = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSharedMemoryPerMultiprocessor
2832 #: Maximum number of 32-bit registers available per multiprocessor
2833 cudaDevAttrMaxRegistersPerMultiprocessor = cyruntime.cudaDeviceAttr.cudaDevAttrMaxRegistersPerMultiprocessor
2835 #: Device can allocate managed memory on this system
2836 cudaDevAttrManagedMemory = cyruntime.cudaDeviceAttr.cudaDevAttrManagedMemory
2838 #: Device is on a multi-GPU board
2839 cudaDevAttrIsMultiGpuBoard = cyruntime.cudaDeviceAttr.cudaDevAttrIsMultiGpuBoard
2841 #: Unique identifier for a group of devices on the same multi-GPU board
2842 cudaDevAttrMultiGpuBoardGroupID = cyruntime.cudaDeviceAttr.cudaDevAttrMultiGpuBoardGroupID
2844 #: Link between the device and the host supports native atomic
2845 #: operations
2846 cudaDevAttrHostNativeAtomicSupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostNativeAtomicSupported
2848 #: Ratio of single precision performance (in floating-point operations
2849 #: per second) to double precision performance
2850 cudaDevAttrSingleToDoublePrecisionPerfRatio = cyruntime.cudaDeviceAttr.cudaDevAttrSingleToDoublePrecisionPerfRatio
2852 #: Device supports coherently accessing pageable memory without calling
2853 #: cudaHostRegister on it
2854 cudaDevAttrPageableMemoryAccess = cyruntime.cudaDeviceAttr.cudaDevAttrPageableMemoryAccess
2856 #: Device can coherently access managed memory concurrently with the
2857 #: CPU
2858 cudaDevAttrConcurrentManagedAccess = cyruntime.cudaDeviceAttr.cudaDevAttrConcurrentManagedAccess
2860 #: Device supports Compute Preemption
2861 cudaDevAttrComputePreemptionSupported = cyruntime.cudaDeviceAttr.cudaDevAttrComputePreemptionSupported
2863 #: Device can access host registered memory at the same virtual address
2864 #: as the CPU
2865 cudaDevAttrCanUseHostPointerForRegisteredMem = cyruntime.cudaDeviceAttr.cudaDevAttrCanUseHostPointerForRegisteredMem
2866 cudaDevAttrReserved92 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved92
2867 cudaDevAttrReserved93 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved93
2868 cudaDevAttrReserved94 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved94
2870 #: Device supports launching cooperative kernels via
2871 #: :py:obj:`~.cudaLaunchCooperativeKernel`
2872 cudaDevAttrCooperativeLaunch = cyruntime.cudaDeviceAttr.cudaDevAttrCooperativeLaunch
2873 cudaDevAttrReserved96 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved96
2875 #: The maximum opt-in shared memory per block. This value may vary by
2876 #: chip. See :py:obj:`~.cudaFuncSetAttribute`
2877 cudaDevAttrMaxSharedMemoryPerBlockOptin = cyruntime.cudaDeviceAttr.cudaDevAttrMaxSharedMemoryPerBlockOptin
2879 #: Device supports flushing of outstanding remote writes.
2880 cudaDevAttrCanFlushRemoteWrites = cyruntime.cudaDeviceAttr.cudaDevAttrCanFlushRemoteWrites
2882 #: Device supports host memory registration via
2883 #: :py:obj:`~.cudaHostRegister`.
2884 cudaDevAttrHostRegisterSupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostRegisterSupported
2886 #: Device accesses pageable memory via the host's page tables.
2887 cudaDevAttrPageableMemoryAccessUsesHostPageTables = cyruntime.cudaDeviceAttr.cudaDevAttrPageableMemoryAccessUsesHostPageTables
2889 #: Host can directly access managed memory on the device without
2890 #: migration.
2891 cudaDevAttrDirectManagedMemAccessFromHost = cyruntime.cudaDeviceAttr.cudaDevAttrDirectManagedMemAccessFromHost
2893 #: Maximum number of blocks per multiprocessor
2894 cudaDevAttrMaxBlocksPerMultiprocessor = cyruntime.cudaDeviceAttr.cudaDevAttrMaxBlocksPerMultiprocessor
2896 #: Maximum L2 persisting lines capacity setting in bytes.
2897 cudaDevAttrMaxPersistingL2CacheSize = cyruntime.cudaDeviceAttr.cudaDevAttrMaxPersistingL2CacheSize
2899 #: Maximum value of :py:obj:`~.cudaAccessPolicyWindow.num_bytes`.
2900 cudaDevAttrMaxAccessPolicyWindowSize = cyruntime.cudaDeviceAttr.cudaDevAttrMaxAccessPolicyWindowSize
2902 #: Shared memory reserved by CUDA driver per block in bytes
2903 cudaDevAttrReservedSharedMemoryPerBlock = cyruntime.cudaDeviceAttr.cudaDevAttrReservedSharedMemoryPerBlock
2905 #: Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays
2906 cudaDevAttrSparseCudaArraySupported = cyruntime.cudaDeviceAttr.cudaDevAttrSparseCudaArraySupported
2908 #: Device supports using the :py:obj:`~.cudaHostRegister` flag
2909 #: cudaHostRegisterReadOnly to register memory that must be mapped as
2910 #: read-only to the GPU
2911 cudaDevAttrHostRegisterReadOnlySupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostRegisterReadOnlySupported
2913 #: External timeline semaphore interop is supported on the device
2914 cudaDevAttrTimelineSemaphoreInteropSupported = cyruntime.cudaDeviceAttr.cudaDevAttrTimelineSemaphoreInteropSupported
2916 #: Device supports using the :py:obj:`~.cudaMallocAsync` and
2917 #: :py:obj:`~.cudaMemPool` family of APIs
2918 cudaDevAttrMemoryPoolsSupported = cyruntime.cudaDeviceAttr.cudaDevAttrMemoryPoolsSupported
2920 #: Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see
2921 #: https://docs.nvidia.com/cuda/gpudirect-rdma for more information)
2922 cudaDevAttrGPUDirectRDMASupported = cyruntime.cudaDeviceAttr.cudaDevAttrGPUDirectRDMASupported
2924 #: The returned attribute shall be interpreted as a bitmask, where the
2925 #: individual bits are listed in the
2926 #: :py:obj:`~.cudaFlushGPUDirectRDMAWritesOptions` enum
2927 cudaDevAttrGPUDirectRDMAFlushWritesOptions = cyruntime.cudaDeviceAttr.cudaDevAttrGPUDirectRDMAFlushWritesOptions
2929 #: GPUDirect RDMA writes to the device do not need to be flushed for
2930 #: consumers within the scope indicated by the returned attribute. See
2931 #: :py:obj:`~.cudaGPUDirectRDMAWritesOrdering` for the numerical values
2932 #: returned here.
2933 cudaDevAttrGPUDirectRDMAWritesOrdering = cyruntime.cudaDeviceAttr.cudaDevAttrGPUDirectRDMAWritesOrdering
2935 #: Handle types supported with mempool-based IPC
2936 cudaDevAttrMemoryPoolSupportedHandleTypes = cyruntime.cudaDeviceAttr.cudaDevAttrMemoryPoolSupportedHandleTypes
2938 #: Indicates device supports cluster launch
2939 cudaDevAttrClusterLaunch = cyruntime.cudaDeviceAttr.cudaDevAttrClusterLaunch
2941 #: Device supports deferred mapping CUDA arrays and CUDA mipmapped
2942 #: arrays
2943 cudaDevAttrDeferredMappingCudaArraySupported = cyruntime.cudaDeviceAttr.cudaDevAttrDeferredMappingCudaArraySupported
2944 cudaDevAttrReserved122 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved122
2945 cudaDevAttrReserved123 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved123
2946 cudaDevAttrReserved124 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved124
2948 #: Device supports IPC Events.
2949 cudaDevAttrIpcEventSupport = cyruntime.cudaDeviceAttr.cudaDevAttrIpcEventSupport
2951 #: Number of memory synchronization domains the device supports.
2952 cudaDevAttrMemSyncDomainCount = cyruntime.cudaDeviceAttr.cudaDevAttrMemSyncDomainCount
2953 cudaDevAttrReserved127 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved127
2954 cudaDevAttrReserved128 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved128
2955 cudaDevAttrReserved129 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved129
2957 #: NUMA configuration of a device: value is of type
2958 #: :py:obj:`~.cudaDeviceNumaConfig` enum
2959 cudaDevAttrNumaConfig = cyruntime.cudaDeviceAttr.cudaDevAttrNumaConfig
2961 #: NUMA node ID of the GPU memory
2962 cudaDevAttrNumaId = cyruntime.cudaDeviceAttr.cudaDevAttrNumaId
2963 cudaDevAttrReserved132 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved132
2965 #: Contexts created on this device will be shared via MPS
2966 cudaDevAttrMpsEnabled = cyruntime.cudaDeviceAttr.cudaDevAttrMpsEnabled
2968 #: NUMA ID of the host node closest to the device or -1 when system
2969 #: does not support NUMA
2970 cudaDevAttrHostNumaId = cyruntime.cudaDeviceAttr.cudaDevAttrHostNumaId
2972 #: Device supports CIG with D3D12.
2973 cudaDevAttrD3D12CigSupported = cyruntime.cudaDeviceAttr.cudaDevAttrD3D12CigSupported
2975 #: Device supports CIG with Vulkan.
2976 cudaDevAttrVulkanCigSupported = cyruntime.cudaDeviceAttr.cudaDevAttrVulkanCigSupported
2978 #: The combined 16-bit PCI device ID and 16-bit PCI vendor ID.
2979 cudaDevAttrGpuPciDeviceId = cyruntime.cudaDeviceAttr.cudaDevAttrGpuPciDeviceId
2981 #: The combined 16-bit PCI subsystem ID and 16-bit PCI subsystem vendor
2982 #: ID.
2983 cudaDevAttrGpuPciSubsystemId = cyruntime.cudaDeviceAttr.cudaDevAttrGpuPciSubsystemId
2984 cudaDevAttrReserved141 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved141
2986 #: Device supports HOST_NUMA location with the
2987 #: :py:obj:`~.cudaMallocAsync` and :py:obj:`~.cudaMemPool` family of
2988 #: APIs
2989 cudaDevAttrHostNumaMemoryPoolsSupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostNumaMemoryPoolsSupported
2991 #: Device supports HostNuma location IPC between nodes in a multi-node
2992 #: system.
2993 cudaDevAttrHostNumaMultinodeIpcSupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostNumaMultinodeIpcSupported
2995 #: Device supports HOST location with the :py:obj:`~.cuMemAllocAsync`
2996 #: and :py:obj:`~.cuMemPool` family of APIs
2997 cudaDevAttrHostMemoryPoolsSupported = cyruntime.cudaDeviceAttr.cudaDevAttrHostMemoryPoolsSupported
2998 cudaDevAttrReserved145 = cyruntime.cudaDeviceAttr.cudaDevAttrReserved145
3000 #: Link between the device and the host supports only some native
3001 #: atomic operations
3002 cudaDevAttrOnlyPartialHostNativeAtomicSupported = cyruntime.cudaDeviceAttr.cudaDevAttrOnlyPartialHostNativeAtomicSupported
3003 cudaDevAttrMax = cyruntime.cudaDeviceAttr.cudaDevAttrMax
3005_dict_cudaDeviceAttr = dict(((int(v), v) for k, v in cudaDeviceAttr.__members__.items()))
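# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# Any attribute above is read with cudaDeviceGetAttribute(), which returns the
# value as a plain int.
def _example_device_summary(device=0):
    from cuda.bindings import runtime as cudart
    attrs = cudart.cudaDeviceAttr
    summary = {}
    for attr in (attrs.cudaDevAttrMultiProcessorCount,
                 attrs.cudaDevAttrMaxThreadsPerBlock,
                 attrs.cudaDevAttrMaxSharedMemoryPerBlockOptin):
        err, value = cudart.cudaDeviceGetAttribute(attr, device)
        assert err == cudart.cudaError_t.cudaSuccess
        summary[attr.name] = value
    return summary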
3007class cudaMemPoolAttr(IntEnum):
3008 """
3009 CUDA memory pool attributes
3010 """
3012 #: (value type = int) Allow cuMemAllocAsync to use memory that was
3013 #: asynchronously freed in other streams, as long as a stream-ordering
3014 #: dependency of the allocating stream on the free action exists. CUDA
3015 #: events and null-stream interactions can create the required
3016 #: stream-ordered dependencies. (default enabled)
3017 cudaMemPoolReuseFollowEventDependencies = cyruntime.cudaMemPoolAttr.cudaMemPoolReuseFollowEventDependencies
3019 #: (value type = int) Allow reuse of already completed frees when there
3020 #: is no dependency between the free and allocation. (default enabled)
3021 cudaMemPoolReuseAllowOpportunistic = cyruntime.cudaMemPoolAttr.cudaMemPoolReuseAllowOpportunistic
3023 #: (value type = int) Allow cuMemAllocAsync to insert new stream
3024 #: dependencies in order to establish the stream ordering required to
3025 #: reuse a piece of memory released by cuFreeAsync (default enabled).
3026 cudaMemPoolReuseAllowInternalDependencies = cyruntime.cudaMemPoolAttr.cudaMemPoolReuseAllowInternalDependencies
3028 #: (value type = cuuint64_t) Amount of reserved memory in bytes to hold
3029 #: onto before trying to release memory back to the OS. When more than
3030 #: the release threshold bytes of memory are held by the memory pool,
3031 #: the allocator will try to release memory back to the OS on the next
3032 #: call to stream, event or context synchronize. (default 0)
3033 cudaMemPoolAttrReleaseThreshold = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold
3035 #: (value type = cuuint64_t) Amount of backing memory currently
3036 #: allocated for the mempool.
3037 cudaMemPoolAttrReservedMemCurrent = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrReservedMemCurrent
3039 #: (value type = cuuint64_t) High watermark of backing memory allocated
3040 #: for the mempool since the last time it was reset. High watermark can
3041 #: only be reset to zero.
3042 cudaMemPoolAttrReservedMemHigh = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrReservedMemHigh
3044 #: (value type = cuuint64_t) Amount of memory from the pool that is
3045 #: currently in use by the application.
3046 cudaMemPoolAttrUsedMemCurrent = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrUsedMemCurrent
3048 #: (value type = cuuint64_t) High watermark of the amount of memory
3049 #: from the pool that was in use by the application since the last time
3050 #: it was reset. High watermark can only be reset to zero.
3051 cudaMemPoolAttrUsedMemHigh = cyruntime.cudaMemPoolAttr.cudaMemPoolAttrUsedMemHigh
3053_dict_cudaMemPoolAttr = dict(((int(v), v) for k, v in cudaMemPoolAttr.__members__.items()))
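# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# Pool attributes are tuned with cudaMemPoolSetAttribute(); for the
# cuuint64_t-typed attributes the value is wrapped in driver.cuuint64_t.
def _example_keep_pool_memory(device=0):
    from cuda.bindings import driver, runtime as cudart
    err, pool = cudart.cudaDeviceGetDefaultMemPool(device)
    assert err == cudart.cudaError_t.cudaSuccess
    # With the threshold at UINT64_MAX the pool never proactively releases
    # memory back to the OS.
    err, = cudart.cudaMemPoolSetAttribute(
        pool, cudart.cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold,
        driver.cuuint64_t(0xFFFFFFFFFFFFFFFF))
    assert err == cudart.cudaError_t.cudaSuccess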
3055class cudaMemLocationType(IntEnum):
3056 """
3057 Specifies the type of location
3058 """
3059 cudaMemLocationTypeInvalid = cyruntime.cudaMemLocationType.cudaMemLocationTypeInvalid
3061 #: Location is unspecified. This is used when creating a managed memory
3062 #: pool to indicate no preferred location for the pool
3063 cudaMemLocationTypeNone = cyruntime.cudaMemLocationType.cudaMemLocationTypeNone
3065 #: Location is a device location, thus id is a device ordinal
3066 cudaMemLocationTypeDevice = cyruntime.cudaMemLocationType.cudaMemLocationTypeDevice
3068 #: Location is host, id is ignored
3069 cudaMemLocationTypeHost = cyruntime.cudaMemLocationType.cudaMemLocationTypeHost
3071 #: Location is a host NUMA node, thus id is a host NUMA node id
3072 cudaMemLocationTypeHostNuma = cyruntime.cudaMemLocationType.cudaMemLocationTypeHostNuma
3074 #: Location is the host NUMA node closest to the current thread's CPU,
3075 #: id is ignored
3076 cudaMemLocationTypeHostNumaCurrent = cyruntime.cudaMemLocationType.cudaMemLocationTypeHostNumaCurrent
3078_dict_cudaMemLocationType = dict(((int(v), v) for k, v in cudaMemLocationType.__members__.items()))
3080class cudaMemAccessFlags(IntEnum):
3081 """
3082 Specifies the memory protection flags for mapping.
3083 """
3085 #: Default, make the address range not accessible
3086 cudaMemAccessFlagsProtNone = cyruntime.cudaMemAccessFlags.cudaMemAccessFlagsProtNone
3088 #: Make the address range read accessible
3089 cudaMemAccessFlagsProtRead = cyruntime.cudaMemAccessFlags.cudaMemAccessFlagsProtRead
3091 #: Make the address range read-write accessible
3092 cudaMemAccessFlagsProtReadWrite = cyruntime.cudaMemAccessFlags.cudaMemAccessFlagsProtReadWrite
3094_dict_cudaMemAccessFlags = dict(((int(v), v) for k, v in cudaMemAccessFlags.__members__.items()))
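# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# A cudaMemAccessDesc combines a cudaMemLocation with one of the protection
# flags above; cudaMemPoolSetAccess() then grants a peer device access to a
# pool's allocations.
def _example_grant_peer_access(pool, peer_device):
    from cuda.bindings import runtime as cudart
    desc = cudart.cudaMemAccessDesc()
    desc.location.type = cudart.cudaMemLocationType.cudaMemLocationTypeDevice
    desc.location.id = peer_device
    desc.flags = cudart.cudaMemAccessFlags.cudaMemAccessFlagsProtReadWrite
    err, = cudart.cudaMemPoolSetAccess(pool, [desc], 1)
    assert err == cudart.cudaError_t.cudaSuccess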
3096class cudaMemAllocationType(IntEnum):
3097 """
3098 Defines the allocation types available
3099 """
3100 cudaMemAllocationTypeInvalid = cyruntime.cudaMemAllocationType.cudaMemAllocationTypeInvalid
3102 #: This allocation type is 'pinned', i.e. cannot migrate from its
3103 #: current location while the application is actively using it
3104 cudaMemAllocationTypePinned = cyruntime.cudaMemAllocationType.cudaMemAllocationTypePinned
3106 #: This allocation type is managed memory
3107 cudaMemAllocationTypeManaged = cyruntime.cudaMemAllocationType.cudaMemAllocationTypeManaged
3108 cudaMemAllocationTypeMax = cyruntime.cudaMemAllocationType.cudaMemAllocationTypeMax
3110_dict_cudaMemAllocationType = dict(((int(v), v) for k, v in cudaMemAllocationType.__members__.items()))
3112class cudaMemAllocationHandleType(IntEnum):
3113 """
3114 Flags for specifying particular handle types
3115 """
3117 #: Does not allow any export mechanism.
3118 cudaMemHandleTypeNone = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypeNone
3120 #: Allows a file descriptor to be used for exporting. Permitted only on
3121 #: POSIX systems. (int)
3122 cudaMemHandleTypePosixFileDescriptor = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypePosixFileDescriptor
3124 #: Allows a Win32 NT handle to be used for exporting. (HANDLE)
3125 cudaMemHandleTypeWin32 = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypeWin32
3127 #: Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE)
3128 cudaMemHandleTypeWin32Kmt = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypeWin32Kmt
3130 #: Allows a fabric handle to be used for exporting.
3131 #: (cudaMemFabricHandle_t)
3132 cudaMemHandleTypeFabric = cyruntime.cudaMemAllocationHandleType.cudaMemHandleTypeFabric
3134_dict_cudaMemAllocationHandleType = dict(((int(v), v) for k, v in cudaMemAllocationHandleType.__members__.items()))
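# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# A pool whose allocations can be exported as POSIX file descriptors is
# created by setting handleTypes in cudaMemPoolProps before cudaMemPoolCreate().
def _example_create_exportable_pool(device=0):
    from cuda.bindings import runtime as cudart
    props = cudart.cudaMemPoolProps()
    props.allocType = cudart.cudaMemAllocationType.cudaMemAllocationTypePinned
    props.handleTypes = cudart.cudaMemAllocationHandleType.cudaMemHandleTypePosixFileDescriptor
    props.location.type = cudart.cudaMemLocationType.cudaMemLocationTypeDevice
    props.location.id = device
    err, pool = cudart.cudaMemPoolCreate(props)
    assert err == cudart.cudaError_t.cudaSuccess
    return pool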
3136class cudaGraphMemAttributeType(IntEnum):
3137 """
3138 Graph memory attributes
3139 """
3141 #: (value type = cuuint64_t) Amount of memory, in bytes, currently
3142 #: associated with graphs.
3143 cudaGraphMemAttrUsedMemCurrent = cyruntime.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemCurrent
3145 #: (value type = cuuint64_t) High watermark of memory, in bytes,
3146 #: associated with graphs since the last time it was reset. High
3147 #: watermark can only be reset to zero.
3148 cudaGraphMemAttrUsedMemHigh = cyruntime.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemHigh
3150 #: (value type = cuuint64_t) Amount of memory, in bytes, currently
3151 #: allocated for use by the CUDA graphs asynchronous allocator.
3152 cudaGraphMemAttrReservedMemCurrent = cyruntime.cudaGraphMemAttributeType.cudaGraphMemAttrReservedMemCurrent
3154 #: (value type = cuuint64_t) High watermark of memory, in bytes,
3155 #: currently allocated for use by the CUDA graphs asynchronous
3156 #: allocator.
3157 cudaGraphMemAttrReservedMemHigh = cyruntime.cudaGraphMemAttributeType.cudaGraphMemAttrReservedMemHigh
3159_dict_cudaGraphMemAttributeType = dict(((int(v), v) for k, v in cudaGraphMemAttributeType.__members__.items()))
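# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# Graph memory counters are read per device with
# cudaDeviceGetGraphMemAttribute(); the high-watermark attributes can be reset
# to zero through the matching cudaDeviceSetGraphMemAttribute() call.
def _example_graph_mem_in_use(device=0):
    from cuda.bindings import runtime as cudart
    err, used = cudart.cudaDeviceGetGraphMemAttribute(
        device, cudart.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemCurrent)
    assert err == cudart.cudaError_t.cudaSuccess
    return used  # value type is cuuint64_t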
3161class cudaMemcpyFlags(IntEnum):
3162 """
3163 Flags to specify for copies within a batch. For more details see
3164 :py:obj:`~.cudaMemcpyBatchAsync`.
3165 """
3166 cudaMemcpyFlagDefault = cyruntime.cudaMemcpyFlags.cudaMemcpyFlagDefault
3168 #: Hint to the driver to try and overlap the copy with compute work on
3169 #: the SMs.
3170 cudaMemcpyFlagPreferOverlapWithCompute = cyruntime.cudaMemcpyFlags.cudaMemcpyFlagPreferOverlapWithCompute
3172_dict_cudaMemcpyFlags = dict(((int(v), v) for k, v in cudaMemcpyFlags.__members__.items()))
3174class cudaMemcpySrcAccessOrder(IntEnum):
3175 """
3177 """
3179 #: Default invalid.
3180 cudaMemcpySrcAccessOrderInvalid = cyruntime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderInvalid
3182 #: Indicates that access to the source pointer must be in stream order.
3183 cudaMemcpySrcAccessOrderStream = cyruntime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderStream
3185 #: Indicates that access to the source pointer can be out of stream
3186 #: order and all accesses must be complete before the API call returns.
3187 #: This flag is suited for ephemeral sources (e.g., stack variables)
3188 #: when it's known that no prior operations in the stream can be
3189 #: accessing the memory and that the lifetime of the memory is
3190 #: limited to the scope in which the source variable was declared.
3191 #: Specifying this flag allows the driver to optimize the copy and
3192 #: removes the need for the user to synchronize the stream after the
3193 #: API call.
3194 cudaMemcpySrcAccessOrderDuringApiCall = cyruntime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderDuringApiCall
3196 #: Indicates that access to the source pointer can be out of stream
3197 #: order and the accesses can happen even after the API call returns.
3198 #: This flag is suited for host pointers allocated outside CUDA (e.g.,
3199 #: via malloc) when it's known that no prior operations in the stream
3200 #: can be accessing the memory. Specifying this flag allows the driver
3201 #: to optimize the copy on certain platforms.
3202 cudaMemcpySrcAccessOrderAny = cyruntime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderAny
3203 cudaMemcpySrcAccessOrderMax = cyruntime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderMax
3205_dict_cudaMemcpySrcAccessOrder = dict(((int(v), v) for k, v in cudaMemcpySrcAccessOrder.__members__.items()))
3207class cudaMemcpy3DOperandType(IntEnum):
3208 """
3209 These flags allow applications to convey the operand type for
3210 individual copies specified in :py:obj:`~.cudaMemcpy3DBatchAsync`.
3211 """
3213 #: Memcpy operand is a valid pointer.
3214 cudaMemcpyOperandTypePointer = cyruntime.cudaMemcpy3DOperandType.cudaMemcpyOperandTypePointer
3216 #: Memcpy operand is a CUarray.
3217 cudaMemcpyOperandTypeArray = cyruntime.cudaMemcpy3DOperandType.cudaMemcpyOperandTypeArray
3218 cudaMemcpyOperandTypeMax = cyruntime.cudaMemcpy3DOperandType.cudaMemcpyOperandTypeMax
3220_dict_cudaMemcpy3DOperandType = dict(((int(v), v) for k, v in cudaMemcpy3DOperandType.__members__.items()))
3222class cudaDeviceP2PAttr(IntEnum):
3223 """
3224 CUDA device P2P attributes
3225 """
3227 #: A relative value indicating the performance of the link between two
3228 #: devices
3229 cudaDevP2PAttrPerformanceRank = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrPerformanceRank
3231 #: Peer access is enabled
3232 cudaDevP2PAttrAccessSupported = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrAccessSupported
3234 #: Native atomic operation over the link supported
3235 cudaDevP2PAttrNativeAtomicSupported = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrNativeAtomicSupported
3237 #: Accessing CUDA arrays over the link supported
3238 cudaDevP2PAttrCudaArrayAccessSupported = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrCudaArrayAccessSupported
3240 #: Only some CUDA-valid atomic operations over the link are supported.
3241 cudaDevP2PAttrOnlyPartialNativeAtomicSupported = cyruntime.cudaDeviceP2PAttr.cudaDevP2PAttrOnlyPartialNativeAtomicSupported
3243_dict_cudaDeviceP2PAttr = dict(((int(v), v) for k, v in cudaDeviceP2PAttr.__members__.items()))
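# --- Editor's usage sketch (illustrative; not part of the generated module) ---
# P2P capabilities are queried per (src, dst) device pair; a non-zero
# cudaDevP2PAttrAccessSupported means peer access can be enabled.
def _example_p2p_supported(src=0, dst=1):
    from cuda.bindings import runtime as cudart
    err, ok = cudart.cudaDeviceGetP2PAttribute(
        cudart.cudaDeviceP2PAttr.cudaDevP2PAttrAccessSupported, src, dst)
    assert err == cudart.cudaError_t.cudaSuccess
    return bool(ok)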
3245class cudaAtomicOperation(IntEnum):
3246 """
3247 CUDA-valid Atomic Operations
3248 """
3249 cudaAtomicOperationIntegerAdd = cyruntime.cudaAtomicOperation.cudaAtomicOperationIntegerAdd
3250 cudaAtomicOperationIntegerMin = cyruntime.cudaAtomicOperation.cudaAtomicOperationIntegerMin
3251 cudaAtomicOperationIntegerMax = cyruntime.cudaAtomicOperation.cudaAtomicOperationIntegerMax
3252 cudaAtomicOperationIntegerIncrement = cyruntime.cudaAtomicOperation.cudaAtomicOperationIntegerIncrement
3253 cudaAtomicOperationIntegerDecrement = cyruntime.cudaAtomicOperation.cudaAtomicOperationIntegerDecrement
3254 cudaAtomicOperationAnd = cyruntime.cudaAtomicOperation.cudaAtomicOperationAnd
3255 cudaAtomicOperationOr = cyruntime.cudaAtomicOperation.cudaAtomicOperationOr
3256 cudaAtomicOperationXOR = cyruntime.cudaAtomicOperation.cudaAtomicOperationXOR
3257 cudaAtomicOperationExchange = cyruntime.cudaAtomicOperation.cudaAtomicOperationExchange
3258 cudaAtomicOperationCAS = cyruntime.cudaAtomicOperation.cudaAtomicOperationCAS
3259 cudaAtomicOperationFloatAdd = cyruntime.cudaAtomicOperation.cudaAtomicOperationFloatAdd
3260 cudaAtomicOperationFloatMin = cyruntime.cudaAtomicOperation.cudaAtomicOperationFloatMin
3261 cudaAtomicOperationFloatMax = cyruntime.cudaAtomicOperation.cudaAtomicOperationFloatMax
3263_dict_cudaAtomicOperation = dict(((int(v), v) for k, v in cudaAtomicOperation.__members__.items()))
3265class cudaAtomicOperationCapability(IntEnum):
3266 """
3267 CUDA-valid Atomic Operation capabilities
3268 """
3277_dict_cudaAtomicOperationCapability = dict(((int(v), v) for k, v in cudaAtomicOperationCapability.__members__.items()))
3279class cudaExternalMemoryHandleType(IntEnum):
3280 """
3281 External memory handle types
3282 """
3284 #: Handle is an opaque file descriptor
3285 cudaExternalMemoryHandleTypeOpaqueFd = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueFd
3287 #: Handle is an opaque shared NT handle
3288 cudaExternalMemoryHandleTypeOpaqueWin32 = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueWin32
3290 #: Handle is an opaque, globally shared handle
3291 cudaExternalMemoryHandleTypeOpaqueWin32Kmt = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueWin32Kmt
3293 #: Handle is a D3D12 heap object
3294 cudaExternalMemoryHandleTypeD3D12Heap = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D12Heap
3296 #: Handle is a D3D12 committed resource
3297 cudaExternalMemoryHandleTypeD3D12Resource = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D12Resource
3299 #: Handle is a shared NT handle to a D3D11 resource
3300 cudaExternalMemoryHandleTypeD3D11Resource = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D11Resource
3302 #: Handle is a globally shared handle to a D3D11 resource
3303 cudaExternalMemoryHandleTypeD3D11ResourceKmt = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D11ResourceKmt
3305 #: Handle is an NvSciBuf object
3306 cudaExternalMemoryHandleTypeNvSciBuf = cyruntime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeNvSciBuf
3308_dict_cudaExternalMemoryHandleType = dict(((int(v), v) for k, v in cudaExternalMemoryHandleType.__members__.items()))
3310class cudaExternalSemaphoreHandleType(IntEnum):
3311 """
3312 External semaphore handle types
3313 """
3315 #: Handle is an opaque file descriptor
3316 cudaExternalSemaphoreHandleTypeOpaqueFd = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueFd
3318 #: Handle is an opaque shared NT handle
3319 cudaExternalSemaphoreHandleTypeOpaqueWin32 = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueWin32
3321 #: Handle is an opaque, globally shared handle
3322 cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt
3324 #: Handle is a shared NT handle referencing a D3D12 fence object
3325 cudaExternalSemaphoreHandleTypeD3D12Fence = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeD3D12Fence
3327 #: Handle is a shared NT handle referencing a D3D11 fence object
3328 cudaExternalSemaphoreHandleTypeD3D11Fence = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeD3D11Fence
3330 #: Opaque handle to NvSciSync Object
3331 cudaExternalSemaphoreHandleTypeNvSciSync = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeNvSciSync
3333 #: Handle is a shared NT handle referencing a D3D11 keyed mutex object
3334 cudaExternalSemaphoreHandleTypeKeyedMutex = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeKeyedMutex
3336 #: Handle is a shared KMT handle referencing a D3D11 keyed mutex object
3337 cudaExternalSemaphoreHandleTypeKeyedMutexKmt = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeKeyedMutexKmt
3339 #: Handle is an opaque handle file descriptor referencing a timeline
3340 #: semaphore
3341 cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd
3343 #: Handle is an opaque handle file descriptor referencing a timeline
3344 #: semaphore
3345 cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32 = cyruntime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32
3347_dict_cudaExternalSemaphoreHandleType = dict(((int(v), v) for k, v in cudaExternalSemaphoreHandleType.__members__.items()))
3349class cudaDevSmResourceGroup_flags(IntEnum):
3350 """
3352 """
3353 cudaDevSmResourceGroupDefault = cyruntime.cudaDevSmResourceGroup_flags.cudaDevSmResourceGroupDefault
3354 cudaDevSmResourceGroupBackfill = cyruntime.cudaDevSmResourceGroup_flags.cudaDevSmResourceGroupBackfill
3356_dict_cudaDevSmResourceGroup_flags = dict(((int(v), v) for k, v in cudaDevSmResourceGroup_flags.__members__.items()))
3358class cudaDevSmResourceSplitByCount_flags(IntEnum):
3359 """
3361 """
3362 cudaDevSmResourceSplitIgnoreSmCoscheduling = cyruntime.cudaDevSmResourceSplitByCount_flags.cudaDevSmResourceSplitIgnoreSmCoscheduling
3363 cudaDevSmResourceSplitMaxPotentialClusterSize = cyruntime.cudaDevSmResourceSplitByCount_flags.cudaDevSmResourceSplitMaxPotentialClusterSize
3365_dict_cudaDevSmResourceSplitByCount_flags = dict(((int(v), v) for k, v in cudaDevSmResourceSplitByCount_flags.__members__.items()))
3367class cudaDevResourceType(IntEnum):
3368 """
3369 Type of resource
3370 """
3371 cudaDevResourceTypeInvalid = cyruntime.cudaDevResourceType.cudaDevResourceTypeInvalid
3373 #: Streaming multiprocessors related information
3374 cudaDevResourceTypeSm = cyruntime.cudaDevResourceType.cudaDevResourceTypeSm
3376 #: Workqueue configuration related information
3377 cudaDevResourceTypeWorkqueueConfig = cyruntime.cudaDevResourceType.cudaDevResourceTypeWorkqueueConfig
3379 #: Pre-existing workqueue related information
3380 cudaDevResourceTypeWorkqueue = cyruntime.cudaDevResourceType.cudaDevResourceTypeWorkqueue
3382_dict_cudaDevResourceType = dict(((int(v), v) for k, v in cudaDevResourceType.__members__.items()))
3384class cudaDevWorkqueueConfigScope(IntEnum):
3385 """
3386 Sharing scope for workqueues
3387 """
3389 #: Use all shared workqueue resources on the device. Default driver
3390 #: behaviour.
3391 cudaDevWorkqueueConfigScopeDeviceCtx = cyruntime.cudaDevWorkqueueConfigScope.cudaDevWorkqueueConfigScopeDeviceCtx
3393 #: When possible, use non-overlapping workqueue resources with other
3394 #: balanced green contexts.
3395 cudaDevWorkqueueConfigScopeGreenCtxBalanced = cyruntime.cudaDevWorkqueueConfigScope.cudaDevWorkqueueConfigScopeGreenCtxBalanced
3397_dict_cudaDevWorkqueueConfigScope = dict(((int(v), v) for k, v in cudaDevWorkqueueConfigScope.__members__.items()))
3399class cudaJitOption(IntEnum):
3400 """
3401 Online compiler and linker options
3402 """
3404 #: Max number of registers that a thread may use.
3405 #: Option type: unsigned int
3406 #: Applies to: compiler only
3407 cudaJitMaxRegisters = cyruntime.cudaJitOption.cudaJitMaxRegisters
3409 #: IN: Specifies minimum number of threads per block to target
3410 #: compilation for
3411 #: OUT: Returns the number of threads the compiler actually targeted.
3412 #: This restricts the resource utilization of the compiler (e.g. max
3413 #: registers) such that a block with the given number of threads should
3414 #: be able to launch based on register limitations. Note, this option
3415 #: does not currently take into account any other resource limitations,
3416 #: such as shared memory utilization.
3417 #: Option type: unsigned int
3418 #: Applies to: compiler only
3419 cudaJitThreadsPerBlock = cyruntime.cudaJitOption.cudaJitThreadsPerBlock
3421 #: Overwrites the option value with the total wall clock time, in
3422 #: milliseconds, spent in the compiler and linker
3423 #: Option type: float
3424 #: Applies to: compiler and linker
3425 cudaJitWallTime = cyruntime.cudaJitOption.cudaJitWallTime
3427 #: Pointer to a buffer in which to print any log messages that are
3428 #: informational in nature (the buffer size is specified via option
3429 #: :py:obj:`~.cudaJitInfoLogBufferSizeBytes`)
3430 #: Option type: char *
3431 #: Applies to: compiler and linker
3432 cudaJitInfoLogBuffer = cyruntime.cudaJitOption.cudaJitInfoLogBuffer
3434 #: IN: Log buffer size in bytes. Log messages will be capped at this
3435 #: size (including null terminator)
3436 #: OUT: Amount of log buffer filled with messages
3437 #: Option type: unsigned int
3438 #: Applies to: compiler and linker
3439 cudaJitInfoLogBufferSizeBytes = cyruntime.cudaJitOption.cudaJitInfoLogBufferSizeBytes
3441 #: Pointer to a buffer in which to print any log messages that reflect
3442 #: errors (the buffer size is specified via option
3443 #: :py:obj:`~.cudaJitErrorLogBufferSizeBytes`)
3444 #: Option type: char *
3445 #: Applies to: compiler and linker
3446 cudaJitErrorLogBuffer = cyruntime.cudaJitOption.cudaJitErrorLogBuffer
3448 #: IN: Log buffer size in bytes. Log messages will be capped at this
3449 #: size (including null terminator)
3450 #: OUT: Amount of log buffer filled with messages
3451 #: Option type: unsigned int
3452 #: Applies to: compiler and linker
3453 cudaJitErrorLogBufferSizeBytes = cyruntime.cudaJitOption.cudaJitErrorLogBufferSizeBytes
3455 #: Level of optimizations to apply to generated code (0 - 4), with 4
3456 #: being the default and highest level of optimizations.
3457 #: Option type: unsigned int
3458 #: Applies to: compiler only
3459 cudaJitOptimizationLevel = cyruntime.cudaJitOption.cudaJitOptimizationLevel
3461 #: Specifies choice of fallback strategy if matching cubin is not
3462 #: found. Choice is based on supplied :py:obj:`~.cudaJit_Fallback`.
3463 #: Option type: unsigned int for enumerated type
3464 #: :py:obj:`~.cudaJit_Fallback`
3465 #: Applies to: compiler only
3466 cudaJitFallbackStrategy = cyruntime.cudaJitOption.cudaJitFallbackStrategy
3468 #: Specifies whether to create debug information in output (-g) (0:
3469 #: false, default)
3470 #: Option type: int
3471 #: Applies to: compiler and linker
3472 cudaJitGenerateDebugInfo = cyruntime.cudaJitOption.cudaJitGenerateDebugInfo
3474 #: Generate verbose log messages (0: false, default)
3475 #: Option type: int
3476 #: Applies to: compiler and linker
3477 cudaJitLogVerbose = cyruntime.cudaJitOption.cudaJitLogVerbose
3479 #: Generate line number information (-lineinfo) (0: false, default)
3480 #: Option type: int
3481 #: Applies to: compiler only
3482 cudaJitGenerateLineInfo = cyruntime.cudaJitOption.cudaJitGenerateLineInfo
3484 #: Specifies whether to enable caching explicitly (-dlcm)
3485 #: Choice is based on supplied :py:obj:`~.cudaJit_CacheMode`.
3486 #: Option type: unsigned int for enumerated type
3487 #: :py:obj:`~.cudaJit_CacheMode`
3488 #: Applies to: compiler only
3489 cudaJitCacheMode = cyruntime.cudaJitOption.cudaJitCacheMode
3491 #: Generate position independent code (0: false)
3492 #: Option type: int
3493 #: Applies to: compiler only
3494 cudaJitPositionIndependentCode = cyruntime.cudaJitOption.cudaJitPositionIndependentCode
3496 #: This option hints to the JIT compiler the minimum number of CTAs
3497 #: from the kernel’s grid to be mapped to an SM. This option is ignored
3498 #: when used together with :py:obj:`~.cudaJitMaxRegisters` or
3499 #: :py:obj:`~.cudaJitThreadsPerBlock`. Optimizations based on this
3500 #: option need :py:obj:`~.cudaJitMaxThreadsPerBlock` to be specified as
3501 #: well. For kernels already using PTX directive .minnctapersm, this
3502 #: option will be ignored by default. Use
3503 #: :py:obj:`~.cudaJitOverrideDirectiveValues` to let this option take
3504 #: precedence over the PTX directive. Option type: unsigned int
3505 #: Applies to: compiler only
3506 cudaJitMinCtaPerSm = cyruntime.cudaJitOption.cudaJitMinCtaPerSm
3508 #: Maximum number of threads in a thread block, computed as the product
3509 #: of the maximum extent specified for each dimension of the block. This
3510 #: limit is guaranteed not to be exceeded in any invocation of the
3511 #: kernel. Exceeding the maximum number of threads results in a
3512 #: runtime error or kernel launch failure. For kernels already using
3513 #: PTX directive .maxntid, this option will be ignored by default. Use
3514 #: :py:obj:`~.cudaJitOverrideDirectiveValues` to let this option take
3515 #: precedence over the PTX directive. Option type: int
3516 #: Applies to: compiler only
3517 cudaJitMaxThreadsPerBlock = cyruntime.cudaJitOption.cudaJitMaxThreadsPerBlock
3519 #: This option lets the values specified using
3520 #: :py:obj:`~.cudaJitMaxRegisters`, :py:obj:`~.cudaJitThreadsPerBlock`,
3521 #: :py:obj:`~.cudaJitMaxThreadsPerBlock` and
3522 #: :py:obj:`~.cudaJitMinCtaPerSm` take precedence over any PTX
3523 #: directives. (0: Disable, default; 1: Enable) Option type: int
3524 #: Applies to: compiler only
3525 cudaJitOverrideDirectiveValues = cyruntime.cudaJitOption.cudaJitOverrideDirectiveValues
3527_dict_cudaJitOption = dict(((int(v), v) for k, v in cudaJitOption.__members__.items()))
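# --- Editorial note: the `_dict_*` tables built after each enum (such as
# _dict_cudaJitOption above) map the raw integers returned by the C layer
# back to their IntEnum members. A minimal sketch of that round-trip, with
# a hypothetical defensive fallback for values this binding does not know:
def _example_jit_option_from_int(raw_value):
    return _dict_cudaJitOption.get(int(raw_value), raw_value)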
3529class cudaLibraryOption(IntEnum):
3530 """
3531 Library options to be specified with
3532 :py:obj:`~.cudaLibraryLoadData()` or
3533 :py:obj:`~.cudaLibraryLoadFromFile()`
3534 """
3535 cudaLibraryHostUniversalFunctionAndDataTable = cyruntime.cudaLibraryOption.cudaLibraryHostUniversalFunctionAndDataTable
3537 #: Specifies that the argument `code` passed to
3538 #: :py:obj:`~.cudaLibraryLoadData()` will be preserved. Specifying this
3539 #: option will let the driver know that `code` can be accessed at any
3540 #: point until :py:obj:`~.cudaLibraryUnload()`. The default behavior is
3541 #: for the driver to allocate and maintain its own copy of `code`. Note
3542 #: that this is only a memory usage optimization hint and the driver
3543 #: can choose to ignore it if required. Specifying this option with
3544 #: :py:obj:`~.cudaLibraryLoadFromFile()` is invalid and will return
3545 #: :py:obj:`~.cudaErrorInvalidValue`.
3546 cudaLibraryBinaryIsPreserved = cyruntime.cudaLibraryOption.cudaLibraryBinaryIsPreserved
3548_dict_cudaLibraryOption = dict(((int(v), v) for k, v in cudaLibraryOption.__members__.items()))
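# --- Editorial usage sketch (not generated code): loading a cubin/fatbin
# image with JIT options. The parallel option/value lists plus explicit
# counts mirror the cudaLibraryLoadData wrapper in this module (an
# assumption about its exact argument layout); `image` is a hypothetical
# bytes object holding the code to load.
def _example_library_load(image):
    jit_opts = [cudaJitOption.cudaJitGenerateLineInfo]
    jit_vals = [1]
    err, library = cudaLibraryLoadData(
        image, jit_opts, jit_vals, len(jit_opts), None, None, 0)
    assert err == cudaError_t.cudaSuccess
    return library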
3550class cudaJit_CacheMode(IntEnum):
3551 """
3552 Caching modes for dlcm
3553 """
3555 #: Compile with no -dlcm flag specified
3556 cudaJitCacheOptionNone = cyruntime.cudaJit_CacheMode.cudaJitCacheOptionNone
3558 #: Compile with L1 cache disabled
3559 cudaJitCacheOptionCG = cyruntime.cudaJit_CacheMode.cudaJitCacheOptionCG
3561 #: Compile with L1 cache enabled
3562 cudaJitCacheOptionCA = cyruntime.cudaJit_CacheMode.cudaJitCacheOptionCA
3564_dict_cudaJit_CacheMode = dict(((int(v), v) for k, v in cudaJit_CacheMode.__members__.items()))
3566class cudaJit_Fallback(IntEnum):
3567 """
3568 Cubin matching fallback strategies
3569 """
3571 #: Prefer to compile ptx if exact binary match not found
3572 cudaPreferPtx = cyruntime.cudaJit_Fallback.cudaPreferPtx
3574 #: Prefer to fall back to compatible binary code if exact match not
3575 #: found
3576 cudaPreferBinary = cyruntime.cudaJit_Fallback.cudaPreferBinary
3578_dict_cudaJit_Fallback = dict(((int(v), v) for k, v in cudaJit_Fallback.__members__.items()))
3580class cudaCGScope(IntEnum):
3581 """
3582 CUDA cooperative group scope
3583 """
3585 #: Invalid cooperative group scope
3586 cudaCGScopeInvalid = cyruntime.cudaCGScope.cudaCGScopeInvalid
3588 #: Scope represented by a grid_group
3589 cudaCGScopeGrid = cyruntime.cudaCGScope.cudaCGScopeGrid
3591 #: Reserved
3592 cudaCGScopeReserved = cyruntime.cudaCGScope.cudaCGScopeReserved
3594_dict_cudaCGScope = dict(((int(v), v) for k, v in cudaCGScope.__members__.items()))
3596class cudaGraphConditionalHandleFlags(IntEnum):
3597 """
3599 """
3601 #: Apply default handle value when graph is launched.
3602 cudaGraphCondAssignDefault = cyruntime.cudaGraphConditionalHandleFlags.cudaGraphCondAssignDefault
3604_dict_cudaGraphConditionalHandleFlags = dict(((int(v), v) for k, v in cudaGraphConditionalHandleFlags.__members__.items()))
3606class cudaGraphConditionalNodeType(IntEnum):
3607 """
3608 CUDA conditional node types
3609 """
3611 #: Conditional 'if/else' Node. Body[0] executed if condition is non-
3612 #: zero. If `size` == 2, an optional ELSE graph is created and this is
3613 #: executed if the condition is zero.
3614 cudaGraphCondTypeIf = cyruntime.cudaGraphConditionalNodeType.cudaGraphCondTypeIf
3616 #: Conditional 'while' Node. Body executed repeatedly while condition
3617 #: value is non-zero.
3618 cudaGraphCondTypeWhile = cyruntime.cudaGraphConditionalNodeType.cudaGraphCondTypeWhile
3620 #: Conditional 'switch' Node. Body[n] is executed once, where 'n' is
3621 #: the value of the condition. If the condition does not match a body
3622 #: index, no body is launched.
3623 cudaGraphCondTypeSwitch = cyruntime.cudaGraphConditionalNodeType.cudaGraphCondTypeSwitch
3625_dict_cudaGraphConditionalNodeType = dict(((int(v), v) for k, v in cudaGraphConditionalNodeType.__members__.items()))
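# --- Editorial usage sketch (not generated code): adding a conditional
# WHILE node. The handle must exist before the node is created, and the
# body graph comes back through the node parameters. Field names follow the
# cudaGraphNodeParams/cudaConditionalNodeParams wrappers in this module and
# are an assumption about their exact spelling.
def _example_add_while_node(graph):
    err, handle = cudaGraphConditionalHandleCreate(
        graph, 1, cudaGraphConditionalHandleFlags.cudaGraphCondAssignDefault)
    assert err == cudaError_t.cudaSuccess
    params = cudaGraphNodeParams()
    params.type = cudaGraphNodeType.cudaGraphNodeTypeConditional
    params.conditional.handle = handle
    params.conditional.type = cudaGraphConditionalNodeType.cudaGraphCondTypeWhile
    params.conditional.size = 1
    err, node = cudaGraphAddNode(graph, None, 0, params)
    assert err == cudaError_t.cudaSuccess
    body = params.conditional.phGraph_out[0]  # populate with the loop body
    return node, body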
3627class cudaGraphNodeType(IntEnum):
3628 """
3629 CUDA Graph node types
3630 """
3632 #: GPU kernel node
3633 cudaGraphNodeTypeKernel = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeKernel
3635 #: Memcpy node
3636 cudaGraphNodeTypeMemcpy = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeMemcpy
3638 #: Memset node
3639 cudaGraphNodeTypeMemset = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeMemset
3641 #: Host (executable) node
3642 cudaGraphNodeTypeHost = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeHost
3644 #: Node which executes an embedded graph
3645 cudaGraphNodeTypeGraph = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeGraph
3647 #: Empty (no-op) node
3648 cudaGraphNodeTypeEmpty = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeEmpty
3650 #: External event wait node
3651 cudaGraphNodeTypeWaitEvent = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeWaitEvent
3653 #: External event record node
3654 cudaGraphNodeTypeEventRecord = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeEventRecord
3656 #: External semaphore signal node
3657 cudaGraphNodeTypeExtSemaphoreSignal = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeExtSemaphoreSignal
3659 #: External semaphore wait node
3660 cudaGraphNodeTypeExtSemaphoreWait = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeExtSemaphoreWait
3662 #: Memory allocation node
3663 cudaGraphNodeTypeMemAlloc = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeMemAlloc
3665 #: Memory free node
3666 cudaGraphNodeTypeMemFree = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeMemFree
3668 #: Conditional node. May be used to implement a conditional execution
3669 #: path or loop inside of a graph. The graph(s) contained within the
3670 #: body of the conditional node can be selectively executed or iterated
3671 #: upon based on the value of a conditional variable.
3672 #:
3673 #: Handles must be created in advance of creating the node using
3674 #: :py:obj:`~.cudaGraphConditionalHandleCreate`.
3675 #:
3676 #: The following restrictions apply to graphs which contain conditional
3677 #: nodes: The graph cannot be used in a child node. Only one
3678 #: instantiation of the graph may exist at any point in time. The graph
3679 #: cannot be cloned.
3680 #:
3681 #: To set the control value, supply a default value when creating the
3682 #: handle and/or call :py:obj:`~.cudaGraphSetConditional` from device
3683 #: code.
3693 cudaGraphNodeTypeConditional = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeConditional
3694 cudaGraphNodeTypeCount = cyruntime.cudaGraphNodeType.cudaGraphNodeTypeCount
3696_dict_cudaGraphNodeType = dict(((int(v), v) for k, v in cudaGraphNodeType.__members__.items()))
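# --- Editorial sketch: decoding the type of an existing graph node via the
# table above; cudaGraphNodeGetType returns an (error, type) tuple like the
# other wrappers in this module.
def _example_node_type(node):
    err, raw = cudaGraphNodeGetType(node)
    assert err == cudaError_t.cudaSuccess
    return _dict_cudaGraphNodeType[int(raw)]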
3698class cudaGraphChildGraphNodeOwnership(IntEnum):
3699 """
3700 Child graph node ownership
3701 """
3703 #: Default behavior for a child graph node. Child graph is cloned into
3704 #: the parent and memory allocation/free nodes can't be present in the
3705 #: child graph.
3706 cudaGraphChildGraphOwnershipClone = cyruntime.cudaGraphChildGraphNodeOwnership.cudaGraphChildGraphOwnershipClone
3708 #: The child graph is moved to the parent. The handle to the child
3709 #: graph is owned by the parent and will be destroyed when the parent
3710 #: is destroyed.
3711 #:
3712 #: The following restrictions apply to child graphs after they have
3713 #: been moved: Cannot be independently instantiated or destroyed;
3714 #: Cannot be added as a child graph of a separate parent graph; Cannot
3715 #: be used as an argument to cudaGraphExecUpdate; Cannot have
3716 #: additional memory allocation or free nodes added.
3717 cudaGraphChildGraphOwnershipMove = cyruntime.cudaGraphChildGraphNodeOwnership.cudaGraphChildGraphOwnershipMove
3719_dict_cudaGraphChildGraphNodeOwnership = dict(((int(v), v) for k, v in cudaGraphChildGraphNodeOwnership.__members__.items()))
3721class cudaGraphExecUpdateResult(IntEnum):
3722 """
3723 CUDA Graph Update error types
3724 """
3726 #: The update succeeded
3727 cudaGraphExecUpdateSuccess = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateSuccess
3729 #: The update failed for an unexpected reason which is described in the
3730 #: return value of the function
3731 cudaGraphExecUpdateError = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateError
3733 #: The update failed because the topology changed
3734 cudaGraphExecUpdateErrorTopologyChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorTopologyChanged
3736 #: The update failed because a node type changed
3737 cudaGraphExecUpdateErrorNodeTypeChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorNodeTypeChanged
3739 #: The update failed because the function of a kernel node changed
3740 #: (CUDA driver < 11.2)
3741 cudaGraphExecUpdateErrorFunctionChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorFunctionChanged
3743 #: The update failed because the parameters changed in a way that is
3744 #: not supported
3745 cudaGraphExecUpdateErrorParametersChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorParametersChanged
3747 #: The update failed because something about the node is not supported
3748 cudaGraphExecUpdateErrorNotSupported = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorNotSupported
3750 #: The update failed because the function of a kernel node changed in
3751 #: an unsupported way
3752 cudaGraphExecUpdateErrorUnsupportedFunctionChange = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorUnsupportedFunctionChange
3754 #: The update failed because the node attributes changed in a way that
3755 #: is not supported
3756 cudaGraphExecUpdateErrorAttributesChanged = cyruntime.cudaGraphExecUpdateResult.cudaGraphExecUpdateErrorAttributesChanged
3758_dict_cudaGraphExecUpdateResult = dict(((int(v), v) for k, v in cudaGraphExecUpdateResult.__members__.items()))
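# --- Editorial usage sketch (not generated code): attempting an in-place
# executable-graph update and reporting why it was rejected. Assumes the
# cudaGraphExecUpdate wrapper returns a cudaGraphExecUpdateResultInfo whose
# `result` field carries the enum above.
def _example_try_exec_update(graph_exec, graph):
    err, info = cudaGraphExecUpdate(graph_exec, graph)
    if err == cudaError_t.cudaSuccess:
        return True
    reason = _dict_cudaGraphExecUpdateResult[int(info.result)]
    print(f"update rejected ({reason!r}); reinstantiation required")
    return False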
3760class cudaGraphKernelNodeField(IntEnum):
3761 """
3762 Specifies the field to update when performing multiple node updates
3763 from the device
3764 """
3766 #: Invalid field
3767 cudaGraphKernelNodeFieldInvalid = cyruntime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldInvalid
3769 #: Grid dimension update
3770 cudaGraphKernelNodeFieldGridDim = cyruntime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldGridDim
3772 #: Kernel parameter update
3773 cudaGraphKernelNodeFieldParam = cyruntime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldParam
3775 #: Node enable/disable
3776 cudaGraphKernelNodeFieldEnabled = cyruntime.cudaGraphKernelNodeField.cudaGraphKernelNodeFieldEnabled
3778_dict_cudaGraphKernelNodeField = dict(((int(v), v) for k, v in cudaGraphKernelNodeField.__members__.items()))
3780class cudaGetDriverEntryPointFlags(IntEnum):
3781 """
3782 Flags to specify search options to be used with
3783 :py:obj:`~.cudaGetDriverEntryPoint` For more details see
3784 :py:obj:`~.cuGetProcAddress`
3785 """
3787 #: Default search mode for driver symbols.
3788 cudaEnableDefault = cyruntime.cudaGetDriverEntryPointFlags.cudaEnableDefault
3790 #: Search for legacy versions of driver symbols.
3791 cudaEnableLegacyStream = cyruntime.cudaGetDriverEntryPointFlags.cudaEnableLegacyStream
3793 #: Search for per-thread versions of driver symbols.
3794 cudaEnablePerThreadDefaultStream = cyruntime.cudaGetDriverEntryPointFlags.cudaEnablePerThreadDefaultStream
3796_dict_cudaGetDriverEntryPointFlags = dict(((int(v), v) for k, v in cudaGetDriverEntryPointFlags.__members__.items()))
3798class cudaDriverEntryPointQueryResult(IntEnum):
3799 """
3800 Enum for status from obtaining driver entry points, used with
3801 :py:obj:`~.cudaApiGetDriverEntryPoint`
3802 """
3804 #: Search for symbol found a match
3805 cudaDriverEntryPointSuccess = cyruntime.cudaDriverEntryPointQueryResult.cudaDriverEntryPointSuccess
3807 #: Search for symbol was not found
3808 cudaDriverEntryPointSymbolNotFound = cyruntime.cudaDriverEntryPointQueryResult.cudaDriverEntryPointSymbolNotFound
3810 #: Search for symbol was found but the version wasn't high enough
3811 cudaDriverEntryPointVersionNotSufficent = cyruntime.cudaDriverEntryPointQueryResult.cudaDriverEntryPointVersionNotSufficent
3813_dict_cudaDriverEntryPointQueryResult = dict(((int(v), v) for k, v in cudaDriverEntryPointQueryResult.__members__.items()))
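# --- Editorial usage sketch (not generated code): querying a driver entry
# point and decoding the status with the table above. The
# (error, funcPtr, driverStatus) return layout is an assumption about the
# cudaGetDriverEntryPoint wrapper in this module.
def _example_get_entry_point(symbol=b"cuDeviceGetUuid"):
    err, func_ptr, status = cudaGetDriverEntryPoint(
        symbol, cudaGetDriverEntryPointFlags.cudaEnableDefault)
    ok = cudaDriverEntryPointQueryResult.cudaDriverEntryPointSuccess
    if err != cudaError_t.cudaSuccess or _dict_cudaDriverEntryPointQueryResult[int(status)] != ok:
        return None
    return func_ptr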
3815class cudaGraphDebugDotFlags(IntEnum):
3816 """
3817 CUDA Graph debug write options
3818 """
3820 #: Output all debug data as if every debug flag is enabled
3821 cudaGraphDebugDotFlagsVerbose = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsVerbose
3823 #: Adds :py:obj:`~.cudaKernelNodeParams` to output
3824 cudaGraphDebugDotFlagsKernelNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsKernelNodeParams
3826 #: Adds :py:obj:`~.cudaMemcpy3DParms` to output
3827 cudaGraphDebugDotFlagsMemcpyNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsMemcpyNodeParams
3829 #: Adds :py:obj:`~.cudaMemsetParams` to output
3830 cudaGraphDebugDotFlagsMemsetNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsMemsetNodeParams
3832 #: Adds :py:obj:`~.cudaHostNodeParams` to output
3833 cudaGraphDebugDotFlagsHostNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsHostNodeParams
3835 #: Adds cudaEvent_t handle from record and wait nodes to output
3836 cudaGraphDebugDotFlagsEventNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsEventNodeParams
3838 #: Adds :py:obj:`~.cudaExternalSemaphoreSignalNodeParams` values to
3839 #: output
3840 cudaGraphDebugDotFlagsExtSemasSignalNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsExtSemasSignalNodeParams
3842 #: Adds :py:obj:`~.cudaExternalSemaphoreWaitNodeParams` to output
3843 cudaGraphDebugDotFlagsExtSemasWaitNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsExtSemasWaitNodeParams
3845 #: Adds cudaKernelNodeAttrID values to output
3846 cudaGraphDebugDotFlagsKernelNodeAttributes = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsKernelNodeAttributes
3848 #: Adds node handles and every kernel function handle to output
3849 cudaGraphDebugDotFlagsHandles = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsHandles
3851 #: Adds :py:obj:`~.cudaConditionalNodeParams` to output
3852 cudaGraphDebugDotFlagsConditionalNodeParams = cyruntime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsConditionalNodeParams
3854_dict_cudaGraphDebugDotFlags = dict(((int(v), v) for k, v in cudaGraphDebugDotFlags.__members__.items()))
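# --- Editorial usage sketch (not generated code): writing a DOT rendering
# of a graph for debugging. Because the enum above is an IntEnum, its flags
# can be OR-ed together; `path` is a hypothetical output file.
def _example_dump_graph_dot(graph, path=b"graph.dot"):
    flags = (cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsKernelNodeParams
             | cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsHandles)
    err, = cudaGraphDebugDotPrint(graph, path, flags)
    assert err == cudaError_t.cudaSuccess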
3856class cudaGraphInstantiateFlags(IntEnum):
3857 """
3858 Flags for instantiating a graph
3859 """
3861 #: Automatically free memory allocated in a graph before relaunching.
3862 cudaGraphInstantiateFlagAutoFreeOnLaunch = cyruntime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagAutoFreeOnLaunch
3864 #: Automatically upload the graph after instantiation. Only supported
3865 #: by :py:obj:`~.cudaGraphInstantiateWithParams`. The upload will be
3866 #: performed using the stream provided in `instantiateParams`.
3869 cudaGraphInstantiateFlagUpload = cyruntime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagUpload
3871 #: Instantiate the graph to be launchable from the device. This flag
3872 #: can only be used on platforms which support unified addressing. This
3873 #: flag cannot be used in conjunction with
3874 #: cudaGraphInstantiateFlagAutoFreeOnLaunch.
3876 cudaGraphInstantiateFlagDeviceLaunch = cyruntime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagDeviceLaunch
3878 #: Run the graph using the per-node priority attributes rather than the
3879 #: priority of the stream it is launched into.
3880 cudaGraphInstantiateFlagUseNodePriority = cyruntime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagUseNodePriority
3882_dict_cudaGraphInstantiateFlags = dict(((int(v), v) for k, v in cudaGraphInstantiateFlags.__members__.items()))
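# --- Editorial usage sketch (not generated code): instantiating a graph
# with a bitmask of the flags above via cudaGraphInstantiateWithFlags.
def _example_instantiate(graph):
    flags = (cudaGraphInstantiateFlags.cudaGraphInstantiateFlagAutoFreeOnLaunch
             | cudaGraphInstantiateFlags.cudaGraphInstantiateFlagUseNodePriority)
    err, graph_exec = cudaGraphInstantiateWithFlags(graph, flags)
    assert err == cudaError_t.cudaSuccess
    return graph_exec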
3884class cudaDeviceNumaConfig(IntEnum):
3885 """
3886 CUDA device NUMA config
3887 """
3889 #: The GPU is not a NUMA node
3890 cudaDeviceNumaConfigNone = cyruntime.cudaDeviceNumaConfig.cudaDeviceNumaConfigNone
3892 #: The GPU is a NUMA node, cudaDevAttrNumaId contains its NUMA ID
3893 cudaDeviceNumaConfigNumaNode = cyruntime.cudaDeviceNumaConfig.cudaDeviceNumaConfigNumaNode
3895_dict_cudaDeviceNumaConfig = dict(((int(v), v) for k, v in cudaDeviceNumaConfig.__members__.items()))
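# --- Editorial usage sketch (not generated code): reading a device's NUMA
# configuration through cudaDeviceGetAttribute and decoding it with the
# table above; cudaDevAttrNumaConfig/cudaDevAttrNumaId are the attribute
# IDs documented for this query.
def _example_numa_config(device=0):
    err, raw = cudaDeviceGetAttribute(cudaDeviceAttr.cudaDevAttrNumaConfig, device)
    assert err == cudaError_t.cudaSuccess
    config = _dict_cudaDeviceNumaConfig[int(raw)]
    if config == cudaDeviceNumaConfig.cudaDeviceNumaConfigNumaNode:
        err, numa_id = cudaDeviceGetAttribute(cudaDeviceAttr.cudaDevAttrNumaId, device)
        return config, numa_id
    return config, None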
3897class cudaSurfaceBoundaryMode(IntEnum):
3898 """
3899 CUDA Surface boundary modes
3900 """
3902 #: Zero boundary mode
3903 cudaBoundaryModeZero = cyruntime.cudaSurfaceBoundaryMode.cudaBoundaryModeZero
3905 #: Clamp boundary mode
3906 cudaBoundaryModeClamp = cyruntime.cudaSurfaceBoundaryMode.cudaBoundaryModeClamp
3908 #: Trap boundary mode
3909 cudaBoundaryModeTrap = cyruntime.cudaSurfaceBoundaryMode.cudaBoundaryModeTrap
3911_dict_cudaSurfaceBoundaryMode = dict(((int(v), v) for k, v in cudaSurfaceBoundaryMode.__members__.items()))
3913class cudaSurfaceFormatMode(IntEnum):
3914 """
3915 CUDA Surface format modes
3916 """
3918 #: Forced format mode
3919 cudaFormatModeForced = cyruntime.cudaSurfaceFormatMode.cudaFormatModeForced
3921 #: Auto format mode
3922 cudaFormatModeAuto = cyruntime.cudaSurfaceFormatMode.cudaFormatModeAuto
3924_dict_cudaSurfaceFormatMode = dict(((int(v), v) for k, v in cudaSurfaceFormatMode.__members__.items()))
3926class cudaTextureAddressMode(IntEnum):
3927 """
3928 CUDA texture address modes
3929 """
3931 #: Wrapping address mode
3932 cudaAddressModeWrap = cyruntime.cudaTextureAddressMode.cudaAddressModeWrap
3934 #: Clamp to edge address mode
3935 cudaAddressModeClamp = cyruntime.cudaTextureAddressMode.cudaAddressModeClamp
3937 #: Mirror address mode
3938 cudaAddressModeMirror = cyruntime.cudaTextureAddressMode.cudaAddressModeMirror
3940 #: Border address mode
3941 cudaAddressModeBorder = cyruntime.cudaTextureAddressMode.cudaAddressModeBorder
3943_dict_cudaTextureAddressMode = dict(((int(v), v) for k, v in cudaTextureAddressMode.__members__.items()))
3945class cudaTextureFilterMode(IntEnum):
3946 """
3947 CUDA texture filter modes
3948 """
3950 #: Point filter mode
3951 cudaFilterModePoint = cyruntime.cudaTextureFilterMode.cudaFilterModePoint
3953 #: Linear filter mode
3954 cudaFilterModeLinear = cyruntime.cudaTextureFilterMode.cudaFilterModeLinear
3956_dict_cudaTextureFilterMode = dict(((int(v), v) for k, v in cudaTextureFilterMode.__members__.items()))
3958class cudaTextureReadMode(IntEnum):
3959 """
3960 CUDA texture read modes
3961 """
3963 #: Read texture as specified element type
3964 cudaReadModeElementType = cyruntime.cudaTextureReadMode.cudaReadModeElementType
3966 #: Read texture as normalized float
3967 cudaReadModeNormalizedFloat = cyruntime.cudaTextureReadMode.cudaReadModeNormalizedFloat
3969_dict_cudaTextureReadMode = dict(((int(v), v) for k, v in cudaTextureReadMode.__members__.items()))
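# --- Editorial usage sketch (not generated code): the address, filter and
# read mode enums above typically meet in a cudaTextureDesc when creating a
# texture object; whole-list assignment to `addressMode` is an assumption
# about the array-field setter of the struct wrapper.
def _example_texture_desc():
    desc = cudaTextureDesc()
    desc.addressMode = [cudaTextureAddressMode.cudaAddressModeClamp] * 3
    desc.filterMode = cudaTextureFilterMode.cudaFilterModeLinear
    desc.readMode = cudaTextureReadMode.cudaReadModeNormalizedFloat
    desc.normalizedCoords = 1
    return desc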
3971class cudaRoundMode(IntEnum):
3972 """"""
3973 cudaRoundNearest = cyruntime.cudaRoundMode.cudaRoundNearest
3974 cudaRoundZero = cyruntime.cudaRoundMode.cudaRoundZero
3975 cudaRoundPosInf = cyruntime.cudaRoundMode.cudaRoundPosInf
3976 cudaRoundMinInf = cyruntime.cudaRoundMode.cudaRoundMinInf
3978_dict_cudaRoundMode = dict(((int(v), v) for k, v in cudaRoundMode.__members__.items()))
3980class cudaGLDeviceList(IntEnum):
3981 """
3982 CUDA devices corresponding to the current OpenGL context
3983 """
3985 #: The CUDA devices for all GPUs used by the current OpenGL context
3986 cudaGLDeviceListAll = cyruntime.cudaGLDeviceList.cudaGLDeviceListAll
3988 #: The CUDA devices for the GPUs used by the current OpenGL context in
3989 #: its currently rendering frame
3990 cudaGLDeviceListCurrentFrame = cyruntime.cudaGLDeviceList.cudaGLDeviceListCurrentFrame
3992 #: The CUDA devices for the GPUs to be used by the current OpenGL
3993 #: context in the next frame
3994 cudaGLDeviceListNextFrame = cyruntime.cudaGLDeviceList.cudaGLDeviceListNextFrame
3996_dict_cudaGLDeviceList = dict(((int(v), v) for k, v in cudaGLDeviceList.__members__.items()))
3998class cudaGLMapFlags(IntEnum):
3999 """
4000 CUDA GL Map Flags
4001 """
4003 #: Default; Assume resource can be read/written
4004 cudaGLMapFlagsNone = cyruntime.cudaGLMapFlags.cudaGLMapFlagsNone
4006 #: CUDA kernels will not write to this resource
4007 cudaGLMapFlagsReadOnly = cyruntime.cudaGLMapFlags.cudaGLMapFlagsReadOnly
4009 #: CUDA kernels will only write to and will not read from this resource
4010 cudaGLMapFlagsWriteDiscard = cyruntime.cudaGLMapFlags.cudaGLMapFlagsWriteDiscard
4012_dict_cudaGLMapFlags = dict(((int(v), v) for k, v in cudaGLMapFlags.__members__.items()))
4014class cudaStreamAttrID(IntEnum):
4015 """
4016 Launch attributes enum; used as id field of
4017 :py:obj:`~.cudaLaunchAttribute`
4018 """
4020 #: Ignored entry, for convenient composition
4021 cudaLaunchAttributeIgnore = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeIgnore
4023 #: Valid for streams, graph nodes, launches. See
4024 #: :py:obj:`~.cudaLaunchAttributeValue.accessPolicyWindow`.
4025 cudaLaunchAttributeAccessPolicyWindow = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeAccessPolicyWindow
4027 #: Valid for graph nodes, launches. See
4028 #: :py:obj:`~.cudaLaunchAttributeValue.cooperative`.
4029 cudaLaunchAttributeCooperative = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeCooperative
4031 #: Valid for streams. See
4032 #: :py:obj:`~.cudaLaunchAttributeValue.syncPolicy`.
4033 cudaLaunchAttributeSynchronizationPolicy = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeSynchronizationPolicy
4035 #: Valid for graph nodes, launches. See
4036 #: :py:obj:`~.cudaLaunchAttributeValue.clusterDim`.
4037 cudaLaunchAttributeClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterDimension
4039 #: Valid for graph nodes, launches. See
4040 #: :py:obj:`~.cudaLaunchAttributeValue.clusterSchedulingPolicyPreference`.
4041 cudaLaunchAttributeClusterSchedulingPolicyPreference = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterSchedulingPolicyPreference
4043 #: Valid for launches. Setting
4044 #: :py:obj:`~.cudaLaunchAttributeValue.programmaticStreamSerializationAllowed`
4045 #: to non-0 signals that the kernel will use programmatic means to
4046 #: resolve its stream dependency, so that the CUDA runtime should
4047 #: opportunistically allow the grid's execution to overlap with the
4048 #: previous kernel in the stream, if that kernel requests the overlap.
4049 #: The dependent launches can choose to wait on the dependency using
4050 #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
4051 #: PTX instructions).
4052 cudaLaunchAttributeProgrammaticStreamSerialization = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticStreamSerialization
4054 #: Valid for launches. Set
4055 #: :py:obj:`~.cudaLaunchAttributeValue.programmaticEvent` to record the
4056 #: event. An event recorded through this launch attribute is guaranteed
4057 #: to only trigger after all blocks in the associated kernel trigger the
4058 #: event. A block can trigger the event programmatically in a future
4059 #: CUDA release. A trigger can also be inserted at the beginning of
4060 #: each block's execution if triggerAtBlockStart is set to non-0. The
4061 #: dependent launches can choose to wait on the dependency using the
4062 #: programmatic sync (cudaGridDependencySynchronize() or equivalent PTX
4063 #: instructions). Note that dependents (including the CPU thread
4064 #: calling :py:obj:`~.cudaEventSynchronize()`) are not guaranteed to
4065 #: observe the release precisely when it is released. For example,
4066 #: :py:obj:`~.cudaEventSynchronize()` may only observe the event
4067 #: trigger long after the associated kernel has completed. This
4068 #: recording type is primarily meant for establishing programmatic
4069 #: dependency between device tasks. Note also this type of dependency
4070 #: allows, but does not guarantee, concurrent execution of tasks.
4071 #: The event supplied must not be an interprocess or interop event.
4072 #: The event must disable timing (i.e. must be created with the
4073 #: :py:obj:`~.cudaEventDisableTiming` flag set).
4074 cudaLaunchAttributeProgrammaticEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticEvent
4076 #: Valid for streams, graph nodes, launches. See
4077 #: :py:obj:`~.cudaLaunchAttributeValue.priority`.
4078 cudaLaunchAttributePriority = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePriority
4080 #: Valid for streams, graph nodes, launches. See
4081 #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomainMap`.
4082 cudaLaunchAttributeMemSyncDomainMap = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomainMap
4084 #: Valid for streams, graph nodes, launches. See
4085 #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomain`.
4086 cudaLaunchAttributeMemSyncDomain = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomain
4088 #: Valid for graph nodes and launches. Set
4089 #: :py:obj:`~.cudaLaunchAttributeValue.preferredClusterDim` to allow
4090 #: the kernel launch to specify a preferred substitute cluster
4091 #: dimension. Blocks may be grouped according to either the dimensions
4092 #: specified with this attribute (grouped into a "preferred substitute
4093 #: cluster"), or the one specified with
4094 #: :py:obj:`~.cudaLaunchAttributeClusterDimension` attribute (grouped
4095 #: into a "regular cluster"). The cluster dimensions of a "preferred
4096 #: substitute cluster" shall be an integer multiple greater than zero
4097 #: of the regular cluster dimensions. The device will attempt - on a
4098 #: best-effort basis - to group thread blocks into preferred clusters
4099 #: over grouping them into regular clusters. When it deems necessary
4100 #: (primarily when the device temporarily runs out of physical
4101 #: resources to launch the larger preferred clusters), the device may
4102 #: switch to launch the regular clusters instead to attempt to utilize
4103 #: as much of the physical device resources as possible.
4104 #: Each type of cluster will have its enumeration / coordinate setup
4105 #: as if the grid consists solely of its type of cluster. For example,
4106 #: if the preferred substitute cluster dimensions double the regular
4107 #: cluster dimensions, there might be simultaneously a regular cluster
4108 #: indexed at (1,0,0), and a preferred cluster indexed at (1,0,0). In
4109 #: this example, the preferred substitute cluster (1,0,0) replaces
4110 #: regular clusters (2,0,0) and (3,0,0) and groups their blocks.
4111 #: This attribute will only take effect when a regular cluster
4112 #: dimension has been specified. The preferred substitute cluster
4113 #: dimension must be an integer multiple greater than zero of the
4114 #: regular cluster dimension and must divide the grid. It must also be
4115 #: no more than `maxBlocksPerCluster`, if that is set in the kernel's
4116 #: `__launch_bounds__`, and otherwise less than the maximum value the
4117 #: driver can support. Beyond that, setting this attribute to a value
4118 #: physically unable to fit on any particular device is permitted.
4120 cudaLaunchAttributePreferredClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredClusterDimension
4122 #: Valid for launches. Set
4123 #: :py:obj:`~.cudaLaunchAttributeValue.launchCompletionEvent` to record
4124 #: the event.
4125 #: Nominally, the event is triggered once all blocks of the kernel
4126 #: have begun execution. Currently this is a best effort. If a kernel B
4127 #: has a launch completion dependency on a kernel A, B may wait until A
4128 #: is complete. Alternatively, blocks of B may begin before all blocks
4129 #: of A have begun, for example if B can claim execution resources
4130 #: unavailable to A (e.g. they run on different GPUs) or if B is a
4131 #: higher priority than A. Exercise caution if such an ordering
4132 #: inversion could lead to deadlock.
4133 #: A launch completion event is nominally similar to a programmatic
4134 #: event with `triggerAtBlockStart` set except that it is not visible
4135 #: to `cudaGridDependencySynchronize()` and can be used with compute
4136 #: capability less than 9.0.
4137 #: The event supplied must not be an interprocess or interop event.
4138 #: The event must disable timing (i.e. must be created with the
4139 #: :py:obj:`~.cudaEventDisableTiming` flag set).
4140 cudaLaunchAttributeLaunchCompletionEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeLaunchCompletionEvent
4142 #: Valid for graph nodes, launches. This attribute is graphs-only, and
4143 #: passing it to a launch in a non-capturing stream will result in an
4144 #: error.
4145 #: cudaLaunchAttributeValue::deviceUpdatableKernelNode::deviceUpdatable
4146 #: can only be set to 0 or 1. Setting the field to 1 indicates that the
4147 #: corresponding kernel node should be device-updatable. On success, a
4148 #: handle will be returned via
4149 #: :py:obj:`~.cudaLaunchAttributeValue`::deviceUpdatableKernelNode::devNode
4150 #: which can be passed to the various device-side update functions to
4151 #: update the node's kernel parameters from within another kernel. For
4152 #: more information on the types of device updates that can be made, as
4153 #: well as the relevant limitations thereof, see
4154 #: :py:obj:`~.cudaGraphKernelNodeUpdatesApply`.
4155 #: Nodes which are device-updatable have additional restrictions
4156 #: compared to regular kernel nodes. Firstly, device-updatable nodes
4157 #: cannot be removed from their graph via
4158 #: :py:obj:`~.cudaGraphDestroyNode`. Additionally, once opted-in to
4159 #: this functionality, a node cannot opt out, and any attempt to set
4160 #: the deviceUpdatable attribute to 0 will result in an error. Device-
4161 #: updatable kernel nodes also cannot have their attributes copied
4162 #: to/from another kernel node via
4163 #: :py:obj:`~.cudaGraphKernelNodeCopyAttributes`. Graphs containing one
4164 #: or more device-updatable nodes also do not allow multiple
4165 #: instantiation, and neither the graph nor its instantiated version
4166 #: can be passed to :py:obj:`~.cudaGraphExecUpdate`.
4167 #: If a graph contains device-updatable nodes and updates those nodes
4168 #: from the device from within the graph, the graph must be uploaded
4169 #: with :py:obj:`~.cuGraphUpload` before it is launched. For such a
4170 #: graph, if host-side executable graph updates are made to the device-
4171 #: updatable nodes, the graph must be uploaded before it is launched
4172 #: again.
4173 cudaLaunchAttributeDeviceUpdatableKernelNode = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeDeviceUpdatableKernelNode
4175 #: Valid for launches. On devices where the L1 cache and shared memory
4176 #: use the same hardware resources, setting
4177 #: :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a
4178 #: percentage between 0 and 100 sets the shared memory carveout
4179 #: preference in percent of the total shared memory for that kernel
4180 #: launch. This attribute takes precedence over
4181 #: :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is
4182 #: only a hint, and the driver can choose a different configuration if
4183 #: required for the launch.
4184 cudaLaunchAttributePreferredSharedMemoryCarveout = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout
4186 #: Valid for streams, graph nodes, launches. This attribute is a hint
4187 #: to the CUDA runtime that the launch should attempt to make the
4188 #: kernel maximize its NVLINK utilization.
4189 #:
4190 #: When possible to honor this hint, CUDA will assume each block in
4191 #: the grid launch will carry out an even amount of NVLINK traffic, and
4192 #: make a best-effort attempt to adjust the kernel launch based on that
4193 #: assumption.
4194 #: This attribute is a hint only. CUDA makes no functional or
4195 #: performance guarantee. Its applicability can be affected by many
4196 #: different factors, including driver version (CUDA doesn't guarantee
4197 #: that performance characteristics will be maintained between driver
4198 #: versions; a driver update could alter or regress previously observed
4199 #: performance characteristics). It also doesn't guarantee a
4200 #: successful result, i.e. applying the attribute may not improve the
4201 #: performance of either the targeted kernel or the encapsulating
4202 #: application.
4203 #: Valid values for
4204 #: :py:obj:`~.cudaLaunchAttributeValue.nvlinkUtilCentricScheduling` are
4205 #: 0 (disabled) and 1 (enabled).
4206 cudaLaunchAttributeNvlinkUtilCentricScheduling = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeNvlinkUtilCentricScheduling
4208_dict_cudaLaunchAttributeID = dict(((int(v), v) for k, v in cudaLaunchAttributeID.__members__.items()))
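# --- Editorial usage sketch (not generated code): applying one of the
# stream-valid attributes above to a stream. cudaStreamAttrValue is assumed
# here to alias cudaLaunchAttributeValue in these bindings; the chosen sync
# policy is illustrative only.
def _example_set_stream_sync_policy(stream):
    value = cudaStreamAttrValue()
    value.syncPolicy = cudaSynchronizationPolicy.cudaSyncPolicyYield
    err, = cudaStreamSetAttribute(
        stream, cudaStreamAttrID.cudaLaunchAttributeSynchronizationPolicy, value)
    assert err == cudaError_t.cudaSuccess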
4210class cudaKernelNodeAttrID(IntEnum):
4211 """
4212 Launch attributes enum; used as id field of
4213 :py:obj:`~.cudaLaunchAttribute`
4214 """
4216 #: Ignored entry, for convenient composition
4217 cudaLaunchAttributeIgnore = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeIgnore
4219 #: Valid for streams, graph nodes, launches. See
4220 #: :py:obj:`~.cudaLaunchAttributeValue.accessPolicyWindow`.
4221 cudaLaunchAttributeAccessPolicyWindow = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeAccessPolicyWindow
4223 #: Valid for graph nodes, launches. See
4224 #: :py:obj:`~.cudaLaunchAttributeValue.cooperative`.
4225 cudaLaunchAttributeCooperative = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeCooperative
4227 #: Valid for streams. See
4228 #: :py:obj:`~.cudaLaunchAttributeValue.syncPolicy`.
4229 cudaLaunchAttributeSynchronizationPolicy = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeSynchronizationPolicy
4231 #: Valid for graph nodes, launches. See
4232 #: :py:obj:`~.cudaLaunchAttributeValue.clusterDim`.
4233 cudaLaunchAttributeClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterDimension
4235 #: Valid for graph nodes, launches. See
4236 #: :py:obj:`~.cudaLaunchAttributeValue.clusterSchedulingPolicyPreference`.
4237 cudaLaunchAttributeClusterSchedulingPolicyPreference = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeClusterSchedulingPolicyPreference
4239 #: Valid for launches. Setting
4240 #: :py:obj:`~.cudaLaunchAttributeValue.programmaticStreamSerializationAllowed`
4241 #: to non-0 signals that the kernel will use programmatic means to
4242 #: resolve its stream dependency, so that the CUDA runtime should
4243 #: opportunistically allow the grid's execution to overlap with the
4244 #: previous kernel in the stream, if that kernel requests the overlap.
4245 #: The dependent launches can choose to wait on the dependency using
4246 #: the programmatic sync (cudaGridDependencySynchronize() or equivalent
4247 #: PTX instructions).
4248 cudaLaunchAttributeProgrammaticStreamSerialization = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticStreamSerialization
4250 #: Valid for launches. Set
4251 #: :py:obj:`~.cudaLaunchAttributeValue.programmaticEvent` to record the
4252 #: event. An event recorded through this launch attribute is guaranteed
4253 #: to only trigger after all blocks in the associated kernel trigger the
4254 #: event. A block can trigger the event programmatically in a future
4255 #: CUDA release. A trigger can also be inserted at the beginning of
4256 #: each block's execution if triggerAtBlockStart is set to non-0. The
4257 #: dependent launches can choose to wait on the dependency using the
4258 #: programmatic sync (cudaGridDependencySynchronize() or equivalent PTX
4259 #: instructions). Note that dependents (including the CPU thread
4260 #: calling :py:obj:`~.cudaEventSynchronize()`) are not guaranteed to
4261 #: observe the release precisely when it is released. For example,
4262 #: :py:obj:`~.cudaEventSynchronize()` may only observe the event
4263 #: trigger long after the associated kernel has completed. This
4264 #: recording type is primarily meant for establishing programmatic
4265 #: dependency between device tasks. Note also this type of dependency
4266 #: allows, but does not guarantee, concurrent execution of tasks.
4267 #: The event supplied must not be an interprocess or interop event.
4268 #: The event must disable timing (i.e. must be created with the
4269 #: :py:obj:`~.cudaEventDisableTiming` flag set).
4270 cudaLaunchAttributeProgrammaticEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeProgrammaticEvent
4272 #: Valid for streams, graph nodes, launches. See
4273 #: :py:obj:`~.cudaLaunchAttributeValue.priority`.
4274 cudaLaunchAttributePriority = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePriority
4276 #: Valid for streams, graph nodes, launches. See
4277 #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomainMap`.
4278 cudaLaunchAttributeMemSyncDomainMap = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomainMap
4280 #: Valid for streams, graph nodes, launches. See
4281 #: :py:obj:`~.cudaLaunchAttributeValue.memSyncDomain`.
4282 cudaLaunchAttributeMemSyncDomain = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomain
4284 #: Valid for graph nodes and launches. Set
4285 #: :py:obj:`~.cudaLaunchAttributeValue.preferredClusterDim` to allow
4286 #: the kernel launch to specify a preferred substitute cluster
4287 #: dimension. Blocks may be grouped according to either the dimensions
4288 #: specified with this attribute (grouped into a "preferred substitute
4289 #: cluster"), or the one specified with
4290 #: :py:obj:`~.cudaLaunchAttributeClusterDimension` attribute (grouped
4291 #: into a "regular cluster"). The cluster dimensions of a "preferred
4292 #: substitute cluster" shall be an integer multiple greater than zero
4293 #: of the regular cluster dimensions. The device will attempt - on a
4294 #: best-effort basis - to group thread blocks into preferred clusters
4295 #: over grouping them into regular clusters. When it deems necessary
4296 #: (primarily when the device temporarily runs out of physical
4297 #: resources to launch the larger preferred clusters), the device may
4298 #: switch to launch the regular clusters instead to attempt to utilize
4299 #: as much of the physical device resources as possible.
4300 #: Each type of cluster will have its enumeration / coordinate setup
4301 #: as if the grid consists solely of its type of cluster. For example,
4302 #: if the preferred substitute cluster dimensions double the regular
4303 #: cluster dimensions, there might be simultaneously a regular cluster
4304 #: indexed at (1,0,0), and a preferred cluster indexed at (1,0,0). In
4305 #: this example, the preferred substitute cluster (1,0,0) replaces
4306 #: regular clusters (2,0,0) and (3,0,0) and groups their blocks.
4307 #: This attribute will only take effect when a regular cluster
4308 #: dimension has been specified. The preferred substitute cluster
4309 #: dimension must be an integer multiple greater than zero of the
4310 #: regular cluster dimension and must divide the grid. It must also be
4311 #: no more than `maxBlocksPerCluster`, if that is set in the kernel's
4312 #: `__launch_bounds__`, and otherwise less than the maximum value the
4313 #: driver can support. Beyond that, setting this attribute to a value
4314 #: physically unable to fit on any particular device is permitted.
4316 cudaLaunchAttributePreferredClusterDimension = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredClusterDimension
4318 #: Valid for launches. Set
4319 #: :py:obj:`~.cudaLaunchAttributeValue.launchCompletionEvent` to record
4320 #: the event.
4321 #: Nominally, the event is triggered once all blocks of the kernel
4322 #: have begun execution. Currently this is a best effort. If a kernel B
4323 #: has a launch completion dependency on a kernel A, B may wait until A
4324 #: is complete. Alternatively, blocks of B may begin before all blocks
4325 #: of A have begun, for example if B can claim execution resources
4326 #: unavailable to A (e.g. they run on different GPUs) or if B is a
4327 #: higher priority than A. Exercise caution if such an ordering
4328 #: inversion could lead to deadlock.
4329 #: A launch completion event is nominally similar to a programmatic
4330 #: event with `triggerAtBlockStart` set except that it is not visible
4331 #: to `cudaGridDependencySynchronize()` and can be used with compute
4332 #: capability less than 9.0.
4333 #: The event supplied must not be an interprocess or interop event.
4334 #: The event must disable timing (i.e. must be created with the
4335 #: :py:obj:`~.cudaEventDisableTiming` flag set).
4336 cudaLaunchAttributeLaunchCompletionEvent = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeLaunchCompletionEvent
4338 #: Valid for graph nodes, launches. This attribute is graphs-only, and
4339 #: passing it to a launch in a non-capturing stream will result in an
4340 #: error.
4341 #: cudaLaunchAttributeValue::deviceUpdatableKernelNode::deviceUpdatable
4342 #: can only be set to 0 or 1. Setting the field to 1 indicates that the
4343 #: corresponding kernel node should be device-updatable. On success, a
4344 #: handle will be returned via
4345 #: :py:obj:`~.cudaLaunchAttributeValue`::deviceUpdatableKernelNode::devNode
4346 #: which can be passed to the various device-side update functions to
4347 #: update the node's kernel parameters from within another kernel. For
4348 #: more information on the types of device updates that can be made, as
4349 #: well as the relevant limitations thereof, see
4350 #: :py:obj:`~.cudaGraphKernelNodeUpdatesApply`.
4351 #: Nodes which are device-updatable have additional restrictions
4352 #: compared to regular kernel nodes. Firstly, device-updatable nodes
4353 #: cannot be removed from their graph via
4354 #: :py:obj:`~.cudaGraphDestroyNode`. Additionally, once opted-in to
4355 #: this functionality, a node cannot opt out, and any attempt to set
4356 #: the deviceUpdatable attribute to 0 will result in an error. Device-
4357 #: updatable kernel nodes also cannot have their attributes copied
4358 #: to/from another kernel node via
4359 #: :py:obj:`~.cudaGraphKernelNodeCopyAttributes`. Graphs containing one
4360 #: or more device-updatable nodes also do not allow multiple
4361 #: instantiation, and neither the graph nor its instantiated version
4362 #: can be passed to :py:obj:`~.cudaGraphExecUpdate`.
4363 #: If a graph contains device-updatable nodes and updates those nodes
4364 #: from the device from within the graph, the graph must be uploaded
4365 #: with :py:obj:`~.cuGraphUpload` before it is launched. For such a
4366 #: graph, if host-side executable graph updates are made to the device-
4367 #: updatable nodes, the graph must be uploaded before it is launched
4368 #: again.
4369 cudaLaunchAttributeDeviceUpdatableKernelNode = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeDeviceUpdatableKernelNode
4371 #: Valid for launches. On devices where the L1 cache and shared memory
4372 #: use the same hardware resources, setting
4373 #: :py:obj:`~.cudaLaunchAttributeValue.sharedMemCarveout` to a
4374 #: percentage between 0 and 100 sets the shared memory carveout
4375 #: preference in percent of the total shared memory for that kernel
4376 #: launch. This attribute takes precedence over
4377 #: :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout`. This is
4378 #: only a hint, and the driver can choose a different configuration if
4379 #: required for the launch.
4380 cudaLaunchAttributePreferredSharedMemoryCarveout = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributePreferredSharedMemoryCarveout
4382 #: Valid for streams, graph nodes, launches. This attribute is a hint
4383 #: to the CUDA runtime that the launch should attempt to make the
4384 #: kernel maximize its NVLINK utilization.
4385 #:
4386 #: When possible to honor this hint, CUDA will assume each block in
4387 #: the grid launch will carry out an even amount of NVLINK traffic, and
4388 #: make a best-effort attempt to adjust the kernel launch based on that
4389 #: assumption.
4390 #: This attribute is a hint only. CUDA makes no functional or
4391 #: performance guarantee. Its applicability can be affected by many
4392 #: different factors, including driver version (CUDA doesn't guarantee
4393 #: that performance characteristics will be maintained between driver
4394 #: versions; a driver update could alter or regress previously observed
4395 #: performance characteristics). It also doesn't guarantee a
4396 #: successful result, i.e. applying the attribute may not improve the
4397 #: performance of either the targeted kernel or the encapsulating
4398 #: application.
4399 #: Valid values for
4400 #: :py:obj:`~.cudaLaunchAttributeValue.nvlinkUtilCentricScheduling` are
4401 #: 0 (disabled) and 1 (enabled).
4402 cudaLaunchAttributeNvlinkUtilCentricScheduling = cyruntime.cudaLaunchAttributeID.cudaLaunchAttributeNvlinkUtilCentricScheduling
4404_dict_cudaLaunchAttributeID = dict(((int(v), v) for k, v in cudaLaunchAttributeID.__members__.items()))
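# --- Editorial usage sketch (not generated code): the same attribute IDs
# drive per-node updates on kernel graph nodes. Here the node priority is
# set, which takes effect when the graph is instantiated with
# cudaGraphInstantiateFlagUseNodePriority; cudaKernelNodeAttrValue is
# assumed to alias cudaLaunchAttributeValue in these bindings.
def _example_set_kernel_node_priority(node, priority):
    value = cudaKernelNodeAttrValue()
    value.priority = priority
    err, = cudaGraphKernelNodeSetAttribute(
        node, cudaKernelNodeAttrID.cudaLaunchAttributePriority, value)
    assert err == cudaError_t.cudaSuccess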
4406cdef class cudaDevResourceDesc_t:
4407 """
4409 An opaque descriptor handle. The descriptor encapsulates multiple created and configured resources. Created via cudaDeviceResourceGenerateDesc.
4411 Methods
4412 -------
4413 getPtr()
4414 Get memory address of class instance
4416 """
4417 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4418 if _ptr == 0:
4419 self._pvt_ptr = &self._pvt_val
4420 self._pvt_ptr[0] = <cyruntime.cudaDevResourceDesc_t>init_value
4421 else:
4422 self._pvt_ptr = <cyruntime.cudaDevResourceDesc_t *>_ptr
4423 def __init__(self, *args, **kwargs):
4424 pass
4425 def __repr__(self):
4426 return '<cudaDevResourceDesc_t ' + str(hex(self.__int__())) + '>'
4427 def __index__(self):
4428 return self.__int__()
4429 def __eq__(self, other):
4430 if not isinstance(other, cudaDevResourceDesc_t):
4431 return False
4432 return self._pvt_ptr[0] == (<cudaDevResourceDesc_t>other)._pvt_ptr[0]
4433 def __hash__(self):
4434 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4435 def __int__(self):
4436 return <void_ptr>self._pvt_ptr[0]
4437 def getPtr(self):
4438 return <void_ptr>self._pvt_ptr
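# --- Editorial note: cudaDevResourceDesc_t above (like the other cdef
# handle classes that follow) wraps an opaque pointer value; equality,
# hashing and int() all go through the wrapped value, e.g.:
def _example_handle_semantics():
    a = cudaDevResourceDesc_t(init_value=0)
    b = cudaDevResourceDesc_t(init_value=0)
    assert a == b and hash(a) == hash(b)  # compared by wrapped value
    assert int(a) == 0                    # int() exposes that value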
4440cdef class cudaExecutionContext_t:
4441 """
4443 An opaque handle to a CUDA execution context. It represents an execution context created via CUDA Runtime APIs such as cudaGreenCtxCreate.
4445 Methods
4446 -------
4447 getPtr()
4448 Get memory address of class instance
4450 """
4451 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4452 if _ptr == 0:
4453 self._pvt_ptr = &self._pvt_val
4454 self._pvt_ptr[0] = <cyruntime.cudaExecutionContext_t>init_value
4455 else:
4456 self._pvt_ptr = <cyruntime.cudaExecutionContext_t *>_ptr
4457 def __init__(self, *args, **kwargs):
4458 pass
4459 def __repr__(self):
4460 return '<cudaExecutionContext_t ' + str(hex(self.__int__())) + '>'
4461 def __index__(self):
4462 return self.__int__()
4463 def __eq__(self, other):
4464 if not isinstance(other, cudaExecutionContext_t):
4465 return False
4466 return self._pvt_ptr[0] == (<cudaExecutionContext_t>other)._pvt_ptr[0]
4467 def __hash__(self):
4468 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4469 def __int__(self):
4470 return <void_ptr>self._pvt_ptr[0]
4471 def getPtr(self):
4472 return <void_ptr>self._pvt_ptr
4474cdef class cudaArray_t:
4475 """
4477 CUDA array
4479 Methods
4480 -------
4481 getPtr()
4482 Get memory address of class instance
4484 """
4485 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4486 if _ptr == 0:
4487 self._pvt_ptr = &self._pvt_val
4488 self._pvt_ptr[0] = <cyruntime.cudaArray_t>init_value
4489 else:
4490 self._pvt_ptr = <cyruntime.cudaArray_t *>_ptr
4491 def __init__(self, *args, **kwargs):
4492 pass
4493 def __repr__(self):
4494 return '<cudaArray_t ' + str(hex(self.__int__())) + '>'
4495 def __index__(self):
4496 return self.__int__()
4497 def __eq__(self, other):
4498 if not isinstance(other, cudaArray_t):
4499 return False
4500 return self._pvt_ptr[0] == (<cudaArray_t>other)._pvt_ptr[0]
4501 def __hash__(self):
4502 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4503 def __int__(self):
4504 return <void_ptr>self._pvt_ptr[0]
4505 def getPtr(self):
4506 return <void_ptr>self._pvt_ptr
4508cdef class cudaArray_const_t:
4509 """
4511 CUDA array (as source copy argument)
4513 Methods
4514 -------
4515 getPtr()
4516 Get memory address of class instance
4518 """
4519 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4520 if _ptr == 0:
4521 self._pvt_ptr = &self._pvt_val
4522 self._pvt_ptr[0] = <cyruntime.cudaArray_const_t>init_value
4523 else:
4524 self._pvt_ptr = <cyruntime.cudaArray_const_t *>_ptr
4525 def __init__(self, *args, **kwargs):
4526 pass
4527 def __repr__(self):
4528 return '<cudaArray_const_t ' + str(hex(self.__int__())) + '>'
4529 def __index__(self):
4530 return self.__int__()
4531 def __eq__(self, other):
4532 if not isinstance(other, cudaArray_const_t):
4533 return False
4534 return self._pvt_ptr[0] == (<cudaArray_const_t>other)._pvt_ptr[0]
4535 def __hash__(self):
4536 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4537 def __int__(self):
4538 return <void_ptr>self._pvt_ptr[0]
4539 def getPtr(self):
4540 return <void_ptr>self._pvt_ptr
4542cdef class cudaMipmappedArray_t:
4543 """
4545 CUDA mipmapped array
4547 Methods
4548 -------
4549 getPtr()
4550 Get memory address of class instance
4552 """
4553 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4554 if _ptr == 0:
4555 self._pvt_ptr = &self._pvt_val
4556 self._pvt_ptr[0] = <cyruntime.cudaMipmappedArray_t>init_value
4557 else:
4558 self._pvt_ptr = <cyruntime.cudaMipmappedArray_t *>_ptr
4559 def __init__(self, *args, **kwargs):
4560 pass
4561 def __repr__(self):
4562 return '<cudaMipmappedArray_t ' + str(hex(self.__int__())) + '>'
4563 def __index__(self):
4564 return self.__int__()
4565 def __eq__(self, other):
4566 if not isinstance(other, cudaMipmappedArray_t):
4567 return False
4568 return self._pvt_ptr[0] == (<cudaMipmappedArray_t>other)._pvt_ptr[0]
4569 def __hash__(self):
4570 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4571 def __int__(self):
4572 return <void_ptr>self._pvt_ptr[0]
4573 def getPtr(self):
4574 return <void_ptr>self._pvt_ptr
4576cdef class cudaMipmappedArray_const_t:
4577 """
4579 CUDA mipmapped array (as source argument)
4581 Methods
4582 -------
4583 getPtr()
4584 Get memory address of class instance
4586 """
4587 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4588 if _ptr == 0:
4589 self._pvt_ptr = &self._pvt_val
4590 self._pvt_ptr[0] = <cyruntime.cudaMipmappedArray_const_t>init_value
4591 else:
4592 self._pvt_ptr = <cyruntime.cudaMipmappedArray_const_t *>_ptr
4593 def __init__(self, *args, **kwargs):
4594 pass
4595 def __repr__(self):
4596 return '<cudaMipmappedArray_const_t ' + str(hex(self.__int__())) + '>'
4597 def __index__(self):
4598 return self.__int__()
4599 def __eq__(self, other):
4600 if not isinstance(other, cudaMipmappedArray_const_t):
4601 return False
4602 return self._pvt_ptr[0] == (<cudaMipmappedArray_const_t>other)._pvt_ptr[0]
4603 def __hash__(self):
4604 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4605 def __int__(self):
4606 return <void_ptr>self._pvt_ptr[0]
4607 def getPtr(self):
4608 return <void_ptr>self._pvt_ptr
4610cdef class cudaGraphicsResource_t:
4611 """
4613 CUDA graphics resource types
4615 Methods
4616 -------
4617 getPtr()
4618 Get memory address of class instance
4620 """
4621 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4622 if _ptr == 0:
4623 self._pvt_ptr = &self._pvt_val
4624 self._pvt_ptr[0] = <cyruntime.cudaGraphicsResource_t>init_value
4625 else:
4626 self._pvt_ptr = <cyruntime.cudaGraphicsResource_t *>_ptr
4627 def __init__(self, *args, **kwargs):
4628 pass
4629 def __repr__(self):
4630 return '<cudaGraphicsResource_t ' + str(hex(self.__int__())) + '>'
4631 def __index__(self):
4632 return self.__int__()
4633 def __eq__(self, other):
4634 if not isinstance(other, cudaGraphicsResource_t):
4635 return False
4636 return self._pvt_ptr[0] == (<cudaGraphicsResource_t>other)._pvt_ptr[0]
4637 def __hash__(self):
4638 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4639 def __int__(self):
4640 return <void_ptr>self._pvt_ptr[0]
4641 def getPtr(self):
4642 return <void_ptr>self._pvt_ptr
4644cdef class cudaExternalMemory_t:
4645 """
4647 CUDA external memory
4649 Methods
4650 -------
4651 getPtr()
4652 Get memory address of class instance
4654 """
4655 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4656 if _ptr == 0:
4657 self._pvt_ptr = &self._pvt_val
4658 self._pvt_ptr[0] = <cyruntime.cudaExternalMemory_t>init_value
4659 else:
4660 self._pvt_ptr = <cyruntime.cudaExternalMemory_t *>_ptr
4661 def __init__(self, *args, **kwargs):
4662 pass
4663 def __repr__(self):
4664 return '<cudaExternalMemory_t ' + str(hex(self.__int__())) + '>'
4665 def __index__(self):
4666 return self.__int__()
4667 def __eq__(self, other):
4668 if not isinstance(other, cudaExternalMemory_t):
4669 return False
4670 return self._pvt_ptr[0] == (<cudaExternalMemory_t>other)._pvt_ptr[0]
4671 def __hash__(self):
4672 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4673 def __int__(self):
4674 return <void_ptr>self._pvt_ptr[0]
4675 def getPtr(self):
4676 return <void_ptr>self._pvt_ptr
4678cdef class cudaExternalSemaphore_t:
4679 """
4681 CUDA external semaphore
4683 Methods
4684 -------
4685 getPtr()
4686 Get memory address of class instance
4688 """
4689 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4690 if _ptr == 0:
4691 self._pvt_ptr = &self._pvt_val
4692 self._pvt_ptr[0] = <cyruntime.cudaExternalSemaphore_t>init_value
4693 else:
4694 self._pvt_ptr = <cyruntime.cudaExternalSemaphore_t *>_ptr
4695 def __init__(self, *args, **kwargs):
4696 pass
4697 def __repr__(self):
4698 return '<cudaExternalSemaphore_t ' + str(hex(self.__int__())) + '>'
4699 def __index__(self):
4700 return self.__int__()
4701 def __eq__(self, other):
4702 if not isinstance(other, cudaExternalSemaphore_t):
4703 return False
4704 return self._pvt_ptr[0] == (<cudaExternalSemaphore_t>other)._pvt_ptr[0]
4705 def __hash__(self):
4706 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4707 def __int__(self):
4708 return <void_ptr>self._pvt_ptr[0]
4709 def getPtr(self):
4710 return <void_ptr>self._pvt_ptr
4712cdef class cudaKernel_t:
4713 """
4715 CUDA kernel
4717 Methods
4718 -------
4719 getPtr()
4720 Get memory address of class instance
4722 """
4723 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4724 if _ptr == 0:
4725 self._pvt_ptr = &self._pvt_val
4726 self._pvt_ptr[0] = <cyruntime.cudaKernel_t>init_value
4727 else:
4728 self._pvt_ptr = <cyruntime.cudaKernel_t *>_ptr
4729 def __init__(self, *args, **kwargs):
4730 pass
4731 def __repr__(self):
4732 return '<cudaKernel_t ' + str(hex(self.__int__())) + '>'
4733 def __index__(self):
4734 return self.__int__()
4735 def __eq__(self, other):
4736 if not isinstance(other, cudaKernel_t):
4737 return False
4738 return self._pvt_ptr[0] == (<cudaKernel_t>other)._pvt_ptr[0]
4739 def __hash__(self):
4740 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4741 def __int__(self):
4742 return <void_ptr>self._pvt_ptr[0]
4743 def getPtr(self):
4744 return <void_ptr>self._pvt_ptr
4746cdef class cudaLibrary_t:
4747 """
4749 CUDA library
4751 Methods
4752 -------
4753 getPtr()
4754 Get memory address of class instance
4756 """
4757 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4758 if _ptr == 0:
4759 self._pvt_ptr = &self._pvt_val
4760 self._pvt_ptr[0] = <cyruntime.cudaLibrary_t>init_value
4761 else:
4762 self._pvt_ptr = <cyruntime.cudaLibrary_t *>_ptr
4763 def __init__(self, *args, **kwargs):
4764 pass
4765 def __repr__(self):
4766 return '<cudaLibrary_t ' + str(hex(self.__int__())) + '>'
4767 def __index__(self):
4768 return self.__int__()
4769 def __eq__(self, other):
4770 if not isinstance(other, cudaLibrary_t):
4771 return False
4772 return self._pvt_ptr[0] == (<cudaLibrary_t>other)._pvt_ptr[0]
4773 def __hash__(self):
4774 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4775 def __int__(self):
4776 return <void_ptr>self._pvt_ptr[0]
4777 def getPtr(self):
4778 return <void_ptr>self._pvt_ptr
4780cdef class cudaGraphDeviceNode_t:
4781 """
4783 CUDA device node handle for device-side node update
4785 Methods
4786 -------
4787 getPtr()
4788 Get memory address of class instance
4790 """
4791 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4792 if _ptr == 0:
4793 self._pvt_ptr = &self._pvt_val
4794 self._pvt_ptr[0] = <cyruntime.cudaGraphDeviceNode_t>init_value
4795 else:
4796 self._pvt_ptr = <cyruntime.cudaGraphDeviceNode_t *>_ptr
4797 def __init__(self, *args, **kwargs):
4798 pass
4799 def __repr__(self):
4800 return '<cudaGraphDeviceNode_t ' + str(hex(self.__int__())) + '>'
4801 def __index__(self):
4802 return self.__int__()
4803 def __eq__(self, other):
4804 if not isinstance(other, cudaGraphDeviceNode_t):
4805 return False
4806 return self._pvt_ptr[0] == (<cudaGraphDeviceNode_t>other)._pvt_ptr[0]
4807 def __hash__(self):
4808 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4809 def __int__(self):
4810 return <void_ptr>self._pvt_ptr[0]
4811 def getPtr(self):
4812 return <void_ptr>self._pvt_ptr
4814cdef class cudaAsyncCallbackHandle_t:
4815 """
4817 CUDA async callback handle
4819 Methods
4820 -------
4821 getPtr()
4822 Get memory address of class instance
4824 """
4825 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4826 if _ptr == 0:
4827 self._pvt_ptr = &self._pvt_val
4828 self._pvt_ptr[0] = <cyruntime.cudaAsyncCallbackHandle_t>init_value
4829 else:
4830 self._pvt_ptr = <cyruntime.cudaAsyncCallbackHandle_t *>_ptr
4831 def __init__(self, *args, **kwargs):
4832 pass
4833 def __repr__(self):
4834 return '<cudaAsyncCallbackHandle_t ' + str(hex(self.__int__())) + '>'
4835 def __index__(self):
4836 return self.__int__()
4837 def __eq__(self, other):
4838 if not isinstance(other, cudaAsyncCallbackHandle_t):
4839 return False
4840 return self._pvt_ptr[0] == (<cudaAsyncCallbackHandle_t>other)._pvt_ptr[0]
4841 def __hash__(self):
4842 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4843 def __int__(self):
4844 return <void_ptr>self._pvt_ptr[0]
4845 def getPtr(self):
4846 return <void_ptr>self._pvt_ptr
4848cdef class cudaLogsCallbackHandle:
4849 """
4851 Methods
4852 -------
4853 getPtr()
4854 Get memory address of class instance
4856 """
4857 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4858 if _ptr == 0:
4859 self._pvt_ptr = &self._pvt_val
4860 self._pvt_ptr[0] = <cyruntime.cudaLogsCallbackHandle>init_value
4861 else:
4862 self._pvt_ptr = <cyruntime.cudaLogsCallbackHandle *>_ptr
4863 def __init__(self, *args, **kwargs):
4864 pass
4865 def __repr__(self):
4866 return '<cudaLogsCallbackHandle ' + str(hex(self.__int__())) + '>'
4867 def __index__(self):
4868 return self.__int__()
4869 def __eq__(self, other):
4870 if not isinstance(other, cudaLogsCallbackHandle):
4871 return False
4872 return self._pvt_ptr[0] == (<cudaLogsCallbackHandle>other)._pvt_ptr[0]
4873 def __hash__(self):
4874 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4875 def __int__(self):
4876 return <void_ptr>self._pvt_ptr[0]
4877 def getPtr(self):
4878 return <void_ptr>self._pvt_ptr
4880cdef class EGLImageKHR:
4881 """
4883 Methods
4884 -------
4885 getPtr()
4886 Get memory address of class instance
4888 """
4889 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4890 if _ptr == 0:
4891 self._pvt_ptr = &self._pvt_val
4892 self._pvt_ptr[0] = <cyruntime.EGLImageKHR>init_value
4893 else:
4894 self._pvt_ptr = <cyruntime.EGLImageKHR *>_ptr
4895 def __init__(self, *args, **kwargs):
4896 pass
4897 def __repr__(self):
4898 return '<EGLImageKHR ' + str(hex(self.__int__())) + '>'
4899 def __index__(self):
4900 return self.__int__()
4901 def __eq__(self, other):
4902 if not isinstance(other, EGLImageKHR):
4903 return False
4904 return self._pvt_ptr[0] == (<EGLImageKHR>other)._pvt_ptr[0]
4905 def __hash__(self):
4906 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4907 def __int__(self):
4908 return <void_ptr>self._pvt_ptr[0]
4909 def getPtr(self):
4910 return <void_ptr>self._pvt_ptr
4912cdef class EGLStreamKHR:
4913 """
4915 Methods
4916 -------
4917 getPtr()
4918 Get memory address of class instance
4920 """
4921 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4922 if _ptr == 0:
4923 self._pvt_ptr = &self._pvt_val
4924 self._pvt_ptr[0] = <cyruntime.EGLStreamKHR>init_value
4925 else:
4926 self._pvt_ptr = <cyruntime.EGLStreamKHR *>_ptr
4927 def __init__(self, *args, **kwargs):
4928 pass
4929 def __repr__(self):
4930 return '<EGLStreamKHR ' + str(hex(self.__int__())) + '>'
4931 def __index__(self):
4932 return self.__int__()
4933 def __eq__(self, other):
4934 if not isinstance(other, EGLStreamKHR):
4935 return False
4936 return self._pvt_ptr[0] == (<EGLStreamKHR>other)._pvt_ptr[0]
4937 def __hash__(self):
4938 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4939 def __int__(self):
4940 return <void_ptr>self._pvt_ptr[0]
4941 def getPtr(self):
4942 return <void_ptr>self._pvt_ptr
4944cdef class EGLSyncKHR:
4945 """
4947 Methods
4948 -------
4949 getPtr()
4950 Get memory address of class instance
4952 """
4953 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4954 if _ptr == 0:
4955 self._pvt_ptr = &self._pvt_val
4956 self._pvt_ptr[0] = <cyruntime.EGLSyncKHR>init_value
4957 else:
4958 self._pvt_ptr = <cyruntime.EGLSyncKHR *>_ptr
4959 def __init__(self, *args, **kwargs):
4960 pass
4961 def __repr__(self):
4962 return '<EGLSyncKHR ' + str(hex(self.__int__())) + '>'
4963 def __index__(self):
4964 return self.__int__()
4965 def __eq__(self, other):
4966 if not isinstance(other, EGLSyncKHR):
4967 return False
4968 return self._pvt_ptr[0] == (<EGLSyncKHR>other)._pvt_ptr[0]
4969 def __hash__(self):
4970 return hash(<uintptr_t><void*>(self._pvt_ptr[0]))
4971 def __int__(self):
4972 return <void_ptr>self._pvt_ptr[0]
4973 def getPtr(self):
4974 return <void_ptr>self._pvt_ptr
4976cdef class cudaHostFn_t:
4977 """
4979 Methods
4980 -------
4981 getPtr()
4982 Get memory address of class instance
4984 """
4985 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
4986 if _ptr == 0:
4987 self._pvt_ptr = &self._pvt_val
4988 self._pvt_ptr[0] = <cyruntime.cudaHostFn_t>init_value
4989 else:
4990 self._pvt_ptr = <cyruntime.cudaHostFn_t *>_ptr
4991 def __init__(self, *args, **kwargs):
4992 pass
4993 def __repr__(self):
4994 return '<cudaHostFn_t ' + str(hex(self.__int__())) + '>'
4995 def __index__(self):
4996 return self.__int__()
4997 def __int__(self):
4998 return <void_ptr>self._pvt_ptr[0]
4999 def getPtr(self):
5000 return <void_ptr>self._pvt_ptr
5002cdef class cudaAsyncCallback:
5003 """
5005 Methods
5006 -------
5007 getPtr()
5008 Get memory address of class instance
5010 """
5011 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
5012 if _ptr == 0:
5013 self._pvt_ptr = &self._pvt_val
5014 self._pvt_ptr[0] = <cyruntime.cudaAsyncCallback>init_value
5015 else:
5016 self._pvt_ptr = <cyruntime.cudaAsyncCallback *>_ptr
5017 def __init__(self, *args, **kwargs):
5018 pass
5019 def __repr__(self):
5020 return '<cudaAsyncCallback ' + str(hex(self.__int__())) + '>'
5021 def __index__(self):
5022 return self.__int__()
5023 def __int__(self):
5024 return <void_ptr>self._pvt_ptr[0]
5025 def getPtr(self):
5026 return <void_ptr>self._pvt_ptr
5028cdef class cudaStreamCallback_t:
5029 """
5031 Methods
5032 -------
5033 getPtr()
5034 Get memory address of class instance
5036 """
5037 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
5038 if _ptr == 0:
5039 self._pvt_ptr = &self._pvt_val
5040 self._pvt_ptr[0] = <cyruntime.cudaStreamCallback_t>init_value
5041 else:
5042 self._pvt_ptr = <cyruntime.cudaStreamCallback_t *>_ptr
5043 def __init__(self, *args, **kwargs):
5044 pass
5045 def __repr__(self):
5046 return '<cudaStreamCallback_t ' + str(hex(self.__int__())) + '>'
5047 def __index__(self):
5048 return self.__int__()
5049 def __int__(self):
5050 return <void_ptr>self._pvt_ptr[0]
5051 def getPtr(self):
5052 return <void_ptr>self._pvt_ptr
5054cdef class cudaLogsCallback_t:
5055 """
5057 Methods
5058 -------
5059 getPtr()
5060 Get memory address of class instance
5062 """
5063 def __cinit__(self, void_ptr init_value = 0, void_ptr _ptr = 0):
5064 if _ptr == 0:
5065 self._pvt_ptr = &self._pvt_val
5066 self._pvt_ptr[0] = <cyruntime.cudaLogsCallback_t>init_value
5067 else:
5068 self._pvt_ptr = <cyruntime.cudaLogsCallback_t *>_ptr
5069 def __init__(self, *args, **kwargs):
5070 pass
5071 def __repr__(self):
5072 return '<cudaLogsCallback_t ' + str(hex(self.__int__())) + '>'
5073 def __index__(self):
5074 return self.__int__()
5075 def __int__(self):
5076 return <void_ptr>self._pvt_ptr[0]
5077 def getPtr(self):
5078 return <void_ptr>self._pvt_ptr
5080cdef class dim3:
5081 """
5082 Attributes
5083 ----------
5084 x : unsigned int
5086 y : unsigned int
5088 z : unsigned int
5091 Methods
5092 -------
5093 getPtr()
5094 Get memory address of class instance
5095 """
5096 def __cinit__(self, void_ptr _ptr = 0):
5097 if _ptr == 0:
5098 self._pvt_ptr = &self._pvt_val
5099 else:
5100 self._pvt_ptr = <cyruntime.dim3 *>_ptr
5101 def __init__(self, void_ptr _ptr = 0):
5102 pass
5103 def __dealloc__(self):
5104 pass
5105 def getPtr(self):
5106 return <void_ptr>self._pvt_ptr
5107 def __repr__(self):
5108 if self._pvt_ptr is not NULL:
5109 str_list = []
5110 try:
5111 str_list += ['x : ' + str(self.x)]
5112 except ValueError:
5113 str_list += ['x : <ValueError>']
5114 try:
5115 str_list += ['y : ' + str(self.y)]
5116 except ValueError:
5117 str_list += ['y : <ValueError>']
5118 try:
5119 str_list += ['z : ' + str(self.z)]
5120 except ValueError:
5121 str_list += ['z : <ValueError>']
5122 return '\n'.join(str_list)
5123 else:
5124 return ''
5125 @property
5126 def x(self):
5127 return self._pvt_ptr[0].x
5128 @x.setter
5129 def x(self, unsigned int x):
5130 self._pvt_ptr[0].x = x
5131 @property
5132 def y(self):
5133 return self._pvt_ptr[0].y
5134 @y.setter
5135 def y(self, unsigned int y):
5136 self._pvt_ptr[0].y = y
5137 @property
5138 def z(self):
5139 return self._pvt_ptr[0].z
5140 @z.setter
5141 def z(self, unsigned int z):
5142 self._pvt_ptr[0].z = z
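# Editor's sketch: the dim3 wrapper zero-fills its storage, so set all three
# fields explicitly before use:
#
#     block = dim3()
#     block.x, block.y, block.z = 16, 16, 1
#     print(block)        # __repr__ prints one "field : value" line per axis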
5144cdef class cudaChannelFormatDesc:
5145 """
5146 CUDA Channel format descriptor
5148 Attributes
5149 ----------
5150 x : int
5151 x
5152 y : int
5153 y
5154 z : int
5155 z
5156 w : int
5157 w
5158 f : cudaChannelFormatKind
5159 Channel format kind
5161 Methods
5162 -------
5163 getPtr()
5164 Get memory address of class instance
5165 """
5166 def __cinit__(self, void_ptr _ptr = 0):
5167 if _ptr == 0:
5168 self._pvt_ptr = &self._pvt_val
5169 else:
5170 self._pvt_ptr = <cyruntime.cudaChannelFormatDesc *>_ptr
5171 def __init__(self, void_ptr _ptr = 0):
5172 pass
5173 def __dealloc__(self):
5174 pass
5175 def getPtr(self):
5176 return <void_ptr>self._pvt_ptr
5177 def __repr__(self):
5178 if self._pvt_ptr is not NULL:
5179 str_list = []
5180 try:
5181 str_list += ['x : ' + str(self.x)]
5182 except ValueError:
5183 str_list += ['x : <ValueError>']
5184 try:
5185 str_list += ['y : ' + str(self.y)]
5186 except ValueError:
5187 str_list += ['y : <ValueError>']
5188 try:
5189 str_list += ['z : ' + str(self.z)]
5190 except ValueError:
5191 str_list += ['z : <ValueError>']
5192 try:
5193 str_list += ['w : ' + str(self.w)]
5194 except ValueError:
5195 str_list += ['w : <ValueError>']
5196 try:
5197 str_list += ['f : ' + str(self.f)]
5198 except ValueError:
5199 str_list += ['f : <ValueError>']
5200 return '\n'.join(str_list)
5201 else:
5202 return ''
5203 @property
5204 def x(self):
5205 return self._pvt_ptr[0].x
5206 @x.setter
5207 def x(self, int x):
5208 self._pvt_ptr[0].x = x
5209 @property
5210 def y(self):
5211 return self._pvt_ptr[0].y
5212 @y.setter
5213 def y(self, int y):
5214 self._pvt_ptr[0].y = y
5215 @property
5216 def z(self):
5217 return self._pvt_ptr[0].z
5218 @z.setter
5219 def z(self, int z):
5220 self._pvt_ptr[0].z = z
5221 @property
5222 def w(self):
5223 return self._pvt_ptr[0].w
5224 @w.setter
5225 def w(self, int w):
5226 self._pvt_ptr[0].w = w
5227 @property
5228 def f(self):
5229 if self._pvt_ptr[0].f not in _dict_cudaChannelFormatKind:
5230 return None
5231 return _dict_cudaChannelFormatKind[self._pvt_ptr[0].f]
5232 @f.setter
5233 def f(self, f not None : cudaChannelFormatKind):
5234 self._pvt_ptr[0].f = f.value
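# Editor's sketch: describing a float4 texel with the setters above. The
# cudaChannelFormatKindFloat member is assumed from the runtime enum:
#
#     desc = cudaChannelFormatDesc()
#     desc.x = desc.y = desc.z = desc.w = 32               # bits per channel
#     desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat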
5236cdef class anon_struct0:
5237 """
5238 Attributes
5239 ----------
5240 width : unsigned int
5242 height : unsigned int
5244 depth : unsigned int
5247 Methods
5248 -------
5249 getPtr()
5250 Get memory address of class instance
5251 """
5252 def __cinit__(self, void_ptr _ptr):
5253 self._pvt_ptr = <cyruntime.cudaArraySparseProperties *>_ptr
5255 def __init__(self, void_ptr _ptr):
5256 pass
5257 def __dealloc__(self):
5258 pass
5259 def getPtr(self):
5260 return <void_ptr>&self._pvt_ptr[0].tileExtent
5261 def __repr__(self):
5262 if self._pvt_ptr is not NULL:
5263 str_list = []
5264 try:
5265 str_list += ['width : ' + str(self.width)]
5266 except ValueError:
5267 str_list += ['width : <ValueError>']
5268 try:
5269 str_list += ['height : ' + str(self.height)]
5270 except ValueError:
5271 str_list += ['height : <ValueError>']
5272 try:
5273 str_list += ['depth : ' + str(self.depth)]
5274 except ValueError:
5275 str_list += ['depth : <ValueError>']
5276 return '\n'.join(str_list)
5277 else:
5278 return ''
5279 @property
5280 def width(self):
5281 return self._pvt_ptr[0].tileExtent.width
5282 @width.setter
5283 def width(self, unsigned int width):
5284 self._pvt_ptr[0].tileExtent.width = width
5285 @property
5286 def height(self):
5287 return self._pvt_ptr[0].tileExtent.height
5288 @height.setter
5289 def height(self, unsigned int height):
5290 self._pvt_ptr[0].tileExtent.height = height
5291 @property
5292 def depth(self):
5293 return self._pvt_ptr[0].tileExtent.depth
5294 @depth.setter
5295 def depth(self, unsigned int depth):
5296 self._pvt_ptr[0].tileExtent.depth = depth
5298cdef class cudaArraySparseProperties:
5299 """
5300 Sparse CUDA array and CUDA mipmapped array properties
5302 Attributes
5303 ----------
5304 tileExtent : anon_struct0
5306 miptailFirstLevel : unsigned int
5307 First mip level at which the mip tail begins
5308 miptailSize : unsigned long long
5309 Total size of the mip tail.
5310 flags : unsigned int
5311 Flags will be either zero or cudaArraySparsePropertiesSingleMipTail.
5312 reserved : list[unsigned int]
5315 Methods
5316 -------
5317 getPtr()
5318 Get memory address of class instance
5319 """
5320 def __cinit__(self, void_ptr _ptr = 0):
5321 if _ptr == 0:
5322 self._pvt_ptr = &self._pvt_val
5323 else:
5324 self._pvt_ptr = <cyruntime.cudaArraySparseProperties *>_ptr
5325 def __init__(self, void_ptr _ptr = 0):
5327 self._tileExtent = anon_struct0(_ptr=<void_ptr>self._pvt_ptr)
5328 def __dealloc__(self):
5329 pass
5330 def getPtr(self):
5331 return <void_ptr>self._pvt_ptr
5332 def __repr__(self):
5333 if self._pvt_ptr is not NULL:
5334 str_list = []
5335 try:
5336 str_list += ['tileExtent :\n' + '\n'.join([' ' + line for line in str(self.tileExtent).splitlines()])]
5337 except ValueError:
5338 str_list += ['tileExtent : <ValueError>']
5339 try:
5340 str_list += ['miptailFirstLevel : ' + str(self.miptailFirstLevel)]
5341 except ValueError:
5342 str_list += ['miptailFirstLevel : <ValueError>']
5343 try:
5344 str_list += ['miptailSize : ' + str(self.miptailSize)]
5345 except ValueError:
5346 str_list += ['miptailSize : <ValueError>']
5347 try:
5348 str_list += ['flags : ' + str(self.flags)]
5349 except ValueError:
5350 str_list += ['flags : <ValueError>']
5351 try:
5352 str_list += ['reserved : ' + str(self.reserved)]
5353 except ValueError:
5354 str_list += ['reserved : <ValueError>']
5355 return '\n'.join(str_list)
5356 else:
5357 return ''
5358 @property
5359 def tileExtent(self):
5360 return self._tileExtent
5361 @tileExtent.setter
5362 def tileExtent(self, tileExtent not None : anon_struct0):
5363 string.memcpy(&self._pvt_ptr[0].tileExtent, <cyruntime.anon_struct0*><void_ptr>tileExtent.getPtr(), sizeof(self._pvt_ptr[0].tileExtent))
5364 @property
5365 def miptailFirstLevel(self):
5366 return self._pvt_ptr[0].miptailFirstLevel
5367 @miptailFirstLevel.setter
5368 def miptailFirstLevel(self, unsigned int miptailFirstLevel):
5369 self._pvt_ptr[0].miptailFirstLevel = miptailFirstLevel
5370 @property
5371 def miptailSize(self):
5372 return self._pvt_ptr[0].miptailSize
5373 @miptailSize.setter
5374 def miptailSize(self, unsigned long long miptailSize):
5375 self._pvt_ptr[0].miptailSize = miptailSize
5376 @property
5377 def flags(self):
5378 return self._pvt_ptr[0].flags
5379 @flags.setter
5380 def flags(self, unsigned int flags):
5381 self._pvt_ptr[0].flags = flags
5382 @property
5383 def reserved(self):
5384 return self._pvt_ptr[0].reserved
5385 @reserved.setter
5386 def reserved(self, reserved):
5387 self._pvt_ptr[0].reserved = reserved
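# Editor's sketch: this struct is normally populated by the runtime rather
# than by hand. Assumes the cudaArrayGetSparseProperties binding defined
# elsewhere in this module and the usual (err, result) return convention:
#
#     err, props = cudaArrayGetSparseProperties(array)   # array: cudaArray_t
#     if err == cudaError_t.cudaSuccess:
#         tile = props.tileExtent
#         print(tile.width, tile.height, tile.depth, props.miptailSize)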
5389cdef class cudaArrayMemoryRequirements:
5390 """
5391 CUDA array and CUDA mipmapped array memory requirements
5393 Attributes
5394 ----------
5395 size : size_t
5396 Total size of the array.
5397 alignment : size_t
5398 Alignment necessary for mapping the array.
5399 reserved : list[unsigned int]
5402 Methods
5403 -------
5404 getPtr()
5405 Get memory address of class instance
5406 """
5407 def __cinit__(self, void_ptr _ptr = 0):
5408 if _ptr == 0:
5409 self._pvt_ptr = &self._pvt_val
5410 else:
5411 self._pvt_ptr = <cyruntime.cudaArrayMemoryRequirements *>_ptr
5412 def __init__(self, void_ptr _ptr = 0):
5413 pass
5414 def __dealloc__(self):
5415 pass
5416 def getPtr(self):
5417 return <void_ptr>self._pvt_ptr
5418 def __repr__(self):
5419 if self._pvt_ptr is not NULL:
5420 str_list = []
5421 try:
5422 str_list += ['size : ' + str(self.size)]
5423 except ValueError:
5424 str_list += ['size : <ValueError>']
5425 try:
5426 str_list += ['alignment : ' + str(self.alignment)]
5427 except ValueError:
5428 str_list += ['alignment : <ValueError>']
5429 try:
5430 str_list += ['reserved : ' + str(self.reserved)]
5431 except ValueError:
5432 str_list += ['reserved : <ValueError>']
5433 return '\n'.join(str_list)
5434 else:
5435 return ''
5436 @property
5437 def size(self):
5438 return self._pvt_ptr[0].size
5439 @size.setter
5440 def size(self, size_t size):
5441 self._pvt_ptr[0].size = size
5442 @property
5443 def alignment(self):
5444 return self._pvt_ptr[0].alignment
5445 @alignment.setter
5446 def alignment(self, size_t alignment):
5447 self._pvt_ptr[0].alignment = alignment
5448 @property
5449 def reserved(self):
5450 return self._pvt_ptr[0].reserved
5451 @reserved.setter
5452 def reserved(self, reserved):
5453 self._pvt_ptr[0].reserved = reserved
5455cdef class cudaPitchedPtr:
5456 """
5457 CUDA pitched memory pointer. See make_cudaPitchedPtr.
5459 Attributes
5460 ----------
5461 ptr : Any
5462 Pointer to allocated memory
5463 pitch : size_t
5464 Pitch of allocated memory in bytes
5465 xsize : size_t
5466 Logical width of allocation in elements
5467 ysize : size_t
5468 Logical height of allocation in elements
5470 Methods
5471 -------
5472 getPtr()
5473 Get memory address of class instance
5474 """
5475 def __cinit__(self, void_ptr _ptr = 0):
5476 if _ptr == 0:
5477 self._pvt_ptr = &self._pvt_val
5478 else:
5479 self._pvt_ptr = <cyruntime.cudaPitchedPtr *>_ptr
5480 def __init__(self, void_ptr _ptr = 0):
5481 pass
5482 def __dealloc__(self):
5483 pass
5484 def getPtr(self):
5485 return <void_ptr>self._pvt_ptr
5486 def __repr__(self):
5487 if self._pvt_ptr is not NULL:
5488 str_list = []
5489 try:
5490 str_list += ['ptr : ' + hex(self.ptr)]
5491 except ValueError:
5492 str_list += ['ptr : <ValueError>']
5493 try:
5494 str_list += ['pitch : ' + str(self.pitch)]
5495 except ValueError:
5496 str_list += ['pitch : <ValueError>']
5497 try:
5498 str_list += ['xsize : ' + str(self.xsize)]
5499 except ValueError:
5500 str_list += ['xsize : <ValueError>']
5501 try:
5502 str_list += ['ysize : ' + str(self.ysize)]
5503 except ValueError:
5504 str_list += ['ysize : <ValueError>']
5505 return '\n'.join(str_list)
5506 else:
5507 return ''
5508 @property
5509 def ptr(self):
5510 return <void_ptr>self._pvt_ptr[0].ptr
5511 @ptr.setter
5512 def ptr(self, ptr):
5513 _cptr = _HelperInputVoidPtr(ptr)
5514 self._pvt_ptr[0].ptr = <void*><void_ptr>_cptr.cptr
5515 @property
5516 def pitch(self):
5517 return self._pvt_ptr[0].pitch
5518 @pitch.setter
5519 def pitch(self, size_t pitch):
5520 self._pvt_ptr[0].pitch = pitch
5521 @property
5522 def xsize(self):
5523 return self._pvt_ptr[0].xsize
5524 @xsize.setter
5525 def xsize(self, size_t xsize):
5526 self._pvt_ptr[0].xsize = xsize
5527 @property
5528 def ysize(self):
5529 return self._pvt_ptr[0].ysize
5530 @ysize.setter
5531 def ysize(self, size_t ysize):
5532 self._pvt_ptr[0].ysize = ysize
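# Editor's sketch: building a pitched pointer by hand with the setters above
# (make_cudaPitchedPtr, referenced in the docstring, fills the same fields);
# dptr, pitch_bytes, width, and height are placeholders:
#
#     p = cudaPitchedPtr()
#     p.ptr = int(dptr)                  # dptr: a device allocation (assumed)
#     p.pitch = pitch_bytes
#     p.xsize, p.ysize = width, height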
5534cdef class cudaExtent:
5535 """
5536 CUDA extent. See make_cudaExtent.
5538 Attributes
5539 ----------
5540 width : size_t
5541 Width in elements when referring to array memory, in bytes when
5542 referring to linear memory
5543 height : size_t
5544 Height in elements
5545 depth : size_t
5546 Depth in elements
5548 Methods
5549 -------
5550 getPtr()
5551 Get memory address of class instance
5552 """
5553 def __cinit__(self, void_ptr _ptr = 0):
5554 if _ptr == 0:
5555 self._pvt_ptr = &self._pvt_val
5556 else:
5557 self._pvt_ptr = <cyruntime.cudaExtent *>_ptr
5558 def __init__(self, void_ptr _ptr = 0):
5559 pass
5560 def __dealloc__(self):
5561 pass
5562 def getPtr(self):
5563 return <void_ptr>self._pvt_ptr
5564 def __repr__(self):
5565 if self._pvt_ptr is not NULL:
5566 str_list = []
5567 try:
5568 str_list += ['width : ' + str(self.width)]
5569 except ValueError:
5570 str_list += ['width : <ValueError>']
5571 try:
5572 str_list += ['height : ' + str(self.height)]
5573 except ValueError:
5574 str_list += ['height : <ValueError>']
5575 try:
5576 str_list += ['depth : ' + str(self.depth)]
5577 except ValueError:
5578 str_list += ['depth : <ValueError>']
5579 return '\n'.join(str_list)
5580 else:
5581 return ''
5582 @property
5583 def width(self):
5584 return self._pvt_ptr[0].width
5585 @width.setter
5586 def width(self, size_t width):
5587 self._pvt_ptr[0].width = width
5588 @property
5589 def height(self):
5590 return self._pvt_ptr[0].height
5591 @height.setter
5592 def height(self, size_t height):
5593 self._pvt_ptr[0].height = height
5594 @property
5595 def depth(self):
5596 return self._pvt_ptr[0].depth
5597 @depth.setter
5598 def depth(self, size_t depth):
5599 self._pvt_ptr[0].depth = depth
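# Editor's sketch: allocating 3D memory from an extent. Assumes the
# make_cudaExtent helper referenced in the docstring and a cudaMalloc3D
# binding with the usual (err, result) return convention:
#
#     extent = make_cudaExtent(256, 64, 8)   # width (bytes here), height, depth
#     err, pitched = cudaMalloc3D(extent)    # pitched: cudaPitchedPtr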
5601cdef class cudaPos:
5602 """
5603 CUDA 3D position. See make_cudaPos.
5605 Attributes
5606 ----------
5607 x : size_t
5608 x
5609 y : size_t
5610 y
5611 z : size_t
5612 z
5614 Methods
5615 -------
5616 getPtr()
5617 Get memory address of class instance
5618 """
5619 def __cinit__(self, void_ptr _ptr = 0):
5620 if _ptr == 0:
5621 self._pvt_ptr = &self._pvt_val
5622 else:
5623 self._pvt_ptr = <cyruntime.cudaPos *>_ptr
5624 def __init__(self, void_ptr _ptr = 0):
5625 pass
5626 def __dealloc__(self):
5627 pass
5628 def getPtr(self):
5629 return <void_ptr>self._pvt_ptr
5630 def __repr__(self):
5631 if self._pvt_ptr is not NULL:
5632 str_list = []
5633 try:
5634 str_list += ['x : ' + str(self.x)]
5635 except ValueError:
5636 str_list += ['x : <ValueError>']
5637 try:
5638 str_list += ['y : ' + str(self.y)]
5639 except ValueError:
5640 str_list += ['y : <ValueError>']
5641 try:
5642 str_list += ['z : ' + str(self.z)]
5643 except ValueError:
5644 str_list += ['z : <ValueError>']
5645 return '\n'.join(str_list)
5646 else:
5647 return ''
5648 @property
5649 def x(self):
5650 return self._pvt_ptr[0].x
5651 @x.setter
5652 def x(self, size_t x):
5653 self._pvt_ptr[0].x = x
5654 @property
5655 def y(self):
5656 return self._pvt_ptr[0].y
5657 @y.setter
5658 def y(self, size_t y):
5659 self._pvt_ptr[0].y = y
5660 @property
5661 def z(self):
5662 return self._pvt_ptr[0].z
5663 @z.setter
5664 def z(self, size_t z):
5665 self._pvt_ptr[0].z = z
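# Editor's sketch: offsets default to zero; make_cudaPos (referenced in the
# docstring) or the setters above build a nonzero origin:
#
#     pos = cudaPos()
#     pos.x, pos.y, pos.z = 16, 0, 0     # start 16 elements into each row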
5667cdef class cudaMemcpy3DParms:
5668 """
5669 CUDA 3D memory copying parameters
5671 Attributes
5672 ----------
5673 srcArray : cudaArray_t
5674 Source memory address
5675 srcPos : cudaPos
5676 Source position offset
5677 srcPtr : cudaPitchedPtr
5678 Pitched source memory address
5679 dstArray : cudaArray_t
5680 Destination memory address
5681 dstPos : cudaPos
5682 Destination position offset
5683 dstPtr : cudaPitchedPtr
5684 Pitched destination memory address
5685 extent : cudaExtent
5686 Requested memory copy size
5687 kind : cudaMemcpyKind
5688 Type of transfer
5690 Methods
5691 -------
5692 getPtr()
5693 Get memory address of class instance
5694 """
5695 def __cinit__(self, void_ptr _ptr = 0):
5696 if _ptr == 0:
5697 self._pvt_ptr = &self._pvt_val
5698 else:
5699 self._pvt_ptr = <cyruntime.cudaMemcpy3DParms *>_ptr
5700 def __init__(self, void_ptr _ptr = 0):
5702 self._srcArray = cudaArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].srcArray)
5703 self._srcPos = cudaPos(_ptr=<void_ptr>&self._pvt_ptr[0].srcPos)
5704 self._srcPtr = cudaPitchedPtr(_ptr=<void_ptr>&self._pvt_ptr[0].srcPtr)
5705 self._dstArray = cudaArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].dstArray)
5706 self._dstPos = cudaPos(_ptr=<void_ptr>&self._pvt_ptr[0].dstPos)
5707 self._dstPtr = cudaPitchedPtr(_ptr=<void_ptr>&self._pvt_ptr[0].dstPtr)
5708 self._extent = cudaExtent(_ptr=<void_ptr>&self._pvt_ptr[0].extent)
5709 def __dealloc__(self):
5710 pass
5711 def getPtr(self):
5712 return <void_ptr>self._pvt_ptr
5713 def __repr__(self):
5714 if self._pvt_ptr is not NULL:
5715 str_list = []
5716 try:
5717 str_list += ['srcArray : ' + str(self.srcArray)]
5718 except ValueError:
5719 str_list += ['srcArray : <ValueError>']
5720 try:
5721 str_list += ['srcPos :\n' + '\n'.join([' ' + line for line in str(self.srcPos).splitlines()])]
5722 except ValueError:
5723 str_list += ['srcPos : <ValueError>']
5724 try:
5725 str_list += ['srcPtr :\n' + '\n'.join([' ' + line for line in str(self.srcPtr).splitlines()])]
5726 except ValueError:
5727 str_list += ['srcPtr : <ValueError>']
5728 try:
5729 str_list += ['dstArray : ' + str(self.dstArray)]
5730 except ValueError:
5731 str_list += ['dstArray : <ValueError>']
5732 try:
5733 str_list += ['dstPos :\n' + '\n'.join([' ' + line for line in str(self.dstPos).splitlines()])]
5734 except ValueError:
5735 str_list += ['dstPos : <ValueError>']
5736 try:
5737 str_list += ['dstPtr :\n' + '\n'.join([' ' + line for line in str(self.dstPtr).splitlines()])]
5738 except ValueError:
5739 str_list += ['dstPtr : <ValueError>']
5740 try:
5741 str_list += ['extent :\n' + '\n'.join([' ' + line for line in str(self.extent).splitlines()])]
5742 except ValueError:
5743 str_list += ['extent : <ValueError>']
5744 try:
5745 str_list += ['kind : ' + str(self.kind)]
5746 except ValueError:
5747 str_list += ['kind : <ValueError>']
5748 return '\n'.join(str_list)
5749 else:
5750 return ''
5751 @property
5752 def srcArray(self):
5753 return self._srcArray
5754 @srcArray.setter
5755 def srcArray(self, srcArray):
5756 cdef cyruntime.cudaArray_t cysrcArray
5757 if srcArray is None:
5758 cysrcArray = <cyruntime.cudaArray_t><void_ptr>0
5759 elif isinstance(srcArray, (cudaArray_t,)):
5760 psrcArray = int(srcArray)
5761 cysrcArray = <cyruntime.cudaArray_t><void_ptr>psrcArray
5762 else:
5763 psrcArray = int(cudaArray_t(srcArray))
5764 cysrcArray = <cyruntime.cudaArray_t><void_ptr>psrcArray
5765 self._srcArray._pvt_ptr[0] = cysrcArray
5766 @property
5767 def srcPos(self):
5768 return self._srcPos
5769 @srcPos.setter
5770 def srcPos(self, srcPos not None : cudaPos):
5771 string.memcpy(&self._pvt_ptr[0].srcPos, <cyruntime.cudaPos*><void_ptr>srcPos.getPtr(), sizeof(self._pvt_ptr[0].srcPos))
5772 @property
5773 def srcPtr(self):
5774 return self._srcPtr
5775 @srcPtr.setter
5776 def srcPtr(self, srcPtr not None : cudaPitchedPtr):
5777 string.memcpy(&self._pvt_ptr[0].srcPtr, <cyruntime.cudaPitchedPtr*><void_ptr>srcPtr.getPtr(), sizeof(self._pvt_ptr[0].srcPtr))
5778 @property
5779 def dstArray(self):
5780 return self._dstArray
5781 @dstArray.setter
5782 def dstArray(self, dstArray):
5783 cdef cyruntime.cudaArray_t cydstArray
5784 if dstArray is None:
5785 cydstArray = <cyruntime.cudaArray_t><void_ptr>0
5786 elif isinstance(dstArray, (cudaArray_t,)):
5787 pdstArray = int(dstArray)
5788 cydstArray = <cyruntime.cudaArray_t><void_ptr>pdstArray
5789 else:
5790 pdstArray = int(cudaArray_t(dstArray))
5791 cydstArray = <cyruntime.cudaArray_t><void_ptr>pdstArray
5792 self._dstArray._pvt_ptr[0] = cydstArray
5793 @property
5794 def dstPos(self):
5795 return self._dstPos
5796 @dstPos.setter
5797 def dstPos(self, dstPos not None : cudaPos):
5798 string.memcpy(&self._pvt_ptr[0].dstPos, <cyruntime.cudaPos*><void_ptr>dstPos.getPtr(), sizeof(self._pvt_ptr[0].dstPos))
5799 @property
5800 def dstPtr(self):
5801 return self._dstPtr
5802 @dstPtr.setter
5803 def dstPtr(self, dstPtr not None : cudaPitchedPtr):
5804 string.memcpy(&self._pvt_ptr[0].dstPtr, <cyruntime.cudaPitchedPtr*><void_ptr>dstPtr.getPtr(), sizeof(self._pvt_ptr[0].dstPtr))
5805 @property
5806 def extent(self):
5807 return self._extent
5808 @extent.setter
5809 def extent(self, extent not None : cudaExtent):
5810 string.memcpy(&self._pvt_ptr[0].extent, <cyruntime.cudaExtent*><void_ptr>extent.getPtr(), sizeof(self._pvt_ptr[0].extent))
5811 @property
5812 def kind(self):
5813 if self._pvt_ptr[0].kind not in _dict_cudaMemcpyKind:
5814 return None
5815 return _dict_cudaMemcpyKind[self._pvt_ptr[0].kind]
5816 @kind.setter
5817 def kind(self, kind not None : cudaMemcpyKind):
5818 self._pvt_ptr[0].kind = kind.value
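# Editor's sketch: a host-to-array 3D copy. Assumes the make_* helpers
# referenced above and a cudaMemcpy3D binding returning a 1-tuple (err,);
# host_ptr, dst_array, and the sizes are placeholders:
#
#     parms = cudaMemcpy3DParms()
#     parms.srcPtr = make_cudaPitchedPtr(host_ptr, width_bytes, width, height)
#     parms.dstArray = dst_array              # a cudaArray_t
#     parms.extent = make_cudaExtent(width, height, depth)
#     parms.kind = cudaMemcpyKind.cudaMemcpyHostToDevice
#     err, = cudaMemcpy3D(parms)              # srcPos/dstPos stay zeroed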
5820cdef class cudaMemcpyNodeParams:
5821 """
5822 Memcpy node parameters
5824 Attributes
5825 ----------
5826 flags : int
5827 Must be zero
5828 reserved : int
5829 Must be zero
5830 ctx : cudaExecutionContext_t
5831 Context in which to run the memcpy. If NULL, the memcpy will try
5832 to use the current context.
5833 copyParams : cudaMemcpy3DParms
5834 Parameters for the memory copy
5836 Methods
5837 -------
5838 getPtr()
5839 Get memory address of class instance
5840 """
5841 def __cinit__(self, void_ptr _ptr = 0):
5842 if _ptr == 0:
5843 self._pvt_ptr = &self._pvt_val
5844 else:
5845 self._pvt_ptr = <cyruntime.cudaMemcpyNodeParams *>_ptr
5846 def __init__(self, void_ptr _ptr = 0):
5848 self._ctx = cudaExecutionContext_t(_ptr=<void_ptr>&self._pvt_ptr[0].ctx)
5849 self._copyParams = cudaMemcpy3DParms(_ptr=<void_ptr>&self._pvt_ptr[0].copyParams)
5850 def __dealloc__(self):
5851 pass
5852 def getPtr(self):
5853 return <void_ptr>self._pvt_ptr
5854 def __repr__(self):
5855 if self._pvt_ptr is not NULL:
5856 str_list = []
5857 try:
5858 str_list += ['flags : ' + str(self.flags)]
5859 except ValueError:
5860 str_list += ['flags : <ValueError>']
5861 try:
5862 str_list += ['reserved : ' + str(self.reserved)]
5863 except ValueError:
5864 str_list += ['reserved : <ValueError>']
5865 try:
5866 str_list += ['ctx : ' + str(self.ctx)]
5867 except ValueError:
5868 str_list += ['ctx : <ValueError>']
5869 try:
5870 str_list += ['copyParams :\n' + '\n'.join([' ' + line for line in str(self.copyParams).splitlines()])]
5871 except ValueError:
5872 str_list += ['copyParams : <ValueError>']
5873 return '\n'.join(str_list)
5874 else:
5875 return ''
5876 @property
5877 def flags(self):
5878 return self._pvt_ptr[0].flags
5879 @flags.setter
5880 def flags(self, int flags):
5881 self._pvt_ptr[0].flags = flags
5882 @property
5883 def reserved(self):
5884 return self._pvt_ptr[0].reserved
5885 @reserved.setter
5886 def reserved(self, int reserved):
5887 self._pvt_ptr[0].reserved = reserved
5888 @property
5889 def ctx(self):
5890 return self._ctx
5891 @ctx.setter
5892 def ctx(self, ctx):
5893 cdef cyruntime.cudaExecutionContext_t cyctx
5894 if ctx is None:
5895 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>0
5896 elif isinstance(ctx, (cudaExecutionContext_t,)):
5897 pctx = int(ctx)
5898 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
5899 else:
5900 pctx = int(cudaExecutionContext_t(ctx))
5901 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
5902 self._ctx._pvt_ptr[0] = cyctx
5903 @property
5904 def copyParams(self):
5905 return self._copyParams
5906 @copyParams.setter
5907 def copyParams(self, copyParams not None : cudaMemcpy3DParms):
5908 string.memcpy(&self._pvt_ptr[0].copyParams, <cyruntime.cudaMemcpy3DParms*><void_ptr>copyParams.getPtr(), sizeof(self._pvt_ptr[0].copyParams))
5910cdef class cudaMemcpy3DPeerParms:
5911 """
5912 CUDA 3D cross-device memory copying parameters
5914 Attributes
5915 ----------
5916 srcArray : cudaArray_t
5917 Source memory address
5918 srcPos : cudaPos
5919 Source position offset
5920 srcPtr : cudaPitchedPtr
5921 Pitched source memory address
5922 srcDevice : int
5923 Source device
5924 dstArray : cudaArray_t
5925 Destination memory address
5926 dstPos : cudaPos
5927 Destination position offset
5928 dstPtr : cudaPitchedPtr
5929 Pitched destination memory address
5930 dstDevice : int
5931 Destination device
5932 extent : cudaExtent
5933 Requested memory copy size
5935 Methods
5936 -------
5937 getPtr()
5938 Get memory address of class instance
5939 """
5940 def __cinit__(self, void_ptr _ptr = 0):
5941 if _ptr == 0:
5942 self._pvt_ptr = &self._pvt_val
5943 else:
5944 self._pvt_ptr = <cyruntime.cudaMemcpy3DPeerParms *>_ptr
5945 def __init__(self, void_ptr _ptr = 0):
5947 self._srcArray = cudaArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].srcArray)
5948 self._srcPos = cudaPos(_ptr=<void_ptr>&self._pvt_ptr[0].srcPos)
5949 self._srcPtr = cudaPitchedPtr(_ptr=<void_ptr>&self._pvt_ptr[0].srcPtr)
5950 self._dstArray = cudaArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].dstArray)
5951 self._dstPos = cudaPos(_ptr=<void_ptr>&self._pvt_ptr[0].dstPos)
5952 self._dstPtr = cudaPitchedPtr(_ptr=<void_ptr>&self._pvt_ptr[0].dstPtr)
5953 self._extent = cudaExtent(_ptr=<void_ptr>&self._pvt_ptr[0].extent)
5954 def __dealloc__(self):
5955 pass
5956 def getPtr(self):
5957 return <void_ptr>self._pvt_ptr
5958 def __repr__(self):
5959 if self._pvt_ptr is not NULL:
5960 str_list = []
5961 try:
5962 str_list += ['srcArray : ' + str(self.srcArray)]
5963 except ValueError:
5964 str_list += ['srcArray : <ValueError>']
5965 try:
5966 str_list += ['srcPos :\n' + '\n'.join([' ' + line for line in str(self.srcPos).splitlines()])]
5967 except ValueError:
5968 str_list += ['srcPos : <ValueError>']
5969 try:
5970 str_list += ['srcPtr :\n' + '\n'.join([' ' + line for line in str(self.srcPtr).splitlines()])]
5971 except ValueError:
5972 str_list += ['srcPtr : <ValueError>']
5973 try:
5974 str_list += ['srcDevice : ' + str(self.srcDevice)]
5975 except ValueError:
5976 str_list += ['srcDevice : <ValueError>']
5977 try:
5978 str_list += ['dstArray : ' + str(self.dstArray)]
5979 except ValueError:
5980 str_list += ['dstArray : <ValueError>']
5981 try:
5982 str_list += ['dstPos :\n' + '\n'.join([' ' + line for line in str(self.dstPos).splitlines()])]
5983 except ValueError:
5984 str_list += ['dstPos : <ValueError>']
5985 try:
5986 str_list += ['dstPtr :\n' + '\n'.join([' ' + line for line in str(self.dstPtr).splitlines()])]
5987 except ValueError:
5988 str_list += ['dstPtr : <ValueError>']
5989 try:
5990 str_list += ['dstDevice : ' + str(self.dstDevice)]
5991 except ValueError:
5992 str_list += ['dstDevice : <ValueError>']
5993 try:
5994 str_list += ['extent :\n' + '\n'.join([' ' + line for line in str(self.extent).splitlines()])]
5995 except ValueError:
5996 str_list += ['extent : <ValueError>']
5997 return '\n'.join(str_list)
5998 else:
5999 return ''
6000 @property
6001 def srcArray(self):
6002 return self._srcArray
6003 @srcArray.setter
6004 def srcArray(self, srcArray):
6005 cdef cyruntime.cudaArray_t cysrcArray
6006 if srcArray is None:
6007 cysrcArray = <cyruntime.cudaArray_t><void_ptr>0
6008 elif isinstance(srcArray, (cudaArray_t,)):
6009 psrcArray = int(srcArray)
6010 cysrcArray = <cyruntime.cudaArray_t><void_ptr>psrcArray
6011 else:
6012 psrcArray = int(cudaArray_t(srcArray))
6013 cysrcArray = <cyruntime.cudaArray_t><void_ptr>psrcArray
6014 self._srcArray._pvt_ptr[0] = cysrcArray
6015 @property
6016 def srcPos(self):
6017 return self._srcPos
6018 @srcPos.setter
6019 def srcPos(self, srcPos not None : cudaPos):
6020 string.memcpy(&self._pvt_ptr[0].srcPos, <cyruntime.cudaPos*><void_ptr>srcPos.getPtr(), sizeof(self._pvt_ptr[0].srcPos))
6021 @property
6022 def srcPtr(self):
6023 return self._srcPtr
6024 @srcPtr.setter
6025 def srcPtr(self, srcPtr not None : cudaPitchedPtr):
6026 string.memcpy(&self._pvt_ptr[0].srcPtr, <cyruntime.cudaPitchedPtr*><void_ptr>srcPtr.getPtr(), sizeof(self._pvt_ptr[0].srcPtr))
6027 @property
6028 def srcDevice(self):
6029 return self._pvt_ptr[0].srcDevice
6030 @srcDevice.setter
6031 def srcDevice(self, int srcDevice):
6032 self._pvt_ptr[0].srcDevice = srcDevice
6033 @property
6034 def dstArray(self):
6035 return self._dstArray
6036 @dstArray.setter
6037 def dstArray(self, dstArray):
6038 cdef cyruntime.cudaArray_t cydstArray
6039 if dstArray is None:
6040 cydstArray = <cyruntime.cudaArray_t><void_ptr>0
6041 elif isinstance(dstArray, (cudaArray_t,)):
6042 pdstArray = int(dstArray)
6043 cydstArray = <cyruntime.cudaArray_t><void_ptr>pdstArray
6044 else:
6045 pdstArray = int(cudaArray_t(dstArray))
6046 cydstArray = <cyruntime.cudaArray_t><void_ptr>pdstArray
6047 self._dstArray._pvt_ptr[0] = cydstArray
6048 @property
6049 def dstPos(self):
6050 return self._dstPos
6051 @dstPos.setter
6052 def dstPos(self, dstPos not None : cudaPos):
6053 string.memcpy(&self._pvt_ptr[0].dstPos, <cyruntime.cudaPos*><void_ptr>dstPos.getPtr(), sizeof(self._pvt_ptr[0].dstPos))
6054 @property
6055 def dstPtr(self):
6056 return self._dstPtr
6057 @dstPtr.setter
6058 def dstPtr(self, dstPtr not None : cudaPitchedPtr):
6059 string.memcpy(&self._pvt_ptr[0].dstPtr, <cyruntime.cudaPitchedPtr*><void_ptr>dstPtr.getPtr(), sizeof(self._pvt_ptr[0].dstPtr))
6060 @property
6061 def dstDevice(self):
6062 return self._pvt_ptr[0].dstDevice
6063 @dstDevice.setter
6064 def dstDevice(self, int dstDevice):
6065 self._pvt_ptr[0].dstDevice = dstDevice
6066 @property
6067 def extent(self):
6068 return self._extent
6069 @extent.setter
6070 def extent(self, extent not None : cudaExtent):
6071 string.memcpy(&self._pvt_ptr[0].extent, <cyruntime.cudaExtent*><void_ptr>extent.getPtr(), sizeof(self._pvt_ptr[0].extent))
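# Editor's sketch: the peer variant adds explicit device ordinals. Assumes a
# cudaMemcpy3DPeer binding returning (err,); src_pitched, dst_pitched, and
# extent are built as in the cudaMemcpy3DParms example above:
#
#     p = cudaMemcpy3DPeerParms()
#     p.srcDevice, p.dstDevice = 0, 1
#     p.srcPtr, p.dstPtr = src_pitched, dst_pitched
#     p.extent = extent
#     err, = cudaMemcpy3DPeer(p)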
6073cdef class cudaMemsetParams:
6074 """
6075 CUDA Memset node parameters
6077 Attributes
6078 ----------
6079 dst : Any
6080 Destination device pointer
6081 pitch : size_t
6082 Pitch of destination device pointer. Unused if height is 1
6083 value : unsigned int
6084 Value to be set
6085 elementSize : unsigned int
6086 Size of each element in bytes. Must be 1, 2, or 4.
6087 width : size_t
6088 Width of the row in elements
6089 height : size_t
6090 Number of rows
6092 Methods
6093 -------
6094 getPtr()
6095 Get memory address of class instance
6096 """
6097 def __cinit__(self, void_ptr _ptr = 0):
6098 if _ptr == 0:
6099 self._pvt_ptr = &self._pvt_val
6100 else:
6101 self._pvt_ptr = <cyruntime.cudaMemsetParams *>_ptr
6102 def __init__(self, void_ptr _ptr = 0):
6103 pass
6104 def __dealloc__(self):
6105 pass
6106 def getPtr(self):
6107 return <void_ptr>self._pvt_ptr
6108 def __repr__(self):
6109 if self._pvt_ptr is not NULL:
6110 str_list = []
6111 try:
6112 str_list += ['dst : ' + hex(self.dst)]
6113 except ValueError:
6114 str_list += ['dst : <ValueError>']
6115 try:
6116 str_list += ['pitch : ' + str(self.pitch)]
6117 except ValueError:
6118 str_list += ['pitch : <ValueError>']
6119 try:
6120 str_list += ['value : ' + str(self.value)]
6121 except ValueError:
6122 str_list += ['value : <ValueError>']
6123 try:
6124 str_list += ['elementSize : ' + str(self.elementSize)]
6125 except ValueError:
6126 str_list += ['elementSize : <ValueError>']
6127 try:
6128 str_list += ['width : ' + str(self.width)]
6129 except ValueError:
6130 str_list += ['width : <ValueError>']
6131 try:
6132 str_list += ['height : ' + str(self.height)]
6133 except ValueError:
6134 str_list += ['height : <ValueError>']
6135 return '\n'.join(str_list)
6136 else:
6137 return ''
6138 @property
6139 def dst(self):
6140 return <void_ptr>self._pvt_ptr[0].dst
6141 @dst.setter
6142 def dst(self, dst):
6143 _cdst = _HelperInputVoidPtr(dst)
6144 self._pvt_ptr[0].dst = <void*><void_ptr>_cdst.cptr
6145 @property
6146 def pitch(self):
6147 return self._pvt_ptr[0].pitch
6148 @pitch.setter
6149 def pitch(self, size_t pitch):
6150 self._pvt_ptr[0].pitch = pitch
6151 @property
6152 def value(self):
6153 return self._pvt_ptr[0].value
6154 @value.setter
6155 def value(self, unsigned int value):
6156 self._pvt_ptr[0].value = value
6157 @property
6158 def elementSize(self):
6159 return self._pvt_ptr[0].elementSize
6160 @elementSize.setter
6161 def elementSize(self, unsigned int elementSize):
6162 self._pvt_ptr[0].elementSize = elementSize
6163 @property
6164 def width(self):
6165 return self._pvt_ptr[0].width
6166 @width.setter
6167 def width(self, size_t width):
6168 self._pvt_ptr[0].width = width
6169 @property
6170 def height(self):
6171 return self._pvt_ptr[0].height
6172 @height.setter
6173 def height(self, size_t height):
6174 self._pvt_ptr[0].height = height
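# Editor's sketch: filling a memset node for a 1-D region (pitch is unused
# when height == 1). The graph handle and the cudaGraphAddMemsetNode call
# are assumptions based on the usual runtime graph API:
#
#     mp = cudaMemsetParams()
#     mp.dst = int(dptr)                  # dptr: device allocation (assumed)
#     mp.value, mp.elementSize = 0, 4     # zero n uint32 words
#     mp.width, mp.height, mp.pitch = n, 1, 0
#     err, node = cudaGraphAddMemsetNode(graph, [], 0, mp)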
6176cdef class cudaMemsetParamsV2:
6177 """
6178 CUDA Memset node parameters
6180 Attributes
6181 ----------
6182 dst : Any
6183 Destination device pointer
6184 pitch : size_t
6185 Pitch of destination device pointer. Unused if height is 1
6186 value : unsigned int
6187 Value to be set
6188 elementSize : unsigned int
6189 Size of each element in bytes. Must be 1, 2, or 4.
6190 width : size_t
6191 Width of the row in elements
6192 height : size_t
6193 Number of rows
6194 ctx : cudaExecutionContext_t
6195 Context in which to run the memset. If NULL, the memset will try
6196 to use the current context.
6198 Methods
6199 -------
6200 getPtr()
6201 Get memory address of class instance
6202 """
6203 def __cinit__(self, void_ptr _ptr = 0):
6204 if _ptr == 0:
6205 self._pvt_ptr = &self._pvt_val
6206 else:
6207 self._pvt_ptr = <cyruntime.cudaMemsetParamsV2 *>_ptr
6208 def __init__(self, void_ptr _ptr = 0):
6210 self._ctx = cudaExecutionContext_t(_ptr=<void_ptr>&self._pvt_ptr[0].ctx)
6211 def __dealloc__(self):
6212 pass
6213 def getPtr(self):
6214 return <void_ptr>self._pvt_ptr
6215 def __repr__(self):
6216 if self._pvt_ptr is not NULL:
6217 str_list = []
6218 try:
6219 str_list += ['dst : ' + hex(self.dst)]
6220 except ValueError:
6221 str_list += ['dst : <ValueError>']
6222 try:
6223 str_list += ['pitch : ' + str(self.pitch)]
6224 except ValueError:
6225 str_list += ['pitch : <ValueError>']
6226 try:
6227 str_list += ['value : ' + str(self.value)]
6228 except ValueError:
6229 str_list += ['value : <ValueError>']
6230 try:
6231 str_list += ['elementSize : ' + str(self.elementSize)]
6232 except ValueError:
6233 str_list += ['elementSize : <ValueError>']
6234 try:
6235 str_list += ['width : ' + str(self.width)]
6236 except ValueError:
6237 str_list += ['width : <ValueError>']
6238 try:
6239 str_list += ['height : ' + str(self.height)]
6240 except ValueError:
6241 str_list += ['height : <ValueError>']
6242 try:
6243 str_list += ['ctx : ' + str(self.ctx)]
6244 except ValueError:
6245 str_list += ['ctx : <ValueError>']
6246 return '\n'.join(str_list)
6247 else:
6248 return ''
6249 @property
6250 def dst(self):
6251 return <void_ptr>self._pvt_ptr[0].dst
6252 @dst.setter
6253 def dst(self, dst):
6254 _cdst = _HelperInputVoidPtr(dst)
6255 self._pvt_ptr[0].dst = <void*><void_ptr>_cdst.cptr
6256 @property
6257 def pitch(self):
6258 return self._pvt_ptr[0].pitch
6259 @pitch.setter
6260 def pitch(self, size_t pitch):
6261 self._pvt_ptr[0].pitch = pitch
6262 @property
6263 def value(self):
6264 return self._pvt_ptr[0].value
6265 @value.setter
6266 def value(self, unsigned int value):
6267 self._pvt_ptr[0].value = value
6268 @property
6269 def elementSize(self):
6270 return self._pvt_ptr[0].elementSize
6271 @elementSize.setter
6272 def elementSize(self, unsigned int elementSize):
6273 self._pvt_ptr[0].elementSize = elementSize
6274 @property
6275 def width(self):
6276 return self._pvt_ptr[0].width
6277 @width.setter
6278 def width(self, size_t width):
6279 self._pvt_ptr[0].width = width
6280 @property
6281 def height(self):
6282 return self._pvt_ptr[0].height
6283 @height.setter
6284 def height(self, size_t height):
6285 self._pvt_ptr[0].height = height
6286 @property
6287 def ctx(self):
6288 return self._ctx
6289 @ctx.setter
6290 def ctx(self, ctx):
6291 cdef cyruntime.cudaExecutionContext_t cyctx
6292 if ctx is None:
6293 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>0
6294 elif isinstance(ctx, (cudaExecutionContext_t,)):
6295 pctx = int(ctx)
6296 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
6297 else:
6298 pctx = int(cudaExecutionContext_t(ctx))
6299 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
6300 self._ctx._pvt_ptr[0] = cyctx
6302cdef class cudaAccessPolicyWindow:
6303 """
6304 Specifies an access policy for a window, a contiguous extent of
6305 memory beginning at base_ptr and ending at base_ptr + num_bytes.
6306 The window is partitioned into many segments, and segments are
6307 assigned such that: sum of "hit segments" / window == approx. ratio;
6308 sum of "miss segments" / window == approx. 1 - ratio. Segments and
6309 ratio specifications are fitted to the capabilities of the
6310 architecture. Accesses in a hit segment apply the hitProp access
6311 policy; accesses in a miss segment apply the missProp access policy.
6313 Attributes
6314 ----------
6315 base_ptr : Any
6316 Starting address of the access policy window. The CUDA driver may
6317 align it.
6318 num_bytes : size_t
6319 Size in bytes of the window policy. The CUDA driver may restrict
6320 the maximum size and alignment.
6321 hitRatio : float
6322 hitRatio specifies the percentage of lines assigned hitProp; the
6323 rest are assigned missProp.
6324 hitProp : cudaAccessProperty
6325 cudaAccessProperty set for hit.
6326 missProp : cudaAccessProperty
6327 cudaAccessProperty set for miss. Must be either
6328 cudaAccessPropertyNormal or cudaAccessPropertyStreaming.
6330 Methods
6331 -------
6332 getPtr()
6333 Get memory address of class instance
6334 """
6335 def __cinit__(self, void_ptr _ptr = 0):
6336 if _ptr == 0:
6337 self._pvt_ptr = &self._pvt_val
6338 else:
6339 self._pvt_ptr = <cyruntime.cudaAccessPolicyWindow *>_ptr
6340 def __init__(self, void_ptr _ptr = 0):
6341 pass
6342 def __dealloc__(self):
6343 pass
6344 def getPtr(self):
6345 return <void_ptr>self._pvt_ptr
6346 def __repr__(self):
6347 if self._pvt_ptr is not NULL:
6348 str_list = []
6349 try:
6350 str_list += ['base_ptr : ' + hex(self.base_ptr)]
6351 except ValueError:
6352 str_list += ['base_ptr : <ValueError>']
6353 try:
6354 str_list += ['num_bytes : ' + str(self.num_bytes)]
6355 except ValueError:
6356 str_list += ['num_bytes : <ValueError>']
6357 try:
6358 str_list += ['hitRatio : ' + str(self.hitRatio)]
6359 except ValueError:
6360 str_list += ['hitRatio : <ValueError>']
6361 try:
6362 str_list += ['hitProp : ' + str(self.hitProp)]
6363 except ValueError:
6364 str_list += ['hitProp : <ValueError>']
6365 try:
6366 str_list += ['missProp : ' + str(self.missProp)]
6367 except ValueError:
6368 str_list += ['missProp : <ValueError>']
6369 return '\n'.join(str_list)
6370 else:
6371 return ''
6372 @property
6373 def base_ptr(self):
6374 return <void_ptr>self._pvt_ptr[0].base_ptr
6375 @base_ptr.setter
6376 def base_ptr(self, base_ptr):
6377 _cbase_ptr = _HelperInputVoidPtr(base_ptr)
6378 self._pvt_ptr[0].base_ptr = <void*><void_ptr>_cbase_ptr.cptr
6379 @property
6380 def num_bytes(self):
6381 return self._pvt_ptr[0].num_bytes
6382 @num_bytes.setter
6383 def num_bytes(self, size_t num_bytes):
6384 self._pvt_ptr[0].num_bytes = num_bytes
6385 @property
6386 def hitRatio(self):
6387 return self._pvt_ptr[0].hitRatio
6388 @hitRatio.setter
6389 def hitRatio(self, float hitRatio):
6390 self._pvt_ptr[0].hitRatio = hitRatio
6391 @property
6392 def hitProp(self):
6393 if self._pvt_ptr[0].hitProp not in _dict_cudaAccessProperty:
6394 return None
6395 return _dict_cudaAccessProperty[self._pvt_ptr[0].hitProp]
6396 @hitProp.setter
6397 def hitProp(self, hitProp not None : cudaAccessProperty):
6398 self._pvt_ptr[0].hitProp = hitProp.value
6399 @property
6400 def missProp(self):
6401 if self._pvt_ptr[0].missProp not in _dict_cudaAccessProperty:
6402 return None
6403 return _dict_cudaAccessProperty[self._pvt_ptr[0].missProp]
6404 @missProp.setter
6405 def missProp(self, missProp not None : cudaAccessProperty):
6406 self._pvt_ptr[0].missProp = missProp.value
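# Editor's sketch: describing a persisting window over a device buffer; the
# cudaAccessProperty members are assumed from the runtime enum. The window
# is then attached through stream or kernel-node access-policy attributes:
#
#     win = cudaAccessPolicyWindow()
#     win.base_ptr = int(dptr)            # dptr: device allocation (assumed)
#     win.num_bytes = nbytes
#     win.hitRatio = 0.6                  # ~60% of lines get hitProp
#     win.hitProp = cudaAccessProperty.cudaAccessPropertyPersisting
#     win.missProp = cudaAccessProperty.cudaAccessPropertyStreaming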
6408cdef class cudaHostNodeParams:
6409 """
6410 CUDA host node parameters
6412 Attributes
6413 ----------
6414 fn : cudaHostFn_t
6415 The function to call when the node executes
6416 userData : Any
6417 Argument to pass to the function
6419 Methods
6420 -------
6421 getPtr()
6422 Get memory address of class instance
6423 """
6424 def __cinit__(self, void_ptr _ptr = 0):
6425 if _ptr == 0:
6426 self._pvt_ptr = &self._pvt_val
6427 else:
6428 self._pvt_ptr = <cyruntime.cudaHostNodeParams *>_ptr
6429 def __init__(self, void_ptr _ptr = 0):
6431 self._fn = cudaHostFn_t(_ptr=<void_ptr>&self._pvt_ptr[0].fn)
6432 def __dealloc__(self):
6433 pass
6434 def getPtr(self):
6435 return <void_ptr>self._pvt_ptr
6436 def __repr__(self):
6437 if self._pvt_ptr is not NULL:
6438 str_list = []
6439 try:
6440 str_list += ['fn : ' + str(self.fn)]
6441 except ValueError:
6442 str_list += ['fn : <ValueError>']
6443 try:
6444 str_list += ['userData : ' + hex(self.userData)]
6445 except ValueError:
6446 str_list += ['userData : <ValueError>']
6447 return '\n'.join(str_list)
6448 else:
6449 return ''
6450 @property
6451 def fn(self):
6452 return self._fn
6453 @fn.setter
6454 def fn(self, fn):
6455 cdef cyruntime.cudaHostFn_t cyfn
6456 if fn is None:
6457 cyfn = <cyruntime.cudaHostFn_t><void_ptr>0
6458 elif isinstance(fn, (cudaHostFn_t)):
6459 pfn = int(fn)
6460 cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
6461 else:
6462 pfn = int(cudaHostFn_t(fn))
6463 cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
6464 self._fn._pvt_ptr[0] = cyfn
6465 @property
6466 def userData(self):
6467 return <void_ptr>self._pvt_ptr[0].userData
6468 @userData.setter
6469 def userData(self, userData):
6470 _cuserData = _HelperInputVoidPtr(userData)
6471 self._pvt_ptr[0].userData = <void*><void_ptr>_cuserData.cptr
6473cdef class cudaHostNodeParamsV2:
6474 """
6475 CUDA host node parameters
6477 Attributes
6478 ----------
6479 fn : cudaHostFn_t
6480 The function to call when the node executes
6481 userData : Any
6482 Argument to pass to the function
6484 Methods
6485 -------
6486 getPtr()
6487 Get memory address of class instance
6488 """
6489 def __cinit__(self, void_ptr _ptr = 0):
6490 if _ptr == 0:
6491 self._pvt_ptr = &self._pvt_val
6492 else:
6493 self._pvt_ptr = <cyruntime.cudaHostNodeParamsV2 *>_ptr
6494 def __init__(self, void_ptr _ptr = 0):
6495 pass
6496 self._fn = cudaHostFn_t(_ptr=<void_ptr>&self._pvt_ptr[0].fn)
6497 def __dealloc__(self):
6498 pass
6499 def getPtr(self):
6500 return <void_ptr>self._pvt_ptr
6501 def __repr__(self):
6502 if self._pvt_ptr is not NULL:
6503 str_list = []
6504 try:
6505 str_list += ['fn : ' + str(self.fn)]
6506 except ValueError:
6507 str_list += ['fn : <ValueError>']
6508 try:
6509 str_list += ['userData : ' + hex(self.userData)]
6510 except ValueError:
6511 str_list += ['userData : <ValueError>']
6512 return '\n'.join(str_list)
6513 else:
6514 return ''
6515 @property
6516 def fn(self):
6517 return self._fn
6518 @fn.setter
6519 def fn(self, fn):
6520 cdef cyruntime.cudaHostFn_t cyfn
6521 if fn is None:
6522 cyfn = <cyruntime.cudaHostFn_t><void_ptr>0
6523 elif isinstance(fn, (cudaHostFn_t,)):
6524 pfn = int(fn)
6525 cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
6526 else:
6527 pfn = int(cudaHostFn_t(fn))
6528 cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
6529 self._fn._pvt_ptr[0] = cyfn
6530 @property
6531 def userData(self):
6532 return <void_ptr>self._pvt_ptr[0].userData
6533 @userData.setter
6534 def userData(self, userData):
6535 _cuserData = _HelperInputVoidPtr(userData)
6536 self._pvt_ptr[0].userData = <void*><void_ptr>_cuserData.cptr
6538cdef class anon_struct1:
6539 """
6540 Attributes
6541 ----------
6542 array : cudaArray_t
6545 Methods
6546 -------
6547 getPtr()
6548 Get memory address of class instance
6549 """
6550 def __cinit__(self, void_ptr _ptr):
6551 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6553 def __init__(self, void_ptr _ptr):
6554 pass
6555 self._array = cudaArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].res.array.array)
6556 def __dealloc__(self):
6557 pass
6558 def getPtr(self):
6559 return <void_ptr>&self._pvt_ptr[0].res.array
6560 def __repr__(self):
6561 if self._pvt_ptr is not NULL:
6562 str_list = []
6563 try:
6564 str_list += ['array : ' + str(self.array)]
6565 except ValueError:
6566 str_list += ['array : <ValueError>']
6567 return '\n'.join(str_list)
6568 else:
6569 return ''
6570 @property
6571 def array(self):
6572 return self._array
6573 @array.setter
6574 def array(self, array):
6575 cdef cyruntime.cudaArray_t cyarray
6576 if array is None:
6577 cyarray = <cyruntime.cudaArray_t><void_ptr>0
6578 elif isinstance(array, (cudaArray_t,)):
6579 parray = int(array)
6580 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
6581 else:
6582 parray = int(cudaArray_t(array))
6583 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
6584 self._array._pvt_ptr[0] = cyarray
6586cdef class anon_struct2:
6587 """
6588 Attributes
6589 ----------
6590 mipmap : cudaMipmappedArray_t
6593 Methods
6594 -------
6595 getPtr()
6596 Get memory address of class instance
6597 """
6598 def __cinit__(self, void_ptr _ptr):
6599 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6601 def __init__(self, void_ptr _ptr):
6602 pass
6603 self._mipmap = cudaMipmappedArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].res.mipmap.mipmap)
6604 def __dealloc__(self):
6605 pass
6606 def getPtr(self):
6607 return <void_ptr>&self._pvt_ptr[0].res.mipmap
6608 def __repr__(self):
6609 if self._pvt_ptr is not NULL:
6610 str_list = []
6611 try:
6612 str_list += ['mipmap : ' + str(self.mipmap)]
6613 except ValueError:
6614 str_list += ['mipmap : <ValueError>']
6615 return '\n'.join(str_list)
6616 else:
6617 return ''
6618 @property
6619 def mipmap(self):
6620 return self._mipmap
6621 @mipmap.setter
6622 def mipmap(self, mipmap):
6623 cdef cyruntime.cudaMipmappedArray_t cymipmap
6624 if mipmap is None:
6625 cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>0
6626 elif isinstance(mipmap, (cudaMipmappedArray_t,)):
6627 pmipmap = int(mipmap)
6628 cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
6629 else:
6630 pmipmap = int(cudaMipmappedArray_t(mipmap))
6631 cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
6632 self._mipmap._pvt_ptr[0] = cymipmap
6634cdef class anon_struct3:
6635 """
6636 Attributes
6637 ----------
6638 devPtr : Any
6640 desc : cudaChannelFormatDesc
6642 sizeInBytes : size_t
6645 Methods
6646 -------
6647 getPtr()
6648 Get memory address of class instance
6649 """
6650 def __cinit__(self, void_ptr _ptr):
6651 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6653 def __init__(self, void_ptr _ptr):
6654 pass
6655 self._desc = cudaChannelFormatDesc(_ptr=<void_ptr>&self._pvt_ptr[0].res.linear.desc)
6656 def __dealloc__(self):
6657 pass
6658 def getPtr(self):
6659 return <void_ptr>&self._pvt_ptr[0].res.linear
6660 def __repr__(self):
6661 if self._pvt_ptr is not NULL:
6662 str_list = []
6663 try:
6664 str_list += ['devPtr : ' + hex(self.devPtr)]
6665 except ValueError:
6666 str_list += ['devPtr : <ValueError>']
6667 try:
6668 str_list += ['desc :\n' + '\n'.join([' ' + line for line in str(self.desc).splitlines()])]
6669 except ValueError:
6670 str_list += ['desc : <ValueError>']
6671 try:
6672 str_list += ['sizeInBytes : ' + str(self.sizeInBytes)]
6673 except ValueError:
6674 str_list += ['sizeInBytes : <ValueError>']
6675 return '\n'.join(str_list)
6676 else:
6677 return ''
6678 @property
6679 def devPtr(self):
6680 return <void_ptr>self._pvt_ptr[0].res.linear.devPtr
6681 @devPtr.setter
6682 def devPtr(self, devPtr):
6683 _cdevPtr = _HelperInputVoidPtr(devPtr)
6684 self._pvt_ptr[0].res.linear.devPtr = <void*><void_ptr>_cdevPtr.cptr
6685 @property
6686 def desc(self):
6687 return self._desc
6688 @desc.setter
6689 def desc(self, desc not None : cudaChannelFormatDesc):
6690 string.memcpy(&self._pvt_ptr[0].res.linear.desc, <cyruntime.cudaChannelFormatDesc*><void_ptr>desc.getPtr(), sizeof(self._pvt_ptr[0].res.linear.desc))
6691 @property
6692 def sizeInBytes(self):
6693 return self._pvt_ptr[0].res.linear.sizeInBytes
6694 @sizeInBytes.setter
6695 def sizeInBytes(self, size_t sizeInBytes):
6696 self._pvt_ptr[0].res.linear.sizeInBytes = sizeInBytes
6698cdef class anon_struct4:
6699 """
6700 Attributes
6701 ----------
6702 devPtr : Any
6704 desc : cudaChannelFormatDesc
6706 width : size_t
6708 height : size_t
6710 pitchInBytes : size_t
6713 Methods
6714 -------
6715 getPtr()
6716 Get memory address of class instance
6717 """
6718 def __cinit__(self, void_ptr _ptr):
6719 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6721 def __init__(self, void_ptr _ptr):
6722 pass
6723 self._desc = cudaChannelFormatDesc(_ptr=<void_ptr>&self._pvt_ptr[0].res.pitch2D.desc)
6724 def __dealloc__(self):
6725 pass
6726 def getPtr(self):
6727 return <void_ptr>&self._pvt_ptr[0].res.pitch2D
6728 def __repr__(self):
6729 if self._pvt_ptr is not NULL:
6730 str_list = []
6731 try:
6732 str_list += ['devPtr : ' + hex(self.devPtr)]
6733 except ValueError:
6734 str_list += ['devPtr : <ValueError>']
6735 try:
6736 str_list += ['desc :\n' + '\n'.join([' ' + line for line in str(self.desc).splitlines()])]
6737 except ValueError:
6738 str_list += ['desc : <ValueError>']
6739 try:
6740 str_list += ['width : ' + str(self.width)]
6741 except ValueError:
6742 str_list += ['width : <ValueError>']
6743 try:
6744 str_list += ['height : ' + str(self.height)]
6745 except ValueError:
6746 str_list += ['height : <ValueError>']
6747 try:
6748 str_list += ['pitchInBytes : ' + str(self.pitchInBytes)]
6749 except ValueError:
6750 str_list += ['pitchInBytes : <ValueError>']
6751 return '\n'.join(str_list)
6752 else:
6753 return ''
6754 @property
6755 def devPtr(self):
6756 return <void_ptr>self._pvt_ptr[0].res.pitch2D.devPtr
6757 @devPtr.setter
6758 def devPtr(self, devPtr):
6759 _cdevPtr = _HelperInputVoidPtr(devPtr)
6760 self._pvt_ptr[0].res.pitch2D.devPtr = <void*><void_ptr>_cdevPtr.cptr
6761 @property
6762 def desc(self):
6763 return self._desc
6764 @desc.setter
6765 def desc(self, desc not None : cudaChannelFormatDesc):
6766 string.memcpy(&self._pvt_ptr[0].res.pitch2D.desc, <cyruntime.cudaChannelFormatDesc*><void_ptr>desc.getPtr(), sizeof(self._pvt_ptr[0].res.pitch2D.desc))
6767 @property
6768 def width(self):
6769 return self._pvt_ptr[0].res.pitch2D.width
6770 @width.setter
6771 def width(self, size_t width):
6772 self._pvt_ptr[0].res.pitch2D.width = width
6773 @property
6774 def height(self):
6775 return self._pvt_ptr[0].res.pitch2D.height
6776 @height.setter
6777 def height(self, size_t height):
6778 self._pvt_ptr[0].res.pitch2D.height = height
6779 @property
6780 def pitchInBytes(self):
6781 return self._pvt_ptr[0].res.pitch2D.pitchInBytes
6782 @pitchInBytes.setter
6783 def pitchInBytes(self, size_t pitchInBytes):
6784 self._pvt_ptr[0].res.pitch2D.pitchInBytes = pitchInBytes
6786cdef class anon_struct5:
6787 """
6788 Attributes
6789 ----------
6790 reserved : list[int]
6793 Methods
6794 -------
6795 getPtr()
6796 Get memory address of class instance
6797 """
6798 def __cinit__(self, void_ptr _ptr):
6799 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6801 def __init__(self, void_ptr _ptr):
6802 pass
6803 def __dealloc__(self):
6804 pass
6805 def getPtr(self):
6806 return <void_ptr>&self._pvt_ptr[0].res.reserved
6807 def __repr__(self):
6808 if self._pvt_ptr is not NULL:
6809 str_list = []
6810 try:
6811 str_list += ['reserved : ' + str(self.reserved)]
6812 except ValueError:
6813 str_list += ['reserved : <ValueError>']
6814 return '\n'.join(str_list)
6815 else:
6816 return ''
6817 @property
6818 def reserved(self):
6819 return self._pvt_ptr[0].res.reserved.reserved
6820 @reserved.setter
6821 def reserved(self, reserved):
6822 self._pvt_ptr[0].res.reserved.reserved = reserved
6824cdef class anon_union0:
6825 """
6826 Attributes
6827 ----------
6828 array : anon_struct1
6830 mipmap : anon_struct2
6832 linear : anon_struct3
6834 pitch2D : anon_struct4
6836 reserved : anon_struct5
6839 Methods
6840 -------
6841 getPtr()
6842 Get memory address of class instance
6843 """
6844 def __cinit__(self, void_ptr _ptr):
6845 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6847 def __init__(self, void_ptr _ptr):
6848 pass
6849 self._array = anon_struct1(_ptr=<void_ptr>self._pvt_ptr)
6850 self._mipmap = anon_struct2(_ptr=<void_ptr>self._pvt_ptr)
6851 self._linear = anon_struct3(_ptr=<void_ptr>self._pvt_ptr)
6852 self._pitch2D = anon_struct4(_ptr=<void_ptr>self._pvt_ptr)
6853 self._reserved = anon_struct5(_ptr=<void_ptr>self._pvt_ptr)
6854 def __dealloc__(self):
6855 pass
6856 def getPtr(self):
6857 return <void_ptr>&self._pvt_ptr[0].res
6858 def __repr__(self):
6859 if self._pvt_ptr is not NULL:
6860 str_list = []
6861 try:
6862 str_list += ['array :\n' + '\n'.join([' ' + line for line in str(self.array).splitlines()])]
6863 except ValueError:
6864 str_list += ['array : <ValueError>']
6865 try:
6866 str_list += ['mipmap :\n' + '\n'.join([' ' + line for line in str(self.mipmap).splitlines()])]
6867 except ValueError:
6868 str_list += ['mipmap : <ValueError>']
6869 try:
6870 str_list += ['linear :\n' + '\n'.join([' ' + line for line in str(self.linear).splitlines()])]
6871 except ValueError:
6872 str_list += ['linear : <ValueError>']
6873 try:
6874 str_list += ['pitch2D :\n' + '\n'.join([' ' + line for line in str(self.pitch2D).splitlines()])]
6875 except ValueError:
6876 str_list += ['pitch2D : <ValueError>']
6877 try:
6878 str_list += ['reserved :\n' + '\n'.join([' ' + line for line in str(self.reserved).splitlines()])]
6879 except ValueError:
6880 str_list += ['reserved : <ValueError>']
6881 return '\n'.join(str_list)
6882 else:
6883 return ''
6884 @property
6885 def array(self):
6886 return self._array
6887 @array.setter
6888 def array(self, array not None : anon_struct1):
6889 string.memcpy(&self._pvt_ptr[0].res.array, <cyruntime.anon_struct1*><void_ptr>array.getPtr(), sizeof(self._pvt_ptr[0].res.array))
6890 @property
6891 def mipmap(self):
6892 return self._mipmap
6893 @mipmap.setter
6894 def mipmap(self, mipmap not None : anon_struct2):
6895 string.memcpy(&self._pvt_ptr[0].res.mipmap, <cyruntime.anon_struct2*><void_ptr>mipmap.getPtr(), sizeof(self._pvt_ptr[0].res.mipmap))
6896 @property
6897 def linear(self):
6898 return self._linear
6899 @linear.setter
6900 def linear(self, linear not None : anon_struct3):
6901 string.memcpy(&self._pvt_ptr[0].res.linear, <cyruntime.anon_struct3*><void_ptr>linear.getPtr(), sizeof(self._pvt_ptr[0].res.linear))
6902 @property
6903 def pitch2D(self):
6904 return self._pitch2D
6905 @pitch2D.setter
6906 def pitch2D(self, pitch2D not None : anon_struct4):
6907 string.memcpy(&self._pvt_ptr[0].res.pitch2D, <cyruntime.anon_struct4*><void_ptr>pitch2D.getPtr(), sizeof(self._pvt_ptr[0].res.pitch2D))
6908 @property
6909 def reserved(self):
6910 return self._reserved
6911 @reserved.setter
6912 def reserved(self, reserved not None : anon_struct5):
6913 string.memcpy(&self._pvt_ptr[0].res.reserved, <cyruntime.anon_struct5*><void_ptr>reserved.getPtr(), sizeof(self._pvt_ptr[0].res.reserved))
6915cdef class cudaResourceDesc:
6916 """
6917 CUDA resource descriptor
6919 Attributes
6920 ----------
6921 resType : cudaResourceType
6922 Resource type
6923 res : anon_union0
6925 flags : unsigned int
6926 Flags (must be zero)
6928 Methods
6929 -------
6930 getPtr()
6931 Get memory address of class instance
6932 """
6933 def __cinit__(self, void_ptr _ptr = 0):
6934 if _ptr == 0:
6935 self._val_ptr = <cyruntime.cudaResourceDesc *>calloc(1, sizeof(cyruntime.cudaResourceDesc))
6936 self._pvt_ptr = self._val_ptr
6937 else:
6938 self._pvt_ptr = <cyruntime.cudaResourceDesc *>_ptr
6939 def __init__(self, void_ptr _ptr = 0):
6940 pass
6941 self._res = anon_union0(_ptr=<void_ptr>self._pvt_ptr)
6942 def __dealloc__(self):
6943 if self._val_ptr is not NULL:
6944 free(self._val_ptr)
6945 def getPtr(self):
6946 return <void_ptr>self._pvt_ptr
6947 def __repr__(self):
6948 if self._pvt_ptr is not NULL:
6949 str_list = []
6950 try:
6951 str_list += ['resType : ' + str(self.resType)]
6952 except ValueError:
6953 str_list += ['resType : <ValueError>']
6954 try:
6955 str_list += ['res :\n' + '\n'.join([' ' + line for line in str(self.res).splitlines()])]
6956 except ValueError:
6957 str_list += ['res : <ValueError>']
6958 try:
6959 str_list += ['flags : ' + str(self.flags)]
6960 except ValueError:
6961 str_list += ['flags : <ValueError>']
6962 return '\n'.join(str_list)
6963 else:
6964 return ''
6965 @property
6966 def resType(self):
6967 if self._pvt_ptr[0].resType not in _dict_cudaResourceType:
6968 return None
6969 return _dict_cudaResourceType[self._pvt_ptr[0].resType]
6970 @resType.setter
6971 def resType(self, resType not None : cudaResourceType):
6972 self._pvt_ptr[0].resType = resType.value
6973 @property
6974 def res(self):
6975 return self._res
6976 @res.setter
6977 def res(self, res not None : anon_union0):
6978 string.memcpy(&self._pvt_ptr[0].res, <cyruntime.anon_union0*><void_ptr>res.getPtr(), sizeof(self._pvt_ptr[0].res))
6979 @property
6980 def flags(self):
6981 return self._pvt_ptr[0].flags
6982 @flags.setter
6983 def flags(self, unsigned int flags):
6984 self._pvt_ptr[0].flags = flags
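# Usage sketch (illustrative, not part of the generated bindings): describing
# a 1D linear device buffer with cudaResourceDesc. `dev_ptr` and `nbytes` are
# assumed to come from a prior cudaMalloc call; the nested res.linear view
# writes through to the same underlying C struct.
#
#   fmt = cudaChannelFormatDesc()
#   fmt.x = 32  # one 32-bit channel
#   fmt.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
#   desc = cudaResourceDesc()
#   desc.resType = cudaResourceType.cudaResourceTypeLinear
#   desc.res.linear.devPtr = dev_ptr
#   desc.res.linear.desc = fmt
#   desc.res.linear.sizeInBytes = nbytes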
6986cdef class cudaResourceViewDesc:
6987 """
6988 CUDA resource view descriptor
6990 Attributes
6991 ----------
6992 format : cudaResourceViewFormat
6993 Resource view format
6994 width : size_t
6995 Width of the resource view
6996 height : size_t
6997 Height of the resource view
6998 depth : size_t
6999 Depth of the resource view
7000 firstMipmapLevel : unsigned int
7001 First defined mipmap level
7002 lastMipmapLevel : unsigned int
7003 Last defined mipmap level
7004 firstLayer : unsigned int
7005 First layer index
7006 lastLayer : unsigned int
7007 Last layer index
7008 reserved : list[unsigned int]
7009 Must be zero
7011 Methods
7012 -------
7013 getPtr()
7014 Get memory address of class instance
7015 """
7016 def __cinit__(self, void_ptr _ptr = 0):
7017 if _ptr == 0:
7018 self._pvt_ptr = &self._pvt_val
7019 else:
7020 self._pvt_ptr = <cyruntime.cudaResourceViewDesc *>_ptr
7021 def __init__(self, void_ptr _ptr = 0):
7022 pass
7023 def __dealloc__(self):
7024 pass
7025 def getPtr(self):
7026 return <void_ptr>self._pvt_ptr
7027 def __repr__(self):
7028 if self._pvt_ptr is not NULL:
7029 str_list = []
7030 try:
7031 str_list += ['format : ' + str(self.format)]
7032 except ValueError:
7033 str_list += ['format : <ValueError>']
7034 try:
7035 str_list += ['width : ' + str(self.width)]
7036 except ValueError:
7037 str_list += ['width : <ValueError>']
7038 try:
7039 str_list += ['height : ' + str(self.height)]
7040 except ValueError:
7041 str_list += ['height : <ValueError>']
7042 try:
7043 str_list += ['depth : ' + str(self.depth)]
7044 except ValueError:
7045 str_list += ['depth : <ValueError>']
7046 try:
7047 str_list += ['firstMipmapLevel : ' + str(self.firstMipmapLevel)]
7048 except ValueError:
7049 str_list += ['firstMipmapLevel : <ValueError>']
7050 try:
7051 str_list += ['lastMipmapLevel : ' + str(self.lastMipmapLevel)]
7052 except ValueError:
7053 str_list += ['lastMipmapLevel : <ValueError>']
7054 try:
7055 str_list += ['firstLayer : ' + str(self.firstLayer)]
7056 except ValueError:
7057 str_list += ['firstLayer : <ValueError>']
7058 try:
7059 str_list += ['lastLayer : ' + str(self.lastLayer)]
7060 except ValueError:
7061 str_list += ['lastLayer : <ValueError>']
7062 try:
7063 str_list += ['reserved : ' + str(self.reserved)]
7064 except ValueError:
7065 str_list += ['reserved : <ValueError>']
7066 return '\n'.join(str_list)
7067 else:
7068 return ''
7069 @property
7070 def format(self):
7071 if self._pvt_ptr[0].format not in _dict_cudaResourceViewFormat:
7072 return None
7073 return _dict_cudaResourceViewFormat[self._pvt_ptr[0].format]
7074 @format.setter
7075 def format(self, format not None : cudaResourceViewFormat):
7076 self._pvt_ptr[0].format = format.value
7077 @property
7078 def width(self):
7079 return self._pvt_ptr[0].width
7080 @width.setter
7081 def width(self, size_t width):
7082 self._pvt_ptr[0].width = width
7083 @property
7084 def height(self):
7085 return self._pvt_ptr[0].height
7086 @height.setter
7087 def height(self, size_t height):
7088 self._pvt_ptr[0].height = height
7089 @property
7090 def depth(self):
7091 return self._pvt_ptr[0].depth
7092 @depth.setter
7093 def depth(self, size_t depth):
7094 self._pvt_ptr[0].depth = depth
7095 @property
7096 def firstMipmapLevel(self):
7097 return self._pvt_ptr[0].firstMipmapLevel
7098 @firstMipmapLevel.setter
7099 def firstMipmapLevel(self, unsigned int firstMipmapLevel):
7100 self._pvt_ptr[0].firstMipmapLevel = firstMipmapLevel
7101 @property
7102 def lastMipmapLevel(self):
7103 return self._pvt_ptr[0].lastMipmapLevel
7104 @lastMipmapLevel.setter
7105 def lastMipmapLevel(self, unsigned int lastMipmapLevel):
7106 self._pvt_ptr[0].lastMipmapLevel = lastMipmapLevel
7107 @property
7108 def firstLayer(self):
7109 return self._pvt_ptr[0].firstLayer
7110 @firstLayer.setter
7111 def firstLayer(self, unsigned int firstLayer):
7112 self._pvt_ptr[0].firstLayer = firstLayer
7113 @property
7114 def lastLayer(self):
7115 return self._pvt_ptr[0].lastLayer
7116 @lastLayer.setter
7117 def lastLayer(self, unsigned int lastLayer):
7118 self._pvt_ptr[0].lastLayer = lastLayer
7119 @property
7120 def reserved(self):
7121 return self._pvt_ptr[0].reserved
7122 @reserved.setter
7123 def reserved(self, reserved):
7124 self._pvt_ptr[0].reserved = reserved
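# Usage sketch (illustrative, not part of the generated bindings): a resource
# view reinterpreting the first two mipmap levels as float4 texels. `w` and
# `h` are assumed to be the level-0 dimensions of the underlying resource.
#
#   view = cudaResourceViewDesc()
#   view.format = cudaResourceViewFormat.cudaResViewFormatFloat4
#   view.width = w
#   view.height = h
#   view.depth = 0  # 0 for non-3D resources
#   view.firstMipmapLevel = 0
#   view.lastMipmapLevel = 1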
7126cdef class cudaPointerAttributes:
7127 """
7128 CUDA pointer attributes
7130 Attributes
7131 ----------
7132 type : cudaMemoryType
7133 The type of memory - cudaMemoryTypeUnregistered,
7134 cudaMemoryTypeHost, cudaMemoryTypeDevice or cudaMemoryTypeManaged.
7135 device : int
7136 The device against which the memory was allocated or registered. If
7137 the memory type is cudaMemoryTypeDevice then this identifies the
7138 device on which the referenced memory physically resides. If the
7139 memory type is cudaMemoryTypeHost or cudaMemoryTypeManaged then
7140 this identifies the device which was current when the memory was
7141 allocated or registered (and if that device is deinitialized then
7142 this allocation will vanish with that device's state).
7143 devicePointer : Any
7144 The address which may be dereferenced on the current device to
7145 access the memory or NULL if no such address exists.
7146 hostPointer : Any
7147 The address which may be dereferenced on the host to access the
7148 memory or NULL if no such address exists. CUDA doesn't check
7149 whether unregistered memory is allocated, so this field may contain
7150 an invalid pointer if an invalid pointer has been passed to CUDA.
7151 reserved : list[long]
7152 Must be zero
7154 Methods
7155 -------
7156 getPtr()
7157 Get memory address of class instance
7158 """
7159 def __cinit__(self, void_ptr _ptr = 0):
7160 if _ptr == 0:
7161 self._pvt_ptr = &self._pvt_val
7162 else:
7163 self._pvt_ptr = <cyruntime.cudaPointerAttributes *>_ptr
7164 def __init__(self, void_ptr _ptr = 0):
7165 pass
7166 def __dealloc__(self):
7167 pass
7168 def getPtr(self):
7169 return <void_ptr>self._pvt_ptr
7170 def __repr__(self):
7171 if self._pvt_ptr is not NULL:
7172 str_list = []
7173 try:
7174 str_list += ['type : ' + str(self.type)]
7175 except ValueError:
7176 str_list += ['type : <ValueError>']
7177 try:
7178 str_list += ['device : ' + str(self.device)]
7179 except ValueError:
7180 str_list += ['device : <ValueError>']
7181 try:
7182 str_list += ['devicePointer : ' + hex(self.devicePointer)]
7183 except ValueError:
7184 str_list += ['devicePointer : <ValueError>']
7185 try:
7186 str_list += ['hostPointer : ' + hex(self.hostPointer)]
7187 except ValueError:
7188 str_list += ['hostPointer : <ValueError>']
7189 try:
7190 str_list += ['reserved : ' + str(self.reserved)]
7191 except ValueError:
7192 str_list += ['reserved : <ValueError>']
7193 return '\n'.join(str_list)
7194 else:
7195 return ''
7196 @property
7197 def type(self):
7198 if self._pvt_ptr[0].type not in _dict_cudaMemoryType:
7199 return None
7200 return _dict_cudaMemoryType[self._pvt_ptr[0].type]
7201 @type.setter
7202 def type(self, type not None : cudaMemoryType):
7203 self._pvt_ptr[0].type = type.value
7204 @property
7205 def device(self):
7206 return self._pvt_ptr[0].device
7207 @device.setter
7208 def device(self, int device):
7209 self._pvt_ptr[0].device = device
7210 @property
7211 def devicePointer(self):
7212 return <void_ptr>self._pvt_ptr[0].devicePointer
7213 @devicePointer.setter
7214 def devicePointer(self, devicePointer):
7215 _cdevicePointer = _HelperInputVoidPtr(devicePointer)
7216 self._pvt_ptr[0].devicePointer = <void*><void_ptr>_cdevicePointer.cptr
7217 @property
7218 def hostPointer(self):
7219 return <void_ptr>self._pvt_ptr[0].hostPointer
7220 @hostPointer.setter
7221 def hostPointer(self, hostPointer):
7222 _chostPointer = _HelperInputVoidPtr(hostPointer)
7223 self._pvt_ptr[0].hostPointer = <void*><void_ptr>_chostPointer.cptr
7224 @property
7225 def reserved(self):
7226 return self._pvt_ptr[0].reserved
7227 @reserved.setter
7228 def reserved(self, reserved):
7229 self._pvt_ptr[0].reserved = reserved
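# Usage sketch (illustrative, not part of the generated bindings): querying
# a pointer with cudaPointerGetAttributes, which follows this module's
# (error, result) return convention. `dev_ptr` is assumed to come from a
# prior allocation.
#
#   err, attrs = cudaPointerGetAttributes(dev_ptr)
#   if err == cudaError_t.cudaSuccess:
#       print(attrs.type, attrs.device, hex(attrs.devicePointer))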
7231cdef class cudaFuncAttributes:
7232 """
7233 CUDA function attributes
7235 Attributes
7236 ----------
7237 sharedSizeBytes : size_t
7238 The size in bytes of statically-allocated shared memory per block
7239 required by this function. This does not include dynamically-
7240 allocated shared memory requested by the user at runtime.
7241 constSizeBytes : size_t
7242 The size in bytes of user-allocated constant memory required by
7243 this function.
7244 localSizeBytes : size_t
7245 The size in bytes of local memory used by each thread of this
7246 function.
7247 maxThreadsPerBlock : int
7248 The maximum number of threads per block, beyond which a launch of
7249 the function would fail. This number depends on both the function
7250 and the device on which the function is currently loaded.
7251 numRegs : int
7252 The number of registers used by each thread of this function.
7253 ptxVersion : int
7254 The PTX virtual architecture version for which the function was
7255 compiled. This value is the major PTX version * 10 + the minor PTX
7256 version, so a PTX version 1.3 function would return the value 13.
7257 binaryVersion : int
7258 The binary architecture version for which the function was
7259 compiled. This value is the major binary version * 10 + the minor
7260 binary version, so a binary version 1.3 function would return the
7261 value 13.
7262 cacheModeCA : int
7263 The attribute to indicate whether the function has been compiled
7264 with user specified option "-Xptxas --dlcm=ca" set.
7265 maxDynamicSharedSizeBytes : int
7266 The maximum size in bytes of dynamic shared memory per block for
7267 this function. Any launch must have a dynamic shared memory size
7268 smaller than this value.
7269 preferredShmemCarveout : int
7270 On devices where the L1 cache and shared memory use the same
7271 hardware resources, this sets the shared memory carveout
7272 preference, in percent of the maximum shared memory. Refer to
7273 cudaDevAttrMaxSharedMemoryPerMultiprocessor. This is only a hint,
7274 and the driver can choose a different ratio if required to execute
7275 the function. See cudaFuncSetAttribute
7276 clusterDimMustBeSet : int
7277 If this attribute is set, the kernel must launch with a valid
7278 cluster dimension specified.
7279 requiredClusterWidth : int
7280 The required cluster width/height/depth in blocks. The values must
7281 either all be 0 or all be positive. The validity of the cluster
7282 dimensions is otherwise checked at launch time. If the value is
7283 set at compile time, it cannot be set at runtime. Setting it at
7284 runtime should return cudaErrorNotPermitted. See
7285 cudaFuncSetAttribute
7286 requiredClusterHeight : int
7288 requiredClusterDepth : int
7290 clusterSchedulingPolicyPreference : int
7291 The block scheduling policy of a function. See cudaFuncSetAttribute
7292 nonPortableClusterSizeAllowed : int
7293 Whether the function can be launched with non-portable cluster
7294 size. 1 is allowed, 0 is disallowed. A non-portable cluster size
7295 may only function on the specific SKUs the program is tested on.
7296 The launch might fail if the program is run on a different hardware
7297 platform. CUDA API provides cudaOccupancyMaxActiveClusters to
7298 assist with checking whether the desired size can be launched on
7299 the current device. A portable cluster size is guaranteed to be
7300 functional on all compute capabilities higher than the target
7301 compute capability. The portable cluster size for sm_90 is 8
7302 blocks per cluster. This value may increase for future compute
7303 capabilities. The specific hardware unit may support higher
7304 cluster sizes that are not guaranteed to be portable. See
7305 cudaFuncSetAttribute
7306 reserved : list[int]
7307 Reserved for future use.
7309 Methods
7310 -------
7311 getPtr()
7312 Get memory address of class instance
7313 """
7314 def __cinit__(self, void_ptr _ptr = 0):
7315 if _ptr == 0:
7316 self._pvt_ptr = &self._pvt_val
7317 else:
7318 self._pvt_ptr = <cyruntime.cudaFuncAttributes *>_ptr
7319 def __init__(self, void_ptr _ptr = 0):
7320 pass
7321 def __dealloc__(self):
7322 pass
7323 def getPtr(self):
7324 return <void_ptr>self._pvt_ptr
7325 def __repr__(self):
7326 if self._pvt_ptr is not NULL:
7327 str_list = []
7328 try:
7329 str_list += ['sharedSizeBytes : ' + str(self.sharedSizeBytes)]
7330 except ValueError:
7331 str_list += ['sharedSizeBytes : <ValueError>']
7332 try:
7333 str_list += ['constSizeBytes : ' + str(self.constSizeBytes)]
7334 except ValueError:
7335 str_list += ['constSizeBytes : <ValueError>']
7336 try:
7337 str_list += ['localSizeBytes : ' + str(self.localSizeBytes)]
7338 except ValueError:
7339 str_list += ['localSizeBytes : <ValueError>']
7340 try:
7341 str_list += ['maxThreadsPerBlock : ' + str(self.maxThreadsPerBlock)]
7342 except ValueError:
7343 str_list += ['maxThreadsPerBlock : <ValueError>']
7344 try:
7345 str_list += ['numRegs : ' + str(self.numRegs)]
7346 except ValueError:
7347 str_list += ['numRegs : <ValueError>']
7348 try:
7349 str_list += ['ptxVersion : ' + str(self.ptxVersion)]
7350 except ValueError:
7351 str_list += ['ptxVersion : <ValueError>']
7352 try:
7353 str_list += ['binaryVersion : ' + str(self.binaryVersion)]
7354 except ValueError:
7355 str_list += ['binaryVersion : <ValueError>']
7356 try:
7357 str_list += ['cacheModeCA : ' + str(self.cacheModeCA)]
7358 except ValueError:
7359 str_list += ['cacheModeCA : <ValueError>']
7360 try:
7361 str_list += ['maxDynamicSharedSizeBytes : ' + str(self.maxDynamicSharedSizeBytes)]
7362 except ValueError:
7363 str_list += ['maxDynamicSharedSizeBytes : <ValueError>']
7364 try:
7365 str_list += ['preferredShmemCarveout : ' + str(self.preferredShmemCarveout)]
7366 except ValueError:
7367 str_list += ['preferredShmemCarveout : <ValueError>']
7368 try:
7369 str_list += ['clusterDimMustBeSet : ' + str(self.clusterDimMustBeSet)]
7370 except ValueError:
7371 str_list += ['clusterDimMustBeSet : <ValueError>']
7372 try:
7373 str_list += ['requiredClusterWidth : ' + str(self.requiredClusterWidth)]
7374 except ValueError:
7375 str_list += ['requiredClusterWidth : <ValueError>']
7376 try:
7377 str_list += ['requiredClusterHeight : ' + str(self.requiredClusterHeight)]
7378 except ValueError:
7379 str_list += ['requiredClusterHeight : <ValueError>']
7380 try:
7381 str_list += ['requiredClusterDepth : ' + str(self.requiredClusterDepth)]
7382 except ValueError:
7383 str_list += ['requiredClusterDepth : <ValueError>']
7384 try:
7385 str_list += ['clusterSchedulingPolicyPreference : ' + str(self.clusterSchedulingPolicyPreference)]
7386 except ValueError:
7387 str_list += ['clusterSchedulingPolicyPreference : <ValueError>']
7388 try:
7389 str_list += ['nonPortableClusterSizeAllowed : ' + str(self.nonPortableClusterSizeAllowed)]
7390 except ValueError:
7391 str_list += ['nonPortableClusterSizeAllowed : <ValueError>']
7392 try:
7393 str_list += ['reserved : ' + str(self.reserved)]
7394 except ValueError:
7395 str_list += ['reserved : <ValueError>']
7396 return '\n'.join(str_list)
7397 else:
7398 return ''
7399 @property
7400 def sharedSizeBytes(self):
7401 return self._pvt_ptr[0].sharedSizeBytes
7402 @sharedSizeBytes.setter
7403 def sharedSizeBytes(self, size_t sharedSizeBytes):
7404 self._pvt_ptr[0].sharedSizeBytes = sharedSizeBytes
7405 @property
7406 def constSizeBytes(self):
7407 return self._pvt_ptr[0].constSizeBytes
7408 @constSizeBytes.setter
7409 def constSizeBytes(self, size_t constSizeBytes):
7410 self._pvt_ptr[0].constSizeBytes = constSizeBytes
7411 @property
7412 def localSizeBytes(self):
7413 return self._pvt_ptr[0].localSizeBytes
7414 @localSizeBytes.setter
7415 def localSizeBytes(self, size_t localSizeBytes):
7416 self._pvt_ptr[0].localSizeBytes = localSizeBytes
7417 @property
7418 def maxThreadsPerBlock(self):
7419 return self._pvt_ptr[0].maxThreadsPerBlock
7420 @maxThreadsPerBlock.setter
7421 def maxThreadsPerBlock(self, int maxThreadsPerBlock):
7422 self._pvt_ptr[0].maxThreadsPerBlock = maxThreadsPerBlock
7423 @property
7424 def numRegs(self):
7425 return self._pvt_ptr[0].numRegs
7426 @numRegs.setter
7427 def numRegs(self, int numRegs):
7428 self._pvt_ptr[0].numRegs = numRegs
7429 @property
7430 def ptxVersion(self):
7431 return self._pvt_ptr[0].ptxVersion
7432 @ptxVersion.setter
7433 def ptxVersion(self, int ptxVersion):
7434 self._pvt_ptr[0].ptxVersion = ptxVersion
7435 @property
7436 def binaryVersion(self):
7437 return self._pvt_ptr[0].binaryVersion
7438 @binaryVersion.setter
7439 def binaryVersion(self, int binaryVersion):
7440 self._pvt_ptr[0].binaryVersion = binaryVersion
7441 @property
7442 def cacheModeCA(self):
7443 return self._pvt_ptr[0].cacheModeCA
7444 @cacheModeCA.setter
7445 def cacheModeCA(self, int cacheModeCA):
7446 self._pvt_ptr[0].cacheModeCA = cacheModeCA
7447 @property
7448 def maxDynamicSharedSizeBytes(self):
7449 return self._pvt_ptr[0].maxDynamicSharedSizeBytes
7450 @maxDynamicSharedSizeBytes.setter
7451 def maxDynamicSharedSizeBytes(self, int maxDynamicSharedSizeBytes):
7452 self._pvt_ptr[0].maxDynamicSharedSizeBytes = maxDynamicSharedSizeBytes
7453 @property
7454 def preferredShmemCarveout(self):
7455 return self._pvt_ptr[0].preferredShmemCarveout
7456 @preferredShmemCarveout.setter
7457 def preferredShmemCarveout(self, int preferredShmemCarveout):
7458 self._pvt_ptr[0].preferredShmemCarveout = preferredShmemCarveout
7459 @property
7460 def clusterDimMustBeSet(self):
7461 return self._pvt_ptr[0].clusterDimMustBeSet
7462 @clusterDimMustBeSet.setter
7463 def clusterDimMustBeSet(self, int clusterDimMustBeSet):
7464 self._pvt_ptr[0].clusterDimMustBeSet = clusterDimMustBeSet
7465 @property
7466 def requiredClusterWidth(self):
7467 return self._pvt_ptr[0].requiredClusterWidth
7468 @requiredClusterWidth.setter
7469 def requiredClusterWidth(self, int requiredClusterWidth):
7470 self._pvt_ptr[0].requiredClusterWidth = requiredClusterWidth
7471 @property
7472 def requiredClusterHeight(self):
7473 return self._pvt_ptr[0].requiredClusterHeight
7474 @requiredClusterHeight.setter
7475 def requiredClusterHeight(self, int requiredClusterHeight):
7476 self._pvt_ptr[0].requiredClusterHeight = requiredClusterHeight
7477 @property
7478 def requiredClusterDepth(self):
7479 return self._pvt_ptr[0].requiredClusterDepth
7480 @requiredClusterDepth.setter
7481 def requiredClusterDepth(self, int requiredClusterDepth):
7482 self._pvt_ptr[0].requiredClusterDepth = requiredClusterDepth
7483 @property
7484 def clusterSchedulingPolicyPreference(self):
7485 return self._pvt_ptr[0].clusterSchedulingPolicyPreference
7486 @clusterSchedulingPolicyPreference.setter
7487 def clusterSchedulingPolicyPreference(self, int clusterSchedulingPolicyPreference):
7488 self._pvt_ptr[0].clusterSchedulingPolicyPreference = clusterSchedulingPolicyPreference
7489 @property
7490 def nonPortableClusterSizeAllowed(self):
7491 return self._pvt_ptr[0].nonPortableClusterSizeAllowed
7492 @nonPortableClusterSizeAllowed.setter
7493 def nonPortableClusterSizeAllowed(self, int nonPortableClusterSizeAllowed):
7494 self._pvt_ptr[0].nonPortableClusterSizeAllowed = nonPortableClusterSizeAllowed
7495 @property
7496 def reserved(self):
7497 return self._pvt_ptr[0].reserved
7498 @reserved.setter
7499 def reserved(self, reserved):
7500 self._pvt_ptr[0].reserved = reserved
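# Usage sketch (illustrative, not part of the generated bindings): reading a
# kernel's attributes. `kernel` is assumed to be a valid device-function
# handle, and cudaFuncGetAttributes is assumed to follow the (error, result)
# return convention used throughout these bindings.
#
#   err, attrs = cudaFuncGetAttributes(kernel)
#   if err == cudaError_t.cudaSuccess:
#       print("max threads/block:", attrs.maxThreadsPerBlock)
#       print("regs/thread      :", attrs.numRegs)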
7502cdef class cudaMemLocation:
7503 """
7504 Specifies a memory location. To specify a GPU, set type =
7505 cudaMemLocationTypeDevice and set id = the GPU's device ordinal. To
7506 specify a CPU NUMA node, set type = cudaMemLocationTypeHostNuma and
7507 set id = the host NUMA node id.
7509 Attributes
7510 ----------
7511 type : cudaMemLocationType
7512 Specifies the location type, which modifies the meaning of id.
7513 id : int
7514 identifier for the location, interpreted according to this location's type.
7516 Methods
7517 -------
7518 getPtr()
7519 Get memory address of class instance
7520 """
7521 def __cinit__(self, void_ptr _ptr = 0):
7522 if _ptr == 0:
7523 self._pvt_ptr = &self._pvt_val
7524 else:
7525 self._pvt_ptr = <cyruntime.cudaMemLocation *>_ptr
7526 def __init__(self, void_ptr _ptr = 0):
7527 pass
7528 def __dealloc__(self):
7529 pass
7530 def getPtr(self):
7531 return <void_ptr>self._pvt_ptr
7532 def __repr__(self):
7533 if self._pvt_ptr is not NULL:
7534 str_list = []
7535 try:
7536 str_list += ['type : ' + str(self.type)]
7537 except ValueError:
7538 str_list += ['type : <ValueError>']
7539 try:
7540 str_list += ['id : ' + str(self.id)]
7541 except ValueError:
7542 str_list += ['id : <ValueError>']
7543 return '\n'.join(str_list)
7544 else:
7545 return ''
7546 @property
7547 def type(self):
7548 if self._pvt_ptr[0].type not in _dict_cudaMemLocationType:
7549 return None
7550 return _dict_cudaMemLocationType[self._pvt_ptr[0].type]
7551 @type.setter
7552 def type(self, type not None : cudaMemLocationType):
7553 self._pvt_ptr[0].type = type.value
7554 @property
7555 def id(self):
7556 return self._pvt_ptr[0].id
7557 @id.setter
7558 def id(self, int id):
7559 self._pvt_ptr[0].id = id
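# Usage sketch (illustrative, not part of the generated bindings): a
# cudaMemLocation naming device ordinal 0, per the docstring above.
#
#   loc = cudaMemLocation()
#   loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
#   loc.id = 0  # device ordinal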
7561cdef class cudaMemAccessDesc:
7562 """
7563 Memory access descriptor
7565 Attributes
7566 ----------
7567 location : cudaMemLocation
7568 Location whose accessibility the request is to change
7569 flags : cudaMemAccessFlags
7570 Accessibility flags to set on the request
7572 Methods
7573 -------
7574 getPtr()
7575 Get memory address of class instance
7576 """
7577 def __cinit__(self, void_ptr _ptr = 0):
7578 if _ptr == 0:
7579 self._pvt_ptr = &self._pvt_val
7580 else:
7581 self._pvt_ptr = <cyruntime.cudaMemAccessDesc *>_ptr
7582 def __init__(self, void_ptr _ptr = 0):
7583 pass
7584 self._location = cudaMemLocation(_ptr=<void_ptr>&self._pvt_ptr[0].location)
7585 def __dealloc__(self):
7586 pass
7587 def getPtr(self):
7588 return <void_ptr>self._pvt_ptr
7589 def __repr__(self):
7590 if self._pvt_ptr is not NULL:
7591 str_list = []
7592 try:
7593 str_list += ['location :\n' + '\n'.join([' ' + line for line in str(self.location).splitlines()])]
7594 except ValueError:
7595 str_list += ['location : <ValueError>']
7596 try:
7597 str_list += ['flags : ' + str(self.flags)]
7598 except ValueError:
7599 str_list += ['flags : <ValueError>']
7600 return '\n'.join(str_list)
7601 else:
7602 return ''
7603 @property
7604 def location(self):
7605 return self._location
7606 @location.setter
7607 def location(self, location not None : cudaMemLocation):
7608 string.memcpy(&self._pvt_ptr[0].location, <cyruntime.cudaMemLocation*><void_ptr>location.getPtr(), sizeof(self._pvt_ptr[0].location))
7609 @property
7610 def flags(self):
7611 if self._pvt_ptr[0].flags not in _dict_cudaMemAccessFlags:
7612 return None
7613 return _dict_cudaMemAccessFlags[self._pvt_ptr[0].flags]
7614 @flags.setter
7615 def flags(self, flags not None : cudaMemAccessFlags):
7616 self._pvt_ptr[0].flags = flags.value
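# Usage sketch (illustrative, not part of the generated bindings): granting
# read/write access to the location built in the previous sketch; `loc` is
# the cudaMemLocation from above.
#
#   access = cudaMemAccessDesc()
#   access.location = loc
#   access.flags = cudaMemAccessFlags.cudaMemAccessFlagsProtReadWrite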
7618cdef class cudaMemPoolProps:
7619 """
7620 Specifies the properties of allocations made from the pool.
7622 Attributes
7623 ----------
7624 allocType : cudaMemAllocationType
7625 Allocation type. Currently must be specified as
7626 cudaMemAllocationTypePinned
7627 handleTypes : cudaMemAllocationHandleType
7628 Handle types that will be supported by allocations from the pool.
7629 location : cudaMemLocation
7630 Location where allocations should reside.
7631 win32SecurityAttributes : Any
7632 Windows-specific LPSECURITYATTRIBUTES required when
7633 cudaMemHandleTypeWin32 is specified. This security attribute
7634 defines the scope within which exported allocations may be transferred
7635 to other processes. In all other cases, this field is required to
7636 be zero.
7637 maxSize : size_t
7638 Maximum pool size. When set to 0, defaults to a system dependent
7639 value.
7640 usage : unsigned short
7641 Bitmask indicating intended usage for the pool.
7642 reserved : bytes
7643 reserved for future use, must be 0
7645 Methods
7646 -------
7647 getPtr()
7648 Get memory address of class instance
7649 """
7650 def __cinit__(self, void_ptr _ptr = 0):
7651 if _ptr == 0:
7652 self._pvt_ptr = &self._pvt_val
7653 else:
7654 self._pvt_ptr = <cyruntime.cudaMemPoolProps *>_ptr
7655 def __init__(self, void_ptr _ptr = 0):
7656 pass
7657 self._location = cudaMemLocation(_ptr=<void_ptr>&self._pvt_ptr[0].location)
7658 def __dealloc__(self):
7659 pass
7660 def getPtr(self):
7661 return <void_ptr>self._pvt_ptr
7662 def __repr__(self):
7663 if self._pvt_ptr is not NULL:
7664 str_list = []
7665 try:
7666 str_list += ['allocType : ' + str(self.allocType)]
7667 except ValueError:
7668 str_list += ['allocType : <ValueError>']
7669 try:
7670 str_list += ['handleTypes : ' + str(self.handleTypes)]
7671 except ValueError:
7672 str_list += ['handleTypes : <ValueError>']
7673 try:
7674 str_list += ['location :\n' + '\n'.join([' ' + line for line in str(self.location).splitlines()])]
7675 except ValueError:
7676 str_list += ['location : <ValueError>']
7677 try:
7678 str_list += ['win32SecurityAttributes : ' + hex(self.win32SecurityAttributes)]
7679 except ValueError:
7680 str_list += ['win32SecurityAttributes : <ValueError>']
7681 try:
7682 str_list += ['maxSize : ' + str(self.maxSize)]
7683 except ValueError:
7684 str_list += ['maxSize : <ValueError>']
7685 try:
7686 str_list += ['usage : ' + str(self.usage)]
7687 except ValueError:
7688 str_list += ['usage : <ValueError>']
7689 try:
7690 str_list += ['reserved : ' + str(self.reserved)]
7691 except ValueError:
7692 str_list += ['reserved : <ValueError>']
7693 return '\n'.join(str_list)
7694 else:
7695 return ''
7696 @property
7697 def allocType(self):
7698 if self._pvt_ptr[0].allocType not in _dict_cudaMemAllocationType:
7699 return None
7700 return _dict_cudaMemAllocationType[self._pvt_ptr[0].allocType]
7701 @allocType.setter
7702 def allocType(self, allocType not None : cudaMemAllocationType):
7703 self._pvt_ptr[0].allocType = allocType.value
7704 @property
7705 def handleTypes(self):
7706 if self._pvt_ptr[0].handleTypes not in _dict_cudaMemAllocationHandleType:
7707 return None
7708 return _dict_cudaMemAllocationHandleType[self._pvt_ptr[0].handleTypes]
7709 @handleTypes.setter
7710 def handleTypes(self, handleTypes not None : cudaMemAllocationHandleType):
7711 self._pvt_ptr[0].handleTypes = handleTypes.value
7712 @property
7713 def location(self):
7714 return self._location
7715 @location.setter
7716 def location(self, location not None : cudaMemLocation):
7717 string.memcpy(&self._pvt_ptr[0].location, <cyruntime.cudaMemLocation*><void_ptr>location.getPtr(), sizeof(self._pvt_ptr[0].location))
7718 @property
7719 def win32SecurityAttributes(self):
7720 return <void_ptr>self._pvt_ptr[0].win32SecurityAttributes
7721 @win32SecurityAttributes.setter
7722 def win32SecurityAttributes(self, win32SecurityAttributes):
7723 _cwin32SecurityAttributes = _HelperInputVoidPtr(win32SecurityAttributes)
7724 self._pvt_ptr[0].win32SecurityAttributes = <void*><void_ptr>_cwin32SecurityAttributes.cptr
7725 @property
7726 def maxSize(self):
7727 return self._pvt_ptr[0].maxSize
7728 @maxSize.setter
7729 def maxSize(self, size_t maxSize):
7730 self._pvt_ptr[0].maxSize = maxSize
7731 @property
7732 def usage(self):
7733 return self._pvt_ptr[0].usage
7734 @usage.setter
7735 def usage(self, unsigned short usage):
7736 self._pvt_ptr[0].usage = usage
7737 @property
7738 def reserved(self):
7739 return PyBytes_FromStringAndSize(<char*>self._pvt_ptr[0].reserved, 54)
7740 @reserved.setter
7741 def reserved(self, reserved):
7742 if len(reserved) != 54:
7743 raise ValueError("reserved length must be 54, is " + str(len(reserved)))
7744 for i, b in enumerate(reserved):
7745 self._pvt_ptr[0].reserved[i] = b
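# Usage sketch (illustrative, not part of the generated bindings): minimal
# pool properties for a device-resident pool, then pool creation.
# cudaMemPoolCreate is assumed to follow the (error, result) return
# convention of these bindings.
#
#   props = cudaMemPoolProps()
#   props.allocType = cudaMemAllocationType.cudaMemAllocationTypePinned
#   props.location.type = cudaMemLocationType.cudaMemLocationTypeDevice
#   props.location.id = 0
#   err, pool = cudaMemPoolCreate(props)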
7747cdef class cudaMemPoolPtrExportData:
7748 """
7749 Opaque data for exporting a pool allocation
7751 Attributes
7752 ----------
7753 reserved : bytes
7756 Methods
7757 -------
7758 getPtr()
7759 Get memory address of class instance
7760 """
7761 def __cinit__(self, void_ptr _ptr = 0):
7762 if _ptr == 0:
7763 self._pvt_ptr = &self._pvt_val
7764 else:
7765 self._pvt_ptr = <cyruntime.cudaMemPoolPtrExportData *>_ptr
7766 def __init__(self, void_ptr _ptr = 0):
7767 pass
7768 def __dealloc__(self):
7769 pass
7770 def getPtr(self):
7771 return <void_ptr>self._pvt_ptr
7772 def __repr__(self):
7773 if self._pvt_ptr is not NULL:
7774 str_list = []
7775 try:
7776 str_list += ['reserved : ' + str(self.reserved)]
7777 except ValueError:
7778 str_list += ['reserved : <ValueError>']
7779 return '\n'.join(str_list)
7780 else:
7781 return ''
7782 @property
7783 def reserved(self):
7784 return PyBytes_FromStringAndSize(<char*>self._pvt_ptr[0].reserved, 64)
7785 @reserved.setter
7786 def reserved(self, reserved):
7787 if len(reserved) != 64:
7788 raise ValueError("reserved length must be 64, is " + str(len(reserved)))
7789 for i, b in enumerate(reserved):
7790 self._pvt_ptr[0].reserved[i] = b
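# Usage sketch (illustrative, not part of the generated bindings): exporting
# a pool allocation for IPC. The (error, result) signature assumed for
# cudaMemPoolExportPointer follows this module's conventions; `dev_ptr` must
# come from a shareable memory pool.
#
#   err, export_data = cudaMemPoolExportPointer(dev_ptr)
#   opaque = export_data.reserved  # 64 opaque bytes to hand to the importer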
7792cdef class cudaMemAllocNodeParams:
7793 """
7794 Memory allocation node parameters
7796 Attributes
7797 ----------
7798 poolProps : cudaMemPoolProps
7799 in: pool properties; the location where the allocation should
7800 reside is specified in ::location. ::handleTypes must be
7801 cudaMemHandleTypeNone. IPC is not supported.
7802 accessDescs : cudaMemAccessDesc
7803 in: array of memory access descriptors. Used to describe peer GPU
7804 access.
7805 accessDescCount : size_t
7806 in: number of memory access descriptors. Must not exceed the number
7807 of GPUs.
7808 bytesize : size_t
7809 in: size in bytes of the requested allocation
7810 dptr : Any
7811 out: address of the allocation returned by CUDA
7813 Methods
7814 -------
7815 getPtr()
7816 Get memory address of class instance
7817 """
7818 def __cinit__(self, void_ptr _ptr = 0):
7819 if _ptr == 0:
7820 self._pvt_ptr = &self._pvt_val
7821 else:
7822 self._pvt_ptr = <cyruntime.cudaMemAllocNodeParams *>_ptr
7823 def __init__(self, void_ptr _ptr = 0):
7824 pass
7825 self._poolProps = cudaMemPoolProps(_ptr=<void_ptr>&self._pvt_ptr[0].poolProps)
7826 def __dealloc__(self):
7827 pass
7828 if self._accessDescs is not NULL:
7829 free(self._accessDescs)
7830 def getPtr(self):
7831 return <void_ptr>self._pvt_ptr
7832 def __repr__(self):
7833 if self._pvt_ptr is not NULL:
7834 str_list = []
7835 try:
7836 str_list += ['poolProps :\n' + '\n'.join([' ' + line for line in str(self.poolProps).splitlines()])]
7837 except ValueError:
7838 str_list += ['poolProps : <ValueError>']
7839 try:
7840 str_list += ['accessDescs : ' + str(self.accessDescs)]
7841 except ValueError:
7842 str_list += ['accessDescs : <ValueError>']
7843 try:
7844 str_list += ['accessDescCount : ' + str(self.accessDescCount)]
7845 except ValueError:
7846 str_list += ['accessDescCount : <ValueError>']
7847 try:
7848 str_list += ['bytesize : ' + str(self.bytesize)]
7849 except ValueError:
7850 str_list += ['bytesize : <ValueError>']
7851 try:
7852 str_list += ['dptr : ' + hex(self.dptr)]
7853 except ValueError:
7854 str_list += ['dptr : <ValueError>']
7855 return '\n'.join(str_list)
7856 else:
7857 return ''
7858 @property
7859 def poolProps(self):
7860 return self._poolProps
7861 @poolProps.setter
7862 def poolProps(self, poolProps not None : cudaMemPoolProps):
7863 string.memcpy(&self._pvt_ptr[0].poolProps, <cyruntime.cudaMemPoolProps*><void_ptr>poolProps.getPtr(), sizeof(self._pvt_ptr[0].poolProps))
7864 @property
7865 def accessDescs(self):
7866 arrs = [<void_ptr>self._pvt_ptr[0].accessDescs + x*sizeof(cyruntime.cudaMemAccessDesc) for x in range(self._accessDescs_length)]
7867 return [cudaMemAccessDesc(_ptr=arr) for arr in arrs]
7868 @accessDescs.setter
7869 def accessDescs(self, val):
7870 if len(val) == 0:
7871 free(self._accessDescs); self._accessDescs = NULL  # avoid double free in __dealloc__
7872 self._accessDescs_length = 0
7873 self._pvt_ptr[0].accessDescs = NULL
7874 else:
7875 if self._accessDescs_length != <size_t>len(val):
7876 free(self._accessDescs)
7877 self._accessDescs = <cyruntime.cudaMemAccessDesc*> calloc(len(val), sizeof(cyruntime.cudaMemAccessDesc))
7878 if self._accessDescs is NULL:
7879 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaMemAccessDesc)))
7880 self._accessDescs_length = <size_t>len(val)
7881 self._pvt_ptr[0].accessDescs = self._accessDescs
7882 for idx in range(len(val)):
7883 string.memcpy(&self._accessDescs[idx], (<cudaMemAccessDesc>val[idx])._pvt_ptr, sizeof(cyruntime.cudaMemAccessDesc))
7885 @property
7886 def accessDescCount(self):
7887 return self._pvt_ptr[0].accessDescCount
7888 @accessDescCount.setter
7889 def accessDescCount(self, size_t accessDescCount):
7890 self._pvt_ptr[0].accessDescCount = accessDescCount
7891 @property
7892 def bytesize(self):
7893 return self._pvt_ptr[0].bytesize
7894 @bytesize.setter
7895 def bytesize(self, size_t bytesize):
7896 self._pvt_ptr[0].bytesize = bytesize
7897 @property
7898 def dptr(self):
7899 return <void_ptr>self._pvt_ptr[0].dptr
7900 @dptr.setter
7901 def dptr(self, dptr):
7902 _cdptr = _HelperInputVoidPtr(dptr)
7903 self._pvt_ptr[0].dptr = <void*><void_ptr>_cdptr.cptr
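# Usage sketch (illustrative, not part of the generated bindings): allocation
# parameters for a graph mem-alloc node. `graph` is assumed to come from
# cudaGraphCreate, and the cudaGraphAddMemAllocNode signature mirrors the C
# API; on success the allocation's address is reported back in params.dptr.
#
#   params = cudaMemAllocNodeParams()
#   params.poolProps.allocType = cudaMemAllocationType.cudaMemAllocationTypePinned
#   params.poolProps.location.type = cudaMemLocationType.cudaMemLocationTypeDevice
#   params.poolProps.location.id = 0
#   params.bytesize = 1 << 20  # 1 MiB
#   err, node = cudaGraphAddMemAllocNode(graph, None, 0, params)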
7905cdef class cudaMemAllocNodeParamsV2:
7906 """
7907 Memory allocation node parameters
7909 Attributes
7910 ----------
7911 poolProps : cudaMemPoolProps
7912 in: pool properties; the location where the allocation should
7913 reside is specified in ::location. ::handleTypes must be
7914 cudaMemHandleTypeNone. IPC is not supported.
7915 accessDescs : cudaMemAccessDesc
7916 in: array of memory access descriptors. Used to describe peer GPU
7917 access.
7918 accessDescCount : size_t
7919 in: number of memory access descriptors. Must not exceed the number
7920 of GPUs.
7921 bytesize : size_t
7922 in: size in bytes of the requested allocation
7923 dptr : Any
7924 out: address of the allocation returned by CUDA
7926 Methods
7927 -------
7928 getPtr()
7929 Get memory address of class instance
7930 """
7931 def __cinit__(self, void_ptr _ptr = 0):
7932 if _ptr == 0:
7933 self._pvt_ptr = &self._pvt_val
7934 else:
7935 self._pvt_ptr = <cyruntime.cudaMemAllocNodeParamsV2 *>_ptr
7936 def __init__(self, void_ptr _ptr = 0):
7937 pass
7938 self._poolProps = cudaMemPoolProps(_ptr=<void_ptr>&self._pvt_ptr[0].poolProps)
7939 def __dealloc__(self):
7940 pass
7941 if self._accessDescs is not NULL:
7942 free(self._accessDescs)
7943 def getPtr(self):
7944 return <void_ptr>self._pvt_ptr
7945 def __repr__(self):
7946 if self._pvt_ptr is not NULL:
7947 str_list = []
7948 try:
7949 str_list += ['poolProps :\n' + '\n'.join([' ' + line for line in str(self.poolProps).splitlines()])]
7950 except ValueError:
7951 str_list += ['poolProps : <ValueError>']
7952 try:
7953 str_list += ['accessDescs : ' + str(self.accessDescs)]
7954 except ValueError:
7955 str_list += ['accessDescs : <ValueError>']
7956 try:
7957 str_list += ['accessDescCount : ' + str(self.accessDescCount)]
7958 except ValueError:
7959 str_list += ['accessDescCount : <ValueError>']
7960 try:
7961 str_list += ['bytesize : ' + str(self.bytesize)]
7962 except ValueError:
7963 str_list += ['bytesize : <ValueError>']
7964 try:
7965 str_list += ['dptr : ' + hex(self.dptr)]
7966 except ValueError:
7967 str_list += ['dptr : <ValueError>']
7968 return '\n'.join(str_list)
7969 else:
7970 return ''
7971 @property
7972 def poolProps(self):
7973 return self._poolProps
7974 @poolProps.setter
7975 def poolProps(self, poolProps not None : cudaMemPoolProps):
7976 string.memcpy(&self._pvt_ptr[0].poolProps, <cyruntime.cudaMemPoolProps*><void_ptr>poolProps.getPtr(), sizeof(self._pvt_ptr[0].poolProps))
7977 @property
7978 def accessDescs(self):
7979 arrs = [<void_ptr>self._pvt_ptr[0].accessDescs + x*sizeof(cyruntime.cudaMemAccessDesc) for x in range(self._accessDescs_length)]
7980 return [cudaMemAccessDesc(_ptr=arr) for arr in arrs]
7981 @accessDescs.setter
7982 def accessDescs(self, val):
7983 if len(val) == 0:
7984 free(self._accessDescs); self._accessDescs = NULL  # avoid double free in __dealloc__
7985 self._accessDescs_length = 0
7986 self._pvt_ptr[0].accessDescs = NULL
7987 else:
7988 if self._accessDescs_length != <size_t>len(val):
7989 free(self._accessDescs)
7990 self._accessDescs = <cyruntime.cudaMemAccessDesc*> calloc(len(val), sizeof(cyruntime.cudaMemAccessDesc))
7991 if self._accessDescs is NULL:
7992 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaMemAccessDesc)))
7993 self._accessDescs_length = <size_t>len(val)
7994 self._pvt_ptr[0].accessDescs = self._accessDescs
7995 for idx in range(len(val)):
7996 string.memcpy(&self._accessDescs[idx], (<cudaMemAccessDesc>val[idx])._pvt_ptr, sizeof(cyruntime.cudaMemAccessDesc))
7998 @property
7999 def accessDescCount(self):
8000 return self._pvt_ptr[0].accessDescCount
8001 @accessDescCount.setter
8002 def accessDescCount(self, size_t accessDescCount):
8003 self._pvt_ptr[0].accessDescCount = accessDescCount
8004 @property
8005 def bytesize(self):
8006 return self._pvt_ptr[0].bytesize
8007 @bytesize.setter
8008 def bytesize(self, size_t bytesize):
8009 self._pvt_ptr[0].bytesize = bytesize
8010 @property
8011 def dptr(self):
8012 return <void_ptr>self._pvt_ptr[0].dptr
8013 @dptr.setter
8014 def dptr(self, dptr):
8015 _cdptr = _HelperInputVoidPtr(dptr)
8016 self._pvt_ptr[0].dptr = <void*><void_ptr>_cdptr.cptr
8018cdef class cudaMemFreeNodeParams:
8019 """
8020 Memory free node parameters
8022 Attributes
8023 ----------
8024 dptr : Any
8025 in: the pointer to free
8027 Methods
8028 -------
8029 getPtr()
8030 Get memory address of class instance
8031 """
8032 def __cinit__(self, void_ptr _ptr = 0):
8033 if _ptr == 0:
8034 self._pvt_ptr = &self._pvt_val
8035 else:
8036 self._pvt_ptr = <cyruntime.cudaMemFreeNodeParams *>_ptr
8037 def __init__(self, void_ptr _ptr = 0):
8038 pass
8039 def __dealloc__(self):
8040 pass
8041 def getPtr(self):
8042 return <void_ptr>self._pvt_ptr
8043 def __repr__(self):
8044 if self._pvt_ptr is not NULL:
8045 str_list = []
8046 try:
8047 str_list += ['dptr : ' + hex(self.dptr)]
8048 except ValueError:
8049 str_list += ['dptr : <ValueError>']
8050 return '\n'.join(str_list)
8051 else:
8052 return ''
8053 @property
8054 def dptr(self):
8055 return <void_ptr>self._pvt_ptr[0].dptr
8056 @dptr.setter
8057 def dptr(self, dptr):
8058 _cdptr = _HelperInputVoidPtr(dptr)
8059 self._pvt_ptr[0].dptr = <void*><void_ptr>_cdptr.cptr
8061cdef class cudaMemcpyAttributes:
8062 """
8063 Attributes specific to copies within a batch. For more details on
8064 usage see cudaMemcpyBatchAsync.
8066 Attributes
8067 ----------
8068 srcAccessOrder : cudaMemcpySrcAccessOrder
8069 Source access ordering to be observed for copies with this
8070 attribute.
8071 srcLocHint : cudaMemLocation
8072 Hint location for the source operand. Ignored unless the pointer is
8073 managed memory or memory allocated outside CUDA.
8074 dstLocHint : cudaMemLocation
8075 Hint location for the destination operand. Ignored unless the
8076 pointer is managed memory or memory allocated outside CUDA.
8077 flags : unsigned int
8078 Additional flags for copies with this attribute. See
8079 cudaMemcpyFlags.
8081 Methods
8082 -------
8083 getPtr()
8084 Get memory address of class instance
8085 """
8086 def __cinit__(self, void_ptr _ptr = 0):
8087 if _ptr == 0:
8088 self._pvt_ptr = &self._pvt_val
8089 else:
8090 self._pvt_ptr = <cyruntime.cudaMemcpyAttributes *>_ptr
8091 def __init__(self, void_ptr _ptr = 0):
8092 pass
8093 self._srcLocHint = cudaMemLocation(_ptr=<void_ptr>&self._pvt_ptr[0].srcLocHint)
8094 self._dstLocHint = cudaMemLocation(_ptr=<void_ptr>&self._pvt_ptr[0].dstLocHint)
8095 def __dealloc__(self):
8096 pass
8097 def getPtr(self):
8098 return <void_ptr>self._pvt_ptr
8099 def __repr__(self):
8100 if self._pvt_ptr is not NULL:
8101 str_list = []
8102 try:
8103 str_list += ['srcAccessOrder : ' + str(self.srcAccessOrder)]
8104 except ValueError:
8105 str_list += ['srcAccessOrder : <ValueError>']
8106 try:
8107 str_list += ['srcLocHint :\n' + '\n'.join([' ' + line for line in str(self.srcLocHint).splitlines()])]
8108 except ValueError:
8109 str_list += ['srcLocHint : <ValueError>']
8110 try:
8111 str_list += ['dstLocHint :\n' + '\n'.join([' ' + line for line in str(self.dstLocHint).splitlines()])]
8112 except ValueError:
8113 str_list += ['dstLocHint : <ValueError>']
8114 try:
8115 str_list += ['flags : ' + str(self.flags)]
8116 except ValueError:
8117 str_list += ['flags : <ValueError>']
8118 return '\n'.join(str_list)
8119 else:
8120 return ''
8121 @property
8122 def srcAccessOrder(self):
8123 if self._pvt_ptr[0].srcAccessOrder not in _dict_cudaMemcpySrcAccessOrder:
8124 return None
8125 return _dict_cudaMemcpySrcAccessOrder[self._pvt_ptr[0].srcAccessOrder]
8126 @srcAccessOrder.setter
8127 def srcAccessOrder(self, srcAccessOrder not None : cudaMemcpySrcAccessOrder):
8128 self._pvt_ptr[0].srcAccessOrder = srcAccessOrder.value
8129 @property
8130 def srcLocHint(self):
8131 return self._srcLocHint
8132 @srcLocHint.setter
8133 def srcLocHint(self, srcLocHint not None : cudaMemLocation):
8134 string.memcpy(&self._pvt_ptr[0].srcLocHint, <cyruntime.cudaMemLocation*><void_ptr>srcLocHint.getPtr(), sizeof(self._pvt_ptr[0].srcLocHint))
8135 @property
8136 def dstLocHint(self):
8137 return self._dstLocHint
8138 @dstLocHint.setter
8139 def dstLocHint(self, dstLocHint not None : cudaMemLocation):
8140 string.memcpy(&self._pvt_ptr[0].dstLocHint, <cyruntime.cudaMemLocation*><void_ptr>dstLocHint.getPtr(), sizeof(self._pvt_ptr[0].dstLocHint))
8141 @property
8142 def flags(self):
8143 return self._pvt_ptr[0].flags
8144 @flags.setter
8145 def flags(self, unsigned int flags):
8146 self._pvt_ptr[0].flags = flags
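# Usage sketch (illustrative, not part of the generated bindings): per-copy
# attributes for a batched copy, hinting a host source and a device
# destination. Enum spellings follow the CUDA runtime headers.
#
#   attrs = cudaMemcpyAttributes()
#   attrs.srcAccessOrder = cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderStream
#   attrs.srcLocHint.type = cudaMemLocationType.cudaMemLocationTypeHost
#   attrs.dstLocHint.type = cudaMemLocationType.cudaMemLocationTypeDevice
#   attrs.dstLocHint.id = 0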
8148cdef class cudaOffset3D:
8149 """
8150 Struct representing offset into a cudaArray_t in elements
8152 Attributes
8153 ----------
8154 x : size_t
8156 y : size_t
8158 z : size_t
8161 Methods
8162 -------
8163 getPtr()
8164 Get memory address of class instance
8165 """
8166 def __cinit__(self, void_ptr _ptr = 0):
8167 if _ptr == 0:
8168 self._pvt_ptr = &self._pvt_val
8169 else:
8170 self._pvt_ptr = <cyruntime.cudaOffset3D *>_ptr
8171 def __init__(self, void_ptr _ptr = 0):
8172 pass
8173 def __dealloc__(self):
8174 pass
8175 def getPtr(self):
8176 return <void_ptr>self._pvt_ptr
8177 def __repr__(self):
8178 if self._pvt_ptr is not NULL:
8179 str_list = []
8180 try:
8181 str_list += ['x : ' + str(self.x)]
8182 except ValueError:
8183 str_list += ['x : <ValueError>']
8184 try:
8185 str_list += ['y : ' + str(self.y)]
8186 except ValueError:
8187 str_list += ['y : <ValueError>']
8188 try:
8189 str_list += ['z : ' + str(self.z)]
8190 except ValueError:
8191 str_list += ['z : <ValueError>']
8192 return '\n'.join(str_list)
8193 else:
8194 return ''
8195 @property
8196 def x(self):
8197 return self._pvt_ptr[0].x
8198 @x.setter
8199 def x(self, size_t x):
8200 self._pvt_ptr[0].x = x
8201 @property
8202 def y(self):
8203 return self._pvt_ptr[0].y
8204 @y.setter
8205 def y(self, size_t y):
8206 self._pvt_ptr[0].y = y
8207 @property
8208 def z(self):
8209 return self._pvt_ptr[0].z
8210 @z.setter
8211 def z(self, size_t z):
8212 self._pvt_ptr[0].z = z
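# Usage sketch (illustrative): offsets are expressed in array elements, not
# bytes, so no element-size scaling is needed here.
#
#   off = runtime.cudaOffset3D()
#   off.x, off.y, off.z = 16, 8, 0   # start 16 elements in, 8 rows down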
8214cdef class anon_struct6:
8215 """
8216 Attributes
8217 ----------
8218 ptr : Any
8220 rowLength : size_t
8222 layerHeight : size_t
8224 locHint : cudaMemLocation
8227 Methods
8228 -------
8229 getPtr()
8230 Get memory address of class instance
8231 """
8232 def __cinit__(self, void_ptr _ptr):
8233 self._pvt_ptr = <cyruntime.cudaMemcpy3DOperand *>_ptr
8235 def __init__(self, void_ptr _ptr):
8237 self._locHint = cudaMemLocation(_ptr=<void_ptr>&self._pvt_ptr[0].op.ptr.locHint)
8238 def __dealloc__(self):
8239 pass
8240 def getPtr(self):
8241 return <void_ptr>&self._pvt_ptr[0].op.ptr
8242 def __repr__(self):
8243 if self._pvt_ptr is not NULL:
8244 str_list = []
8245 try:
8246 str_list += ['ptr : ' + hex(self.ptr)]
8247 except ValueError:
8248 str_list += ['ptr : <ValueError>']
8249 try:
8250 str_list += ['rowLength : ' + str(self.rowLength)]
8251 except ValueError:
8252 str_list += ['rowLength : <ValueError>']
8253 try:
8254 str_list += ['layerHeight : ' + str(self.layerHeight)]
8255 except ValueError:
8256 str_list += ['layerHeight : <ValueError>']
8257 try:
8258 str_list += ['locHint :\n' + '\n'.join([' ' + line for line in str(self.locHint).splitlines()])]
8259 except ValueError:
8260 str_list += ['locHint : <ValueError>']
8261 return '\n'.join(str_list)
8262 else:
8263 return ''
8264 @property
8265 def ptr(self):
8266 return <void_ptr>self._pvt_ptr[0].op.ptr.ptr
8267 @ptr.setter
8268 def ptr(self, ptr):
8269 _cptr = _HelperInputVoidPtr(ptr)
8270 self._pvt_ptr[0].op.ptr.ptr = <void*><void_ptr>_cptr.cptr
8271 @property
8272 def rowLength(self):
8273 return self._pvt_ptr[0].op.ptr.rowLength
8274 @rowLength.setter
8275 def rowLength(self, size_t rowLength):
8276 self._pvt_ptr[0].op.ptr.rowLength = rowLength
8277 @property
8278 def layerHeight(self):
8279 return self._pvt_ptr[0].op.ptr.layerHeight
8280 @layerHeight.setter
8281 def layerHeight(self, size_t layerHeight):
8282 self._pvt_ptr[0].op.ptr.layerHeight = layerHeight
8283 @property
8284 def locHint(self):
8285 return self._locHint
8286 @locHint.setter
8287 def locHint(self, locHint not None : cudaMemLocation):
8288 string.memcpy(&self._pvt_ptr[0].op.ptr.locHint, <cyruntime.cudaMemLocation*><void_ptr>locHint.getPtr(), sizeof(self._pvt_ptr[0].op.ptr.locHint))
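# Usage sketch (illustrative): these fields are reached through the `op.ptr`
# view of a cudaMemcpy3DOperand rather than instantiated directly. `devPtr`
# is a hypothetical device allocation; the sizes are placeholder values.
#
#   operand = runtime.cudaMemcpy3DOperand()
#   operand.op.ptr.ptr = devPtr        # base address of the operand
#   operand.op.ptr.rowLength = 256     # length of each row, in elements
#   operand.op.ptr.layerHeight = 64    # height of each layer, in elements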
8290cdef class anon_struct7:
8291 """
8292 Attributes
8293 ----------
8294 array : cudaArray_t
8296 offset : cudaOffset3D
8299 Methods
8300 -------
8301 getPtr()
8302 Get memory address of class instance
8303 """
8304 def __cinit__(self, void_ptr _ptr):
8305 self._pvt_ptr = <cyruntime.cudaMemcpy3DOperand *>_ptr
8307 def __init__(self, void_ptr _ptr):
8309 self._array = cudaArray_t(_ptr=<void_ptr>&self._pvt_ptr[0].op.array.array)
8310 self._offset = cudaOffset3D(_ptr=<void_ptr>&self._pvt_ptr[0].op.array.offset)
8311 def __dealloc__(self):
8312 pass
8313 def getPtr(self):
8314 return <void_ptr>&self._pvt_ptr[0].op.array
8315 def __repr__(self):
8316 if self._pvt_ptr is not NULL:
8317 str_list = []
8318 try:
8319 str_list += ['array : ' + str(self.array)]
8320 except ValueError:
8321 str_list += ['array : <ValueError>']
8322 try:
8323 str_list += ['offset :\n' + '\n'.join([' ' + line for line in str(self.offset).splitlines()])]
8324 except ValueError:
8325 str_list += ['offset : <ValueError>']
8326 return '\n'.join(str_list)
8327 else:
8328 return ''
8329 @property
8330 def array(self):
8331 return self._array
8332 @array.setter
8333 def array(self, array):
8334 cdef cyruntime.cudaArray_t cyarray
8335 if array is None:
8336 cyarray = <cyruntime.cudaArray_t><void_ptr>0
8337 elif isinstance(array, (cudaArray_t,)):
8338 parray = int(array)
8339 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
8340 else:
8341 parray = int(cudaArray_t(array))
8342 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
8343 self._array._pvt_ptr[0] = cyarray
8344 @property
8345 def offset(self):
8346 return self._offset
8347 @offset.setter
8348 def offset(self, offset not None : cudaOffset3D):
8349 string.memcpy(&self._pvt_ptr[0].op.array.offset, <cyruntime.cudaOffset3D*><void_ptr>offset.getPtr(), sizeof(self._pvt_ptr[0].op.array.offset))
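# Usage sketch (illustrative): the array form of the operand pairs a
# cudaArray_t handle with an element offset into it. `arr` is assumed to
# come from cudaMalloc3DArray or a similar allocation.
#
#   operand.op.array.array = arr
#   operand.op.array.offset = runtime.cudaOffset3D()   # zero offset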
8351cdef class anon_union1:
8352 """
8353 Attributes
8354 ----------
8355 ptr : anon_struct6
8357 array : anon_struct7
8360 Methods
8361 -------
8362 getPtr()
8363 Get memory address of class instance
8364 """
8365 def __cinit__(self, void_ptr _ptr):
8366 self._pvt_ptr = <cyruntime.cudaMemcpy3DOperand *>_ptr
8368 def __init__(self, void_ptr _ptr):
8370 self._ptr = anon_struct6(_ptr=<void_ptr>self._pvt_ptr)
8371 self._array = anon_struct7(_ptr=<void_ptr>self._pvt_ptr)
8372 def __dealloc__(self):
8373 pass
8374 def getPtr(self):
8375 return <void_ptr>&self._pvt_ptr[0].op
8376 def __repr__(self):
8377 if self._pvt_ptr is not NULL:
8378 str_list = []
8379 try:
8380 str_list += ['ptr :\n' + '\n'.join([' ' + line for line in str(self.ptr).splitlines()])]
8381 except ValueError:
8382 str_list += ['ptr : <ValueError>']
8383 try:
8384 str_list += ['array :\n' + '\n'.join([' ' + line for line in str(self.array).splitlines()])]
8385 except ValueError:
8386 str_list += ['array : <ValueError>']
8387 return '\n'.join(str_list)
8388 else:
8389 return ''
8390 @property
8391 def ptr(self):
8392 return self._ptr
8393 @ptr.setter
8394 def ptr(self, ptr not None : anon_struct6):
8395 string.memcpy(&self._pvt_ptr[0].op.ptr, <cyruntime.anon_struct6*><void_ptr>ptr.getPtr(), sizeof(self._pvt_ptr[0].op.ptr))
8396 @property
8397 def array(self):
8398 return self._array
8399 @array.setter
8400 def array(self, array not None : anon_struct7):
8401 string.memcpy(&self._pvt_ptr[0].op.array, <cyruntime.anon_struct7*><void_ptr>array.getPtr(), sizeof(self._pvt_ptr[0].op.array))
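# Note (illustrative): `ptr` and `array` above are two views of the same
# union storage, so only the member matching the enclosing operand's `type`
# field should be written; the other view aliases the same bytes.
#
#   operand.op.ptr.ptr = devPtr   # valid only for a pointer-typed operand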
8403cdef class cudaMemcpy3DOperand:
8404 """
8405 Struct representing an operand for a copy performed with cudaMemcpy3DBatchAsync
8407 Attributes
8408 ----------
8409 type : cudaMemcpy3DOperandType
8411 op : anon_union1
8414 Methods
8415 -------
8416 getPtr()
8417 Get memory address of class instance
8418 """
8419 def __cinit__(self, void_ptr _ptr = 0):
8420 if _ptr == 0:
8421 self._val_ptr = <cyruntime.cudaMemcpy3DOperand *>calloc(1, sizeof(cyruntime.cudaMemcpy3DOperand))
8422 self._pvt_ptr = self._val_ptr
8423 else:
8424 self._pvt_ptr = <cyruntime.cudaMemcpy3DOperand *>_ptr
8425 def __init__(self, void_ptr _ptr = 0):
8427 self._op = anon_union1(_ptr=<void_ptr>self._pvt_ptr)
8428 def __dealloc__(self):
8429 if self._val_ptr is not NULL:
8430 free(self._val_ptr)
8431 def getPtr(self):
8432 return <void_ptr>self._pvt_ptr
8433 def __repr__(self):
8434 if self._pvt_ptr is not NULL:
8435 str_list = []
8436 try:
8437 str_list += ['type : ' + str(self.type)]
8438 except ValueError:
8439 str_list += ['type : <ValueError>']
8440 try:
8441 str_list += ['op :\n' + '\n'.join([' ' + line for line in str(self.op).splitlines()])]
8442 except ValueError:
8443 str_list += ['op : <ValueError>']
8444 return '\n'.join(str_list)
8445 else:
8446 return ''
8447 @property
8448 def type(self):
8449 if self._pvt_ptr[0].type not in _dict_cudaMemcpy3DOperandType:
8450 return None
8451 return _dict_cudaMemcpy3DOperandType[self._pvt_ptr[0].type]
8452 @type.setter
8453 def type(self, type not None : cudaMemcpy3DOperandType):
8454 self._pvt_ptr[0].type = type.value
8455 @property
8456 def op(self):
8457 return self._op
8458 @op.setter
8459 def op(self, op not None : anon_union1):
8460 string.memcpy(&self._pvt_ptr[0].op, <cyruntime.anon_union1*><void_ptr>op.getPtr(), sizeof(self._pvt_ptr[0].op))
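# Usage sketch (illustrative): assembling a complete source operand. The enum
# member name should be verified against this build's cudaMemcpy3DOperandType
# wrapper; `devPtr`, `width` and `height` are hypothetical placeholders.
#
#   src = runtime.cudaMemcpy3DOperand()
#   src.type = runtime.cudaMemcpy3DOperandType.cudaMemcpyOperandTypePointer
#   src.op.ptr.ptr = devPtr
#   src.op.ptr.rowLength = width      # elements per row
#   src.op.ptr.layerHeight = height   # rows per layer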
8462cdef class cudaMemcpy3DBatchOp:
8463 """
8464 Attributes
8465 ----------
8466 src : cudaMemcpy3DOperand
8467 Source memcpy operand.
8468 dst : cudaMemcpy3DOperand
8469 Destination memcpy operand.
8470 extent : cudaExtent
8471 Extents of the memcpy between src and dst. The width, height and
8472 depth components must not be 0.
8473 srcAccessOrder : cudaMemcpySrcAccessOrder
8474 Source access ordering to be observed for copy from src to dst.
8475 flags : unsigned int
8476 Additional flags for copy from src to dst. See cudaMemcpyFlags.
8478 Methods
8479 -------
8480 getPtr()
8481 Get memory address of class instance
8482 """
8483 def __cinit__(self, void_ptr _ptr = 0):
8484 if _ptr == 0:
8485 self._pvt_ptr = &self._pvt_val
8486 else:
8487 self._pvt_ptr = <cyruntime.cudaMemcpy3DBatchOp *>_ptr
8488 def __init__(self, void_ptr _ptr = 0):
8490 self._src = cudaMemcpy3DOperand(_ptr=<void_ptr>&self._pvt_ptr[0].src)
8491 self._dst = cudaMemcpy3DOperand(_ptr=<void_ptr>&self._pvt_ptr[0].dst)
8492 self._extent = cudaExtent(_ptr=<void_ptr>&self._pvt_ptr[0].extent)
8493 def __dealloc__(self):
8494 pass
8495 def getPtr(self):
8496 return <void_ptr>self._pvt_ptr
8497 def __repr__(self):
8498 if self._pvt_ptr is not NULL:
8499 str_list = []
8500 try:
8501 str_list += ['src :\n' + '\n'.join([' ' + line for line in str(self.src).splitlines()])]
8502 except ValueError:
8503 str_list += ['src : <ValueError>']
8504 try:
8505 str_list += ['dst :\n' + '\n'.join([' ' + line for line in str(self.dst).splitlines()])]
8506 except ValueError:
8507 str_list += ['dst : <ValueError>']
8508 try:
8509 str_list += ['extent :\n' + '\n'.join([' ' + line for line in str(self.extent).splitlines()])]
8510 except ValueError:
8511 str_list += ['extent : <ValueError>']
8512 try:
8513 str_list += ['srcAccessOrder : ' + str(self.srcAccessOrder)]
8514 except ValueError:
8515 str_list += ['srcAccessOrder : <ValueError>']
8516 try:
8517 str_list += ['flags : ' + str(self.flags)]
8518 except ValueError:
8519 str_list += ['flags : <ValueError>']
8520 return '\n'.join(str_list)
8521 else:
8522 return ''
8523 @property
8524 def src(self):
8525 return self._src
8526 @src.setter
8527 def src(self, src not None : cudaMemcpy3DOperand):
8528 string.memcpy(&self._pvt_ptr[0].src, <cyruntime.cudaMemcpy3DOperand*><void_ptr>src.getPtr(), sizeof(self._pvt_ptr[0].src))
8529 @property
8530 def dst(self):
8531 return self._dst
8532 @dst.setter
8533 def dst(self, dst not None : cudaMemcpy3DOperand):
8534 string.memcpy(&self._pvt_ptr[0].dst, <cyruntime.cudaMemcpy3DOperand*><void_ptr>dst.getPtr(), sizeof(self._pvt_ptr[0].dst))
8535 @property
8536 def extent(self):
8537 return self._extent
8538 @extent.setter
8539 def extent(self, extent not None : cudaExtent):
8540 string.memcpy(&self._pvt_ptr[0].extent, <cyruntime.cudaExtent*><void_ptr>extent.getPtr(), sizeof(self._pvt_ptr[0].extent))
8541 @property
8542 def srcAccessOrder(self):
8543 if self._pvt_ptr[0].srcAccessOrder not in _dict_cudaMemcpySrcAccessOrder:
8544 return None
8545 return _dict_cudaMemcpySrcAccessOrder[self._pvt_ptr[0].srcAccessOrder]
8546 @srcAccessOrder.setter
8547 def srcAccessOrder(self, srcAccessOrder not None : cudaMemcpySrcAccessOrder):
8548 self._pvt_ptr[0].srcAccessOrder = srcAccessOrder.value
8549 @property
8550 def flags(self):
8551 return self._pvt_ptr[0].flags
8552 @flags.setter
8553 def flags(self, unsigned int flags):
8554 self._pvt_ptr[0].flags = flags
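# Usage sketch (illustrative): one batch entry plus the launch call. The
# Python-level signature of cudaMemcpy3DBatchAsync is assumed here to take
# the op count, the op list, flags and a stream and to return (err, failIdx);
# check the generated docstring for the exact form in this build. `src`,
# `dst` and `stream` are assumed to exist (operands built as sketched above,
# plus an existing cudaStream_t).
#
#   op = runtime.cudaMemcpy3DBatchOp()
#   op.src = src
#   op.dst = dst
#   op.extent = runtime.make_cudaExtent(width, height, depth)   # all nonzero
#   op.srcAccessOrder = runtime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderStream
#   err, failIdx = runtime.cudaMemcpy3DBatchAsync(1, [op], 0, stream)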
8556cdef class CUuuid_st:
8557 """
8558 Attributes
8559 ----------
8560 bytes : bytes
8561 CUDA definition of UUID
8563 Methods
8564 -------
8565 getPtr()
8566 Get memory address of class instance
8567 """
8568 def __cinit__(self, void_ptr _ptr = 0):
8569 if _ptr == 0:
8570 self._pvt_ptr = &self._pvt_val
8571 else:
8572 self._pvt_ptr = <cyruntime.CUuuid_st *>_ptr
8573 def __init__(self, void_ptr _ptr = 0):
8574 pass
8575 def __dealloc__(self):
8576 pass
8577 def getPtr(self):
8578 return <void_ptr>self._pvt_ptr
8579 def __repr__(self):
8580 if self._pvt_ptr is not NULL:
8581 str_list = []
8582 try:
8583 str_list += ['bytes : ' + self.bytes.hex()]
8584 except ValueError:
8585 str_list += ['bytes : <ValueError>']
8586 return '\n'.join(str_list)
8587 else:
8588 return ''
8589 @property
8590 def bytes(self):
8591 return PyBytes_FromStringAndSize(self._pvt_ptr[0].bytes, 16)
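# Usage sketch (illustrative): the 16 raw bytes are exposed read-only and are
# convenient to render as hex. Assumes device 0 exists.
#
#   err, prop = runtime.cudaGetDeviceProperties(0)
#   print(prop.uuid.bytes.hex())   # 32 hex digits identifying the device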
8593cdef class cudaDeviceProp:
8594 """
8595 CUDA device properties
8597 Attributes
8598 ----------
8599 name : bytes
8600 ASCII string identifying device
8601 uuid : cudaUUID_t
8602 16-byte unique identifier
8603 luid : bytes
8604 8-byte locally unique identifier. Value is undefined on TCC and
8605 non-Windows platforms
8606 luidDeviceNodeMask : unsigned int
8607 LUID device node mask. Value is undefined on TCC and non-Windows
8608 platforms
8609 totalGlobalMem : size_t
8610 Global memory available on device in bytes
8611 sharedMemPerBlock : size_t
8612 Shared memory available per block in bytes
8613 regsPerBlock : int
8614 32-bit registers available per block
8615 warpSize : int
8616 Warp size in threads
8617 memPitch : size_t
8618 Maximum pitch in bytes allowed by memory copies
8619 maxThreadsPerBlock : int
8620 Maximum number of threads per block
8621 maxThreadsDim : list[int]
8622 Maximum size of each dimension of a block
8623 maxGridSize : list[int]
8624 Maximum size of each dimension of a grid
8625 totalConstMem : size_t
8626 Constant memory available on device in bytes
8627 major : int
8628 Major compute capability
8629 minor : int
8630 Minor compute capability
8631 textureAlignment : size_t
8632 Alignment requirement for textures
8633 texturePitchAlignment : size_t
8634 Pitch alignment requirement for texture references bound to pitched
8635 memory
8636 multiProcessorCount : int
8637 Number of multiprocessors on device
8638 integrated : int
8639 Device is integrated as opposed to discrete
8640 canMapHostMemory : int
8641 Device can map host memory with
8642 cudaHostAlloc/cudaHostGetDevicePointer
8643 maxTexture1D : int
8644 Maximum 1D texture size
8645 maxTexture1DMipmap : int
8646 Maximum 1D mipmapped texture size
8647 maxTexture2D : list[int]
8648 Maximum 2D texture dimensions
8649 maxTexture2DMipmap : list[int]
8650 Maximum 2D mipmapped texture dimensions
8651 maxTexture2DLinear : list[int]
8652 Maximum dimensions (width, height, pitch) for 2D textures bound to
8653 pitched memory
8654 maxTexture2DGather : list[int]
8655 Maximum 2D texture dimensions if texture gather operations have to
8656 be performed
8657 maxTexture3D : list[int]
8658 Maximum 3D texture dimensions
8659 maxTexture3DAlt : list[int]
8660 Maximum alternate 3D texture dimensions
8661 maxTextureCubemap : int
8662 Maximum Cubemap texture dimensions
8663 maxTexture1DLayered : list[int]
8664 Maximum 1D layered texture dimensions
8665 maxTexture2DLayered : list[int]
8666 Maximum 2D layered texture dimensions
8667 maxTextureCubemapLayered : list[int]
8668 Maximum Cubemap layered texture dimensions
8669 maxSurface1D : int
8670 Maximum 1D surface size
8671 maxSurface2D : list[int]
8672 Maximum 2D surface dimensions
8673 maxSurface3D : list[int]
8674 Maximum 3D surface dimensions
8675 maxSurface1DLayered : list[int]
8676 Maximum 1D layered surface dimensions
8677 maxSurface2DLayered : list[int]
8678 Maximum 2D layered surface dimensions
8679 maxSurfaceCubemap : int
8680 Maximum Cubemap surface dimensions
8681 maxSurfaceCubemapLayered : list[int]
8682 Maximum Cubemap layered surface dimensions
8683 surfaceAlignment : size_t
8684 Alignment requirements for surfaces
8685 concurrentKernels : int
8686 Device can possibly execute multiple kernels concurrently
8687 ECCEnabled : int
8688 Device has ECC support enabled
8689 pciBusID : int
8690 PCI bus ID of the device
8691 pciDeviceID : int
8692 PCI device ID of the device
8693 pciDomainID : int
8694 PCI domain ID of the device
8695 tccDriver : int
8696 1 if device is a Tesla device using TCC driver, 0 otherwise
8697 asyncEngineCount : int
8698 Number of asynchronous engines
8699 unifiedAddressing : int
8700 Device shares a unified address space with the host
8701 memoryBusWidth : int
8702 Global memory bus width in bits
8703 l2CacheSize : int
8704 Size of L2 cache in bytes
8705 persistingL2CacheMaxSize : int
8706 Device's maximum l2 persisting lines capacity setting in bytes
8707 maxThreadsPerMultiProcessor : int
8708 Maximum resident threads per multiprocessor
8709 streamPrioritiesSupported : int
8710 Device supports stream priorities
8711 globalL1CacheSupported : int
8712 Device supports caching globals in L1
8713 localL1CacheSupported : int
8714 Device supports caching locals in L1
8715 sharedMemPerMultiprocessor : size_t
8716 Shared memory available per multiprocessor in bytes
8717 regsPerMultiprocessor : int
8718 32-bit registers available per multiprocessor
8719 managedMemory : int
8720 Device supports allocating managed memory on this system
8721 isMultiGpuBoard : int
8722 Device is on a multi-GPU board
8723 multiGpuBoardGroupID : int
8724 Unique identifier for a group of devices on the same multi-GPU
8725 board
8726 hostNativeAtomicSupported : int
8727 Link between the device and the host supports native atomic
8728 operations
8729 pageableMemoryAccess : int
8730 Device supports coherently accessing pageable memory without
8731 calling cudaHostRegister on it
8732 concurrentManagedAccess : int
8733 Device can coherently access managed memory concurrently with the
8734 CPU
8735 computePreemptionSupported : int
8736 Device supports Compute Preemption
8737 canUseHostPointerForRegisteredMem : int
8738 Device can access host registered memory at the same virtual
8739 address as the CPU
8740 cooperativeLaunch : int
8741 Device supports launching cooperative kernels via
8742 cudaLaunchCooperativeKernel
8743 sharedMemPerBlockOptin : size_t
8744 Per device maximum shared memory per block usable by special opt in
8745 pageableMemoryAccessUsesHostPageTables : int
8746 Device accesses pageable memory via the host's page tables
8747 directManagedMemAccessFromHost : int
8748 Host can directly access managed memory on the device without
8749 migration.
8750 maxBlocksPerMultiProcessor : int
8751 Maximum number of resident blocks per multiprocessor
8752 accessPolicyMaxWindowSize : int
8753 The maximum value of cudaAccessPolicyWindow::num_bytes.
8754 reservedSharedMemPerBlock : size_t
8755 Shared memory reserved by CUDA driver per block in bytes
8756 hostRegisterSupported : int
8757 Device supports host memory registration via cudaHostRegister.
8758 sparseCudaArraySupported : int
8759 1 if the device supports sparse CUDA arrays and sparse CUDA
8760 mipmapped arrays, 0 otherwise
8761 hostRegisterReadOnlySupported : int
8762 Device supports using the cudaHostRegister flag
8763 cudaHostRegisterReadOnly to register memory that must be mapped as
8764 read-only to the GPU
8765 timelineSemaphoreInteropSupported : int
8766 External timeline semaphore interop is supported on the device
8767 memoryPoolsSupported : int
8768 1 if the device supports using the cudaMallocAsync and cudaMemPool
8769 family of APIs, 0 otherwise
8770 gpuDirectRDMASupported : int
8771 1 if the device supports GPUDirect RDMA APIs, 0 otherwise
8772 gpuDirectRDMAFlushWritesOptions : unsigned int
8773 Bitmask to be interpreted according to the
8774 cudaFlushGPUDirectRDMAWritesOptions enum
8775 gpuDirectRDMAWritesOrdering : int
8776 See the cudaGPUDirectRDMAWritesOrdering enum for numerical values
8777 memoryPoolSupportedHandleTypes : unsigned int
8778 Bitmask of handle types supported with mempool-based IPC
8779 deferredMappingCudaArraySupported : int
8780 1 if the device supports deferred mapping CUDA arrays and CUDA
8781 mipmapped arrays
8782 ipcEventSupported : int
8783 Device supports IPC Events.
8784 clusterLaunch : int
8785 Indicates device supports cluster launch
8786 unifiedFunctionPointers : int
8787 Indicates device supports unified pointers
8788 deviceNumaConfig : int
8789 NUMA configuration of a device: value is of type
8790 cudaDeviceNumaConfig enum
8791 deviceNumaId : int
8792 NUMA node ID of the GPU memory
8793 mpsEnabled : int
8794 Indicates if contexts created on this device will be shared via MPS
8795 hostNumaId : int
8796 NUMA ID of the host node closest to the device or -1 when system
8797 does not support NUMA
8798 gpuPciDeviceID : unsigned int
8799 The combined 16-bit PCI device ID and 16-bit PCI vendor ID
8800 gpuPciSubsystemID : unsigned int
8801 The combined 16-bit PCI subsystem ID and 16-bit PCI subsystem
8802 vendor ID
8803 hostNumaMultinodeIpcSupported : int
8804 1 if the device supports HostNuma location IPC between nodes in a
8805 multi-node system.
8806 reserved : list[int]
8807 Reserved for future use
8809 Methods
8810 -------
8811 getPtr()
8812 Get memory address of class instance
8813 """
8814 def __cinit__(self, void_ptr _ptr = 0):
8815 if _ptr == 0:
8816 self._pvt_ptr = &self._pvt_val
8817 else:
8818 self._pvt_ptr = <cyruntime.cudaDeviceProp *>_ptr
8819 def __init__(self, void_ptr _ptr = 0):
8821 self._uuid = cudaUUID_t(_ptr=<void_ptr>&self._pvt_ptr[0].uuid)
8822 def __dealloc__(self):
8823 pass
8824 def getPtr(self):
8825 return <void_ptr>self._pvt_ptr
8826 def __repr__(self):
8827 if self._pvt_ptr is not NULL:
8828 str_list = []
8829 try:
8830 str_list += ['name : ' + self.name.decode('utf-8')]
8831 except ValueError:
8832 str_list += ['name : <ValueError>']
8833 try:
8834 str_list += ['uuid :\n' + '\n'.join([' ' + line for line in str(self.uuid).splitlines()])]
8835 except ValueError:
8836 str_list += ['uuid : <ValueError>']
8837 try:
8838 str_list += ['luid : ' + self.luid.hex()]
8839 except ValueError:
8840 str_list += ['luid : <ValueError>']
8841 try:
8842 str_list += ['luidDeviceNodeMask : ' + str(self.luidDeviceNodeMask)]
8843 except ValueError:
8844 str_list += ['luidDeviceNodeMask : <ValueError>']
8845 try:
8846 str_list += ['totalGlobalMem : ' + str(self.totalGlobalMem)]
8847 except ValueError:
8848 str_list += ['totalGlobalMem : <ValueError>']
8849 try:
8850 str_list += ['sharedMemPerBlock : ' + str(self.sharedMemPerBlock)]
8851 except ValueError:
8852 str_list += ['sharedMemPerBlock : <ValueError>']
8853 try:
8854 str_list += ['regsPerBlock : ' + str(self.regsPerBlock)]
8855 except ValueError:
8856 str_list += ['regsPerBlock : <ValueError>']
8857 try:
8858 str_list += ['warpSize : ' + str(self.warpSize)]
8859 except ValueError:
8860 str_list += ['warpSize : <ValueError>']
8861 try:
8862 str_list += ['memPitch : ' + str(self.memPitch)]
8863 except ValueError:
8864 str_list += ['memPitch : <ValueError>']
8865 try:
8866 str_list += ['maxThreadsPerBlock : ' + str(self.maxThreadsPerBlock)]
8867 except ValueError:
8868 str_list += ['maxThreadsPerBlock : <ValueError>']
8869 try:
8870 str_list += ['maxThreadsDim : ' + str(self.maxThreadsDim)]
8871 except ValueError:
8872 str_list += ['maxThreadsDim : <ValueError>']
8873 try:
8874 str_list += ['maxGridSize : ' + str(self.maxGridSize)]
8875 except ValueError:
8876 str_list += ['maxGridSize : <ValueError>']
8877 try:
8878 str_list += ['totalConstMem : ' + str(self.totalConstMem)]
8879 except ValueError:
8880 str_list += ['totalConstMem : <ValueError>']
8881 try:
8882 str_list += ['major : ' + str(self.major)]
8883 except ValueError:
8884 str_list += ['major : <ValueError>']
8885 try:
8886 str_list += ['minor : ' + str(self.minor)]
8887 except ValueError:
8888 str_list += ['minor : <ValueError>']
8889 try:
8890 str_list += ['textureAlignment : ' + str(self.textureAlignment)]
8891 except ValueError:
8892 str_list += ['textureAlignment : <ValueError>']
8893 try:
8894 str_list += ['texturePitchAlignment : ' + str(self.texturePitchAlignment)]
8895 except ValueError:
8896 str_list += ['texturePitchAlignment : <ValueError>']
8897 try:
8898 str_list += ['multiProcessorCount : ' + str(self.multiProcessorCount)]
8899 except ValueError:
8900 str_list += ['multiProcessorCount : <ValueError>']
8901 try:
8902 str_list += ['integrated : ' + str(self.integrated)]
8903 except ValueError:
8904 str_list += ['integrated : <ValueError>']
8905 try:
8906 str_list += ['canMapHostMemory : ' + str(self.canMapHostMemory)]
8907 except ValueError:
8908 str_list += ['canMapHostMemory : <ValueError>']
8909 try:
8910 str_list += ['maxTexture1D : ' + str(self.maxTexture1D)]
8911 except ValueError:
8912 str_list += ['maxTexture1D : <ValueError>']
8913 try:
8914 str_list += ['maxTexture1DMipmap : ' + str(self.maxTexture1DMipmap)]
8915 except ValueError:
8916 str_list += ['maxTexture1DMipmap : <ValueError>']
8917 try:
8918 str_list += ['maxTexture2D : ' + str(self.maxTexture2D)]
8919 except ValueError:
8920 str_list += ['maxTexture2D : <ValueError>']
8921 try:
8922 str_list += ['maxTexture2DMipmap : ' + str(self.maxTexture2DMipmap)]
8923 except ValueError:
8924 str_list += ['maxTexture2DMipmap : <ValueError>']
8925 try:
8926 str_list += ['maxTexture2DLinear : ' + str(self.maxTexture2DLinear)]
8927 except ValueError:
8928 str_list += ['maxTexture2DLinear : <ValueError>']
8929 try:
8930 str_list += ['maxTexture2DGather : ' + str(self.maxTexture2DGather)]
8931 except ValueError:
8932 str_list += ['maxTexture2DGather : <ValueError>']
8933 try:
8934 str_list += ['maxTexture3D : ' + str(self.maxTexture3D)]
8935 except ValueError:
8936 str_list += ['maxTexture3D : <ValueError>']
8937 try:
8938 str_list += ['maxTexture3DAlt : ' + str(self.maxTexture3DAlt)]
8939 except ValueError:
8940 str_list += ['maxTexture3DAlt : <ValueError>']
8941 try:
8942 str_list += ['maxTextureCubemap : ' + str(self.maxTextureCubemap)]
8943 except ValueError:
8944 str_list += ['maxTextureCubemap : <ValueError>']
8945 try:
8946 str_list += ['maxTexture1DLayered : ' + str(self.maxTexture1DLayered)]
8947 except ValueError:
8948 str_list += ['maxTexture1DLayered : <ValueError>']
8949 try:
8950 str_list += ['maxTexture2DLayered : ' + str(self.maxTexture2DLayered)]
8951 except ValueError:
8952 str_list += ['maxTexture2DLayered : <ValueError>']
8953 try:
8954 str_list += ['maxTextureCubemapLayered : ' + str(self.maxTextureCubemapLayered)]
8955 except ValueError:
8956 str_list += ['maxTextureCubemapLayered : <ValueError>']
8957 try:
8958 str_list += ['maxSurface1D : ' + str(self.maxSurface1D)]
8959 except ValueError:
8960 str_list += ['maxSurface1D : <ValueError>']
8961 try:
8962 str_list += ['maxSurface2D : ' + str(self.maxSurface2D)]
8963 except ValueError:
8964 str_list += ['maxSurface2D : <ValueError>']
8965 try:
8966 str_list += ['maxSurface3D : ' + str(self.maxSurface3D)]
8967 except ValueError:
8968 str_list += ['maxSurface3D : <ValueError>']
8969 try:
8970 str_list += ['maxSurface1DLayered : ' + str(self.maxSurface1DLayered)]
8971 except ValueError:
8972 str_list += ['maxSurface1DLayered : <ValueError>']
8973 try:
8974 str_list += ['maxSurface2DLayered : ' + str(self.maxSurface2DLayered)]
8975 except ValueError:
8976 str_list += ['maxSurface2DLayered : <ValueError>']
8977 try:
8978 str_list += ['maxSurfaceCubemap : ' + str(self.maxSurfaceCubemap)]
8979 except ValueError:
8980 str_list += ['maxSurfaceCubemap : <ValueError>']
8981 try:
8982 str_list += ['maxSurfaceCubemapLayered : ' + str(self.maxSurfaceCubemapLayered)]
8983 except ValueError:
8984 str_list += ['maxSurfaceCubemapLayered : <ValueError>']
8985 try:
8986 str_list += ['surfaceAlignment : ' + str(self.surfaceAlignment)]
8987 except ValueError:
8988 str_list += ['surfaceAlignment : <ValueError>']
8989 try:
8990 str_list += ['concurrentKernels : ' + str(self.concurrentKernels)]
8991 except ValueError:
8992 str_list += ['concurrentKernels : <ValueError>']
8993 try:
8994 str_list += ['ECCEnabled : ' + str(self.ECCEnabled)]
8995 except ValueError:
8996 str_list += ['ECCEnabled : <ValueError>']
8997 try:
8998 str_list += ['pciBusID : ' + str(self.pciBusID)]
8999 except ValueError:
9000 str_list += ['pciBusID : <ValueError>']
9001 try:
9002 str_list += ['pciDeviceID : ' + str(self.pciDeviceID)]
9003 except ValueError:
9004 str_list += ['pciDeviceID : <ValueError>']
9005 try:
9006 str_list += ['pciDomainID : ' + str(self.pciDomainID)]
9007 except ValueError:
9008 str_list += ['pciDomainID : <ValueError>']
9009 try:
9010 str_list += ['tccDriver : ' + str(self.tccDriver)]
9011 except ValueError:
9012 str_list += ['tccDriver : <ValueError>']
9013 try:
9014 str_list += ['asyncEngineCount : ' + str(self.asyncEngineCount)]
9015 except ValueError:
9016 str_list += ['asyncEngineCount : <ValueError>']
9017 try:
9018 str_list += ['unifiedAddressing : ' + str(self.unifiedAddressing)]
9019 except ValueError:
9020 str_list += ['unifiedAddressing : <ValueError>']
9021 try:
9022 str_list += ['memoryBusWidth : ' + str(self.memoryBusWidth)]
9023 except ValueError:
9024 str_list += ['memoryBusWidth : <ValueError>']
9025 try:
9026 str_list += ['l2CacheSize : ' + str(self.l2CacheSize)]
9027 except ValueError:
9028 str_list += ['l2CacheSize : <ValueError>']
9029 try:
9030 str_list += ['persistingL2CacheMaxSize : ' + str(self.persistingL2CacheMaxSize)]
9031 except ValueError:
9032 str_list += ['persistingL2CacheMaxSize : <ValueError>']
9033 try:
9034 str_list += ['maxThreadsPerMultiProcessor : ' + str(self.maxThreadsPerMultiProcessor)]
9035 except ValueError:
9036 str_list += ['maxThreadsPerMultiProcessor : <ValueError>']
9037 try:
9038 str_list += ['streamPrioritiesSupported : ' + str(self.streamPrioritiesSupported)]
9039 except ValueError:
9040 str_list += ['streamPrioritiesSupported : <ValueError>']
9041 try:
9042 str_list += ['globalL1CacheSupported : ' + str(self.globalL1CacheSupported)]
9043 except ValueError:
9044 str_list += ['globalL1CacheSupported : <ValueError>']
9045 try:
9046 str_list += ['localL1CacheSupported : ' + str(self.localL1CacheSupported)]
9047 except ValueError:
9048 str_list += ['localL1CacheSupported : <ValueError>']
9049 try:
9050 str_list += ['sharedMemPerMultiprocessor : ' + str(self.sharedMemPerMultiprocessor)]
9051 except ValueError:
9052 str_list += ['sharedMemPerMultiprocessor : <ValueError>']
9053 try:
9054 str_list += ['regsPerMultiprocessor : ' + str(self.regsPerMultiprocessor)]
9055 except ValueError:
9056 str_list += ['regsPerMultiprocessor : <ValueError>']
9057 try:
9058 str_list += ['managedMemory : ' + str(self.managedMemory)]
9059 except ValueError:
9060 str_list += ['managedMemory : <ValueError>']
9061 try:
9062 str_list += ['isMultiGpuBoard : ' + str(self.isMultiGpuBoard)]
9063 except ValueError:
9064 str_list += ['isMultiGpuBoard : <ValueError>']
9065 try:
9066 str_list += ['multiGpuBoardGroupID : ' + str(self.multiGpuBoardGroupID)]
9067 except ValueError:
9068 str_list += ['multiGpuBoardGroupID : <ValueError>']
9069 try:
9070 str_list += ['hostNativeAtomicSupported : ' + str(self.hostNativeAtomicSupported)]
9071 except ValueError:
9072 str_list += ['hostNativeAtomicSupported : <ValueError>']
9073 try:
9074 str_list += ['pageableMemoryAccess : ' + str(self.pageableMemoryAccess)]
9075 except ValueError:
9076 str_list += ['pageableMemoryAccess : <ValueError>']
9077 try:
9078 str_list += ['concurrentManagedAccess : ' + str(self.concurrentManagedAccess)]
9079 except ValueError:
9080 str_list += ['concurrentManagedAccess : <ValueError>']
9081 try:
9082 str_list += ['computePreemptionSupported : ' + str(self.computePreemptionSupported)]
9083 except ValueError:
9084 str_list += ['computePreemptionSupported : <ValueError>']
9085 try:
9086 str_list += ['canUseHostPointerForRegisteredMem : ' + str(self.canUseHostPointerForRegisteredMem)]
9087 except ValueError:
9088 str_list += ['canUseHostPointerForRegisteredMem : <ValueError>']
9089 try:
9090 str_list += ['cooperativeLaunch : ' + str(self.cooperativeLaunch)]
9091 except ValueError:
9092 str_list += ['cooperativeLaunch : <ValueError>']
9093 try:
9094 str_list += ['sharedMemPerBlockOptin : ' + str(self.sharedMemPerBlockOptin)]
9095 except ValueError:
9096 str_list += ['sharedMemPerBlockOptin : <ValueError>']
9097 try:
9098 str_list += ['pageableMemoryAccessUsesHostPageTables : ' + str(self.pageableMemoryAccessUsesHostPageTables)]
9099 except ValueError:
9100 str_list += ['pageableMemoryAccessUsesHostPageTables : <ValueError>']
9101 try:
9102 str_list += ['directManagedMemAccessFromHost : ' + str(self.directManagedMemAccessFromHost)]
9103 except ValueError:
9104 str_list += ['directManagedMemAccessFromHost : <ValueError>']
9105 try:
9106 str_list += ['maxBlocksPerMultiProcessor : ' + str(self.maxBlocksPerMultiProcessor)]
9107 except ValueError:
9108 str_list += ['maxBlocksPerMultiProcessor : <ValueError>']
9109 try:
9110 str_list += ['accessPolicyMaxWindowSize : ' + str(self.accessPolicyMaxWindowSize)]
9111 except ValueError:
9112 str_list += ['accessPolicyMaxWindowSize : <ValueError>']
9113 try:
9114 str_list += ['reservedSharedMemPerBlock : ' + str(self.reservedSharedMemPerBlock)]
9115 except ValueError:
9116 str_list += ['reservedSharedMemPerBlock : <ValueError>']
9117 try:
9118 str_list += ['hostRegisterSupported : ' + str(self.hostRegisterSupported)]
9119 except ValueError:
9120 str_list += ['hostRegisterSupported : <ValueError>']
9121 try:
9122 str_list += ['sparseCudaArraySupported : ' + str(self.sparseCudaArraySupported)]
9123 except ValueError:
9124 str_list += ['sparseCudaArraySupported : <ValueError>']
9125 try:
9126 str_list += ['hostRegisterReadOnlySupported : ' + str(self.hostRegisterReadOnlySupported)]
9127 except ValueError:
9128 str_list += ['hostRegisterReadOnlySupported : <ValueError>']
9129 try:
9130 str_list += ['timelineSemaphoreInteropSupported : ' + str(self.timelineSemaphoreInteropSupported)]
9131 except ValueError:
9132 str_list += ['timelineSemaphoreInteropSupported : <ValueError>']
9133 try:
9134 str_list += ['memoryPoolsSupported : ' + str(self.memoryPoolsSupported)]
9135 except ValueError:
9136 str_list += ['memoryPoolsSupported : <ValueError>']
9137 try:
9138 str_list += ['gpuDirectRDMASupported : ' + str(self.gpuDirectRDMASupported)]
9139 except ValueError:
9140 str_list += ['gpuDirectRDMASupported : <ValueError>']
9141 try:
9142 str_list += ['gpuDirectRDMAFlushWritesOptions : ' + str(self.gpuDirectRDMAFlushWritesOptions)]
9143 except ValueError:
9144 str_list += ['gpuDirectRDMAFlushWritesOptions : <ValueError>']
9145 try:
9146 str_list += ['gpuDirectRDMAWritesOrdering : ' + str(self.gpuDirectRDMAWritesOrdering)]
9147 except ValueError:
9148 str_list += ['gpuDirectRDMAWritesOrdering : <ValueError>']
9149 try:
9150 str_list += ['memoryPoolSupportedHandleTypes : ' + str(self.memoryPoolSupportedHandleTypes)]
9151 except ValueError:
9152 str_list += ['memoryPoolSupportedHandleTypes : <ValueError>']
9153 try:
9154 str_list += ['deferredMappingCudaArraySupported : ' + str(self.deferredMappingCudaArraySupported)]
9155 except ValueError:
9156 str_list += ['deferredMappingCudaArraySupported : <ValueError>']
9157 try:
9158 str_list += ['ipcEventSupported : ' + str(self.ipcEventSupported)]
9159 except ValueError:
9160 str_list += ['ipcEventSupported : <ValueError>']
9161 try:
9162 str_list += ['clusterLaunch : ' + str(self.clusterLaunch)]
9163 except ValueError:
9164 str_list += ['clusterLaunch : <ValueError>']
9165 try:
9166 str_list += ['unifiedFunctionPointers : ' + str(self.unifiedFunctionPointers)]
9167 except ValueError:
9168 str_list += ['unifiedFunctionPointers : <ValueError>']
9169 try:
9170 str_list += ['deviceNumaConfig : ' + str(self.deviceNumaConfig)]
9171 except ValueError:
9172 str_list += ['deviceNumaConfig : <ValueError>']
9173 try:
9174 str_list += ['deviceNumaId : ' + str(self.deviceNumaId)]
9175 except ValueError:
9176 str_list += ['deviceNumaId : <ValueError>']
9177 try:
9178 str_list += ['mpsEnabled : ' + str(self.mpsEnabled)]
9179 except ValueError:
9180 str_list += ['mpsEnabled : <ValueError>']
9181 try:
9182 str_list += ['hostNumaId : ' + str(self.hostNumaId)]
9183 except ValueError:
9184 str_list += ['hostNumaId : <ValueError>']
9185 try:
9186 str_list += ['gpuPciDeviceID : ' + str(self.gpuPciDeviceID)]
9187 except ValueError:
9188 str_list += ['gpuPciDeviceID : <ValueError>']
9189 try:
9190 str_list += ['gpuPciSubsystemID : ' + str(self.gpuPciSubsystemID)]
9191 except ValueError:
9192 str_list += ['gpuPciSubsystemID : <ValueError>']
9193 try:
9194 str_list += ['hostNumaMultinodeIpcSupported : ' + str(self.hostNumaMultinodeIpcSupported)]
9195 except ValueError:
9196 str_list += ['hostNumaMultinodeIpcSupported : <ValueError>']
9197 try:
9198 str_list += ['reserved : ' + str(self.reserved)]
9199 except ValueError:
9200 str_list += ['reserved : <ValueError>']
9201 return '\n'.join(str_list)
9202 else:
9203 return ''
9204 @property
9205 def name(self):
9206 return self._pvt_ptr[0].name
9207 @name.setter
9208 def name(self, name):
9210 self._pvt_ptr[0].name = name
9211 @property
9212 def uuid(self):
9213 return self._uuid
9214 @uuid.setter
9215 def uuid(self, uuid not None : cudaUUID_t):
9216 string.memcpy(&self._pvt_ptr[0].uuid, <cyruntime.cudaUUID_t*><void_ptr>uuid.getPtr(), sizeof(self._pvt_ptr[0].uuid))
9217 @property
9218 def luid(self):
9219 return PyBytes_FromStringAndSize(self._pvt_ptr[0].luid, 8)
9220 @luid.setter
9221 def luid(self, luid):
9222 if len(luid) != 8:
9223 raise ValueError("luid length must be 8, is " + str(len(luid)))
9224 if CHAR_MIN == 0:
# char is unsigned on this platform: fold negative ints (-128..-1)
# into their two's-complement byte values (128..255)
9225 for i, b in enumerate(luid):
9226 if -129 < b < 0:
9227 b = b + 256
9228 self._pvt_ptr[0].luid[i] = b
9229 else:
# char is signed on this platform: fold 128..255 into -128..-1
9230 for i, b in enumerate(luid):
9231 if 127 < b < 256:
9232 b = b - 256
9233 self._pvt_ptr[0].luid[i] = b
9234 @property
9235 def luidDeviceNodeMask(self):
9236 return self._pvt_ptr[0].luidDeviceNodeMask
9237 @luidDeviceNodeMask.setter
9238 def luidDeviceNodeMask(self, unsigned int luidDeviceNodeMask):
9239 self._pvt_ptr[0].luidDeviceNodeMask = luidDeviceNodeMask
9240 @property
9241 def totalGlobalMem(self):
9242 return self._pvt_ptr[0].totalGlobalMem
9243 @totalGlobalMem.setter
9244 def totalGlobalMem(self, size_t totalGlobalMem):
9245 self._pvt_ptr[0].totalGlobalMem = totalGlobalMem
9246 @property
9247 def sharedMemPerBlock(self):
9248 return self._pvt_ptr[0].sharedMemPerBlock
9249 @sharedMemPerBlock.setter
9250 def sharedMemPerBlock(self, size_t sharedMemPerBlock):
9251 self._pvt_ptr[0].sharedMemPerBlock = sharedMemPerBlock
9252 @property
9253 def regsPerBlock(self):
9254 return self._pvt_ptr[0].regsPerBlock
9255 @regsPerBlock.setter
9256 def regsPerBlock(self, int regsPerBlock):
9257 self._pvt_ptr[0].regsPerBlock = regsPerBlock
9258 @property
9259 def warpSize(self):
9260 return self._pvt_ptr[0].warpSize
9261 @warpSize.setter
9262 def warpSize(self, int warpSize):
9263 self._pvt_ptr[0].warpSize = warpSize
9264 @property
9265 def memPitch(self):
9266 return self._pvt_ptr[0].memPitch
9267 @memPitch.setter
9268 def memPitch(self, size_t memPitch):
9269 self._pvt_ptr[0].memPitch = memPitch
9270 @property
9271 def maxThreadsPerBlock(self):
9272 return self._pvt_ptr[0].maxThreadsPerBlock
9273 @maxThreadsPerBlock.setter
9274 def maxThreadsPerBlock(self, int maxThreadsPerBlock):
9275 self._pvt_ptr[0].maxThreadsPerBlock = maxThreadsPerBlock
9276 @property
9277 def maxThreadsDim(self):
9278 return self._pvt_ptr[0].maxThreadsDim
9279 @maxThreadsDim.setter
9280 def maxThreadsDim(self, maxThreadsDim):
9281 self._pvt_ptr[0].maxThreadsDim = maxThreadsDim
9282 @property
9283 def maxGridSize(self):
9284 return self._pvt_ptr[0].maxGridSize
9285 @maxGridSize.setter
9286 def maxGridSize(self, maxGridSize):
9287 self._pvt_ptr[0].maxGridSize = maxGridSize
9288 @property
9289 def totalConstMem(self):
9290 return self._pvt_ptr[0].totalConstMem
9291 @totalConstMem.setter
9292 def totalConstMem(self, size_t totalConstMem):
9293 self._pvt_ptr[0].totalConstMem = totalConstMem
9294 @property
9295 def major(self):
9296 return self._pvt_ptr[0].major
9297 @major.setter
9298 def major(self, int major):
9299 self._pvt_ptr[0].major = major
9300 @property
9301 def minor(self):
9302 return self._pvt_ptr[0].minor
9303 @minor.setter
9304 def minor(self, int minor):
9305 self._pvt_ptr[0].minor = minor
9306 @property
9307 def textureAlignment(self):
9308 return self._pvt_ptr[0].textureAlignment
9309 @textureAlignment.setter
9310 def textureAlignment(self, size_t textureAlignment):
9311 self._pvt_ptr[0].textureAlignment = textureAlignment
9312 @property
9313 def texturePitchAlignment(self):
9314 return self._pvt_ptr[0].texturePitchAlignment
9315 @texturePitchAlignment.setter
9316 def texturePitchAlignment(self, size_t texturePitchAlignment):
9317 self._pvt_ptr[0].texturePitchAlignment = texturePitchAlignment
9318 @property
9319 def multiProcessorCount(self):
9320 return self._pvt_ptr[0].multiProcessorCount
9321 @multiProcessorCount.setter
9322 def multiProcessorCount(self, int multiProcessorCount):
9323 self._pvt_ptr[0].multiProcessorCount = multiProcessorCount
9324 @property
9325 def integrated(self):
9326 return self._pvt_ptr[0].integrated
9327 @integrated.setter
9328 def integrated(self, int integrated):
9329 self._pvt_ptr[0].integrated = integrated
9330 @property
9331 def canMapHostMemory(self):
9332 return self._pvt_ptr[0].canMapHostMemory
9333 @canMapHostMemory.setter
9334 def canMapHostMemory(self, int canMapHostMemory):
9335 self._pvt_ptr[0].canMapHostMemory = canMapHostMemory
9336 @property
9337 def maxTexture1D(self):
9338 return self._pvt_ptr[0].maxTexture1D
9339 @maxTexture1D.setter
9340 def maxTexture1D(self, int maxTexture1D):
9341 self._pvt_ptr[0].maxTexture1D = maxTexture1D
9342 @property
9343 def maxTexture1DMipmap(self):
9344 return self._pvt_ptr[0].maxTexture1DMipmap
9345 @maxTexture1DMipmap.setter
9346 def maxTexture1DMipmap(self, int maxTexture1DMipmap):
9347 self._pvt_ptr[0].maxTexture1DMipmap = maxTexture1DMipmap
9348 @property
9349 def maxTexture2D(self):
9350 return self._pvt_ptr[0].maxTexture2D
9351 @maxTexture2D.setter
9352 def maxTexture2D(self, maxTexture2D):
9353 self._pvt_ptr[0].maxTexture2D = maxTexture2D
9354 @property
9355 def maxTexture2DMipmap(self):
9356 return self._pvt_ptr[0].maxTexture2DMipmap
9357 @maxTexture2DMipmap.setter
9358 def maxTexture2DMipmap(self, maxTexture2DMipmap):
9359 self._pvt_ptr[0].maxTexture2DMipmap = maxTexture2DMipmap
9360 @property
9361 def maxTexture2DLinear(self):
9362 return self._pvt_ptr[0].maxTexture2DLinear
9363 @maxTexture2DLinear.setter
9364 def maxTexture2DLinear(self, maxTexture2DLinear):
9365 self._pvt_ptr[0].maxTexture2DLinear = maxTexture2DLinear
9366 @property
9367 def maxTexture2DGather(self):
9368 return self._pvt_ptr[0].maxTexture2DGather
9369 @maxTexture2DGather.setter
9370 def maxTexture2DGather(self, maxTexture2DGather):
9371 self._pvt_ptr[0].maxTexture2DGather = maxTexture2DGather
9372 @property
9373 def maxTexture3D(self):
9374 return self._pvt_ptr[0].maxTexture3D
9375 @maxTexture3D.setter
9376 def maxTexture3D(self, maxTexture3D):
9377 self._pvt_ptr[0].maxTexture3D = maxTexture3D
9378 @property
9379 def maxTexture3DAlt(self):
9380 return self._pvt_ptr[0].maxTexture3DAlt
9381 @maxTexture3DAlt.setter
9382 def maxTexture3DAlt(self, maxTexture3DAlt):
9383 self._pvt_ptr[0].maxTexture3DAlt = maxTexture3DAlt
9384 @property
9385 def maxTextureCubemap(self):
9386 return self._pvt_ptr[0].maxTextureCubemap
9387 @maxTextureCubemap.setter
9388 def maxTextureCubemap(self, int maxTextureCubemap):
9389 self._pvt_ptr[0].maxTextureCubemap = maxTextureCubemap
9390 @property
9391 def maxTexture1DLayered(self):
9392 return self._pvt_ptr[0].maxTexture1DLayered
9393 @maxTexture1DLayered.setter
9394 def maxTexture1DLayered(self, maxTexture1DLayered):
9395 self._pvt_ptr[0].maxTexture1DLayered = maxTexture1DLayered
9396 @property
9397 def maxTexture2DLayered(self):
9398 return self._pvt_ptr[0].maxTexture2DLayered
9399 @maxTexture2DLayered.setter
9400 def maxTexture2DLayered(self, maxTexture2DLayered):
9401 self._pvt_ptr[0].maxTexture2DLayered = maxTexture2DLayered
9402 @property
9403 def maxTextureCubemapLayered(self):
9404 return self._pvt_ptr[0].maxTextureCubemapLayered
9405 @maxTextureCubemapLayered.setter
9406 def maxTextureCubemapLayered(self, maxTextureCubemapLayered):
9407 self._pvt_ptr[0].maxTextureCubemapLayered = maxTextureCubemapLayered
9408 @property
9409 def maxSurface1D(self):
9410 return self._pvt_ptr[0].maxSurface1D
9411 @maxSurface1D.setter
9412 def maxSurface1D(self, int maxSurface1D):
9413 self._pvt_ptr[0].maxSurface1D = maxSurface1D
9414 @property
9415 def maxSurface2D(self):
9416 return self._pvt_ptr[0].maxSurface2D
9417 @maxSurface2D.setter
9418 def maxSurface2D(self, maxSurface2D):
9419 self._pvt_ptr[0].maxSurface2D = maxSurface2D
9420 @property
9421 def maxSurface3D(self):
9422 return self._pvt_ptr[0].maxSurface3D
9423 @maxSurface3D.setter
9424 def maxSurface3D(self, maxSurface3D):
9425 self._pvt_ptr[0].maxSurface3D = maxSurface3D
9426 @property
9427 def maxSurface1DLayered(self):
9428 return self._pvt_ptr[0].maxSurface1DLayered
9429 @maxSurface1DLayered.setter
9430 def maxSurface1DLayered(self, maxSurface1DLayered):
9431 self._pvt_ptr[0].maxSurface1DLayered = maxSurface1DLayered
9432 @property
9433 def maxSurface2DLayered(self):
9434 return self._pvt_ptr[0].maxSurface2DLayered
9435 @maxSurface2DLayered.setter
9436 def maxSurface2DLayered(self, maxSurface2DLayered):
9437 self._pvt_ptr[0].maxSurface2DLayered = maxSurface2DLayered
9438 @property
9439 def maxSurfaceCubemap(self):
9440 return self._pvt_ptr[0].maxSurfaceCubemap
9441 @maxSurfaceCubemap.setter
9442 def maxSurfaceCubemap(self, int maxSurfaceCubemap):
9443 self._pvt_ptr[0].maxSurfaceCubemap = maxSurfaceCubemap
9444 @property
9445 def maxSurfaceCubemapLayered(self):
9446 return self._pvt_ptr[0].maxSurfaceCubemapLayered
9447 @maxSurfaceCubemapLayered.setter
9448 def maxSurfaceCubemapLayered(self, maxSurfaceCubemapLayered):
9449 self._pvt_ptr[0].maxSurfaceCubemapLayered = maxSurfaceCubemapLayered
9450 @property
9451 def surfaceAlignment(self):
9452 return self._pvt_ptr[0].surfaceAlignment
9453 @surfaceAlignment.setter
9454 def surfaceAlignment(self, size_t surfaceAlignment):
9455 self._pvt_ptr[0].surfaceAlignment = surfaceAlignment
9456 @property
9457 def concurrentKernels(self):
9458 return self._pvt_ptr[0].concurrentKernels
9459 @concurrentKernels.setter
9460 def concurrentKernels(self, int concurrentKernels):
9461 self._pvt_ptr[0].concurrentKernels = concurrentKernels
9462 @property
9463 def ECCEnabled(self):
9464 return self._pvt_ptr[0].ECCEnabled
9465 @ECCEnabled.setter
9466 def ECCEnabled(self, int ECCEnabled):
9467 self._pvt_ptr[0].ECCEnabled = ECCEnabled
9468 @property
9469 def pciBusID(self):
9470 return self._pvt_ptr[0].pciBusID
9471 @pciBusID.setter
9472 def pciBusID(self, int pciBusID):
9473 self._pvt_ptr[0].pciBusID = pciBusID
9474 @property
9475 def pciDeviceID(self):
9476 return self._pvt_ptr[0].pciDeviceID
9477 @pciDeviceID.setter
9478 def pciDeviceID(self, int pciDeviceID):
9479 self._pvt_ptr[0].pciDeviceID = pciDeviceID
9480 @property
9481 def pciDomainID(self):
9482 return self._pvt_ptr[0].pciDomainID
9483 @pciDomainID.setter
9484 def pciDomainID(self, int pciDomainID):
9485 self._pvt_ptr[0].pciDomainID = pciDomainID
9486 @property
9487 def tccDriver(self):
9488 return self._pvt_ptr[0].tccDriver
9489 @tccDriver.setter
9490 def tccDriver(self, int tccDriver):
9491 self._pvt_ptr[0].tccDriver = tccDriver
9492 @property
9493 def asyncEngineCount(self):
9494 return self._pvt_ptr[0].asyncEngineCount
9495 @asyncEngineCount.setter
9496 def asyncEngineCount(self, int asyncEngineCount):
9497 self._pvt_ptr[0].asyncEngineCount = asyncEngineCount
9498 @property
9499 def unifiedAddressing(self):
9500 return self._pvt_ptr[0].unifiedAddressing
9501 @unifiedAddressing.setter
9502 def unifiedAddressing(self, int unifiedAddressing):
9503 self._pvt_ptr[0].unifiedAddressing = unifiedAddressing
9504 @property
9505 def memoryBusWidth(self):
9506 return self._pvt_ptr[0].memoryBusWidth
9507 @memoryBusWidth.setter
9508 def memoryBusWidth(self, int memoryBusWidth):
9509 self._pvt_ptr[0].memoryBusWidth = memoryBusWidth
9510 @property
9511 def l2CacheSize(self):
9512 return self._pvt_ptr[0].l2CacheSize
9513 @l2CacheSize.setter
9514 def l2CacheSize(self, int l2CacheSize):
9515 self._pvt_ptr[0].l2CacheSize = l2CacheSize
9516 @property
9517 def persistingL2CacheMaxSize(self):
9518 return self._pvt_ptr[0].persistingL2CacheMaxSize
9519 @persistingL2CacheMaxSize.setter
9520 def persistingL2CacheMaxSize(self, int persistingL2CacheMaxSize):
9521 self._pvt_ptr[0].persistingL2CacheMaxSize = persistingL2CacheMaxSize
9522 @property
9523 def maxThreadsPerMultiProcessor(self):
9524 return self._pvt_ptr[0].maxThreadsPerMultiProcessor
9525 @maxThreadsPerMultiProcessor.setter
9526 def maxThreadsPerMultiProcessor(self, int maxThreadsPerMultiProcessor):
9527 self._pvt_ptr[0].maxThreadsPerMultiProcessor = maxThreadsPerMultiProcessor
9528 @property
9529 def streamPrioritiesSupported(self):
9530 return self._pvt_ptr[0].streamPrioritiesSupported
9531 @streamPrioritiesSupported.setter
9532 def streamPrioritiesSupported(self, int streamPrioritiesSupported):
9533 self._pvt_ptr[0].streamPrioritiesSupported = streamPrioritiesSupported
9534 @property
9535 def globalL1CacheSupported(self):
9536 return self._pvt_ptr[0].globalL1CacheSupported
9537 @globalL1CacheSupported.setter
9538 def globalL1CacheSupported(self, int globalL1CacheSupported):
9539 self._pvt_ptr[0].globalL1CacheSupported = globalL1CacheSupported
9540 @property
9541 def localL1CacheSupported(self):
9542 return self._pvt_ptr[0].localL1CacheSupported
9543 @localL1CacheSupported.setter
9544 def localL1CacheSupported(self, int localL1CacheSupported):
9545 self._pvt_ptr[0].localL1CacheSupported = localL1CacheSupported
9546 @property
9547 def sharedMemPerMultiprocessor(self):
9548 return self._pvt_ptr[0].sharedMemPerMultiprocessor
9549 @sharedMemPerMultiprocessor.setter
9550 def sharedMemPerMultiprocessor(self, size_t sharedMemPerMultiprocessor):
9551 self._pvt_ptr[0].sharedMemPerMultiprocessor = sharedMemPerMultiprocessor
9552 @property
9553 def regsPerMultiprocessor(self):
9554 return self._pvt_ptr[0].regsPerMultiprocessor
9555 @regsPerMultiprocessor.setter
9556 def regsPerMultiprocessor(self, int regsPerMultiprocessor):
9557 self._pvt_ptr[0].regsPerMultiprocessor = regsPerMultiprocessor
9558 @property
9559 def managedMemory(self):
9560 return self._pvt_ptr[0].managedMemory
9561 @managedMemory.setter
9562 def managedMemory(self, int managedMemory):
9563 self._pvt_ptr[0].managedMemory = managedMemory
9564 @property
9565 def isMultiGpuBoard(self):
9566 return self._pvt_ptr[0].isMultiGpuBoard
9567 @isMultiGpuBoard.setter
9568 def isMultiGpuBoard(self, int isMultiGpuBoard):
9569 self._pvt_ptr[0].isMultiGpuBoard = isMultiGpuBoard
9570 @property
9571 def multiGpuBoardGroupID(self):
9572 return self._pvt_ptr[0].multiGpuBoardGroupID
9573 @multiGpuBoardGroupID.setter
9574 def multiGpuBoardGroupID(self, int multiGpuBoardGroupID):
9575 self._pvt_ptr[0].multiGpuBoardGroupID = multiGpuBoardGroupID
9576 @property
9577 def hostNativeAtomicSupported(self):
9578 return self._pvt_ptr[0].hostNativeAtomicSupported
9579 @hostNativeAtomicSupported.setter
9580 def hostNativeAtomicSupported(self, int hostNativeAtomicSupported):
9581 self._pvt_ptr[0].hostNativeAtomicSupported = hostNativeAtomicSupported
9582 @property
9583 def pageableMemoryAccess(self):
9584 return self._pvt_ptr[0].pageableMemoryAccess
9585 @pageableMemoryAccess.setter
9586 def pageableMemoryAccess(self, int pageableMemoryAccess):
9587 self._pvt_ptr[0].pageableMemoryAccess = pageableMemoryAccess
9588 @property
9589 def concurrentManagedAccess(self):
9590 return self._pvt_ptr[0].concurrentManagedAccess
9591 @concurrentManagedAccess.setter
9592 def concurrentManagedAccess(self, int concurrentManagedAccess):
9593 self._pvt_ptr[0].concurrentManagedAccess = concurrentManagedAccess
9594 @property
9595 def computePreemptionSupported(self):
9596 return self._pvt_ptr[0].computePreemptionSupported
9597 @computePreemptionSupported.setter
9598 def computePreemptionSupported(self, int computePreemptionSupported):
9599 self._pvt_ptr[0].computePreemptionSupported = computePreemptionSupported
9600 @property
9601 def canUseHostPointerForRegisteredMem(self):
9602 return self._pvt_ptr[0].canUseHostPointerForRegisteredMem
9603 @canUseHostPointerForRegisteredMem.setter
9604 def canUseHostPointerForRegisteredMem(self, int canUseHostPointerForRegisteredMem):
9605 self._pvt_ptr[0].canUseHostPointerForRegisteredMem = canUseHostPointerForRegisteredMem
9606 @property
9607 def cooperativeLaunch(self):
9608 return self._pvt_ptr[0].cooperativeLaunch
9609 @cooperativeLaunch.setter
9610 def cooperativeLaunch(self, int cooperativeLaunch):
9611 self._pvt_ptr[0].cooperativeLaunch = cooperativeLaunch
9612 @property
9613 def sharedMemPerBlockOptin(self):
9614 return self._pvt_ptr[0].sharedMemPerBlockOptin
9615 @sharedMemPerBlockOptin.setter
9616 def sharedMemPerBlockOptin(self, size_t sharedMemPerBlockOptin):
9617 self._pvt_ptr[0].sharedMemPerBlockOptin = sharedMemPerBlockOptin
9618 @property
9619 def pageableMemoryAccessUsesHostPageTables(self):
9620 return self._pvt_ptr[0].pageableMemoryAccessUsesHostPageTables
9621 @pageableMemoryAccessUsesHostPageTables.setter
9622 def pageableMemoryAccessUsesHostPageTables(self, int pageableMemoryAccessUsesHostPageTables):
9623 self._pvt_ptr[0].pageableMemoryAccessUsesHostPageTables = pageableMemoryAccessUsesHostPageTables
9624 @property
9625 def directManagedMemAccessFromHost(self):
9626 return self._pvt_ptr[0].directManagedMemAccessFromHost
9627 @directManagedMemAccessFromHost.setter
9628 def directManagedMemAccessFromHost(self, int directManagedMemAccessFromHost):
9629 self._pvt_ptr[0].directManagedMemAccessFromHost = directManagedMemAccessFromHost
9630 @property
9631 def maxBlocksPerMultiProcessor(self):
9632 return self._pvt_ptr[0].maxBlocksPerMultiProcessor
9633 @maxBlocksPerMultiProcessor.setter
9634 def maxBlocksPerMultiProcessor(self, int maxBlocksPerMultiProcessor):
9635 self._pvt_ptr[0].maxBlocksPerMultiProcessor = maxBlocksPerMultiProcessor
9636 @property
9637 def accessPolicyMaxWindowSize(self):
9638 return self._pvt_ptr[0].accessPolicyMaxWindowSize
9639 @accessPolicyMaxWindowSize.setter
9640 def accessPolicyMaxWindowSize(self, int accessPolicyMaxWindowSize):
9641 self._pvt_ptr[0].accessPolicyMaxWindowSize = accessPolicyMaxWindowSize
9642 @property
9643 def reservedSharedMemPerBlock(self):
9644 return self._pvt_ptr[0].reservedSharedMemPerBlock
9645 @reservedSharedMemPerBlock.setter
9646 def reservedSharedMemPerBlock(self, size_t reservedSharedMemPerBlock):
9647 self._pvt_ptr[0].reservedSharedMemPerBlock = reservedSharedMemPerBlock
9648 @property
9649 def hostRegisterSupported(self):
9650 return self._pvt_ptr[0].hostRegisterSupported
9651 @hostRegisterSupported.setter
9652 def hostRegisterSupported(self, int hostRegisterSupported):
9653 self._pvt_ptr[0].hostRegisterSupported = hostRegisterSupported
9654 @property
9655 def sparseCudaArraySupported(self):
9656 return self._pvt_ptr[0].sparseCudaArraySupported
9657 @sparseCudaArraySupported.setter
9658 def sparseCudaArraySupported(self, int sparseCudaArraySupported):
9659 self._pvt_ptr[0].sparseCudaArraySupported = sparseCudaArraySupported
9660 @property
9661 def hostRegisterReadOnlySupported(self):
9662 return self._pvt_ptr[0].hostRegisterReadOnlySupported
9663 @hostRegisterReadOnlySupported.setter
9664 def hostRegisterReadOnlySupported(self, int hostRegisterReadOnlySupported):
9665 self._pvt_ptr[0].hostRegisterReadOnlySupported = hostRegisterReadOnlySupported
9666 @property
9667 def timelineSemaphoreInteropSupported(self):
9668 return self._pvt_ptr[0].timelineSemaphoreInteropSupported
9669 @timelineSemaphoreInteropSupported.setter
9670 def timelineSemaphoreInteropSupported(self, int timelineSemaphoreInteropSupported):
9671 self._pvt_ptr[0].timelineSemaphoreInteropSupported = timelineSemaphoreInteropSupported
9672 @property
9673 def memoryPoolsSupported(self):
9674 return self._pvt_ptr[0].memoryPoolsSupported
9675 @memoryPoolsSupported.setter
9676 def memoryPoolsSupported(self, int memoryPoolsSupported):
9677 self._pvt_ptr[0].memoryPoolsSupported = memoryPoolsSupported
9678 @property
9679 def gpuDirectRDMASupported(self):
9680 return self._pvt_ptr[0].gpuDirectRDMASupported
9681 @gpuDirectRDMASupported.setter
9682 def gpuDirectRDMASupported(self, int gpuDirectRDMASupported):
9683 self._pvt_ptr[0].gpuDirectRDMASupported = gpuDirectRDMASupported
9684 @property
9685 def gpuDirectRDMAFlushWritesOptions(self):
9686 return self._pvt_ptr[0].gpuDirectRDMAFlushWritesOptions
9687 @gpuDirectRDMAFlushWritesOptions.setter
9688 def gpuDirectRDMAFlushWritesOptions(self, unsigned int gpuDirectRDMAFlushWritesOptions):
9689 self._pvt_ptr[0].gpuDirectRDMAFlushWritesOptions = gpuDirectRDMAFlushWritesOptions
9690 @property
9691 def gpuDirectRDMAWritesOrdering(self):
9692 return self._pvt_ptr[0].gpuDirectRDMAWritesOrdering
9693 @gpuDirectRDMAWritesOrdering.setter
9694 def gpuDirectRDMAWritesOrdering(self, int gpuDirectRDMAWritesOrdering):
9695 self._pvt_ptr[0].gpuDirectRDMAWritesOrdering = gpuDirectRDMAWritesOrdering
9696 @property
9697 def memoryPoolSupportedHandleTypes(self):
9698 return self._pvt_ptr[0].memoryPoolSupportedHandleTypes
9699 @memoryPoolSupportedHandleTypes.setter
9700 def memoryPoolSupportedHandleTypes(self, unsigned int memoryPoolSupportedHandleTypes):
9701 self._pvt_ptr[0].memoryPoolSupportedHandleTypes = memoryPoolSupportedHandleTypes
9702 @property
9703 def deferredMappingCudaArraySupported(self):
9704 return self._pvt_ptr[0].deferredMappingCudaArraySupported
9705 @deferredMappingCudaArraySupported.setter
9706 def deferredMappingCudaArraySupported(self, int deferredMappingCudaArraySupported):
9707 self._pvt_ptr[0].deferredMappingCudaArraySupported = deferredMappingCudaArraySupported
9708 @property
9709 def ipcEventSupported(self):
9710 return self._pvt_ptr[0].ipcEventSupported
9711 @ipcEventSupported.setter
9712 def ipcEventSupported(self, int ipcEventSupported):
9713 self._pvt_ptr[0].ipcEventSupported = ipcEventSupported
9714 @property
9715 def clusterLaunch(self):
9716 return self._pvt_ptr[0].clusterLaunch
9717 @clusterLaunch.setter
9718 def clusterLaunch(self, int clusterLaunch):
9719 self._pvt_ptr[0].clusterLaunch = clusterLaunch
9720 @property
9721 def unifiedFunctionPointers(self):
9722 return self._pvt_ptr[0].unifiedFunctionPointers
9723 @unifiedFunctionPointers.setter
9724 def unifiedFunctionPointers(self, int unifiedFunctionPointers):
9725 self._pvt_ptr[0].unifiedFunctionPointers = unifiedFunctionPointers
9726 @property
9727 def deviceNumaConfig(self):
9728 return self._pvt_ptr[0].deviceNumaConfig
9729 @deviceNumaConfig.setter
9730 def deviceNumaConfig(self, int deviceNumaConfig):
9731 self._pvt_ptr[0].deviceNumaConfig = deviceNumaConfig
9732 @property
9733 def deviceNumaId(self):
9734 return self._pvt_ptr[0].deviceNumaId
9735 @deviceNumaId.setter
9736 def deviceNumaId(self, int deviceNumaId):
9737 self._pvt_ptr[0].deviceNumaId = deviceNumaId
9738 @property
9739 def mpsEnabled(self):
9740 return self._pvt_ptr[0].mpsEnabled
9741 @mpsEnabled.setter
9742 def mpsEnabled(self, int mpsEnabled):
9743 self._pvt_ptr[0].mpsEnabled = mpsEnabled
9744 @property
9745 def hostNumaId(self):
9746 return self._pvt_ptr[0].hostNumaId
9747 @hostNumaId.setter
9748 def hostNumaId(self, int hostNumaId):
9749 self._pvt_ptr[0].hostNumaId = hostNumaId
9750 @property
9751 def gpuPciDeviceID(self):
9752 return self._pvt_ptr[0].gpuPciDeviceID
9753 @gpuPciDeviceID.setter
9754 def gpuPciDeviceID(self, unsigned int gpuPciDeviceID):
9755 self._pvt_ptr[0].gpuPciDeviceID = gpuPciDeviceID
9756 @property
9757 def gpuPciSubsystemID(self):
9758 return self._pvt_ptr[0].gpuPciSubsystemID
9759 @gpuPciSubsystemID.setter
9760 def gpuPciSubsystemID(self, unsigned int gpuPciSubsystemID):
9761 self._pvt_ptr[0].gpuPciSubsystemID = gpuPciSubsystemID
9762 @property
9763 def hostNumaMultinodeIpcSupported(self):
9764 return self._pvt_ptr[0].hostNumaMultinodeIpcSupported
9765 @hostNumaMultinodeIpcSupported.setter
9766 def hostNumaMultinodeIpcSupported(self, int hostNumaMultinodeIpcSupported):
9767 self._pvt_ptr[0].hostNumaMultinodeIpcSupported = hostNumaMultinodeIpcSupported
9768 @property
9769 def reserved(self):
9770 return self._pvt_ptr[0].reserved
9771 @reserved.setter
9772 def reserved(self, reserved):
9773 self._pvt_ptr[0].reserved = reserved
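# A minimal usage sketch (not part of the generated bindings): reading a few
# of the capability fields above. Assumes a CUDA device is visible; error
# handling is elided.
def _sketch_query_device_properties():
    err, prop = cudaGetDeviceProperties(0)
    if err == cudaError_t.cudaSuccess:
        print('memoryPoolsSupported       :', prop.memoryPoolsSupported)
        print('clusterLaunch              :', prop.clusterLaunch)
        print('maxBlocksPerMultiProcessor :', prop.maxBlocksPerMultiProcessor)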
9775cdef class cudaIpcEventHandle_st:
9776 """
9777 CUDA IPC event handle
9779 Attributes
9780 ----------
9781 reserved : bytes
9784 Methods
9785 -------
9786 getPtr()
9787 Get memory address of class instance
9788 """
9789 def __cinit__(self, void_ptr _ptr = 0):
9790 if _ptr == 0:
9791 self._pvt_ptr = &self._pvt_val
9792 else:
9793 self._pvt_ptr = <cyruntime.cudaIpcEventHandle_st *>_ptr
9794 def __init__(self, void_ptr _ptr = 0):
9795 pass
9796 def __dealloc__(self):
9797 pass
9798 def getPtr(self):
9799 return <void_ptr>self._pvt_ptr
9800 def __repr__(self):
9801 if self._pvt_ptr is not NULL:
9802 str_list = []
9803 try:
9804 str_list += ['reserved : ' + str(self.reserved)]
9805 except ValueError:
9806 str_list += ['reserved : <ValueError>']
9807 return '\n'.join(str_list)
9808 else:
9809 return ''
9810 @property
9811 def reserved(self):
9812 return PyBytes_FromStringAndSize(self._pvt_ptr[0].reserved, 64)
9813 @reserved.setter
9814 def reserved(self, reserved):
9815 if len(reserved) != 64:
9816 raise ValueError("reserved length must be 64, is " + str(len(reserved)))
9817 if CHAR_MIN == 0:
9818 for i, b in enumerate(reserved):
9819 if b < 0 and b > -129:
9820 b = b + 256
9821 self._pvt_ptr[0].reserved[i] = b
9822 else:
9823 for i, b in enumerate(reserved):
9824 if b > 127 and b < 256:
9825 b = b - 256
9826 self._pvt_ptr[0].reserved[i] = b
9828cdef class cudaIpcMemHandle_st:
9829 """
9830 CUDA IPC memory handle
9832 Attributes
9833 ----------
9834 reserved : bytes
9837 Methods
9838 -------
9839 getPtr()
9840 Get memory address of class instance
9841 """
9842 def __cinit__(self, void_ptr _ptr = 0):
9843 if _ptr == 0:
9844 self._pvt_ptr = &self._pvt_val
9845 else:
9846 self._pvt_ptr = <cyruntime.cudaIpcMemHandle_st *>_ptr
9847 def __init__(self, void_ptr _ptr = 0):
9848 pass
9849 def __dealloc__(self):
9850 pass
9851 def getPtr(self):
9852 return <void_ptr>self._pvt_ptr
9853 def __repr__(self):
9854 if self._pvt_ptr is not NULL:
9855 str_list = []
9856 try:
9857 str_list += ['reserved : ' + str(self.reserved)]
9858 except ValueError:
9859 str_list += ['reserved : <ValueError>']
9860 return '\n'.join(str_list)
9861 else:
9862 return ''
9863 @property
9864 def reserved(self):
9865 return PyBytes_FromStringAndSize(self._pvt_ptr[0].reserved, 64)
9866 @reserved.setter
9867 def reserved(self, reserved):
9868 if len(reserved) != 64:
9869 raise ValueError("reserved length must be 64, is " + str(len(reserved)))
9870 if CHAR_MIN == 0:
9871 for i, b in enumerate(reserved):
9872 if b < 0 and b > -129:
9873 b = b + 256
9874 self._pvt_ptr[0].reserved[i] = b
9875 else:
9876 for i, b in enumerate(reserved):
9877 if b > 127 and b < 256:
9878 b = b - 256
9879 self._pvt_ptr[0].reserved[i] = b
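# An IPC sketch (not part of the generated bindings), assuming two
# cooperating processes: the 64-byte `reserved` payload is the opaque blob
# that crosses the process boundary. Error handling is elided.
def _sketch_ipc_event_export():
    err, event = cudaEventCreateWithFlags(cudaEventInterprocess | cudaEventDisableTiming)
    err, handle = cudaIpcGetEventHandle(event)
    return bytes(handle.reserved)      # 64 bytes; send over a pipe or socket
def _sketch_ipc_event_import(payload):
    handle = cudaIpcEventHandle_t()
    handle.reserved = payload          # must be exactly 64 bytes
    err, event = cudaIpcOpenEventHandle(handle)
    return event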
9881cdef class cudaMemFabricHandle_st:
9882 """
9883 Attributes
9884 ----------
9885 reserved : bytes
9888 Methods
9889 -------
9890 getPtr()
9891 Get memory address of class instance
9892 """
9893 def __cinit__(self, void_ptr _ptr = 0):
9894 if _ptr == 0:
9895 self._pvt_ptr = &self._pvt_val
9896 else:
9897 self._pvt_ptr = <cyruntime.cudaMemFabricHandle_st *>_ptr
9898 def __init__(self, void_ptr _ptr = 0):
9899 pass
9900 def __dealloc__(self):
9901 pass
9902 def getPtr(self):
9903 return <void_ptr>self._pvt_ptr
9904 def __repr__(self):
9905 if self._pvt_ptr is not NULL:
9906 str_list = []
9907 try:
9908 str_list += ['reserved : ' + str(self.reserved)]
9909 except ValueError:
9910 str_list += ['reserved : <ValueError>']
9911 return '\n'.join(str_list)
9912 else:
9913 return ''
9914 @property
9915 def reserved(self):
9916 return PyBytes_FromStringAndSize(self._pvt_ptr[0].reserved, 64)
9917 @reserved.setter
9918 def reserved(self, reserved):
9919 if len(reserved) != 64:
9920 raise ValueError("reserved length must be 64, is " + str(len(reserved)))
9921 if CHAR_MIN == 0:
9922 for i, b in enumerate(reserved):
9923 if b < 0 and b > -129:
9924 b = b + 256
9925 self._pvt_ptr[0].reserved[i] = b
9926 else:
9927 for i, b in enumerate(reserved):
9928 if b > 127 and b < 256:
9929 b = b - 256
9930 self._pvt_ptr[0].reserved[i] = b
9932cdef class anon_struct8:
9933 """
9934 Attributes
9935 ----------
9936 handle : Any
9938 name : Any
9941 Methods
9942 -------
9943 getPtr()
9944 Get memory address of class instance
9945 """
9946 def __cinit__(self, void_ptr _ptr):
9947 self._pvt_ptr = <cyruntime.cudaExternalMemoryHandleDesc *>_ptr
9949 def __init__(self, void_ptr _ptr):
9950 pass
9951 def __dealloc__(self):
9952 pass
9953 def getPtr(self):
9954 return <void_ptr>&self._pvt_ptr[0].handle.win32
9955 def __repr__(self):
9956 if self._pvt_ptr is not NULL:
9957 str_list = []
9958 try:
9959 str_list += ['handle : ' + hex(self.handle)]
9960 except ValueError:
9961 str_list += ['handle : <ValueError>']
9962 try:
9963 str_list += ['name : ' + hex(self.name)]
9964 except ValueError:
9965 str_list += ['name : <ValueError>']
9966 return '\n'.join(str_list)
9967 else:
9968 return ''
9969 @property
9970 def handle(self):
9971 return <void_ptr>self._pvt_ptr[0].handle.win32.handle
9972 @handle.setter
9973 def handle(self, handle):
9974 _chandle = _HelperInputVoidPtr(handle)
9975 self._pvt_ptr[0].handle.win32.handle = <void*><void_ptr>_chandle.cptr
9976 @property
9977 def name(self):
9978 return <void_ptr>self._pvt_ptr[0].handle.win32.name
9979 @name.setter
9980 def name(self, name):
9981 _cname = _HelperInputVoidPtr(name)
9982 self._pvt_ptr[0].handle.win32.name = <void*><void_ptr>_cname.cptr
9984cdef class anon_union2:
9985 """
9986 Attributes
9987 ----------
9988 fd : int
9990 win32 : anon_struct8
9992 nvSciBufObject : Any
9995 Methods
9996 -------
9997 getPtr()
9998 Get memory address of class instance
9999 """
10000 def __cinit__(self, void_ptr _ptr):
10001 self._pvt_ptr = <cyruntime.cudaExternalMemoryHandleDesc *>_ptr
10003 def __init__(self, void_ptr _ptr):
10005 self._win32 = anon_struct8(_ptr=<void_ptr>self._pvt_ptr)
10006 def __dealloc__(self):
10007 pass
10008 def getPtr(self):
10009 return <void_ptr>&self._pvt_ptr[0].handle
10010 def __repr__(self):
10011 if self._pvt_ptr is not NULL:
10012 str_list = []
10013 try:
10014 str_list += ['fd : ' + str(self.fd)]
10015 except ValueError:
10016 str_list += ['fd : <ValueError>']
10017 try:
10018 str_list += ['win32 :\n' + '\n'.join([' ' + line for line in str(self.win32).splitlines()])]
10019 except ValueError:
10020 str_list += ['win32 : <ValueError>']
10021 try:
10022 str_list += ['nvSciBufObject : ' + hex(self.nvSciBufObject)]
10023 except ValueError:
10024 str_list += ['nvSciBufObject : <ValueError>']
10025 return '\n'.join(str_list)
10026 else:
10027 return ''
10028 @property
10029 def fd(self):
10030 return self._pvt_ptr[0].handle.fd
10031 @fd.setter
10032 def fd(self, int fd):
10033 self._pvt_ptr[0].handle.fd = fd
10034 @property
10035 def win32(self):
10036 return self._win32
10037 @win32.setter
10038 def win32(self, win32 not None : anon_struct8):
10039 string.memcpy(&self._pvt_ptr[0].handle.win32, <cyruntime.anon_struct8*><void_ptr>win32.getPtr(), sizeof(self._pvt_ptr[0].handle.win32))
10040 @property
10041 def nvSciBufObject(self):
10042 return <void_ptr>self._pvt_ptr[0].handle.nvSciBufObject
10043 @nvSciBufObject.setter
10044 def nvSciBufObject(self, nvSciBufObject):
10045 _cnvSciBufObject = _HelperInputVoidPtr(nvSciBufObject)
10046 self._pvt_ptr[0].handle.nvSciBufObject = <void*><void_ptr>_cnvSciBufObject.cptr
10048cdef class cudaExternalMemoryHandleDesc:
10049 """
10050 External memory handle descriptor
10052 Attributes
10053 ----------
10054 type : cudaExternalMemoryHandleType
10055 Type of the handle
10056 handle : anon_union2
10058 size : unsigned long long
10059 Size of the memory allocation
10060 flags : unsigned int
10061 Flags must either be zero or cudaExternalMemoryDedicated
10062 reserved : list[unsigned int]
10063 Must be zero
10065 Methods
10066 -------
10067 getPtr()
10068 Get memory address of class instance
10069 """
10070 def __cinit__(self, void_ptr _ptr = 0):
10071 if _ptr == 0:
10072 self._val_ptr = <cyruntime.cudaExternalMemoryHandleDesc *>calloc(1, sizeof(cyruntime.cudaExternalMemoryHandleDesc))
10073 self._pvt_ptr = self._val_ptr
10074 else:
10075 self._pvt_ptr = <cyruntime.cudaExternalMemoryHandleDesc *>_ptr
10076 def __init__(self, void_ptr _ptr = 0):
10078 self._handle = anon_union2(_ptr=<void_ptr>self._pvt_ptr)
10079 def __dealloc__(self):
10080 if self._val_ptr is not NULL:
10081 free(self._val_ptr)
10082 def getPtr(self):
10083 return <void_ptr>self._pvt_ptr
10084 def __repr__(self):
10085 if self._pvt_ptr is not NULL:
10086 str_list = []
10087 try:
10088 str_list += ['type : ' + str(self.type)]
10089 except ValueError:
10090 str_list += ['type : <ValueError>']
10091 try:
10092 str_list += ['handle :\n' + '\n'.join([' ' + line for line in str(self.handle).splitlines()])]
10093 except ValueError:
10094 str_list += ['handle : <ValueError>']
10095 try:
10096 str_list += ['size : ' + str(self.size)]
10097 except ValueError:
10098 str_list += ['size : <ValueError>']
10099 try:
10100 str_list += ['flags : ' + str(self.flags)]
10101 except ValueError:
10102 str_list += ['flags : <ValueError>']
10103 try:
10104 str_list += ['reserved : ' + str(self.reserved)]
10105 except ValueError:
10106 str_list += ['reserved : <ValueError>']
10107 return '\n'.join(str_list)
10108 else:
10109 return ''
10110 @property
10111 def type(self):
10112 if self._pvt_ptr[0].type not in _dict_cudaExternalMemoryHandleType:
10113 return None
10114 return _dict_cudaExternalMemoryHandleType[self._pvt_ptr[0].type]
10115 @type.setter
10116 def type(self, type not None : cudaExternalMemoryHandleType):
10117 self._pvt_ptr[0].type = type.value
10118 @property
10119 def handle(self):
10120 return self._handle
10121 @handle.setter
10122 def handle(self, handle not None : anon_union2):
10123 string.memcpy(&self._pvt_ptr[0].handle, <cyruntime.anon_union2*><void_ptr>handle.getPtr(), sizeof(self._pvt_ptr[0].handle))
10124 @property
10125 def size(self):
10126 return self._pvt_ptr[0].size
10127 @size.setter
10128 def size(self, unsigned long long size):
10129 self._pvt_ptr[0].size = size
10130 @property
10131 def flags(self):
10132 return self._pvt_ptr[0].flags
10133 @flags.setter
10134 def flags(self, unsigned int flags):
10135 self._pvt_ptr[0].flags = flags
10136 @property
10137 def reserved(self):
10138 return self._pvt_ptr[0].reserved
10139 @reserved.setter
10140 def reserved(self, reserved):
10141 self._pvt_ptr[0].reserved = reserved
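# An import sketch (not part of the generated bindings), assuming `fd` and
# `nbytes` describe an opaque POSIX file descriptor exported by another API
# (e.g. Vulkan). On successful import, ownership of the fd passes to CUDA.
def _sketch_import_external_memory(fd, nbytes):
    desc = cudaExternalMemoryHandleDesc()
    desc.type = cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueFd
    desc.handle.fd = fd
    desc.size = nbytes
    desc.flags = 0
    err, ext_mem = cudaImportExternalMemory(desc)
    return ext_mem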
10143cdef class cudaExternalMemoryBufferDesc:
10144 """
10145 External memory buffer descriptor
10147 Attributes
10148 ----------
10149 offset : unsigned long long
10150 Offset into the memory object where the buffer's base is
10151 size : unsigned long long
10152 Size of the buffer
10153 flags : unsigned int
10154 Flags reserved for future use. Must be zero.
10155 reserved : list[unsigned int]
10156 Must be zero
10158 Methods
10159 -------
10160 getPtr()
10161 Get memory address of class instance
10162 """
10163 def __cinit__(self, void_ptr _ptr = 0):
10164 if _ptr == 0:
10165 self._pvt_ptr = &self._pvt_val
10166 else:
10167 self._pvt_ptr = <cyruntime.cudaExternalMemoryBufferDesc *>_ptr
10168 def __init__(self, void_ptr _ptr = 0):
10169 pass
10170 def __dealloc__(self):
10171 pass
10172 def getPtr(self):
10173 return <void_ptr>self._pvt_ptr
10174 def __repr__(self):
10175 if self._pvt_ptr is not NULL:
10176 str_list = []
10177 try:
10178 str_list += ['offset : ' + str(self.offset)]
10179 except ValueError:
10180 str_list += ['offset : <ValueError>']
10181 try:
10182 str_list += ['size : ' + str(self.size)]
10183 except ValueError:
10184 str_list += ['size : <ValueError>']
10185 try:
10186 str_list += ['flags : ' + str(self.flags)]
10187 except ValueError:
10188 str_list += ['flags : <ValueError>']
10189 try:
10190 str_list += ['reserved : ' + str(self.reserved)]
10191 except ValueError:
10192 str_list += ['reserved : <ValueError>']
10193 return '\n'.join(str_list)
10194 else:
10195 return ''
10196 @property
10197 def offset(self):
10198 return self._pvt_ptr[0].offset
10199 @offset.setter
10200 def offset(self, unsigned long long offset):
10201 self._pvt_ptr[0].offset = offset
10202 @property
10203 def size(self):
10204 return self._pvt_ptr[0].size
10205 @size.setter
10206 def size(self, unsigned long long size):
10207 self._pvt_ptr[0].size = size
10208 @property
10209 def flags(self):
10210 return self._pvt_ptr[0].flags
10211 @flags.setter
10212 def flags(self, unsigned int flags):
10213 self._pvt_ptr[0].flags = flags
10214 @property
10215 def reserved(self):
10216 return self._pvt_ptr[0].reserved
10217 @reserved.setter
10218 def reserved(self, reserved):
10219 self._pvt_ptr[0].reserved = reserved
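# A follow-on sketch: carving a device pointer out of memory imported via
# cudaImportExternalMemory. `ext_mem` and `nbytes` are assumed from the
# sketch above.
def _sketch_map_external_buffer(ext_mem, nbytes):
    buf_desc = cudaExternalMemoryBufferDesc()
    buf_desc.offset = 0
    buf_desc.size = nbytes
    buf_desc.flags = 0                 # reserved; must be zero
    err, dev_ptr = cudaExternalMemoryGetMappedBuffer(ext_mem, buf_desc)
    return dev_ptr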
10221cdef class cudaExternalMemoryMipmappedArrayDesc:
10222 """
10223 External memory mipmap descriptor
10225 Attributes
10226 ----------
10227 offset : unsigned long long
10228 Offset into the memory object where the base level of the mipmap
10229 chain is.
10230 formatDesc : cudaChannelFormatDesc
10231 Format of base level of the mipmap chain
10232 extent : cudaExtent
10233 Dimensions of base level of the mipmap chain
10234 flags : unsigned int
10235 Flags associated with CUDA mipmapped arrays. See
10236 cudaMallocMipmappedArray
10237 numLevels : unsigned int
10238 Total number of levels in the mipmap chain
10239 reserved : list[unsigned int]
10240 Must be zero
10242 Methods
10243 -------
10244 getPtr()
10245 Get memory address of class instance
10246 """
10247 def __cinit__(self, void_ptr _ptr = 0):
10248 if _ptr == 0:
10249 self._pvt_ptr = &self._pvt_val
10250 else:
10251 self._pvt_ptr = <cyruntime.cudaExternalMemoryMipmappedArrayDesc *>_ptr
10252 def __init__(self, void_ptr _ptr = 0):
10254 self._formatDesc = cudaChannelFormatDesc(_ptr=<void_ptr>&self._pvt_ptr[0].formatDesc)
10255 self._extent = cudaExtent(_ptr=<void_ptr>&self._pvt_ptr[0].extent)
10256 def __dealloc__(self):
10257 pass
10258 def getPtr(self):
10259 return <void_ptr>self._pvt_ptr
10260 def __repr__(self):
10261 if self._pvt_ptr is not NULL:
10262 str_list = []
10263 try:
10264 str_list += ['offset : ' + str(self.offset)]
10265 except ValueError:
10266 str_list += ['offset : <ValueError>']
10267 try:
10268 str_list += ['formatDesc :\n' + '\n'.join([' ' + line for line in str(self.formatDesc).splitlines()])]
10269 except ValueError:
10270 str_list += ['formatDesc : <ValueError>']
10271 try:
10272 str_list += ['extent :\n' + '\n'.join([' ' + line for line in str(self.extent).splitlines()])]
10273 except ValueError:
10274 str_list += ['extent : <ValueError>']
10275 try:
10276 str_list += ['flags : ' + str(self.flags)]
10277 except ValueError:
10278 str_list += ['flags : <ValueError>']
10279 try:
10280 str_list += ['numLevels : ' + str(self.numLevels)]
10281 except ValueError:
10282 str_list += ['numLevels : <ValueError>']
10283 try:
10284 str_list += ['reserved : ' + str(self.reserved)]
10285 except ValueError:
10286 str_list += ['reserved : <ValueError>']
10287 return '\n'.join(str_list)
10288 else:
10289 return ''
10290 @property
10291 def offset(self):
10292 return self._pvt_ptr[0].offset
10293 @offset.setter
10294 def offset(self, unsigned long long offset):
10295 self._pvt_ptr[0].offset = offset
10296 @property
10297 def formatDesc(self):
10298 return self._formatDesc
10299 @formatDesc.setter
10300 def formatDesc(self, formatDesc not None : cudaChannelFormatDesc):
10301 string.memcpy(&self._pvt_ptr[0].formatDesc, <cyruntime.cudaChannelFormatDesc*><void_ptr>formatDesc.getPtr(), sizeof(self._pvt_ptr[0].formatDesc))
10302 @property
10303 def extent(self):
10304 return self._extent
10305 @extent.setter
10306 def extent(self, extent not None : cudaExtent):
10307 string.memcpy(&self._pvt_ptr[0].extent, <cyruntime.cudaExtent*><void_ptr>extent.getPtr(), sizeof(self._pvt_ptr[0].extent))
10308 @property
10309 def flags(self):
10310 return self._pvt_ptr[0].flags
10311 @flags.setter
10312 def flags(self, unsigned int flags):
10313 self._pvt_ptr[0].flags = flags
10314 @property
10315 def numLevels(self):
10316 return self._pvt_ptr[0].numLevels
10317 @numLevels.setter
10318 def numLevels(self, unsigned int numLevels):
10319 self._pvt_ptr[0].numLevels = numLevels
10320 @property
10321 def reserved(self):
10322 return self._pvt_ptr[0].reserved
10323 @reserved.setter
10324 def reserved(self, reserved):
10325 self._pvt_ptr[0].reserved = reserved
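# A sketch of describing a mipmapped array over imported external memory.
# The 8-bit RGBA format and 256x256 extent are placeholder assumptions.
def _sketch_map_external_mipmap(ext_mem):
    desc = cudaExternalMemoryMipmappedArrayDesc()
    desc.offset = 0
    fmt = cudaChannelFormatDesc()
    fmt.x = fmt.y = fmt.z = fmt.w = 8
    fmt.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
    desc.formatDesc = fmt
    ext = cudaExtent()
    ext.width, ext.height, ext.depth = 256, 256, 0
    desc.extent = ext
    desc.numLevels = 1
    err, mipmap = cudaExternalMemoryGetMappedMipmappedArray(ext_mem, desc)
    return mipmap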
10327cdef class anon_struct9:
10328 """
10329 Attributes
10330 ----------
10331 handle : Any
10333 name : Any
10336 Methods
10337 -------
10338 getPtr()
10339 Get memory address of class instance
10340 """
10341 def __cinit__(self, void_ptr _ptr):
10342 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreHandleDesc *>_ptr
10344 def __init__(self, void_ptr _ptr):
10345 pass
10346 def __dealloc__(self):
10347 pass
10348 def getPtr(self):
10349 return <void_ptr>&self._pvt_ptr[0].handle.win32
10350 def __repr__(self):
10351 if self._pvt_ptr is not NULL:
10352 str_list = []
10353 try:
10354 str_list += ['handle : ' + hex(self.handle)]
10355 except ValueError:
10356 str_list += ['handle : <ValueError>']
10357 try:
10358 str_list += ['name : ' + hex(self.name)]
10359 except ValueError:
10360 str_list += ['name : <ValueError>']
10361 return '\n'.join(str_list)
10362 else:
10363 return ''
10364 @property
10365 def handle(self):
10366 return <void_ptr>self._pvt_ptr[0].handle.win32.handle
10367 @handle.setter
10368 def handle(self, handle):
10369 _chandle = _HelperInputVoidPtr(handle)
10370 self._pvt_ptr[0].handle.win32.handle = <void*><void_ptr>_chandle.cptr
10371 @property
10372 def name(self):
10373 return <void_ptr>self._pvt_ptr[0].handle.win32.name
10374 @name.setter
10375 def name(self, name):
10376 _cname = _HelperInputVoidPtr(name)
10377 self._pvt_ptr[0].handle.win32.name = <void*><void_ptr>_cname.cptr
10379cdef class anon_union3:
10380 """
10381 Attributes
10382 ----------
10383 fd : int
10385 win32 : anon_struct9
10387 nvSciSyncObj : Any
10390 Methods
10391 -------
10392 getPtr()
10393 Get memory address of class instance
10394 """
10395 def __cinit__(self, void_ptr _ptr):
10396 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreHandleDesc *>_ptr
10398 def __init__(self, void_ptr _ptr):
10400 self._win32 = anon_struct9(_ptr=<void_ptr>self._pvt_ptr)
10401 def __dealloc__(self):
10402 pass
10403 def getPtr(self):
10404 return <void_ptr>&self._pvt_ptr[0].handle
10405 def __repr__(self):
10406 if self._pvt_ptr is not NULL:
10407 str_list = []
10408 try:
10409 str_list += ['fd : ' + str(self.fd)]
10410 except ValueError:
10411 str_list += ['fd : <ValueError>']
10412 try:
10413 str_list += ['win32 :\n' + '\n'.join([' ' + line for line in str(self.win32).splitlines()])]
10414 except ValueError:
10415 str_list += ['win32 : <ValueError>']
10416 try:
10417 str_list += ['nvSciSyncObj : ' + hex(self.nvSciSyncObj)]
10418 except ValueError:
10419 str_list += ['nvSciSyncObj : <ValueError>']
10420 return '\n'.join(str_list)
10421 else:
10422 return ''
10423 @property
10424 def fd(self):
10425 return self._pvt_ptr[0].handle.fd
10426 @fd.setter
10427 def fd(self, int fd):
10428 self._pvt_ptr[0].handle.fd = fd
10429 @property
10430 def win32(self):
10431 return self._win32
10432 @win32.setter
10433 def win32(self, win32 not None : anon_struct9):
10434 string.memcpy(&self._pvt_ptr[0].handle.win32, <cyruntime.anon_struct9*><void_ptr>win32.getPtr(), sizeof(self._pvt_ptr[0].handle.win32))
10435 @property
10436 def nvSciSyncObj(self):
10437 return <void_ptr>self._pvt_ptr[0].handle.nvSciSyncObj
10438 @nvSciSyncObj.setter
10439 def nvSciSyncObj(self, nvSciSyncObj):
10440 _cnvSciSyncObj = _HelperInputVoidPtr(nvSciSyncObj)
10441 self._pvt_ptr[0].handle.nvSciSyncObj = <void*><void_ptr>_cnvSciSyncObj.cptr
10443cdef class cudaExternalSemaphoreHandleDesc:
10444 """
10445 External semaphore handle descriptor
10447 Attributes
10448 ----------
10449 type : cudaExternalSemaphoreHandleType
10450 Type of the handle
10451 handle : anon_union3
10453 flags : unsigned int
10454 Flags reserved for the future. Must be zero.
10455 reserved : list[unsigned int]
10456 Must be zero
10458 Methods
10459 -------
10460 getPtr()
10461 Get memory address of class instance
10462 """
10463 def __cinit__(self, void_ptr _ptr = 0):
10464 if _ptr == 0:
10465 self._val_ptr = <cyruntime.cudaExternalSemaphoreHandleDesc *>calloc(1, sizeof(cyruntime.cudaExternalSemaphoreHandleDesc))
10466 self._pvt_ptr = self._val_ptr
10467 else:
10468 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreHandleDesc *>_ptr
10469 def __init__(self, void_ptr _ptr = 0):
10471 self._handle = anon_union3(_ptr=<void_ptr>self._pvt_ptr)
10472 def __dealloc__(self):
10473 if self._val_ptr is not NULL:
10474 free(self._val_ptr)
10475 def getPtr(self):
10476 return <void_ptr>self._pvt_ptr
10477 def __repr__(self):
10478 if self._pvt_ptr is not NULL:
10479 str_list = []
10480 try:
10481 str_list += ['type : ' + str(self.type)]
10482 except ValueError:
10483 str_list += ['type : <ValueError>']
10484 try:
10485 str_list += ['handle :\n' + '\n'.join([' ' + line for line in str(self.handle).splitlines()])]
10486 except ValueError:
10487 str_list += ['handle : <ValueError>']
10488 try:
10489 str_list += ['flags : ' + str(self.flags)]
10490 except ValueError:
10491 str_list += ['flags : <ValueError>']
10492 try:
10493 str_list += ['reserved : ' + str(self.reserved)]
10494 except ValueError:
10495 str_list += ['reserved : <ValueError>']
10496 return '\n'.join(str_list)
10497 else:
10498 return ''
10499 @property
10500 def type(self):
10501 if self._pvt_ptr[0].type not in _dict_cudaExternalSemaphoreHandleType:
10502 return None
10503 return _dict_cudaExternalSemaphoreHandleType[self._pvt_ptr[0].type]
10504 @type.setter
10505 def type(self, type not None : cudaExternalSemaphoreHandleType):
10506 self._pvt_ptr[0].type = type.value
10507 @property
10508 def handle(self):
10509 return self._handle
10510 @handle.setter
10511 def handle(self, handle not None : anon_union3):
10512 string.memcpy(&self._pvt_ptr[0].handle, <cyruntime.anon_union3*><void_ptr>handle.getPtr(), sizeof(self._pvt_ptr[0].handle))
10513 @property
10514 def flags(self):
10515 return self._pvt_ptr[0].flags
10516 @flags.setter
10517 def flags(self, unsigned int flags):
10518 self._pvt_ptr[0].flags = flags
10519 @property
10520 def reserved(self):
10521 return self._pvt_ptr[0].reserved
10522 @reserved.setter
10523 def reserved(self, reserved):
10524 self._pvt_ptr[0].reserved = reserved
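# An import sketch mirroring the external-memory pattern above, assuming
# `fd` is an opaque file descriptor exported by another API.
def _sketch_import_external_semaphore(fd):
    desc = cudaExternalSemaphoreHandleDesc()
    desc.type = cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueFd
    desc.handle.fd = fd
    desc.flags = 0
    err, ext_sem = cudaImportExternalSemaphore(desc)
    return ext_sem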
10526cdef class anon_struct10:
10527 """
10528 Attributes
10529 ----------
10530 value : unsigned long long
10533 Methods
10534 -------
10535 getPtr()
10536 Get memory address of class instance
10537 """
10538 def __cinit__(self, void_ptr _ptr):
10539 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
10541 def __init__(self, void_ptr _ptr):
10542 pass
10543 def __dealloc__(self):
10544 pass
10545 def getPtr(self):
10546 return <void_ptr>&self._pvt_ptr[0].params.fence
10547 def __repr__(self):
10548 if self._pvt_ptr is not NULL:
10549 str_list = []
10550 try:
10551 str_list += ['value : ' + str(self.value)]
10552 except ValueError:
10553 str_list += ['value : <ValueError>']
10554 return '\n'.join(str_list)
10555 else:
10556 return ''
10557 @property
10558 def value(self):
10559 return self._pvt_ptr[0].params.fence.value
10560 @value.setter
10561 def value(self, unsigned long long value):
10562 self._pvt_ptr[0].params.fence.value = value
10564cdef class anon_union4:
10565 """
10566 Attributes
10567 ----------
10568 fence : Any
10570 reserved : unsigned long long
10573 Methods
10574 -------
10575 getPtr()
10576 Get memory address of class instance
10577 """
10578 def __cinit__(self, void_ptr _ptr):
10579 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
10581 def __init__(self, void_ptr _ptr):
10582 pass
10583 def __dealloc__(self):
10584 pass
10585 def getPtr(self):
10586 return <void_ptr>&self._pvt_ptr[0].params.nvSciSync
10587 def __repr__(self):
10588 if self._pvt_ptr is not NULL:
10589 str_list = []
10590 try:
10591 str_list += ['fence : ' + hex(self.fence)]
10592 except ValueError:
10593 str_list += ['fence : <ValueError>']
10594 try:
10595 str_list += ['reserved : ' + str(self.reserved)]
10596 except ValueError:
10597 str_list += ['reserved : <ValueError>']
10598 return '\n'.join(str_list)
10599 else:
10600 return ''
10601 @property
10602 def fence(self):
10603 return <void_ptr>self._pvt_ptr[0].params.nvSciSync.fence
10604 @fence.setter
10605 def fence(self, fence):
10606 _cfence = _HelperInputVoidPtr(fence)
10607 self._pvt_ptr[0].params.nvSciSync.fence = <void*><void_ptr>_cfence.cptr
10608 @property
10609 def reserved(self):
10610 return self._pvt_ptr[0].params.nvSciSync.reserved
10611 @reserved.setter
10612 def reserved(self, unsigned long long reserved):
10613 self._pvt_ptr[0].params.nvSciSync.reserved = reserved
10615cdef class anon_struct11:
10616 """
10617 Attributes
10618 ----------
10619 key : unsigned long long
10622 Methods
10623 -------
10624 getPtr()
10625 Get memory address of class instance
10626 """
10627 def __cinit__(self, void_ptr _ptr):
10628 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
10630 def __init__(self, void_ptr _ptr):
10631 pass
10632 def __dealloc__(self):
10633 pass
10634 def getPtr(self):
10635 return <void_ptr>&self._pvt_ptr[0].params.keyedMutex
10636 def __repr__(self):
10637 if self._pvt_ptr is not NULL:
10638 str_list = []
10639 try:
10640 str_list += ['key : ' + str(self.key)]
10641 except ValueError:
10642 str_list += ['key : <ValueError>']
10643 return '\n'.join(str_list)
10644 else:
10645 return ''
10646 @property
10647 def key(self):
10648 return self._pvt_ptr[0].params.keyedMutex.key
10649 @key.setter
10650 def key(self, unsigned long long key):
10651 self._pvt_ptr[0].params.keyedMutex.key = key
10653cdef class anon_struct12:
10654 """
10655 Attributes
10656 ----------
10657 fence : anon_struct10
10659 nvSciSync : anon_union4
10661 keyedMutex : anon_struct11
10663 reserved : list[unsigned int]
10666 Methods
10667 -------
10668 getPtr()
10669 Get memory address of class instance
10670 """
10671 def __cinit__(self, void_ptr _ptr):
10672 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
10674 def __init__(self, void_ptr _ptr):
10676 self._fence = anon_struct10(_ptr=<void_ptr>self._pvt_ptr)
10677 self._nvSciSync = anon_union4(_ptr=<void_ptr>self._pvt_ptr)
10678 self._keyedMutex = anon_struct11(_ptr=<void_ptr>self._pvt_ptr)
10679 def __dealloc__(self):
10680 pass
10681 def getPtr(self):
10682 return <void_ptr>&self._pvt_ptr[0].params
10683 def __repr__(self):
10684 if self._pvt_ptr is not NULL:
10685 str_list = []
10686 try:
10687 str_list += ['fence :\n' + '\n'.join([' ' + line for line in str(self.fence).splitlines()])]
10688 except ValueError:
10689 str_list += ['fence : <ValueError>']
10690 try:
10691 str_list += ['nvSciSync :\n' + '\n'.join([' ' + line for line in str(self.nvSciSync).splitlines()])]
10692 except ValueError:
10693 str_list += ['nvSciSync : <ValueError>']
10694 try:
10695 str_list += ['keyedMutex :\n' + '\n'.join([' ' + line for line in str(self.keyedMutex).splitlines()])]
10696 except ValueError:
10697 str_list += ['keyedMutex : <ValueError>']
10698 try:
10699 str_list += ['reserved : ' + str(self.reserved)]
10700 except ValueError:
10701 str_list += ['reserved : <ValueError>']
10702 return '\n'.join(str_list)
10703 else:
10704 return ''
10705 @property
10706 def fence(self):
10707 return self._fence
10708 @fence.setter
10709 def fence(self, fence not None : anon_struct10):
10710 string.memcpy(&self._pvt_ptr[0].params.fence, <cyruntime.anon_struct10*><void_ptr>fence.getPtr(), sizeof(self._pvt_ptr[0].params.fence))
10711 @property
10712 def nvSciSync(self):
10713 return self._nvSciSync
10714 @nvSciSync.setter
10715 def nvSciSync(self, nvSciSync not None : anon_union4):
10716 string.memcpy(&self._pvt_ptr[0].params.nvSciSync, <cyruntime.anon_union4*><void_ptr>nvSciSync.getPtr(), sizeof(self._pvt_ptr[0].params.nvSciSync))
10717 @property
10718 def keyedMutex(self):
10719 return self._keyedMutex
10720 @keyedMutex.setter
10721 def keyedMutex(self, keyedMutex not None : anon_struct11):
10722 string.memcpy(&self._pvt_ptr[0].params.keyedMutex, <cyruntime.anon_struct11*><void_ptr>keyedMutex.getPtr(), sizeof(self._pvt_ptr[0].params.keyedMutex))
10723 @property
10724 def reserved(self):
10725 return self._pvt_ptr[0].params.reserved
10726 @reserved.setter
10727 def reserved(self, reserved):
10728 self._pvt_ptr[0].params.reserved = reserved
10730cdef class cudaExternalSemaphoreSignalParams:
10731 """
10732 External semaphore signal parameters, compatible with driver type
10734 Attributes
10735 ----------
10736 params : anon_struct12
10738 flags : unsigned int
10739 Only when cudaExternalSemaphoreSignalParams is used to signal a
10740 cudaExternalSemaphore_t of type
10741 cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is
10742 cudaExternalSemaphoreSignalSkipNvSciBufMemSync, which indicates
10743 that while signaling the cudaExternalSemaphore_t, no memory
10744 synchronization operations should be performed for any external
10745 memory object imported as cudaExternalMemoryHandleTypeNvSciBuf. For
10746 all other types of cudaExternalSemaphore_t, flags must be zero.
10747 reserved : list[unsigned int]
10750 Methods
10751 -------
10752 getPtr()
10753 Get memory address of class instance
10754 """
10755 def __cinit__(self, void_ptr _ptr = 0):
10756 if _ptr == 0:
10757 self._pvt_ptr = &self._pvt_val
10758 else:
10759 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalParams *>_ptr
10760 def __init__(self, void_ptr _ptr = 0):
10762 self._params = anon_struct12(_ptr=<void_ptr>self._pvt_ptr)
10763 def __dealloc__(self):
10764 pass
10765 def getPtr(self):
10766 return <void_ptr>self._pvt_ptr
10767 def __repr__(self):
10768 if self._pvt_ptr is not NULL:
10769 str_list = []
10770 try:
10771 str_list += ['params :\n' + '\n'.join([' ' + line for line in str(self.params).splitlines()])]
10772 except ValueError:
10773 str_list += ['params : <ValueError>']
10774 try:
10775 str_list += ['flags : ' + str(self.flags)]
10776 except ValueError:
10777 str_list += ['flags : <ValueError>']
10778 try:
10779 str_list += ['reserved : ' + str(self.reserved)]
10780 except ValueError:
10781 str_list += ['reserved : <ValueError>']
10782 return '\n'.join(str_list)
10783 else:
10784 return ''
10785 @property
10786 def params(self):
10787 return self._params
10788 @params.setter
10789 def params(self, params not None : anon_struct12):
10790 string.memcpy(&self._pvt_ptr[0].params, <cyruntime.anon_struct12*><void_ptr>params.getPtr(), sizeof(self._pvt_ptr[0].params))
10791 @property
10792 def flags(self):
10793 return self._pvt_ptr[0].flags
10794 @flags.setter
10795 def flags(self, unsigned int flags):
10796 self._pvt_ptr[0].flags = flags
10797 @property
10798 def reserved(self):
10799 return self._pvt_ptr[0].reserved
10800 @reserved.setter
10801 def reserved(self, reserved):
10802 self._pvt_ptr[0].reserved = reserved
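# A signal sketch, assuming `ext_sem` wraps a timeline-style semaphore whose
# fence payload is meaningful (e.g. a Vulkan timeline semaphore).
def _sketch_signal_external_semaphore(ext_sem, stream, value):
    params = cudaExternalSemaphoreSignalParams()
    params.params.fence.value = value  # target fence value
    params.flags = 0
    err, = cudaSignalExternalSemaphoresAsync([ext_sem], [params], 1, stream)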
10804cdef class anon_struct13:
10805 """
10806 Attributes
10807 ----------
10808 value : unsigned long long
10811 Methods
10812 -------
10813 getPtr()
10814 Get memory address of class instance
10815 """
10816 def __cinit__(self, void_ptr _ptr):
10817 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
10819 def __init__(self, void_ptr _ptr):
10820 pass
10821 def __dealloc__(self):
10822 pass
10823 def getPtr(self):
10824 return <void_ptr>&self._pvt_ptr[0].params.fence
10825 def __repr__(self):
10826 if self._pvt_ptr is not NULL:
10827 str_list = []
10828 try:
10829 str_list += ['value : ' + str(self.value)]
10830 except ValueError:
10831 str_list += ['value : <ValueError>']
10832 return '\n'.join(str_list)
10833 else:
10834 return ''
10835 @property
10836 def value(self):
10837 return self._pvt_ptr[0].params.fence.value
10838 @value.setter
10839 def value(self, unsigned long long value):
10840 self._pvt_ptr[0].params.fence.value = value
10842cdef class anon_union5:
10843 """
10844 Attributes
10845 ----------
10846 fence : Any
10848 reserved : unsigned long long
10851 Methods
10852 -------
10853 getPtr()
10854 Get memory address of class instance
10855 """
10856 def __cinit__(self, void_ptr _ptr):
10857 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
10859 def __init__(self, void_ptr _ptr):
10860 pass
10861 def __dealloc__(self):
10862 pass
10863 def getPtr(self):
10864 return <void_ptr>&self._pvt_ptr[0].params.nvSciSync
10865 def __repr__(self):
10866 if self._pvt_ptr is not NULL:
10867 str_list = []
10868 try:
10869 str_list += ['fence : ' + hex(self.fence)]
10870 except ValueError:
10871 str_list += ['fence : <ValueError>']
10872 try:
10873 str_list += ['reserved : ' + str(self.reserved)]
10874 except ValueError:
10875 str_list += ['reserved : <ValueError>']
10876 return '\n'.join(str_list)
10877 else:
10878 return ''
10879 @property
10880 def fence(self):
10881 return <void_ptr>self._pvt_ptr[0].params.nvSciSync.fence
10882 @fence.setter
10883 def fence(self, fence):
10884 _cfence = _HelperInputVoidPtr(fence)
10885 self._pvt_ptr[0].params.nvSciSync.fence = <void*><void_ptr>_cfence.cptr
10886 @property
10887 def reserved(self):
10888 return self._pvt_ptr[0].params.nvSciSync.reserved
10889 @reserved.setter
10890 def reserved(self, unsigned long long reserved):
10891 self._pvt_ptr[0].params.nvSciSync.reserved = reserved
10893cdef class anon_struct14:
10894 """
10895 Attributes
10896 ----------
10897 key : unsigned long long
10899 timeoutMs : unsigned int
10902 Methods
10903 -------
10904 getPtr()
10905 Get memory address of class instance
10906 """
10907 def __cinit__(self, void_ptr _ptr):
10908 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
10910 def __init__(self, void_ptr _ptr):
10911 pass
10912 def __dealloc__(self):
10913 pass
10914 def getPtr(self):
10915 return <void_ptr>&self._pvt_ptr[0].params.keyedMutex
10916 def __repr__(self):
10917 if self._pvt_ptr is not NULL:
10918 str_list = []
10919 try:
10920 str_list += ['key : ' + str(self.key)]
10921 except ValueError:
10922 str_list += ['key : <ValueError>']
10923 try:
10924 str_list += ['timeoutMs : ' + str(self.timeoutMs)]
10925 except ValueError:
10926 str_list += ['timeoutMs : <ValueError>']
10927 return '\n'.join(str_list)
10928 else:
10929 return ''
10930 @property
10931 def key(self):
10932 return self._pvt_ptr[0].params.keyedMutex.key
10933 @key.setter
10934 def key(self, unsigned long long key):
10935 self._pvt_ptr[0].params.keyedMutex.key = key
10936 @property
10937 def timeoutMs(self):
10938 return self._pvt_ptr[0].params.keyedMutex.timeoutMs
10939 @timeoutMs.setter
10940 def timeoutMs(self, unsigned int timeoutMs):
10941 self._pvt_ptr[0].params.keyedMutex.timeoutMs = timeoutMs
10943cdef class anon_struct15:
10944 """
10945 Attributes
10946 ----------
10947 fence : anon_struct13
10949 nvSciSync : anon_union5
10951 keyedMutex : anon_struct14
10953 reserved : list[unsigned int]
10956 Methods
10957 -------
10958 getPtr()
10959 Get memory address of class instance
10960 """
10961 def __cinit__(self, void_ptr _ptr):
10962 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
10964 def __init__(self, void_ptr _ptr):
10966 self._fence = anon_struct13(_ptr=<void_ptr>self._pvt_ptr)
10967 self._nvSciSync = anon_union5(_ptr=<void_ptr>self._pvt_ptr)
10968 self._keyedMutex = anon_struct14(_ptr=<void_ptr>self._pvt_ptr)
10969 def __dealloc__(self):
10970 pass
10971 def getPtr(self):
10972 return <void_ptr>&self._pvt_ptr[0].params
10973 def __repr__(self):
10974 if self._pvt_ptr is not NULL:
10975 str_list = []
10976 try:
10977 str_list += ['fence :\n' + '\n'.join([' ' + line for line in str(self.fence).splitlines()])]
10978 except ValueError:
10979 str_list += ['fence : <ValueError>']
10980 try:
10981 str_list += ['nvSciSync :\n' + '\n'.join([' ' + line for line in str(self.nvSciSync).splitlines()])]
10982 except ValueError:
10983 str_list += ['nvSciSync : <ValueError>']
10984 try:
10985 str_list += ['keyedMutex :\n' + '\n'.join([' ' + line for line in str(self.keyedMutex).splitlines()])]
10986 except ValueError:
10987 str_list += ['keyedMutex : <ValueError>']
10988 try:
10989 str_list += ['reserved : ' + str(self.reserved)]
10990 except ValueError:
10991 str_list += ['reserved : <ValueError>']
10992 return '\n'.join(str_list)
10993 else:
10994 return ''
10995 @property
10996 def fence(self):
10997 return self._fence
10998 @fence.setter
10999 def fence(self, fence not None : anon_struct13):
11000 string.memcpy(&self._pvt_ptr[0].params.fence, <cyruntime.anon_struct13*><void_ptr>fence.getPtr(), sizeof(self._pvt_ptr[0].params.fence))
11001 @property
11002 def nvSciSync(self):
11003 return self._nvSciSync
11004 @nvSciSync.setter
11005 def nvSciSync(self, nvSciSync not None : anon_union5):
11006 string.memcpy(&self._pvt_ptr[0].params.nvSciSync, <cyruntime.anon_union5*><void_ptr>nvSciSync.getPtr(), sizeof(self._pvt_ptr[0].params.nvSciSync))
11007 @property
11008 def keyedMutex(self):
11009 return self._keyedMutex
11010 @keyedMutex.setter
11011 def keyedMutex(self, keyedMutex not None : anon_struct14):
11012 string.memcpy(&self._pvt_ptr[0].params.keyedMutex, <cyruntime.anon_struct14*><void_ptr>keyedMutex.getPtr(), sizeof(self._pvt_ptr[0].params.keyedMutex))
11013 @property
11014 def reserved(self):
11015 return self._pvt_ptr[0].params.reserved
11016 @reserved.setter
11017 def reserved(self, reserved):
11018 self._pvt_ptr[0].params.reserved = reserved
11020cdef class cudaExternalSemaphoreWaitParams:
11021 """
11022 External semaphore wait parameters, compatible with driver type
11024 Attributes
11025 ----------
11026 params : anon_struct15
11028 flags : unsigned int
11029 Only when cudaExternalSemaphoreWaitParams is used to wait on a
11030 cudaExternalSemaphore_t of type
11031 cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is
11032 cudaExternalSemaphoreWaitSkipNvSciBufMemSync, which indicates
11033 that while waiting for the cudaExternalSemaphore_t, no memory
11034 synchronization operations should be performed for any external
11035 memory object imported as cudaExternalMemoryHandleTypeNvSciBuf. For
11036 all other types of cudaExternalSemaphore_t, flags must be zero.
11037 reserved : list[unsigned int]
11040 Methods
11041 -------
11042 getPtr()
11043 Get memory address of class instance
11044 """
11045 def __cinit__(self, void_ptr _ptr = 0):
11046 if _ptr == 0:
11047 self._pvt_ptr = &self._pvt_val
11048 else:
11049 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitParams *>_ptr
11050 def __init__(self, void_ptr _ptr = 0):
11052 self._params = anon_struct15(_ptr=<void_ptr>self._pvt_ptr)
11053 def __dealloc__(self):
11054 pass
11055 def getPtr(self):
11056 return <void_ptr>self._pvt_ptr
11057 def __repr__(self):
11058 if self._pvt_ptr is not NULL:
11059 str_list = []
11060 try:
11061 str_list += ['params :\n' + '\n'.join([' ' + line for line in str(self.params).splitlines()])]
11062 except ValueError:
11063 str_list += ['params : <ValueError>']
11064 try:
11065 str_list += ['flags : ' + str(self.flags)]
11066 except ValueError:
11067 str_list += ['flags : <ValueError>']
11068 try:
11069 str_list += ['reserved : ' + str(self.reserved)]
11070 except ValueError:
11071 str_list += ['reserved : <ValueError>']
11072 return '\n'.join(str_list)
11073 else:
11074 return ''
11075 @property
11076 def params(self):
11077 return self._params
11078 @params.setter
11079 def params(self, params not None : anon_struct15):
11080 string.memcpy(&self._pvt_ptr[0].params, <cyruntime.anon_struct15*><void_ptr>params.getPtr(), sizeof(self._pvt_ptr[0].params))
11081 @property
11082 def flags(self):
11083 return self._pvt_ptr[0].flags
11084 @flags.setter
11085 def flags(self, unsigned int flags):
11086 self._pvt_ptr[0].flags = flags
11087 @property
11088 def reserved(self):
11089 return self._pvt_ptr[0].reserved
11090 @reserved.setter
11091 def reserved(self, reserved):
11092 self._pvt_ptr[0].reserved = reserved
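# The matching wait sketch: stream work queued after this call waits until
# the imported semaphore reaches `value`.
def _sketch_wait_external_semaphore(ext_sem, stream, value):
    params = cudaExternalSemaphoreWaitParams()
    params.params.fence.value = value
    params.flags = 0
    err, = cudaWaitExternalSemaphoresAsync([ext_sem], [params], 1, stream)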
11094cdef class cudaDevSmResource:
11095 """
11096 Data for SM-related resources. All parameters in this structure are
11097 OUTPUT only. Do not write to any of the fields in this structure.
11099 Attributes
11100 ----------
11101 smCount : unsigned int
11102 The number of streaming multiprocessors available in this resource.
11103 minSmPartitionSize : unsigned int
11104 The minimum number of streaming multiprocessors required to
11105 partition this resource.
11106 smCoscheduledAlignment : unsigned int
11107 The number of streaming multiprocessors in this resource that are
11108 guaranteed to be co-scheduled on the same GPU processing cluster.
11109 smCount will be a multiple of this value, unless the backfill flag
11110 is set.
11111 flags : unsigned int
11112 The flags set on this SM resource. For available flags see
11113 cudaDevSmResourceGroup_flags.
11115 Methods
11116 -------
11117 getPtr()
11118 Get memory address of class instance
11119 """
11120 def __cinit__(self, void_ptr _ptr = 0):
11121 if _ptr == 0:
11122 self._pvt_ptr = &self._pvt_val
11123 else:
11124 self._pvt_ptr = <cyruntime.cudaDevSmResource *>_ptr
11125 def __init__(self, void_ptr _ptr = 0):
11126 pass
11127 def __dealloc__(self):
11128 pass
11129 def getPtr(self):
11130 return <void_ptr>self._pvt_ptr
11131 def __repr__(self):
11132 if self._pvt_ptr is not NULL:
11133 str_list = []
11134 try:
11135 str_list += ['smCount : ' + str(self.smCount)]
11136 except ValueError:
11137 str_list += ['smCount : <ValueError>']
11138 try:
11139 str_list += ['minSmPartitionSize : ' + str(self.minSmPartitionSize)]
11140 except ValueError:
11141 str_list += ['minSmPartitionSize : <ValueError>']
11142 try:
11143 str_list += ['smCoscheduledAlignment : ' + str(self.smCoscheduledAlignment)]
11144 except ValueError:
11145 str_list += ['smCoscheduledAlignment : <ValueError>']
11146 try:
11147 str_list += ['flags : ' + str(self.flags)]
11148 except ValueError:
11149 str_list += ['flags : <ValueError>']
11150 return '\n'.join(str_list)
11151 else:
11152 return ''
11153 @property
11154 def smCount(self):
11155 return self._pvt_ptr[0].smCount
11156 @smCount.setter
11157 def smCount(self, unsigned int smCount):
11158 self._pvt_ptr[0].smCount = smCount
11159 @property
11160 def minSmPartitionSize(self):
11161 return self._pvt_ptr[0].minSmPartitionSize
11162 @minSmPartitionSize.setter
11163 def minSmPartitionSize(self, unsigned int minSmPartitionSize):
11164 self._pvt_ptr[0].minSmPartitionSize = minSmPartitionSize
11165 @property
11166 def smCoscheduledAlignment(self):
11167 return self._pvt_ptr[0].smCoscheduledAlignment
11168 @smCoscheduledAlignment.setter
11169 def smCoscheduledAlignment(self, unsigned int smCoscheduledAlignment):
11170 self._pvt_ptr[0].smCoscheduledAlignment = smCoscheduledAlignment
11171 @property
11172 def flags(self):
11173 return self._pvt_ptr[0].flags
11174 @flags.setter
11175 def flags(self, unsigned int flags):
11176 self._pvt_ptr[0].flags = flags
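# An inspection sketch, assuming `res` is a cudaDevSmResource that a
# resource query has already populated (every field is output only).
def _sketch_describe_sm_resource(res):
    print('SMs available         :', res.smCount)
    print('min partition size    :', res.minSmPartitionSize)
    print('coscheduled alignment :', res.smCoscheduledAlignment)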
11178cdef class cudaDevWorkqueueConfigResource:
11179 """
11180 Data for resources related to workqueue configuration
11182 Attributes
11183 ----------
11184 device : int
11185 The device on which the workqueue resources are available
11186 wqConcurrencyLimit : unsigned int
11187 The expected maximum number of concurrent stream-ordered workloads
11188 sharingScope : cudaDevWorkqueueConfigScope
11189 The sharing scope for the workqueue resources
11191 Methods
11192 -------
11193 getPtr()
11194 Get memory address of class instance
11195 """
11196 def __cinit__(self, void_ptr _ptr = 0):
11197 if _ptr == 0:
11198 self._pvt_ptr = &self._pvt_val
11199 else:
11200 self._pvt_ptr = <cyruntime.cudaDevWorkqueueConfigResource *>_ptr
11201 def __init__(self, void_ptr _ptr = 0):
11202 pass
11203 def __dealloc__(self):
11204 pass
11205 def getPtr(self):
11206 return <void_ptr>self._pvt_ptr
11207 def __repr__(self):
11208 if self._pvt_ptr is not NULL:
11209 str_list = []
11210 try:
11211 str_list += ['device : ' + str(self.device)]
11212 except ValueError:
11213 str_list += ['device : <ValueError>']
11214 try:
11215 str_list += ['wqConcurrencyLimit : ' + str(self.wqConcurrencyLimit)]
11216 except ValueError:
11217 str_list += ['wqConcurrencyLimit : <ValueError>']
11218 try:
11219 str_list += ['sharingScope : ' + str(self.sharingScope)]
11220 except ValueError:
11221 str_list += ['sharingScope : <ValueError>']
11222 return '\n'.join(str_list)
11223 else:
11224 return ''
11225 @property
11226 def device(self):
11227 return self._pvt_ptr[0].device
11228 @device.setter
11229 def device(self, int device):
11230 self._pvt_ptr[0].device = device
11231 @property
11232 def wqConcurrencyLimit(self):
11233 return self._pvt_ptr[0].wqConcurrencyLimit
11234 @wqConcurrencyLimit.setter
11235 def wqConcurrencyLimit(self, unsigned int wqConcurrencyLimit):
11236 self._pvt_ptr[0].wqConcurrencyLimit = wqConcurrencyLimit
11237 @property
11238 def sharingScope(self):
11239 if self._pvt_ptr[0].sharingScope not in _dict_cudaDevWorkqueueConfigScope:
11240 return None
11241 return _dict_cudaDevWorkqueueConfigScope[self._pvt_ptr[0].sharingScope]
11242 @sharingScope.setter
11243 def sharingScope(self, sharingScope not None : cudaDevWorkqueueConfigScope):
11244 self._pvt_ptr[0].sharingScope = sharingScope.value
11246cdef class cudaDevWorkqueueResource:
11247 """
11248 Handle to a pre-existing workqueue-related resource
11250 Attributes
11251 ----------
11252 reserved : bytes
11253 Reserved for future use
11255 Methods
11256 -------
11257 getPtr()
11258 Get memory address of class instance
11259 """
11260 def __cinit__(self, void_ptr _ptr = 0):
11261 if _ptr == 0:
11262 self._pvt_ptr = &self._pvt_val
11263 else:
11264 self._pvt_ptr = <cyruntime.cudaDevWorkqueueResource *>_ptr
11265 def __init__(self, void_ptr _ptr = 0):
11266 pass
11267 def __dealloc__(self):
11268 pass
11269 def getPtr(self):
11270 return <void_ptr>self._pvt_ptr
11271 def __repr__(self):
11272 if self._pvt_ptr is not NULL:
11273 str_list = []
11274 try:
11275 str_list += ['reserved : ' + str(self.reserved)]
11276 except ValueError:
11277 str_list += ['reserved : <ValueError>']
11278 return '\n'.join(str_list)
11279 else:
11280 return ''
11281 @property
11282 def reserved(self):
11283 return PyBytes_FromStringAndSize(<char*>self._pvt_ptr[0].reserved, 40)
11284 @reserved.setter
11285 def reserved(self, reserved):
11286 if len(reserved) != 40:
11287 raise ValueError("reserved length must be 40, is " + str(len(reserved)))
11288 for i, b in enumerate(reserved):
11289 self._pvt_ptr[0].reserved[i] = b
11291cdef class cudaDevSmResourceGroupParams_st:
11292 """
11293 Input data for splitting SMs
11295 Attributes
11296 ----------
11297 smCount : unsigned int
11298 The number of SMs available in this resource.
11299 coscheduledSmCount : unsigned int
11300 The number of co-scheduled SMs grouped together for locality
11301 purposes.
11302 preferredCoscheduledSmCount : unsigned int
11303 When possible, combine co-scheduled groups together into larger
11304 groups of this size.
11305 flags : unsigned int
11306 Combination of `cudaDevSmResourceGroup_flags` values indicating how
11307 this group is created.
11308 reserved : list[unsigned int]
11309 Reserved for future use - ensure this is zero-initialized.
11311 Methods
11312 -------
11313 getPtr()
11314 Get memory address of class instance
11315 """
11316 def __cinit__(self, void_ptr _ptr = 0):
11317 if _ptr == 0:
11318 self._pvt_ptr = &self._pvt_val
11319 else:
11320 self._pvt_ptr = <cyruntime.cudaDevSmResourceGroupParams_st *>_ptr
11321 def __init__(self, void_ptr _ptr = 0):
11322 pass
11323 def __dealloc__(self):
11324 pass
11325 def getPtr(self):
11326 return <void_ptr>self._pvt_ptr
11327 def __repr__(self):
11328 if self._pvt_ptr is not NULL:
11329 str_list = []
11330 try:
11331 str_list += ['smCount : ' + str(self.smCount)]
11332 except ValueError:
11333 str_list += ['smCount : <ValueError>']
11334 try:
11335 str_list += ['coscheduledSmCount : ' + str(self.coscheduledSmCount)]
11336 except ValueError:
11337 str_list += ['coscheduledSmCount : <ValueError>']
11338 try:
11339 str_list += ['preferredCoscheduledSmCount : ' + str(self.preferredCoscheduledSmCount)]
11340 except ValueError:
11341 str_list += ['preferredCoscheduledSmCount : <ValueError>']
11342 try:
11343 str_list += ['flags : ' + str(self.flags)]
11344 except ValueError:
11345 str_list += ['flags : <ValueError>']
11346 try:
11347 str_list += ['reserved : ' + str(self.reserved)]
11348 except ValueError:
11349 str_list += ['reserved : <ValueError>']
11350 return '\n'.join(str_list)
11351 else:
11352 return ''
11353 @property
11354 def smCount(self):
11355 return self._pvt_ptr[0].smCount
11356 @smCount.setter
11357 def smCount(self, unsigned int smCount):
11358 self._pvt_ptr[0].smCount = smCount
11359 @property
11360 def coscheduledSmCount(self):
11361 return self._pvt_ptr[0].coscheduledSmCount
11362 @coscheduledSmCount.setter
11363 def coscheduledSmCount(self, unsigned int coscheduledSmCount):
11364 self._pvt_ptr[0].coscheduledSmCount = coscheduledSmCount
11365 @property
11366 def preferredCoscheduledSmCount(self):
11367 return self._pvt_ptr[0].preferredCoscheduledSmCount
11368 @preferredCoscheduledSmCount.setter
11369 def preferredCoscheduledSmCount(self, unsigned int preferredCoscheduledSmCount):
11370 self._pvt_ptr[0].preferredCoscheduledSmCount = preferredCoscheduledSmCount
11371 @property
11372 def flags(self):
11373 return self._pvt_ptr[0].flags
11374 @flags.setter
11375 def flags(self, unsigned int flags):
11376 self._pvt_ptr[0].flags = flags
11377 @property
11378 def reserved(self):
11379 return self._pvt_ptr[0].reserved
11380 @reserved.setter
11381 def reserved(self, reserved):
11382 self._pvt_ptr[0].reserved = reserved
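# A parameter-filling sketch: request a 16-SM group in co-scheduled blocks
# of 8. The counts and zero flags are placeholder assumptions; `reserved`
# stays zero-initialized as required.
def _sketch_sm_group_params():
    params = cudaDevSmResourceGroupParams_st()
    params.smCount = 16
    params.coscheduledSmCount = 8
    params.preferredCoscheduledSmCount = 8
    params.flags = 0
    return params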
11384cdef class cudaDevResource_st:
11385 """
11386 A tagged union describing different resources identified by the
11387 type field. This structure should not be directly modified outside
11388 of the API that created it.
11389     struct { enum cudaDevResourceType type;
11390              union { struct cudaDevSmResource sm;
11391                      struct cudaDevWorkqueueConfigResource wqConfig;
11392                      struct cudaDevWorkqueueResource wq; }; };
11393 - If `type` is `cudaDevResourceTypeInvalid`, this resource is not
11394 valid and cannot be further accessed.
11395 - If `type` is `cudaDevResourceTypeSm`, the cudaDevSmResource
11396 structure `sm` is filled in; e.g. `sm.smCount` reflects the number
11397 of streaming multiprocessors available in this resource.
11398 - If `type` is `cudaDevResourceTypeWorkqueueConfig`, the
11399 cudaDevWorkqueueConfigResource structure `wqConfig` is filled in.
11400 - If `type` is `cudaDevResourceTypeWorkqueue`, the cudaDevWorkqueueResource structure `wq` is filled in.
11402 Attributes
11403 ----------
11404 type : cudaDevResourceType
11405 Type of resource, dictates which union field was last set
11406 _internal_padding : bytes
11408 sm : cudaDevSmResource
11409 Resource corresponding to cudaDevResourceTypeSm `typename`.
11410 wqConfig : cudaDevWorkqueueConfigResource
11411 Resource corresponding to cudaDevResourceTypeWorkqueueConfig
11412 `typename`.
11413 wq : cudaDevWorkqueueResource
11414 Resource corresponding to cudaDevResourceTypeWorkqueue `typename`.
11415 _oversize : bytes
11417 nextResource : cudaDevResource_st
11420 Methods
11421 -------
11422 getPtr()
11423 Get memory address of class instance
11424 """
11425 def __cinit__(self, void_ptr _ptr = 0):
11426 if _ptr == 0:
11427 self._val_ptr = <cyruntime.cudaDevResource_st *>calloc(1, sizeof(cyruntime.cudaDevResource_st))
11428 self._pvt_ptr = self._val_ptr
11429 else:
11430 self._pvt_ptr = <cyruntime.cudaDevResource_st *>_ptr
11431 def __init__(self, void_ptr _ptr = 0):
11433 self._sm = cudaDevSmResource(_ptr=<void_ptr>&self._pvt_ptr[0].sm)
11434 self._wqConfig = cudaDevWorkqueueConfigResource(_ptr=<void_ptr>&self._pvt_ptr[0].wqConfig)
11435 self._wq = cudaDevWorkqueueResource(_ptr=<void_ptr>&self._pvt_ptr[0].wq)
11436 def __dealloc__(self):
11437 if self._val_ptr is not NULL:
11438 free(self._val_ptr)
11439 if self._nextResource is not NULL:
11440 free(self._nextResource)
11441 def getPtr(self):
11442 return <void_ptr>self._pvt_ptr
11443 def __repr__(self):
11444 if self._pvt_ptr is not NULL:
11445 str_list = []
11446 try:
11447 str_list += ['type : ' + str(self.type)]
11448 except ValueError:
11449 str_list += ['type : <ValueError>']
11450 try:
11451 str_list += ['_internal_padding : ' + str(self._internal_padding)]
11452 except ValueError:
11453 str_list += ['_internal_padding : <ValueError>']
11454 try:
11455 str_list += ['sm :\n' + '\n'.join([' ' + line for line in str(self.sm).splitlines()])]
11456 except ValueError:
11457 str_list += ['sm : <ValueError>']
11458 try:
11459 str_list += ['wqConfig :\n' + '\n'.join([' ' + line for line in str(self.wqConfig).splitlines()])]
11460 except ValueError:
11461 str_list += ['wqConfig : <ValueError>']
11462 try:
11463 str_list += ['wq :\n' + '\n'.join([' ' + line for line in str(self.wq).splitlines()])]
11464 except ValueError:
11465 str_list += ['wq : <ValueError>']
11466 try:
11467 str_list += ['_oversize : ' + str(self._oversize)]
11468 except ValueError:
11469 str_list += ['_oversize : <ValueError>']
11470 try:
11471 str_list += ['nextResource : ' + str(self.nextResource)]
11472 except ValueError:
11473 str_list += ['nextResource : <ValueError>']
11474 return '\n'.join(str_list)
11475 else:
11476 return ''
11477 @property
11478 def type(self):
11479 if self._pvt_ptr[0].type not in _dict_cudaDevResourceType:
11480 return None
11481 return _dict_cudaDevResourceType[self._pvt_ptr[0].type]
11482 @type.setter
11483 def type(self, type not None : cudaDevResourceType):
11484 self._pvt_ptr[0].type = type.value
11485 @property
11486 def _internal_padding(self):
11487 return PyBytes_FromStringAndSize(<char*>self._pvt_ptr[0]._internal_padding, 92)
11488 @_internal_padding.setter
11489 def _internal_padding(self, _internal_padding):
11490 if len(_internal_padding) != 92:
11491 raise ValueError("_internal_padding length must be 92, is " + str(len(_internal_padding)))
11492 for i, b in enumerate(_internal_padding):
11493 self._pvt_ptr[0]._internal_padding[i] = b
11494 @property
11495 def sm(self):
11496 return self._sm
11497 @sm.setter
11498 def sm(self, sm not None : cudaDevSmResource):
11499 string.memcpy(&self._pvt_ptr[0].sm, <cyruntime.cudaDevSmResource*><void_ptr>sm.getPtr(), sizeof(self._pvt_ptr[0].sm))
11500 @property
11501 def wqConfig(self):
11502 return self._wqConfig
11503 @wqConfig.setter
11504 def wqConfig(self, wqConfig not None : cudaDevWorkqueueConfigResource):
11505 string.memcpy(&self._pvt_ptr[0].wqConfig, <cyruntime.cudaDevWorkqueueConfigResource*><void_ptr>wqConfig.getPtr(), sizeof(self._pvt_ptr[0].wqConfig))
11506 @property
11507 def wq(self):
11508 return self._wq
11509 @wq.setter
11510 def wq(self, wq not None : cudaDevWorkqueueResource):
11511 string.memcpy(&self._pvt_ptr[0].wq, <cyruntime.cudaDevWorkqueueResource*><void_ptr>wq.getPtr(), sizeof(self._pvt_ptr[0].wq))
11512 @property
11513 def _oversize(self):
11514 return PyBytes_FromStringAndSize(<char*>self._pvt_ptr[0]._oversize, 40)
11515 @_oversize.setter
11516 def _oversize(self, _oversize):
11517 if len(_oversize) != 40:
11518 raise ValueError("_oversize length must be 40, is " + str(len(_oversize)))
11519 for i, b in enumerate(_oversize):
11520 self._pvt_ptr[0]._oversize[i] = b
11521 @property
11522 def nextResource(self):
11523 arrs = [<void_ptr>self._pvt_ptr[0].nextResource + x*sizeof(cyruntime.cudaDevResource_st) for x in range(self._nextResource_length)]
11524 return [cudaDevResource_st(_ptr=arr) for arr in arrs]
11525 @nextResource.setter
11526 def nextResource(self, val):
11527 if len(val) == 0:
11528 free(self._nextResource)
11529 self._nextResource_length = 0
11530 self._pvt_ptr[0].nextResource = NULL
11531 else:
11532 if self._nextResource_length != <size_t>len(val):
11533 free(self._nextResource)
11534 self._nextResource = <cyruntime.cudaDevResource_st*> calloc(len(val), sizeof(cyruntime.cudaDevResource_st))
11535 if self._nextResource is NULL:
11536 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaDevResource_st)))
11537 self._nextResource_length = <size_t>len(val)
11538 self._pvt_ptr[0].nextResource = self._nextResource
11539 for idx in range(len(val)):
11540 string.memcpy(&self._nextResource[idx], (<cudaDevResource_st>val[idx])._pvt_ptr, sizeof(cyruntime.cudaDevResource_st))
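# Illustrative sketch: reading the tagged union after it has been populated by
# the owning API; `res` is a placeholder for an existing cudaDevResource_st:
#
#     if res.type == cudaDevResourceType.cudaDevResourceTypeSm:
#         print('SMs available:', res.sm.smCount)
#     elif res.type == cudaDevResourceType.cudaDevResourceTypeInvalid:
#         pass  # not valid; must not be accessed further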
11543cdef class cudalibraryHostUniversalFunctionAndDataTable:
11544 """
11545 Attributes
11546 ----------
11547 functionTable : Any
11549 functionWindowSize : size_t
11551 dataTable : Any
11553 dataWindowSize : size_t
11556 Methods
11557 -------
11558 getPtr()
11559 Get memory address of class instance
11560 """
11561 def __cinit__(self, void_ptr _ptr = 0):
11562 if _ptr == 0:
11563 self._pvt_ptr = &self._pvt_val
11564 else:
11565 self._pvt_ptr = <cyruntime.cudalibraryHostUniversalFunctionAndDataTable *>_ptr
11566 def __init__(self, void_ptr _ptr = 0):
11567 pass
11568 def __dealloc__(self):
11569 pass
11570 def getPtr(self):
11571 return <void_ptr>self._pvt_ptr
11572 def __repr__(self):
11573 if self._pvt_ptr is not NULL:
11574 str_list = []
11575 try:
11576 str_list += ['functionTable : ' + hex(self.functionTable)]
11577 except ValueError:
11578 str_list += ['functionTable : <ValueError>']
11579 try:
11580 str_list += ['functionWindowSize : ' + str(self.functionWindowSize)]
11581 except ValueError:
11582 str_list += ['functionWindowSize : <ValueError>']
11583 try:
11584 str_list += ['dataTable : ' + hex(self.dataTable)]
11585 except ValueError:
11586 str_list += ['dataTable : <ValueError>']
11587 try:
11588 str_list += ['dataWindowSize : ' + str(self.dataWindowSize)]
11589 except ValueError:
11590 str_list += ['dataWindowSize : <ValueError>']
11591 return '\n'.join(str_list)
11592 else:
11593 return ''
11594 @property
11595 def functionTable(self):
11596 return <void_ptr>self._pvt_ptr[0].functionTable
11597 @functionTable.setter
11598 def functionTable(self, functionTable):
11599 _cfunctionTable = _HelperInputVoidPtr(functionTable)
11600 self._pvt_ptr[0].functionTable = <void*><void_ptr>_cfunctionTable.cptr
11601 @property
11602 def functionWindowSize(self):
11603 return self._pvt_ptr[0].functionWindowSize
11604 @functionWindowSize.setter
11605 def functionWindowSize(self, size_t functionWindowSize):
11606 self._pvt_ptr[0].functionWindowSize = functionWindowSize
11607 @property
11608 def dataTable(self):
11609 return <void_ptr>self._pvt_ptr[0].dataTable
11610 @dataTable.setter
11611 def dataTable(self, dataTable):
11612 _cdataTable = _HelperInputVoidPtr(dataTable)
11613 self._pvt_ptr[0].dataTable = <void*><void_ptr>_cdataTable.cptr
11614 @property
11615 def dataWindowSize(self):
11616 return self._pvt_ptr[0].dataWindowSize
11617 @dataWindowSize.setter
11618 def dataWindowSize(self, size_t dataWindowSize):
11619 self._pvt_ptr[0].dataWindowSize = dataWindowSize
11621cdef class cudaKernelNodeParams:
11622 """
11623 CUDA GPU kernel node parameters
11625 Attributes
11626 ----------
11627 func : Any
11628 Kernel to launch
11629 gridDim : dim3
11630 Grid dimensions
11631 blockDim : dim3
11632 Block dimensions
11633 sharedMemBytes : unsigned int
11634 Dynamic shared-memory size per thread block in bytes
11635 kernelParams : Any
11636 Array of pointers to individual kernel arguments
11637 extra : Any
11638 Pointer to kernel arguments in the "extra" format
11640 Methods
11641 -------
11642 getPtr()
11643 Get memory address of class instance
11644 """
11645 def __cinit__(self, void_ptr _ptr = 0):
11646 if _ptr == 0:
11647 self._pvt_ptr = &self._pvt_val
11648 else:
11649 self._pvt_ptr = <cyruntime.cudaKernelNodeParams *>_ptr
11650 def __init__(self, void_ptr _ptr = 0):
11651 pass
11652 self._gridDim = dim3(_ptr=<void_ptr>&self._pvt_ptr[0].gridDim)
11653 self._blockDim = dim3(_ptr=<void_ptr>&self._pvt_ptr[0].blockDim)
11654 def __dealloc__(self):
11655 pass
11656 def getPtr(self):
11657 return <void_ptr>self._pvt_ptr
11658 def __repr__(self):
11659 if self._pvt_ptr is not NULL:
11660 str_list = []
11661 try:
11662 str_list += ['func : ' + hex(self.func)]
11663 except ValueError:
11664 str_list += ['func : <ValueError>']
11665 try:
11666 str_list += ['gridDim :\n' + '\n'.join([' ' + line for line in str(self.gridDim).splitlines()])]
11667 except ValueError:
11668 str_list += ['gridDim : <ValueError>']
11669 try:
11670 str_list += ['blockDim :\n' + '\n'.join([' ' + line for line in str(self.blockDim).splitlines()])]
11671 except ValueError:
11672 str_list += ['blockDim : <ValueError>']
11673 try:
11674 str_list += ['sharedMemBytes : ' + str(self.sharedMemBytes)]
11675 except ValueError:
11676 str_list += ['sharedMemBytes : <ValueError>']
11677 try:
11678 str_list += ['kernelParams : ' + str(self.kernelParams)]
11679 except ValueError:
11680 str_list += ['kernelParams : <ValueError>']
11681 try:
11682 str_list += ['extra : ' + str(self.extra)]
11683 except ValueError:
11684 str_list += ['extra : <ValueError>']
11685 return '\n'.join(str_list)
11686 else:
11687 return ''
11688 @property
11689 def func(self):
11690 return <void_ptr>self._pvt_ptr[0].func
11691 @func.setter
11692 def func(self, func):
11693 _cfunc = _HelperInputVoidPtr(func)
11694 self._pvt_ptr[0].func = <void*><void_ptr>_cfunc.cptr
11695 @property
11696 def gridDim(self):
11697 return self._gridDim
11698 @gridDim.setter
11699 def gridDim(self, gridDim not None : dim3):
11700 string.memcpy(&self._pvt_ptr[0].gridDim, <cyruntime.dim3*><void_ptr>gridDim.getPtr(), sizeof(self._pvt_ptr[0].gridDim))
11701 @property
11702 def blockDim(self):
11703 return self._blockDim
11704 @blockDim.setter
11705 def blockDim(self, blockDim not None : dim3):
11706 string.memcpy(&self._pvt_ptr[0].blockDim, <cyruntime.dim3*><void_ptr>blockDim.getPtr(), sizeof(self._pvt_ptr[0].blockDim))
11707 @property
11708 def sharedMemBytes(self):
11709 return self._pvt_ptr[0].sharedMemBytes
11710 @sharedMemBytes.setter
11711 def sharedMemBytes(self, unsigned int sharedMemBytes):
11712 self._pvt_ptr[0].sharedMemBytes = sharedMemBytes
11713 @property
11714 def kernelParams(self):
11715 return <void_ptr>self._pvt_ptr[0].kernelParams
11716 @kernelParams.setter
11717 def kernelParams(self, kernelParams):
11718 self._cykernelParams = _HelperKernelParams(kernelParams)
11719 self._pvt_ptr[0].kernelParams = <void**><void_ptr>self._cykernelParams.ckernelParams
11720 @property
11721 def extra(self):
11722 return <void_ptr>self._pvt_ptr[0].extra
11723 @extra.setter
11724 def extra(self, void_ptr extra):
11725 self._pvt_ptr[0].extra = <void**>extra
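# Illustrative sketch: populating a legacy kernel node parameter struct.
# `func_addr` is a placeholder for the address of a compiled kernel, and the
# (args, types) tuple form for kernelParams is one of the layouts accepted by
# _HelperKernelParams; verify against the installed binding.
#
#     import ctypes
#     p = cudaKernelNodeParams()
#     p.func = func_addr
#     p.gridDim.x, p.gridDim.y, p.gridDim.z = 4, 1, 1
#     p.blockDim.x, p.blockDim.y, p.blockDim.z = 128, 1, 1
#     p.sharedMemBytes = 0
#     p.kernelParams = ((ctypes.c_int(42),), (ctypes.c_int,))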
11727cdef class cudaKernelNodeParamsV2:
11728 """
11729 CUDA GPU kernel node parameters
11731 Attributes
11732 ----------
11733 func : Any
11734 Kernel to launch
11735 gridDim : dim3
11736 Grid dimensions
11737 blockDim : dim3
11738 Block dimensions
11739 sharedMemBytes : unsigned int
11740 Dynamic shared-memory size per thread block in bytes
11741 kernelParams : Any
11742 Array of pointers to individual kernel arguments
11743 extra : Any
11744 Pointer to kernel arguments in the "extra" format
11745 ctx : cudaExecutionContext_t
11746 Context in which to run the kernel. If NULL, the current context
11747 will be used.
11749 Methods
11750 -------
11751 getPtr()
11752 Get memory address of class instance
11753 """
11754 def __cinit__(self, void_ptr _ptr = 0):
11755 if _ptr == 0:
11756 self._pvt_ptr = &self._pvt_val
11757 else:
11758 self._pvt_ptr = <cyruntime.cudaKernelNodeParamsV2 *>_ptr
11759 def __init__(self, void_ptr _ptr = 0):
11760 pass
11761 self._gridDim = dim3(_ptr=<void_ptr>&self._pvt_ptr[0].gridDim)
11762 self._blockDim = dim3(_ptr=<void_ptr>&self._pvt_ptr[0].blockDim)
11763 self._ctx = cudaExecutionContext_t(_ptr=<void_ptr>&self._pvt_ptr[0].ctx)
11764 def __dealloc__(self):
11765 pass
11766 def getPtr(self):
11767 return <void_ptr>self._pvt_ptr
11768 def __repr__(self):
11769 if self._pvt_ptr is not NULL:
11770 str_list = []
11771 try:
11772 str_list += ['func : ' + hex(self.func)]
11773 except ValueError:
11774 str_list += ['func : <ValueError>']
11775 try:
11776 str_list += ['gridDim :\n' + '\n'.join([' ' + line for line in str(self.gridDim).splitlines()])]
11777 except ValueError:
11778 str_list += ['gridDim : <ValueError>']
11779 try:
11780 str_list += ['blockDim :\n' + '\n'.join([' ' + line for line in str(self.blockDim).splitlines()])]
11781 except ValueError:
11782 str_list += ['blockDim : <ValueError>']
11783 try:
11784 str_list += ['sharedMemBytes : ' + str(self.sharedMemBytes)]
11785 except ValueError:
11786 str_list += ['sharedMemBytes : <ValueError>']
11787 try:
11788 str_list += ['kernelParams : ' + str(self.kernelParams)]
11789 except ValueError:
11790 str_list += ['kernelParams : <ValueError>']
11791 try:
11792 str_list += ['extra : ' + str(self.extra)]
11793 except ValueError:
11794 str_list += ['extra : <ValueError>']
11795 try:
11796 str_list += ['ctx : ' + str(self.ctx)]
11797 except ValueError:
11798 str_list += ['ctx : <ValueError>']
11799 return '\n'.join(str_list)
11800 else:
11801 return ''
11802 @property
11803 def func(self):
11804 return <void_ptr>self._pvt_ptr[0].func
11805 @func.setter
11806 def func(self, func):
11807 _cfunc = _HelperInputVoidPtr(func)
11808 self._pvt_ptr[0].func = <void*><void_ptr>_cfunc.cptr
11809 @property
11810 def gridDim(self):
11811 return self._gridDim
11812 @gridDim.setter
11813 def gridDim(self, gridDim not None : dim3):
11814 string.memcpy(&self._pvt_ptr[0].gridDim, <cyruntime.dim3*><void_ptr>gridDim.getPtr(), sizeof(self._pvt_ptr[0].gridDim))
11815 @property
11816 def blockDim(self):
11817 return self._blockDim
11818 @blockDim.setter
11819 def blockDim(self, blockDim not None : dim3):
11820 string.memcpy(&self._pvt_ptr[0].blockDim, <cyruntime.dim3*><void_ptr>blockDim.getPtr(), sizeof(self._pvt_ptr[0].blockDim))
11821 @property
11822 def sharedMemBytes(self):
11823 return self._pvt_ptr[0].sharedMemBytes
11824 @sharedMemBytes.setter
11825 def sharedMemBytes(self, unsigned int sharedMemBytes):
11826 self._pvt_ptr[0].sharedMemBytes = sharedMemBytes
11827 @property
11828 def kernelParams(self):
11829 return <void_ptr>self._pvt_ptr[0].kernelParams
11830 @kernelParams.setter
11831 def kernelParams(self, kernelParams):
11832 self._cykernelParams = _HelperKernelParams(kernelParams)
11833 self._pvt_ptr[0].kernelParams = <void**><void_ptr>self._cykernelParams.ckernelParams
11834 @property
11835 def extra(self):
11836 return <void_ptr>self._pvt_ptr[0].extra
11837 @extra.setter
11838 def extra(self, void_ptr extra):
11839 self._pvt_ptr[0].extra = <void**>extra
11840 @property
11841 def ctx(self):
11842 return self._ctx
11843 @ctx.setter
11844 def ctx(self, ctx):
11845 cdef cyruntime.cudaExecutionContext_t cyctx
11846 if ctx is None:
11847 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>0
11848 elif isinstance(ctx, (cudaExecutionContext_t,)):
11849 pctx = int(ctx)
11850 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
11851 else:
11852 pctx = int(cudaExecutionContext_t(ctx))
11853 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
11854 self._ctx._pvt_ptr[0] = cyctx
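# Illustrative sketch: the V2 struct differs from cudaKernelNodeParams only by
# the trailing `ctx` member. As the setter above shows, assigning None stores
# a NULL cudaExecutionContext_t, which selects the current context at launch:
#
#     p2 = cudaKernelNodeParamsV2()
#     p2.ctx = None  # NULL -> run in the current context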
11856cdef class cudaExternalSemaphoreSignalNodeParams:
11857 """
11858 External semaphore signal node parameters
11860 Attributes
11861 ----------
11862 extSemArray : cudaExternalSemaphore_t
11863 Array of external semaphore handles.
11864 paramsArray : cudaExternalSemaphoreSignalParams
11865 Array of external semaphore signal parameters.
11866 numExtSems : unsigned int
11867 Number of handles and parameters supplied in extSemArray and
11868 paramsArray.
11870 Methods
11871 -------
11872 getPtr()
11873 Get memory address of class instance
11874 """
11875 def __cinit__(self, void_ptr _ptr = 0):
11876 if _ptr == 0:
11877 self._pvt_ptr = &self._pvt_val
11878 else:
11879 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalNodeParams *>_ptr
11880 def __init__(self, void_ptr _ptr = 0):
11881 pass
11882 def __dealloc__(self):
11883 pass
11884 if self._extSemArray is not NULL:
11885 free(self._extSemArray)
11886 if self._paramsArray is not NULL:
11887 free(self._paramsArray)
11888 def getPtr(self):
11889 return <void_ptr>self._pvt_ptr
11890 def __repr__(self):
11891 if self._pvt_ptr is not NULL:
11892 str_list = []
11893 try:
11894 str_list += ['extSemArray : ' + str(self.extSemArray)]
11895 except ValueError:
11896 str_list += ['extSemArray : <ValueError>']
11897 try:
11898 str_list += ['paramsArray : ' + str(self.paramsArray)]
11899 except ValueError:
11900 str_list += ['paramsArray : <ValueError>']
11901 try:
11902 str_list += ['numExtSems : ' + str(self.numExtSems)]
11903 except ValueError:
11904 str_list += ['numExtSems : <ValueError>']
11905 return '\n'.join(str_list)
11906 else:
11907 return ''
11908 @property
11909 def extSemArray(self):
11910 arrs = [<void_ptr>self._pvt_ptr[0].extSemArray + x*sizeof(cyruntime.cudaExternalSemaphore_t) for x in range(self._extSemArray_length)]
11911 return [cudaExternalSemaphore_t(_ptr=arr) for arr in arrs]
11912 @extSemArray.setter
11913 def extSemArray(self, val):
11914 if len(val) == 0:
11915 free(self._extSemArray)
11916 self._extSemArray_length = 0
11917 self._pvt_ptr[0].extSemArray = NULL
11918 else:
11919 if self._extSemArray_length != <size_t>len(val):
11920 free(self._extSemArray)
11921 self._extSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphore_t))
11922 if self._extSemArray is NULL:
11923 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
11924 self._extSemArray_length = <size_t>len(val)
11925 self._pvt_ptr[0].extSemArray = self._extSemArray
11926 for idx in range(len(val)):
11927 self._extSemArray[idx] = (<cudaExternalSemaphore_t>val[idx])._pvt_ptr[0]
11929 @property
11930 def paramsArray(self):
11931 arrs = [<void_ptr>self._pvt_ptr[0].paramsArray + x*sizeof(cyruntime.cudaExternalSemaphoreSignalParams) for x in range(self._paramsArray_length)]
11932 return [cudaExternalSemaphoreSignalParams(_ptr=arr) for arr in arrs]
11933 @paramsArray.setter
11934 def paramsArray(self, val):
11935 if len(val) == 0:
11936 free(self._paramsArray)
11937 self._paramsArray_length = 0
11938 self._pvt_ptr[0].paramsArray = NULL
11939 else:
11940 if self._paramsArray_length != <size_t>len(val):
11941 free(self._paramsArray)
11942 self._paramsArray = <cyruntime.cudaExternalSemaphoreSignalParams*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
11943 if self._paramsArray is NULL:
11944 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreSignalParams)))
11945 self._paramsArray_length = <size_t>len(val)
11946 self._pvt_ptr[0].paramsArray = self._paramsArray
11947 for idx in range(len(val)):
11948 string.memcpy(&self._paramsArray[idx], (<cudaExternalSemaphoreSignalParams>val[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
11950 @property
11951 def numExtSems(self):
11952 return self._pvt_ptr[0].numExtSems
11953 @numExtSems.setter
11954 def numExtSems(self, unsigned int numExtSems):
11955 self._pvt_ptr[0].numExtSems = numExtSems
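# Illustrative sketch: the extSemArray/paramsArray setters above copy each
# element into C-owned storage sized by calloc, so the Python lists need not
# outlive the assignment. `sem` is a placeholder cudaExternalSemaphore_t
# obtained from cudaImportExternalSemaphore:
#
#     sig = cudaExternalSemaphoreSignalNodeParams()
#     sig.extSemArray = [sem]
#     sig.paramsArray = [cudaExternalSemaphoreSignalParams()]
#     sig.numExtSems = 1  # must match the array lengths supplied above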
11957cdef class cudaExternalSemaphoreSignalNodeParamsV2:
11958 """
11959 External semaphore signal node parameters
11961 Attributes
11962 ----------
11963 extSemArray : cudaExternalSemaphore_t
11964 Array of external semaphore handles.
11965 paramsArray : cudaExternalSemaphoreSignalParams
11966 Array of external semaphore signal parameters.
11967 numExtSems : unsigned int
11968 Number of handles and parameters supplied in extSemArray and
11969 paramsArray.
11971 Methods
11972 -------
11973 getPtr()
11974 Get memory address of class instance
11975 """
11976 def __cinit__(self, void_ptr _ptr = 0):
11977 if _ptr == 0:
11978 self._pvt_ptr = &self._pvt_val
11979 else:
11980 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreSignalNodeParamsV2 *>_ptr
11981 def __init__(self, void_ptr _ptr = 0):
11982 pass
11983 def __dealloc__(self):
11984 pass
11985 if self._extSemArray is not NULL:
11986 free(self._extSemArray)
11987 if self._paramsArray is not NULL:
11988 free(self._paramsArray)
11989 def getPtr(self):
11990 return <void_ptr>self._pvt_ptr
11991 def __repr__(self):
11992 if self._pvt_ptr is not NULL:
11993 str_list = []
11994 try:
11995 str_list += ['extSemArray : ' + str(self.extSemArray)]
11996 except ValueError:
11997 str_list += ['extSemArray : <ValueError>']
11998 try:
11999 str_list += ['paramsArray : ' + str(self.paramsArray)]
12000 except ValueError:
12001 str_list += ['paramsArray : <ValueError>']
12002 try:
12003 str_list += ['numExtSems : ' + str(self.numExtSems)]
12004 except ValueError:
12005 str_list += ['numExtSems : <ValueError>']
12006 return '\n'.join(str_list)
12007 else:
12008 return ''
12009 @property
12010 def extSemArray(self):
12011 arrs = [<void_ptr>self._pvt_ptr[0].extSemArray + x*sizeof(cyruntime.cudaExternalSemaphore_t) for x in range(self._extSemArray_length)]
12012 return [cudaExternalSemaphore_t(_ptr=arr) for arr in arrs]
12013 @extSemArray.setter
12014 def extSemArray(self, val):
12015 if len(val) == 0:
12016 free(self._extSemArray)
12017 self._extSemArray_length = 0
12018 self._pvt_ptr[0].extSemArray = NULL
12019 else:
12020 if self._extSemArray_length != <size_t>len(val):
12021 free(self._extSemArray)
12022 self._extSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphore_t))
12023 if self._extSemArray is NULL:
12024 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
12025 self._extSemArray_length = <size_t>len(val)
12026 self._pvt_ptr[0].extSemArray = self._extSemArray
12027 for idx in range(len(val)):
12028 self._extSemArray[idx] = (<cudaExternalSemaphore_t>val[idx])._pvt_ptr[0]
12030 @property
12031 def paramsArray(self):
12032 arrs = [<void_ptr>self._pvt_ptr[0].paramsArray + x*sizeof(cyruntime.cudaExternalSemaphoreSignalParams) for x in range(self._paramsArray_length)]
12033 return [cudaExternalSemaphoreSignalParams(_ptr=arr) for arr in arrs]
12034 @paramsArray.setter
12035 def paramsArray(self, val):
12036 if len(val) == 0:
12037 free(self._paramsArray)
12038 self._paramsArray_length = 0
12039 self._pvt_ptr[0].paramsArray = NULL
12040 else:
12041 if self._paramsArray_length != <size_t>len(val):
12042 free(self._paramsArray)
12043 self._paramsArray = <cyruntime.cudaExternalSemaphoreSignalParams*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
12044 if self._paramsArray is NULL:
12045 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreSignalParams)))
12046 self._paramsArray_length = <size_t>len(val)
12047 self._pvt_ptr[0].paramsArray = self._paramsArray
12048 for idx in range(len(val)):
12049 string.memcpy(&self._paramsArray[idx], (<cudaExternalSemaphoreSignalParams>val[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
12051 @property
12052 def numExtSems(self):
12053 return self._pvt_ptr[0].numExtSems
12054 @numExtSems.setter
12055 def numExtSems(self, unsigned int numExtSems):
12056 self._pvt_ptr[0].numExtSems = numExtSems
12058cdef class cudaExternalSemaphoreWaitNodeParams:
12059 """
12060 External semaphore wait node parameters
12062 Attributes
12063 ----------
12064 extSemArray : cudaExternalSemaphore_t
12065 Array of external semaphore handles.
12066 paramsArray : cudaExternalSemaphoreWaitParams
12067 Array of external semaphore wait parameters.
12068 numExtSems : unsigned int
12069 Number of handles and parameters supplied in extSemArray and
12070 paramsArray.
12072 Methods
12073 -------
12074 getPtr()
12075 Get memory address of class instance
12076 """
12077 def __cinit__(self, void_ptr _ptr = 0):
12078 if _ptr == 0:
12079 self._pvt_ptr = &self._pvt_val
12080 else:
12081 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitNodeParams *>_ptr
12082 def __init__(self, void_ptr _ptr = 0):
12083 pass
12084 def __dealloc__(self):
12085 pass
12086 if self._extSemArray is not NULL:
12087 free(self._extSemArray)
12088 if self._paramsArray is not NULL:
12089 free(self._paramsArray)
12090 def getPtr(self):
12091 return <void_ptr>self._pvt_ptr
12092 def __repr__(self):
12093 if self._pvt_ptr is not NULL:
12094 str_list = []
12095 try:
12096 str_list += ['extSemArray : ' + str(self.extSemArray)]
12097 except ValueError:
12098 str_list += ['extSemArray : <ValueError>']
12099 try:
12100 str_list += ['paramsArray : ' + str(self.paramsArray)]
12101 except ValueError:
12102 str_list += ['paramsArray : <ValueError>']
12103 try:
12104 str_list += ['numExtSems : ' + str(self.numExtSems)]
12105 except ValueError:
12106 str_list += ['numExtSems : <ValueError>']
12107 return '\n'.join(str_list)
12108 else:
12109 return ''
12110 @property
12111 def extSemArray(self):
12112 arrs = [<void_ptr>self._pvt_ptr[0].extSemArray + x*sizeof(cyruntime.cudaExternalSemaphore_t) for x in range(self._extSemArray_length)]
12113 return [cudaExternalSemaphore_t(_ptr=arr) for arr in arrs]
12114 @extSemArray.setter
12115 def extSemArray(self, val):
12116 if len(val) == 0:
12117 free(self._extSemArray)
12118 self._extSemArray_length = 0
12119 self._pvt_ptr[0].extSemArray = NULL
12120 else:
12121 if self._extSemArray_length != <size_t>len(val):
12122 free(self._extSemArray)
12123 self._extSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphore_t))
12124 if self._extSemArray is NULL:
12125 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
12126 self._extSemArray_length = <size_t>len(val)
12127 self._pvt_ptr[0].extSemArray = self._extSemArray
12128 for idx in range(len(val)):
12129 self._extSemArray[idx] = (<cudaExternalSemaphore_t>val[idx])._pvt_ptr[0]
12131 @property
12132 def paramsArray(self):
12133 arrs = [<void_ptr>self._pvt_ptr[0].paramsArray + x*sizeof(cyruntime.cudaExternalSemaphoreWaitParams) for x in range(self._paramsArray_length)]
12134 return [cudaExternalSemaphoreWaitParams(_ptr=arr) for arr in arrs]
12135 @paramsArray.setter
12136 def paramsArray(self, val):
12137 if len(val) == 0:
12138 free(self._paramsArray)
12139 self._paramsArray_length = 0
12140 self._pvt_ptr[0].paramsArray = NULL
12141 else:
12142 if self._paramsArray_length != <size_t>len(val):
12143 free(self._paramsArray)
12144 self._paramsArray = <cyruntime.cudaExternalSemaphoreWaitParams*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
12145 if self._paramsArray is NULL:
12146 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreWaitParams)))
12147 self._paramsArray_length = <size_t>len(val)
12148 self._pvt_ptr[0].paramsArray = self._paramsArray
12149 for idx in range(len(val)):
12150 string.memcpy(&self._paramsArray[idx], (<cudaExternalSemaphoreWaitParams>val[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
12152 @property
12153 def numExtSems(self):
12154 return self._pvt_ptr[0].numExtSems
12155 @numExtSems.setter
12156 def numExtSems(self, unsigned int numExtSems):
12157 self._pvt_ptr[0].numExtSems = numExtSems
12159cdef class cudaExternalSemaphoreWaitNodeParamsV2:
12160 """
12161 External semaphore wait node parameters
12163 Attributes
12164 ----------
12165 extSemArray : cudaExternalSemaphore_t
12166 Array of external semaphore handles.
12167 paramsArray : cudaExternalSemaphoreWaitParams
12168 Array of external semaphore wait parameters.
12169 numExtSems : unsigned int
12170 Number of handles and parameters supplied in extSemArray and
12171 paramsArray.
12173 Methods
12174 -------
12175 getPtr()
12176 Get memory address of class instance
12177 """
12178 def __cinit__(self, void_ptr _ptr = 0):
12179 if _ptr == 0:
12180 self._pvt_ptr = &self._pvt_val
12181 else:
12182 self._pvt_ptr = <cyruntime.cudaExternalSemaphoreWaitNodeParamsV2 *>_ptr
12183 def __init__(self, void_ptr _ptr = 0):
12184 pass
12185 def __dealloc__(self):
12186 pass
12187 if self._extSemArray is not NULL:
12188 free(self._extSemArray)
12189 if self._paramsArray is not NULL:
12190 free(self._paramsArray)
12191 def getPtr(self):
12192 return <void_ptr>self._pvt_ptr
12193 def __repr__(self):
12194 if self._pvt_ptr is not NULL:
12195 str_list = []
12196 try:
12197 str_list += ['extSemArray : ' + str(self.extSemArray)]
12198 except ValueError:
12199 str_list += ['extSemArray : <ValueError>']
12200 try:
12201 str_list += ['paramsArray : ' + str(self.paramsArray)]
12202 except ValueError:
12203 str_list += ['paramsArray : <ValueError>']
12204 try:
12205 str_list += ['numExtSems : ' + str(self.numExtSems)]
12206 except ValueError:
12207 str_list += ['numExtSems : <ValueError>']
12208 return '\n'.join(str_list)
12209 else:
12210 return ''
12211 @property
12212 def extSemArray(self):
12213 arrs = [<void_ptr>self._pvt_ptr[0].extSemArray + x*sizeof(cyruntime.cudaExternalSemaphore_t) for x in range(self._extSemArray_length)]
12214 return [cudaExternalSemaphore_t(_ptr=arr) for arr in arrs]
12215 @extSemArray.setter
12216 def extSemArray(self, val):
12217 if len(val) == 0:
12218 free(self._extSemArray)
12219 self._extSemArray_length = 0
12220 self._pvt_ptr[0].extSemArray = NULL
12221 else:
12222 if self._extSemArray_length != <size_t>len(val):
12223 free(self._extSemArray)
12224 self._extSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphore_t))
12225 if self._extSemArray is NULL:
12226 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
12227 self._extSemArray_length = <size_t>len(val)
12228 self._pvt_ptr[0].extSemArray = self._extSemArray
12229 for idx in range(len(val)):
12230 self._extSemArray[idx] = (<cudaExternalSemaphore_t>val[idx])._pvt_ptr[0]
12232 @property
12233 def paramsArray(self):
12234 arrs = [<void_ptr>self._pvt_ptr[0].paramsArray + x*sizeof(cyruntime.cudaExternalSemaphoreWaitParams) for x in range(self._paramsArray_length)]
12235 return [cudaExternalSemaphoreWaitParams(_ptr=arr) for arr in arrs]
12236 @paramsArray.setter
12237 def paramsArray(self, val):
12238 if len(val) == 0:
12239 free(self._paramsArray)
12240 self._paramsArray_length = 0
12241 self._pvt_ptr[0].paramsArray = NULL
12242 else:
12243 if self._paramsArray_length != <size_t>len(val):
12244 free(self._paramsArray)
12245 self._paramsArray = <cyruntime.cudaExternalSemaphoreWaitParams*> calloc(len(val), sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
12246 if self._paramsArray is NULL:
12247 raise MemoryError('Failed to allocate length x size memory: ' + str(len(val)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreWaitParams)))
12248 self._paramsArray_length = <size_t>len(val)
12249 self._pvt_ptr[0].paramsArray = self._paramsArray
12250 for idx in range(len(val)):
12251 string.memcpy(&self._paramsArray[idx], (<cudaExternalSemaphoreWaitParams>val[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
12253 @property
12254 def numExtSems(self):
12255 return self._pvt_ptr[0].numExtSems
12256 @numExtSems.setter
12257 def numExtSems(self, unsigned int numExtSems):
12258 self._pvt_ptr[0].numExtSems = numExtSems
12260cdef class cudaConditionalNodeParams:
12261 """
12262 CUDA conditional node parameters
12264 Attributes
12265 ----------
12266 handle : cudaGraphConditionalHandle
12267 Conditional node handle. Handles must be created in advance of
12268 creating the node using cudaGraphConditionalHandleCreate.
12269 type : cudaGraphConditionalNodeType
12270 Type of conditional node.
12271 size : unsigned int
12272 Size of graph output array. Allowed values are 1 for
12273 cudaGraphCondTypeWhile, 1 or 2 for cudaGraphCondTypeIf, or any
12274 value greater than zero for cudaGraphCondTypeSwitch.
12275 phGraph_out : cudaGraph_t
12276 CUDA-owned array populated with conditional node child graphs
12277 during creation of the node. Valid for the lifetime of the
12278 conditional node. The contents of the graph(s) are subject to the
12279 following constraints: - Allowed node types are kernel nodes,
12280 empty nodes, child graphs, memsets, memcopies, and conditionals.
12281 This applies recursively to child graphs and conditional bodies.
12282 - All kernels, including kernels in nested conditionals or child
12283 graphs at any level, must belong to the same CUDA context.
12284 These graphs may be populated using graph node creation APIs or
12285 cudaStreamBeginCaptureToGraph. cudaGraphCondTypeIf: phGraph_out[0]
12286 is executed when the condition is non-zero. If `size` == 2,
12287 phGraph_out[1] will be executed when the condition is zero.
12288 cudaGraphCondTypeWhile: phGraph_out[0] is executed as long as the
12289 condition is non-zero. cudaGraphCondTypeSwitch: phGraph_out[n] is
12290 executed when the condition is equal to n. If the condition >=
12291 `size`, no body graph is executed.
12292 ctx : cudaExecutionContext_t
12293 CUDA Execution Context
12295 Methods
12296 -------
12297 getPtr()
12298 Get memory address of class instance
12299 """
12300 def __cinit__(self, void_ptr _ptr = 0):
12301 if _ptr == 0:
12302 self._pvt_ptr = &self._pvt_val
12303 else:
12304 self._pvt_ptr = <cyruntime.cudaConditionalNodeParams *>_ptr
12305 def __init__(self, void_ptr _ptr = 0):
12306 pass
12307 self._handle = cudaGraphConditionalHandle(_ptr=<void_ptr>&self._pvt_ptr[0].handle)
12308 self._ctx = cudaExecutionContext_t(_ptr=<void_ptr>&self._pvt_ptr[0].ctx)
12309 def __dealloc__(self):
12310 pass
12311 def getPtr(self):
12312 return <void_ptr>self._pvt_ptr
12313 def __repr__(self):
12314 if self._pvt_ptr is not NULL:
12315 str_list = []
12316 try:
12317 str_list += ['handle : ' + str(self.handle)]
12318 except ValueError:
12319 str_list += ['handle : <ValueError>']
12320 try:
12321 str_list += ['type : ' + str(self.type)]
12322 except ValueError:
12323 str_list += ['type : <ValueError>']
12324 try:
12325 str_list += ['size : ' + str(self.size)]
12326 except ValueError:
12327 str_list += ['size : <ValueError>']
12328 try:
12329 str_list += ['phGraph_out : ' + str(self.phGraph_out)]
12330 except ValueError:
12331 str_list += ['phGraph_out : <ValueError>']
12332 try:
12333 str_list += ['ctx : ' + str(self.ctx)]
12334 except ValueError:
12335 str_list += ['ctx : <ValueError>']
12336 return '\n'.join(str_list)
12337 else:
12338 return ''
12339 @property
12340 def handle(self):
12341 return self._handle
12342 @handle.setter
12343 def handle(self, handle):
12344 cdef cyruntime.cudaGraphConditionalHandle cyhandle
12345 if handle is None:
12346 cyhandle = <cyruntime.cudaGraphConditionalHandle><void_ptr>0
12347 elif isinstance(handle, (cudaGraphConditionalHandle,)):
12348 phandle = int(handle)
12349 cyhandle = <cyruntime.cudaGraphConditionalHandle><void_ptr>phandle
12350 else:
12351 phandle = int(cudaGraphConditionalHandle(handle))
12352 cyhandle = <cyruntime.cudaGraphConditionalHandle><void_ptr>phandle
12353 self._handle._pvt_ptr[0] = cyhandle
12355 @property
12356 def type(self):
12357 if self._pvt_ptr[0].type not in _dict_cudaGraphConditionalNodeType:
12358 return None
12359 return _dict_cudaGraphConditionalNodeType[self._pvt_ptr[0].type]
12360 @type.setter
12361 def type(self, type not None : cudaGraphConditionalNodeType):
12362 self._pvt_ptr[0].type = type.value
12363 @property
12364 def size(self):
12365 return self._pvt_ptr[0].size
12366 @size.setter
12367 def size(self, unsigned int size):
12368 self._pvt_ptr[0].size = size
12369 @property
12370 def phGraph_out(self):
12371 arrs = [<void_ptr>self._pvt_ptr[0].phGraph_out + x*sizeof(cyruntime.cudaGraph_t) for x in range(self.size)]
12372 return [cudaGraph_t(_ptr=arr) for arr in arrs]
12373 @property
12374 def ctx(self):
12375 return self._ctx
12376 @ctx.setter
12377 def ctx(self, ctx):
12378 cdef cyruntime.cudaExecutionContext_t cyctx
12379 if ctx is None:
12380 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>0
12381 elif isinstance(ctx, (cudaExecutionContext_t,)):
12382 pctx = int(ctx)
12383 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
12384 else:
12385 pctx = int(cudaExecutionContext_t(ctx))
12386 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
12387 self._ctx._pvt_ptr[0] = cyctx
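# Illustrative sketch: handles must exist before the node is created, per the
# docstring above. Assumes the cudaGraphConditionalHandleCreate wrapper takes
# (graph, defaultLaunchValue, flags) and returns (err, handle):
#
#     err, handle = cudaGraphConditionalHandleCreate(graph, 0, 0)
#     cond = cudaConditionalNodeParams()
#     cond.handle = handle
#     cond.type = cudaGraphConditionalNodeType.cudaGraphCondTypeIf
#     cond.size = 1  # 2 would request an additional else-body graph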
12389cdef class cudaChildGraphNodeParams:
12390 """
12391 Child graph node parameters
12393 Attributes
12394 ----------
12395 graph : cudaGraph_t
12396 The child graph to clone into the node for node creation, or a
12397 handle to the graph owned by the node for node query. The graph
12398 must not contain conditional nodes. Graphs containing memory
12399 allocation or memory free nodes must set the ownership to be moved
12400 to the parent.
12401 ownership : cudaGraphChildGraphNodeOwnership
12402 The ownership relationship of the child graph node.
12404 Methods
12405 -------
12406 getPtr()
12407 Get memory address of class instance
12408 """
12409 def __cinit__(self, void_ptr _ptr = 0):
12410 if _ptr == 0:
12411 self._pvt_ptr = &self._pvt_val
12412 else:
12413 self._pvt_ptr = <cyruntime.cudaChildGraphNodeParams *>_ptr
12414 def __init__(self, void_ptr _ptr = 0):
12415 pass
12416 self._graph = cudaGraph_t(_ptr=<void_ptr>&self._pvt_ptr[0].graph)
12417 def __dealloc__(self):
12418 pass
12419 def getPtr(self):
12420 return <void_ptr>self._pvt_ptr
12421 def __repr__(self):
12422 if self._pvt_ptr is not NULL:
12423 str_list = []
12424 try:
12425 str_list += ['graph : ' + str(self.graph)]
12426 except ValueError:
12427 str_list += ['graph : <ValueError>']
12428 try:
12429 str_list += ['ownership : ' + str(self.ownership)]
12430 except ValueError:
12431 str_list += ['ownership : <ValueError>']
12432 return '\n'.join(str_list)
12433 else:
12434 return ''
12435 @property
12436 def graph(self):
12437 return self._graph
12438 @graph.setter
12439 def graph(self, graph):
12440 cdef cyruntime.cudaGraph_t cygraph
12441 if graph is None:
12442 cygraph = <cyruntime.cudaGraph_t><void_ptr>0
12443 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
12444 pgraph = int(graph)
12445 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
12446 else:
12447 pgraph = int(cudaGraph_t(graph))
12448 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
12449 self._graph._pvt_ptr[0] = cygraph
12450 @property
12451 def ownership(self):
12452 if self._pvt_ptr[0].ownership not in _dict_cudaGraphChildGraphNodeOwnership:
12453 return None
12454 return _dict_cudaGraphChildGraphNodeOwnership[self._pvt_ptr[0].ownership]
12455 @ownership.setter
12456 def ownership(self, ownership not None : cudaGraphChildGraphNodeOwnership):
12457 self._pvt_ptr[0].ownership = ownership.value
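# Illustrative sketch: `child_graph` is a placeholder for a previously built
# cudaGraph_t. The ownership member name is assumed from the C enum
# (cudaGraphChildGraphOwnershipClone / ...Move); graphs containing mem-alloc
# or mem-free nodes require the move ownership, per the docstring above:
#
#     cg = cudaChildGraphNodeParams()
#     cg.graph = child_graph
#     cg.ownership = cudaGraphChildGraphNodeOwnership.cudaGraphChildGraphOwnershipClone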
12459cdef class cudaEventRecordNodeParams:
12460 """
12461 Event record node parameters
12463 Attributes
12464 ----------
12465 event : cudaEvent_t
12466 The event to record when the node executes
12468 Methods
12469 -------
12470 getPtr()
12471 Get memory address of class instance
12472 """
12473 def __cinit__(self, void_ptr _ptr = 0):
12474 if _ptr == 0:
12475 self._pvt_ptr = &self._pvt_val
12476 else:
12477 self._pvt_ptr = <cyruntime.cudaEventRecordNodeParams *>_ptr
12478 def __init__(self, void_ptr _ptr = 0):
12479 pass
12480 self._event = cudaEvent_t(_ptr=<void_ptr>&self._pvt_ptr[0].event)
12481 def __dealloc__(self):
12482 pass
12483 def getPtr(self):
12484 return <void_ptr>self._pvt_ptr
12485 def __repr__(self):
12486 if self._pvt_ptr is not NULL:
12487 str_list = []
12488 try:
12489 str_list += ['event : ' + str(self.event)]
12490 except ValueError:
12491 str_list += ['event : <ValueError>']
12492 return '\n'.join(str_list)
12493 else:
12494 return ''
12495 @property
12496 def event(self):
12497 return self._event
12498 @event.setter
12499 def event(self, event):
12500 cdef cyruntime.cudaEvent_t cyevent
12501 if event is None:
12502 cyevent = <cyruntime.cudaEvent_t><void_ptr>0
12503 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
12504 pevent = int(event)
12505 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
12506 else:
12507 pevent = int(cudaEvent_t(event))
12508 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
12509 self._event._pvt_ptr[0] = cyevent
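# Illustrative sketch: the event setter above accepts a cudaEvent_t or a
# driver.CUevent, so either API's event objects interoperate here:
#
#     err, ev = cudaEventCreate()
#     rec = cudaEventRecordNodeParams()
#     rec.event = ev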
12511cdef class cudaEventWaitNodeParams:
12512 """
12513 Event wait node parameters
12515 Attributes
12516 ----------
12517 event : cudaEvent_t
12518 The event to wait on from the node
12520 Methods
12521 -------
12522 getPtr()
12523 Get memory address of class instance
12524 """
12525 def __cinit__(self, void_ptr _ptr = 0):
12526 if _ptr == 0:
12527 self._pvt_ptr = &self._pvt_val
12528 else:
12529 self._pvt_ptr = <cyruntime.cudaEventWaitNodeParams *>_ptr
12530 def __init__(self, void_ptr _ptr = 0):
12531 pass
12532 self._event = cudaEvent_t(_ptr=<void_ptr>&self._pvt_ptr[0].event)
12533 def __dealloc__(self):
12534 pass
12535 def getPtr(self):
12536 return <void_ptr>self._pvt_ptr
12537 def __repr__(self):
12538 if self._pvt_ptr is not NULL:
12539 str_list = []
12540 try:
12541 str_list += ['event : ' + str(self.event)]
12542 except ValueError:
12543 str_list += ['event : <ValueError>']
12544 return '\n'.join(str_list)
12545 else:
12546 return ''
12547 @property
12548 def event(self):
12549 return self._event
12550 @event.setter
12551 def event(self, event):
12552 cdef cyruntime.cudaEvent_t cyevent
12553 if event is None:
12554 cyevent = <cyruntime.cudaEvent_t><void_ptr>0
12555 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
12556 pevent = int(event)
12557 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
12558 else:
12559 pevent = int(cudaEvent_t(event))
12560 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
12561 self._event._pvt_ptr[0] = cyevent
12563cdef class cudaGraphNodeParams:
12564 """
12565 Graph node parameters. See cudaGraphAddNode.
12567 Attributes
12568 ----------
12569 type : cudaGraphNodeType
12570 Type of the node
12571 reserved0 : list[int]
12572 Reserved. Must be zero.
12573 reserved1 : list[long long]
12574 Padding. Unused bytes must be zero.
12575 kernel : cudaKernelNodeParamsV2
12576 Kernel node parameters.
12577 memcpy : cudaMemcpyNodeParams
12578 Memcpy node parameters.
12579 memset : cudaMemsetParamsV2
12580 Memset node parameters.
12581 host : cudaHostNodeParamsV2
12582 Host node parameters.
12583 graph : cudaChildGraphNodeParams
12584 Child graph node parameters.
12585 eventWait : cudaEventWaitNodeParams
12586 Event wait node parameters.
12587 eventRecord : cudaEventRecordNodeParams
12588 Event record node parameters.
12589 extSemSignal : cudaExternalSemaphoreSignalNodeParamsV2
12590 External semaphore signal node parameters.
12591 extSemWait : cudaExternalSemaphoreWaitNodeParamsV2
12592 External semaphore wait node parameters.
12593 alloc : cudaMemAllocNodeParamsV2
12594 Memory allocation node parameters.
12595 free : cudaMemFreeNodeParams
12596 Memory free node parameters.
12597 conditional : cudaConditionalNodeParams
12598 Conditional node parameters.
12599 reserved2 : long long
12600 Reserved bytes. Must be zero.
12602 Methods
12603 -------
12604 getPtr()
12605 Get memory address of class instance
12606 """
12607 def __cinit__(self, void_ptr _ptr = 0):
12608 if _ptr == 0:
12609 self._val_ptr = <cyruntime.cudaGraphNodeParams *>calloc(1, sizeof(cyruntime.cudaGraphNodeParams))
12610 self._pvt_ptr = self._val_ptr
12611 else:
12612 self._pvt_ptr = <cyruntime.cudaGraphNodeParams *>_ptr
12613 def __init__(self, void_ptr _ptr = 0):
12614 pass
12615 self._kernel = cudaKernelNodeParamsV2(_ptr=<void_ptr>&self._pvt_ptr[0].kernel)
12616 self._memcpy = cudaMemcpyNodeParams(_ptr=<void_ptr>&self._pvt_ptr[0].memcpy)
12617 self._memset = cudaMemsetParamsV2(_ptr=<void_ptr>&self._pvt_ptr[0].memset)
12618 self._host = cudaHostNodeParamsV2(_ptr=<void_ptr>&self._pvt_ptr[0].host)
12619 self._graph = cudaChildGraphNodeParams(_ptr=<void_ptr>&self._pvt_ptr[0].graph)
12620 self._eventWait = cudaEventWaitNodeParams(_ptr=<void_ptr>&self._pvt_ptr[0].eventWait)
12621 self._eventRecord = cudaEventRecordNodeParams(_ptr=<void_ptr>&self._pvt_ptr[0].eventRecord)
12622 self._extSemSignal = cudaExternalSemaphoreSignalNodeParamsV2(_ptr=<void_ptr>&self._pvt_ptr[0].extSemSignal)
12623 self._extSemWait = cudaExternalSemaphoreWaitNodeParamsV2(_ptr=<void_ptr>&self._pvt_ptr[0].extSemWait)
12624 self._alloc = cudaMemAllocNodeParamsV2(_ptr=<void_ptr>&self._pvt_ptr[0].alloc)
12625 self._free = cudaMemFreeNodeParams(_ptr=<void_ptr>&self._pvt_ptr[0].free)
12626 self._conditional = cudaConditionalNodeParams(_ptr=<void_ptr>&self._pvt_ptr[0].conditional)
12627 def __dealloc__(self):
12628 if self._val_ptr is not NULL:
12629 free(self._val_ptr)
12630 def getPtr(self):
12631 return <void_ptr>self._pvt_ptr
12632 def __repr__(self):
12633 if self._pvt_ptr is not NULL:
12634 str_list = []
12635 try:
12636 str_list += ['type : ' + str(self.type)]
12637 except ValueError:
12638 str_list += ['type : <ValueError>']
12639 try:
12640 str_list += ['reserved0 : ' + str(self.reserved0)]
12641 except ValueError:
12642 str_list += ['reserved0 : <ValueError>']
12643 try:
12644 str_list += ['reserved1 : ' + str(self.reserved1)]
12645 except ValueError:
12646 str_list += ['reserved1 : <ValueError>']
12647 try:
12648 str_list += ['kernel :\n' + '\n'.join([' ' + line for line in str(self.kernel).splitlines()])]
12649 except ValueError:
12650 str_list += ['kernel : <ValueError>']
12651 try:
12652 str_list += ['memcpy :\n' + '\n'.join([' ' + line for line in str(self.memcpy).splitlines()])]
12653 except ValueError:
12654 str_list += ['memcpy : <ValueError>']
12655 try:
12656 str_list += ['memset :\n' + '\n'.join([' ' + line for line in str(self.memset).splitlines()])]
12657 except ValueError:
12658 str_list += ['memset : <ValueError>']
12659 try:
12660 str_list += ['host :\n' + '\n'.join([' ' + line for line in str(self.host).splitlines()])]
12661 except ValueError:
12662 str_list += ['host : <ValueError>']
12663 try:
12664 str_list += ['graph :\n' + '\n'.join([' ' + line for line in str(self.graph).splitlines()])]
12665 except ValueError:
12666 str_list += ['graph : <ValueError>']
12667 try:
12668 str_list += ['eventWait :\n' + '\n'.join([' ' + line for line in str(self.eventWait).splitlines()])]
12669 except ValueError:
12670 str_list += ['eventWait : <ValueError>']
12671 try:
12672 str_list += ['eventRecord :\n' + '\n'.join([' ' + line for line in str(self.eventRecord).splitlines()])]
12673 except ValueError:
12674 str_list += ['eventRecord : <ValueError>']
12675 try:
12676 str_list += ['extSemSignal :\n' + '\n'.join([' ' + line for line in str(self.extSemSignal).splitlines()])]
12677 except ValueError:
12678 str_list += ['extSemSignal : <ValueError>']
12679 try:
12680 str_list += ['extSemWait :\n' + '\n'.join([' ' + line for line in str(self.extSemWait).splitlines()])]
12681 except ValueError:
12682 str_list += ['extSemWait : <ValueError>']
12683 try:
12684 str_list += ['alloc :\n' + '\n'.join([' ' + line for line in str(self.alloc).splitlines()])]
12685 except ValueError:
12686 str_list += ['alloc : <ValueError>']
12687 try:
12688 str_list += ['free :\n' + '\n'.join([' ' + line for line in str(self.free).splitlines()])]
12689 except ValueError:
12690 str_list += ['free : <ValueError>']
12691 try:
12692 str_list += ['conditional :\n' + '\n'.join([' ' + line for line in str(self.conditional).splitlines()])]
12693 except ValueError:
12694 str_list += ['conditional : <ValueError>']
12695 try:
12696 str_list += ['reserved2 : ' + str(self.reserved2)]
12697 except ValueError:
12698 str_list += ['reserved2 : <ValueError>']
12699 return '\n'.join(str_list)
12700 else:
12701 return ''
12702 @property
12703 def type(self):
12704 if self._pvt_ptr[0].type not in _dict_cudaGraphNodeType:
12705 return None
12706 return _dict_cudaGraphNodeType[self._pvt_ptr[0].type]
12707 @type.setter
12708 def type(self, type not None : cudaGraphNodeType):
12709 self._pvt_ptr[0].type = type.value
12710 @property
12711 def reserved0(self):
12712 return self._pvt_ptr[0].reserved0
12713 @reserved0.setter
12714 def reserved0(self, reserved0):
12715 self._pvt_ptr[0].reserved0 = reserved0
12716 @property
12717 def reserved1(self):
12718 return self._pvt_ptr[0].reserved1
12719 @reserved1.setter
12720 def reserved1(self, reserved1):
12721 self._pvt_ptr[0].reserved1 = reserved1
12722 @property
12723 def kernel(self):
12724 return self._kernel
12725 @kernel.setter
12726 def kernel(self, kernel not None : cudaKernelNodeParamsV2):
12727 string.memcpy(&self._pvt_ptr[0].kernel, <cyruntime.cudaKernelNodeParamsV2*><void_ptr>kernel.getPtr(), sizeof(self._pvt_ptr[0].kernel))
12728 @property
12729 def memcpy(self):
12730 return self._memcpy
12731 @memcpy.setter
12732 def memcpy(self, memcpy not None : cudaMemcpyNodeParams):
12733 string.memcpy(&self._pvt_ptr[0].memcpy, <cyruntime.cudaMemcpyNodeParams*><void_ptr>memcpy.getPtr(), sizeof(self._pvt_ptr[0].memcpy))
12734 @property
12735 def memset(self):
12736 return self._memset
12737 @memset.setter
12738 def memset(self, memset not None : cudaMemsetParamsV2):
12739 string.memcpy(&self._pvt_ptr[0].memset, <cyruntime.cudaMemsetParamsV2*><void_ptr>memset.getPtr(), sizeof(self._pvt_ptr[0].memset))
12740 @property
12741 def host(self):
12742 return self._host
12743 @host.setter
12744 def host(self, host not None : cudaHostNodeParamsV2):
12745 string.memcpy(&self._pvt_ptr[0].host, <cyruntime.cudaHostNodeParamsV2*><void_ptr>host.getPtr(), sizeof(self._pvt_ptr[0].host))
12746 @property
12747 def graph(self):
12748 return self._graph
12749 @graph.setter
12750 def graph(self, graph not None : cudaChildGraphNodeParams):
12751 string.memcpy(&self._pvt_ptr[0].graph, <cyruntime.cudaChildGraphNodeParams*><void_ptr>graph.getPtr(), sizeof(self._pvt_ptr[0].graph))
12752 @property
12753 def eventWait(self):
12754 return self._eventWait
12755 @eventWait.setter
12756 def eventWait(self, eventWait not None : cudaEventWaitNodeParams):
12757 string.memcpy(&self._pvt_ptr[0].eventWait, <cyruntime.cudaEventWaitNodeParams*><void_ptr>eventWait.getPtr(), sizeof(self._pvt_ptr[0].eventWait))
12758 @property
12759 def eventRecord(self):
12760 return self._eventRecord
12761 @eventRecord.setter
12762 def eventRecord(self, eventRecord not None : cudaEventRecordNodeParams):
12763 string.memcpy(&self._pvt_ptr[0].eventRecord, <cyruntime.cudaEventRecordNodeParams*><void_ptr>eventRecord.getPtr(), sizeof(self._pvt_ptr[0].eventRecord))
12764 @property
12765 def extSemSignal(self):
12766 return self._extSemSignal
12767 @extSemSignal.setter
12768 def extSemSignal(self, extSemSignal not None : cudaExternalSemaphoreSignalNodeParamsV2):
12769 string.memcpy(&self._pvt_ptr[0].extSemSignal, <cyruntime.cudaExternalSemaphoreSignalNodeParamsV2*><void_ptr>extSemSignal.getPtr(), sizeof(self._pvt_ptr[0].extSemSignal))
12770 @property
12771 def extSemWait(self):
12772 return self._extSemWait
12773 @extSemWait.setter
12774 def extSemWait(self, extSemWait not None : cudaExternalSemaphoreWaitNodeParamsV2):
12775 string.memcpy(&self._pvt_ptr[0].extSemWait, <cyruntime.cudaExternalSemaphoreWaitNodeParamsV2*><void_ptr>extSemWait.getPtr(), sizeof(self._pvt_ptr[0].extSemWait))
12776 @property
12777 def alloc(self):
12778 return self._alloc
12779 @alloc.setter
12780 def alloc(self, alloc not None : cudaMemAllocNodeParamsV2):
12781 string.memcpy(&self._pvt_ptr[0].alloc, <cyruntime.cudaMemAllocNodeParamsV2*><void_ptr>alloc.getPtr(), sizeof(self._pvt_ptr[0].alloc))
12782 @property
12783 def free(self):
12784 return self._free
12785 @free.setter
12786 def free(self, free not None : cudaMemFreeNodeParams):
12787 string.memcpy(&self._pvt_ptr[0].free, <cyruntime.cudaMemFreeNodeParams*><void_ptr>free.getPtr(), sizeof(self._pvt_ptr[0].free))
12788 @property
12789 def conditional(self):
12790 return self._conditional
12791 @conditional.setter
12792 def conditional(self, conditional not None : cudaConditionalNodeParams):
12793 string.memcpy(&self._pvt_ptr[0].conditional, <cyruntime.cudaConditionalNodeParams*><void_ptr>conditional.getPtr(), sizeof(self._pvt_ptr[0].conditional))
12794 @property
12795 def reserved2(self):
12796 return self._pvt_ptr[0].reserved2
12797 @reserved2.setter
12798 def reserved2(self, long long reserved2):
12799 self._pvt_ptr[0].reserved2 = reserved2
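# Illustrative sketch: `type` is the discriminant that tells cudaGraphAddNode
# which union member to read. Assumes the cudaGraphAddNode wrapper takes
# (graph, pDependencies, numDependencies, nodeParams) and returns (err, node);
# `graph` and `ev` are placeholders created elsewhere:
#
#     node_params = cudaGraphNodeParams()
#     node_params.type = cudaGraphNodeType.cudaGraphNodeTypeEventRecord
#     node_params.eventRecord.event = ev
#     err, node = cudaGraphAddNode(graph, None, 0, node_params)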
12801cdef class cudaGraphEdgeData_st:
12802 """
12803 Optional annotation for edges in a CUDA graph. Note that all edges
12804 implicitly have annotations and default to a zero-initialized value
12805 if not specified. A zero-initialized struct indicates a standard
12806 full serialization of two nodes with memory visibility.
12808 Attributes
12809 ----------
12810 from_port : bytes
12811 This indicates when the dependency is triggered from the upstream
12812 node on the edge. The meaning is specific to the node type. A value
12813 of 0 in all cases means full completion of the upstream node, with
12814 memory visibility to the downstream node or portion thereof
12815 (indicated by `to_port`). Only kernel nodes define non-zero
12816 ports. A kernel node can use the following output port types:
12817 cudaGraphKernelNodePortDefault,
12818 cudaGraphKernelNodePortProgrammatic, or
12819 cudaGraphKernelNodePortLaunchCompletion.
12820 to_port : bytes
12821 This indicates what portion of the downstream node is dependent on
12822 the upstream node or portion thereof (indicated by `from_port`).
12823 The meaning is specific to the node type. A value of 0 in all cases
12824 means the entirety of the downstream node is dependent on the
12825 upstream work. Currently no node types define non-zero ports.
12826 Accordingly, this field must be set to zero.
12827 type : bytes
12828 This should be populated with a value from
12829 ::cudaGraphDependencyType. (It is typed as char due to compiler-
12830 specific layout of bitfields.) See ::cudaGraphDependencyType.
12831 reserved : bytes
12832 These bytes are unused and must be zeroed. This ensures
12833 compatibility if additional fields are added in the future.
12835 Methods
12836 -------
12837 getPtr()
12838 Get memory address of class instance
12839 """
12840 def __cinit__(self, void_ptr _ptr = 0):
12841 if _ptr == 0:
12842 self._pvt_ptr = &self._pvt_val
12843 else:
12844 self._pvt_ptr = <cyruntime.cudaGraphEdgeData_st *>_ptr
12845 def __init__(self, void_ptr _ptr = 0):
12846 pass
12847 def __dealloc__(self):
12848 pass
12849 def getPtr(self):
12850 return <void_ptr>self._pvt_ptr
12851 def __repr__(self):
12852 if self._pvt_ptr is not NULL:
12853 str_list = []
12854 try:
12855 str_list += ['from_port : ' + str(self.from_port)]
12856 except ValueError:
12857 str_list += ['from_port : <ValueError>']
12858 try:
12859 str_list += ['to_port : ' + str(self.to_port)]
12860 except ValueError:
12861 str_list += ['to_port : <ValueError>']
12862 try:
12863 str_list += ['type : ' + str(self.type)]
12864 except ValueError:
12865 str_list += ['type : <ValueError>']
12866 try:
12867 str_list += ['reserved : ' + str(self.reserved)]
12868 except ValueError:
12869 str_list += ['reserved : <ValueError>']
12870 return '\n'.join(str_list)
12871 else:
12872 return ''
12873 @property
12874 def from_port(self):
12875 return self._pvt_ptr[0].from_port
12876 @from_port.setter
12877 def from_port(self, unsigned char from_port):
12878 self._pvt_ptr[0].from_port = from_port
12879 @property
12880 def to_port(self):
12881 return self._pvt_ptr[0].to_port
12882 @to_port.setter
12883 def to_port(self, unsigned char to_port):
12884 self._pvt_ptr[0].to_port = to_port
12885 @property
12886 def type(self):
12887 return self._pvt_ptr[0].type
12888 @type.setter
12889 def type(self, unsigned char type):
12890 self._pvt_ptr[0].type = type
12891 @property
12892 def reserved(self):
12893 return PyBytes_FromStringAndSize(<char*>self._pvt_ptr[0].reserved, 5)
12894 @reserved.setter
12895 def reserved(self, reserved):
12896 if len(reserved) != 5:
12897 raise ValueError("reserved length must be 5, is " + str(len(reserved)))
12898 for i, b in enumerate(reserved):
12899 self._pvt_ptr[0].reserved[i] = b
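# Illustrative sketch: a programmatic-dependency edge between two kernel
# nodes, using the port and dependency-type values named in the docstring:
#
#     edge = cudaGraphEdgeData_st()
#     edge.from_port = cudaGraphKernelNodePortProgrammatic
#     edge.type = cudaGraphDependencyType.cudaGraphDependencyTypeProgrammatic.value
#     edge.reserved = bytes(5)  # unused bytes must remain zeroed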
12901cdef class cudaGraphInstantiateParams_st:
12902 """
12903 Graph instantiation parameters
12905 Attributes
12906 ----------
12907 flags : unsigned long long
12908 Instantiation flags
12909 uploadStream : cudaStream_t
12910 Upload stream
12911 errNode_out : cudaGraphNode_t
12912 The node which caused instantiation to fail, if any
12913 result_out : cudaGraphInstantiateResult
12914 Whether instantiation was successful. If it failed, the reason why
12916 Methods
12917 -------
12918 getPtr()
12919 Get memory address of class instance
12920 """
12921 def __cinit__(self, void_ptr _ptr = 0):
12922 if _ptr == 0:
12923 self._pvt_ptr = &self._pvt_val
12924 else:
12925 self._pvt_ptr = <cyruntime.cudaGraphInstantiateParams_st *>_ptr
12926 def __init__(self, void_ptr _ptr = 0):
12927 pass
12928 self._uploadStream = cudaStream_t(_ptr=<void_ptr>&self._pvt_ptr[0].uploadStream)
12929 self._errNode_out = cudaGraphNode_t(_ptr=<void_ptr>&self._pvt_ptr[0].errNode_out)
12930 def __dealloc__(self):
12931 pass
12932 def getPtr(self):
12933 return <void_ptr>self._pvt_ptr
12934 def __repr__(self):
12935 if self._pvt_ptr is not NULL:
12936 str_list = []
12937 try:
12938 str_list += ['flags : ' + str(self.flags)]
12939 except ValueError:
12940 str_list += ['flags : <ValueError>']
12941 try:
12942 str_list += ['uploadStream : ' + str(self.uploadStream)]
12943 except ValueError:
12944 str_list += ['uploadStream : <ValueError>']
12945 try:
12946 str_list += ['errNode_out : ' + str(self.errNode_out)]
12947 except ValueError:
12948 str_list += ['errNode_out : <ValueError>']
12949 try:
12950 str_list += ['result_out : ' + str(self.result_out)]
12951 except ValueError:
12952 str_list += ['result_out : <ValueError>']
12953 return '\n'.join(str_list)
12954 else:
12955 return ''
12956 @property
12957 def flags(self):
12958 return self._pvt_ptr[0].flags
12959 @flags.setter
12960 def flags(self, unsigned long long flags):
12961 self._pvt_ptr[0].flags = flags
12962 @property
12963 def uploadStream(self):
12964 return self._uploadStream
12965 @uploadStream.setter
12966 def uploadStream(self, uploadStream):
12967 cdef cyruntime.cudaStream_t cyuploadStream
12968 if uploadStream is None:
12969 cyuploadStream = <cyruntime.cudaStream_t><void_ptr>0
12970 elif isinstance(uploadStream, (cudaStream_t,driver.CUstream)):
12971 puploadStream = int(uploadStream)
12972 cyuploadStream = <cyruntime.cudaStream_t><void_ptr>puploadStream
12973 else:
12974 puploadStream = int(cudaStream_t(uploadStream))
12975 cyuploadStream = <cyruntime.cudaStream_t><void_ptr>puploadStream
12976 self._uploadStream._pvt_ptr[0] = cyuploadStream
12977 @property
12978 def errNode_out(self):
12979 return self._errNode_out
12980 @errNode_out.setter
12981 def errNode_out(self, errNode_out):
12982 cdef cyruntime.cudaGraphNode_t cyerrNode_out
12983 if errNode_out is None:
12984 cyerrNode_out = <cyruntime.cudaGraphNode_t><void_ptr>0
12985 elif isinstance(errNode_out, (cudaGraphNode_t,driver.CUgraphNode)):
12986 perrNode_out = int(errNode_out)
12987 cyerrNode_out = <cyruntime.cudaGraphNode_t><void_ptr>perrNode_out
12988 else:
12989 perrNode_out = int(cudaGraphNode_t(errNode_out))
12990 cyerrNode_out = <cyruntime.cudaGraphNode_t><void_ptr>perrNode_out
12991 self._errNode_out._pvt_ptr[0] = cyerrNode_out
12992 @property
12993 def result_out(self):
12994 if self._pvt_ptr[0].result_out not in _dict_cudaGraphInstantiateResult:
12995 return None
12996 return _dict_cudaGraphInstantiateResult[self._pvt_ptr[0].result_out]
12997 @result_out.setter
12998 def result_out(self, result_out not None : cudaGraphInstantiateResult):
12999 self._pvt_ptr[0].result_out = result_out.value
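# Example (illustrative sketch, not generated code): populating the
# instantiation parameters and checking result_out afterwards. The
# cudaGraphInstantiateWithParams call and the pre-existing `graph` and
# `stream` objects are assumptions for illustration.
#
#     params = cudaGraphInstantiateParams_st()
#     params.flags = 0              # no special instantiation flags
#     params.uploadStream = stream  # optional upload stream
#     err, graphExec = cudaGraphInstantiateWithParams(graph, params)
#     if params.result_out != cudaGraphInstantiateResult.cudaGraphInstantiateSuccess:
#         raise RuntimeError('instantiation failed at node %s' % params.errNode_out)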
13001cdef class cudaGraphExecUpdateResultInfo_st:
13002 """
13003 Result information returned by cudaGraphExecUpdate
13005 Attributes
13006 ----------
13007 result : cudaGraphExecUpdateResult
13008 Gives more specific detail when a CUDA graph update fails.
13009 errorNode : cudaGraphNode_t
13010 The "to node" of the error edge when the topologies do not match.
13011 The error node when the error is associated with a specific node.
13012 NULL when the error is generic.
13013 errorFromNode : cudaGraphNode_t
13014 The "from node" of the error edge when the topologies do not match.
13015 Otherwise NULL.
13017 Methods
13018 -------
13019 getPtr()
13020 Get memory address of class instance
13021 """
13022 def __cinit__(self, void_ptr _ptr = 0):
13023 if _ptr == 0:
13024 self._pvt_ptr = &self._pvt_val
13025 else:
13026 self._pvt_ptr = <cyruntime.cudaGraphExecUpdateResultInfo_st *>_ptr
13027 def __init__(self, void_ptr _ptr = 0):
13029 self._errorNode = cudaGraphNode_t(_ptr=<void_ptr>&self._pvt_ptr[0].errorNode)
13030 self._errorFromNode = cudaGraphNode_t(_ptr=<void_ptr>&self._pvt_ptr[0].errorFromNode)
13031 def __dealloc__(self):
13032 pass
13033 def getPtr(self):
13034 return <void_ptr>self._pvt_ptr
13035 def __repr__(self):
13036 if self._pvt_ptr is not NULL:
13037 str_list = []
13038 try:
13039 str_list += ['result : ' + str(self.result)]
13040 except ValueError:
13041 str_list += ['result : <ValueError>']
13042 try:
13043 str_list += ['errorNode : ' + str(self.errorNode)]
13044 except ValueError:
13045 str_list += ['errorNode : <ValueError>']
13046 try:
13047 str_list += ['errorFromNode : ' + str(self.errorFromNode)]
13048 except ValueError:
13049 str_list += ['errorFromNode : <ValueError>']
13050 return '\n'.join(str_list)
13051 else:
13052 return ''
13053 @property
13054 def result(self):
13055 if self._pvt_ptr[0].result not in _dict_cudaGraphExecUpdateResult:
13056 return None
13057 return _dict_cudaGraphExecUpdateResult[self._pvt_ptr[0].result]
13058 @result.setter
13059 def result(self, result not None : cudaGraphExecUpdateResult):
13060 self._pvt_ptr[0].result = result.value
13061 @property
13062 def errorNode(self):
13063 return self._errorNode
13064 @errorNode.setter
13065 def errorNode(self, errorNode):
13066 cdef cyruntime.cudaGraphNode_t cyerrorNode
13067 if errorNode is None:
13068 cyerrorNode = <cyruntime.cudaGraphNode_t><void_ptr>0
13069 elif isinstance(errorNode, (cudaGraphNode_t,driver.CUgraphNode)):
13070 perrorNode = int(errorNode)
13071 cyerrorNode = <cyruntime.cudaGraphNode_t><void_ptr>perrorNode
13072 else:
13073 perrorNode = int(cudaGraphNode_t(errorNode))
13074 cyerrorNode = <cyruntime.cudaGraphNode_t><void_ptr>perrorNode
13075 self._errorNode._pvt_ptr[0] = cyerrorNode
13076 @property
13077 def errorFromNode(self):
13078 return self._errorFromNode
13079 @errorFromNode.setter
13080 def errorFromNode(self, errorFromNode):
13081 cdef cyruntime.cudaGraphNode_t cyerrorFromNode
13082 if errorFromNode is None:
13083 cyerrorFromNode = <cyruntime.cudaGraphNode_t><void_ptr>0
13084 elif isinstance(errorFromNode, (cudaGraphNode_t,driver.CUgraphNode)):
13085 perrorFromNode = int(errorFromNode)
13086 cyerrorFromNode = <cyruntime.cudaGraphNode_t><void_ptr>perrorFromNode
13087 else:
13088 perrorFromNode = int(cudaGraphNode_t(errorFromNode))
13089 cyerrorFromNode = <cyruntime.cudaGraphNode_t><void_ptr>perrorFromNode
13090 self._errorFromNode._pvt_ptr[0] = cyerrorFromNode
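# Example (illustrative sketch, not generated code): cudaGraphExecUpdate
# fills this struct when re-targeting an instantiated graph; on a topology
# mismatch, errorFromNode/errorNode identify the offending edge. The
# `graphExec` and `graph` handles are assumed to exist already.
#
#     err, resultInfo = cudaGraphExecUpdate(graphExec, graph)
#     if resultInfo.result != cudaGraphExecUpdateResult.cudaGraphExecUpdateSuccess:
#         print('update failed:', resultInfo.result, 'node:', resultInfo.errorNode)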
13092cdef class anon_struct16:
13093 """
13094 Attributes
13095 ----------
13096 pValue : Any
13098 offset : size_t
13100 size : size_t
13103 Methods
13104 -------
13105 getPtr()
13106 Get memory address of class instance
13107 """
13108 def __cinit__(self, void_ptr _ptr):
13109 self._pvt_ptr = <cyruntime.cudaGraphKernelNodeUpdate *>_ptr
13111 def __init__(self, void_ptr _ptr):
13112 pass
13113 def __dealloc__(self):
13114 pass
13115 def getPtr(self):
13116 return <void_ptr>&self._pvt_ptr[0].updateData.param
13117 def __repr__(self):
13118 if self._pvt_ptr is not NULL:
13119 str_list = []
13120 try:
13121 str_list += ['pValue : ' + hex(self.pValue)]
13122 except ValueError:
13123 str_list += ['pValue : <ValueError>']
13124 try:
13125 str_list += ['offset : ' + str(self.offset)]
13126 except ValueError:
13127 str_list += ['offset : <ValueError>']
13128 try:
13129 str_list += ['size : ' + str(self.size)]
13130 except ValueError:
13131 str_list += ['size : <ValueError>']
13132 return '\n'.join(str_list)
13133 else:
13134 return ''
13135 @property
13136 def pValue(self):
13137 return <void_ptr>self._pvt_ptr[0].updateData.param.pValue
13138 @pValue.setter
13139 def pValue(self, pValue):
13140 _cpValue = _HelperInputVoidPtr(pValue)
13141 self._pvt_ptr[0].updateData.param.pValue = <void*><void_ptr>_cpValue.cptr
13142 @property
13143 def offset(self):
13144 return self._pvt_ptr[0].updateData.param.offset
13145 @offset.setter
13146 def offset(self, size_t offset):
13147 self._pvt_ptr[0].updateData.param.offset = offset
13148 @property
13149 def size(self):
13150 return self._pvt_ptr[0].updateData.param.size
13151 @size.setter
13152 def size(self, size_t size):
13153 self._pvt_ptr[0].updateData.param.size = size
13155cdef class anon_union8:
13156 """
13157 Attributes
13158 ----------
13159 gridDim : dim3
13161 param : anon_struct16
13163 isEnabled : unsigned int
13166 Methods
13167 -------
13168 getPtr()
13169 Get memory address of class instance
13170 """
13171 def __cinit__(self, void_ptr _ptr):
13172 self._pvt_ptr = <cyruntime.cudaGraphKernelNodeUpdate *>_ptr
13174 def __init__(self, void_ptr _ptr):
13176 self._gridDim = dim3(_ptr=<void_ptr>&self._pvt_ptr[0].updateData.gridDim)
13177 self._param = anon_struct16(_ptr=<void_ptr>self._pvt_ptr)
13178 def __dealloc__(self):
13179 pass
13180 def getPtr(self):
13181 return <void_ptr>&self._pvt_ptr[0].updateData
13182 def __repr__(self):
13183 if self._pvt_ptr is not NULL:
13184 str_list = []
13185 try:
13186 str_list += ['gridDim :\n' + '\n'.join([' ' + line for line in str(self.gridDim).splitlines()])]
13187 except ValueError:
13188 str_list += ['gridDim : <ValueError>']
13189 try:
13190 str_list += ['param :\n' + '\n'.join([' ' + line for line in str(self.param).splitlines()])]
13191 except ValueError:
13192 str_list += ['param : <ValueError>']
13193 try:
13194 str_list += ['isEnabled : ' + str(self.isEnabled)]
13195 except ValueError:
13196 str_list += ['isEnabled : <ValueError>']
13197 return '\n'.join(str_list)
13198 else:
13199 return ''
13200 @property
13201 def gridDim(self):
13202 return self._gridDim
13203 @gridDim.setter
13204 def gridDim(self, gridDim not None : dim3):
13205 string.memcpy(&self._pvt_ptr[0].updateData.gridDim, <cyruntime.dim3*><void_ptr>gridDim.getPtr(), sizeof(self._pvt_ptr[0].updateData.gridDim))
13206 @property
13207 def param(self):
13208 return self._param
13209 @param.setter
13210 def param(self, param not None : anon_struct16):
13211 string.memcpy(&self._pvt_ptr[0].updateData.param, <cyruntime.anon_struct16*><void_ptr>param.getPtr(), sizeof(self._pvt_ptr[0].updateData.param))
13212 @property
13213 def isEnabled(self):
13214 return self._pvt_ptr[0].updateData.isEnabled
13215 @isEnabled.setter
13216 def isEnabled(self, unsigned int isEnabled):
13217 self._pvt_ptr[0].updateData.isEnabled = isEnabled
13219cdef class cudaGraphKernelNodeUpdate:
13220 """
13221 Struct to specify a single node update to pass as part of a larger
13222 array to ::cudaGraphKernelNodeUpdatesApply
13224 Attributes
13225 ----------
13226 node : cudaGraphDeviceNode_t
13227 Node to update
13228 field : cudaGraphKernelNodeField
13229 Which type of update to apply. Determines how updateData is
13230 interpreted
13231 updateData : anon_union8
13232 Update data to apply. Which union member is used depends on the value of field.
13234 Methods
13235 -------
13236 getPtr()
13237 Get memory address of class instance
13238 """
13239 def __cinit__(self, void_ptr _ptr = 0):
13240 if _ptr == 0:
13241 self._val_ptr = <cyruntime.cudaGraphKernelNodeUpdate *>calloc(1, sizeof(cyruntime.cudaGraphKernelNodeUpdate))
13242 self._pvt_ptr = self._val_ptr
13243 else:
13244 self._pvt_ptr = <cyruntime.cudaGraphKernelNodeUpdate *>_ptr
13245 def __init__(self, void_ptr _ptr = 0):
13247 self._node = cudaGraphDeviceNode_t(_ptr=<void_ptr>&self._pvt_ptr[0].node)
13248 self._updateData = anon_union8(_ptr=<void_ptr>self._pvt_ptr)
13249 def __dealloc__(self):
13250 if self._val_ptr is not NULL:
13251 free(self._val_ptr)
13252 def getPtr(self):
13253 return <void_ptr>self._pvt_ptr
13254 def __repr__(self):
13255 if self._pvt_ptr is not NULL:
13256 str_list = []
13257 try:
13258 str_list += ['node : ' + str(self.node)]
13259 except ValueError:
13260 str_list += ['node : <ValueError>']
13261 try:
13262 str_list += ['field : ' + str(self.field)]
13263 except ValueError:
13264 str_list += ['field : <ValueError>']
13265 try:
13266 str_list += ['updateData :\n' + '\n'.join([' ' + line for line in str(self.updateData).splitlines()])]
13267 except ValueError:
13268 str_list += ['updateData : <ValueError>']
13269 return '\n'.join(str_list)
13270 else:
13271 return ''
13272 @property
13273 def node(self):
13274 return self._node
13275 @node.setter
13276 def node(self, node):
13277 cdef cyruntime.cudaGraphDeviceNode_t cynode
13278 if node is None:
13279 cynode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>0
13280 elif isinstance(node, (cudaGraphDeviceNode_t,)):
13281 pnode = int(node)
13282 cynode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>pnode
13283 else:
13284 pnode = int(cudaGraphDeviceNode_t(node))
13285 cynode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>pnode
13286 self._node._pvt_ptr[0] = cynode
13287 @property
13288 def field(self):
13289 if self._pvt_ptr[0].field not in _dict_cudaGraphKernelNodeField:
13290 return None
13291 return _dict_cudaGraphKernelNodeField[self._pvt_ptr[0].field]
13292 @field.setter
13293 def field(self, field not None : cudaGraphKernelNodeField):
13294 self._pvt_ptr[0].field = field.value
13295 @property
13296 def updateData(self):
13297 return self._updateData
13298 @updateData.setter
13299 def updateData(self, updateData not None : anon_union8):
13300 string.memcpy(&self._pvt_ptr[0].updateData, <cyruntime.anon_union8*><void_ptr>updateData.getPtr(), sizeof(self._pvt_ptr[0].updateData))
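# Example (illustrative sketch, not generated code): building one entry of an
# update array for a device-updatable kernel node; `field` selects which
# member of updateData is read. The devNode handle is assumed to come from a
# prior cudaLaunchAttributeDeviceUpdatableKernelNode launch attribute.
#
#     upd = cudaGraphKernelNodeUpdate()
#     upd.node = devNode
#     upd.field = cudaGraphKernelNodeField.cudaGraphKernelNodeFieldGridDim
#     upd.updateData.gridDim.x = 256
#     upd.updateData.gridDim.y = 1
#     upd.updateData.gridDim.z = 1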
13302cdef class cudaLaunchMemSyncDomainMap_st:
13303 """
13304 Memory Synchronization Domain map. See cudaLaunchMemSyncDomain. By
13305 default, kernels are launched in domain 0. Kernels launched with
13306 cudaLaunchMemSyncDomainRemote will have a different domain ID. Users
13307 may also alter the domain ID with ::cudaLaunchMemSyncDomainMap for a
13308 specific stream / graph node / kernel launch. See
13309 cudaLaunchAttributeMemSyncDomainMap. The domain ID range is
13310 available through cudaDevAttrMemSyncDomainCount.
13312 Attributes
13313 ----------
13314 default_ : unsigned char
13315 The default domain ID to use for designated kernels
13316 remote : unsigned char
13317 The remote domain ID to use for designated kernels
13319 Methods
13320 -------
13321 getPtr()
13322 Get memory address of class instance
13323 """
13324 def __cinit__(self, void_ptr _ptr = 0):
13325 if _ptr == 0:
13326 self._pvt_ptr = &self._pvt_val
13327 else:
13328 self._pvt_ptr = <cyruntime.cudaLaunchMemSyncDomainMap_st *>_ptr
13329 def __init__(self, void_ptr _ptr = 0):
13330 pass
13331 def __dealloc__(self):
13332 pass
13333 def getPtr(self):
13334 return <void_ptr>self._pvt_ptr
13335 def __repr__(self):
13336 if self._pvt_ptr is not NULL:
13337 str_list = []
13338 try:
13339 str_list += ['default_ : ' + str(self.default_)]
13340 except ValueError:
13341 str_list += ['default_ : <ValueError>']
13342 try:
13343 str_list += ['remote : ' + str(self.remote)]
13344 except ValueError:
13345 str_list += ['remote : <ValueError>']
13346 return '\n'.join(str_list)
13347 else:
13348 return ''
13349 @property
13350 def default_(self):
13351 return self._pvt_ptr[0].default_
13352 @default_.setter
13353 def default_(self, unsigned char default_):
13354 self._pvt_ptr[0].default_ = default_
13355 @property
13356 def remote(self):
13357 return self._pvt_ptr[0].remote
13358 @remote.setter
13359 def remote(self, unsigned char remote):
13360 self._pvt_ptr[0].remote = remote
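# Example (illustrative sketch, not generated code): remapping the default
# and remote sync domains for a launch through the matching launch attribute
# (cudaLaunchAttribute_st is defined further below). Domain IDs must stay
# below cudaDevAttrMemSyncDomainCount; the values here are illustrative.
#
#     attr = cudaLaunchAttribute_st()
#     attr.id = cudaLaunchAttributeID.cudaLaunchAttributeMemSyncDomainMap
#     attr.val.memSyncDomainMap.default_ = 0
#     attr.val.memSyncDomainMap.remote = 1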
13362cdef class anon_struct17:
13363 """
13364 Attributes
13365 ----------
13366 x : unsigned int
13368 y : unsigned int
13370 z : unsigned int
13373 Methods
13374 -------
13375 getPtr()
13376 Get memory address of class instance
13377 """
13378 def __cinit__(self, void_ptr _ptr):
13379 self._pvt_ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
13381 def __init__(self, void_ptr _ptr):
13382 pass
13383 def __dealloc__(self):
13384 pass
13385 def getPtr(self):
13386 return <void_ptr>&self._pvt_ptr[0].clusterDim
13387 def __repr__(self):
13388 if self._pvt_ptr is not NULL:
13389 str_list = []
13390 try:
13391 str_list += ['x : ' + str(self.x)]
13392 except ValueError:
13393 str_list += ['x : <ValueError>']
13394 try:
13395 str_list += ['y : ' + str(self.y)]
13396 except ValueError:
13397 str_list += ['y : <ValueError>']
13398 try:
13399 str_list += ['z : ' + str(self.z)]
13400 except ValueError:
13401 str_list += ['z : <ValueError>']
13402 return '\n'.join(str_list)
13403 else:
13404 return ''
13405 @property
13406 def x(self):
13407 return self._pvt_ptr[0].clusterDim.x
13408 @x.setter
13409 def x(self, unsigned int x):
13410 self._pvt_ptr[0].clusterDim.x = x
13411 @property
13412 def y(self):
13413 return self._pvt_ptr[0].clusterDim.y
13414 @y.setter
13415 def y(self, unsigned int y):
13416 self._pvt_ptr[0].clusterDim.y = y
13417 @property
13418 def z(self):
13419 return self._pvt_ptr[0].clusterDim.z
13420 @z.setter
13421 def z(self, unsigned int z):
13422 self._pvt_ptr[0].clusterDim.z = z
13424cdef class anon_struct18:
13425 """
13426 Attributes
13427 ----------
13428 event : cudaEvent_t
13430 flags : int
13432 triggerAtBlockStart : int
13435 Methods
13436 -------
13437 getPtr()
13438 Get memory address of class instance
13439 """
13440 def __cinit__(self, void_ptr _ptr):
13441 self._pvt_ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
13443 def __init__(self, void_ptr _ptr):
13445 self._event = cudaEvent_t(_ptr=<void_ptr>&self._pvt_ptr[0].programmaticEvent.event)
13446 def __dealloc__(self):
13447 pass
13448 def getPtr(self):
13449 return <void_ptr>&self._pvt_ptr[0].programmaticEvent
13450 def __repr__(self):
13451 if self._pvt_ptr is not NULL:
13452 str_list = []
13453 try:
13454 str_list += ['event : ' + str(self.event)]
13455 except ValueError:
13456 str_list += ['event : <ValueError>']
13457 try:
13458 str_list += ['flags : ' + str(self.flags)]
13459 except ValueError:
13460 str_list += ['flags : <ValueError>']
13461 try:
13462 str_list += ['triggerAtBlockStart : ' + str(self.triggerAtBlockStart)]
13463 except ValueError:
13464 str_list += ['triggerAtBlockStart : <ValueError>']
13465 return '\n'.join(str_list)
13466 else:
13467 return ''
13468 @property
13469 def event(self):
13470 return self._event
13471 @event.setter
13472 def event(self, event):
13473 cdef cyruntime.cudaEvent_t cyevent
13474 if event is None:
13475 cyevent = <cyruntime.cudaEvent_t><void_ptr>0
13476 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
13477 pevent = int(event)
13478 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
13479 else:
13480 pevent = int(cudaEvent_t(event))
13481 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
13482 self._event._pvt_ptr[0] = cyevent
13483 @property
13484 def flags(self):
13485 return self._pvt_ptr[0].programmaticEvent.flags
13486 @flags.setter
13487 def flags(self, int flags):
13488 self._pvt_ptr[0].programmaticEvent.flags = flags
13489 @property
13490 def triggerAtBlockStart(self):
13491 return self._pvt_ptr[0].programmaticEvent.triggerAtBlockStart
13492 @triggerAtBlockStart.setter
13493 def triggerAtBlockStart(self, int triggerAtBlockStart):
13494 self._pvt_ptr[0].programmaticEvent.triggerAtBlockStart = triggerAtBlockStart
13496cdef class anon_struct19:
13497 """
13498 Attributes
13499 ----------
13500 x : unsigned int
13502 y : unsigned int
13504 z : unsigned int
13507 Methods
13508 -------
13509 getPtr()
13510 Get memory address of class instance
13511 """
13512 def __cinit__(self, void_ptr _ptr):
13513 self._pvt_ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
13515 def __init__(self, void_ptr _ptr):
13516 pass
13517 def __dealloc__(self):
13518 pass
13519 def getPtr(self):
13520 return <void_ptr>&self._pvt_ptr[0].preferredClusterDim
13521 def __repr__(self):
13522 if self._pvt_ptr is not NULL:
13523 str_list = []
13524 try:
13525 str_list += ['x : ' + str(self.x)]
13526 except ValueError:
13527 str_list += ['x : <ValueError>']
13528 try:
13529 str_list += ['y : ' + str(self.y)]
13530 except ValueError:
13531 str_list += ['y : <ValueError>']
13532 try:
13533 str_list += ['z : ' + str(self.z)]
13534 except ValueError:
13535 str_list += ['z : <ValueError>']
13536 return '\n'.join(str_list)
13537 else:
13538 return ''
13539 @property
13540 def x(self):
13541 return self._pvt_ptr[0].preferredClusterDim.x
13542 @x.setter
13543 def x(self, unsigned int x):
13544 self._pvt_ptr[0].preferredClusterDim.x = x
13545 @property
13546 def y(self):
13547 return self._pvt_ptr[0].preferredClusterDim.y
13548 @y.setter
13549 def y(self, unsigned int y):
13550 self._pvt_ptr[0].preferredClusterDim.y = y
13551 @property
13552 def z(self):
13553 return self._pvt_ptr[0].preferredClusterDim.z
13554 @z.setter
13555 def z(self, unsigned int z):
13556 self._pvt_ptr[0].preferredClusterDim.z = z
13558cdef class anon_struct20:
13559 """
13560 Attributes
13561 ----------
13562 event : cudaEvent_t
13564 flags : int
13567 Methods
13568 -------
13569 getPtr()
13570 Get memory address of class instance
13571 """
13572 def __cinit__(self, void_ptr _ptr):
13573 self._pvt_ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
13575 def __init__(self, void_ptr _ptr):
13577 self._event = cudaEvent_t(_ptr=<void_ptr>&self._pvt_ptr[0].launchCompletionEvent.event)
13578 def __dealloc__(self):
13579 pass
13580 def getPtr(self):
13581 return <void_ptr>&self._pvt_ptr[0].launchCompletionEvent
13582 def __repr__(self):
13583 if self._pvt_ptr is not NULL:
13584 str_list = []
13585 try:
13586 str_list += ['event : ' + str(self.event)]
13587 except ValueError:
13588 str_list += ['event : <ValueError>']
13589 try:
13590 str_list += ['flags : ' + str(self.flags)]
13591 except ValueError:
13592 str_list += ['flags : <ValueError>']
13593 return '\n'.join(str_list)
13594 else:
13595 return ''
13596 @property
13597 def event(self):
13598 return self._event
13599 @event.setter
13600 def event(self, event):
13601 cdef cyruntime.cudaEvent_t cyevent
13602 if event is None:
13603 cyevent = <cyruntime.cudaEvent_t><void_ptr>0
13604 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
13605 pevent = int(event)
13606 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
13607 else:
13608 pevent = int(cudaEvent_t(event))
13609 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
13610 self._event._pvt_ptr[0] = cyevent
13611 @property
13612 def flags(self):
13613 return self._pvt_ptr[0].launchCompletionEvent.flags
13614 @flags.setter
13615 def flags(self, int flags):
13616 self._pvt_ptr[0].launchCompletionEvent.flags = flags
13618cdef class anon_struct21:
13619 """
13620 Attributes
13621 ----------
13622 deviceUpdatable : int
13624 devNode : cudaGraphDeviceNode_t
13627 Methods
13628 -------
13629 getPtr()
13630 Get memory address of class instance
13631 """
13632 def __cinit__(self, void_ptr _ptr):
13633 self._pvt_ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
13635 def __init__(self, void_ptr _ptr):
13637 self._devNode = cudaGraphDeviceNode_t(_ptr=<void_ptr>&self._pvt_ptr[0].deviceUpdatableKernelNode.devNode)
13638 def __dealloc__(self):
13639 pass
13640 def getPtr(self):
13641 return <void_ptr>&self._pvt_ptr[0].deviceUpdatableKernelNode
13642 def __repr__(self):
13643 if self._pvt_ptr is not NULL:
13644 str_list = []
13645 try:
13646 str_list += ['deviceUpdatable : ' + str(self.deviceUpdatable)]
13647 except ValueError:
13648 str_list += ['deviceUpdatable : <ValueError>']
13649 try:
13650 str_list += ['devNode : ' + str(self.devNode)]
13651 except ValueError:
13652 str_list += ['devNode : <ValueError>']
13653 return '\n'.join(str_list)
13654 else:
13655 return ''
13656 @property
13657 def deviceUpdatable(self):
13658 return self._pvt_ptr[0].deviceUpdatableKernelNode.deviceUpdatable
13659 @deviceUpdatable.setter
13660 def deviceUpdatable(self, int deviceUpdatable):
13661 self._pvt_ptr[0].deviceUpdatableKernelNode.deviceUpdatable = deviceUpdatable
13662 @property
13663 def devNode(self):
13664 return self._devNode
13665 @devNode.setter
13666 def devNode(self, devNode):
13667 cdef cyruntime.cudaGraphDeviceNode_t cydevNode
13668 if devNode is None:
13669 cydevNode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>0
13670 elif isinstance(devNode, (cudaGraphDeviceNode_t,)):
13671 pdevNode = int(devNode)
13672 cydevNode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>pdevNode
13673 else:
13674 pdevNode = int(cudaGraphDeviceNode_t(devNode))
13675 cydevNode = <cyruntime.cudaGraphDeviceNode_t><void_ptr>pdevNode
13676 self._devNode._pvt_ptr[0] = cydevNode
13678cdef class cudaLaunchAttributeValue:
13679 """
13680 Launch attributes union; used as the value field of
13681 ::cudaLaunchAttribute
13683 Attributes
13684 ----------
13685 pad : bytes
13687 accessPolicyWindow : cudaAccessPolicyWindow
13688 Value of launch attribute cudaLaunchAttributeAccessPolicyWindow.
13689 cooperative : int
13690 Value of launch attribute cudaLaunchAttributeCooperative. Nonzero
13691 indicates a cooperative kernel (see cudaLaunchCooperativeKernel).
13692 syncPolicy : cudaSynchronizationPolicy
13693 Value of launch attribute cudaLaunchAttributeSynchronizationPolicy.
13694 ::cudaSynchronizationPolicy for work queued up in this stream.
13695 clusterDim : anon_struct17
13696 Value of launch attribute cudaLaunchAttributeClusterDimension that
13697 represents the desired cluster dimensions for the kernel. Opaque
13698 type with the following fields: - `x` - The X dimension of the
13699 cluster, in blocks. Must be a divisor of the grid X dimension. -
13700 `y` - The Y dimension of the cluster, in blocks. Must be a divisor
13701 of the grid Y dimension. - `z` - The Z dimension of the cluster,
13702 in blocks. Must be a divisor of the grid Z dimension.
13703 clusterSchedulingPolicyPreference : cudaClusterSchedulingPolicy
13704 Value of launch attribute
13705 cudaLaunchAttributeClusterSchedulingPolicyPreference. Cluster
13706 scheduling policy preference for the kernel.
13707 programmaticStreamSerializationAllowed : int
13708 Value of launch attribute
13709 cudaLaunchAttributeProgrammaticStreamSerialization.
13710 programmaticEvent : anon_struct18
13711 Value of launch attribute cudaLaunchAttributeProgrammaticEvent with
13712 the following fields: - `cudaEvent_t` event - Event to fire when
13713 all blocks trigger it. - `int` flags - Event record flags, see
13714 cudaEventRecordWithFlags. Does not accept cudaEventRecordExternal.
13715 - `int` triggerAtBlockStart - If this is set to nonzero, each block
13716 launch will automatically trigger the event.
13717 priority : int
13718 Value of launch attribute cudaLaunchAttributePriority. Execution
13719 priority of the kernel.
13720 memSyncDomainMap : cudaLaunchMemSyncDomainMap
13721 Value of launch attribute cudaLaunchAttributeMemSyncDomainMap. See
13722 ::cudaLaunchMemSyncDomainMap.
13723 memSyncDomain : cudaLaunchMemSyncDomain
13724 Value of launch attribute cudaLaunchAttributeMemSyncDomain. See
13725 cudaLaunchMemSyncDomain.
13726 preferredClusterDim : anon_struct19
13727 Value of launch attribute
13728 cudaLaunchAttributePreferredClusterDimension that represents the
13729 desired preferred cluster dimensions for the kernel. Opaque type
13730 with the following fields: - `x` - The X dimension of the preferred
13731 cluster, in blocks. Must be a divisor of the grid X dimension, and
13732 must be a multiple of the `x` field of
13733 cudaLaunchAttributeValue::clusterDim. - `y` - The Y dimension of
13734 the preferred cluster, in blocks. Must be a divisor of the grid Y
13735 dimension, and must be a multiple of the `y` field of
13736 cudaLaunchAttributeValue::clusterDim. - `z` - The Z dimension of
13737 the preferred cluster, in blocks. Must be equal to the `z` field of
13738 cudaLaunchAttributeValue::clusterDim.
13739 launchCompletionEvent : anon_struct20
13740 Value of launch attribute cudaLaunchAttributeLaunchCompletionEvent
13741 with the following fields: - `cudaEvent_t` event - Event to fire
13742 when the last block launches. - `int` flags - Event record
13743 flags, see cudaEventRecordWithFlags. Does not accept
13744 cudaEventRecordExternal.
13745 deviceUpdatableKernelNode : anon_struct21
13746 Value of launch attribute
13747 cudaLaunchAttributeDeviceUpdatableKernelNode with the following
13748 fields: - `int` deviceUpdatable - Whether or not the resulting
13749 kernel node should be device-updatable. -
13750 `cudaGraphDeviceNode_t` devNode - Returns a handle to pass to the
13751 various device-side update functions.
13752 sharedMemCarveout : unsigned int
13753 Value of launch attribute
13754 cudaLaunchAttributePreferredSharedMemoryCarveout.
13755 nvlinkUtilCentricScheduling : unsigned int
13756 Value of launch attribute
13757 cudaLaunchAttributeNvlinkUtilCentricScheduling.
13759 Methods
13760 -------
13761 getPtr()
13762 Get memory address of class instance
13763 """
13764 def __cinit__(self, void_ptr _ptr = 0):
13765 if _ptr == 0:
13766 self._pvt_ptr = &self._pvt_val
13767 else:
13768 self._pvt_ptr = <cyruntime.cudaLaunchAttributeValue *>_ptr
13769 def __init__(self, void_ptr _ptr = 0):
13771 self._accessPolicyWindow = cudaAccessPolicyWindow(_ptr=<void_ptr>&self._pvt_ptr[0].accessPolicyWindow)
13772 self._clusterDim = anon_struct17(_ptr=<void_ptr>self._pvt_ptr)
13773 self._programmaticEvent = anon_struct18(_ptr=<void_ptr>self._pvt_ptr)
13774 self._memSyncDomainMap = cudaLaunchMemSyncDomainMap(_ptr=<void_ptr>&self._pvt_ptr[0].memSyncDomainMap)
13775 self._preferredClusterDim = anon_struct19(_ptr=<void_ptr>self._pvt_ptr)
13776 self._launchCompletionEvent = anon_struct20(_ptr=<void_ptr>self._pvt_ptr)
13777 self._deviceUpdatableKernelNode = anon_struct21(_ptr=<void_ptr>self._pvt_ptr)
13778 def __dealloc__(self):
13779 pass
13780 def getPtr(self):
13781 return <void_ptr>self._pvt_ptr
13782 def __repr__(self):
13783 if self._pvt_ptr is not NULL:
13784 str_list = []
13785 try:
13786 str_list += ['pad : ' + str(self.pad)]
13787 except ValueError:
13788 str_list += ['pad : <ValueError>']
13789 try:
13790 str_list += ['accessPolicyWindow :\n' + '\n'.join([' ' + line for line in str(self.accessPolicyWindow).splitlines()])]
13791 except ValueError:
13792 str_list += ['accessPolicyWindow : <ValueError>']
13793 try:
13794 str_list += ['cooperative : ' + str(self.cooperative)]
13795 except ValueError:
13796 str_list += ['cooperative : <ValueError>']
13797 try:
13798 str_list += ['syncPolicy : ' + str(self.syncPolicy)]
13799 except ValueError:
13800 str_list += ['syncPolicy : <ValueError>']
13801 try:
13802 str_list += ['clusterDim :\n' + '\n'.join([' ' + line for line in str(self.clusterDim).splitlines()])]
13803 except ValueError:
13804 str_list += ['clusterDim : <ValueError>']
13805 try:
13806 str_list += ['clusterSchedulingPolicyPreference : ' + str(self.clusterSchedulingPolicyPreference)]
13807 except ValueError:
13808 str_list += ['clusterSchedulingPolicyPreference : <ValueError>']
13809 try:
13810 str_list += ['programmaticStreamSerializationAllowed : ' + str(self.programmaticStreamSerializationAllowed)]
13811 except ValueError:
13812 str_list += ['programmaticStreamSerializationAllowed : <ValueError>']
13813 try:
13814 str_list += ['programmaticEvent :\n' + '\n'.join([' ' + line for line in str(self.programmaticEvent).splitlines()])]
13815 except ValueError:
13816 str_list += ['programmaticEvent : <ValueError>']
13817 try:
13818 str_list += ['priority : ' + str(self.priority)]
13819 except ValueError:
13820 str_list += ['priority : <ValueError>']
13821 try:
13822 str_list += ['memSyncDomainMap :\n' + '\n'.join([' ' + line for line in str(self.memSyncDomainMap).splitlines()])]
13823 except ValueError:
13824 str_list += ['memSyncDomainMap : <ValueError>']
13825 try:
13826 str_list += ['memSyncDomain : ' + str(self.memSyncDomain)]
13827 except ValueError:
13828 str_list += ['memSyncDomain : <ValueError>']
13829 try:
13830 str_list += ['preferredClusterDim :\n' + '\n'.join([' ' + line for line in str(self.preferredClusterDim).splitlines()])]
13831 except ValueError:
13832 str_list += ['preferredClusterDim : <ValueError>']
13833 try:
13834 str_list += ['launchCompletionEvent :\n' + '\n'.join([' ' + line for line in str(self.launchCompletionEvent).splitlines()])]
13835 except ValueError:
13836 str_list += ['launchCompletionEvent : <ValueError>']
13837 try:
13838 str_list += ['deviceUpdatableKernelNode :\n' + '\n'.join([' ' + line for line in str(self.deviceUpdatableKernelNode).splitlines()])]
13839 except ValueError:
13840 str_list += ['deviceUpdatableKernelNode : <ValueError>']
13841 try:
13842 str_list += ['sharedMemCarveout : ' + str(self.sharedMemCarveout)]
13843 except ValueError:
13844 str_list += ['sharedMemCarveout : <ValueError>']
13845 try:
13846 str_list += ['nvlinkUtilCentricScheduling : ' + str(self.nvlinkUtilCentricScheduling)]
13847 except ValueError:
13848 str_list += ['nvlinkUtilCentricScheduling : <ValueError>']
13849 return '\n'.join(str_list)
13850 else:
13851 return ''
13852 @property
13853 def pad(self):
13854 return PyBytes_FromStringAndSize(self._pvt_ptr[0].pad, 64)
13855 @pad.setter
13856 def pad(self, pad):
13857 if len(pad) != 64:
13858 raise ValueError("pad length must be 64, is " + str(len(pad)))
13859 if CHAR_MIN == 0:
13860 for i, b in enumerate(pad):
13861 if b < 0 and b > -129:
13862 b = b + 256
13863 self._pvt_ptr[0].pad[i] = b
13864 else:
13865 for i, b in enumerate(pad):
13866 if b > 127 and b < 256:
13867 b = b - 256
13868 self._pvt_ptr[0].pad[i] = b
13869 @property
13870 def accessPolicyWindow(self):
13871 return self._accessPolicyWindow
13872 @accessPolicyWindow.setter
13873 def accessPolicyWindow(self, accessPolicyWindow not None : cudaAccessPolicyWindow):
13874 string.memcpy(&self._pvt_ptr[0].accessPolicyWindow, <cyruntime.cudaAccessPolicyWindow*><void_ptr>accessPolicyWindow.getPtr(), sizeof(self._pvt_ptr[0].accessPolicyWindow))
13875 @property
13876 def cooperative(self):
13877 return self._pvt_ptr[0].cooperative
13878 @cooperative.setter
13879 def cooperative(self, int cooperative):
13880 self._pvt_ptr[0].cooperative = cooperative
13881 @property
13882 def syncPolicy(self):
13883 if self._pvt_ptr[0].syncPolicy not in _dict_cudaSynchronizationPolicy:
13884 return None
13885 return _dict_cudaSynchronizationPolicy[self._pvt_ptr[0].syncPolicy]
13886 @syncPolicy.setter
13887 def syncPolicy(self, syncPolicy not None : cudaSynchronizationPolicy):
13888 self._pvt_ptr[0].syncPolicy = syncPolicy.value
13889 @property
13890 def clusterDim(self):
13891 return self._clusterDim
13892 @clusterDim.setter
13893 def clusterDim(self, clusterDim not None : anon_struct17):
13894 string.memcpy(&self._pvt_ptr[0].clusterDim, <cyruntime.anon_struct17*><void_ptr>clusterDim.getPtr(), sizeof(self._pvt_ptr[0].clusterDim))
13895 @property
13896 def clusterSchedulingPolicyPreference(self):
13897 if self._pvt_ptr[0].clusterSchedulingPolicyPreference not in _dict_cudaClusterSchedulingPolicy:
13898 return None
13899 return _dict_cudaClusterSchedulingPolicy[self._pvt_ptr[0].clusterSchedulingPolicyPreference]
13900 @clusterSchedulingPolicyPreference.setter
13901 def clusterSchedulingPolicyPreference(self, clusterSchedulingPolicyPreference not None : cudaClusterSchedulingPolicy):
13902 self._pvt_ptr[0].clusterSchedulingPolicyPreference = clusterSchedulingPolicyPreference.value
13903 @property
13904 def programmaticStreamSerializationAllowed(self):
13905 return self._pvt_ptr[0].programmaticStreamSerializationAllowed
13906 @programmaticStreamSerializationAllowed.setter
13907 def programmaticStreamSerializationAllowed(self, int programmaticStreamSerializationAllowed):
13908 self._pvt_ptr[0].programmaticStreamSerializationAllowed = programmaticStreamSerializationAllowed
13909 @property
13910 def programmaticEvent(self):
13911 return self._programmaticEvent
13912 @programmaticEvent.setter
13913 def programmaticEvent(self, programmaticEvent not None : anon_struct18):
13914 string.memcpy(&self._pvt_ptr[0].programmaticEvent, <cyruntime.anon_struct18*><void_ptr>programmaticEvent.getPtr(), sizeof(self._pvt_ptr[0].programmaticEvent))
13915 @property
13916 def priority(self):
13917 return self._pvt_ptr[0].priority
13918 @priority.setter
13919 def priority(self, int priority):
13920 self._pvt_ptr[0].priority = priority
13921 @property
13922 def memSyncDomainMap(self):
13923 return self._memSyncDomainMap
13924 @memSyncDomainMap.setter
13925 def memSyncDomainMap(self, memSyncDomainMap not None : cudaLaunchMemSyncDomainMap):
13926 string.memcpy(&self._pvt_ptr[0].memSyncDomainMap, <cyruntime.cudaLaunchMemSyncDomainMap*><void_ptr>memSyncDomainMap.getPtr(), sizeof(self._pvt_ptr[0].memSyncDomainMap))
13927 @property
13928 def memSyncDomain(self):
13929 if self._pvt_ptr[0].memSyncDomain not in _dict_cudaLaunchMemSyncDomain:
13930 return None
13931 return _dict_cudaLaunchMemSyncDomain[self._pvt_ptr[0].memSyncDomain]
13932 @memSyncDomain.setter
13933 def memSyncDomain(self, memSyncDomain not None : cudaLaunchMemSyncDomain):
13934 self._pvt_ptr[0].memSyncDomain = memSyncDomain.value
13935 @property
13936 def preferredClusterDim(self):
13937 return self._preferredClusterDim
13938 @preferredClusterDim.setter
13939 def preferredClusterDim(self, preferredClusterDim not None : anon_struct19):
13940 string.memcpy(&self._pvt_ptr[0].preferredClusterDim, <cyruntime.anon_struct19*><void_ptr>preferredClusterDim.getPtr(), sizeof(self._pvt_ptr[0].preferredClusterDim))
13941 @property
13942 def launchCompletionEvent(self):
13943 return self._launchCompletionEvent
13944 @launchCompletionEvent.setter
13945 def launchCompletionEvent(self, launchCompletionEvent not None : anon_struct20):
13946 string.memcpy(&self._pvt_ptr[0].launchCompletionEvent, <cyruntime.anon_struct20*><void_ptr>launchCompletionEvent.getPtr(), sizeof(self._pvt_ptr[0].launchCompletionEvent))
13947 @property
13948 def deviceUpdatableKernelNode(self):
13949 return self._deviceUpdatableKernelNode
13950 @deviceUpdatableKernelNode.setter
13951 def deviceUpdatableKernelNode(self, deviceUpdatableKernelNode not None : anon_struct21):
13952 string.memcpy(&self._pvt_ptr[0].deviceUpdatableKernelNode, <cyruntime.anon_struct21*><void_ptr>deviceUpdatableKernelNode.getPtr(), sizeof(self._pvt_ptr[0].deviceUpdatableKernelNode))
13953 @property
13954 def sharedMemCarveout(self):
13955 return self._pvt_ptr[0].sharedMemCarveout
13956 @sharedMemCarveout.setter
13957 def sharedMemCarveout(self, unsigned int sharedMemCarveout):
13958 self._pvt_ptr[0].sharedMemCarveout = sharedMemCarveout
13959 @property
13960 def nvlinkUtilCentricScheduling(self):
13961 return self._pvt_ptr[0].nvlinkUtilCentricScheduling
13962 @nvlinkUtilCentricScheduling.setter
13963 def nvlinkUtilCentricScheduling(self, unsigned int nvlinkUtilCentricScheduling):
13964 self._pvt_ptr[0].nvlinkUtilCentricScheduling = nvlinkUtilCentricScheduling
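# Example (illustrative sketch, not generated code): the value is a union, so
# set only the member that matches the attribute id it will be paired with,
# e.g. a 2x1x1 thread-block cluster:
#
#     val = cudaLaunchAttributeValue()
#     val.clusterDim.x, val.clusterDim.y, val.clusterDim.z = 2, 1, 1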
13966cdef class cudaLaunchAttribute_st:
13967 """
13968 Launch attribute
13970 Attributes
13971 ----------
13972 id : cudaLaunchAttributeID
13973 Attribute to set
13974 val : cudaLaunchAttributeValue
13975 Value of the attribute
13977 Methods
13978 -------
13979 getPtr()
13980 Get memory address of class instance
13981 """
13982 def __cinit__(self, void_ptr _ptr = 0):
13983 if _ptr == 0:
13984 self._pvt_ptr = &self._pvt_val
13985 else:
13986 self._pvt_ptr = <cyruntime.cudaLaunchAttribute_st *>_ptr
13987 def __init__(self, void_ptr _ptr = 0):
13989 self._val = cudaLaunchAttributeValue(_ptr=<void_ptr>&self._pvt_ptr[0].val)
13990 def __dealloc__(self):
13991 pass
13992 def getPtr(self):
13993 return <void_ptr>self._pvt_ptr
13994 def __repr__(self):
13995 if self._pvt_ptr is not NULL:
13996 str_list = []
13997 try:
13998 str_list += ['id : ' + str(self.id)]
13999 except ValueError:
14000 str_list += ['id : <ValueError>']
14001 try:
14002 str_list += ['val :\n' + '\n'.join([' ' + line for line in str(self.val).splitlines()])]
14003 except ValueError:
14004 str_list += ['val : <ValueError>']
14005 return '\n'.join(str_list)
14006 else:
14007 return ''
14008 @property
14009 def id(self):
14010 if self._pvt_ptr[0].id not in _dict_cudaLaunchAttributeID:
14011 return None
14012 return _dict_cudaLaunchAttributeID[self._pvt_ptr[0].id]
14013 @id.setter
14014 def id(self, id not None : cudaLaunchAttributeID):
14015 self._pvt_ptr[0].id = id.value
14016 @property
14017 def val(self):
14018 return self._val
14019 @val.setter
14020 def val(self, val not None : cudaLaunchAttributeValue):
14021 string.memcpy(&self._pvt_ptr[0].val, <cyruntime.cudaLaunchAttributeValue*><void_ptr>val.getPtr(), sizeof(self._pvt_ptr[0].val))
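# Example (illustrative sketch, not generated code): pairing an attribute id
# with its value, then attaching it to a launch configuration. The
# cudaLaunchConfig_t usage with cudaLaunchKernelEx is an assumption based on
# the runtime API, not something defined in this section.
#
#     attr = cudaLaunchAttribute_st()
#     attr.id = cudaLaunchAttributeID.cudaLaunchAttributePriority
#     attr.val.priority = 1
#     config = cudaLaunchConfig_t()
#     config.attrs = [attr]
#     config.numAttrs = 1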
14023cdef class anon_struct22:
14024 """
14025 Attributes
14026 ----------
14027 bytesOverBudget : unsigned long long
14030 Methods
14031 -------
14032 getPtr()
14033 Get memory address of class instance
14034 """
14035 def __cinit__(self, void_ptr _ptr):
14036 self._pvt_ptr = <cyruntime.cudaAsyncNotificationInfo *>_ptr
14038 def __init__(self, void_ptr _ptr):
14039 pass
14040 def __dealloc__(self):
14041 pass
14042 def getPtr(self):
14043 return <void_ptr>&self._pvt_ptr[0].info.overBudget
14044 def __repr__(self):
14045 if self._pvt_ptr is not NULL:
14046 str_list = []
14047 try:
14048 str_list += ['bytesOverBudget : ' + str(self.bytesOverBudget)]
14049 except ValueError:
14050 str_list += ['bytesOverBudget : <ValueError>']
14051 return '\n'.join(str_list)
14052 else:
14053 return ''
14054 @property
14055 def bytesOverBudget(self):
14056 return self._pvt_ptr[0].info.overBudget.bytesOverBudget
14057 @bytesOverBudget.setter
14058 def bytesOverBudget(self, unsigned long long bytesOverBudget):
14059 self._pvt_ptr[0].info.overBudget.bytesOverBudget = bytesOverBudget
14061cdef class anon_union9:
14062 """
14063 Attributes
14064 ----------
14065 overBudget : anon_struct22
14068 Methods
14069 -------
14070 getPtr()
14071 Get memory address of class instance
14072 """
14073 def __cinit__(self, void_ptr _ptr):
14074 self._pvt_ptr = <cyruntime.cudaAsyncNotificationInfo *>_ptr
14076 def __init__(self, void_ptr _ptr):
14078 self._overBudget = anon_struct22(_ptr=<void_ptr>self._pvt_ptr)
14079 def __dealloc__(self):
14080 pass
14081 def getPtr(self):
14082 return <void_ptr>&self._pvt_ptr[0].info
14083 def __repr__(self):
14084 if self._pvt_ptr is not NULL:
14085 str_list = []
14086 try:
14087 str_list += ['overBudget :\n' + '\n'.join([' ' + line for line in str(self.overBudget).splitlines()])]
14088 except ValueError:
14089 str_list += ['overBudget : <ValueError>']
14090 return '\n'.join(str_list)
14091 else:
14092 return ''
14093 @property
14094 def overBudget(self):
14095 return self._overBudget
14096 @overBudget.setter
14097 def overBudget(self, overBudget not None : anon_struct22):
14098 string.memcpy(&self._pvt_ptr[0].info.overBudget, <cyruntime.anon_struct22*><void_ptr>overBudget.getPtr(), sizeof(self._pvt_ptr[0].info.overBudget))
14100cdef class cudaAsyncNotificationInfo:
14101 """
14102 Information describing an async notification event
14104 Attributes
14105 ----------
14106 type : cudaAsyncNotificationType
14107 The type of notification being sent
14108 info : anon_union9
14109 Information about the notification. `type` must be checked in
14110 order to interpret this field.
14112 Methods
14113 -------
14114 getPtr()
14115 Get memory address of class instance
14116 """
14117 def __cinit__(self, void_ptr _ptr = 0):
14118 if _ptr == 0:
14119 self._val_ptr = <cyruntime.cudaAsyncNotificationInfo *>calloc(1, sizeof(cyruntime.cudaAsyncNotificationInfo))
14120 self._pvt_ptr = self._val_ptr
14121 else:
14122 self._pvt_ptr = <cyruntime.cudaAsyncNotificationInfo *>_ptr
14123 def __init__(self, void_ptr _ptr = 0):
14125 self._info = anon_union9(_ptr=<void_ptr>self._pvt_ptr)
14126 def __dealloc__(self):
14127 if self._val_ptr is not NULL:
14128 free(self._val_ptr)
14129 def getPtr(self):
14130 return <void_ptr>self._pvt_ptr
14131 def __repr__(self):
14132 if self._pvt_ptr is not NULL:
14133 str_list = []
14134 try:
14135 str_list += ['type : ' + str(self.type)]
14136 except ValueError:
14137 str_list += ['type : <ValueError>']
14138 try:
14139 str_list += ['info :\n' + '\n'.join([' ' + line for line in str(self.info).splitlines()])]
14140 except ValueError:
14141 str_list += ['info : <ValueError>']
14142 return '\n'.join(str_list)
14143 else:
14144 return ''
14145 @property
14146 def type(self):
14147 if self._pvt_ptr[0].type not in _dict_cudaAsyncNotificationType:
14148 return None
14149 return _dict_cudaAsyncNotificationType[self._pvt_ptr[0].type]
14150 @type.setter
14151 def type(self, type not None : cudaAsyncNotificationType):
14152 self._pvt_ptr[0].type = type.value
14153 @property
14154 def info(self):
14155 return self._info
14156 @info.setter
14157 def info(self, info not None : anon_union9):
14158 string.memcpy(&self._pvt_ptr[0].info, <cyruntime.anon_union9*><void_ptr>info.getPtr(), sizeof(self._pvt_ptr[0].info))
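# Example (illustrative sketch, not generated code): in an async-notification
# callback, check `type` before reading `info`; only the matching union
# member is valid.
#
#     def on_notification(notif):
#         if notif.type == cudaAsyncNotificationType.cudaAsyncNotificationTypeOverBudget:
#             print('over budget by', notif.info.overBudget.bytesOverBudget, 'bytes')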
14160cdef class cudaTextureDesc:
14161 """
14162 CUDA texture descriptor
14164 Attributes
14165 ----------
14166 addressMode : list[cudaTextureAddressMode]
14167 Texture address mode for up to 3 dimensions
14168 filterMode : cudaTextureFilterMode
14169 Texture filter mode
14170 readMode : cudaTextureReadMode
14171 Texture read mode
14172 sRGB : int
14173 Perform sRGB->linear conversion during texture read
14174 borderColor : list[float]
14175 Texture Border Color
14176 normalizedCoords : int
14177 Indicates whether texture reads are normalized or not
14178 maxAnisotropy : unsigned int
14179 Limit to the anisotropy ratio
14180 mipmapFilterMode : cudaTextureFilterMode
14181 Mipmap filter mode
14182 mipmapLevelBias : float
14183 Offset applied to the supplied mipmap level
14184 minMipmapLevelClamp : float
14185 Lower end of the mipmap level range to clamp access to
14186 maxMipmapLevelClamp : float
14187 Upper end of the mipmap level range to clamp access to
14188 disableTrilinearOptimization : int
14189 Disable any trilinear filtering optimizations.
14190 seamlessCubemap : int
14191 Enable seamless cube map filtering.
14193 Methods
14194 -------
14195 getPtr()
14196 Get memory address of class instance
14197 """
14198 def __cinit__(self, void_ptr _ptr = 0):
14199 if _ptr == 0:
14200 self._pvt_ptr = &self._pvt_val
14201 else:
14202 self._pvt_ptr = <cyruntime.cudaTextureDesc *>_ptr
14203 def __init__(self, void_ptr _ptr = 0):
14204 pass
14205 def __dealloc__(self):
14206 pass
14207 def getPtr(self):
14208 return <void_ptr>self._pvt_ptr
14209 def __repr__(self):
14210 if self._pvt_ptr is not NULL:
14211 str_list = []
14212 try:
14213 str_list += ['addressMode : ' + str(self.addressMode)]
14214 except ValueError:
14215 str_list += ['addressMode : <ValueError>']
14216 try:
14217 str_list += ['filterMode : ' + str(self.filterMode)]
14218 except ValueError:
14219 str_list += ['filterMode : <ValueError>']
14220 try:
14221 str_list += ['readMode : ' + str(self.readMode)]
14222 except ValueError:
14223 str_list += ['readMode : <ValueError>']
14224 try:
14225 str_list += ['sRGB : ' + str(self.sRGB)]
14226 except ValueError:
14227 str_list += ['sRGB : <ValueError>']
14228 try:
14229 str_list += ['borderColor : ' + str(self.borderColor)]
14230 except ValueError:
14231 str_list += ['borderColor : <ValueError>']
14232 try:
14233 str_list += ['normalizedCoords : ' + str(self.normalizedCoords)]
14234 except ValueError:
14235 str_list += ['normalizedCoords : <ValueError>']
14236 try:
14237 str_list += ['maxAnisotropy : ' + str(self.maxAnisotropy)]
14238 except ValueError:
14239 str_list += ['maxAnisotropy : <ValueError>']
14240 try:
14241 str_list += ['mipmapFilterMode : ' + str(self.mipmapFilterMode)]
14242 except ValueError:
14243 str_list += ['mipmapFilterMode : <ValueError>']
14244 try:
14245 str_list += ['mipmapLevelBias : ' + str(self.mipmapLevelBias)]
14246 except ValueError:
14247 str_list += ['mipmapLevelBias : <ValueError>']
14248 try:
14249 str_list += ['minMipmapLevelClamp : ' + str(self.minMipmapLevelClamp)]
14250 except ValueError:
14251 str_list += ['minMipmapLevelClamp : <ValueError>']
14252 try:
14253 str_list += ['maxMipmapLevelClamp : ' + str(self.maxMipmapLevelClamp)]
14254 except ValueError:
14255 str_list += ['maxMipmapLevelClamp : <ValueError>']
14256 try:
14257 str_list += ['disableTrilinearOptimization : ' + str(self.disableTrilinearOptimization)]
14258 except ValueError:
14259 str_list += ['disableTrilinearOptimization : <ValueError>']
14260 try:
14261 str_list += ['seamlessCubemap : ' + str(self.seamlessCubemap)]
14262 except ValueError:
14263 str_list += ['seamlessCubemap : <ValueError>']
14264 return '\n'.join(str_list)
14265 else:
14266 return ''
14267 @property
14268 def addressMode(self):
14269 return [_dict_cudaTextureAddressMode[_x] if _x in _dict_cudaTextureAddressMode else None for _x in list(self._pvt_ptr[0].addressMode)]
14270 @addressMode.setter
14271 def addressMode(self, addressMode):
14272 self._pvt_ptr[0].addressMode = [_x.value for _x in addressMode]
14273 @property
14274 def filterMode(self):
14275 if self._pvt_ptr[0].filterMode not in _dict_cudaTextureFilterMode:
14276 return None
14277 return _dict_cudaTextureFilterMode[self._pvt_ptr[0].filterMode]
14278 @filterMode.setter
14279 def filterMode(self, filterMode not None : cudaTextureFilterMode):
14280 self._pvt_ptr[0].filterMode = filterMode.value
14281 @property
14282 def readMode(self):
14283 if self._pvt_ptr[0].readMode not in _dict_cudaTextureReadMode:
14284 return None
14285 return _dict_cudaTextureReadMode[self._pvt_ptr[0].readMode]
14286 @readMode.setter
14287 def readMode(self, readMode not None : cudaTextureReadMode):
14288 self._pvt_ptr[0].readMode = readMode.value
14289 @property
14290 def sRGB(self):
14291 return self._pvt_ptr[0].sRGB
14292 @sRGB.setter
14293 def sRGB(self, int sRGB):
14294 self._pvt_ptr[0].sRGB = sRGB
14295 @property
14296 def borderColor(self):
14297 return self._pvt_ptr[0].borderColor
14298 @borderColor.setter
14299 def borderColor(self, borderColor):
14300 self._pvt_ptr[0].borderColor = borderColor
14301 @property
14302 def normalizedCoords(self):
14303 return self._pvt_ptr[0].normalizedCoords
14304 @normalizedCoords.setter
14305 def normalizedCoords(self, int normalizedCoords):
14306 self._pvt_ptr[0].normalizedCoords = normalizedCoords
14307 @property
14308 def maxAnisotropy(self):
14309 return self._pvt_ptr[0].maxAnisotropy
14310 @maxAnisotropy.setter
14311 def maxAnisotropy(self, unsigned int maxAnisotropy):
14312 self._pvt_ptr[0].maxAnisotropy = maxAnisotropy
14313 @property
14314 def mipmapFilterMode(self):
14315 if self._pvt_ptr[0].mipmapFilterMode not in _dict_cudaTextureFilterMode:
14316 return None
14317 return _dict_cudaTextureFilterMode[self._pvt_ptr[0].mipmapFilterMode]
14318 @mipmapFilterMode.setter
14319 def mipmapFilterMode(self, mipmapFilterMode not None : cudaTextureFilterMode):
14320 self._pvt_ptr[0].mipmapFilterMode = mipmapFilterMode.value
14321 @property
14322 def mipmapLevelBias(self):
14323 return self._pvt_ptr[0].mipmapLevelBias
14324 @mipmapLevelBias.setter
14325 def mipmapLevelBias(self, float mipmapLevelBias):
14326 self._pvt_ptr[0].mipmapLevelBias = mipmapLevelBias
14327 @property
14328 def minMipmapLevelClamp(self):
14329 return self._pvt_ptr[0].minMipmapLevelClamp
14330 @minMipmapLevelClamp.setter
14331 def minMipmapLevelClamp(self, float minMipmapLevelClamp):
14332 self._pvt_ptr[0].minMipmapLevelClamp = minMipmapLevelClamp
14333 @property
14334 def maxMipmapLevelClamp(self):
14335 return self._pvt_ptr[0].maxMipmapLevelClamp
14336 @maxMipmapLevelClamp.setter
14337 def maxMipmapLevelClamp(self, float maxMipmapLevelClamp):
14338 self._pvt_ptr[0].maxMipmapLevelClamp = maxMipmapLevelClamp
14339 @property
14340 def disableTrilinearOptimization(self):
14341 return self._pvt_ptr[0].disableTrilinearOptimization
14342 @disableTrilinearOptimization.setter
14343 def disableTrilinearOptimization(self, int disableTrilinearOptimization):
14344 self._pvt_ptr[0].disableTrilinearOptimization = disableTrilinearOptimization
14345 @property
14346 def seamlessCubemap(self):
14347 return self._pvt_ptr[0].seamlessCubemap
14348 @seamlessCubemap.setter
14349 def seamlessCubemap(self, int seamlessCubemap):
14350 self._pvt_ptr[0].seamlessCubemap = seamlessCubemap
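# Example (illustrative sketch, not generated code): a descriptor for a
# normalized, linearly filtered texture; fields left unset keep their
# zero-initialized defaults.
#
#     texDesc = cudaTextureDesc()
#     texDesc.addressMode = [cudaTextureAddressMode.cudaAddressModeClamp] * 3
#     texDesc.filterMode = cudaTextureFilterMode.cudaFilterModeLinear
#     texDesc.readMode = cudaTextureReadMode.cudaReadModeNormalizedFloat
#     texDesc.normalizedCoords = 1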
14352cdef class cudaEglPlaneDesc_st:
14353 """
14354 CUDA EGL Plane Descriptor - structure defining each plane of a CUDA
14355 EGLFrame
14357 Attributes
14358 ----------
14359 width : unsigned int
14360 Width of plane
14361 height : unsigned int
14362 Height of plane
14363 depth : unsigned int
14364 Depth of plane
14365 pitch : unsigned int
14366 Pitch of plane
14367 numChannels : unsigned int
14368 Number of channels for the plane
14369 channelDesc : cudaChannelFormatDesc
14370 Channel Format Descriptor
14371 reserved : list[unsigned int]
14372 Reserved for future use
14374 Methods
14375 -------
14376 getPtr()
14377 Get memory address of class instance
14378 """
14379 def __cinit__(self, void_ptr _ptr = 0):
14380 if _ptr == 0:
14381 self._pvt_ptr = &self._pvt_val
14382 else:
14383 self._pvt_ptr = <cyruntime.cudaEglPlaneDesc_st *>_ptr
14384 def __init__(self, void_ptr _ptr = 0):
14386 self._channelDesc = cudaChannelFormatDesc(_ptr=<void_ptr>&self._pvt_ptr[0].channelDesc)
14387 def __dealloc__(self):
14388 pass
14389 def getPtr(self):
14390 return <void_ptr>self._pvt_ptr
14391 def __repr__(self):
14392 if self._pvt_ptr is not NULL:
14393 str_list = []
14394 try:
14395 str_list += ['width : ' + str(self.width)]
14396 except ValueError:
14397 str_list += ['width : <ValueError>']
14398 try:
14399 str_list += ['height : ' + str(self.height)]
14400 except ValueError:
14401 str_list += ['height : <ValueError>']
14402 try:
14403 str_list += ['depth : ' + str(self.depth)]
14404 except ValueError:
14405 str_list += ['depth : <ValueError>']
14406 try:
14407 str_list += ['pitch : ' + str(self.pitch)]
14408 except ValueError:
14409 str_list += ['pitch : <ValueError>']
14410 try:
14411 str_list += ['numChannels : ' + str(self.numChannels)]
14412 except ValueError:
14413 str_list += ['numChannels : <ValueError>']
14414 try:
14415 str_list += ['channelDesc :\n' + '\n'.join([' ' + line for line in str(self.channelDesc).splitlines()])]
14416 except ValueError:
14417 str_list += ['channelDesc : <ValueError>']
14418 try:
14419 str_list += ['reserved : ' + str(self.reserved)]
14420 except ValueError:
14421 str_list += ['reserved : <ValueError>']
14422 return '\n'.join(str_list)
14423 else:
14424 return ''
14425 @property
14426 def width(self):
14427 return self._pvt_ptr[0].width
14428 @width.setter
14429 def width(self, unsigned int width):
14430 self._pvt_ptr[0].width = width
14431 @property
14432 def height(self):
14433 return self._pvt_ptr[0].height
14434 @height.setter
14435 def height(self, unsigned int height):
14436 self._pvt_ptr[0].height = height
14437 @property
14438 def depth(self):
14439 return self._pvt_ptr[0].depth
14440 @depth.setter
14441 def depth(self, unsigned int depth):
14442 self._pvt_ptr[0].depth = depth
14443 @property
14444 def pitch(self):
14445 return self._pvt_ptr[0].pitch
14446 @pitch.setter
14447 def pitch(self, unsigned int pitch):
14448 self._pvt_ptr[0].pitch = pitch
14449 @property
14450 def numChannels(self):
14451 return self._pvt_ptr[0].numChannels
14452 @numChannels.setter
14453 def numChannels(self, unsigned int numChannels):
14454 self._pvt_ptr[0].numChannels = numChannels
14455 @property
14456 def channelDesc(self):
14457 return self._channelDesc
14458 @channelDesc.setter
14459 def channelDesc(self, channelDesc not None : cudaChannelFormatDesc):
14460 string.memcpy(&self._pvt_ptr[0].channelDesc, <cyruntime.cudaChannelFormatDesc*><void_ptr>channelDesc.getPtr(), sizeof(self._pvt_ptr[0].channelDesc))
14461 @property
14462 def reserved(self):
14463 return self._pvt_ptr[0].reserved
14464 @reserved.setter
14465 def reserved(self, reserved):
14466 self._pvt_ptr[0].reserved = reserved
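# Example (illustrative sketch, not generated code): describing a single
# tightly packed 8-bit RGBA plane; the channel descriptor would normally be
# copied from a cudaChannelFormatDesc built elsewhere.
#
#     plane = cudaEglPlaneDesc_st()
#     plane.width, plane.height, plane.depth = 1920, 1080, 1
#     plane.numChannels = 4
#     plane.pitch = 1920 * 4   # bytes per row for packed RGBA8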
14468cdef class anon_union10:
14469 """
14470 Attributes
14471 ----------
14472 pArray : list[cudaArray_t]
14474 pPitch : list[cudaPitchedPtr]
14477 Methods
14478 -------
14479 getPtr()
14480 Get memory address of class instance
14481 """
14482 def __cinit__(self, void_ptr _ptr):
14483 self._pvt_ptr = <cyruntime.cudaEglFrame_st *>_ptr
14485 def __init__(self, void_ptr _ptr):
14486 pass
14487 def __dealloc__(self):
14488 pass
14489 def getPtr(self):
14490 return <void_ptr>&self._pvt_ptr[0].frame
14491 def __repr__(self):
14492 if self._pvt_ptr is not NULL:
14493 str_list = []
14494 try:
14495 str_list += ['pArray : ' + str(self.pArray)]
14496 except ValueError:
14497 str_list += ['pArray : <ValueError>']
14498 try:
14499 str_list += ['pPitch :\n' + '\n'.join([' ' + line for line in str(self.pPitch).splitlines()])]
14500 except ValueError:
14501 str_list += ['pPitch : <ValueError>']
14502 return '\n'.join(str_list)
14503 else:
14504 return ''
14505 @property
14506 def pArray(self):
14507 return [cudaArray_t(init_value=<void_ptr>_pArray) for _pArray in self._pvt_ptr[0].frame.pArray]
14508 @pArray.setter
14509 def pArray(self, pArray : list[cudaArray_t]):
14510 if len(pArray) != 3:
14511 raise IndexError('wrong number of values during array assignment, expected 3, got ' + str(len(pArray)))
14512 pArray = [int(_pArray) for _pArray in pArray]
14513 for _idx, _pArray in enumerate(pArray):
14514 self._pvt_ptr[0].frame.pArray[_idx] = <cyruntime.cudaArray_t><void_ptr>_pArray
14516 @property
14517 def pPitch(self):
14518 out_pPitch = [cudaPitchedPtr() for _pPitch in self._pvt_ptr[0].frame.pPitch]
14519 for _idx in range(len(out_pPitch)):
14520 string.memcpy(<cyruntime.cudaPitchedPtr*><void_ptr>out_pPitch[_idx].getPtr(), &self._pvt_ptr[0].frame.pPitch[_idx], sizeof(cyruntime.cudaPitchedPtr))
14521 return out_pPitch
14522 @pPitch.setter
14523 def pPitch(self, pPitch : list[cudaPitchedPtr]):
14524 if len(pPitch) != 3:
14525 raise IndexError('wrong number of values during array assignment, expected 3, got ' + str(len(pPitch)))
14526 for _idx in range(len(pPitch)):
14527 string.memcpy(&self._pvt_ptr[0].frame.pPitch[_idx], <cyruntime.cudaPitchedPtr*><void_ptr>pPitch[_idx].getPtr(), sizeof(cyruntime.cudaPitchedPtr))
14530cdef class cudaEglFrame_st:
14531 """
14532 CUDA EGLFrame Descriptor - structure defining one frame of EGL.
14533 Each frame may contain one or more planes depending on whether the
14534 surface is multiplanar or not. Each plane of EGLFrame is
14535 represented by cudaEglPlaneDesc, which is defined as:
14536 typedef struct cudaEglPlaneDesc_st { unsigned int width;
14537 unsigned int height; unsigned int depth; unsigned int pitch;
14538 unsigned int numChannels; struct cudaChannelFormatDesc channelDesc;
14539 unsigned int reserved[4]; } cudaEglPlaneDesc;
14541 Attributes
14542 ----------
14543 frame : anon_union10
14545 planeDesc : list[cudaEglPlaneDesc]
14546 CUDA EGL Plane Descriptor cudaEglPlaneDesc
14547 planeCount : unsigned int
14548 Number of planes
14549 frameType : cudaEglFrameType
14550 Array or Pitch
14551 eglColorFormat : cudaEglColorFormat
14552 CUDA EGL Color Format
14554 Methods
14555 -------
14556 getPtr()
14557 Get memory address of class instance
14558 """
14559 def __cinit__(self, void_ptr _ptr = 0):
14560 if _ptr == 0:
14561 self._val_ptr = <cyruntime.cudaEglFrame_st *>calloc(1, sizeof(cyruntime.cudaEglFrame_st))
14562 self._pvt_ptr = self._val_ptr
14563 else:
14564 self._pvt_ptr = <cyruntime.cudaEglFrame_st *>_ptr
14565 def __init__(self, void_ptr _ptr = 0):
14567 self._frame = anon_union10(_ptr=<void_ptr>self._pvt_ptr)
14568 def __dealloc__(self):
14569 if self._val_ptr is not NULL:
14570 free(self._val_ptr)
14571 def getPtr(self):
14572 return <void_ptr>self._pvt_ptr
14573 def __repr__(self):
14574 if self._pvt_ptr is not NULL:
14575 str_list = []
14576 try:
14577 str_list += ['frame :\n' + '\n'.join([' ' + line for line in str(self.frame).splitlines()])]
14578 except ValueError:
14579 str_list += ['frame : <ValueError>']
14580 try:
14581 str_list += ['planeDesc :\n' + '\n'.join([' ' + line for line in str(self.planeDesc).splitlines()])]
14582 except ValueError:
14583 str_list += ['planeDesc : <ValueError>']
14584 try:
14585 str_list += ['planeCount : ' + str(self.planeCount)]
14586 except ValueError:
14587 str_list += ['planeCount : <ValueError>']
14588 try:
14589 str_list += ['frameType : ' + str(self.frameType)]
14590 except ValueError:
14591 str_list += ['frameType : <ValueError>']
14592 try:
14593 str_list += ['eglColorFormat : ' + str(self.eglColorFormat)]
14594 except ValueError:
14595 str_list += ['eglColorFormat : <ValueError>']
14596 return '\n'.join(str_list)
14597 else:
14598 return ''
14599 @property
14600 def frame(self):
14601 return self._frame
14602 @frame.setter
14603 def frame(self, frame not None : anon_union10):
14604 string.memcpy(&self._pvt_ptr[0].frame, <cyruntime.anon_union10*><void_ptr>frame.getPtr(), sizeof(self._pvt_ptr[0].frame))
14605 @property
14606 def planeDesc(self):
14607 out_planeDesc = [cudaEglPlaneDesc() for _planeDesc in self._pvt_ptr[0].planeDesc]
14608 for _idx in range(len(out_planeDesc)):
14609 string.memcpy(<cyruntime.cudaEglPlaneDesc*><void_ptr>out_planeDesc[_idx].getPtr(), &self._pvt_ptr[0].planeDesc[_idx], sizeof(cyruntime.cudaEglPlaneDesc))
14610 return out_planeDesc
14611 @planeDesc.setter
14612 def planeDesc(self, planeDesc : list[cudaEglPlaneDesc]):
14613 if len(planeDesc) != 3:
14614 raise IndexError(f'wrong number of values during array assignment, expected 3, got {len(planeDesc)}')
14615 for _idx in range(len(planeDesc)):
14616 string.memcpy(&self._pvt_ptr[0].planeDesc[_idx], <cyruntime.cudaEglPlaneDesc*><void_ptr>planeDesc[_idx].getPtr(), sizeof(cyruntime.cudaEglPlaneDesc))
14618 @property
14619 def planeCount(self):
14620 return self._pvt_ptr[0].planeCount
14621 @planeCount.setter
14622 def planeCount(self, unsigned int planeCount):
14623 self._pvt_ptr[0].planeCount = planeCount
14624 @property
14625 def frameType(self):
14626 if self._pvt_ptr[0].frameType not in _dict_cudaEglFrameType:
14627 return None
14628 return _dict_cudaEglFrameType[self._pvt_ptr[0].frameType]
14629 @frameType.setter
14630 def frameType(self, frameType not None : cudaEglFrameType):
14631 self._pvt_ptr[0].frameType = frameType.value
14632 @property
14633 def eglColorFormat(self):
14634 if self._pvt_ptr[0].eglColorFormat not in _dict_cudaEglColorFormat:
14635 return None
14636 return _dict_cudaEglColorFormat[self._pvt_ptr[0].eglColorFormat]
14637 @eglColorFormat.setter
14638 def eglColorFormat(self, eglColorFormat not None : cudaEglColorFormat):
14639 self._pvt_ptr[0].eglColorFormat = eglColorFormat.value
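
# Illustrative sketch (hypothetical helper, not part of the generated
# bindings): populating a cudaEglFrame_st through the property wrappers
# above. Assumes the cudaEglFrameType enum mirrors the C enumerator names
# and that `planes` is a list of exactly 3 cudaEglPlaneDesc, as the
# planeDesc setter requires.
def _example_build_pitch_frame(planes, unsigned int count):
    frame = cudaEglFrame_st()
    frame.planeCount = count
    frame.frameType = cudaEglFrameType.cudaEglFrameTypePitch
    frame.planeDesc = planes
    return frame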
14641cdef class cudaGraphConditionalHandle:
14642 """
14644 CUDA handle for conditional graph nodes
14646 Methods
14647 -------
14648 getPtr()
14649 Get memory address of class instance
14651 """
14652 def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
14653 if _ptr == 0:
14654 self._pvt_ptr = &self._pvt_val
14655 else:
14656 self._pvt_ptr = <cyruntime.cudaGraphConditionalHandle *>_ptr
14657 if init_value:
14658 self._pvt_ptr[0] = init_value
14659 def __dealloc__(self):
14660 pass
14661 def __repr__(self):
14662 return '<cudaGraphConditionalHandle ' + str(self.__int__()) + '>'
14663 def __int__(self):
14664 return <unsigned long long>self._pvt_ptr[0]
14665 def getPtr(self):
14666 return <void_ptr>self._pvt_ptr
14668cdef class cudaLogIterator:
14669 """
14671 Methods
14672 -------
14673 getPtr()
14674 Get memory address of class instance
14676 """
14677 def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
14678 if _ptr == 0:
14679 self._pvt_ptr = &self._pvt_val
14680 else:
14681 self._pvt_ptr = <cyruntime.cudaLogIterator *>_ptr
14682 if init_value:
14683 self._pvt_ptr[0] = init_value
14684 def __dealloc__(self):
14685 pass
14686 def __repr__(self):
14687 return '<cudaLogIterator ' + str(self.__int__()) + '>'
14688 def __int__(self):
14689 return <unsigned int>self._pvt_ptr[0]
14690 def getPtr(self):
14691 return <void_ptr>self._pvt_ptr
14693cdef class cudaSurfaceObject_t:
14694 """
14696 An opaque value that represents a CUDA Surface object
14698 Methods
14699 -------
14700 getPtr()
14701 Get memory address of class instance
14703 """
14704 def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
14705 if _ptr == 0:
14706 self._pvt_ptr = &self._pvt_val
14707 else:
14708 self._pvt_ptr = <cyruntime.cudaSurfaceObject_t *>_ptr
14709 if init_value:
14710 self._pvt_ptr[0] = init_value
14711 def __dealloc__(self):
14712 pass
14713 def __repr__(self):
14714 return '<cudaSurfaceObject_t ' + str(self.__int__()) + '>'
14715 def __int__(self):
14716 return <unsigned long long>self._pvt_ptr[0]
14717 def getPtr(self):
14718 return <void_ptr>self._pvt_ptr
14720cdef class cudaTextureObject_t:
14721 """
14723 An opaque value that represents a CUDA texture object
14725 Methods
14726 -------
14727 getPtr()
14728 Get memory address of class instance
14730 """
14731 def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
14732 if _ptr == 0:
14733 self._pvt_ptr = &self._pvt_val
14734 else:
14735 self._pvt_ptr = <cyruntime.cudaTextureObject_t *>_ptr
14736 if init_value:
14737 self._pvt_ptr[0] = init_value
14738 def __dealloc__(self):
14739 pass
14740 def __repr__(self):
14741 return '<cudaTextureObject_t ' + str(self.__int__()) + '>'
14742 def __int__(self):
14743 return <unsigned long long>self._pvt_ptr[0]
14744 def getPtr(self):
14745 return <void_ptr>self._pvt_ptr
14747cdef class GLenum:
14748 """
14750 Methods
14751 -------
14752 getPtr()
14753 Get memory address of class instance
14755 """
14756 def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
14757 if _ptr == 0:
14758 self._pvt_ptr = &self._pvt_val
14759 else:
14760 self._pvt_ptr = <cyruntime.GLenum *>_ptr
14761 if init_value:
14762 self._pvt_ptr[0] = init_value
14763 def __dealloc__(self):
14764 pass
14765 def __repr__(self):
14766 return '<GLenum ' + str(self.__int__()) + '>'
14767 def __int__(self):
14768 return <unsigned int>self._pvt_ptr[0]
14769 def getPtr(self):
14770 return <void_ptr>self._pvt_ptr
14772cdef class GLuint:
14773 """
14775 Methods
14776 -------
14777 getPtr()
14778 Get memory address of class instance
14780 """
14781 def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
14782 if _ptr == 0:
14783 self._pvt_ptr = &self._pvt_val
14784 else:
14785 self._pvt_ptr = <cyruntime.GLuint *>_ptr
14786 if init_value:
14787 self._pvt_ptr[0] = init_value
14788 def __dealloc__(self):
14789 pass
14790 def __repr__(self):
14791 return '<GLuint ' + str(self.__int__()) + '>'
14792 def __int__(self):
14793 return <unsigned int>self._pvt_ptr[0]
14794 def getPtr(self):
14795 return <void_ptr>self._pvt_ptr
14797cdef class EGLint:
14798 """
14800 Methods
14801 -------
14802 getPtr()
14803 Get memory address of class instance
14805 """
14806 def __cinit__(self, unsigned int init_value = 0, void_ptr _ptr = 0):
14807 if _ptr == 0:
14808 self._pvt_ptr = &self._pvt_val
14809 else:
14810 self._pvt_ptr = <cyruntime.EGLint *>_ptr
14811 if init_value:
14812 self._pvt_ptr[0] = init_value
14813 def __dealloc__(self):
14814 pass
14815 def __repr__(self):
14816 return '<EGLint ' + str(self.__int__()) + '>'
14817 def __int__(self):
14818 return <unsigned int>self._pvt_ptr[0]
14819 def getPtr(self):
14820 return <void_ptr>self._pvt_ptr
14822cdef class VdpDevice:
14823 """
14825 Methods
14826 -------
14827 getPtr()
14828 Get memory address of class instance
14830 """
14831 def __cinit__(self, uint32_t init_value = 0, void_ptr _ptr = 0):
14832 if _ptr == 0:
14833 self._pvt_ptr = &self._pvt_val
14834 else:
14835 self._pvt_ptr = <cyruntime.VdpDevice *>_ptr
14836 if init_value:
14837 self._pvt_ptr[0] = init_value
14838 def __dealloc__(self):
14839 pass
14840 def __repr__(self):
14841 return '<VdpDevice ' + str(self.__int__()) + '>'
14842 def __int__(self):
14843 return <uint32_t>self._pvt_ptr[0]
14844 def getPtr(self):
14845 return <void_ptr>self._pvt_ptr
14847cdef class VdpGetProcAddress:
14848 """
14850 Methods
14851 -------
14852 getPtr()
14853 Get memory address of class instance
14855 """
14856 def __cinit__(self, unsigned long long init_value = 0, void_ptr _ptr = 0):
14857 if _ptr == 0:
14858 self._pvt_ptr = &self._pvt_val
14859 else:
14860 self._pvt_ptr = <cyruntime.VdpGetProcAddress *>_ptr
14861 if init_value:
14862 self._pvt_ptr[0] = init_value
14863 def __dealloc__(self):
14864 pass
14865 def __repr__(self):
14866 return '<VdpGetProcAddress ' + str(self.__int__()) + '>'
14867 def __int__(self):
14868 return <unsigned long long>self._pvt_ptr[0]
14869 def getPtr(self):
14870 return <void_ptr>self._pvt_ptr
14872cdef class VdpVideoSurface:
14873 """
14875 Methods
14876 -------
14877 getPtr()
14878 Get memory address of class instance
14880 """
14881 def __cinit__(self, uint32_t init_value = 0, void_ptr _ptr = 0):
14882 if _ptr == 0:
14883 self._pvt_ptr = &self._pvt_val
14884 else:
14885 self._pvt_ptr = <cyruntime.VdpVideoSurface *>_ptr
14886 if init_value:
14887 self._pvt_ptr[0] = init_value
14888 def __dealloc__(self):
14889 pass
14890 def __repr__(self):
14891 return '<VdpVideoSurface ' + str(self.__int__()) + '>'
14892 def __int__(self):
14893 return <uint32_t>self._pvt_ptr[0]
14894 def getPtr(self):
14895 return <void_ptr>self._pvt_ptr
14897cdef class VdpOutputSurface:
14898 """
14900 Methods
14901 -------
14902 getPtr()
14903 Get memory address of class instance
14905 """
14906 def __cinit__(self, uint32_t init_value = 0, void_ptr _ptr = 0):
14907 if _ptr == 0:
14908 self._pvt_ptr = &self._pvt_val
14909 else:
14910 self._pvt_ptr = <cyruntime.VdpOutputSurface *>_ptr
14911 if init_value:
14912 self._pvt_ptr[0] = init_value
14913 def __dealloc__(self):
14914 pass
14915 def __repr__(self):
14916 return '<VdpOutputSurface ' + str(self.__int__()) + '>'
14917 def __int__(self):
14918 return <uint32_t>self._pvt_ptr[0]
14919 def getPtr(self):
14920 return <void_ptr>self._pvt_ptr
14922@cython.embedsignature(True)
14923def cudaDeviceReset():
14924 """ Destroy all allocations and reset all state on the current device in the current process.
14926 Explicitly destroys and cleans up all resources associated with the
14927 current device in the current process. It is the caller's
14928 responsibility to ensure that the resources are not accessed or passed
14929 in subsequent API calls and doing so will result in undefined behavior.
14930 These resources include CUDA types :py:obj:`~.cudaStream_t`,
14931 :py:obj:`~.cudaEvent_t`, :py:obj:`~.cudaArray_t`,
14932 :py:obj:`~.cudaMipmappedArray_t`, :py:obj:`~.cudaPitchedPtr`,
14933 :py:obj:`~.cudaTextureObject_t`, :py:obj:`~.cudaSurfaceObject_t`,
14934 :py:obj:`~.textureReference`, :py:obj:`~.surfaceReference`,
14935 :py:obj:`~.cudaExternalMemory_t`, :py:obj:`~.cudaExternalSemaphore_t`
14936 and :py:obj:`~.cudaGraphicsResource_t`. These resources also include
14937 memory allocations by :py:obj:`~.cudaMalloc`,
14938 :py:obj:`~.cudaMallocHost`, :py:obj:`~.cudaMallocManaged` and
14939 :py:obj:`~.cudaMallocPitch`. Any subsequent API call to this device
14940 will reinitialize the device.
14942 Note that this function will reset the device immediately. It is the
14943 caller's responsibility to ensure that the device is not being accessed
14944 by any other host threads from the process when this function is
14945 called.
14947 Returns
14948 -------
14949 cudaError_t
14950 :py:obj:`~.cudaSuccess`
14952 See Also
14953 --------
14954 :py:obj:`~.cudaDeviceSynchronize`
14956 Notes
14957 -----
14958 :py:obj:`~.cudaDeviceReset()` will not destroy memory allocations by :py:obj:`~.cudaMallocAsync()` and :py:obj:`~.cudaMallocFromPoolAsync()`. These memory allocations need to be destroyed explicitly.
14960 If a non-primary :py:obj:`~.CUcontext` is current to the thread, :py:obj:`~.cudaDeviceReset()` will destroy only the internal CUDA RT state for that :py:obj:`~.CUcontext`.
14961 """
14962 with nogil:
14963 err = cyruntime.cudaDeviceReset()
14964 return (_dict_cudaError_t[err],)
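
# Illustrative sketch (hypothetical helper, not generated code): draining
# outstanding work before tearing the device down, since the docstring
# above makes the caller responsible for stopping all other access first.
def _example_reset_current_device():
    err, = cudaDeviceSynchronize()
    if err != cudaError_t.cudaSuccess:
        return err
    err, = cudaDeviceReset()
    return err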
14966@cython.embedsignature(True)
14967def cudaDeviceSynchronize():
14968 """ Wait for compute device to finish.
14970 Blocks until the device has completed all preceding requested tasks.
14971 :py:obj:`~.cudaDeviceSynchronize()` returns an error if one of the
14972 preceding tasks has failed. If the
14973 :py:obj:`~.cudaDeviceScheduleBlockingSync` flag was set for this
14974 device, the host thread will block until the device has finished its
14975 work.
14977 Returns
14978 -------
14979 cudaError_t
14980 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorStreamCaptureUnsupported`
14982 See Also
14983 --------
14984 :py:obj:`~.cudaDeviceReset`, :py:obj:`~.cuCtxSynchronize`
14985 """
14986 with nogil:
14987 err = cyruntime.cudaDeviceSynchronize()
14988 return (_dict_cudaError_t[err],)
14990@cython.embedsignature(True)
14991def cudaDeviceSetLimit(limit not None : cudaLimit, size_t value):
14992 """ Set resource limits.
14994 Setting `limit` to `value` is a request by the application to update
14995 the current limit maintained by the device. The driver is free to
14996 modify the requested value to meet h/w requirements (this could be
14997 clamping to minimum or maximum values, rounding up to nearest element
14998 size, etc). The application can use :py:obj:`~.cudaDeviceGetLimit()` to
14999 find out exactly what the limit has been set to.
15001 Setting each :py:obj:`~.cudaLimit` has its own specific restrictions,
15002 so each is discussed here.
15004 - :py:obj:`~.cudaLimitStackSize` controls the stack size in bytes of
15005 each GPU thread.
15007 - :py:obj:`~.cudaLimitPrintfFifoSize` controls the size in bytes of the
15008 shared FIFO used by the :py:obj:`~.printf()` device system call.
15009 Setting :py:obj:`~.cudaLimitPrintfFifoSize` must not be performed
15010 after launching any kernel that uses the :py:obj:`~.printf()` device
15011 system call - in such case :py:obj:`~.cudaErrorInvalidValue` will be
15012 returned.
15014 - :py:obj:`~.cudaLimitMallocHeapSize` controls the size in bytes of the
15015 heap used by the :py:obj:`~.malloc()` and :py:obj:`~.free()` device
15016 system calls. Setting :py:obj:`~.cudaLimitMallocHeapSize` must not be
15017 performed after launching any kernel that uses the
15018 :py:obj:`~.malloc()` or :py:obj:`~.free()` device system calls - in
15019 such case :py:obj:`~.cudaErrorInvalidValue` will be returned.
15021 - :py:obj:`~.cudaLimitDevRuntimeSyncDepth` controls the maximum nesting
15022 depth of a grid at which a thread can safely call
15023 :py:obj:`~.cudaDeviceSynchronize()`. Setting this limit must be
15024 performed before any launch of a kernel that uses the device runtime
15025 and calls :py:obj:`~.cudaDeviceSynchronize()` above the default sync
15026 depth, two levels of grids. Calls to
15027 :py:obj:`~.cudaDeviceSynchronize()` will fail with error code
15028 :py:obj:`~.cudaErrorSyncDepthExceeded` if the limitation is violated.
15029 This limit can be set smaller than the default or up to the maximum
15030 launch depth of 24. When setting this limit, keep in mind that
15031 additional levels of sync depth require the runtime to reserve large
15032 amounts of device memory which can no longer be used for user
15033 allocations. If these reservations of device memory fail,
15034 :py:obj:`~.cudaDeviceSetLimit` will return
15035 :py:obj:`~.cudaErrorMemoryAllocation`, and the limit can be reset to
15036 a lower value. This limit is only applicable to devices of compute
15037 capability < 9.0. Attempting to set this limit on devices of other
15038 compute capability will result in the error
15039 :py:obj:`~.cudaErrorUnsupportedLimit` being returned.
15041 - :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount` controls the
15042 maximum number of outstanding device runtime launches that can be
15043 made from the current device. A grid is outstanding from the point of
15044 launch up until the grid is known to have been completed. Device
15045 runtime launches which violate this limitation fail and return
15046 :py:obj:`~.cudaErrorLaunchPendingCountExceeded` when
15047 :py:obj:`~.cudaGetLastError()` is called after launch. If more
15048 pending launches than the default (2048 launches) are needed for a
15049 module using the device runtime, this limit can be increased. Keep in
15050 mind that being able to sustain additional pending launches will
15051 require the runtime to reserve larger amounts of device memory
15052 upfront which can no longer be used for allocations. If these
15053 reservations fail, :py:obj:`~.cudaDeviceSetLimit` will return
15054 :py:obj:`~.cudaErrorMemoryAllocation`, and the limit can be reset to
15055 a lower value. This limit is only applicable to devices of compute
15056 capability 3.5 and higher. Attempting to set this limit on devices of
15057 compute capability less than 3.5 will result in the error
15058 :py:obj:`~.cudaErrorUnsupportedLimit` being returned.
15060 - :py:obj:`~.cudaLimitMaxL2FetchGranularity` controls the L2 cache
15061 fetch granularity. Values can range from 0B to 128B. This is purely a
15062 performance hint and it can be ignored or clamped depending on the
15063 platform.
15065 - :py:obj:`~.cudaLimitPersistingL2CacheSize` controls size in bytes
15066 available for persisting L2 cache. This is purely a performance hint
15067 and it can be ignored or clamped depending on the platform.
15069 Parameters
15070 ----------
15071 limit : :py:obj:`~.cudaLimit`
15072 Limit to set
15073 value : size_t
15074 Size of limit
15076 Returns
15077 -------
15078 cudaError_t
15079 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorUnsupportedLimit`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
15081 See Also
15082 --------
15083 :py:obj:`~.cudaDeviceGetLimit`, :py:obj:`~.cuCtxSetLimit`
15084 """
15085 cdef cyruntime.cudaLimit cylimit = limit.value
15086 with nogil:
15087 err = cyruntime.cudaDeviceSetLimit(cylimit, value)
15088 return (_dict_cudaError_t[err],)
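
# Illustrative sketch (hypothetical helper): requesting a larger printf
# FIFO, then reading back what the driver actually granted, since the
# docstring notes the request may be clamped or rounded. Assumes the
# cudaLimit IntEnum members mirror the C enumerator names.
def _example_grow_printf_fifo(size_t requested):
    err, = cudaDeviceSetLimit(cudaLimit.cudaLimitPrintfFifoSize, requested)
    if err != cudaError_t.cudaSuccess:
        return err, None
    return cudaDeviceGetLimit(cudaLimit.cudaLimitPrintfFifoSize)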
15090@cython.embedsignature(True)
15091def cudaDeviceGetLimit(limit not None : cudaLimit):
15092 """ Return resource limits.
15094 Returns in `*pValue` the current size of `limit`. The following
15095 :py:obj:`~.cudaLimit` values are supported.
15097 - :py:obj:`~.cudaLimitStackSize` is the stack size in bytes of each GPU
15098 thread.
15100 - :py:obj:`~.cudaLimitPrintfFifoSize` is the size in bytes of the
15101 shared FIFO used by the :py:obj:`~.printf()` device system call.
15103 - :py:obj:`~.cudaLimitMallocHeapSize` is the size in bytes of the heap
15104 used by the :py:obj:`~.malloc()` and :py:obj:`~.free()` device system
15105 calls.
15107 - :py:obj:`~.cudaLimitDevRuntimeSyncDepth` is the maximum grid depth at
15108 which a thread can issue the device runtime call
15109 :py:obj:`~.cudaDeviceSynchronize()` to wait on child grid launches to
15110 complete. This functionality is removed for devices of compute
15111 capability >= 9.0, and hence will return error
15112 :py:obj:`~.cudaErrorUnsupportedLimit` on such devices.
15114 - :py:obj:`~.cudaLimitDevRuntimePendingLaunchCount` is the maximum
15115 number of outstanding device runtime launches.
15117 - :py:obj:`~.cudaLimitMaxL2FetchGranularity` is the L2 cache fetch
15118 granularity.
15120 - :py:obj:`~.cudaLimitPersistingL2CacheSize` is the persisting L2 cache
15121 size in bytes.
15123 Parameters
15124 ----------
15125 limit : :py:obj:`~.cudaLimit`
15126 Limit to query
15128 Returns
15129 -------
15130 cudaError_t
15131 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorUnsupportedLimit`, :py:obj:`~.cudaErrorInvalidValue`
15132 pValue : int
15133 Returned size of the limit
15135 See Also
15136 --------
15137 :py:obj:`~.cudaDeviceSetLimit`, :py:obj:`~.cuCtxGetLimit`
15138 """
15139 cdef size_t pValue = 0
15140 cdef cyruntime.cudaLimit cylimit = limit.value
15141 with nogil:
15142 err = cyruntime.cudaDeviceGetLimit(&pValue, cylimit)
15143 if err != cyruntime.cudaSuccess:
15144 return (_dict_cudaError_t[err], None)
15145 return (_dict_cudaError_t[err], pValue)
15147@cython.embedsignature(True)
15148def cudaDeviceGetTexture1DLinearMaxWidth(fmtDesc : Optional[cudaChannelFormatDesc], int device):
15149 """ Returns the maximum number of elements allocatable in a 1D linear texture for a given element size.
15151 Returns in `maxWidthInElements` the maximum number of elements
15152 allocatable in a 1D linear texture for given format descriptor
15153 `fmtDesc`.
15155 Parameters
15156 ----------
15157 fmtDesc : :py:obj:`~.cudaChannelFormatDesc`
15158 Texture format description.
15159 device : int
15160 Device on which to query the texture limit
15162 Returns
15163 -------
15164 cudaError_t
15165 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorUnsupportedLimit`, :py:obj:`~.cudaErrorInvalidValue`
15166 maxWidthInElements : int
15167 Returns maximum number of texture elements allocatable for given
15168 `fmtDesc`.
15170 See Also
15171 --------
15172 :py:obj:`~.cuDeviceGetTexture1DLinearMaxWidth`
15173 """
15174 cdef size_t maxWidthInElements = 0
15175 cdef cyruntime.cudaChannelFormatDesc* cyfmtDesc_ptr = fmtDesc._pvt_ptr if fmtDesc is not None else NULL
15176 with nogil:
15177 err = cyruntime.cudaDeviceGetTexture1DLinearMaxWidth(&maxWidthInElements, cyfmtDesc_ptr, device)
15178 if err != cyruntime.cudaSuccess:
15179 return (_dict_cudaError_t[err], None)
15180 return (_dict_cudaError_t[err], maxWidthInElements)
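
# Illustrative sketch (hypothetical helper): querying the widest 1D linear
# texture of single-channel 32-bit floats a device supports. Assumes
# cudaChannelFormatDesc exposes x/y/z/w/f setters like the other generated
# structs and that cudaChannelFormatKind mirrors the C enumerator names.
def _example_max_float_texture_width(int device):
    desc = cudaChannelFormatDesc()
    desc.x = 32
    desc.y = desc.z = desc.w = 0
    desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
    return cudaDeviceGetTexture1DLinearMaxWidth(desc, device)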
15182@cython.embedsignature(True)
15183def cudaDeviceGetCacheConfig():
15184 """ Returns the preferred cache configuration for the current device.
15186 On devices where the L1 cache and shared memory use the same hardware
15187 resources, this returns through `pCacheConfig` the preferred cache
15188 configuration for the current device. This is only a preference. The
15189 runtime will use the requested configuration if possible, but it is
15190 free to choose a different configuration if required to execute
15191 functions.
15193 This will return a `pCacheConfig` of
15194 :py:obj:`~.cudaFuncCachePreferNone` on devices where the size of the L1
15195 cache and shared memory are fixed.
15197 The supported cache configurations are:
15199 - :py:obj:`~.cudaFuncCachePreferNone`: no preference for shared memory
15200 or L1 (default)
15202 - :py:obj:`~.cudaFuncCachePreferShared`: prefer larger shared memory
15203 and smaller L1 cache
15205 - :py:obj:`~.cudaFuncCachePreferL1`: prefer larger L1 cache and smaller
15206 shared memory
15208 - :py:obj:`~.cudaFuncCachePreferEqual`: prefer equal size L1 cache and
15209 shared memory
15211 Returns
15212 -------
15213 cudaError_t
15214 :py:obj:`~.cudaSuccess`
15215 pCacheConfig : :py:obj:`~.cudaFuncCache`
15216 Returned cache configuration
15218 See Also
15219 --------
15220 :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaFuncSetCacheConfig (C API)`, cudaFuncSetCacheConfig (C++ API), :py:obj:`~.cuCtxGetCacheConfig`
15221 """
15222 cdef cyruntime.cudaFuncCache pCacheConfig
15223 with nogil:
15224 err = cyruntime.cudaDeviceGetCacheConfig(&pCacheConfig)
15225 if err != cyruntime.cudaSuccess:
15226 return (_dict_cudaError_t[err], None)
15227 return (_dict_cudaError_t[err], cudaFuncCache(pCacheConfig))
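
# Illustrative sketch (hypothetical helper): switching the device toward a
# larger shared-memory carveout only if that is not already the preference.
def _example_prefer_shared_memory():
    err, current = cudaDeviceGetCacheConfig()
    if err != cudaError_t.cudaSuccess:
        return err
    if current != cudaFuncCache.cudaFuncCachePreferShared:
        err, = cudaDeviceSetCacheConfig(cudaFuncCache.cudaFuncCachePreferShared)
    return err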
15229@cython.embedsignature(True)
15230def cudaDeviceGetStreamPriorityRange():
15231 """ Returns numerical values that correspond to the least and greatest stream priorities.
15233 Returns in `*leastPriority` and `*greatestPriority` the numerical
15234 values that correspond to the least and greatest stream priorities
15235 respectively. Stream priorities follow a convention where lower numbers
15236 imply greater priorities. The range of meaningful stream priorities is
15237 given by [`*greatestPriority`, `*leastPriority`]. If the user attempts
15238 to create a stream with a priority value that is outside the
15239 meaningful range as specified by this API, the priority is
15240 automatically clamped down or up to either `*leastPriority` or
15241 `*greatestPriority` respectively. See
15242 :py:obj:`~.cudaStreamCreateWithPriority` for details on creating a
15243 priority stream. A NULL may be passed in for `*leastPriority` or
15244 `*greatestPriority` if the value is not desired.
15246 This function will return '0' in both `*leastPriority` and
15247 `*greatestPriority` if the current context's device does not support
15248 stream priorities (see :py:obj:`~.cudaDeviceGetAttribute`).
15250 Returns
15251 -------
15252 cudaError_t
15253 :py:obj:`~.cudaSuccess`
15254 leastPriority : int
15255 Pointer to an int in which the numerical value for least stream
15256 priority is returned
15257 greatestPriority : int
15258 Pointer to an int in which the numerical value for greatest stream
15259 priority is returned
15261 See Also
15262 --------
15263 :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cuCtxGetStreamPriorityRange`
15264 """
15265 cdef int leastPriority = 0
15266 cdef int greatestPriority = 0
15267 with nogil:
15268 err = cyruntime.cudaDeviceGetStreamPriorityRange(&leastPriority, &greatestPriority)
15269 if err != cyruntime.cudaSuccess:
15270 return (_dict_cudaError_t[err], None, None)
15271 return (_dict_cudaError_t[err], leastPriority, greatestPriority)
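
# Illustrative sketch (hypothetical helper): clamping a requested stream
# priority into the meaningful range, mirroring what the runtime does
# automatically. Lower numbers mean higher priority, so numerically
# greatestPriority <= leastPriority.
def _example_clamp_priority(int requested):
    err, least, greatest = cudaDeviceGetStreamPriorityRange()
    if err != cudaError_t.cudaSuccess:
        return err, requested
    return err, min(max(requested, greatest), least)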
15273@cython.embedsignature(True)
15274def cudaDeviceSetCacheConfig(cacheConfig not None : cudaFuncCache):
15275 """ Sets the preferred cache configuration for the current device.
15277 On devices where the L1 cache and shared memory use the same hardware
15278 resources, this sets through `cacheConfig` the preferred cache
15279 configuration for the current device. This is only a preference. The
15280 runtime will use the requested configuration if possible, but it is
15281 free to choose a different configuration if required to execute the
15282 function. Any function preference set via
15283 :py:obj:`~.cudaFuncSetCacheConfig (C API)` or cudaFuncSetCacheConfig
15284 (C++ API) will be preferred over this device-wide setting. Setting the
15285 device-wide cache configuration to :py:obj:`~.cudaFuncCachePreferNone`
15286 will cause subsequent kernel launches to prefer to not change the cache
15287 configuration unless required to launch the kernel.
15289 This setting does nothing on devices where the size of the L1 cache and
15290 shared memory are fixed.
15292 Launching a kernel with a different preference than the most recent
15293 preference setting may insert a device-side synchronization point.
15295 The supported cache configurations are:
15297 - :py:obj:`~.cudaFuncCachePreferNone`: no preference for shared memory
15298 or L1 (default)
15300 - :py:obj:`~.cudaFuncCachePreferShared`: prefer larger shared memory
15301 and smaller L1 cache
15303 - :py:obj:`~.cudaFuncCachePreferL1`: prefer larger L1 cache and smaller
15304 shared memory
15306 - :py:obj:`~.cudaFuncCachePreferEqual`: prefer equal size L1 cache and
15307 shared memory
15309 Parameters
15310 ----------
15311 cacheConfig : :py:obj:`~.cudaFuncCache`
15312 Requested cache configuration
15314 Returns
15315 -------
15316 cudaError_t
15317 :py:obj:`~.cudaSuccess`
15319 See Also
15320 --------
15321 :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaFuncSetCacheConfig (C API)`, cudaFuncSetCacheConfig (C++ API), :py:obj:`~.cuCtxSetCacheConfig`
15322 """
15323 cdef cyruntime.cudaFuncCache cycacheConfig = cacheConfig.value
15324 with nogil:
15325 err = cyruntime.cudaDeviceSetCacheConfig(cycacheConfig)
15326 return (_dict_cudaError_t[err],)
15328@cython.embedsignature(True)
15329def cudaDeviceGetByPCIBusId(char* pciBusId):
15330 """ Returns a handle to a compute device.
15332 Returns in `*device` a device ordinal given a PCI bus ID string of the
15334 form [domain]:[bus]:[device].[function], [domain]:[bus]:[device], or
15335 [bus]:[device].[function], where `domain`, `bus`, `device`, and `function` are all hexadecimal values
15337 Parameters
15338 ----------
15339 pciBusId : bytes
15340 String in one of the forms [domain]:[bus]:[device].[function], [domain]:[bus]:[device], or [bus]:[device].[function]
15342 Returns
15343 -------
15344 cudaError_t
15345 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
15346 device : int
15347 Returned device ordinal
15349 See Also
15350 --------
15351 :py:obj:`~.cudaDeviceGetPCIBusId`, :py:obj:`~.cuDeviceGetByPCIBusId`
15352 """
15353 cdef int device = 0
15354 with nogil:
15355 err = cyruntime.cudaDeviceGetByPCIBusId(&device, pciBusId)
15356 if err != cyruntime.cudaSuccess:
15357 return (_dict_cudaError_t[err], None)
15358 return (_dict_cudaError_t[err], device)
15360@cython.embedsignature(True)
15361def cudaDeviceGetPCIBusId(int length, int device):
15362 """ Returns a PCI Bus Id string for the device.
15364 Returns an ASCII string identifying `device` in the NULL-
15365 terminated string pointed to by `pciBusId`. `length` specifies the
15366 maximum length of the string that may be returned.
15368 The identifier has the form [domain]:[bus]:[device].[function], where
15369 `domain`, `bus`, `device`, and `function` are all hexadecimal values.
15370 pciBusId should be large enough to store 13 characters including the NULL-terminator.
15372 Parameters
15373 ----------
15374 length : int
15375 Maximum length of string to store in `pciBusId`
15376 device : int
15377 Device to get identifier string for
15379 Returns
15380 -------
15381 cudaError_t
15382 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
15383 pciBusId : bytes
15384 Returned identifier string for the device, in the format [domain]:[bus]:[device].[function]
15386 See Also
15387 --------
15388 :py:obj:`~.cudaDeviceGetByPCIBusId`, :py:obj:`~.cuDeviceGetPCIBusId`
15389 """
15390 pypciBusId = b" " * length
15391 cdef char* pciBusId = pypciBusId
15392 with nogil:
15393 err = cyruntime.cudaDeviceGetPCIBusId(pciBusId, length, device)
15394 if err != cyruntime.cudaSuccess:
15395 return (_dict_cudaError_t[err], None)
15396 return (_dict_cudaError_t[err], pypciBusId)
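
# Illustrative sketch (hypothetical helper): round-tripping a device ordinal
# through its PCI bus ID string. 16 bytes comfortably holds the 13-character
# identifier plus NULL terminator mentioned above.
def _example_pci_bus_id_roundtrip(int device):
    err, bus_id = cudaDeviceGetPCIBusId(16, device)
    if err != cudaError_t.cudaSuccess:
        return err, None
    return cudaDeviceGetByPCIBusId(bus_id)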
15398@cython.embedsignature(True)
15399def cudaIpcGetEventHandle(event):
15400 """ Gets an interprocess handle for a previously allocated event.
15402 Takes as input a previously allocated event. This event must have been
15403 created with the :py:obj:`~.cudaEventInterprocess` and
15404 :py:obj:`~.cudaEventDisableTiming` flags set. This opaque handle may be
15405 copied into other processes and opened with
15406 :py:obj:`~.cudaIpcOpenEventHandle` to allow efficient hardware
15407 synchronization between GPU work in different processes.
15409 After the event has been opened in the importing process,
15410 :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventSynchronize`,
15411 :py:obj:`~.cudaStreamWaitEvent` and :py:obj:`~.cudaEventQuery` may be
15412 used in either process. Performing operations on the imported event
15413 after the exported event has been freed with
15414 :py:obj:`~.cudaEventDestroy` will result in undefined behavior.
15416 IPC functionality is restricted to devices with support for unified
15417 addressing on Linux and Windows operating systems. IPC functionality on
15418 Windows is supported for compatibility purposes but not recommended as
15419 it comes with performance cost. Users can test their device for IPC
15420 functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
15421 :py:obj:`~.cudaDevAttrIpcEventSupport`
15423 Parameters
15424 ----------
15425 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
15426 Event allocated with :py:obj:`~.cudaEventInterprocess` and
15427 :py:obj:`~.cudaEventDisableTiming` flags.
15429 Returns
15430 -------
15431 cudaError_t
15432 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
15433 handle : :py:obj:`~.cudaIpcEventHandle_t`
15434 Pointer to a user allocated cudaIpcEventHandle in which to return
15435 the opaque event handle
15437 See Also
15438 --------
15439 :py:obj:`~.cudaEventCreate`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cuIpcGetEventHandle`
15440 """
15441 cdef cyruntime.cudaEvent_t cyevent
15442 if event is None:
15443 pevent = 0
15444 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
15445 pevent = int(event)
15446 else:
15447 pevent = int(cudaEvent_t(event))
15448 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
15449 cdef cudaIpcEventHandle_t handle = cudaIpcEventHandle_t()
15450 with nogil:
15451 err = cyruntime.cudaIpcGetEventHandle(<cyruntime.cudaIpcEventHandle_t*>handle._pvt_ptr, cyevent)
15452 if err != cyruntime.cudaSuccess:
15453 return (_dict_cudaError_t[err], None)
15454 return (_dict_cudaError_t[err], handle)
15456@cython.embedsignature(True)
15457def cudaIpcOpenEventHandle(handle not None : cudaIpcEventHandle_t):
15458 """ Opens an interprocess event handle for use in the current process.
15460 Opens an interprocess event handle exported from another process with
15461 :py:obj:`~.cudaIpcGetEventHandle`. This function returns a
15462 :py:obj:`~.cudaEvent_t` that behaves like a locally created event with
15463 the :py:obj:`~.cudaEventDisableTiming` flag specified. This event must
15464 be freed with :py:obj:`~.cudaEventDestroy`.
15466 Performing operations on the imported event after the exported event
15467 has been freed with :py:obj:`~.cudaEventDestroy` will result in
15468 undefined behavior.
15470 IPC functionality is restricted to devices with support for unified
15471 addressing on Linux and Windows operating systems. IPC functionality on
15472 Windows is supported for compatibility purposes but not recommended as
15473 it comes with performance cost. Users can test their device for IPC
15474 functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
15475 :py:obj:`~.cudaDevAttrIpcEventSupport`
15477 Parameters
15478 ----------
15479 handle : :py:obj:`~.cudaIpcEventHandle_t`
15480 Interprocess handle to open
15482 Returns
15483 -------
15484 cudaError_t
15485 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorDeviceUninitialized`
15486 event : :py:obj:`~.cudaEvent_t`
15487 Returns the imported event
15489 See Also
15490 --------
15491 :py:obj:`~.cudaEventCreate`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cuIpcOpenEventHandle`
15492 """
15493 cdef cudaEvent_t event = cudaEvent_t()
15494 with nogil:
15495 err = cyruntime.cudaIpcOpenEventHandle(<cyruntime.cudaEvent_t*>event._pvt_ptr, handle._pvt_ptr[0])
15496 if err != cyruntime.cudaSuccess:
15497 return (_dict_cudaError_t[err], None)
15498 return (_dict_cudaError_t[err], event)
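
# Illustrative sketch (hypothetical helper): creating an event that can be
# exported to another process. Assumes cudaEventCreateWithFlags follows this
# module's tuple-return convention and that the cudaEventDisableTiming and
# cudaEventInterprocess flag constants are defined at module level like the
# flags near the top of this file.
def _example_export_event():
    err, event = cudaEventCreateWithFlags(
        cudaEventDisableTiming | cudaEventInterprocess)
    if err != cudaError_t.cudaSuccess:
        return err, None
    return cudaIpcGetEventHandle(event)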
15500@cython.embedsignature(True)
15501def cudaIpcGetMemHandle(devPtr):
15502 """ Gets an interprocess memory handle for an existing device memory allocation.
15504 Takes a pointer to the base of an existing device memory allocation
15505 created with :py:obj:`~.cudaMalloc` and exports it for use in another
15506 process. This is a lightweight operation and may be called multiple
15507 times on an allocation without adverse effects.
15509 If a region of memory is freed with :py:obj:`~.cudaFree` and a
15510 subsequent call to :py:obj:`~.cudaMalloc` returns memory with the same
15511 device address, :py:obj:`~.cudaIpcGetMemHandle` will return a unique
15512 handle for the new memory.
15514 IPC functionality is restricted to devices with support for unified
15515 addressing on Linux and Windows operating systems. IPC functionality on
15516 Windows is supported for compatibility purposes but not recommended as
15517 it comes with performance cost. Users can test their device for IPC
15518 functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
15519 :py:obj:`~.cudaDevAttrIpcEventSupport`
15521 Parameters
15522 ----------
15523 devPtr : Any
15524 Base pointer to previously allocated device memory
15526 Returns
15527 -------
15528 cudaError_t
15529 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
15530 handle : :py:obj:`~.cudaIpcMemHandle_t`
15531 Pointer to user allocated :py:obj:`~.cudaIpcMemHandle` to return
15532 the handle in.
15534 See Also
15535 --------
15536 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cuIpcGetMemHandle`
15537 """
15538 cdef cudaIpcMemHandle_t handle = cudaIpcMemHandle_t()
15539 cydevPtr = _HelperInputVoidPtr(devPtr)
15540 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
15541 with nogil:
15542 err = cyruntime.cudaIpcGetMemHandle(<cyruntime.cudaIpcMemHandle_t*>handle._pvt_ptr, cydevPtr_ptr)
15543 if err != cyruntime.cudaSuccess:
15544 return (_dict_cudaError_t[err], None)
15545 return (_dict_cudaError_t[err], handle)
15547@cython.embedsignature(True)
15548def cudaIpcOpenMemHandle(handle not None : cudaIpcMemHandle_t, unsigned int flags):
15549 """ Opens an interprocess memory handle exported from another process and returns a device pointer usable in the local process.
15551 Maps memory exported from another process with
15552 :py:obj:`~.cudaIpcGetMemHandle` into the current device address space.
15553 For contexts on different devices :py:obj:`~.cudaIpcOpenMemHandle` can
15554 attempt to enable peer access between the devices as if the user called
15555 :py:obj:`~.cudaDeviceEnablePeerAccess`. This behavior is controlled by
15556 the :py:obj:`~.cudaIpcMemLazyEnablePeerAccess` flag.
15557 :py:obj:`~.cudaDeviceCanAccessPeer` can determine if a mapping is
15558 possible.
15560 :py:obj:`~.cudaIpcOpenMemHandle` can open handles to devices that may
15561 not be visible in the process calling the API.
15563 Contexts that may open :py:obj:`~.cudaIpcMemHandles` are restricted in
15564 the following way. :py:obj:`~.cudaIpcMemHandles` from each device in a
15565 given process may only be opened by one context per device per other
15566 process.
15568 If the memory handle has already been opened by the current context,
15569 the reference count on the handle is incremented by 1 and the existing
15570 device pointer is returned.
15572 Memory returned from :py:obj:`~.cudaIpcOpenMemHandle` must be freed
15573 with :py:obj:`~.cudaIpcCloseMemHandle`.
15575 Calling :py:obj:`~.cudaFree` on an exported memory region before
15576 calling :py:obj:`~.cudaIpcCloseMemHandle` in the importing context will
15577 result in undefined behavior.
15579 IPC functionality is restricted to devices with support for unified
15580 addressing on Linux and Windows operating systems. IPC functionality on
15581 Windows is supported for compatibility purposes but not recommended as
15582 it comes with performance cost. Users can test their device for IPC
15583 functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
15584 :py:obj:`~.cudaDevAttrIpcEventSupport`
15586 Parameters
15587 ----------
15588 handle : :py:obj:`~.cudaIpcMemHandle_t`
15589 :py:obj:`~.cudaIpcMemHandle` to open
15590 flags : unsigned int
15591 Flags for this operation. Must be specified as
15592 :py:obj:`~.cudaIpcMemLazyEnablePeerAccess`
15594 Returns
15595 -------
15596 cudaError_t
15597 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorDeviceUninitialized`, :py:obj:`~.cudaErrorTooManyPeers`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
15598 devPtr : Any
15599 Returned device pointer
15601 See Also
15602 --------
15603 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcCloseMemHandle`, :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cuIpcOpenMemHandle`
15605 Notes
15606 -----
15607 No guarantees are made about the address returned in `*devPtr`.
15608 In particular, multiple processes may not receive the same address for the same `handle`.
15609 """
15610 cdef void_ptr devPtr = 0
15611 with nogil:
15612 err = cyruntime.cudaIpcOpenMemHandle(<void**>&devPtr, handle._pvt_ptr[0], flags)
15613 if err != cyruntime.cudaSuccess:
15614 return (_dict_cudaError_t[err], None)
15615 return (_dict_cudaError_t[err], devPtr)
15617@cython.embedsignature(True)
15618def cudaIpcCloseMemHandle(devPtr):
15619 """ Attempts to close memory mapped with cudaIpcOpenMemHandle.
15621 Decrements the reference count of the memory returned by
15622 :py:obj:`~.cudaIpcOpenMemHandle` by 1. When the reference count reaches
15623 0, this API unmaps the memory. The original allocation in the exporting
15624 process as well as imported mappings in other processes will be
15625 unaffected.
15627 Any resources used to enable peer access will be freed if this is the
15628 last mapping using them.
15630 IPC functionality is restricted to devices with support for unified
15631 addressing on Linux and Windows operating systems. IPC functionality on
15632 Windows is supported for compatibility purposes but not recommended as
15633 it comes with performance cost. Users can test their device for IPC
15634 functionality by calling :py:obj:`~.cudaDeviceGetAttribute` with
15635 :py:obj:`~.cudaDevAttrIpcEventSupport`
15637 Parameters
15638 ----------
15639 devPtr : Any
15640 Device pointer returned by :py:obj:`~.cudaIpcOpenMemHandle`
15642 Returns
15643 -------
15644 cudaError_t
15645 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMapBufferObjectFailed`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
15647 See Also
15648 --------
15649 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaIpcGetEventHandle`, :py:obj:`~.cudaIpcOpenEventHandle`, :py:obj:`~.cudaIpcGetMemHandle`, :py:obj:`~.cudaIpcOpenMemHandle`, :py:obj:`~.cuIpcCloseMemHandle`
15650 """
15651 cydevPtr = _HelperInputVoidPtr(devPtr)
15652 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
15653 with nogil:
15654 err = cyruntime.cudaIpcCloseMemHandle(cydevPtr_ptr)
15655 return (_dict_cudaError_t[err],)
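
# Illustrative sketch (hypothetical helper): importing a memory handle that
# another process produced with cudaIpcGetMemHandle, then unmapping it.
# Assumes the cudaIpcMemLazyEnablePeerAccess constant is defined at module
# level like the other flag constants in this file.
def _example_use_imported_allocation(handle not None : cudaIpcMemHandle_t):
    err, dev_ptr = cudaIpcOpenMemHandle(handle, cudaIpcMemLazyEnablePeerAccess)
    if err != cudaError_t.cudaSuccess:
        return err
    # ... use dev_ptr with memcpys or kernel launches in this process ...
    err, = cudaIpcCloseMemHandle(dev_ptr)
    return err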
15657@cython.embedsignature(True)
15658def cudaDeviceFlushGPUDirectRDMAWrites(target not None : cudaFlushGPUDirectRDMAWritesTarget, scope not None : cudaFlushGPUDirectRDMAWritesScope):
15659 """ Blocks until remote writes are visible to the specified scope.
15661 Blocks until remote writes to the target context via mappings created
15662 through GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see
15663 https://docs.nvidia.com/cuda/gpudirect-rdma for more information), are
15664 visible to the specified scope.
15666 If the scope equals or lies within the scope indicated by
15667 :py:obj:`~.cudaDevAttrGPUDirectRDMAWritesOrdering`, the call will be a
15668 no-op and can be safely omitted for performance. This can be determined
15669 by comparing the numerical values between the two enums, with smaller
15670 scopes having smaller values.
15672 Users may query support for this API via
15673 :py:obj:`~.cudaDevAttrGPUDirectRDMAFlushWritesOptions`.
15675 Parameters
15676 ----------
15677 target : :py:obj:`~.cudaFlushGPUDirectRDMAWritesTarget`
15678 The target of the operation, see cudaFlushGPUDirectRDMAWritesTarget
15679 scope : :py:obj:`~.cudaFlushGPUDirectRDMAWritesScope`
15680 The scope of the operation, see cudaFlushGPUDirectRDMAWritesScope
15682 Returns
15683 -------
15684 cudaError_t
15685 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotSupported`,
15687 See Also
15688 --------
15689 :py:obj:`~.cuFlushGPUDirectRDMAWrites`
15690 """
15691 cdef cyruntime.cudaFlushGPUDirectRDMAWritesTarget cytarget = target.value
15692 cdef cyruntime.cudaFlushGPUDirectRDMAWritesScope cyscope = scope.value
15693 with nogil:
15694 err = cyruntime.cudaDeviceFlushGPUDirectRDMAWrites(cytarget, cyscope)
15695 return (_dict_cudaError_t[err],)
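
# Illustrative sketch (hypothetical helper): skipping the flush when the
# device's default write ordering already covers the needed scope, using
# the numerical-comparison rule from the docstring above. `ordering` is the
# value of cudaDevAttrGPUDirectRDMAWritesOrdering queried elsewhere; the
# enum member names are assumed to mirror the C API.
def _example_flush_to_all_devices(int ordering):
    scope = cudaFlushGPUDirectRDMAWritesScope.cudaFlushGPUDirectRDMAWritesToAllDevices
    if ordering >= scope.value:
        return cudaError_t.cudaSuccess  # already ordered; flush would be a no-op
    err, = cudaDeviceFlushGPUDirectRDMAWrites(
        cudaFlushGPUDirectRDMAWritesTarget.cudaFlushGPUDirectRDMAWritesTargetCurrentDevice,
        scope)
    return err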
15697ctypedef struct cudaAsyncCallbackData_st:
15698 cyruntime.cudaAsyncCallback callback
15699 void *userData
15701ctypedef cudaAsyncCallbackData_st cudaAsyncCallbackData
15703@cython.show_performance_hints(False)
15704cdef void cudaAsyncNotificationCallbackWrapper(cyruntime.cudaAsyncNotificationInfo_t *info, void *data, cyruntime.cudaAsyncCallbackHandle_t handle) nogil:
15705 cdef cudaAsyncCallbackData *cbData = <cudaAsyncCallbackData *>data
15706 with gil:
15707 cbData.callback(info, cbData.userData, handle)
15709@cython.embedsignature(True)
15710def cudaDeviceRegisterAsyncNotification(int device, callbackFunc, userData):
15711 """ Registers a callback function to receive async notifications.
15713 Registers `callbackFunc` to receive async notifications.
15715 The `userData` parameter is passed to the callback function at async
15716 notification time. Likewise, `callback` is also passed to the callback
15717 function to distinguish between multiple registered callbacks.
15719 The callback function being registered should be designed to return
15720 quickly (~10ms). Any long running tasks should be queued for execution
15721 on an application thread.
15723 Callbacks may not call cudaDeviceRegisterAsyncNotification or
15724 cudaDeviceUnregisterAsyncNotification. Doing so will result in
15725 :py:obj:`~.cudaErrorNotPermitted`. Async notification callbacks execute
15726 in an undefined order and may be serialized.
15728 Returns in `*callback` a handle representing the registered callback
15729 instance.
15731 Parameters
15732 ----------
15733 device : int
15734 The device on which to register the callback
15735 callbackFunc : :py:obj:`~.cudaAsyncCallback`
15736 The function to register as a callback
15737 userData : Any
15738 A generic pointer to user data. This is passed into the callback
15739 function.
15741 Returns
15742 -------
15743 cudaError_t
15744 :py:obj:`~.cudaSuccess` :py:obj:`~.cudaErrorNotSupported` :py:obj:`~.cudaErrorInvalidDevice` :py:obj:`~.cudaErrorInvalidValue` :py:obj:`~.cudaErrorNotPermitted` :py:obj:`~.cudaErrorUnknown`
15745 callback : :py:obj:`~.cudaAsyncCallbackHandle_t`
15746 A handle representing the registered callback instance
15748 See Also
15749 --------
15750 :py:obj:`~.cudaDeviceUnregisterAsyncNotification`
15751 """
15752 cdef cyruntime.cudaAsyncCallback cycallbackFunc
15753 if callbackFunc is None:
15754 pcallbackFunc = 0
15755 elif isinstance(callbackFunc, (cudaAsyncCallback,)):
15756 pcallbackFunc = int(callbackFunc)
15757 else:
15758 pcallbackFunc = int(cudaAsyncCallback(callbackFunc))
15759 cycallbackFunc = <cyruntime.cudaAsyncCallback><void_ptr>pcallbackFunc
15760 cyuserData = _HelperInputVoidPtr(userData)
15761 cdef void* cyuserData_ptr = <void*><void_ptr>cyuserData.cptr
15763 cdef cudaAsyncCallbackData *cbData = NULL
15764 cbData = <cudaAsyncCallbackData *>malloc(sizeof(cbData[0]))
15765 if cbData == NULL:
15766 return (cudaError_t.cudaErrorMemoryAllocation, None)
15767 cbData.callback = cycallbackFunc
15768 cbData.userData = cyuserData_ptr
15770 cdef cudaAsyncCallbackHandle_t callback = cudaAsyncCallbackHandle_t()
15771 with nogil:
15772 err = cyruntime.cudaDeviceRegisterAsyncNotification(device, <cyruntime.cudaAsyncCallback>cudaAsyncNotificationCallbackWrapper, <void *>cbData, <cyruntime.cudaAsyncCallbackHandle_t*>callback._pvt_ptr)
15773 if err != cyruntime.cudaSuccess:
15774 free(cbData)
15775 else:
15776 m_global._allocated[int(callback)] = cbData
15777 if err != cyruntime.cudaSuccess:
15778 return (_dict_cudaError_t[err], None)
15779 return (_dict_cudaError_t[err], callback)
15781@cython.embedsignature(True)
15782def cudaDeviceUnregisterAsyncNotification(int device, callback):
15783 """ Unregisters an async notification callback.
15785 Unregisters `callback` so that the corresponding callback function will
15786 stop receiving async notifications.
15788 Parameters
15789 ----------
15790 device : int
15791 The device from which to remove `callback`.
15792 callback : :py:obj:`~.cudaAsyncCallbackHandle_t`
15793 The callback instance to unregister from receiving async
15794 notifications.
15796 Returns
15797 -------
15798 cudaError_t
15799 :py:obj:`~.cudaSuccess` :py:obj:`~.cudaErrorNotSupported` :py:obj:`~.cudaErrorInvalidDevice` :py:obj:`~.cudaErrorInvalidValue` :py:obj:`~.cudaErrorNotPermitted` :py:obj:`~.cudaErrorUnknown`
15801 See Also
15802 --------
15803 :py:obj:`~.cudaDeviceRegisterAsyncNotification`
15804 """
15805 cdef cyruntime.cudaAsyncCallbackHandle_t cycallback
15806 if callback is None:
15807 pcallback = 0
15808 elif isinstance(callback, (cudaAsyncCallbackHandle_t,)):
15809 pcallback = int(callback)
15810 else:
15811 pcallback = int(cudaAsyncCallbackHandle_t(callback))
15812 cycallback = <cyruntime.cudaAsyncCallbackHandle_t><void_ptr>pcallback
15813 with nogil:
15814 err = cyruntime.cudaDeviceUnregisterAsyncNotification(device, cycallback)
15815 if err == cyruntime.cudaSuccess:
15816 free(m_global._allocated[pcallback])
15817 m_global._allocated.erase(<void_ptr>pcallback)
15818 return (_dict_cudaError_t[err],)
15820@cython.embedsignature(True)
15821def cudaDeviceGetSharedMemConfig():
15822 """ Returns the shared memory configuration for the current device.
15824 [Deprecated]
15826 This function will return in `pConfig` the current size of shared
15827 memory banks on the current device. On devices with configurable shared
15828 memory banks, :py:obj:`~.cudaDeviceSetSharedMemConfig` can be used to
15829 change this setting, so that all subsequent kernel launches will by
15830 default use the new bank size. When
15831 :py:obj:`~.cudaDeviceGetSharedMemConfig` is called on devices without
15832 configurable shared memory, it will return the fixed bank size of the
15833 hardware.
15835 The returned bank configurations can be either:
15837 - :py:obj:`~.cudaSharedMemBankSizeFourByte` - shared memory bank width
15838 is four bytes.
15840 - :py:obj:`~.cudaSharedMemBankSizeEightByte` - shared memory bank width
15841 is eight bytes.
15843 Returns
15844 -------
15845 cudaError_t
15846 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
15847 pConfig : :py:obj:`~.cudaSharedMemConfig`
15848 Returned cache configuration
15850 See Also
15851 --------
15852 :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaDeviceSetSharedMemConfig`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuCtxGetSharedMemConfig`
15853 """
15854 cdef cyruntime.cudaSharedMemConfig pConfig
15855 with nogil:
15856 err = cyruntime.cudaDeviceGetSharedMemConfig(&pConfig)
15857 if err != cyruntime.cudaSuccess:
15858 return (_dict_cudaError_t[err], None)
15859 return (_dict_cudaError_t[err], cudaSharedMemConfig(pConfig))
15861@cython.embedsignature(True)
15862def cudaDeviceSetSharedMemConfig(config not None : cudaSharedMemConfig):
15863 """ Sets the shared memory configuration for the current device.
15865 [Deprecated]
15867 On devices with configurable shared memory banks, this function will
15868 set the shared memory bank size which is used for all subsequent kernel
15869 launches. Any per-function setting of shared memory set via
15870 :py:obj:`~.cudaFuncSetSharedMemConfig` will override the device wide
15871 setting.
15873 Changing the shared memory configuration between launches may introduce
15874 a device side synchronization point.
15876 Changing the shared memory bank size will not increase shared memory
15877 usage or affect occupancy of kernels, but may have major effects on
15878 performance. Larger bank sizes will allow for greater potential
15879 bandwidth to shared memory, but will change what kinds of accesses to
15880 shared memory will result in bank conflicts.
15882 This function will do nothing on devices with fixed shared memory bank
15883 size.
15885 The supported bank configurations are:
15887 - :py:obj:`~.cudaSharedMemBankSizeDefault`: set bank width the device
15888 default (currently, four bytes)
15890 - :py:obj:`~.cudaSharedMemBankSizeFourByte`: set shared memory bank
15891 width to be four bytes natively.
15893 - :py:obj:`~.cudaSharedMemBankSizeEightByte`: set shared memory bank
15894 width to be eight bytes natively.
15896 Parameters
15897 ----------
15898 config : :py:obj:`~.cudaSharedMemConfig`
15899 Requested cache configuration
15901 Returns
15902 -------
15903 cudaError_t
15904 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
15906 See Also
15907 --------
15908 :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaDeviceGetSharedMemConfig`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuCtxSetSharedMemConfig`
15909 """
15910 cdef cyruntime.cudaSharedMemConfig cyconfig = config.value
15911 with nogil:
15912 err = cyruntime.cudaDeviceSetSharedMemConfig(cyconfig)
15913 return (_dict_cudaError_t[err],)
15915@cython.embedsignature(True)
15916def cudaGetLastError():
15917 """ Returns the last error from a runtime call.
15919 Returns the last error that has been produced by any of the runtime
15920 calls in the same instance of the CUDA Runtime library in the host
15921 thread and resets it to :py:obj:`~.cudaSuccess`.
15923 Note: Multiple instances of the CUDA Runtime library can be present in
15924 an application when using a library that statically links the CUDA
15925 Runtime.
15927 Returns
15928 -------
15929 cudaError_t
15930 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMissingConfiguration`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorLaunchTimeout`, :py:obj:`~.cudaErrorLaunchOutOfResources`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidConfiguration`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidSymbol`, :py:obj:`~.cudaErrorUnmapBufferObjectFailed`, :py:obj:`~.cudaErrorInvalidDevicePointer`, :py:obj:`~.cudaErrorInvalidTexture`, :py:obj:`~.cudaErrorInvalidTextureBinding`, :py:obj:`~.cudaErrorInvalidChannelDescriptor`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`, :py:obj:`~.cudaErrorInvalidFilterSetting`, :py:obj:`~.cudaErrorInvalidNormSetting`, :py:obj:`~.cudaErrorUnknown`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInsufficientDriver`, :py:obj:`~.cudaErrorNoDevice`, :py:obj:`~.cudaErrorSetOnActiveProcess`, :py:obj:`~.cudaErrorStartupFailure`, :py:obj:`~.cudaErrorInvalidPtx`, :py:obj:`~.cudaErrorUnsupportedPtxVersion`, :py:obj:`~.cudaErrorNoKernelImageForDevice`, :py:obj:`~.cudaErrorJitCompilerNotFound`, :py:obj:`~.cudaErrorJitCompilationDisabled`
15932 See Also
15933 --------
15934 :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaGetErrorName`, :py:obj:`~.cudaGetErrorString`, :py:obj:`~.cudaError`
15935 """
15936 with nogil:
15937 err = cyruntime.cudaGetLastError()
15938 return (_dict_cudaError_t[err],)
15940@cython.embedsignature(True)
15941def cudaPeekAtLastError():
15942 """ Returns the last error from a runtime call.
15944 Returns the last error that has been produced by any of the runtime
15945 calls in the same instance of the CUDA Runtime library in the host
15946 thread. This call does not reset the error to :py:obj:`~.cudaSuccess`
15947 like :py:obj:`~.cudaGetLastError()`.
15949 Note: Multiple instances of the CUDA Runtime library can be present in
15950 an application when using a library that statically links the CUDA
15951 Runtime.
15953 Returns
15954 -------
15955 cudaError_t
15956 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMissingConfiguration`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorLaunchTimeout`, :py:obj:`~.cudaErrorLaunchOutOfResources`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidConfiguration`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidSymbol`, :py:obj:`~.cudaErrorUnmapBufferObjectFailed`, :py:obj:`~.cudaErrorInvalidDevicePointer`, :py:obj:`~.cudaErrorInvalidTexture`, :py:obj:`~.cudaErrorInvalidTextureBinding`, :py:obj:`~.cudaErrorInvalidChannelDescriptor`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`, :py:obj:`~.cudaErrorInvalidFilterSetting`, :py:obj:`~.cudaErrorInvalidNormSetting`, :py:obj:`~.cudaErrorUnknown`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInsufficientDriver`, :py:obj:`~.cudaErrorNoDevice`, :py:obj:`~.cudaErrorSetOnActiveProcess`, :py:obj:`~.cudaErrorStartupFailure`, :py:obj:`~.cudaErrorInvalidPtx`, :py:obj:`~.cudaErrorUnsupportedPtxVersion`, :py:obj:`~.cudaErrorNoKernelImageForDevice`, :py:obj:`~.cudaErrorJitCompilerNotFound`, :py:obj:`~.cudaErrorJitCompilationDisabled`
15958 See Also
15959 --------
15960 :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaGetErrorName`, :py:obj:`~.cudaGetErrorString`, :py:obj:`~.cudaError`
15961 """
15962 with nogil:
15963 err = cyruntime.cudaPeekAtLastError()
15964 return (_dict_cudaError_t[err],)
15966@cython.embedsignature(True)
15967def cudaGetErrorName(error not None : cudaError_t):
15968 """ Returns the string representation of an error code enum name.
15970 Returns a string containing the name of an error code in the enum. If
15971 the error code is not recognized, "unrecognized error code" is
15972 returned.
15974 Parameters
15975 ----------
15976 error : :py:obj:`~.cudaError_t`
15977 Error code to convert to string
15979 Returns
15980 -------
15981 cudaError_t
15982 cudaError_t.cudaSuccess (this function always returns :py:obj:`~.cudaSuccess`)
15983 bytes
15984 `char*` pointer to a NULL-terminated string
15986 See Also
15987 --------
15988 :py:obj:`~.cudaGetErrorString`, :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaError`, :py:obj:`~.cuGetErrorName`
15989 """
15990 cdef cyruntime.cudaError_t cyerror = error.value
15991 with nogil:
15992 err = cyruntime.cudaGetErrorName(cyerror)
15993 return (cudaError_t.cudaSuccess, err)
15995@cython.embedsignature(True)
15996def cudaGetErrorString(error not None : cudaError_t):
15997 """ Returns the description string for an error code.
15999 Returns the description string for an error code. If the error code is
16000 not recognized, "unrecognized error code" is returned.
16002 Parameters
16003 ----------
16004 error : :py:obj:`~.cudaError_t`
16005 Error code to convert to string
16007 Returns
16008 -------
16009 cudaError_t
16010 cudaError_t.cudaSuccess (this function always returns :py:obj:`~.cudaSuccess`)
16011 bytes
16012 `char*` pointer to a NULL-terminated string
16014 See Also
16015 --------
16016 :py:obj:`~.cudaGetErrorName`, :py:obj:`~.cudaGetLastError`, :py:obj:`~.cudaPeekAtLastError`, :py:obj:`~.cudaError`, :py:obj:`~.cuGetErrorString`
16017 """
16018 cdef cyruntime.cudaError_t cyerror = error.value
16019 with nogil:
16020 err = cyruntime.cudaGetErrorString(cyerror)
16021 return (cudaError_t.cudaSuccess, err)
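# Usage sketch (illustrative, not part of the generated bindings): turn an
# error code into its enum name and human-readable description. Both calls
# always report cudaSuccess and return the string as bytes:
#
#     from cuda.bindings import runtime
#
#     code = runtime.cudaError_t.cudaErrorInvalidValue
#     _, name = runtime.cudaGetErrorName(code)     # b'cudaErrorInvalidValue'
#     _, desc = runtime.cudaGetErrorString(code)
#     print(name.decode(), "-", desc.decode())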
16023@cython.embedsignature(True)
16024def cudaGetDeviceCount():
16025 """ Returns the number of compute-capable devices.
16027 Returns in `*count` the number of devices with compute capability
16028 greater than or equal to 2.0 that are available for execution.
16030 Returns
16031 -------
16032 cudaError_t
16033 :py:obj:`~.cudaSuccess`
16034 count : int
16035 Returns the number of devices with compute capability greater than
16036 or equal to 2.0
16038 See Also
16039 --------
16040 :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuDeviceGetCount`
16041 """
16042 cdef int count = 0
16043 with nogil:
16044 err = cyruntime.cudaGetDeviceCount(&count)
16045 if err != cyruntime.cudaSuccess:
16046 return (_dict_cudaError_t[err], None)
16047 return (_dict_cudaError_t[err], count)
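# Usage sketch (illustrative, not part of the generated bindings): enumerate
# the visible devices, checking the error code first as with every call in
# this module:
#
#     from cuda.bindings import runtime
#
#     err, count = runtime.cudaGetDeviceCount()
#     assert err == runtime.cudaError_t.cudaSuccess
#     print(f"{count} CUDA device(s) visible")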
16049@cython.embedsignature(True)
16050def cudaGetDeviceProperties(int device):
16051 """ Returns information about the compute-device.
16053 Returns in `*prop` the properties of device `device`.
16055 Parameters
16056 ----------
16057 device : int
16058 Device number to get properties for
16060 Returns
16061 -------
16062 cudaError_t
16063 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
16064 prop : :py:obj:`~.cudaDeviceProp`
16065 Properties for the specified device
16067 See Also
16068 --------
16069 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaDeviceGetAttribute`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuDeviceGetAttribute`, :py:obj:`~.cuDeviceGetName`
16070 """
16071 cdef cudaDeviceProp prop = cudaDeviceProp()
16072 with nogil:
16073 err = cyruntime.cudaGetDeviceProperties(<cyruntime.cudaDeviceProp*>prop._pvt_ptr, device)
16074 if err != cyruntime.cudaSuccess:
16075 return (_dict_cudaError_t[err], None)
16076 return (_dict_cudaError_t[err], prop)
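# Usage sketch (illustrative, not part of the generated bindings; `name` and
# `totalGlobalMem` are standard cudaDeviceProp members, assumed to be exposed
# as attributes on the returned object):
#
#     from cuda.bindings import runtime
#
#     err, prop = runtime.cudaGetDeviceProperties(0)
#     if err == runtime.cudaError_t.cudaSuccess:
#         print(prop.name, prop.totalGlobalMem)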
16078@cython.embedsignature(True)
16079def cudaDeviceGetAttribute(attr not None : cudaDeviceAttr, int device):
16080 """ Returns information about the device.
16082 Returns in `*value` the integer value of the attribute `attr` on device
16083 `device`.
16085 Parameters
16086 ----------
16087 attr : :py:obj:`~.cudaDeviceAttr`
16088 Device attribute to query
16089 device : int
16090 Device number to query
16092 Returns
16093 -------
16094 cudaError_t
16095 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`
16096 value : int
16097 Returned device attribute value
16099 See Also
16100 --------
16101 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuDeviceGetAttribute`
16102 """
16103 cdef int value = 0
16104 cdef cyruntime.cudaDeviceAttr cyattr = attr.value
16105 with nogil:
16106 err = cyruntime.cudaDeviceGetAttribute(&value, cyattr, device)
16107 if err != cyruntime.cudaSuccess:
16108 return (_dict_cudaError_t[err], None)
16109 return (_dict_cudaError_t[err], value)
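# Usage sketch (illustrative, not part of the generated bindings; the enum
# member name is an assumption based on the standard cudaDeviceAttr values):
# query a single integer attribute instead of the full property struct.
#
#     from cuda.bindings import runtime
#
#     err, sm_count = runtime.cudaDeviceGetAttribute(
#         runtime.cudaDeviceAttr.cudaDevAttrMultiProcessorCount, 0)
#     assert err == runtime.cudaError_t.cudaSuccess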
16111@cython.embedsignature(True)
16112def cudaDeviceGetHostAtomicCapabilities(operations : Optional[tuple[cudaAtomicOperation] | list[cudaAtomicOperation]], unsigned int count, int device):
16113 """ Queries details about atomic operations supported between the device and host.
16115 Returns in `*capabilities` the details about requested atomic
16116 `*operations` over the link between `device` and the host. The
16117 allocated size of `*operations` and `*capabilities` must be `count`.
16119 For each :py:obj:`~.cudaAtomicOperation` in `*operations`, the
16120 corresponding result in `*capabilities` will be a bitmask indicating
16121 which of :py:obj:`~.cudaAtomicOperationCapability` the link supports
16122 natively.
16124 Returns :py:obj:`~.cudaErrorInvalidDevice` if `device` is not valid.
16126 Returns :py:obj:`~.cudaErrorInvalidValue` if `*capabilities` or
16127 `*operations` is NULL, if `count` is 0, or if any of `*operations` is
16128 not valid.
16130 Parameters
16131 ----------
16132 operations : list[:py:obj:`~.cudaAtomicOperation`]
16133 Requested operations
16134 count : unsigned int
16135 Count of requested operations and size of capabilities
16136 device : int
16137 Device handle
16139 Returns
16140 -------
16141 cudaError_t
16142 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`
16143 capabilities : list[unsigned int]
16144 Returned capability details of each requested operation
16146 See Also
16147 --------
16148 :py:obj:`~.cudaDeviceGetAttribute`, :py:obj:`~.cudaDeviceGetP2PAtomicCapabilities`, :py:obj:`~.cuDeviceGetHostAtomicCapabilities`
16149 """
16150 operations = [] if operations is None else operations
16151 if not all(isinstance(_x, (cudaAtomicOperation)) for _x in operations):
16152 raise TypeError("Argument 'operations' is not an instance of the expected type (tuple[cudaAtomicOperation] or list[cudaAtomicOperation])")
16153 cdef unsigned int* cycapabilities = NULL
16154 pycapabilities = []
16155 if count != 0:
16156 cycapabilities = <unsigned int*>calloc(count, sizeof(unsigned int))
16157 if cycapabilities is NULL:
16158 raise MemoryError('Failed to allocate ' + str(count) + ' x ' + str(sizeof(unsigned int)) + ' bytes')
16159 cdef vector[cyruntime.cudaAtomicOperation] cyoperations = [pyoperations.value for pyoperations in operations]
16160 if count > len(operations): free(cycapabilities); raise RuntimeError("List is too small: " + str(len(operations)) + " < " + str(count))  # free before raising to avoid leaking the buffer
16161 with nogil:
16162 err = cyruntime.cudaDeviceGetHostAtomicCapabilities(cycapabilities, cyoperations.data(), count, device)
16163 if cudaError_t(err) == cudaError_t(0):
16164 pycapabilities = [<unsigned int>cycapabilities[idx] for idx in range(count)]
16165 if cycapabilities is not NULL:
16166 free(cycapabilities)
16167 if err != cyruntime.cudaSuccess:
16168 return (_dict_cudaError_t[err], None)
16169 return (_dict_cudaError_t[err], pycapabilities)
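# Usage sketch (illustrative, not part of the generated bindings; the concrete
# cudaAtomicOperation members depend on the installed CUDA version, so the
# first enum member is taken generically):
#
#     from cuda.bindings import runtime
#
#     ops = list(runtime.cudaAtomicOperation)[:1]
#     err, caps = runtime.cudaDeviceGetHostAtomicCapabilities(ops, len(ops), 0)
#     if err == runtime.cudaError_t.cudaSuccess:
#         print(f"{ops[0].name}: capability mask {caps[0]:#x}")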
16171@cython.embedsignature(True)
16172def cudaDeviceGetDefaultMemPool(int device):
16173 """ Returns the default mempool of a device.
16175 The default mempool of a device contains device memory from that
16176 device.
16178 Parameters
16179 ----------
16180 device : int
16181 The device to get the default memory pool for.
16183 Returns
16184 -------
16185 cudaError_t
16186 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
16187 memPool : :py:obj:`~.cudaMemPool_t`
16188 The default memory pool of the specified device.
16190 See Also
16191 --------
16192 :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaMemPoolTrimTo`, :py:obj:`~.cudaMemPoolGetAttribute`, :py:obj:`~.cudaDeviceSetMemPool`, :py:obj:`~.cudaMemPoolSetAttribute`, :py:obj:`~.cudaMemPoolSetAccess`
16193 """
16194 cdef cudaMemPool_t memPool = cudaMemPool_t()
16195 with nogil:
16196 err = cyruntime.cudaDeviceGetDefaultMemPool(<cyruntime.cudaMemPool_t*>memPool._pvt_ptr, device)
16197 if err != cyruntime.cudaSuccess:
16198 return (_dict_cudaError_t[err], None)
16199 return (_dict_cudaError_t[err], memPool)
16201@cython.embedsignature(True)
16202def cudaDeviceSetMemPool(int device, memPool):
16203 """ Sets the current memory pool of a device.
16205 The memory pool must be local to the specified device. Unless a mempool
16206 is specified in the :py:obj:`~.cudaMallocAsync` call,
16207 :py:obj:`~.cudaMallocAsync` allocates from the current mempool of the
16208 provided stream's device. By default, a device's current memory pool is
16209 its default memory pool.
16211 Parameters
16212 ----------
16213 device : int
16214 The device to set the memory pool on.
16215 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
16216 The memory pool to make current for the device.
16218 Returns
16219 -------
16220 cudaError_t
16221 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorNotSupported`
16223 See Also
16224 --------
16225 :py:obj:`~.cuDeviceSetMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaMemPoolCreate`, :py:obj:`~.cudaMemPoolDestroy`, :py:obj:`~.cudaMallocFromPoolAsync`
16227 Notes
16228 -----
16229 Use :py:obj:`~.cudaMallocFromPoolAsync` to specify asynchronous allocations from a device different from the one the stream runs on.
16230 """
16231 cdef cyruntime.cudaMemPool_t cymemPool
16232 if memPool is None:
16233 pmemPool = 0
16234 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
16235 pmemPool = int(memPool)
16236 else:
16237 pmemPool = int(cudaMemPool_t(memPool))
16238 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
16239 with nogil:
16240 err = cyruntime.cudaDeviceSetMemPool(device, cymemPool)
16241 return (_dict_cudaError_t[err],)
16243@cython.embedsignature(True)
16244def cudaDeviceGetMemPool(int device):
16245 """ Gets the current mempool for a device.
16247 Returns the last pool provided to :py:obj:`~.cudaDeviceSetMemPool` for
16248 this device or the device's default memory pool if
16249 :py:obj:`~.cudaDeviceSetMemPool` has never been called. By default the
16250 current mempool is the default mempool for a device, otherwise the
16251 returned pool must have been set with :py:obj:`~.cuDeviceSetMemPool` or
16252 :py:obj:`~.cudaDeviceSetMemPool`.
16254 Parameters
16255 ----------
16256 device : int
16257 The device to query.
16259 Returns
16260 -------
16261 cudaError_t
16262 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
16263 memPool : :py:obj:`~.cudaMemPool_t`
16264 The current memory pool of the device.
16266 See Also
16267 --------
16268 :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceSetMemPool`
16269 """
16270 cdef cudaMemPool_t memPool = cudaMemPool_t()
16271 with nogil:
16272 err = cyruntime.cudaDeviceGetMemPool(<cyruntime.cudaMemPool_t*>memPool._pvt_ptr, device)
16273 if err != cyruntime.cudaSuccess:
16274 return (_dict_cudaError_t[err], None)
16275 return (_dict_cudaError_t[err], memPool)
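# Usage sketch (illustrative, not part of the generated bindings): fetch the
# default pool of device 0 and install it as the current pool, a no-op when
# the default pool is already current:
#
#     from cuda.bindings import runtime
#
#     err, pool = runtime.cudaDeviceGetDefaultMemPool(0)
#     assert err == runtime.cudaError_t.cudaSuccess
#     err, = runtime.cudaDeviceSetMemPool(0, pool)
#     err, current = runtime.cudaDeviceGetMemPool(0)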
16277@cython.embedsignature(True)
16278def cudaDeviceGetNvSciSyncAttributes(nvSciSyncAttrList, int device, int flags):
16279 """ Return NvSciSync attributes that this device can support.
16281 Returns in `nvSciSyncAttrList`, the properties of NvSciSync that this
16282 CUDA device, `dev` can support. The returned `nvSciSyncAttrList` can be
16283 used to create an NvSciSync that matches this device's capabilities.
16285 If the NvSciSyncAttrKey_RequiredPerm field in `nvSciSyncAttrList` is
16286 already set, this API will return :py:obj:`~.cudaErrorInvalidValue`.
16288 Applications should set `nvSciSyncAttrList` to a valid
16289 NvSciSyncAttrList; otherwise this API will return
16290 :py:obj:`~.cudaErrorInvalidHandle`.
16292 The `flags` parameter controls how the application intends to use the
16293 NvSciSync created from the `nvSciSyncAttrList`. The valid flags are:
16295 - :py:obj:`~.cudaNvSciSyncAttrSignal`, specifies that the application
16296 intends to signal an NvSciSync on this CUDA device.
16298 - :py:obj:`~.cudaNvSciSyncAttrWait`, specifies that the application
16299 intends to wait on an NvSciSync on this CUDA device.
16301 At least one of these flags must be set; otherwise the API returns
16302 :py:obj:`~.cudaErrorInvalidValue`. The two flags are orthogonal to one
16303 another: a developer may set both, which allows setting both wait- and
16304 signal-specific attributes in the same `nvSciSyncAttrList`.
16306 Note that this API updates the input `nvSciSyncAttrList` with values
16307 equivalent to the following public attribute key-values:
16308 NvSciSyncAttrKey_RequiredPerm is set to
16310 - NvSciSyncAccessPerm_SignalOnly if :py:obj:`~.cudaNvSciSyncAttrSignal`
16311 is set in `flags`.
16313 - NvSciSyncAccessPerm_WaitOnly if :py:obj:`~.cudaNvSciSyncAttrWait` is
16314 set in `flags`.
16316 - NvSciSyncAccessPerm_WaitSignal if both
16317 :py:obj:`~.cudaNvSciSyncAttrWait` and
16318 :py:obj:`~.cudaNvSciSyncAttrSignal` are set in `flags`.
16319 NvSciSyncAttrKey_PrimitiveInfo is set to
16321 - NvSciSyncAttrValPrimitiveType_SysmemSemaphore on any valid `device`.
16323 - NvSciSyncAttrValPrimitiveType_Syncpoint if `device` is a Tegra
16324 device.
16326 - NvSciSyncAttrValPrimitiveType_SysmemSemaphorePayload64b if `device`
16327 is GA10X+. NvSciSyncAttrKey_GpuId is set to the same UUID that is
16328 returned in :py:obj:`~.cudaDeviceProp.uuid` from
16329 :py:obj:`~.cudaGetDeviceProperties` for this `device`.
16336 Parameters
16337 ----------
16338 nvSciSyncAttrList : Any
16339 Return NvSciSync attributes supported.
16340 device : int
16341 Valid Cuda Device to get NvSciSync attributes for.
16342 flags : int
16343 flags describing NvSciSync usage.
16345 Returns
16346 -------
16347 cudaError_t
16348 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorDeviceUninitialized`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidHandle`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorMemoryAllocation`
16350 See Also
16351 --------
16352 :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
16353 """
16354 cynvSciSyncAttrList = _HelperInputVoidPtr(nvSciSyncAttrList)
16355 cdef void* cynvSciSyncAttrList_ptr = <void*><void_ptr>cynvSciSyncAttrList.cptr
16356 with nogil:
16357 err = cyruntime.cudaDeviceGetNvSciSyncAttributes(cynvSciSyncAttrList_ptr, device, flags)
16358 return (_dict_cudaError_t[err],)
16360@cython.embedsignature(True)
16361def cudaDeviceGetP2PAttribute(attr not None : cudaDeviceP2PAttr, int srcDevice, int dstDevice):
16362 """ Queries attributes of the link between two devices.
16364 Returns in `*value` the value of the requested attribute `attr` of
16365 the link between `srcDevice` and `dstDevice`. The supported attributes
16366 are:
16368 - :py:obj:`~.cudaDevP2PAttrPerformanceRank`: A relative value
16369 indicating the performance of the link between two devices. Lower
16370 value means better performance (0 being the value used for the most
16371 performant link).
16373 - :py:obj:`~.cudaDevP2PAttrAccessSupported`: 1 if peer access is
16374 enabled.
16376 - :py:obj:`~.cudaDevP2PAttrNativeAtomicSupported`: 1 if all native
16377 atomic operations over the link are supported.
16379 - :py:obj:`~.cudaDevP2PAttrCudaArrayAccessSupported`: 1 if accessing
16380 CUDA arrays over the link is supported.
16382 - :py:obj:`~.cudaDevP2PAttrOnlyPartialNativeAtomicSupported`: 1 if some
16383 CUDA-valid atomic operations over the link are supported. Information
16384 about specific operations can be retrieved with
16385 :py:obj:`~.cudaDeviceGetP2PAtomicCapabilities`.
16387 Returns :py:obj:`~.cudaErrorInvalidDevice` if `srcDevice` or
16388 `dstDevice` are not valid or if they represent the same device.
16390 Returns :py:obj:`~.cudaErrorInvalidValue` if `attr` is not valid or
16391 if `value` is a null pointer.
16393 Parameters
16394 ----------
16395 attr : :py:obj:`~.cudaDeviceP2PAttr`
16396 The requested attribute of the link between `srcDevice` and
16397 `dstDevice`.
16398 srcDevice : int
16399 The source device of the target link.
16400 dstDevice : int
16401 The destination device of the target link.
16403 Returns
16404 -------
16405 cudaError_t
16406 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`
16407 value : int
16408 Returned value of the requested attribute
16410 See Also
16411 --------
16412 :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cudaDeviceDisablePeerAccess`, :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cuDeviceGetP2PAttribute` :py:obj:`~.cudaDeviceGetP2PAtomicCapabilities`
16413 """
16414 cdef int value = 0
16415 cdef cyruntime.cudaDeviceP2PAttr cyattr = attr.value
16416 with nogil:
16417 err = cyruntime.cudaDeviceGetP2PAttribute(&value, cyattr, srcDevice, dstDevice)
16418 if err != cyruntime.cudaSuccess:
16419 return (_dict_cudaError_t[err], None)
16420 return (_dict_cudaError_t[err], value)
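# Usage sketch (illustrative, not part of the generated bindings; assumes at
# least two visible devices):
#
#     from cuda.bindings import runtime
#
#     err, can_access = runtime.cudaDeviceGetP2PAttribute(
#         runtime.cudaDeviceP2PAttr.cudaDevP2PAttrAccessSupported, 0, 1)
#     if err == runtime.cudaError_t.cudaSuccess and can_access:
#         print("peer access between device 0 and 1 is enabled")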
16422@cython.embedsignature(True)
16423def cudaDeviceGetP2PAtomicCapabilities(operations : Optional[tuple[cudaAtomicOperation] | list[cudaAtomicOperation]], unsigned int count, int srcDevice, int dstDevice):
16424 """ Queries details about atomic operations supported between two devices.
16426 Returns in `*capabilities` the details about requested atomic
16427 `*operations` over the link between `srcDevice` and `dstDevice`.
16428 The allocated size of `*operations` and `*capabilities` must be
16429 `count`.
16431 For each :py:obj:`~.cudaAtomicOperation` in `*operations`, the
16432 corresponding result in `*capabilities` will be a bitmask indicating
16433 which of :py:obj:`~.cudaAtomicOperationCapability` the link supports
16434 natively.
16436 Returns :py:obj:`~.cudaErrorInvalidDevice` if `srcDevice` or
16437 `dstDevice` are not valid or if they represent the same device.
16439 Returns :py:obj:`~.cudaErrorInvalidValue` if `*capabilities` or
16440 `*operations` is NULL, if `count` is 0, or if any of `*operations` is
16441 not valid.
16443 Parameters
16444 ----------
16445 operations : list[:py:obj:`~.cudaAtomicOperation`]
16446 Requested operations
16447 count : unsigned int
16448 Count of requested operations and size of capabilities
16449 srcDevice : int
16450 The source device of the target link
16451 dstDevice : int
16452 The destination device of the target link
16454 Returns
16455 -------
16456 cudaError_t
16457 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`
16458 capabilities : list[unsigned int]
16459 Returned capability details of each requested operation
16461 See Also
16462 --------
16463 :py:obj:`~.cudaDeviceGetP2PAttribute`, :py:obj:`~.cuDeviceGetP2PAttribute`, :py:obj:`~.cuDeviceGetP2PAtomicCapabilities`
16464 """
16465 operations = [] if operations is None else operations
16466 if not all(isinstance(_x, (cudaAtomicOperation)) for _x in operations):
16467 raise TypeError("Argument 'operations' is not an instance of the expected type (tuple[cudaAtomicOperation] or list[cudaAtomicOperation])")
16468 cdef unsigned int* cycapabilities = NULL
16469 pycapabilities = []
16470 if count != 0:
16471 cycapabilities = <unsigned int*>calloc(count, sizeof(unsigned int))
16472 if cycapabilities is NULL:
16473 raise MemoryError('Failed to allocate ' + str(count) + ' x ' + str(sizeof(unsigned int)) + ' bytes')
16474 cdef vector[cyruntime.cudaAtomicOperation] cyoperations = [pyoperations.value for pyoperations in operations]
16475 if count > len(operations): free(cycapabilities); raise RuntimeError("List is too small: " + str(len(operations)) + " < " + str(count))  # free before raising to avoid leaking the buffer
16476 with nogil:
16477 err = cyruntime.cudaDeviceGetP2PAtomicCapabilities(cycapabilities, cyoperations.data(), count, srcDevice, dstDevice)
16478 if cudaError_t(err) == cudaError_t(0):
16479 pycapabilities = [<unsigned int>cycapabilities[idx] for idx in range(count)]
16480 if cycapabilities is not NULL:
16481 free(cycapabilities)
16482 if err != cyruntime.cudaSuccess:
16483 return (_dict_cudaError_t[err], None)
16484 return (_dict_cudaError_t[err], pycapabilities)
16486@cython.embedsignature(True)
16487def cudaChooseDevice(prop : Optional[cudaDeviceProp]):
16488 """ Select compute-device which best matches criteria.
16490 Returns in `*device` the device which has properties that best match
16491 `*prop`.
16493 Parameters
16494 ----------
16495 prop : :py:obj:`~.cudaDeviceProp`
16496 Desired device properties
16498 Returns
16499 -------
16500 cudaError_t
16501 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
16502 device : int
16503 Device with best match
16505 See Also
16506 --------
16507 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaInitDevice`
16508 """
16509 cdef int device = 0
16510 cdef cyruntime.cudaDeviceProp* cyprop_ptr = prop._pvt_ptr if prop is not None else NULL
16511 with nogil:
16512 err = cyruntime.cudaChooseDevice(&device, cyprop_ptr)
16513 if err != cyruntime.cudaSuccess:
16514 return (_dict_cudaError_t[err], None)
16515 return (_dict_cudaError_t[err], device)
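# Usage sketch (illustrative, not part of the generated bindings; the field
# assignment on cudaDeviceProp is an assumption based on the struct members):
#
#     from cuda.bindings import runtime
#
#     prop = runtime.cudaDeviceProp()
#     prop.major = 8                      # prefer compute capability >= 8.x
#     err, device = runtime.cudaChooseDevice(prop)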
16517@cython.embedsignature(True)
16518def cudaInitDevice(int device, unsigned int deviceFlags, unsigned int flags):
16519 """ Initialize device to be used for GPU executions.
16521 This function will initialize the CUDA Runtime structures and primary
16522 context on `device` when called, but the context will not be made
16523 current to `device`.
16525 When :py:obj:`~.cudaInitDeviceFlagsAreValid` is set in `flags`,
16526 deviceFlags are applied to the requested device. The values of
16527 deviceFlags match those of the flags parameters in
16528 :py:obj:`~.cudaSetDeviceFlags`. The effect may be verified by
16529 :py:obj:`~.cudaGetDeviceFlags`.
16531 This function will return an error if the device is in
16532 :py:obj:`~.cudaComputeModeExclusiveProcess` and is occupied by another
16533 process or if the device is in :py:obj:`~.cudaComputeModeProhibited`.
16535 Parameters
16536 ----------
16537 device : int
16538 Device on which the runtime will initialize itself.
16539 deviceFlags : unsigned int
16540 Parameters for device operation.
16541 flags : unsigned int
16542 Flags for controlling the device initialization.
16544 Returns
16545 -------
16546 cudaError_t
16547 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
16549 See Also
16550 --------
16551 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaSetDevice` :py:obj:`~.cuCtxSetCurrent`
16552 """
16553 with nogil:
16554 err = cyruntime.cudaInitDevice(device, deviceFlags, flags)
16555 return (_dict_cudaError_t[err],)
16557@cython.embedsignature(True)
16558def cudaSetDevice(int device):
16559 """ Set device to be used for GPU executions.
16561 Sets `device` as the current device for the calling host thread. Valid
16562 device IDs are 0 to (:py:obj:`~.cudaGetDeviceCount()` - 1).
16564 Any device memory subsequently allocated from this host thread using
16565 :py:obj:`~.cudaMalloc()`, :py:obj:`~.cudaMallocPitch()` or
16566 :py:obj:`~.cudaMallocArray()` will be physically resident on `device`.
16567 Any host memory allocated from this host thread using
16568 :py:obj:`~.cudaMallocHost()` or :py:obj:`~.cudaHostAlloc()` or
16569 :py:obj:`~.cudaHostRegister()` will have its lifetime associated with
16570 `device`. Any streams or events created from this host thread will be
16571 associated with `device`. Any kernels launched from this host thread
16572 using the <<<>>> operator or :py:obj:`~.cudaLaunchKernel()` will be
16573 executed on `device`.
16575 This call may be made from any host thread, to any device, and at any
16576 time. This function will do no synchronization with the previous or new
16577 device, and should only take significant time when it initializes the
16578 runtime's context state. This call will bind the primary context of the
16579 specified device to the calling thread and all the subsequent memory
16580 allocations, stream and event creations, and kernel launches will be
16581 associated with the primary context. This function will also
16582 immediately initialize the runtime state on the primary context, and
16583 the context will be current on `device` immediately. This function will
16584 return an error if the device is in
16585 :py:obj:`~.cudaComputeModeExclusiveProcess` and is occupied by another
16586 process or if the device is in :py:obj:`~.cudaComputeModeProhibited`.
16588 It is not required to call :py:obj:`~.cudaInitDevice` before using this
16589 function.
16591 Parameters
16592 ----------
16593 device : int
16594 Device on which the active host thread should execute the device
16595 code.
16597 Returns
16598 -------
16599 cudaError_t
16600 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorDeviceUnavailable`
16602 See Also
16603 --------
16604 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuCtxSetCurrent`
16605 """
16606 with nogil:
16607 err = cyruntime.cudaSetDevice(device)
16608 return (_dict_cudaError_t[err],)
16610@cython.embedsignature(True)
16611def cudaGetDevice():
16612 """ Returns which device is currently being used.
16614 Returns in `*device` the current device for the calling host thread.
16616 Returns
16617 -------
16618 cudaError_t
16619 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorDeviceUnavailable`
16620 device : int
16621 Returns the device on which the active host thread executes the
16622 device code.
16624 See Also
16625 --------
16626 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cuCtxGetCurrent`
16627 """
16628 cdef int device = 0
16629 with nogil:
16630 err = cyruntime.cudaGetDevice(&device)
16631 if err != cyruntime.cudaSuccess:
16632 return (_dict_cudaError_t[err], None)
16633 return (_dict_cudaError_t[err], device)
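# Usage sketch (illustrative, not part of the generated bindings): make a
# device current for the calling thread and read it back:
#
#     from cuda.bindings import runtime
#
#     err, = runtime.cudaSetDevice(0)
#     assert err == runtime.cudaError_t.cudaSuccess
#     err, device = runtime.cudaGetDevice()
#     assert device == 0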
16635@cython.embedsignature(True)
16636def cudaSetDeviceFlags(unsigned int flags):
16637 """ Sets flags to be used for device executions.
16639 Records `flags` as the flags for the current device. If the current
16640 device has been set and that device has already been initialized, the
16641 previous flags are overwritten. If the current device has not been
16642 initialized, it is initialized with the provided flags. If no device
16643 has been made current to the calling thread, a default device is
16644 selected and initialized with the provided flags.
16646 The three LSBs of the `flags` parameter can be used to control how the
16647 CPU thread interacts with the OS scheduler when waiting for results
16648 from the device.
16650 - :py:obj:`~.cudaDeviceScheduleAuto`: The default value if the `flags`
16651 parameter is zero, uses a heuristic based on the number of active
16652 CUDA contexts in the process `C` and the number of logical processors
16653 in the system `P`. If `C` > `P`, then CUDA will yield to other OS
16654 threads when waiting for the device, otherwise CUDA will not yield
16655 while waiting for results and actively spin on the processor.
16656 Additionally, on Tegra devices, :py:obj:`~.cudaDeviceScheduleAuto`
16657 uses a heuristic based on the power profile of the platform and may
16658 choose :py:obj:`~.cudaDeviceScheduleBlockingSync` for low-powered
16659 devices.
16661 - :py:obj:`~.cudaDeviceScheduleSpin`: Instruct CUDA to actively spin
16662 when waiting for results from the device. This can decrease latency
16663 when waiting for the device, but may lower the performance of CPU
16664 threads if they are performing work in parallel with the CUDA thread.
16666 - :py:obj:`~.cudaDeviceScheduleYield`: Instruct CUDA to yield its
16667 thread when waiting for results from the device. This can increase
16668 latency when waiting for the device, but can increase the performance
16669 of CPU threads performing work in parallel with the device.
16671 - :py:obj:`~.cudaDeviceScheduleBlockingSync`: Instruct CUDA to block
16672 the CPU thread on a synchronization primitive when waiting for the
16673 device to finish work.
16675 - :py:obj:`~.cudaDeviceBlockingSync`: Instruct CUDA to block the CPU
16676 thread on a synchronization primitive when waiting for the device to
16677 finish work. :py:obj:`~.Deprecated:` This flag was deprecated as of
16678 CUDA 4.0 and replaced with
16679 :py:obj:`~.cudaDeviceScheduleBlockingSync`.
16681 - :py:obj:`~.cudaDeviceMapHost`: This flag enables allocating pinned
16682 host memory that is accessible to the device. It is implicit for the
16683 runtime but may be absent if a context is created using the driver
16684 API. If this flag is not set, :py:obj:`~.cudaHostGetDevicePointer()`
16685 will always return a failure code.
16687 - :py:obj:`~.cudaDeviceLmemResizeToMax`: Instruct CUDA to not reduce
16688 local memory after resizing local memory for a kernel. This can
16689 prevent thrashing by local memory allocations when launching many
16690 kernels with high local memory usage at the cost of potentially
16691 increased memory usage. :py:obj:`~.Deprecated:` This flag is
16692 deprecated and the behavior enabled by this flag is now the default
16693 and cannot be disabled.
16695 - :py:obj:`~.cudaDeviceSyncMemops`: Ensures that synchronous memory
16696 operations initiated on this context will always synchronize. See
16697 further documentation in the section titled "API Synchronization
16698 behavior" to learn more about cases when synchronous memory
16699 operations can exhibit asynchronous behavior.
16701 Parameters
16702 ----------
16703 flags : unsigned int
16704 Parameters for device operation
16706 Returns
16707 -------
16708 cudaError_t
16709 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
16711 See Also
16712 --------
16713 :py:obj:`~.cudaGetDeviceFlags`, :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaSetValidDevices`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cuDevicePrimaryCtxSetFlags`
16714 """
16715 with nogil:
16716 err = cyruntime.cudaSetDeviceFlags(flags)
16717 return (_dict_cudaError_t[err],)
16719@cython.embedsignature(True)
16720def cudaGetDeviceFlags():
16721 """ Gets the flags for the current device.
16723 Returns in `flags` the flags for the current device. If there is a
16724 current device for the calling thread, the flags for the device are
16725 returned. If there is no current device, the flags for the first device
16726 are returned, which may be the default flags. Compare to the behavior
16727 of :py:obj:`~.cudaSetDeviceFlags`.
16729 Typically, the flags returned should match the behavior that will be
16730 seen if the calling thread uses a device after this call, without any
16731 change to the flags or current device in between by this or another
16732 thread. Note that if the device is not initialized, it is possible for
16733 another thread to change the flags for the current device before it is
16734 initialized. Additionally, when using exclusive mode, if this thread
16735 has not requested a specific device, it may use a device other than the
16736 first device, contrary to the assumption made by this function.
16738 If a context has been created via the driver API and is current to the
16739 calling thread, the flags for that context are always returned.
16741 Flags returned by this function may specifically include
16742 :py:obj:`~.cudaDeviceMapHost` even though it is not accepted by
16743 :py:obj:`~.cudaSetDeviceFlags` because it is implicit in runtime API
16744 flags. The reason for this is that the current context may have been
16745 created via the driver API in which case the flag is not implicit and
16746 may be unset.
16748 Returns
16749 -------
16750 cudaError_t
16751 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
16752 flags : unsigned int
16753 Pointer to store the device flags
16755 See Also
16756 --------
16757 :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaGetDeviceProperties`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaSetDeviceFlags`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuCtxGetFlags`, :py:obj:`~.cuDevicePrimaryCtxGetState`
16758 """
16759 cdef unsigned int flags = 0
16760 with nogil:
16761 err = cyruntime.cudaGetDeviceFlags(&flags)
16762 if err != cyruntime.cudaSuccess:
16763 return (_dict_cudaError_t[err], None)
16764 return (_dict_cudaError_t[err], flags)
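# Usage sketch (illustrative, not part of the generated bindings; the
# cudaDeviceScheduleBlockingSync constant is a module-level flag like the
# host-alloc flags above): request blocking synchronization, then verify.
#
#     from cuda.bindings import runtime
#
#     err, = runtime.cudaSetDeviceFlags(runtime.cudaDeviceScheduleBlockingSync)
#     err, flags = runtime.cudaGetDeviceFlags()
#     assert flags & runtime.cudaDeviceScheduleBlockingSync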
16766@cython.embedsignature(True)
16767def cudaStreamCreate():
16768 """ Create an asynchronous stream.
16770 Creates a new asynchronous stream on the context that is current to the
16771 calling host thread. If no context is current to the calling host
16772 thread, then the primary context for a device is selected, made current
16773 to the calling thread, and initialized before creating a stream on it.
16775 Returns
16776 -------
16777 cudaError_t
16778 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
16779 pStream : :py:obj:`~.cudaStream_t`
16780 Pointer to new stream identifier
16782 See Also
16783 --------
16784 :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cudaStreamGetDevResource`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamCreate`
16785 """
16786 cdef cudaStream_t pStream = cudaStream_t()
16787 with nogil:
16788 err = cyruntime.cudaStreamCreate(<cyruntime.cudaStream_t*>pStream._pvt_ptr)
16789 if err != cyruntime.cudaSuccess:
16790 return (_dict_cudaError_t[err], None)
16791 return (_dict_cudaError_t[err], pStream)
16793@cython.embedsignature(True)
16794def cudaStreamCreateWithFlags(unsigned int flags):
16795 """ Create an asynchronous stream.
16797 Creates a new asynchronous stream on the context that is current to the
16798 calling host thread. If no context is current to the calling host
16799 thread, then the primary context for a device is selected, made current
16800 to the calling thread, and initialized before creating a stream on it.
16801 The `flags` argument determines the behaviors of the stream. Valid
16802 values for `flags` are
16804 - :py:obj:`~.cudaStreamDefault`: Default stream creation flag.
16806 - :py:obj:`~.cudaStreamNonBlocking`: Specifies that work running in the
16807 created stream may run concurrently with work in stream 0 (the NULL
16808 stream), and that the created stream should perform no implicit
16809 synchronization with stream 0.
16811 Parameters
16812 ----------
16813 flags : unsigned int
16814 Parameters for stream creation
16816 Returns
16817 -------
16818 cudaError_t
16819 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
16820 pStream : :py:obj:`~.cudaStream_t`
16821 Pointer to new stream identifier
16823 See Also
16824 --------
16825 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cudaStreamGetDevResource`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamCreate`
16826 """
16827 cdef cudaStream_t pStream = cudaStream_t()
16828 with nogil:
16829 err = cyruntime.cudaStreamCreateWithFlags(<cyruntime.cudaStream_t*>pStream._pvt_ptr, flags)
16830 if err != cyruntime.cudaSuccess:
16831 return (_dict_cudaError_t[err], None)
16832 return (_dict_cudaError_t[err], pStream)
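# Usage sketch (illustrative, not part of the generated bindings): create a
# stream that does not implicitly synchronize with the NULL stream:
#
#     from cuda.bindings import runtime
#
#     err, stream = runtime.cudaStreamCreateWithFlags(runtime.cudaStreamNonBlocking)
#     assert err == runtime.cudaError_t.cudaSuccess
#     err, = runtime.cudaStreamDestroy(stream)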
16834@cython.embedsignature(True)
16835def cudaStreamCreateWithPriority(unsigned int flags, int priority):
16836 """ Create an asynchronous stream with the specified priority.
16838 Creates a stream with the specified priority and returns a handle in
16839 `pStream`. The stream is created on the context that is current to the
16840 calling host thread. If no context is current to the calling host
16841 thread, then the primary context for a device is selected, made current
16842 to the calling thread, and initialized before creating a stream on it.
16843 This affects the scheduling priority of work in the stream. Priorities
16844 provide a hint to preferentially run work with higher priority when
16845 possible, but do not preempt already-running work or provide any other
16846 functional guarantee on execution order.
16848 `priority` follows a convention where lower numbers represent higher
16849 priorities. '0' represents default priority. The range of meaningful
16850 numerical priorities can be queried using
16851 :py:obj:`~.cudaDeviceGetStreamPriorityRange`. If the specified priority
16852 is outside the numerical range returned by
16853 :py:obj:`~.cudaDeviceGetStreamPriorityRange`, it will automatically be
16854 clamped to the lowest or the highest number in the range.
16856 Parameters
16857 ----------
16858 flags : unsigned int
16859 Flags for stream creation. See
16860 :py:obj:`~.cudaStreamCreateWithFlags` for a list of valid flags
16861 that can be passed
16862 priority : int
16863 Priority of the stream. Lower numbers represent higher priorities.
16864 See :py:obj:`~.cudaDeviceGetStreamPriorityRange` for more
16865 information about the meaningful stream priorities that can be
16866 passed.
16868 Returns
16869 -------
16870 cudaError_t
16871 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
16872 pStream : :py:obj:`~.cudaStream_t`
16873 Pointer to new stream identifier
16875 See Also
16876 --------
16877 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaDeviceGetStreamPriorityRange`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamCreateWithPriority`
16879 Notes
16880 -----
16881 Stream priorities are supported only on GPUs with compute capability 3.5 or higher.
16883 In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities have no effect on host-to-device and device-to-host memory operations.
16884 """
16885 cdef cudaStream_t pStream = cudaStream_t()
16886 with nogil:
16887 err = cyruntime.cudaStreamCreateWithPriority(<cyruntime.cudaStream_t*>pStream._pvt_ptr, flags, priority)
16888 if err != cyruntime.cudaSuccess:
16889 return (_dict_cudaError_t[err], None)
16890 return (_dict_cudaError_t[err], pStream)
16892@cython.embedsignature(True)
16893def cudaStreamGetPriority(hStream):
16894 """ Query the priority of a stream.
16896 Query the priority of a stream. The priority is returned in
16897 `priority`. Note that if the stream was created with a priority outside
16898 the meaningful numerical range returned by
16899 :py:obj:`~.cudaDeviceGetStreamPriorityRange`, this function returns the
16900 clamped priority. See :py:obj:`~.cudaStreamCreateWithPriority` for
16901 details about priority clamping.
16903 Parameters
16904 ----------
16905 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
16906 Handle to the stream to be queried
16908 Returns
16909 -------
16910 cudaError_t
16911 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
16912 priority : int
16913 Pointer to a signed integer in which the stream's priority is
16914 returned
16916 See Also
16917 --------
16918 :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaDeviceGetStreamPriorityRange`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cudaStreamGetDevResource`, :py:obj:`~.cuStreamGetPriority`
16919 """
16920 cdef cyruntime.cudaStream_t cyhStream
16921 if hStream is None:
16922 phStream = 0
16923 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
16924 phStream = int(hStream)
16925 else:
16926 phStream = int(cudaStream_t(hStream))
16927 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
16928 cdef int priority = 0
16929 with nogil:
16930 err = cyruntime.cudaStreamGetPriority(cyhStream, &priority)
16931 if err != cyruntime.cudaSuccess:
16932 return (_dict_cudaError_t[err], None)
16933 return (_dict_cudaError_t[err], priority)
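# Usage sketch (illustrative, not part of the generated bindings;
# cudaDeviceGetStreamPriorityRange is assumed to return the least and
# greatest meaningful priorities for the current device):
#
#     from cuda.bindings import runtime
#
#     err, least, greatest = runtime.cudaDeviceGetStreamPriorityRange()
#     err, stream = runtime.cudaStreamCreateWithPriority(
#         runtime.cudaStreamDefault, greatest)   # highest priority
#     err, priority = runtime.cudaStreamGetPriority(stream)
#     err, = runtime.cudaStreamDestroy(stream)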
16935@cython.embedsignature(True)
16936def cudaStreamGetFlags(hStream):
16937 """ Query the flags of a stream.
16939 Query the flags of a stream. The flags are returned in `flags`. See
16940 :py:obj:`~.cudaStreamCreateWithFlags` for a list of valid flags.
16942 Parameters
16943 ----------
16944 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
16945 Handle to the stream to be queried
16947 Returns
16948 -------
16949 cudaError_t
16950 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
16951 flags : unsigned int
16952 Pointer to an unsigned integer in which the stream's flags are
16953 returned
16955 See Also
16956 --------
16957 :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cuStreamGetFlags`
16958 """
16959 cdef cyruntime.cudaStream_t cyhStream
16960 if hStream is None:
16961 phStream = 0
16962 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
16963 phStream = int(hStream)
16964 else:
16965 phStream = int(cudaStream_t(hStream))
16966 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
16967 cdef unsigned int flags = 0
16968 with nogil:
16969 err = cyruntime.cudaStreamGetFlags(cyhStream, &flags)
16970 if err != cyruntime.cudaSuccess:
16971 return (_dict_cudaError_t[err], None)
16972 return (_dict_cudaError_t[err], flags)
16974@cython.embedsignature(True)
16975def cudaStreamGetId(hStream):
16976 """ Query the Id of a stream.
16978 Query the Id of a stream. The Id is returned in `streamId`. The Id is
16979 unique for the life of the program.
16981 The stream handle `hStream` can refer to any of the following:
16983 - a stream created via any of the CUDA runtime APIs such as
16984 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`
16985 and :py:obj:`~.cudaStreamCreateWithPriority`, or their driver API
16986 equivalents such as :py:obj:`~.cuStreamCreate` or
16987 :py:obj:`~.cuStreamCreateWithPriority`. Passing an invalid handle
16988 will result in undefined behavior.
16990 - any of the special streams, such as the NULL stream,
16991 :py:obj:`~.cudaStreamLegacy`, and :py:obj:`~.cudaStreamPerThread`.
16992 Their driver API equivalents are also accepted,
16993 which are NULL, :py:obj:`~.CU_STREAM_LEGACY` and
16994 :py:obj:`~.CU_STREAM_PER_THREAD`.
16996 Parameters
16997 ----------
16998 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
16999 Handle to the stream to be queried
17001 Returns
17002 -------
17003 cudaError_t
17004 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17005 streamId : unsigned long long
17006 Pointer to an unsigned long long in which the stream Id is returned
17008 See Also
17009 --------
17010 :py:obj:`~.cudaStreamCreateWithPriority`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cuStreamGetId`
17011 """
17012 cdef cyruntime.cudaStream_t cyhStream
17013 if hStream is None:
17014 phStream = 0
17015 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
17016 phStream = int(hStream)
17017 else:
17018 phStream = int(cudaStream_t(hStream))
17019 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
17020 cdef unsigned long long streamId = 0
17021 with nogil:
17022 err = cyruntime.cudaStreamGetId(cyhStream, &streamId)
17023 if err != cyruntime.cudaSuccess:
17024 return (_dict_cudaError_t[err], None)
17025 return (_dict_cudaError_t[err], streamId)
17027@cython.embedsignature(True)
17028def cudaStreamGetDevice(hStream):
17029 """ Query the device of a stream.
17031 Returns in `*device` the device of the stream.
17033 Parameters
17034 ----------
17035 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17036 Handle to the stream to be queried
17038 Returns
17039 -------
17040 cudaError_t
17041 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorDeviceUnavailable`
17042 device : int
17043 Returns the device to which the stream belongs
17045 See Also
17046 --------
17047 :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cuStreamGetId`
17048 """
17049 cdef cyruntime.cudaStream_t cyhStream
17050 if hStream is None:
17051 phStream = 0
17052 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
17053 phStream = int(hStream)
17054 else:
17055 phStream = int(cudaStream_t(hStream))
17056 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
17057 cdef int device = 0
17058 with nogil:
17059 err = cyruntime.cudaStreamGetDevice(cyhStream, &device)
17060 if err != cyruntime.cudaSuccess:
17061 return (_dict_cudaError_t[err], None)
17062 return (_dict_cudaError_t[err], device)
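# Usage sketch (illustrative, not part of the generated bindings): identify a
# stream and the device it belongs to:
#
#     from cuda.bindings import runtime
#
#     err, stream = runtime.cudaStreamCreate()
#     err, stream_id = runtime.cudaStreamGetId(stream)
#     err, device = runtime.cudaStreamGetDevice(stream)
#     err, = runtime.cudaStreamDestroy(stream)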
17064@cython.embedsignature(True)
17065def cudaCtxResetPersistingL2Cache():
17066 """ Resets all persisting lines in cache to normal status.
17068 Resets all persisting lines in cache to normal status. Takes effect on
17069 function return.
17071 Returns
17072 -------
17073 cudaError_t
17074 :py:obj:`~.cudaSuccess`
17076 See Also
17077 --------
17078 :py:obj:`~.cudaAccessPolicyWindow`
17079 """
17080 with nogil:
17081 err = cyruntime.cudaCtxResetPersistingL2Cache()
17082 return (_dict_cudaError_t[err],)
17084@cython.embedsignature(True)
17085def cudaStreamCopyAttributes(dst, src):
17086 """ Copies attributes from source stream to destination stream.
17088 Copies attributes from source stream `src` to destination stream `dst`.
17089 Both streams must have the same context.
17091 Parameters
17092 ----------
17093 dst : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17094 Destination stream
17095 src : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17096 Source stream. For attributes see :py:obj:`~.cudaStreamAttrID`
17098 Returns
17099 -------
17100 cudaError_t
17101 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotSupported`
17103 See Also
17104 --------
17105 :py:obj:`~.cudaAccessPolicyWindow`
17106 """
17107 cdef cyruntime.cudaStream_t cysrc
17108 if src is None:
17109 psrc = 0
17110 elif isinstance(src, (cudaStream_t,driver.CUstream)):
17111 psrc = int(src)
17112 else:
17113 psrc = int(cudaStream_t(src))
17114 cysrc = <cyruntime.cudaStream_t><void_ptr>psrc
17115 cdef cyruntime.cudaStream_t cydst
17116 if dst is None:
17117 pdst = 0
17118 elif isinstance(dst, (cudaStream_t,driver.CUstream)):
17119 pdst = int(dst)
17120 else:
17121 pdst = int(cudaStream_t(dst))
17122 cydst = <cyruntime.cudaStream_t><void_ptr>pdst
17123 with nogil:
17124 err = cyruntime.cudaStreamCopyAttributes(cydst, cysrc)
17125 return (_dict_cudaError_t[err],)
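# Usage sketch (illustrative, not part of the generated bindings): clone the
# attributes of one stream onto another created in the same context:
#
#     from cuda.bindings import runtime
#
#     err, src = runtime.cudaStreamCreate()
#     err, dst = runtime.cudaStreamCreate()
#     err, = runtime.cudaStreamCopyAttributes(dst, src)
#     for s in (src, dst):
#         runtime.cudaStreamDestroy(s)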
17127@cython.embedsignature(True)
17128def cudaStreamGetAttribute(hStream, attr not None : cudaStreamAttrID):
17129 """ Queries stream attribute.
17131 Queries attribute `attr` from `hStream` and stores it in corresponding
17132 member of `value_out`.
17134 Parameters
17135 ----------
17136 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17138 attr : :py:obj:`~.cudaStreamAttrID`
17141 Returns
17142 -------
17143 cudaError_t
17144 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17145 value_out : :py:obj:`~.cudaStreamAttrValue`
17148 See Also
17149 --------
17150 :py:obj:`~.cudaAccessPolicyWindow`
17151 """
17152 cdef cyruntime.cudaStream_t cyhStream
17153 if hStream is None:
17154 phStream = 0
17155 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
17156 phStream = int(hStream)
17157 else:
17158 phStream = int(cudaStream_t(hStream))
17159 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
17160 cdef cyruntime.cudaStreamAttrID cyattr = attr.value
17161 cdef cudaStreamAttrValue value_out = cudaStreamAttrValue()
17162 with nogil:
17163 err = cyruntime.cudaStreamGetAttribute(cyhStream, cyattr, <cyruntime.cudaStreamAttrValue*>value_out._pvt_ptr)
17164 if err != cyruntime.cudaSuccess:
17165 return (_dict_cudaError_t[err], None)
17166 return (_dict_cudaError_t[err], value_out)
17168@cython.embedsignature(True)
17169def cudaStreamSetAttribute(hStream, attr not None : cudaStreamAttrID, value : Optional[cudaStreamAttrValue]):
17170 """ Sets stream attribute.
17172 Sets attribute `attr` on `hStream` from corresponding attribute of
17173 `value`. The updated attribute will be applied to subsequent work
17174 submitted to the stream. It will not affect previously submitted work.
17176 Parameters
17177 ----------
17178 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17180 attr : :py:obj:`~.cudaStreamAttrID`
17182 value : :py:obj:`~.cudaStreamAttrValue`
17185 Returns
17186 -------
17187 cudaError_t
17188 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17190 See Also
17191 --------
17192 :py:obj:`~.cudaAccessPolicyWindow`
17193 """
17194 cdef cyruntime.cudaStream_t cyhStream
17195 if hStream is None:
17196 phStream = 0
17197 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
17198 phStream = int(hStream)
17199 else:
17200 phStream = int(cudaStream_t(hStream))
17201 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
17202 cdef cyruntime.cudaStreamAttrID cyattr = attr.value
17203 cdef cyruntime.cudaStreamAttrValue* cyvalue_ptr = value._pvt_ptr if value is not None else NULL
17204 with nogil:
17205 err = cyruntime.cudaStreamSetAttribute(cyhStream, cyattr, cyvalue_ptr)
17206 return (_dict_cudaError_t[err],)
17208@cython.embedsignature(True)
17209def cudaStreamDestroy(stream):
17210 """ Destroys and cleans up an asynchronous stream.
17212 Destroys and cleans up the asynchronous stream specified by `stream`.
17214 In case the device is still doing work in the stream `stream` when
17215 :py:obj:`~.cudaStreamDestroy()` is called, the function will return
17216 immediately and the resources associated with `stream` will be released
17217 automatically once the device has completed all work in `stream`.
17219 Parameters
17220 ----------
17221 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17222 Stream identifier
17224 Returns
17225 -------
17226 cudaError_t
17227 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17229 See Also
17230 --------
17231 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cuStreamDestroy`
17232 """
17233 cdef cyruntime.cudaStream_t cystream
17234 if stream is None:
17235 pstream = 0
17236 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17237 pstream = int(stream)
17238 else:
17239 pstream = int(cudaStream_t(stream))
17240 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17241 with nogil:
17242 err = cyruntime.cudaStreamDestroy(cystream)
17243 return (_dict_cudaError_t[err],)
17245@cython.embedsignature(True)
17246def cudaStreamWaitEvent(stream, event, unsigned int flags):
17247 """ Make a compute stream wait on an event.
17249 Makes all future work submitted to `stream` wait for all work captured
17250 in `event`. See :py:obj:`~.cudaEventRecord()` for details on what is
17251 captured by an event. The synchronization will be performed efficiently
17252 on the device when applicable. `event` may be from a different device
17253 than `stream`.
17255 flags include:
17257 - :py:obj:`~.cudaEventWaitDefault`: Default event creation flag.
17259 - :py:obj:`~.cudaEventWaitExternal`: Event is captured in the graph as
17260 an external event node when performing stream capture.
17262 Parameters
17263 ----------
17264 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17265 Stream to wait
17266 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
17267 Event to wait on
17268 flags : unsigned int
17269 Parameters for the operation (see above)
17271 Returns
17272 -------
17273 cudaError_t
17274 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17276 See Also
17277 --------
17278 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamWaitEvent`
17279 """
17280 cdef cyruntime.cudaEvent_t cyevent
17281 if event is None:
17282 pevent = 0
17283 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
17284 pevent = int(event)
17285 else:
17286 pevent = int(cudaEvent_t(event))
17287 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
17288 cdef cyruntime.cudaStream_t cystream
17289 if stream is None:
17290 pstream = 0
17291 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17292 pstream = int(stream)
17293 else:
17294 pstream = int(cudaStream_t(stream))
17295 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17296 with nogil:
17297 err = cyruntime.cudaStreamWaitEvent(cystream, cyevent, flags)
17298 return (_dict_cudaError_t[err],)
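# Usage sketch (illustrative, not part of the generated bindings;
# cudaEventCreate and cudaEventRecord follow the same (err, ...) return
# convention): order work between two streams through an event; flags=0
# requests the default wait behavior.
#
#     from cuda.bindings import runtime
#
#     err, producer = runtime.cudaStreamCreate()
#     err, consumer = runtime.cudaStreamCreate()
#     err, event = runtime.cudaEventCreate()
#     # ... enqueue work on `producer` ...
#     err, = runtime.cudaEventRecord(event, producer)
#     err, = runtime.cudaStreamWaitEvent(consumer, event, 0)
#     # work submitted to `consumer` from here on waits for the event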
17300ctypedef struct cudaStreamCallbackData_st:
17301 cyruntime.cudaStreamCallback_t callback
17302 void *userData
17304ctypedef cudaStreamCallbackData_st cudaStreamCallbackData
17306@cython.show_performance_hints(False)
17307cdef void cudaStreamRtCallbackWrapper(cyruntime.cudaStream_t stream, cyruntime.cudaError_t status, void *data) nogil:
17308 cdef cudaStreamCallbackData *cbData = <cudaStreamCallbackData *>data
17309 with gil:
17310 cbData.callback(stream, status, cbData.userData)
17311 free(cbData)
17313@cython.embedsignature(True)
17314def cudaStreamAddCallback(stream, callback, userData, unsigned int flags):
17315 """ Add a callback to a compute stream.
17317 Adds a callback to be called on the host after all currently enqueued
17318 items in the stream have completed. For each cudaStreamAddCallback
17319 call, a callback will be executed exactly once. The callback will block
17320 later work in the stream until it is finished.
17322 The callback may be passed :py:obj:`~.cudaSuccess` or an error code. In
17323 the event of a device error, all subsequently executed callbacks will
17324 receive an appropriate :py:obj:`~.cudaError_t`.
17326 Callbacks must not make any CUDA API calls. Attempting to use CUDA APIs
17327 may result in :py:obj:`~.cudaErrorNotPermitted`. Callbacks must not
17328 perform any synchronization that may depend on outstanding device work
17329 or other callbacks that are not mandated to run earlier. Callbacks
17330 without a mandated order (in independent streams) execute in undefined
17331 order and may be serialized.
17333 For the purposes of Unified Memory, callback execution makes a number
17334 of guarantees:
17336 - The callback stream is considered idle for the duration of the
17337 callback. Thus, for example, a callback may always use memory
17338 attached to the callback stream.
17340 - The start of execution of a callback has the same effect as
17341 synchronizing an event recorded in the same stream immediately prior
17342 to the callback. It thus synchronizes streams which have been
17343 "joined" prior to the callback.
17345 - Adding device work to any stream does not have the effect of making
17346 the stream active until all preceding callbacks have executed. Thus,
17347 for example, a callback might use global attached memory even if work
17348 has been added to another stream, if it has been properly ordered
17349 with an event.
17351 - Completion of a callback does not cause a stream to become active
17352 except as described above. The callback stream will remain idle if no
17353 device work follows the callback, and will remain idle across
17354 consecutive callbacks without device work in between. Thus, for
17355 example, stream synchronization can be done by signaling from a
17356 callback at the end of the stream.
17358 Parameters
17359 ----------
17360 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17361 Stream to add callback to
17362 callback : :py:obj:`~.cudaStreamCallback_t`
17363 The function to call once preceding stream operations are complete
17364 userData : Any
17365 User specified data to be passed to the callback function
17366 flags : unsigned int
17367 Reserved for future use, must be 0
17369 Returns
17370 -------
17371 cudaError_t
17372 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
17374 See Also
17375 --------
17376 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cudaStreamAttachMemAsync`, :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cuStreamAddCallback`
17378 Notes
17379 -----
17380 This function is slated for eventual deprecation and removal. If you do not require the callback to execute in case of a device error, consider using :py:obj:`~.cudaLaunchHostFunc`. Additionally, this function is not supported with :py:obj:`~.cudaStreamBeginCapture` and :py:obj:`~.cudaStreamEndCapture`, unlike :py:obj:`~.cudaLaunchHostFunc`.
17381 """
17382 cdef cyruntime.cudaStreamCallback_t cycallback
17383 if callback is None:
17384 pcallback = 0
17385 elif isinstance(callback, (cudaStreamCallback_t,)):
17386 pcallback = int(callback)
17387 else:
17388 pcallback = int(cudaStreamCallback_t(callback))
17389 cycallback = <cyruntime.cudaStreamCallback_t><void_ptr>pcallback
17390 cdef cyruntime.cudaStream_t cystream
17391 if stream is None:
17392 pstream = 0
17393 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17394 pstream = int(stream)
17395 else:
17396 pstream = int(cudaStream_t(stream))
17397 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17398 cyuserData = _HelperInputVoidPtr(userData)
17399 cdef void* cyuserData_ptr = <void*><void_ptr>cyuserData.cptr
17401 cdef cudaStreamCallbackData *cbData = NULL
17402 cbData = <cudaStreamCallbackData *>malloc(sizeof(cbData[0]))
17403 if cbData == NULL:
17404 return (cudaError_t.cudaErrorMemoryAllocation,)
17405 cbData.callback = cycallback
17406 cbData.userData = cyuserData_ptr
17408 with nogil:
17409 err = cyruntime.cudaStreamAddCallback(cystream, <cyruntime.cudaStreamCallback_t>cudaStreamRtCallbackWrapper, <void *>cbData, flags)
17410 if err != cyruntime.cudaSuccess:
17411 free(cbData)
17412 return (_dict_cudaError_t[err],)
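# Usage sketch (an assumption, not the bindings' documented pattern): a host
# callback built with ctypes. The wrapper accepts anything cudaStreamCallback_t()
# can convert, so passing the integer address of a C-callable is one possibility.
# Keep a reference to `on_done` alive until the callback has fired; ctypes does not.
#
#     import ctypes
#     _CB = ctypes.CFUNCTYPE(None, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p)
#     @_CB
#     def on_done(stream_handle, status, user_data):
#         print("stream work finished, status =", status)
#     addr = ctypes.cast(on_done, ctypes.c_void_p).value
#     err, = cudaStreamAddCallback(stream, addr, None, 0)  # flags must be 0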
17414@cython.embedsignature(True)
17415def cudaStreamSynchronize(stream):
17416 """ Waits for stream tasks to complete.
17418 Blocks until `stream` has completed all operations. If the
17419 :py:obj:`~.cudaDeviceScheduleBlockingSync` flag was set for this
17420 device, the host thread will block until the stream is finished with
17421 all of its tasks.
17423 Parameters
17424 ----------
17425 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17426 Stream identifier
17428 Returns
17429 -------
17430 cudaError_t
17431 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17433 See Also
17434 --------
17435 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamSynchronize`
17436 """
17437 cdef cyruntime.cudaStream_t cystream
17438 if stream is None:
17439 pstream = 0
17440 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17441 pstream = int(stream)
17442 else:
17443 pstream = int(cudaStream_t(stream))
17444 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17445 with nogil:
17446 err = cyruntime.cudaStreamSynchronize(cystream)
17447 return (_dict_cudaError_t[err],)
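# Usage sketch (illustrative only): create a stream, queue work on it, then block
# the host until everything queued so far has finished.
#
#     err, stream = cudaStreamCreate()
#     # ... enqueue async work on `stream` ...
#     err, = cudaStreamSynchronize(stream)
#     assert err == cudaError_t.cudaSuccess
#     err, = cudaStreamDestroy(stream)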
17449@cython.embedsignature(True)
17450def cudaStreamQuery(stream):
17451 """ Queries an asynchronous stream for completion status.
17453 Returns :py:obj:`~.cudaSuccess` if all operations in `stream` have
17454 completed, or :py:obj:`~.cudaErrorNotReady` if not.
17456 For the purposes of Unified Memory, a return value of
17457 :py:obj:`~.cudaSuccess` is equivalent to having called
17458 :py:obj:`~.cudaStreamSynchronize()`.
17460 Parameters
17461 ----------
17462 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17463 Stream identifier
17465 Returns
17466 -------
17467 cudaError_t
17468 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotReady`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17470 See Also
17471 --------
17472 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cuStreamQuery`
17473 """
17474 cdef cyruntime.cudaStream_t cystream
17475 if stream is None:
17476 pstream = 0
17477 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17478 pstream = int(stream)
17479 else:
17480 pstream = int(cudaStream_t(stream))
17481 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17482 with nogil:
17483 err = cyruntime.cudaStreamQuery(cystream)
17484 return (_dict_cudaError_t[err],)
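# Usage sketch (illustrative only): poll instead of blocking. cudaErrorNotReady
# is an expected in-band status here, not a failure.
#
#     while True:
#         err, = cudaStreamQuery(stream)
#         if err != cudaError_t.cudaErrorNotReady:
#             break
#         # ... do other host-side work between polls ...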
17486@cython.embedsignature(True)
17487def cudaStreamAttachMemAsync(stream, devPtr, size_t length, unsigned int flags):
17488 """ Attach memory to a stream asynchronously.
17490 Enqueues an operation in `stream` to specify stream association of
17491 `length` bytes of memory starting from `devPtr`. This function is a
17492 stream-ordered operation, meaning that it is dependent on, and will
17493 only take effect when, previous work in stream has completed. Any
17494 previous association is automatically replaced.
17496 `devPtr` must point to one of the following types of memory:
17498 - managed memory declared using the `__managed__` keyword or allocated with
17499 :py:obj:`~.cudaMallocManaged`.
17501 - a valid host-accessible region of system-allocated pageable memory.
17502 This type of memory may only be specified if the device associated
17503 with the stream reports a non-zero value for the device attribute
17504 :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
17506 For managed allocations, `length` must be either zero or the entire
17507 allocation's size. Both indicate that the entire allocation's stream
17508 association is being changed. Currently, it is not possible to change
17509 stream association for a portion of a managed allocation.
17511 For pageable allocations, `length` must be non-zero.
17513 The stream association is specified using `flags`, which must be one of
17514 :py:obj:`~.cudaMemAttachGlobal`, :py:obj:`~.cudaMemAttachHost` or
17515 :py:obj:`~.cudaMemAttachSingle`. The default value for `flags` is
17516 :py:obj:`~.cudaMemAttachSingle`. If the :py:obj:`~.cudaMemAttachGlobal`
17517 flag is specified, the memory can be accessed by any stream on any
17518 device. If the :py:obj:`~.cudaMemAttachHost` flag is specified, the
17519 program makes a guarantee that it won't access the memory on the device
17520 from any stream on a device that has a zero value for the device
17521 attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. If the
17522 :py:obj:`~.cudaMemAttachSingle` flag is specified and `stream` is
17523 associated with a device that has a zero value for the device attribute
17524 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`, the program makes a
17525 guarantee that it will only access the memory on the device from
17526 `stream`. It is illegal to attach singly to the NULL stream, because
17527 the NULL stream is a virtual global stream and not a specific stream.
17528 An error will be returned in this case.
17530 When memory is associated with a single stream, the Unified Memory
17531 system will allow CPU access to this memory region so long as all
17532 operations in `stream` have completed, regardless of whether other
17533 streams are active. In effect, this constrains exclusive ownership of
17534 the managed memory region by an active GPU to per-stream activity
17535 instead of whole-GPU activity.
17537 Accessing memory on the device from streams that are not associated
17538 with it will produce undefined results. No error checking is performed
17539 by the Unified Memory system to ensure that kernels launched into other
17540 streams do not access this region.
17542 It is a program's responsibility to order calls to
17543 :py:obj:`~.cudaStreamAttachMemAsync` via events, synchronization or
17544 other means to ensure legal access to memory at all times. Data
17545 visibility and coherency will be changed appropriately for all kernels
17546 which follow a stream-association change.
17548 If `stream` is destroyed while data is associated with it, the
17549 association is removed and the association reverts to the default
17550 visibility of the allocation as specified at
17551 :py:obj:`~.cudaMallocManaged`. For managed variables, the default
17552 association is always :py:obj:`~.cudaMemAttachGlobal`. Note that
17553 destroying a stream is an asynchronous operation, and as a result, the
17554 change to default association won't happen until all work in the stream
17555 has completed.
17557 Parameters
17558 ----------
17559 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17560 Stream in which to enqueue the attach operation
17561 devPtr : Any
17562 Pointer to memory (must be a pointer to managed memory or to a
17563 valid host-accessible region of system-allocated memory)
17564 length : size_t
17565 Length of memory (defaults to zero)
17566 flags : unsigned int
17567 Must be one of :py:obj:`~.cudaMemAttachGlobal`,
17568 :py:obj:`~.cudaMemAttachHost` or :py:obj:`~.cudaMemAttachSingle`
17569 (defaults to :py:obj:`~.cudaMemAttachSingle`)
17571 Returns
17572 -------
17573 cudaError_t
17574 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotReady`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
17576 See Also
17577 --------
17578 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamCreateWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cuStreamAttachMemAsync`
17579 """
17580 cdef cyruntime.cudaStream_t cystream
17581 if stream is None:
17582 pstream = 0
17583 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17584 pstream = int(stream)
17585 else:
17586 pstream = int(cudaStream_t(stream))
17587 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17588 cydevPtr = _HelperInputVoidPtr(devPtr)
17589 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
17590 with nogil:
17591 err = cyruntime.cudaStreamAttachMemAsync(cystream, cydevPtr_ptr, length, flags)
17592 return (_dict_cudaError_t[err],)
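# Usage sketch (illustrative only): restrict a managed allocation to a single
# stream. length=0 selects the whole allocation, as required for managed memory;
# the flag constants follow the docstring above, and `nbytes` is assumed.
#
#     err, ptr = cudaMallocManaged(nbytes, cudaMemAttachGlobal)
#     err, = cudaStreamAttachMemAsync(stream, ptr, 0, cudaMemAttachSingle)
#     err, = cudaStreamSynchronize(stream)  # attachment takes effect in stream order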
17594@cython.embedsignature(True)
17595def cudaStreamBeginCapture(stream, mode not None : cudaStreamCaptureMode):
17596 """ Begins graph capture on a stream.
17598 Begin graph capture on `stream`. When a stream is in capture mode, all
17599 operations pushed into the stream will not be executed, but will
17600 instead be captured into a graph, which will be returned via
17601 :py:obj:`~.cudaStreamEndCapture`. Capture may not be initiated if
17602 `stream` is :py:obj:`~.cudaStreamLegacy`. Capture must be ended on the
17603 same stream in which it was initiated, and it may only be initiated if
17604 the stream is not already in capture mode. The capture mode may be
17605 queried via :py:obj:`~.cudaStreamIsCapturing`. A unique id representing
17606 the capture sequence may be queried via
17607 :py:obj:`~.cudaStreamGetCaptureInfo`.
17609 If `mode` is not :py:obj:`~.cudaStreamCaptureModeRelaxed`,
17610 :py:obj:`~.cudaStreamEndCapture` must be called on this stream from the
17611 same thread.
17613 Parameters
17614 ----------
17615 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17616 Stream in which to initiate capture
17617 mode : :py:obj:`~.cudaStreamCaptureMode`
17618 Controls the interaction of this capture sequence with other API
17619 calls that are potentially unsafe. For more details see
17620 :py:obj:`~.cudaThreadExchangeStreamCaptureMode`.
17622 Returns
17623 -------
17624 cudaError_t
17625 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
17627 See Also
17628 --------
17629 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaStreamEndCapture`, :py:obj:`~.cudaThreadExchangeStreamCaptureMode`
17631 Notes
17632 -----
17633 Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects.
17634 """
17635 cdef cyruntime.cudaStream_t cystream
17636 if stream is None:
17637 pstream = 0
17638 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17639 pstream = int(stream)
17640 else:
17641 pstream = int(cudaStream_t(stream))
17642 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17643 cdef cyruntime.cudaStreamCaptureMode cymode = mode.value
17644 with nogil:
17645 err = cyruntime.cudaStreamBeginCapture(cystream, cymode)
17646 return (_dict_cudaError_t[err],)
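# Usage sketch (illustrative only): a minimal capture round trip. Work pushed
# between begin and end is recorded into a graph rather than executed.
#
#     err, = cudaStreamBeginCapture(stream, cudaStreamCaptureMode.cudaStreamCaptureModeGlobal)
#     # ... enqueue the work to be captured into `stream` ...
#     err, graph = cudaStreamEndCapture(stream)
#     # ... instantiate/launch `graph`, then cudaGraphDestroy(graph) ...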
17648@cython.embedsignature(True)
17649def cudaStreamBeginCaptureToGraph(stream, graph, dependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], dependencyData : Optional[tuple[cudaGraphEdgeData] | list[cudaGraphEdgeData]], size_t numDependencies, mode not None : cudaStreamCaptureMode):
17650 """ Begins graph capture on a stream to an existing graph.
17652 Begin graph capture on `stream`. When a stream is in capture mode, all
17653 operations pushed into the stream will not be executed, but will
17654 instead be captured into `graph`, which will be returned via
17655 :py:obj:`~.cudaStreamEndCapture`.
17657 Capture may not be initiated if `stream` is
17658 :py:obj:`~.cudaStreamLegacy`. Capture must be ended on the same stream
17659 in which it was initiated, and it may only be initiated if the stream
17660 is not already in capture mode. The capture mode may be queried via
17661 :py:obj:`~.cudaStreamIsCapturing`. A unique id representing the capture
17662 sequence may be queried via :py:obj:`~.cudaStreamGetCaptureInfo`.
17664 If `mode` is not :py:obj:`~.cudaStreamCaptureModeRelaxed`,
17665 :py:obj:`~.cudaStreamEndCapture` must be called on this stream from the
17666 same thread.
17668 Parameters
17669 ----------
17670 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17671 Stream in which to initiate capture.
17672 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
17673 Graph to capture into.
17674 dependencies : list[:py:obj:`~.cudaGraphNode_t`]
17675 Dependencies of the first node captured in the stream. Can be NULL
17676 if numDependencies is 0.
17677 dependencyData : list[:py:obj:`~.cudaGraphEdgeData`]
17678 Optional array of data associated with each dependency.
17679 numDependencies : size_t
17680 Number of dependencies.
17681 mode : :py:obj:`~.cudaStreamCaptureMode`
17682 Controls the interaction of this capture sequence with other API
17683 calls that are potentially unsafe. For more details see
17684 :py:obj:`~.cudaThreadExchangeStreamCaptureMode`.
17686 Returns
17687 -------
17688 cudaError_t
17689 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
17691 See Also
17692 --------
17693 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaStreamEndCapture`, :py:obj:`~.cudaThreadExchangeStreamCaptureMode`
17695 Notes
17696 -----
17697 Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects.
17698 """
17699 dependencyData = [] if dependencyData is None else dependencyData
17700 if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData):
17701 raise TypeError("Argument 'dependencyData' is not an instance of the expected type (tuple[cudaGraphEdgeData] or list[cudaGraphEdgeData])")
17702 dependencies = [] if dependencies is None else dependencies
17703 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in dependencies):
17704 raise TypeError("Argument 'dependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
# Check numDependencies before the allocations below so an oversized count cannot leak them.
if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
17705 cdef cyruntime.cudaGraph_t cygraph
17706 if graph is None:
17707 pgraph = 0
17708 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
17709 pgraph = int(graph)
17710 else:
17711 pgraph = int(cudaGraph_t(graph))
17712 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
17713 cdef cyruntime.cudaStream_t cystream
17714 if stream is None:
17715 pstream = 0
17716 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17717 pstream = int(stream)
17718 else:
17719 pstream = int(cudaStream_t(stream))
17720 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17721 cdef cyruntime.cudaGraphNode_t* cydependencies = NULL
17722 if len(dependencies) > 1:
17723 cydependencies = <cyruntime.cudaGraphNode_t*> calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t))
17724 if cydependencies is NULL:
17725 raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
17726 else:
17727 for idx in range(len(dependencies)):
17728 cydependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>dependencies[idx])._pvt_ptr[0]
17729 elif len(dependencies) == 1:
17730 cydependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>dependencies[0])._pvt_ptr
17731 cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL
17732 if len(dependencyData) > 1:
17733 cydependencyData = <cyruntime.cudaGraphEdgeData*> calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData))
17734 if cydependencyData is NULL:
17735 raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
17736 for idx in range(len(dependencyData)):
17737 string.memcpy(&cydependencyData[idx], (<cudaGraphEdgeData>dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData))
17738 elif len(dependencyData) == 1:
17739 cydependencyData = (<cudaGraphEdgeData>dependencyData[0])._pvt_ptr
17741 cdef cyruntime.cudaStreamCaptureMode cymode = mode.value
17742 with nogil:
17743 err = cyruntime.cudaStreamBeginCaptureToGraph(cystream, cygraph, cydependencies, cydependencyData, numDependencies, cymode)
17744 if len(dependencies) > 1 and cydependencies is not NULL:
17745 free(cydependencies)
17746 if len(dependencyData) > 1 and cydependencyData is not NULL:
17747 free(cydependencyData)
17748 return (_dict_cudaError_t[err],)
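# Usage sketch (illustrative only): capture into a caller-owned graph created up
# front. Passing None/0 for the dependency arguments starts capture with no
# initial dependencies.
#
#     err, graph = cudaGraphCreate(0)
#     err, = cudaStreamBeginCaptureToGraph(stream, graph, None, None, 0,
#                                          cudaStreamCaptureMode.cudaStreamCaptureModeGlobal)
#     # ... enqueue work ...
#     err, graph = cudaStreamEndCapture(stream)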
17750@cython.embedsignature(True)
17751def cudaThreadExchangeStreamCaptureMode(mode not None : cudaStreamCaptureMode):
17752 """ Swaps the stream capture interaction mode for a thread.
17754 Sets the calling thread's stream capture interaction mode to `mode`, and
17755 returns the previous mode for the thread. To facilitate deterministic
17756 behavior across function or
17757 module boundaries, callers are encouraged to use this API in a push-pop
17758 fashion:
17760 **View CUDA Toolkit Documentation for a C++ code example**
17762 During stream capture (see :py:obj:`~.cudaStreamBeginCapture`), some
17763 actions, such as a call to :py:obj:`~.cudaMalloc`, may be unsafe. In
17764 the case of :py:obj:`~.cudaMalloc`, the operation is not enqueued
17765 asynchronously to a stream, and is not observed by stream capture.
17766 Therefore, if the sequence of operations captured via
17767 :py:obj:`~.cudaStreamBeginCapture` depended on the allocation being
17768 replayed whenever the graph is launched, the captured graph would be
17769 invalid.
17771 Therefore, stream capture places restrictions on API calls that can be
17772 made within or concurrently to a
17773 :py:obj:`~.cudaStreamBeginCapture`-:py:obj:`~.cudaStreamEndCapture`
17774 sequence. This behavior can be controlled via this API and flags to
17775 :py:obj:`~.cudaStreamBeginCapture`.
17777 A thread's mode is one of the following:
17779 - `cudaStreamCaptureModeGlobal:` This is the default mode. If the local
17780 thread has an ongoing capture sequence that was not initiated with
17781 `cudaStreamCaptureModeRelaxed` at `cudaStreamBeginCapture`, or if any
17782 other thread has a concurrent capture sequence initiated with
17783 `cudaStreamCaptureModeGlobal`, this thread is prohibited from
17784 potentially unsafe API calls.
17786 - `cudaStreamCaptureModeThreadLocal:` If the local thread has an
17787 ongoing capture sequence not initiated with
17788 `cudaStreamCaptureModeRelaxed`, it is prohibited from potentially
17789 unsafe API calls. Concurrent capture sequences in other threads are
17790 ignored.
17792 - `cudaStreamCaptureModeRelaxed:` The local thread is not prohibited
17793 from potentially unsafe API calls. Note that the thread is still
17794 prohibited from API calls which necessarily conflict with stream
17795 capture, for example, attempting :py:obj:`~.cudaEventQuery` on an
17796 event that was last recorded inside a capture sequence.
17798 Parameters
17799 ----------
17800 mode : :py:obj:`~.cudaStreamCaptureMode`
17801 Mode value to set for the calling thread
17803 Returns
17804 -------
17805 cudaError_t
17806 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
17807 mode : :py:obj:`~.cudaStreamCaptureMode`
17808 The previous mode, swapped out for the calling thread
17810 See Also
17811 --------
17812 :py:obj:`~.cudaStreamBeginCapture`
17813 """
17814 cdef cyruntime.cudaStreamCaptureMode cymode = mode.value
17815 with nogil:
17816 err = cyruntime.cudaThreadExchangeStreamCaptureMode(&cymode)
17817 if err != cyruntime.cudaSuccess:
17818 return (_dict_cudaError_t[err], None)
17819 return (_dict_cudaError_t[err], cudaStreamCaptureMode(cymode))
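# Usage sketch (illustrative only): the push-pop pattern described above. Swap
# in the relaxed mode around a call that stream capture would otherwise forbid,
# then restore whatever mode the thread had before.
#
#     err, prev = cudaThreadExchangeStreamCaptureMode(
#         cudaStreamCaptureMode.cudaStreamCaptureModeRelaxed)
#     # ... potentially unsafe call, e.g. cudaMalloc ...
#     err, _ = cudaThreadExchangeStreamCaptureMode(prev)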
17821@cython.embedsignature(True)
17822def cudaStreamEndCapture(stream):
17823 """ Ends capture on a stream, returning the captured graph.
17825 End capture on `stream`, returning the captured graph via `pGraph`.
17826 Capture must have been initiated on `stream` via a call to
17827 :py:obj:`~.cudaStreamBeginCapture`. If capture was invalidated, due to
17828 a violation of the rules of stream capture, then a NULL graph will be
17829 returned.
17831 If the `mode` argument to :py:obj:`~.cudaStreamBeginCapture` was not
17832 :py:obj:`~.cudaStreamCaptureModeRelaxed`, this call must be from the
17833 same thread as :py:obj:`~.cudaStreamBeginCapture`.
17835 Parameters
17836 ----------
17837 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17838 Stream to query
17840 Returns
17841 -------
17842 cudaError_t
17843 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorStreamCaptureWrongThread`
17844 pGraph : :py:obj:`~.cudaGraph_t`
17845 The captured graph
17847 See Also
17848 --------
17849 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaGraphDestroy`
17850 """
17851 cdef cyruntime.cudaStream_t cystream
17852 if stream is None:
17853 pstream = 0
17854 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17855 pstream = int(stream)
17856 else:
17857 pstream = int(cudaStream_t(stream))
17858 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17859 cdef cudaGraph_t pGraph = cudaGraph_t()
17860 with nogil:
17861 err = cyruntime.cudaStreamEndCapture(cystream, <cyruntime.cudaGraph_t*>pGraph._pvt_ptr)
17862 if err != cyruntime.cudaSuccess:
17863 return (_dict_cudaError_t[err], None)
17864 return (_dict_cudaError_t[err], pGraph)
17866@cython.embedsignature(True)
17867def cudaStreamIsCapturing(stream):
17868 """ Returns a stream's capture status.
17870 Return the capture status of `stream` via `pCaptureStatus`. After a
17871 successful call, `*pCaptureStatus` will contain one of the following:
17873 - :py:obj:`~.cudaStreamCaptureStatusNone`: The stream is not capturing.
17875 - :py:obj:`~.cudaStreamCaptureStatusActive`: The stream is capturing.
17877 - :py:obj:`~.cudaStreamCaptureStatusInvalidated`: The stream was
17878 capturing but an error has invalidated the capture sequence. The
17879 capture sequence must be terminated with
17880 :py:obj:`~.cudaStreamEndCapture` on the stream where it was initiated
17881 in order to continue using `stream`.
17883 Note that, if this is called on :py:obj:`~.cudaStreamLegacy` (the "null
17884 stream") while a blocking stream on the same device is capturing, it
17885 will return :py:obj:`~.cudaErrorStreamCaptureImplicit` and
17886 `*pCaptureStatus` is unspecified after the call. The blocking stream
17887 capture is not invalidated.
17889 When a blocking stream is capturing, the legacy stream is in an
17890 unusable state until the blocking stream capture is terminated. The
17891 legacy stream is not supported for stream capture, but attempted use
17892 would have an implicit dependency on the capturing stream(s).
17894 Parameters
17895 ----------
17896 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17897 Stream to query
17899 Returns
17900 -------
17901 cudaError_t
17902 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorStreamCaptureImplicit`
17903 pCaptureStatus : :py:obj:`~.cudaStreamCaptureStatus`
17904 Returns the stream's capture status
17906 See Also
17907 --------
17908 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamEndCapture`
17909 """
17910 cdef cyruntime.cudaStream_t cystream
17911 if stream is None:
17912 pstream = 0
17913 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
17914 pstream = int(stream)
17915 else:
17916 pstream = int(cudaStream_t(stream))
17917 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
17918 cdef cyruntime.cudaStreamCaptureStatus pCaptureStatus
17919 with nogil:
17920 err = cyruntime.cudaStreamIsCapturing(cystream, &pCaptureStatus)
17921 if err != cyruntime.cudaSuccess:
17922 return (_dict_cudaError_t[err], None)
17923 return (_dict_cudaError_t[err], cudaStreamCaptureStatus(pCaptureStatus))
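# Usage sketch (illustrative only): branch on a stream's capture status before
# issuing work that is illegal during capture.
#
#     err, status = cudaStreamIsCapturing(stream)
#     if status == cudaStreamCaptureStatus.cudaStreamCaptureStatusActive:
#         pass  # stream is recording into a graph; avoid non-capturable calls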
17925@cython.embedsignature(True)
17926def cudaStreamGetCaptureInfo(stream):
17927 """ Query a stream's capture state.
17929 Query stream state related to stream capture.
17931 If called on :py:obj:`~.cudaStreamLegacy` (the "null stream") while a
17932 stream not created with :py:obj:`~.cudaStreamNonBlocking` is capturing,
17933 returns :py:obj:`~.cudaErrorStreamCaptureImplicit`.
17935 Valid data (other than capture status) is returned only if both of the
17936 following are true:
17938 - the call returns cudaSuccess
17940 - the returned capture status is
17941 :py:obj:`~.cudaStreamCaptureStatusActive`
17943 If `edgeData_out` is non-NULL then `dependencies_out` must be as well.
17944 If `dependencies_out` is non-NULL and `edgeData_out` is NULL, but there
17945 is non-zero edge data for one or more of the current stream
17946 dependencies, the call will return :py:obj:`~.cudaErrorLossyQuery`.
17948 Parameters
17949 ----------
17950 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
17951 The stream to query
17953 Returns
17954 -------
17955 cudaError_t
17956 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorStreamCaptureImplicit`, :py:obj:`~.cudaErrorLossyQuery`
17957 captureStatus_out : :py:obj:`~.cudaStreamCaptureStatus`
17958 Location to return the capture status of the stream; required
17959 id_out : unsigned long long
17960 Optional location to return an id for the capture sequence, which
17961 is unique over the lifetime of the process
17962 graph_out : :py:obj:`~.cudaGraph_t`
17963 Optional location to return the graph being captured into. All
17964 operations other than destroy and node removal are permitted on the
17965 graph while the capture sequence is in progress. This API does not
17966 transfer ownership of the graph, which is transferred or destroyed
17967 at :py:obj:`~.cudaStreamEndCapture`. Note that the graph handle may
17968 be invalidated before end of capture for certain errors. Nodes that
17969 are or become unreachable from the original stream at
17970 :py:obj:`~.cudaStreamEndCapture` due to direct actions on the graph
17971 do not trigger :py:obj:`~.cudaErrorStreamCaptureUnjoined`.
17972 dependencies_out : list[:py:obj:`~.cudaGraphNode_t`]
17973 Optional location to store a pointer to an array of nodes. The next
17974 node to be captured in the stream will depend on this set of nodes,
17975 absent operations such as event wait which modify this set. The
17976 array pointer is valid until the next API call which operates on
17977 the stream or until the capture is terminated. The node handles may
17978 be copied out and are valid until they or the graph is destroyed.
17979 The driver-owned array may also be passed directly to APIs that
17980 operate on the graph (not the stream) without copying.
17981 edgeData_out : list[:py:obj:`~.cudaGraphEdgeData`]
17982 Optional location to store a pointer to an array of graph edge
17983 data. This array parallels `dependencies_out`; the next node to be
17984 added has an edge to `dependencies_out`[i] with annotation
17985 `edgeData_out`[i] for each `i`. The array pointer is valid until
17986 the next API call which operates on the stream or until the capture
17987 is terminated.
17988 numDependencies_out : int
17989 Optional location to store the size of the array returned in
17990 dependencies_out.
17992 See Also
17993 --------
17994 :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamIsCapturing`, :py:obj:`~.cudaStreamUpdateCaptureDependencies`
17995 """
17996 cdef cyruntime.cudaStream_t cystream
17997 if stream is None:
17998 pstream = 0
17999 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
18000 pstream = int(stream)
18001 else:
18002 pstream = int(cudaStream_t(stream))
18003 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
18004 cdef cyruntime.cudaStreamCaptureStatus captureStatus_out
18005 cdef unsigned long long id_out = 0
18006 cdef cudaGraph_t graph_out = cudaGraph_t()
18007 cdef const cyruntime.cudaGraphNode_t* cydependencies_out = NULL
18008 pydependencies_out = []
18009 cdef const cyruntime.cudaGraphEdgeData* cyedgeData_out = NULL
18010 pyedgeData_out = []
18011 cdef size_t numDependencies_out = 0
18012 with nogil:
18013 err = cyruntime.cudaStreamGetCaptureInfo(cystream, &captureStatus_out, &id_out, <cyruntime.cudaGraph_t*>graph_out._pvt_ptr, &cydependencies_out, &cyedgeData_out, &numDependencies_out)
18014 if cudaError_t(err) == cudaError_t(0):
18015 pydependencies_out = [cudaGraphNode_t(init_value=<void_ptr>cydependencies_out[idx]) for idx in range(numDependencies_out)]
18016 if cudaError_t(err) == cudaError_t(0):
18017 pyedgeData_out = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData_out[idx]) for idx in range(numDependencies_out)]
18018 if err != cyruntime.cudaSuccess:
18019 return (_dict_cudaError_t[err], None, None, None, None, None, None)
18020 return (_dict_cudaError_t[err], cudaStreamCaptureStatus(captureStatus_out), id_out, graph_out, pydependencies_out, pyedgeData_out, numDependencies_out)
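# Usage sketch (illustrative only): inspect an active capture. All outputs other
# than the status are only meaningful while the status is
# cudaStreamCaptureStatusActive.
#
#     (err, status, cap_id, graph, deps,
#      edge_data, num_deps) = cudaStreamGetCaptureInfo(stream)
#     if status == cudaStreamCaptureStatus.cudaStreamCaptureStatusActive:
#         print("capture", cap_id, "has", num_deps, "leaf node(s)")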
18022@cython.embedsignature(True)
18023def cudaStreamUpdateCaptureDependencies(stream, dependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], dependencyData : Optional[tuple[cudaGraphEdgeData] | list[cudaGraphEdgeData]], size_t numDependencies, unsigned int flags):
18024 """ Update the set of dependencies in a capturing stream.
18026 Modifies the dependency set of a capturing stream. The dependency set
18027 is the set of nodes that the next captured node in the stream will
18028 depend on.
18030 Valid flags are :py:obj:`~.cudaStreamAddCaptureDependencies` and
18031 :py:obj:`~.cudaStreamSetCaptureDependencies`. These control whether the
18032 set passed to the API is added to the existing set or replaces it. A
18033 flags value of 0 defaults to
18034 :py:obj:`~.cudaStreamAddCaptureDependencies`.
18036 Nodes that are removed from the dependency set via this API do not
18037 result in :py:obj:`~.cudaErrorStreamCaptureUnjoined` if they are
18038 unreachable from the stream at :py:obj:`~.cudaStreamEndCapture`.
18040 Returns :py:obj:`~.cudaErrorIllegalState` if the stream is not
18041 capturing.
18043 Parameters
18044 ----------
18045 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
18046 The stream to update
18047 dependencies : list[:py:obj:`~.cudaGraphNode_t`]
18048 The set of dependencies to add
18049 dependencyData : list[:py:obj:`~.cudaGraphEdgeData`]
18050 Optional array of data associated with each dependency.
18051 numDependencies : size_t
18052 The size of the dependencies array
18053 flags : unsigned int
18054 See above
18056 Returns
18057 -------
18058 cudaError_t
18059 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorIllegalState`
18061 See Also
18062 --------
18063 :py:obj:`~.cudaStreamBeginCapture`, :py:obj:`~.cudaStreamGetCaptureInfo`
18064 """
18065 dependencyData = [] if dependencyData is None else dependencyData
18066 if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData):
18067 raise TypeError("Argument 'dependencyData' is not an instance of the expected type (tuple[cudaGraphEdgeData] or list[cudaGraphEdgeData])")
18068 dependencies = [] if dependencies is None else dependencies
18069 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in dependencies):
18070 raise TypeError("Argument 'dependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
# Mirror the bounds check performed by the other capture APIs before allocating.
if numDependencies > <size_t>len(dependencies): raise RuntimeError("List is too small: " + str(len(dependencies)) + " < " + str(numDependencies))
18071 cdef cyruntime.cudaStream_t cystream
18072 if stream is None:
18073 pstream = 0
18074 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
18075 pstream = int(stream)
18076 else:
18077 pstream = int(cudaStream_t(stream))
18078 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
18079 cdef cyruntime.cudaGraphNode_t* cydependencies = NULL
18080 if len(dependencies) > 1:
18081 cydependencies = <cyruntime.cudaGraphNode_t*> calloc(len(dependencies), sizeof(cyruntime.cudaGraphNode_t))
18082 if cydependencies is NULL:
18083 raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
18084 else:
18085 for idx in range(len(dependencies)):
18086 cydependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>dependencies[idx])._pvt_ptr[0]
18087 elif len(dependencies) == 1:
18088 cydependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>dependencies[0])._pvt_ptr
18089 cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL
18090 if len(dependencyData) > 1:
18091 cydependencyData = <cyruntime.cudaGraphEdgeData*> calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData))
18092 if cydependencyData is NULL:
18093 raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
18094 for idx in range(len(dependencyData)):
18095 string.memcpy(&cydependencyData[idx], (<cudaGraphEdgeData>dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData))
18096 elif len(dependencyData) == 1:
18097 cydependencyData = (<cudaGraphEdgeData>dependencyData[0])._pvt_ptr
18098 with nogil:
18099 err = cyruntime.cudaStreamUpdateCaptureDependencies(cystream, cydependencies, cydependencyData, numDependencies, flags)
18100 if len(dependencies) > 1 and cydependencies is not NULL:
18101 free(cydependencies)
18102 if len(dependencyData) > 1 and cydependencyData is not NULL:
18103 free(cydependencyData)
18104 return (_dict_cudaError_t[err],)
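# Usage sketch (illustrative only): add the current capture's leaf nodes back as
# explicit dependencies. flags=0 selects the default
# cudaStreamAddCaptureDependencies behavior described above.
#
#     err, _, _, _, deps, _, n = cudaStreamGetCaptureInfo(stream)
#     err, = cudaStreamUpdateCaptureDependencies(stream, deps, None, n, 0)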
18106@cython.embedsignature(True)
18107def cudaEventCreate():
18108 """ Creates an event object.
18110 Creates an event object for the current device using
18111 :py:obj:`~.cudaEventDefault`.
18113 Returns
18114 -------
18115 cudaError_t
18116 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorMemoryAllocation`
18117 event : :py:obj:`~.cudaEvent_t`
18118 Newly created event
18120 See Also
18121 --------
18122 :py:obj:`~.cudaEventCreate (C++ API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cuEventCreate`
18123 """
18124 cdef cudaEvent_t event = cudaEvent_t()
18125 with nogil:
18126 err = cyruntime.cudaEventCreate(<cyruntime.cudaEvent_t*>event._pvt_ptr)
18127 if err != cyruntime.cudaSuccess:
18128 return (_dict_cudaError_t[err], None)
18129 return (_dict_cudaError_t[err], event)
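# Usage sketch (illustrative only): the create/record/synchronize/destroy
# lifecycle shared by the event APIs below; `stream` is assumed to exist.
#
#     err, event = cudaEventCreate()
#     err, = cudaEventRecord(event, stream)
#     err, = cudaEventSynchronize(event)
#     err, = cudaEventDestroy(event)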
18131@cython.embedsignature(True)
18132def cudaEventCreateWithFlags(unsigned int flags):
18133 """ Creates an event object with the specified flags.
18135 Creates an event object for the current device with the specified
18136 flags. Valid flags include:
18138 - :py:obj:`~.cudaEventDefault`: Default event creation flag.
18140 - :py:obj:`~.cudaEventBlockingSync`: Specifies that event should use
18141 blocking synchronization. A host thread that uses
18142 :py:obj:`~.cudaEventSynchronize()` to wait on an event created with
18143 this flag will block until the event actually completes.
18145 - :py:obj:`~.cudaEventDisableTiming`: Specifies that the created event
18146 does not need to record timing data. Events created with this flag
18147 specified and the :py:obj:`~.cudaEventBlockingSync` flag not
18148 specified will provide the best performance when used with
18149 :py:obj:`~.cudaStreamWaitEvent()` and :py:obj:`~.cudaEventQuery()`.
18151 - :py:obj:`~.cudaEventInterprocess`: Specifies that the created event
18152 may be used as an interprocess event by
18153 :py:obj:`~.cudaIpcGetEventHandle()`.
18154 :py:obj:`~.cudaEventInterprocess` must be specified along with
18155 :py:obj:`~.cudaEventDisableTiming`.
18157 Parameters
18158 ----------
18159 flags : unsigned int
18160 Flags for new event
18162 Returns
18163 -------
18164 cudaError_t
18165 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorMemoryAllocation`
18166 event : :py:obj:`~.cudaEvent_t`
18167 Newly created event
18169 See Also
18170 --------
18171 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cuEventCreate`
18172 """
18173 cdef cudaEvent_t event = cudaEvent_t()
18174 with nogil:
18175 err = cyruntime.cudaEventCreateWithFlags(<cyruntime.cudaEvent_t*>event._pvt_ptr, flags)
18176 if err != cyruntime.cudaSuccess:
18177 return (_dict_cudaError_t[err], None)
18178 return (_dict_cudaError_t[err], event)
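# Usage sketch (illustrative only): a synchronization-only event. Disabling
# timing, per the docstring above, gives the best cudaStreamWaitEvent /
# cudaEventQuery performance.
#
#     err, event = cudaEventCreateWithFlags(cudaEventDisableTiming)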
18180@cython.embedsignature(True)
18181def cudaEventRecord(event, stream):
18182 """ Records an event.
18184 Captures in `event` the contents of `stream` at the time of this call.
18185 `event` and `stream` must be on the same CUDA context. Calls such as
18186 :py:obj:`~.cudaEventQuery()` or :py:obj:`~.cudaStreamWaitEvent()` will
18187 then examine or wait for completion of the work that was captured. Uses
18188 of `stream` after this call do not modify `event`. See note on default
18189 stream behavior for what is captured in the default case.
18191 :py:obj:`~.cudaEventRecord()` can be called multiple times on the same
18192 event and will overwrite the previously captured state. Other APIs such
18193 as :py:obj:`~.cudaStreamWaitEvent()` use the most recently captured
18194 state at the time of the API call, and are not affected by later calls
18195 to :py:obj:`~.cudaEventRecord()`. Before the first call to
18196 :py:obj:`~.cudaEventRecord()`, an event represents an empty set of
18197 work, so for example :py:obj:`~.cudaEventQuery()` would return
18198 :py:obj:`~.cudaSuccess`.
18200 Parameters
18201 ----------
18202 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18203 Event to record
18204 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
18205 Stream in which to record event
18207 Returns
18208 -------
18209 cudaError_t
18210 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
18212 See Also
18213 --------
18214 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cuEventRecord`
18215 """
18216 cdef cyruntime.cudaStream_t cystream
18217 if stream is None:
18218 pstream = 0
18219 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
18220 pstream = int(stream)
18221 else:
18222 pstream = int(cudaStream_t(stream))
18223 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
18224 cdef cyruntime.cudaEvent_t cyevent
18225 if event is None:
18226 pevent = 0
18227 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
18228 pevent = int(event)
18229 else:
18230 pevent = int(cudaEvent_t(event))
18231 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
18232 with nogil:
18233 err = cyruntime.cudaEventRecord(cyevent, cystream)
18234 return (_dict_cudaError_t[err],)
18236@cython.embedsignature(True)
18237def cudaEventRecordWithFlags(event, stream, unsigned int flags):
18238 """ Records an event.
18240 Captures in `event` the contents of `stream` at the time of this call.
18241 `event` and `stream` must be on the same CUDA context. Calls such as
18242 :py:obj:`~.cudaEventQuery()` or :py:obj:`~.cudaStreamWaitEvent()` will
18243 then examine or wait for completion of the work that was captured. Uses
18244 of `stream` after this call do not modify `event`. See note on default
18245 stream behavior for what is captured in the default case.
18247 :py:obj:`~.cudaEventRecordWithFlags()` can be called multiple times on
18248 the same event and will overwrite the previously captured state. Other
18249 APIs such as :py:obj:`~.cudaStreamWaitEvent()` use the most recently
18250 captured state at the time of the API call, and are not affected by
18251 later calls to :py:obj:`~.cudaEventRecordWithFlags()`. Before the first
18252 call to :py:obj:`~.cudaEventRecordWithFlags()`, an event represents an
18253 empty set of work, so for example :py:obj:`~.cudaEventQuery()` would
18254 return :py:obj:`~.cudaSuccess`.
18256 flags include:
18258 - :py:obj:`~.cudaEventRecordDefault`: Default event creation flag.
18260 - :py:obj:`~.cudaEventRecordExternal`: Event is captured in the graph
18261 as an external event node when performing stream capture.
18263 Parameters
18264 ----------
18265 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18266 Event to record
18267 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
18268 Stream in which to record event
18269 flags : unsigned int
18270 Parameters for the operation (see above)
18272 Returns
18273 -------
18274 cudaError_t
18275 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
18277 See Also
18278 --------
18279 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cuEventRecord`
18280 """
18281 cdef cyruntime.cudaStream_t cystream
18282 if stream is None:
18283 pstream = 0
18284 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
18285 pstream = int(stream)
18286 else:
18287 pstream = int(cudaStream_t(stream))
18288 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
18289 cdef cyruntime.cudaEvent_t cyevent
18290 if event is None:
18291 pevent = 0
18292 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
18293 pevent = int(event)
18294 else:
18295 pevent = int(cudaEvent_t(event))
18296 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
18297 with nogil:
18298 err = cyruntime.cudaEventRecordWithFlags(cyevent, cystream, flags)
18299 return (_dict_cudaError_t[err],)
18301@cython.embedsignature(True)
18302def cudaEventQuery(event):
18303 """ Queries an event's status.
18305 Queries the status of all work currently captured by `event`. See
18306 :py:obj:`~.cudaEventRecord()` for details on what is captured by an
18307 event.
18309 Returns :py:obj:`~.cudaSuccess` if all captured work has been
18310 completed, or :py:obj:`~.cudaErrorNotReady` if any captured work is
18311 incomplete.
18313 For the purposes of Unified Memory, a return value of
18314 :py:obj:`~.cudaSuccess` is equivalent to having called
18315 :py:obj:`~.cudaEventSynchronize()`.
18317 Parameters
18318 ----------
18319 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18320 Event to query
18322 Returns
18323 -------
18324 cudaError_t
18325 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotReady`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
18327 See Also
18328 --------
18329 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cuEventQuery`
18330 """
18331 cdef cyruntime.cudaEvent_t cyevent
18332 if event is None:
18333 pevent = 0
18334 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
18335 pevent = int(event)
18336 else:
18337 pevent = int(cudaEvent_t(event))
18338 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
18339 with nogil:
18340 err = cyruntime.cudaEventQuery(cyevent)
18341 return (_dict_cudaError_t[err],)
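# Usage sketch (illustrative only): non-blocking completion check; as with
# cudaStreamQuery, cudaErrorNotReady is an expected status, not a failure.
#
#     err, = cudaEventQuery(event)
#     done = (err == cudaError_t.cudaSuccess)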
18343@cython.embedsignature(True)
18344def cudaEventSynchronize(event):
18345 """ Waits for an event to complete.
18347 Waits until the completion of all work currently captured in `event`.
18348 See :py:obj:`~.cudaEventRecord()` for details on what is captured by an
18349 event.
18351 Waiting for an event that was created with the
18352 :py:obj:`~.cudaEventBlockingSync` flag will cause the calling CPU
18353 thread to block until the event has been completed by the device. If
18354 the :py:obj:`~.cudaEventBlockingSync` flag has not been set, then the
18355 CPU thread will busy-wait until the event has been completed by the
18356 device.
18358 Parameters
18359 ----------
18360 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18361 Event to wait for
18363 Returns
18364 -------
18365 cudaError_t
18366 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
18368 See Also
18369 --------
18370 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cuEventSynchronize`
18371 """
18372 cdef cyruntime.cudaEvent_t cyevent
18373 if event is None:
18374 pevent = 0
18375 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
18376 pevent = int(event)
18377 else:
18378 pevent = int(cudaEvent_t(event))
18379 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
18380 with nogil:
18381 err = cyruntime.cudaEventSynchronize(cyevent)
18382 return (_dict_cudaError_t[err],)
18384@cython.embedsignature(True)
18385def cudaEventDestroy(event):
18386 """ Destroys an event object.
18388 Destroys the event specified by `event`.
18390 An event may be destroyed before it is complete (i.e., while
18391 :py:obj:`~.cudaEventQuery()` would return
18392 :py:obj:`~.cudaErrorNotReady`). In this case, the call does not block
18393 on completion of the event, and any associated resources will
18394 automatically be released asynchronously at completion.
18396 Parameters
18397 ----------
18398 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18399 Event to destroy
18401 Returns
18402 -------
18403 cudaError_t
18404 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`
18406 See Also
18407 --------
18408 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaEventElapsedTime`, :py:obj:`~.cuEventDestroy`
18409 """
18410 cdef cyruntime.cudaEvent_t cyevent
18411 if event is None:
18412 pevent = 0
18413 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
18414 pevent = int(event)
18415 else:
18416 pevent = int(cudaEvent_t(event))
18417 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
18418 with nogil:
18419 err = cyruntime.cudaEventDestroy(cyevent)
18420 return (_dict_cudaError_t[err],)
18422@cython.embedsignature(True)
18423def cudaEventElapsedTime(start, end):
18424 """ Computes the elapsed time between events.
18426 Computes the elapsed time between two events (in milliseconds with a
18427 resolution of around 0.5 microseconds). Note this API is not guaranteed
18428 to return the latest errors for pending work. As such this API is
18429 intended to serve as an elapsed time calculation only, and polling for
18430 completion on the events to be compared should be done with
18431 :py:obj:`~.cudaEventQuery` instead.
18433 If either event was last recorded in a non-NULL stream, the resulting
18434 time may be greater than expected (even if both used the same stream
18435 handle). This happens because the :py:obj:`~.cudaEventRecord()`
18436 operation takes place asynchronously and there is no guarantee that the
18437 measured latency is actually just between the two events. Any number of
18438 other different stream operations could execute in between the two
18439 measured events, thus altering the timing in a significant way.
18441 If :py:obj:`~.cudaEventRecord()` has not been called on either event,
18442 then :py:obj:`~.cudaErrorInvalidResourceHandle` is returned. If
18443 :py:obj:`~.cudaEventRecord()` has been called on both events but one or
18444 both of them has not yet been completed (that is,
18445 :py:obj:`~.cudaEventQuery()` would return :py:obj:`~.cudaErrorNotReady`
18446 on at least one of the events), :py:obj:`~.cudaErrorNotReady` is
18447 returned. If either event was created with the
18448 :py:obj:`~.cudaEventDisableTiming` flag, then this function will return
18449 :py:obj:`~.cudaErrorInvalidResourceHandle`.
18451 Parameters
18452 ----------
18453 start : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18454 Starting event
18455 end : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
18456 Ending event
18458 Returns
18459 -------
18460 cudaError_t
18461 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotReady`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorUnknown`
18462 ms : float
18463 Time between `start` and `end` in ms
18465 See Also
18466 --------
18467 :py:obj:`~.cudaEventCreate (C API)`, :py:obj:`~.cudaEventCreateWithFlags`, :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cuEventElapsedTime`
18468 """
18469 cdef cyruntime.cudaEvent_t cyend
18470 if end is None:
18471 pend = 0
18472 elif isinstance(end, (cudaEvent_t,driver.CUevent)):
18473 pend = int(end)
18474 else:
18475 pend = int(cudaEvent_t(end))
18476 cyend = <cyruntime.cudaEvent_t><void_ptr>pend
18477 cdef cyruntime.cudaEvent_t cystart
18478 if start is None:
18479 pstart = 0
18480 elif isinstance(start, (cudaEvent_t,driver.CUevent)):
18481 pstart = int(start)
18482 else:
18483 pstart = int(cudaEvent_t(start))
18484 cystart = <cyruntime.cudaEvent_t><void_ptr>pstart
18485 cdef float ms = 0
18486 with nogil:
18487 err = cyruntime.cudaEventElapsedTime(&ms, cystart, cyend)
18488 if err != cyruntime.cudaSuccess:
18489 return (_dict_cudaError_t[err], None)
18490 return (_dict_cudaError_t[err], ms)
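# Usage sketch (illustrative only): time a span of stream work with a pair of
# default (timing-enabled) events. Synchronize on the end event before asking
# for the elapsed time, otherwise cudaErrorNotReady is returned.
#
#     err, start = cudaEventCreate()
#     err, end = cudaEventCreate()
#     err, = cudaEventRecord(start, stream)
#     # ... enqueue the work to be timed on `stream` ...
#     err, = cudaEventRecord(end, stream)
#     err, = cudaEventSynchronize(end)
#     err, ms = cudaEventElapsedTime(start, end)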
18492@cython.embedsignature(True)
18493def cudaImportExternalMemory(memHandleDesc : Optional[cudaExternalMemoryHandleDesc]):
18494 """ Imports an external memory object.
18496 Imports an externally allocated memory object and returns a handle to
18497 that in `extMem_out`.
18499 The properties of the handle being imported must be described in
18500 `memHandleDesc`. The :py:obj:`~.cudaExternalMemoryHandleDesc` structure
18501 is defined as follows:
18503 **View CUDA Toolkit Documentation for a C++ code example**
18505 where :py:obj:`~.cudaExternalMemoryHandleDesc.type` specifies the type
18506 of handle being imported. :py:obj:`~.cudaExternalMemoryHandleType` is
18507 defined as:
18509 **View CUDA Toolkit Documentation for a C++ code example**
18511 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18512 :py:obj:`~.cudaExternalMemoryHandleTypeOpaqueFd`, then
18513 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::fd must be a valid
18514 file descriptor referencing a memory object. Ownership of the file
18515 descriptor is transferred to the CUDA driver when the handle is
18516 imported successfully. Performing any operations on the file descriptor
18517 after it is imported results in undefined behavior.
18519 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18520 :py:obj:`~.cudaExternalMemoryHandleTypeOpaqueWin32`, then exactly one
18521 of :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle and
18522 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must not
18523 be NULL. If
18524 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle is not
18525 NULL, then it must represent a valid shared NT handle that references a
18526 memory object. Ownership of this handle is not transferred to CUDA
18527 after the import operation, so the application must release the handle
18528 using the appropriate system call. If
18529 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name is not
18530 NULL, then it must point to a NULL-terminated array of UTF-16
18531 characters that refers to a memory object.
18533 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18534 :py:obj:`~.cudaExternalMemoryHandleTypeOpaqueWin32Kmt`, then
18535 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle must be
18536 non-NULL and
18537 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must be
18538 NULL. The handle specified must be a globally shared KMT handle. This
18539 handle does not hold a reference to the underlying object, and thus
18540 will be invalid when all references to the memory object are destroyed.
18542 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18543 :py:obj:`~.cudaExternalMemoryHandleTypeD3D12Heap`, then exactly one of
18544 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle and
18545 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must not
18546 be NULL. If
18547 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle is not
18548 NULL, then it must represent a valid shared NT handle that is returned
18549 by ID3D12Device::CreateSharedHandle when referring to a ID3D12Heap
18550 object. This handle holds a reference to the underlying object. If
18551 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name is not
18552 NULL, then it must point to a NULL-terminated array of UTF-16
18553 characters that refers to a ID3D12Heap object.
18555 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18556 :py:obj:`~.cudaExternalMemoryHandleTypeD3D12Resource`, then exactly one
18557 of :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle and
18558 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must not
18559 be NULL. If
18560 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle is not
18561 NULL, then it must represent a valid shared NT handle that is returned
18562 by ID3D12Device::CreateSharedHandle when referring to an ID3D12Resource
18563 object. This handle holds a reference to the underlying object. If
18564 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name is not
18565 NULL, then it must point to a NULL-terminated array of UTF-16
18566 characters that refers to an ID3D12Resource object.
18568 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18569 :py:obj:`~.cudaExternalMemoryHandleTypeD3D11Resource`, then exactly one
18570 of :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle and
18571 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must not
18572 be NULL. If
18573 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle is
18574 not NULL, then it must represent a valid shared NT handle that is
18575 returned by IDXGIResource1::CreateSharedHandle when referring to an
18576 ID3D11Resource object. If
18577 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name is not
18578 NULL, then it must point to a NULL-terminated array of UTF-16
18579 characters that refers to an ID3D11Resource object.
18581 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18582 :py:obj:`~.cudaExternalMemoryHandleTypeD3D11ResourceKmt`, then
18583 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::handle must be
18584 non-NULL and
18585 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::win32::name must be
18586 NULL. The handle specified must be a valid shared KMT handle that is
18587 returned by IDXGIResource::GetSharedHandle when referring to an
18588 ID3D11Resource object.
18590 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is
18591 :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, then
18592 :py:obj:`~.cudaExternalMemoryHandleDesc`::handle::nvSciBufObject must
18593 be NON-NULL and reference a valid NvSciBuf object. If the NvSciBuf
18594 object imported into CUDA is also mapped by other drivers, then the
18595 application must use :py:obj:`~.cudaWaitExternalSemaphoresAsync` or
18596 :py:obj:`~.cudaSignalExternalSemaphoresAsync` as appropriate barriers
18597 to maintain coherence between CUDA and the other drivers. See
18598 :py:obj:`~.cudaExternalSemaphoreWaitSkipNvSciBufMemSync` and
18599 :py:obj:`~.cudaExternalSemaphoreSignalSkipNvSciBufMemSync` for memory
18600 synchronization.
18602 The size of the memory object must be specified in
18603 :py:obj:`~.cudaExternalMemoryHandleDesc.size`.
18605 Specifying the flag :py:obj:`~.cudaExternalMemoryDedicated` in
18606 :py:obj:`~.cudaExternalMemoryHandleDesc.flags` indicates that the
18607 resource is a dedicated resource. What constitutes a dedicated
18608 resource is outside the scope of this extension. This flag must be set
18609 if :py:obj:`~.cudaExternalMemoryHandleDesc.type` is one of the
18610 following: :py:obj:`~.cudaExternalMemoryHandleTypeD3D12Resource`,
18611 :py:obj:`~.cudaExternalMemoryHandleTypeD3D11Resource`, or
18612 :py:obj:`~.cudaExternalMemoryHandleTypeD3D11ResourceKmt`.
18614 Parameters
18615 ----------
18616 memHandleDesc : :py:obj:`~.cudaExternalMemoryHandleDesc`
18617 Memory import handle descriptor
18619 Returns
18620 -------
18621 cudaError_t
18622 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorOperatingSystem`
18623 extMem_out : :py:obj:`~.cudaExternalMemory_t`
18624 Returned handle to an external memory object
18626 See Also
18627 --------
18628 :py:obj:`~.cudaDestroyExternalMemory`, :py:obj:`~.cudaExternalMemoryGetMappedBuffer`, :py:obj:`~.cudaExternalMemoryGetMappedMipmappedArray`
18630 Notes
18631 -----
18632 If the Vulkan memory imported into CUDA is mapped on the CPU, then the application must use vkInvalidateMappedMemoryRanges/vkFlushMappedMemoryRanges, as well as appropriate Vulkan pipeline barriers, to maintain coherence between CPU and GPU. For more information on these APIs, please refer to the "Synchronization
18633 and Cache Control" chapter of the Vulkan specification.
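Examples
--------
A minimal sketch of importing a memory object through a POSIX file
descriptor. Illustrative only: `shared_fd` and `nbytes` are hypothetical
values assumed to come from another API such as Vulkan, and the fd is
owned by the CUDA driver after a successful import.

>>> from cuda.bindings import runtime
>>> desc = runtime.cudaExternalMemoryHandleDesc()
>>> desc.type = runtime.cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueFd
>>> desc.handle.fd = shared_fd  # exported by the other API (hypothetical)
>>> desc.size = nbytes          # size of the memory object in bytes
>>> err, extMem = runtime.cudaImportExternalMemory(desc)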
18634 """
18635 cdef cudaExternalMemory_t extMem_out = cudaExternalMemory_t()
18636 cdef cyruntime.cudaExternalMemoryHandleDesc* cymemHandleDesc_ptr = memHandleDesc._pvt_ptr if memHandleDesc is not None else NULL
18637 with nogil:
18638 err = cyruntime.cudaImportExternalMemory(<cyruntime.cudaExternalMemory_t*>extMem_out._pvt_ptr, cymemHandleDesc_ptr)
18639 if err != cyruntime.cudaSuccess:
18640 return (_dict_cudaError_t[err], None)
18641 return (_dict_cudaError_t[err], extMem_out)
18643@cython.embedsignature(True)
18644def cudaExternalMemoryGetMappedBuffer(extMem, bufferDesc : Optional[cudaExternalMemoryBufferDesc]):
18645 """ Maps a buffer onto an imported memory object.
18647 Maps a buffer onto an imported memory object and returns a device
18648 pointer in `devPtr`.
18650 The properties of the buffer being mapped must be described in
18651 `bufferDesc`. The :py:obj:`~.cudaExternalMemoryBufferDesc` structure is
18652 defined as follows:
18654 **View CUDA Toolkit Documentation for a C++ code example**
18656 where :py:obj:`~.cudaExternalMemoryBufferDesc.offset` is the offset in
18657 the memory object at which the buffer's base address is located.
18658 :py:obj:`~.cudaExternalMemoryBufferDesc.size` is the size of the
18659 buffer. :py:obj:`~.cudaExternalMemoryBufferDesc.flags` must be zero.
18661 The offset and size have to be suitably aligned to match the
18662 requirements of the external API. Mapping two buffers whose ranges
18663 overlap may or may not result in the same virtual address being
18664 returned for the overlapped portion. In such cases, the application
18665 must ensure that all accesses to that region from the GPU are volatile.
18666 Otherwise writes made via one address are not guaranteed to be visible
18667 via the other address, even if they're issued by the same thread. It is
18668 recommended that applications map the combined range instead of mapping
18669 separate buffers and then apply the appropriate offsets to the returned
18670 pointer to derive the individual buffers.
18672 The returned pointer `devPtr` must be freed using :py:obj:`~.cudaFree`.
18674 Parameters
18675 ----------
18676 extMem : :py:obj:`~.cudaExternalMemory_t`
18677 Handle to external memory object
18678 bufferDesc : :py:obj:`~.cudaExternalMemoryBufferDesc`
18679 Buffer descriptor
18681 Returns
18682 -------
18683 cudaError_t
18684 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
18685 devPtr : Any
18686 Returned device pointer to buffer
18688 See Also
18689 --------
18690 :py:obj:`~.cudaImportExternalMemory`, :py:obj:`~.cudaDestroyExternalMemory`, :py:obj:`~.cudaExternalMemoryGetMappedMipmappedArray`
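Examples
--------
A minimal sketch, assuming `extMem` was returned by a successful
:py:obj:`~.cudaImportExternalMemory` call and the hypothetical `nbytes`
does not exceed the imported size:

>>> from cuda.bindings import runtime
>>> buf_desc = runtime.cudaExternalMemoryBufferDesc()
>>> buf_desc.offset = 0
>>> buf_desc.size = nbytes
>>> buf_desc.flags = 0  # must be zero
>>> err, devPtr = runtime.cudaExternalMemoryGetMappedBuffer(extMem, buf_desc)
>>> # ... use devPtr with kernels or cudaMemcpy ...
>>> err, = runtime.cudaFree(devPtr)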
18691 """
18692 cdef cyruntime.cudaExternalMemory_t cyextMem
18693 if extMem is None:
18694 pextMem = 0
18695 elif isinstance(extMem, (cudaExternalMemory_t,)):
18696 pextMem = int(extMem)
18697 else:
18698 pextMem = int(cudaExternalMemory_t(extMem))
18699 cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>pextMem
18700 cdef void_ptr devPtr = 0
18701 cdef cyruntime.cudaExternalMemoryBufferDesc* cybufferDesc_ptr = bufferDesc._pvt_ptr if bufferDesc is not None else NULL
18702 with nogil:
18703 err = cyruntime.cudaExternalMemoryGetMappedBuffer(<void**>&devPtr, cyextMem, cybufferDesc_ptr)
18704 if err != cyruntime.cudaSuccess:
18705 return (_dict_cudaError_t[err], None)
18706 return (_dict_cudaError_t[err], devPtr)
18708@cython.embedsignature(True)
18709def cudaExternalMemoryGetMappedMipmappedArray(extMem, mipmapDesc : Optional[cudaExternalMemoryMipmappedArrayDesc]):
18710 """ Maps a CUDA mipmapped array onto an external memory object.
18712 Maps a CUDA mipmapped array onto an external object and returns a
18713 handle to it in `mipmap`.
18715 The properties of the CUDA mipmapped array being mapped must be
18716 described in `mipmapDesc`. The structure
18717 :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc` is defined as follows:
18719 **View CUDA Toolkit Documentation for a C++ code example**
18721 where :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.offset` is the
18722 offset in the memory object where the base level of the mipmap chain
18723 is. :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.formatDesc`
18724 describes the format of the data.
18725 :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.extent` specifies the
18726 dimensions of the base level of the mipmap chain.
18727 :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.flags` are flags
18728 associated with CUDA mipmapped arrays. For further details, please
18729 refer to the documentation for :py:obj:`~.cudaMalloc3DArray`. Note that
18730 if the mipmapped array is bound as a color target in the graphics API,
18731 then the flag :py:obj:`~.cudaArrayColorAttachment` must be specified in
18732 :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.flags`.
18733 :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.numLevels` specifies
18734 the total number of levels in the mipmap chain.
18736 The returned CUDA mipmapped array must be freed using
18737 :py:obj:`~.cudaFreeMipmappedArray`.
18739 Parameters
18740 ----------
18741 extMem : :py:obj:`~.cudaExternalMemory_t`
18742 Handle to external memory object
18743 mipmapDesc : :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc`
18744 CUDA array descriptor
18746 Returns
18747 -------
18748 cudaError_t
18749 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
18750 mipmap : :py:obj:`~.cudaMipmappedArray_t`
18751 Returned CUDA mipmapped array
18753 See Also
18754 --------
18755 :py:obj:`~.cudaImportExternalMemory`, :py:obj:`~.cudaDestroyExternalMemory`, :py:obj:`~.cudaExternalMemoryGetMappedBuffer`
18757 Notes
18758 -----
18759 If :py:obj:`~.cudaExternalMemoryHandleDesc.type` is :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`, then :py:obj:`~.cudaExternalMemoryMipmappedArrayDesc.numLevels` must not be greater than 1.
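Examples
--------
A minimal sketch mapping a single-level 2D float array (hypothetical
sizes; `extMem` from :py:obj:`~.cudaImportExternalMemory`; nested
struct fields are assumed to be assignable in place, as elsewhere in
these bindings):

>>> from cuda.bindings import runtime
>>> mip_desc = runtime.cudaExternalMemoryMipmappedArrayDesc()
>>> mip_desc.offset = 0
>>> mip_desc.formatDesc.x = 32  # one 32-bit channel
>>> mip_desc.formatDesc.f = runtime.cudaChannelFormatKind.cudaChannelFormatKindFloat
>>> mip_desc.extent.width = 256
>>> mip_desc.extent.height = 256
>>> mip_desc.extent.depth = 0
>>> mip_desc.numLevels = 1
>>> err, mipmap = runtime.cudaExternalMemoryGetMappedMipmappedArray(extMem, mip_desc)
>>> err, = runtime.cudaFreeMipmappedArray(mipmap)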
18760 """
18761 cdef cyruntime.cudaExternalMemory_t cyextMem
18762 if extMem is None:
18763 pextMem = 0
18764 elif isinstance(extMem, (cudaExternalMemory_t,)):
18765 pextMem = int(extMem)
18766 else:
18767 pextMem = int(cudaExternalMemory_t(extMem))
18768 cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>pextMem
18769 cdef cudaMipmappedArray_t mipmap = cudaMipmappedArray_t()
18770 cdef cyruntime.cudaExternalMemoryMipmappedArrayDesc* cymipmapDesc_ptr = mipmapDesc._pvt_ptr if mipmapDesc is not None else NULL
18771 with nogil:
18772 err = cyruntime.cudaExternalMemoryGetMappedMipmappedArray(<cyruntime.cudaMipmappedArray_t*>mipmap._pvt_ptr, cyextMem, cymipmapDesc_ptr)
18773 if err != cyruntime.cudaSuccess:
18774 return (_dict_cudaError_t[err], None)
18775 return (_dict_cudaError_t[err], mipmap)
18777@cython.embedsignature(True)
18778def cudaDestroyExternalMemory(extMem):
18779 """ Destroys an external memory object.
18781 Destroys the specified external memory object. Any existing buffers and
18782 CUDA mipmapped arrays mapped onto this object must no longer be used
18783 and must be explicitly freed using :py:obj:`~.cudaFree` and
18784 :py:obj:`~.cudaFreeMipmappedArray` respectively.
18786 Parameters
18787 ----------
18788 extMem : :py:obj:`~.cudaExternalMemory_t`
18789 External memory object to be destroyed
18791 Returns
18792 -------
18793 cudaError_t
18794 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`
18796 See Also
18797 --------
18798 :py:obj:`~.cudaImportExternalMemory`, :py:obj:`~.cudaExternalMemoryGetMappedBuffer`, :py:obj:`~.cudaExternalMemoryGetMappedMipmappedArray`
18799 """
18800 cdef cyruntime.cudaExternalMemory_t cyextMem
18801 if extMem is None:
18802 pextMem = 0
18803 elif isinstance(extMem, (cudaExternalMemory_t,)):
18804 pextMem = int(extMem)
18805 else:
18806 pextMem = int(cudaExternalMemory_t(extMem))
18807 cyextMem = <cyruntime.cudaExternalMemory_t><void_ptr>pextMem
18808 with nogil:
18809 err = cyruntime.cudaDestroyExternalMemory(cyextMem)
18810 return (_dict_cudaError_t[err],)
18812@cython.embedsignature(True)
18813def cudaImportExternalSemaphore(semHandleDesc : Optional[cudaExternalSemaphoreHandleDesc]):
18814 """ Imports an external semaphore.
18816 Imports an externally allocated synchronization object and returns a
18817 handle to that in `extSem_out`.
18819 The properties of the handle being imported must be described in
18820 `semHandleDesc`. The :py:obj:`~.cudaExternalSemaphoreHandleDesc` is
18821 defined as follows:
18823 **View CUDA Toolkit Documentation for a C++ code example**
18825 where :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` specifies the
18826 type of handle being imported.
18827 :py:obj:`~.cudaExternalSemaphoreHandleType` is defined as:
18829 **View CUDA Toolkit Documentation for a C++ code example**
18831 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18832 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueFd`, then
18833 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::fd must be a valid
18834 file descriptor referencing a synchronization object. Ownership of the
18835 file descriptor is transferred to the CUDA driver when the handle is
18836 imported successfully. Performing any operations on the file descriptor
18837 after it is imported results in undefined behavior.
18839 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18840 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32`, then exactly
18841 one of
18842 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle and
18843 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name must
18844 not be NULL. If
18845 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
18846 not NULL, then it must represent a valid shared NT handle that
18847 references a synchronization object. Ownership of this handle is not
18848 transferred to CUDA after the import operation, so the application must
18849 release the handle using the appropriate system call. If
18850 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name is not
18851 NULL, then it must name a valid synchronization object.
18853 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18854 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt`, then
18855 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle must
18856 be non-NULL and
18857 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name must
18858 be NULL. The handle specified must be a globally shared KMT handle.
18859 This handle does not hold a reference to the underlying object, and
18860 thus will be invalid when all references to the synchronization object
18861 are destroyed.
18863 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18864 :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D12Fence`, then exactly one
18865 of :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle
18866 and :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name
18867 must not be NULL. If
18868 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
18869 not NULL, then it must represent a valid shared NT handle that is
18870 returned by ID3D12Device::CreateSharedHandle when referring to an
18871 ID3D12Fence object. This handle holds a reference to the underlying
18872 object. If
18873 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name is not
18874 NULL, then it must name a valid synchronization object that refers to a
18875 valid ID3D12Fence object.
18877 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18878 :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D11Fence`, then exactly one
18879 of :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle
18880 and :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name
18881 must not be NULL. If
18882 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
18883 not NULL, then it must represent a valid shared NT handle that is
18884 returned by ID3D11Fence::CreateSharedHandle. If
18885 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name is not
18886 NULL, then it must name a valid synchronization object that refers to a
18887 valid ID3D11Fence object.
18889 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18890 :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync`, then
18891 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::nvSciSyncObj
18892 represents a valid NvSciSyncObj.
18894 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutex`, then exactly one
18895 of :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle
18896 and :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name
18897 must not be NULL. If
18898 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
18899 not NULL, then it must represent a valid shared NT handle that is returned
18900 by IDXGIResource1::CreateSharedHandle when referring to an
18901 IDXGIKeyedMutex object.
18903 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18904 :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutexKmt`, then
18905 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle must
18906 be non-NULL and
18907 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name must
18908 be NULL. The handle specified must represent a valid KMT handle that is
18909 returned by IDXGIResource::GetSharedHandle when referring to an
18910 IDXGIKeyedMutex object.
18912 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18913 :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd`, then
18914 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::fd must be a valid
18915 file descriptor referencing a synchronization object. Ownership of the
18916 file descriptor is transferred to the CUDA driver when the handle is
18917 imported successfully. Performing any operations on the file descriptor
18918 after it is imported results in undefined behavior.
18920 If :py:obj:`~.cudaExternalSemaphoreHandleDesc.type` is
18921 :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32`, then
18922 exactly one of
18923 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle and
18924 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name must
18925 not be NULL. If
18926 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::handle is
18927 not NULL, then it must represent a valid shared NT handle that
18928 references a synchronization object. Ownership of this handle is not
18929 transferred to CUDA after the import operation, so the application must
18930 release the handle using the appropriate system call. If
18931 :py:obj:`~.cudaExternalSemaphoreHandleDesc`::handle::win32::name is not
18932 NULL, then it must name a valid synchronization object.
18934 Parameters
18935 ----------
18936 semHandleDesc : :py:obj:`~.cudaExternalSemaphoreHandleDesc`
18937 Semaphore import handle descriptor
18939 Returns
18940 -------
18941 cudaError_t
18942 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorOperatingSystem`
18943 extSem_out : :py:obj:`~.cudaExternalSemaphore_t`
18944 Returned handle to an external semaphore
18946 See Also
18947 --------
18948 :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
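Examples
--------
A minimal sketch importing a semaphore through a POSIX file descriptor
(hypothetical: `sem_fd` is exported by another API such as Vulkan and
is owned by the CUDA driver after a successful import):

>>> from cuda.bindings import runtime
>>> sem_desc = runtime.cudaExternalSemaphoreHandleDesc()
>>> sem_desc.type = runtime.cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueFd
>>> sem_desc.handle.fd = sem_fd
>>> err, extSem = runtime.cudaImportExternalSemaphore(sem_desc)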
18949 """
18950 cdef cudaExternalSemaphore_t extSem_out = cudaExternalSemaphore_t()
18951 cdef cyruntime.cudaExternalSemaphoreHandleDesc* cysemHandleDesc_ptr = semHandleDesc._pvt_ptr if semHandleDesc is not None else NULL
18952 with nogil:
18953 err = cyruntime.cudaImportExternalSemaphore(<cyruntime.cudaExternalSemaphore_t*>extSem_out._pvt_ptr, cysemHandleDesc_ptr)
18954 if err != cyruntime.cudaSuccess:
18955 return (_dict_cudaError_t[err], None)
18956 return (_dict_cudaError_t[err], extSem_out)
18958@cython.embedsignature(True)
18959def cudaSignalExternalSemaphoresAsync(extSemArray : Optional[tuple[cudaExternalSemaphore_t] | list[cudaExternalSemaphore_t]], paramsArray : Optional[tuple[cudaExternalSemaphoreSignalParams] | list[cudaExternalSemaphoreSignalParams]], unsigned int numExtSems, stream):
18960 """ Signals a set of external semaphore objects.
18962 Enqueues a signal operation on a set of externally allocated semaphore
18963 objects in the specified stream. The operations will be executed when
18964 all prior operations in the stream complete.
18966 The exact semantics of signaling a semaphore depends on the type of the
18967 object.
18969 If the semaphore object is any one of the following types:
18970 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueFd`,
18971 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32`,
18972 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt` then
18973 signaling the semaphore will set it to the signaled state.
18975 If the semaphore object is any one of the following types:
18976 :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D12Fence`,
18977 :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D11Fence`,
18978 :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd`,
18979 :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32` then
18980 the semaphore will be set to the value specified in
18981 :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::fence::value.
18983 If the semaphore object is of the type
18984 :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync` this API sets
18985 :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::nvSciSync::fence
18986 to a value that can be used by subsequent waiters of the same NvSciSync
18987 object to order operations with those currently submitted in `stream`.
18988 Such an update will overwrite previous contents of
18989 :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::nvSciSync::fence.
18990 By default, signaling such an external semaphore object causes
18991 appropriate memory synchronization operations to be performed over all
18992 the external memory objects that are imported as
18993 :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`. This ensures that any
18994 subsequent accesses made by other importers of the same set of NvSciBuf
18995 memory object(s) are coherent. These operations can be skipped by
18996 specifying the flag
18997 :py:obj:`~.cudaExternalSemaphoreSignalSkipNvSciBufMemSync`, which can
18998 be used as a performance optimization when data coherency is not
18999 required. But specifying this flag in scenarios where data coherency is
19000 required results in undefined behavior. Also, for a semaphore object of
19001 the type :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync`, if the
19002 NvSciSyncAttrList used to create the NvSciSyncObj had not set the flags
19003 in :py:obj:`~.cudaDeviceGetNvSciSyncAttributes` to
19004 cudaNvSciSyncAttrSignal, this API will return cudaErrorNotSupported.
19006 :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::nvSciSync::fence
19007 associated with semaphore object of the type
19008 :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync` can be
19009 deterministic. For this, the NvSciSyncAttrList used to create the
19010 semaphore object must have the value of the
19011 NvSciSyncAttrKey_RequireDeterministicFences key set to true.
19012 Deterministic fences allow users to enqueue a wait over the semaphore
19013 object even before the corresponding signal is enqueued. For such a
19014 semaphore object, CUDA guarantees that each signal operation will
19015 increment the fence value by '1'. Users are expected to track the count
19016 of signals enqueued on the semaphore object and insert waits accordingly.
19017 When such a semaphore object is signaled from multiple streams, due to
19018 concurrent stream execution, it is possible that the order in which the
19019 semaphore gets signaled is non-deterministic. This could lead to waiters
19020 of the semaphore getting unblocked incorrectly. Users are expected to
19021 handle such situations, either by not using the same semaphore object
19022 with deterministic fence support enabled in different streams, or by
19023 adding explicit dependencies among such streams so that the semaphore
19024 is signaled in order.
19025 :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::nvSciSync::fence
19026 associated with semaphore object of the type
19027 :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync` can be timestamp
19028 enabled. For this the NvSciSyncAttrList used to create the object must
19029 have the value of NvSciSyncAttrKey_WaiterRequireTimestamps key set to
19030 true. Timestamps are emitted asynchronously by the GPU and CUDA saves
19031 the GPU timestamp in the corresponding NvSciSyncFence at the time of
19032 signal on GPU. Users are expected to convert GPU clocks to CPU clocks
19033 using appropriate scaling functions. Users are expected to wait for the
19034 completion of the fence before extracting timestamp using appropriate
19035 NvSciSync APIs. Users are expected to ensure that there is only one
19036 outstanding timestamp-enabled fence per CUDA-NvSciSync object at any
19037 point in time; failing to do so leads to undefined behavior. Extracting
19038 the timestamp before the corresponding fence is signaled also leads to
19039 undefined behavior. Timestamps extracted via the appropriate NvSciSync
19040 API are in microseconds.
19042 If the semaphore object is any one of the following types:
19043 :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutex`,
19044 :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutexKmt`, then the
19045 keyed mutex will be released with the key specified in
19046 :py:obj:`~.cudaExternalSemaphoreSignalParams`::params::keyedmutex::key.
19048 Parameters
19049 ----------
19050 extSemArray : list[:py:obj:`~.cudaExternalSemaphore_t`]
19051 Set of external semaphores to be signaled
19052 paramsArray : list[:py:obj:`~.cudaExternalSemaphoreSignalParams`]
19053 Array of semaphore parameters
19054 numExtSems : unsigned int
19055 Number of semaphores to signal
19056 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
19057 Stream to enqueue the signal operations in
19059 Returns
19060 -------
19061 cudaError_t
19062 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`
19064 See Also
19065 --------
19066 :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
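Examples
--------
A minimal sketch signaling one imported semaphore on the default (NULL)
stream (`extSem` from :py:obj:`~.cudaImportExternalSemaphore`; for
fence-based types, `params.params.fence.value` would also be set):

>>> from cuda.bindings import runtime
>>> params = runtime.cudaExternalSemaphoreSignalParams()
>>> err, = runtime.cudaSignalExternalSemaphoresAsync([extSem], [params], 1, 0)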
19067 """
19068 cdef cyruntime.cudaStream_t cystream
19069 if stream is None:
19070 pstream = 0
19071 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
19072 pstream = int(stream)
19073 else:
19074 pstream = int(cudaStream_t(stream))
19075 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
19076 paramsArray = [] if paramsArray is None else paramsArray
19077 if not all(isinstance(_x, (cudaExternalSemaphoreSignalParams,)) for _x in paramsArray):
19078 raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphoreSignalParams,] or list[cyruntime.cudaExternalSemaphoreSignalParams,])")
19079 extSemArray = [] if extSemArray is None else extSemArray
19080 if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray):
19081 raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphore_t,] or list[cyruntime.cudaExternalSemaphore_t,])")
19082 cdef cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL
19083 if len(extSemArray) > 1:
19084 cyextSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(extSemArray), sizeof(cyruntime.cudaExternalSemaphore_t))
19085 if cyextSemArray is NULL:
19086 raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
19087 else:
19088 for idx in range(len(extSemArray)):
19089 cyextSemArray[idx] = <cyruntime.cudaExternalSemaphore_t>(<cudaExternalSemaphore_t>extSemArray[idx])._pvt_ptr[0]
19090 elif len(extSemArray) == 1:
19091 cyextSemArray = <cyruntime.cudaExternalSemaphore_t*>(<cudaExternalSemaphore_t>extSemArray[0])._pvt_ptr
19092 cdef cyruntime.cudaExternalSemaphoreSignalParams* cyparamsArray = NULL
19093 if len(paramsArray) > 1:
19094 cyparamsArray = <cyruntime.cudaExternalSemaphoreSignalParams*> calloc(len(paramsArray), sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
19095 if cyparamsArray is NULL:
19096 raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreSignalParams)))
19097 for idx in range(len(paramsArray)):
19098 string.memcpy(&cyparamsArray[idx], (<cudaExternalSemaphoreSignalParams>paramsArray[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreSignalParams))
19099 elif len(paramsArray) == 1:
19100 cyparamsArray = (<cudaExternalSemaphoreSignalParams>paramsArray[0])._pvt_ptr
19101 if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems))
19102 if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems))
19103 with nogil:
19104 err = cyruntime.cudaSignalExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream)
19105 if len(extSemArray) > 1 and cyextSemArray is not NULL:
19106 free(cyextSemArray)
19107 if len(paramsArray) > 1 and cyparamsArray is not NULL:
19108 free(cyparamsArray)
19109 return (_dict_cudaError_t[err],)
19111@cython.embedsignature(True)
19112def cudaWaitExternalSemaphoresAsync(extSemArray : Optional[tuple[cudaExternalSemaphore_t] | list[cudaExternalSemaphore_t]], paramsArray : Optional[tuple[cudaExternalSemaphoreWaitParams] | list[cudaExternalSemaphoreWaitParams]], unsigned int numExtSems, stream):
19113 """ Waits on a set of external semaphore objects.
19115 Enqueues a wait operation on a set of externally allocated semaphore
19116 objects in the specified stream. The operations will be executed when
19117 all prior operations in the stream complete.
19119 The exact semantics of waiting on a semaphore depends on the type of
19120 the object.
19122 If the semaphore object is any one of the following types:
19123 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueFd`,
19124 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32`,
19125 :py:obj:`~.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt` then waiting
19126 on the semaphore will wait until the semaphore reaches the signaled
19127 state. The semaphore will then be reset to the unsignaled state.
19128 Therefore, for every signal operation, there can only be one wait
19129 operation.
19131 If the semaphore object is any one of the following types:
19132 :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D12Fence`,
19133 :py:obj:`~.cudaExternalSemaphoreHandleTypeD3D11Fence`,
19134 :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd`,
19135 :py:obj:`~.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32` then
19136 waiting on the semaphore will wait until the value of the semaphore is
19137 greater than or equal to
19138 :py:obj:`~.cudaExternalSemaphoreWaitParams`::params::fence::value.
19140 If the semaphore object is of the type
19141 :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync` then, waiting on
19142 the semaphore will wait until the
19143 :py:obj:`~.cudaExternalSemaphoreWaitParams`::params::nvSciSync::fence
19144 is signaled by the signaler of the NvSciSyncObj that was associated
19145 with this semaphore object. By default, waiting on such an external
19146 semaphore object causes appropriate memory synchronization operations
19147 to be performed over all external memory objects that are imported as
19148 :py:obj:`~.cudaExternalMemoryHandleTypeNvSciBuf`. This ensures that any
19149 subsequent accesses made by other importers of the same set of NvSciBuf
19150 memory object(s) are coherent. These operations can be skipped by
19151 specifying the flag
19152 :py:obj:`~.cudaExternalSemaphoreWaitSkipNvSciBufMemSync`, which can be
19153 used as a performance optimization when data coherency is not required.
19154 But specifying this flag in scenarios where data coherency is required
19155 results in undefined behavior. Also, for a semaphore object of the type
19156 :py:obj:`~.cudaExternalSemaphoreHandleTypeNvSciSync`, if the
19157 NvSciSyncAttrList used to create the NvSciSyncObj had not set the flags
19158 in :py:obj:`~.cudaDeviceGetNvSciSyncAttributes` to
19159 cudaNvSciSyncAttrWait, this API will return cudaErrorNotSupported.
19161 If the semaphore object is any one of the following types:
19162 :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutex`,
19163 :py:obj:`~.cudaExternalSemaphoreHandleTypeKeyedMutexKmt`, then the
19164 keyed mutex will be acquired when it is released with the key specified
19165 in
19166 :py:obj:`~.cudaExternalSemaphoreWaitParams`::params::keyedmutex::key
19167 or until the timeout specified by
19168 :py:obj:`~.cudaExternalSemaphoreWaitParams`::params::keyedmutex::timeoutMs
19169 has lapsed. The timeout interval can either be a finite value specified
19170 in milliseconds or an infinite value. In case an infinite value is
19171 specified, the timeout never elapses. The Windows INFINITE macro must be
19172 used to specify an infinite timeout.
19174 Parameters
19175 ----------
19176 extSemArray : list[:py:obj:`~.cudaExternalSemaphore_t`]
19177 External semaphores to be waited on
19178 paramsArray : list[:py:obj:`~.cudaExternalSemaphoreWaitParams`]
19179 Array of semaphore parameters
19180 numExtSems : unsigned int
19181 Number of semaphores to wait on
19182 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
19183 Stream to enqueue the wait operations in
19185 Returns
19186 -------
19187 cudaError_t
19188 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorTimeout`
19190 See Also
19191 --------
19192 :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaDestroyExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`
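Examples
--------
A minimal sketch waiting on the semaphore signaled in the
:py:obj:`~.cudaSignalExternalSemaphoresAsync` example, again on the
default (NULL) stream:

>>> from cuda.bindings import runtime
>>> wait_params = runtime.cudaExternalSemaphoreWaitParams()
>>> err, = runtime.cudaWaitExternalSemaphoresAsync([extSem], [wait_params], 1, 0)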
19193 """
19194 cdef cyruntime.cudaStream_t cystream
19195 if stream is None:
19196 pstream = 0
19197 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
19198 pstream = int(stream)
19199 else:
19200 pstream = int(cudaStream_t(stream))
19201 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
19202 paramsArray = [] if paramsArray is None else paramsArray
19203 if not all(isinstance(_x, (cudaExternalSemaphoreWaitParams,)) for _x in paramsArray):
19204 raise TypeError("Argument 'paramsArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphoreWaitParams,] or list[cyruntime.cudaExternalSemaphoreWaitParams,])")
19205 extSemArray = [] if extSemArray is None else extSemArray
19206 if not all(isinstance(_x, (cudaExternalSemaphore_t,)) for _x in extSemArray):
19207 raise TypeError("Argument 'extSemArray' is not instance of type (expected tuple[cyruntime.cudaExternalSemaphore_t,] or list[cyruntime.cudaExternalSemaphore_t,])")
19208 cdef cyruntime.cudaExternalSemaphore_t* cyextSemArray = NULL
19209 if len(extSemArray) > 1:
19210 cyextSemArray = <cyruntime.cudaExternalSemaphore_t*> calloc(len(extSemArray), sizeof(cyruntime.cudaExternalSemaphore_t))
19211 if cyextSemArray is NULL:
19212 raise MemoryError('Failed to allocate length x size memory: ' + str(len(extSemArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphore_t)))
19213 else:
19214 for idx in range(len(extSemArray)):
19215 cyextSemArray[idx] = <cyruntime.cudaExternalSemaphore_t>(<cudaExternalSemaphore_t>extSemArray[idx])._pvt_ptr[0]
19216 elif len(extSemArray) == 1:
19217 cyextSemArray = <cyruntime.cudaExternalSemaphore_t*>(<cudaExternalSemaphore_t>extSemArray[0])._pvt_ptr
19218 cdef cyruntime.cudaExternalSemaphoreWaitParams* cyparamsArray = NULL
19219 if len(paramsArray) > 1:
19220 cyparamsArray = <cyruntime.cudaExternalSemaphoreWaitParams*> calloc(len(paramsArray), sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
19221 if cyparamsArray is NULL:
19222 raise MemoryError('Failed to allocate length x size memory: ' + str(len(paramsArray)) + 'x' + str(sizeof(cyruntime.cudaExternalSemaphoreWaitParams)))
19223 for idx in range(len(paramsArray)):
19224 string.memcpy(&cyparamsArray[idx], (<cudaExternalSemaphoreWaitParams>paramsArray[idx])._pvt_ptr, sizeof(cyruntime.cudaExternalSemaphoreWaitParams))
19225 elif len(paramsArray) == 1:
19226 cyparamsArray = (<cudaExternalSemaphoreWaitParams>paramsArray[0])._pvt_ptr
19227 if numExtSems > len(extSemArray): raise RuntimeError("List is too small: " + str(len(extSemArray)) + " < " + str(numExtSems))
19228 if numExtSems > len(paramsArray): raise RuntimeError("List is too small: " + str(len(paramsArray)) + " < " + str(numExtSems))
19229 with nogil:
19230 err = cyruntime.cudaWaitExternalSemaphoresAsync(cyextSemArray, cyparamsArray, numExtSems, cystream)
19231 if len(extSemArray) > 1 and cyextSemArray is not NULL:
19232 free(cyextSemArray)
19233 if len(paramsArray) > 1 and cyparamsArray is not NULL:
19234 free(cyparamsArray)
19235 return (_dict_cudaError_t[err],)
19237@cython.embedsignature(True)
19238def cudaDestroyExternalSemaphore(extSem):
19239 """ Destroys an external semaphore.
19241 Destroys an external semaphore object and releases any references to
19242 the underlying resource. Any outstanding signals or waits must have
19243 completed before the semaphore is destroyed.
19245 Parameters
19246 ----------
19247 extSem : :py:obj:`~.cudaExternalSemaphore_t`
19248 External semaphore to be destroyed
19250 Returns
19251 -------
19252 cudaError_t
19253 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`
19255 See Also
19256 --------
19257 :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
19258 """
19259 cdef cyruntime.cudaExternalSemaphore_t cyextSem
19260 if extSem is None:
19261 pextSem = 0
19262 elif isinstance(extSem, (cudaExternalSemaphore_t,)):
19263 pextSem = int(extSem)
19264 else:
19265 pextSem = int(cudaExternalSemaphore_t(extSem))
19266 cyextSem = <cyruntime.cudaExternalSemaphore_t><void_ptr>pextSem
19267 with nogil:
19268 err = cyruntime.cudaDestroyExternalSemaphore(cyextSem)
19269 return (_dict_cudaError_t[err],)
19271@cython.embedsignature(True)
19272def cudaFuncSetCacheConfig(func, cacheConfig not None : cudaFuncCache):
19273 """ Sets the preferred cache configuration for a device function.
19275 On devices where the L1 cache and shared memory use the same hardware
19276 resources, this sets through `cacheConfig` the preferred cache
19277 configuration for the function specified via `func`. This is only a
19278 preference. The runtime will use the requested configuration if
19279 possible, but it is free to choose a different configuration if
19280 required to execute `func`.
19282 `func` is a device function symbol and must be declared as a `__global__`
19283 function. If the specified function does not exist, then
19284 :py:obj:`~.cudaErrorInvalidDeviceFunction` is returned. For templated
19285 functions, pass the function symbol as follows:
19286 func_name<template_arg_0,...,template_arg_N>
19288 This setting does nothing on devices where the size of the L1 cache and
19289 shared memory are fixed.
19291 Launching a kernel with a different preference than the most recent
19292 preference setting may insert a device-side synchronization point.
19294 The supported cache configurations are:
19296 - :py:obj:`~.cudaFuncCachePreferNone`: no preference for shared memory
19297 or L1 (default)
19299 - :py:obj:`~.cudaFuncCachePreferShared`: prefer larger shared memory
19300 and smaller L1 cache
19302 - :py:obj:`~.cudaFuncCachePreferL1`: prefer larger L1 cache and smaller
19303 shared memory
19305 - :py:obj:`~.cudaFuncCachePreferEqual`: prefer equal size L1 cache and
19306 shared memory
19308 Parameters
19309 ----------
19310 func : Any
19311 Device function symbol
19312 cacheConfig : :py:obj:`~.cudaFuncCache`
19313 Requested cache configuration
19315 Returns
19316 -------
19317 cudaError_t
19318 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`
19320 See Also
19321 --------
19322 cudaFuncSetCacheConfig (C++ API), :py:obj:`~.cudaFuncGetAttributes (C API)`, :py:obj:`~.cudaLaunchKernel (C API)`, :py:obj:`~.cuFuncSetCacheConfig`
19324 Notes
19325 -----
19326 This API does not accept a :py:obj:`~.cudaKernel_t` casted as void*. If cache config modification is required for a :py:obj:`~.cudaKernel_t` (or a global function), it can be replaced with a call to :py:obj:`~.cudaFuncSetAttributes` with the attribute :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout` to specify a more granular L1 cache and shared memory split configuration.
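Examples
--------
A minimal sketch, assuming `func_ptr` is a device-function pointer
obtained elsewhere (this API does not accept a :py:obj:`~.cudaKernel_t`;
see the note above):

>>> from cuda.bindings import runtime
>>> err, = runtime.cudaFuncSetCacheConfig(
...     func_ptr, runtime.cudaFuncCache.cudaFuncCachePreferShared)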
19327 """
19328 cyfunc = _HelperInputVoidPtr(func)
19329 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19330 cdef cyruntime.cudaFuncCache cycacheConfig = cacheConfig.value
19331 with nogil:
19332 err = cyruntime.cudaFuncSetCacheConfig(cyfunc_ptr, cycacheConfig)
19333 return (_dict_cudaError_t[err],)
19335@cython.embedsignature(True)
19336def cudaFuncGetAttributes(func):
19337 """ Find out attributes for a given function.
19339 This function obtains the attributes of a function specified via
19340 `func`. `func` is a device function symbol and must be declared as a
19341 `__global__` function. The fetched attributes are placed in `attr`. If the
19342 specified function does not exist, then it is assumed to be a
19343 :py:obj:`~.cudaKernel_t` and used as is. For templated functions, pass
19344 the function symbol as follows:
19345 func_name<template_arg_0,...,template_arg_N>
19347 Note that some function attributes such as
19348 :py:obj:`~.maxThreadsPerBlock` may vary based on the device that is
19349 currently being used.
19351 Parameters
19352 ----------
19353 func : Any
19354 Device function symbol
19356 Returns
19357 -------
19358 cudaError_t
19359 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`
19360 attr : :py:obj:`~.cudaFuncAttributes`
19361 Return pointer to function's attributes
19363 See Also
19364 --------
19365 :py:obj:`~.cudaFuncSetCacheConfig (C API)`, cudaFuncGetAttributes (C++ API), :py:obj:`~.cudaLaunchKernel (C API)`, :py:obj:`~.cuFuncGetAttribute`
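Examples
--------
A minimal sketch, assuming `kernel` is a :py:obj:`~.cudaKernel_t`
obtained elsewhere (for example from :py:obj:`~.cudaLibraryGetKernel`):

>>> from cuda.bindings import runtime
>>> err, attr = runtime.cudaFuncGetAttributes(kernel)
>>> print(attr.maxThreadsPerBlock, attr.sharedSizeBytes)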
19366 """
19367 cdef cudaFuncAttributes attr = cudaFuncAttributes()
19368 cyfunc = _HelperInputVoidPtr(func)
19369 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19370 with nogil:
19371 err = cyruntime.cudaFuncGetAttributes(<cyruntime.cudaFuncAttributes*>attr._pvt_ptr, cyfunc_ptr)
19372 if err != cyruntime.cudaSuccess:
19373 return (_dict_cudaError_t[err], None)
19374 return (_dict_cudaError_t[err], attr)
19376@cython.embedsignature(True)
19377def cudaFuncSetAttribute(func, attr not None : cudaFuncAttribute, int value):
19378 """ Set attributes for a given function.
19380 This function sets the attributes of a function specified via `func`.
19381 The parameter `func` must be a pointer to a function that executes on
19382 the device. The parameter specified by `func` must be declared as a
19383 `__global__` function. The enumeration defined by `attr` is set to the value
19384 defined by `value`. If the specified function does not exist, then it
19385 is assumed to be a :py:obj:`~.cudaKernel_t` and used as is. If the
19386 specified attribute cannot be written, or if the value is incorrect,
19387 then :py:obj:`~.cudaErrorInvalidValue` is returned.
19389 Valid values for `attr` are:
19391 - :py:obj:`~.cudaFuncAttributeMaxDynamicSharedMemorySize` - The
19392 requested maximum size in bytes of dynamically-allocated shared
19393 memory. The sum of this value and the function attribute
19394 :py:obj:`~.sharedSizeBytes` cannot exceed the device attribute
19395 :py:obj:`~.cudaDevAttrMaxSharedMemoryPerBlockOptin`. The maximal size
19396 of requestable dynamic shared memory may differ by GPU architecture.
19398 - :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout` - On
19399 devices where the L1 cache and shared memory use the same hardware
19400 resources, this sets the shared memory carveout preference, in
19401 percent of the total shared memory. See
19402 :py:obj:`~.cudaDevAttrMaxSharedMemoryPerMultiprocessor`. This is only
19403 a hint, and the driver can choose a different ratio if required to
19404 execute the function.
19406 - :py:obj:`~.cudaFuncAttributeRequiredClusterWidth`: The required
19407 cluster width in blocks. The width, height, and depth values must
19408 either all be 0 or all be positive. The validity of the cluster
19409 dimensions is checked at launch time. If the value is set during
19410 compile time, it cannot be set at runtime. Setting it at runtime will
19411 return cudaErrorNotPermitted.
19413 - :py:obj:`~.cudaFuncAttributeRequiredClusterHeight`: The required
19414 cluster height in blocks. The width, height, and depth values must
19415 either all be 0 or all be positive. The validity of the cluster
19416 dimensions is checked at launch time. If the value is set during
19417 compile time, it cannot be set at runtime. Setting it at runtime will
19418 return cudaErrorNotPermitted.
19420 - :py:obj:`~.cudaFuncAttributeRequiredClusterDepth`: The required
19421 cluster depth in blocks. The width, height, and depth values must
19422 either all be 0 or all be positive. The validity of the cluster
19423 dimensions is checked at launch time. If the value is set during
19424 compile time, it cannot be set at runtime. Setting it at runtime will
19425 return cudaErrorNotPermitted.
19427 - :py:obj:`~.cudaFuncAttributeNonPortableClusterSizeAllowed`: Indicates
19428 whether the function can be launched with non-portable cluster size.
19429 1 is allowed, 0 is disallowed.
19431 - :py:obj:`~.cudaFuncAttributeClusterSchedulingPolicyPreference`: The
19432 block scheduling policy of a function. The value type is
19433 cudaClusterSchedulingPolicy.
19435 See also cudaLaunchKernel (C++ API), cudaFuncSetCacheConfig (C++ API),
19436 and :py:obj:`~.cudaFuncGetAttributes (C API)`.
19438 Parameters
19439 ----------
19440 func : Any
19441 Function to set attributes for
19442 attr : :py:obj:`~.cudaFuncAttribute`
19443 Attribute to set
19444 value : int
19445 Value to set
19447 Returns
19448 -------
19449 cudaError_t
19450 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`
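Examples
--------
A minimal sketch opting a kernel into a larger dynamic shared memory
allocation (hypothetical: `kernel` is a :py:obj:`~.cudaKernel_t` and
64 KiB is within :py:obj:`~.cudaDevAttrMaxSharedMemoryPerBlockOptin`):

>>> from cuda.bindings import runtime
>>> err, = runtime.cudaFuncSetAttribute(
...     kernel,
...     runtime.cudaFuncAttribute.cudaFuncAttributeMaxDynamicSharedMemorySize,
...     64 * 1024)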
19451 """
19452 cyfunc = _HelperInputVoidPtr(func)
19453 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19454 cdef cyruntime.cudaFuncAttribute cyattr = attr.value
19455 with nogil:
19456 err = cyruntime.cudaFuncSetAttribute(cyfunc_ptr, cyattr, value)
19457 return (_dict_cudaError_t[err],)
19459ctypedef struct cudaStreamHostCallbackData_st:
19460 cyruntime.cudaHostFn_t callback
19461 void *userData
19463ctypedef cudaStreamHostCallbackData_st cudaStreamHostCallbackData
19465@cython.show_performance_hints(False)
19466cdef void cudaStreamRtHostCallbackWrapper(void *data) nogil:
19467 cdef cudaStreamHostCallbackData *cbData = <cudaStreamHostCallbackData *>data
19468 with gil:
19469 cbData.callback(cbData.userData)
19470 free(cbData)
19472@cython.embedsignature(True)
19473def cudaLaunchHostFunc(stream, fn, userData):
19474 """ Enqueues a host function call in a stream.
19476 Enqueues a host function to run in a stream. The function will be
19477 called after currently enqueued work and will block work added after
19478 it.
19480 The host function must not make any CUDA API calls. Attempting to use a
19481 CUDA API may result in :py:obj:`~.cudaErrorNotPermitted`, but this is
19482 not required. The host function must not perform any synchronization
19483 that may depend on outstanding CUDA work not mandated to run earlier.
19484 Host functions without a mandated order (such as in independent
19485 streams) execute in undefined order and may be serialized.
19487 For the purposes of Unified Memory, execution makes a number of
19488 guarantees:
19490 - The stream is considered idle for the duration of the function's
19491 execution. Thus, for example, the function may always use memory
19492 attached to the stream it was enqueued in.
19494 - The start of execution of the function has the same effect as
19495 synchronizing an event recorded in the same stream immediately prior
19496 to the function. It thus synchronizes streams which have been
19497 "joined" prior to the function.
19499 - Adding device work to any stream does not have the effect of making
19500 the stream active until all preceding host functions and stream
19501 callbacks have executed. Thus, for example, a function might use
19502 global attached memory even if work has been added to another stream,
19503 if the work has been ordered behind the function call with an event.
19505 - Completion of the function does not cause a stream to become active
19506 except as described above. The stream will remain idle if no device
19507 work follows the function, and will remain idle across consecutive
19508 host functions or stream callbacks without device work in between.
19509 Thus, for example, stream synchronization can be done by signaling
19510 from a host function at the end of the stream.
19512 Note that, in contrast to :py:obj:`~.cuStreamAddCallback`, the
19513 function will not be called in the event of an error in the CUDA
19514 context.
19516 Parameters
19517 ----------
19518 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
19519 Stream to enqueue function call in
19520 fn : :py:obj:`~.cudaHostFn_t`
19521 The function to call once preceding stream operations are complete
19522 userData : Any
19523 User-specified data to be passed to the function
19525 Returns
19526 -------
19527 cudaError_t
19528 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
19530 See Also
19531 --------
19532 :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cudaStreamAttachMemAsync`, :py:obj:`~.cudaStreamAddCallback`, :py:obj:`~.cuLaunchHostFunc`
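Examples
--------
A minimal sketch using a ctypes trampoline for the host callback; the
ctypes objects must be kept alive until the callback has run
(illustrative only; the callback must not call into the CUDA API):

>>> import ctypes
>>> from cuda.bindings import runtime
>>> HOSTFN = ctypes.CFUNCTYPE(None, ctypes.c_void_p)
>>> c_cb = HOSTFN(lambda _: print("stream reached the callback"))
>>> fn = runtime.cudaHostFn_t(ctypes.cast(c_cb, ctypes.c_void_p).value)
>>> err, stream = runtime.cudaStreamCreate()
>>> err, = runtime.cudaLaunchHostFunc(stream, fn, None)
>>> err, = runtime.cudaStreamSynchronize(stream)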
19533 """
19534 cdef cyruntime.cudaHostFn_t cyfn
19535 if fn is None:
19536 pfn = 0
19537 elif isinstance(fn, (cudaHostFn_t,)):
19538 pfn = int(fn)
19539 else:
19540 pfn = int(cudaHostFn_t(fn))
19541 cyfn = <cyruntime.cudaHostFn_t><void_ptr>pfn
19542 cdef cyruntime.cudaStream_t cystream
19543 if stream is None:
19544 pstream = 0
19545 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
19546 pstream = int(stream)
19547 else:
19548 pstream = int(cudaStream_t(stream))
19549 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
19550 cyuserData = _HelperInputVoidPtr(userData)
19551 cdef void* cyuserData_ptr = <void*><void_ptr>cyuserData.cptr
19553 cdef cudaStreamHostCallbackData *cbData = NULL
19554 cbData = <cudaStreamHostCallbackData *>malloc(sizeof(cbData[0]))
19555 if cbData == NULL:
19556 return (cudaError_t.cudaErrorMemoryAllocation,)
19557 cbData.callback = cyfn
19558 cbData.userData = cyuserData_ptr
19560 with nogil:
19561 err = cyruntime.cudaLaunchHostFunc(cystream, <cyruntime.cudaHostFn_t>cudaStreamRtHostCallbackWrapper, <void *>cbData)
19562 if err != cyruntime.cudaSuccess:
19563 free(cbData)
19564 return (_dict_cudaError_t[err],)
19566@cython.embedsignature(True)
19567def cudaFuncSetSharedMemConfig(func, config not None : cudaSharedMemConfig):
19568 """ Sets the shared memory configuration for a device function.
19570 [Deprecated]
19572 On devices with configurable shared memory banks, this function will
19573 force all subsequent launches of the specified device function to have
19574 the given shared memory bank size configuration. On any given launch of
19575 the function, the shared memory configuration of the device will be
19576 temporarily changed if needed to suit the function's preferred
19577 configuration. Changes in shared memory configuration between
19578 subsequent launches of functions may introduce a device-side
19579 synchronization point.
19581 Any per-function setting of shared memory bank size set via
19582 :py:obj:`~.cudaFuncSetSharedMemConfig` will override the device wide
19583 setting set by :py:obj:`~.cudaDeviceSetSharedMemConfig`.
19585 Changing the shared memory bank size will not increase shared memory
19586 usage or affect occupancy of kernels, but may have major effects on
19587 performance. Larger bank sizes will allow for greater potential
19588 bandwidth to shared memory, but will change what kinds of accesses to
19589 shared memory will result in bank conflicts.
19591 This function will do nothing on devices with fixed shared memory bank
19592 size.
19594 For templated functions, pass the function symbol as follows:
19595 func_name<template_arg_0,...,template_arg_N>
19597 The supported bank configurations are:
19599 - :py:obj:`~.cudaSharedMemBankSizeDefault`: use the device's shared
19600 memory configuration when launching this function.
19602 - :py:obj:`~.cudaSharedMemBankSizeFourByte`: set shared memory bank
19603 width to be four bytes natively when launching this function.
19605 - :py:obj:`~.cudaSharedMemBankSizeEightByte`: set shared memory bank
19606 width to be eight bytes natively when launching this function.
19608 Parameters
19609 ----------
19610 func : Any
19611 Device function symbol
19612 config : :py:obj:`~.cudaSharedMemConfig`
19613 Requested shared memory configuration
19615 Returns
19616 -------
19617 cudaError_t
19618 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`
19620 See Also
19621 --------
19622 :py:obj:`~.cudaDeviceSetSharedMemConfig`, :py:obj:`~.cudaDeviceGetSharedMemConfig`, :py:obj:`~.cudaDeviceSetCacheConfig`, :py:obj:`~.cudaDeviceGetCacheConfig`, :py:obj:`~.cudaFuncSetCacheConfig`, :py:obj:`~.cuFuncSetSharedMemConfig`
19623 """
19624 cyfunc = _HelperInputVoidPtr(func)
19625 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19626 cdef cyruntime.cudaSharedMemConfig cyconfig = config.value
19627 with nogil:
19628 err = cyruntime.cudaFuncSetSharedMemConfig(cyfunc_ptr, cyconfig)
19629 return (_dict_cudaError_t[err],)
19631@cython.embedsignature(True)
19632def cudaOccupancyMaxActiveBlocksPerMultiprocessor(func, int blockSize, size_t dynamicSMemSize):
19633 """ Returns occupancy for a device function.
19635 Returns in `*numBlocks` the maximum number of active blocks per
19636 streaming multiprocessor for the device function.
19638 Parameters
19639 ----------
19640 func : Any
19641 Kernel function for which occupancy is calculated
19642 blockSize : int
19643 Block size the kernel is intended to be launched with
19644 dynamicSMemSize : size_t
19645 Per-block dynamic shared memory usage intended, in bytes
19647 Returns
19648 -------
19649 cudaError_t
19650 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
19651 numBlocks : int
19652 Returned occupancy
19654 See Also
19655 --------
19656 :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`, cudaOccupancyMaxPotentialBlockSize (C++ API), cudaOccupancyMaxPotentialBlockSizeWithFlags (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMem (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags (C++ API), cudaOccupancyAvailableDynamicSMemPerBlock (C++ API), :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessor`
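Examples
--------
A minimal sketch, assuming `kernel` is a :py:obj:`~.cudaKernel_t`
obtained elsewhere, a block size of 256 threads, and no dynamic shared
memory:

>>> from cuda.bindings import runtime
>>> err, num_blocks = runtime.cudaOccupancyMaxActiveBlocksPerMultiprocessor(kernel, 256, 0)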
19657 """
19658 cdef int numBlocks = 0
19659 cyfunc = _HelperInputVoidPtr(func)
19660 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19661 with nogil:
19662 err = cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessor(&numBlocks, cyfunc_ptr, blockSize, dynamicSMemSize)
19663 if err != cyruntime.cudaSuccess:
19664 return (_dict_cudaError_t[err], None)
19665 return (_dict_cudaError_t[err], numBlocks)
19667@cython.embedsignature(True)
19668def cudaOccupancyAvailableDynamicSMemPerBlock(func, int numBlocks, int blockSize):
19669 """ Returns dynamic shared memory available per block when launching `numBlocks` blocks on SM.
19671 Returns in `*dynamicSmemSize` the maximum size of dynamic shared memory
19672 to allow `numBlocks` blocks per SM.
19674 Parameters
19675 ----------
19676 func : Any
19677 Kernel function for which occupancy is calculated
19678 numBlocks : int
19679 Number of blocks to fit on SM
19680 blockSize : int
19681 Size of the block
19683 Returns
19684 -------
19685 cudaError_t
19686 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
19687 dynamicSmemSize : int
19688 Returned maximum dynamic shared memory
19690 See Also
19691 --------
19692 :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`, cudaOccupancyMaxPotentialBlockSize (C++ API), cudaOccupancyMaxPotentialBlockSizeWithFlags (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMem (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags (C++ API), :py:obj:`~.cudaOccupancyAvailableDynamicSMemPerBlock`
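Examples
--------
A minimal sketch asking how much dynamic shared memory is still
available if two 128-thread blocks are to be resident per SM (`kernel`
as in the previous example):

>>> from cuda.bindings import runtime
>>> err, smem = runtime.cudaOccupancyAvailableDynamicSMemPerBlock(kernel, 2, 128)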
19693 """
19694 cdef size_t dynamicSmemSize = 0
19695 cyfunc = _HelperInputVoidPtr(func)
19696 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19697 with nogil:
19698 err = cyruntime.cudaOccupancyAvailableDynamicSMemPerBlock(&dynamicSmemSize, cyfunc_ptr, numBlocks, blockSize)
19699 if err != cyruntime.cudaSuccess:
19700 return (_dict_cudaError_t[err], None)
19701 return (_dict_cudaError_t[err], dynamicSmemSize)
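# Editorial usage sketch (not part of the generated bindings): the inverse
# query, asking how much dynamic shared memory each block may use if two
# 256-thread blocks are to fit on one SM. `kernel` is again an assumed
# device-function handle.
#
#     err, smemBytes = cudaOccupancyAvailableDynamicSMemPerBlock(kernel, 2, 256)
#     if err == cudaError_t.cudaSuccess:
#         print(f"dynamic shared memory available per block: {smemBytes} bytes")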
19703@cython.embedsignature(True)
19704def cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(func, int blockSize, size_t dynamicSMemSize, unsigned int flags):
19705 """ Returns occupancy for a device function with the specified flags.
19707 Returns in `*numBlocks` the maximum number of active blocks per
19708 streaming multiprocessor for the device function.
19710 The `flags` parameter controls how special cases are handled. Valid
19711 flags include:
19713 - :py:obj:`~.cudaOccupancyDefault`: keeps the default behavior as
19714 :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessor`
19716     - :py:obj:`~.cudaOccupancyDisableCachingOverride`: This flag suppresses
19717       the default behavior on platforms where global caching affects
19718       occupancy. On such platforms, if caching is enabled, but per-block SM
19719       resource usage would result in zero occupancy, the occupancy
19720       calculator will calculate the occupancy as if caching is disabled.
19721       Setting this flag makes the occupancy calculator return 0 in such
19722       cases. More information about this feature can be found in the
19723       "Unified L1/Texture Cache" section of the Maxwell tuning guide.
19725 Parameters
19726 ----------
19727 func : Any
19728 Kernel function for which occupancy is calculated
19729 blockSize : int
19730 Block size the kernel is intended to be launched with
19731 dynamicSMemSize : size_t
19732 Per-block dynamic shared memory usage intended, in bytes
19733 flags : unsigned int
19734 Requested behavior for the occupancy calculator
19736 Returns
19737 -------
19738 cudaError_t
19739 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`,
19740 numBlocks : int
19741 Returned occupancy
19743 See Also
19744 --------
19745 :py:obj:`~.cudaOccupancyMaxActiveBlocksPerMultiprocessor`, cudaOccupancyMaxPotentialBlockSize (C++ API), cudaOccupancyMaxPotentialBlockSizeWithFlags (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMem (C++ API), cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags (C++ API), cudaOccupancyAvailableDynamicSMemPerBlock (C++ API), :py:obj:`~.cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags`
19746 """
19747 cdef int numBlocks = 0
19748 cyfunc = _HelperInputVoidPtr(func)
19749 cdef void* cyfunc_ptr = <void*><void_ptr>cyfunc.cptr
19750 with nogil:
19751 err = cyruntime.cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(&numBlocks, cyfunc_ptr, blockSize, dynamicSMemSize, flags)
19752 if err != cyruntime.cudaSuccess:
19753 return (_dict_cudaError_t[err], None)
19754 return (_dict_cudaError_t[err], numBlocks)
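# Editorial usage sketch (not part of the generated bindings): the same
# occupancy query with the caching override suppressed, so platforms where
# global caching affects occupancy report 0 rather than the as-if-disabled
# estimate described above. `kernel` is an assumed handle; the flag name is
# taken from the cross-reference in the docstring.
#
#     err, numBlocks = cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
#         kernel, 256, 0, cudaOccupancyDisableCachingOverride)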
19756@cython.embedsignature(True)
19757def cudaMallocManaged(size_t size, unsigned int flags):
19758 """ Allocates memory that will be automatically managed by the Unified Memory system.
19760 Allocates `size` bytes of managed memory on the device and returns in
19761 `*devPtr` a pointer to the allocated memory. If the device doesn't
19762 support allocating managed memory, :py:obj:`~.cudaErrorNotSupported` is
19763 returned. Support for managed memory can be queried using the device
19764 attribute :py:obj:`~.cudaDevAttrManagedMemory`. The allocated memory is
19765 suitably aligned for any kind of variable. The memory is not cleared.
19766 If `size` is 0, :py:obj:`~.cudaMallocManaged` returns
19767 :py:obj:`~.cudaErrorInvalidValue`. The pointer is valid on the CPU and
19768 on all GPUs in the system that support managed memory. All accesses to
19769 this pointer must obey the Unified Memory programming model.
19771 `flags` specifies the default stream association for this allocation.
19772 `flags` must be one of :py:obj:`~.cudaMemAttachGlobal` or
19773 :py:obj:`~.cudaMemAttachHost`. The default value for `flags` is
19774 :py:obj:`~.cudaMemAttachGlobal`. If :py:obj:`~.cudaMemAttachGlobal` is
19775 specified, then this memory is accessible from any stream on any
19776 device. If :py:obj:`~.cudaMemAttachHost` is specified, then the
19777 allocation should not be accessed from devices that have a zero value
19778 for the device attribute
19779 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`; an explicit call to
19780 :py:obj:`~.cudaStreamAttachMemAsync` will be required to enable access
19781 on such devices.
19783 If the association is later changed via
19784 :py:obj:`~.cudaStreamAttachMemAsync` to a single stream, the default
19785     association, as specified during :py:obj:`~.cudaMallocManaged`, is
19786 restored when that stream is destroyed. For managed variables, the
19787 default association is always :py:obj:`~.cudaMemAttachGlobal`. Note
19788 that destroying a stream is an asynchronous operation, and as a result,
19789 the change to default association won't happen until all work in the
19790 stream has completed.
19792 Memory allocated with :py:obj:`~.cudaMallocManaged` should be released
19793 with :py:obj:`~.cudaFree`.
19795 Device memory oversubscription is possible for GPUs that have a non-
19796 zero value for the device attribute
19797 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. Managed memory on such
19798 GPUs may be evicted from device memory to host memory at any time by
19799 the Unified Memory driver in order to make room for other allocations.
19801 In a system where all GPUs have a non-zero value for the device
19802 attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`, managed
19803 memory may not be populated when this API returns and instead may be
19804 populated on access. In such systems, managed memory can migrate to any
19805 processor's memory at any time. The Unified Memory driver will employ
19806 heuristics to maintain data locality and prevent excessive page faults
19807 to the extent possible. The application can also guide the driver about
19808 memory usage patterns via :py:obj:`~.cudaMemAdvise`. The application
19809 can also explicitly migrate memory to a desired processor's memory via
19810 :py:obj:`~.cudaMemPrefetchAsync`.
19812 In a multi-GPU system where all of the GPUs have a zero value for the
19813 device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess` and all
19814 the GPUs have peer-to-peer support with each other, the physical
19815 storage for managed memory is created on the GPU which is active at the
19816 time :py:obj:`~.cudaMallocManaged` is called. All other GPUs will
19817 reference the data at reduced bandwidth via peer mappings over the PCIe
19818 bus. The Unified Memory driver does not migrate memory among such GPUs.
19820 In a multi-GPU system where not all GPUs have peer-to-peer support with
19821 each other and where the value of the device attribute
19822 :py:obj:`~.cudaDevAttrConcurrentManagedAccess` is zero for at least one
19823 of those GPUs, the location chosen for physical storage of managed
19824 memory is system-dependent.
19826 - On Linux, the location chosen will be device memory as long as the
19827 current set of active contexts are on devices that either have peer-
19828 to-peer support with each other or have a non-zero value for the
19829 device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. If
19830 there is an active context on a GPU that does not have a non-zero
19831 value for that device attribute and it does not have peer-to-peer
19832 support with the other devices that have active contexts on them,
19833 then the location for physical storage will be 'zero-copy' or host
19834 memory. Note that this means that managed memory that is located in
19835 device memory is migrated to host memory if a new context is created
19836 on a GPU that doesn't have a non-zero value for the device attribute
19837 and does not support peer-to-peer with at least one of the other
19838 devices that has an active context. This in turn implies that context
19839 creation may fail if there is insufficient host memory to migrate all
19840 managed allocations.
19842 - On Windows, the physical storage is always created in 'zero-copy' or
19843 host memory. All GPUs will reference the data at reduced bandwidth
19844 over the PCIe bus. In these circumstances, use of the environment
19845 variable CUDA_VISIBLE_DEVICES is recommended to restrict CUDA to only
19846 use those GPUs that have peer-to-peer support. Alternatively, users
19847 can also set CUDA_MANAGED_FORCE_DEVICE_ALLOC to a non-zero value to
19848 force the driver to always use device memory for physical storage.
19849 When this environment variable is set to a non-zero value, all
19850 devices used in that process that support managed memory have to be
19851 peer-to-peer compatible with each other. The error
19852 :py:obj:`~.cudaErrorInvalidDevice` will be returned if a device that
19853 supports managed memory is used and it is not peer-to-peer compatible
19854 with any of the other managed memory supporting devices that were
19855 previously used in that process, even if :py:obj:`~.cudaDeviceReset`
19856 has been called on those devices. These environment variables are
19857 described in the CUDA programming guide under the "CUDA environment
19858 variables" section.
19860 Parameters
19861 ----------
19862 size : size_t
19863 Requested allocation size in bytes
19864 flags : unsigned int
19865 Must be either :py:obj:`~.cudaMemAttachGlobal` or
19866 :py:obj:`~.cudaMemAttachHost` (defaults to
19867 :py:obj:`~.cudaMemAttachGlobal`)
19869 Returns
19870 -------
19871 cudaError_t
19872 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
19873 devPtr : Any
19874 Pointer to allocated device memory
19876 See Also
19877 --------
19878 :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cudaDeviceGetAttribute`, :py:obj:`~.cudaStreamAttachMemAsync`, :py:obj:`~.cuMemAllocManaged`
19879 """
19880 cdef void_ptr devPtr = 0
19881 with nogil:
19882 err = cyruntime.cudaMallocManaged(<void**>&devPtr, size, flags)
19883 if err != cyruntime.cudaSuccess:
19884 return (_dict_cudaError_t[err], None)
19885 return (_dict_cudaError_t[err], devPtr)
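# Editorial usage sketch (not part of the generated bindings): allocating
# 1 MiB of managed memory with the default global stream association and
# releasing it with cudaFree, as the docstring above requires.
#
#     err, devPtr = cudaMallocManaged(1 << 20, cudaMemAttachGlobal)
#     assert err == cudaError_t.cudaSuccess
#     err, = cudaFree(devPtr)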
19887@cython.embedsignature(True)
19888def cudaMalloc(size_t size):
19889 """ Allocate memory on the device.
19891 Allocates `size` bytes of linear memory on the device and returns in
19892 `*devPtr` a pointer to the allocated memory. The allocated memory is
19893 suitably aligned for any kind of variable. The memory is not cleared.
19894 :py:obj:`~.cudaMalloc()` returns :py:obj:`~.cudaErrorMemoryAllocation`
19895 in case of failure.
19897 The device version of :py:obj:`~.cudaFree` cannot be used with a
19898 `*devPtr` allocated using the host API, and vice versa.
19900 Parameters
19901 ----------
19902 size : size_t
19903 Requested allocation size in bytes
19905 Returns
19906 -------
19907 cudaError_t
19908 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
19909 devPtr : Any
19910 Pointer to allocated device memory
19912 See Also
19913 --------
19914 :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemAlloc`
19915 """
19916 cdef void_ptr devPtr = 0
19917 with nogil:
19918 err = cyruntime.cudaMalloc(<void**>&devPtr, size)
19919 if err != cyruntime.cudaSuccess:
19920 return (_dict_cudaError_t[err], None)
19921 return (_dict_cudaError_t[err], devPtr)
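# Editorial usage sketch (not part of the generated bindings): the basic
# allocate/free round trip; the returned device pointer is a plain integer
# address in this module's convention.
#
#     err, devPtr = cudaMalloc(1 << 20)   # 1 MiB of linear device memory
#     assert err == cudaError_t.cudaSuccess
#     err, = cudaFree(devPtr)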
19923@cython.embedsignature(True)
19924def cudaMallocHost(size_t size):
19925 """ Allocates page-locked memory on the host.
19927 Allocates `size` bytes of host memory that is page-locked and
19928 accessible to the device. The driver tracks the virtual memory ranges
19929 allocated with this function and automatically accelerates calls to
19930 functions such as :py:obj:`~.cudaMemcpy`*(). Since the memory can be
19931 accessed directly by the device, it can be read or written with much
19932 higher bandwidth than pageable memory obtained with functions such as
19933 :py:obj:`~.malloc()`.
19935 On systems where :py:obj:`~.pageableMemoryAccessUsesHostPageTables` is
19936 true, :py:obj:`~.cudaMallocHost` may not page-lock the allocated
19937 memory.
19939 Page-locking excessive amounts of memory with
19940 :py:obj:`~.cudaMallocHost()` may degrade system performance, since it
19941 reduces the amount of memory available to the system for paging. As a
19942 result, this function is best used sparingly to allocate staging areas
19943 for data exchange between host and device.
19945 Parameters
19946 ----------
19947 size : size_t
19948 Requested allocation size in bytes
19950 Returns
19951 -------
19952 cudaError_t
19953 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
19954 ptr : Any
19955 Pointer to allocated host memory
19957 See Also
19958 --------
19959 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, cudaMallocHost (C++ API), :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemAllocHost`
19960 """
19961 cdef void_ptr ptr = 0
19962 with nogil:
19963 err = cyruntime.cudaMallocHost(<void**>&ptr, size)
19964 if err != cyruntime.cudaSuccess:
19965 return (_dict_cudaError_t[err], None)
19966 return (_dict_cudaError_t[err], ptr)
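# Editorial usage sketch (not part of the generated bindings): a small
# pinned staging buffer, released with cudaFreeHost rather than cudaFree.
#
#     err, hostPtr = cudaMallocHost(4096)   # one page of page-locked memory
#     assert err == cudaError_t.cudaSuccess
#     err, = cudaFreeHost(hostPtr)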
19968@cython.embedsignature(True)
19969def cudaMallocPitch(size_t width, size_t height):
19970 """ Allocates pitched memory on the device.
19972 Allocates at least `width` (in bytes) * `height` bytes of linear memory
19973 on the device and returns in `*devPtr` a pointer to the allocated
19974 memory. The function may pad the allocation to ensure that
19975 corresponding pointers in any given row will continue to meet the
19976 alignment requirements for coalescing as the address is updated from
19977 row to row. The pitch returned in `*pitch` by
19978 :py:obj:`~.cudaMallocPitch()` is the width in bytes of the allocation.
19979 The intended usage of `pitch` is as a separate parameter of the
19980 allocation, used to compute addresses within the 2D array. Given the
19981 row and column of an array element of type `T`, the address is computed
19982 as:
19984 **View CUDA Toolkit Documentation for a C++ code example**
19986 For allocations of 2D arrays, it is recommended that programmers
19987 consider performing pitch allocations using
19988 :py:obj:`~.cudaMallocPitch()`. Due to pitch alignment restrictions in
19989 the hardware, this is especially true if the application will be
19990 performing 2D memory copies between different regions of device memory
19991 (whether linear memory or CUDA arrays).
19993 Parameters
19994 ----------
19995 width : size_t
19996 Requested pitched allocation width (in bytes)
19997 height : size_t
19998 Requested pitched allocation height
20000 Returns
20001 -------
20002 cudaError_t
20003 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20004 devPtr : Any
20005 Pointer to allocated pitched device memory
20006 pitch : int
20007 Pitch for allocation
20009 See Also
20010 --------
20011 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemAllocPitch`
20012 """
20013 cdef void_ptr devPtr = 0
20014 cdef size_t pitch = 0
20015 with nogil:
20016 err = cyruntime.cudaMallocPitch(<void**>&devPtr, &pitch, width, height)
20017 if err != cyruntime.cudaSuccess:
20018 return (_dict_cudaError_t[err], None, None)
20019 return (_dict_cudaError_t[err], devPtr, pitch)
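# Editorial usage sketch (not part of the generated bindings): allocating a
# pitched 512x256 region of float32 values and computing one element's
# address from the returned pitch, mirroring the C idiom
# T* p = (T*)((char*)base + row * pitch) + col shown in the Toolkit docs.
#
#     err, devPtr, pitch = cudaMallocPitch(512 * 4, 256)  # width in bytes
#     assert err == cudaError_t.cudaSuccess
#     row, col = 10, 3
#     elem_addr = devPtr + row * pitch + col * 4  # pointers are ints here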
20021@cython.embedsignature(True)
20022def cudaMallocArray(desc : Optional[cudaChannelFormatDesc], size_t width, size_t height, unsigned int flags):
20023 """ Allocate an array on the device.
20025 Allocates a CUDA array according to the
20026 :py:obj:`~.cudaChannelFormatDesc` structure `desc` and returns a handle
20027 to the new CUDA array in `*array`.
20029 The :py:obj:`~.cudaChannelFormatDesc` is defined as:
20031 **View CUDA Toolkit Documentation for a C++ code example**
20033 where :py:obj:`~.cudaChannelFormatKind` is one of
20034 :py:obj:`~.cudaChannelFormatKindSigned`,
20035 :py:obj:`~.cudaChannelFormatKindUnsigned`, or
20036 :py:obj:`~.cudaChannelFormatKindFloat`.
20038 The `flags` parameter enables different options to be specified that
20039 affect the allocation, as follows.
20041 - :py:obj:`~.cudaArrayDefault`: This flag's value is defined to be 0
20042 and provides default array allocation
20044 - :py:obj:`~.cudaArraySurfaceLoadStore`: Allocates an array that can be
20045 read from or written to using a surface reference
20047 - :py:obj:`~.cudaArrayTextureGather`: This flag indicates that texture
20048 gather operations will be performed on the array.
20050 - :py:obj:`~.cudaArraySparse`: Allocates a CUDA array without physical
20051 backing memory. The subregions within this sparse array can later be
20052 mapped onto a physical memory allocation by calling
20053 :py:obj:`~.cuMemMapArrayAsync`. The physical backing memory must be
20054 allocated via :py:obj:`~.cuMemCreate`.
20056 - :py:obj:`~.cudaArrayDeferredMapping`: Allocates a CUDA array without
20057 physical backing memory. The entire array can later be mapped onto a
20058 physical memory allocation by calling :py:obj:`~.cuMemMapArrayAsync`.
20059 The physical backing memory must be allocated via
20060 :py:obj:`~.cuMemCreate`.
20062 `width` and `height` must meet certain size requirements. See
20063 :py:obj:`~.cudaMalloc3DArray()` for more details.
20065 Parameters
20066 ----------
20067 desc : :py:obj:`~.cudaChannelFormatDesc`
20068 Requested channel format
20069 width : size_t
20070 Requested array allocation width
20071 height : size_t
20072 Requested array allocation height
20073 flags : unsigned int
20074 Requested properties of allocated array
20076 Returns
20077 -------
20078 cudaError_t
20079 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20080 array : :py:obj:`~.cudaArray_t`
20081 Pointer to allocated array in device memory
20083 See Also
20084 --------
20085 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuArrayCreate`
20086 """
20087 cdef cudaArray_t array = cudaArray_t()
20088 cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._pvt_ptr if desc is not None else NULL
20089 with nogil:
20090 err = cyruntime.cudaMallocArray(<cyruntime.cudaArray_t*>array._pvt_ptr, cydesc_ptr, width, height, flags)
20091 if err != cyruntime.cudaSuccess:
20092 return (_dict_cudaError_t[err], None)
20093 return (_dict_cudaError_t[err], array)
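# Editorial usage sketch (not part of the generated bindings): a 1024x768
# CUDA array of single-channel float elements. The x/y/z/w bit widths and
# `f` kind field follow the cudaChannelFormatDesc structure described
# above; the enum spelling is an assumption based on this module's naming.
#
#     desc = cudaChannelFormatDesc()
#     desc.x, desc.y, desc.z, desc.w = 32, 0, 0, 0
#     desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
#     err, array = cudaMallocArray(desc, 1024, 768, cudaArrayDefault)
#     assert err == cudaError_t.cudaSuccess
#     err, = cudaFreeArray(array)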
20095@cython.embedsignature(True)
20096def cudaFree(devPtr):
20097 """ Frees memory on the device.
20099 Frees the memory space pointed to by `devPtr`, which must have been
20100 returned by a previous call to one of the following memory allocation
20101 APIs - :py:obj:`~.cudaMalloc()`, :py:obj:`~.cudaMallocPitch()`,
20102 :py:obj:`~.cudaMallocManaged()`, :py:obj:`~.cudaMallocAsync()`,
20103 :py:obj:`~.cudaMallocFromPoolAsync()`.
20105 Note - This API will not perform any implicit synchronization when the
20106 pointer was allocated with :py:obj:`~.cudaMallocAsync` or
20107 :py:obj:`~.cudaMallocFromPoolAsync`. Callers must ensure that all
20108     accesses to these pointers have completed before invoking
20109 :py:obj:`~.cudaFree`. For best performance and memory reuse, users
20110 should use :py:obj:`~.cudaFreeAsync` to free memory allocated via the
20111 stream ordered memory allocator. For all other pointers, this API may
20112 perform implicit synchronization.
20114 If :py:obj:`~.cudaFree`(`devPtr`) has already been called before, an
20115 error is returned. If `devPtr` is 0, no operation is performed.
20116     :py:obj:`~.cudaFree()` returns :py:obj:`~.cudaErrorInvalidValue` in case of
20117 failure.
20119 The device version of :py:obj:`~.cudaFree` cannot be used with a
20120 `*devPtr` allocated using the host API, and vice versa.
20122 Parameters
20123 ----------
20124 devPtr : Any
20125 Device pointer to memory to free
20127 Returns
20128 -------
20129 cudaError_t
20130 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
20132 See Also
20133 --------
20134     :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaMallocManaged`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaMallocFromPoolAsync`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemFree`
20135 """
20136 cydevPtr = _HelperInputVoidPtr(devPtr)
20137 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
20138 with nogil:
20139 err = cyruntime.cudaFree(cydevPtr_ptr)
20140 return (_dict_cudaError_t[err],)
20142@cython.embedsignature(True)
20143def cudaFreeHost(ptr):
20144 """ Frees page-locked memory.
20146 Frees the memory space pointed to by `hostPtr`, which must have been
20147 returned by a previous call to :py:obj:`~.cudaMallocHost()` or
20148 :py:obj:`~.cudaHostAlloc()`.
20150 Parameters
20151 ----------
20152 ptr : Any
20153 Pointer to memory to free
20155 Returns
20156 -------
20157 cudaError_t
20158 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
20160 See Also
20161 --------
20162 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemFreeHost`
20163 """
20164 cyptr = _HelperInputVoidPtr(ptr)
20165 cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
20166 with nogil:
20167 err = cyruntime.cudaFreeHost(cyptr_ptr)
20168 return (_dict_cudaError_t[err],)
20170@cython.embedsignature(True)
20171def cudaFreeArray(array):
20172 """ Frees an array on the device.
20174 Frees the CUDA array `array`, which must have been returned by a
20175     previous call to :py:obj:`~.cudaMallocArray()`. If `array` is 0, no
20176 operation is performed.
20178 Parameters
20179 ----------
20180 array : :py:obj:`~.cudaArray_t`
20181 Pointer to array to free
20183 Returns
20184 -------
20185 cudaError_t
20186 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
20188 See Also
20189 --------
20190 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuArrayDestroy`
20191 """
20192 cdef cyruntime.cudaArray_t cyarray
20193 if array is None:
20194 parray = 0
20195 elif isinstance(array, (cudaArray_t,)):
20196 parray = int(array)
20197 else:
20198 parray = int(cudaArray_t(array))
20199 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
20200 with nogil:
20201 err = cyruntime.cudaFreeArray(cyarray)
20202 return (_dict_cudaError_t[err],)
20204@cython.embedsignature(True)
20205def cudaFreeMipmappedArray(mipmappedArray):
20206 """ Frees a mipmapped array on the device.
20208 Frees the CUDA mipmapped array `mipmappedArray`, which must have been
20209 returned by a previous call to :py:obj:`~.cudaMallocMipmappedArray()`.
20210     If `mipmappedArray` is 0, no operation is performed.
20212 Parameters
20213 ----------
20214 mipmappedArray : :py:obj:`~.cudaMipmappedArray_t`
20215 Pointer to mipmapped array to free
20217 Returns
20218 -------
20219 cudaError_t
20220 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
20222 See Also
20223 --------
20224 :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMipmappedArrayDestroy`
20225 """
20226 cdef cyruntime.cudaMipmappedArray_t cymipmappedArray
20227 if mipmappedArray is None:
20228 pmipmappedArray = 0
20229 elif isinstance(mipmappedArray, (cudaMipmappedArray_t,)):
20230 pmipmappedArray = int(mipmappedArray)
20231 else:
20232 pmipmappedArray = int(cudaMipmappedArray_t(mipmappedArray))
20233 cymipmappedArray = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmappedArray
20234 with nogil:
20235 err = cyruntime.cudaFreeMipmappedArray(cymipmappedArray)
20236 return (_dict_cudaError_t[err],)
20238@cython.embedsignature(True)
20239def cudaHostAlloc(size_t size, unsigned int flags):
20240 """ Allocates page-locked memory on the host.
20242 Allocates `size` bytes of host memory that is page-locked and
20243 accessible to the device. The driver tracks the virtual memory ranges
20244 allocated with this function and automatically accelerates calls to
20245 functions such as :py:obj:`~.cudaMemcpy()`. Since the memory can be
20246 accessed directly by the device, it can be read or written with much
20247 higher bandwidth than pageable memory obtained with functions such as
20248 :py:obj:`~.malloc()`. Allocating excessive amounts of pinned memory may
20249 degrade system performance, since it reduces the amount of memory
20250 available to the system for paging. As a result, this function is best
20251 used sparingly to allocate staging areas for data exchange between host
20252 and device.
20254 The `flags` parameter enables different options to be specified that
20255 affect the allocation, as follows.
20257 - :py:obj:`~.cudaHostAllocDefault`: This flag's value is defined to be
20258 0 and causes :py:obj:`~.cudaHostAlloc()` to emulate
20259 :py:obj:`~.cudaMallocHost()`.
20261 - :py:obj:`~.cudaHostAllocPortable`: The memory returned by this call
20262 will be considered as pinned memory by all CUDA contexts, not just
20263 the one that performed the allocation.
20265 - :py:obj:`~.cudaHostAllocMapped`: Maps the allocation into the CUDA
20266 address space. The device pointer to the memory may be obtained by
20267 calling :py:obj:`~.cudaHostGetDevicePointer()`.
20269 - :py:obj:`~.cudaHostAllocWriteCombined`: Allocates the memory as
20270 write-combined (WC). WC memory can be transferred across the PCI
20271 Express bus more quickly on some system configurations, but cannot be
20272 read efficiently by most CPUs. WC memory is a good option for buffers
20273 that will be written by the CPU and read by the device via mapped
20274 pinned memory or host->device transfers.
20276 All of these flags are orthogonal to one another: a developer may
20277 allocate memory that is portable, mapped and/or write-combined with no
20278 restrictions.
20280 In order for the :py:obj:`~.cudaHostAllocMapped` flag to have any
20281 effect, the CUDA context must support the :py:obj:`~.cudaDeviceMapHost`
20282 flag, which can be checked via :py:obj:`~.cudaGetDeviceFlags()`. The
20283 :py:obj:`~.cudaDeviceMapHost` flag is implicitly set for contexts
20284 created via the runtime API.
20286 The :py:obj:`~.cudaHostAllocMapped` flag may be specified on CUDA
20287 contexts for devices that do not support mapped pinned memory. The
20288 failure is deferred to :py:obj:`~.cudaHostGetDevicePointer()` because
20289 the memory may be mapped into other CUDA contexts via the
20290 :py:obj:`~.cudaHostAllocPortable` flag.
20292 Memory allocated by this function must be freed with
20293 :py:obj:`~.cudaFreeHost()`.
20295 Parameters
20296 ----------
20297 size : size_t
20298 Requested allocation size in bytes
20299 flags : unsigned int
20300 Requested properties of allocated memory
20302 Returns
20303 -------
20304 cudaError_t
20305 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20306 pHost : Any
20307         Host pointer to allocated memory
20309 See Also
20310 --------
20311 :py:obj:`~.cudaSetDeviceFlags`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaGetDeviceFlags`, :py:obj:`~.cuMemHostAlloc`
20312 """
20313 cdef void_ptr pHost = 0
20314 with nogil:
20315 err = cyruntime.cudaHostAlloc(<void**>&pHost, size, flags)
20316 if err != cyruntime.cudaSuccess:
20317 return (_dict_cudaError_t[err], None)
20318 return (_dict_cudaError_t[err], pHost)
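# Editorial usage sketch (not part of the generated bindings): mapped,
# portable pinned memory, with the device-side alias fetched afterwards.
# The flag constants are the module-level values defined near the top of
# this file.
#
#     err, hostPtr = cudaHostAlloc(4096, cudaHostAllocMapped | cudaHostAllocPortable)
#     assert err == cudaError_t.cudaSuccess
#     err, devPtr = cudaHostGetDevicePointer(hostPtr, 0)
#     ...
#     err, = cudaFreeHost(hostPtr)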
20320@cython.embedsignature(True)
20321def cudaHostRegister(ptr, size_t size, unsigned int flags):
20322 """ Registers an existing host memory range for use by CUDA.
20324 Page-locks the memory range specified by `ptr` and `size` and maps it
20325 for the device(s) as specified by `flags`. This memory range also is
20326 added to the same tracking mechanism as :py:obj:`~.cudaHostAlloc()` to
20327 automatically accelerate calls to functions such as
20328 :py:obj:`~.cudaMemcpy()`. Since the memory can be accessed directly by
20329 the device, it can be read or written with much higher bandwidth than
20330 pageable memory that has not been registered. Page-locking excessive
20331 amounts of memory may degrade system performance, since it reduces the
20332 amount of memory available to the system for paging. As a result, this
20333 function is best used sparingly to register staging areas for data
20334 exchange between host and device.
20336 On systems where :py:obj:`~.pageableMemoryAccessUsesHostPageTables` is
20337 true, :py:obj:`~.cudaHostRegister` will not page-lock the memory range
20338 specified by `ptr` but only populate unpopulated pages.
20340 :py:obj:`~.cudaHostRegister` is supported only on I/O coherent devices
20341 that have a non-zero value for the device attribute
20342 :py:obj:`~.cudaDevAttrHostRegisterSupported`.
20344 The `flags` parameter enables different options to be specified that
20345 affect the allocation, as follows.
20347 - :py:obj:`~.cudaHostRegisterDefault`: On a system with unified virtual
20348 addressing, the memory will be both mapped and portable. On a system
20349 with no unified virtual addressing, the memory will be neither mapped
20350 nor portable.
20352 - :py:obj:`~.cudaHostRegisterPortable`: The memory returned by this
20353 call will be considered as pinned memory by all CUDA contexts, not
20354 just the one that performed the allocation.
20356 - :py:obj:`~.cudaHostRegisterMapped`: Maps the allocation into the CUDA
20357 address space. The device pointer to the memory may be obtained by
20358 calling :py:obj:`~.cudaHostGetDevicePointer()`.
20360 - :py:obj:`~.cudaHostRegisterIoMemory`: The passed memory pointer is
20361 treated as pointing to some memory-mapped I/O space, e.g. belonging
20362       to a third-party PCIe device, and it will be marked as non cache-
20363 coherent and contiguous.
20365 - :py:obj:`~.cudaHostRegisterReadOnly`: The passed memory pointer is
20366 treated as pointing to memory that is considered read-only by the
20367 device. On platforms without
20368 :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, this
20369 flag is required in order to register memory mapped to the CPU as
20370 read-only. Support for the use of this flag can be queried from the
20371 device attribute
20372 :py:obj:`~.cudaDevAttrHostRegisterReadOnlySupported`. Using this flag
20373 with a current context associated with a device that does not have
20374 this attribute set will cause :py:obj:`~.cudaHostRegister` to error
20375 with cudaErrorNotSupported.
20377 All of these flags are orthogonal to one another: a developer may page-
20378 lock memory that is portable or mapped with no restrictions.
20380 The CUDA context must have been created with the
20381     :py:obj:`~.cudaDeviceMapHost` flag in order for the
20382 :py:obj:`~.cudaHostRegisterMapped` flag to have any effect.
20384 The :py:obj:`~.cudaHostRegisterMapped` flag may be specified on CUDA
20385 contexts for devices that do not support mapped pinned memory. The
20386 failure is deferred to :py:obj:`~.cudaHostGetDevicePointer()` because
20387 the memory may be mapped into other CUDA contexts via the
20388 :py:obj:`~.cudaHostRegisterPortable` flag.
20390 For devices that have a non-zero value for the device attribute
20391 :py:obj:`~.cudaDevAttrCanUseHostPointerForRegisteredMem`, the memory
20392 can also be accessed from the device using the host pointer `ptr`. The
20393 device pointer returned by :py:obj:`~.cudaHostGetDevicePointer()` may
20394 or may not match the original host pointer `ptr` and depends on the
20395 devices visible to the application. If all devices visible to the
20396 application have a non-zero value for the device attribute, the device
20397 pointer returned by :py:obj:`~.cudaHostGetDevicePointer()` will match
20398 the original pointer `ptr`. If any device visible to the application
20399 has a zero value for the device attribute, the device pointer returned
20400 by :py:obj:`~.cudaHostGetDevicePointer()` will not match the original
20401 host pointer `ptr`, but it will be suitable for use on all devices
20402 provided Unified Virtual Addressing is enabled. In such systems, it is
20403 valid to access the memory using either pointer on devices that have a
20404 non-zero value for the device attribute. Note however that such devices
20405     should access the memory using only one of the two pointers and not both.
20407 The memory page-locked by this function must be unregistered with
20408 :py:obj:`~.cudaHostUnregister()`.
20410 Parameters
20411 ----------
20412 ptr : Any
20413 Host pointer to memory to page-lock
20414 size : size_t
20415         Size in bytes of the address range to page-lock
20416 flags : unsigned int
20417 Flags for allocation request
20419 Returns
20420 -------
20421 cudaError_t
20422 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorHostMemoryAlreadyRegistered`, :py:obj:`~.cudaErrorNotSupported`
20424 See Also
20425 --------
20426 :py:obj:`~.cudaHostUnregister`, :py:obj:`~.cudaHostGetFlags`, :py:obj:`~.cudaHostGetDevicePointer`, :py:obj:`~.cuMemHostRegister`
20427 """
20428 cyptr = _HelperInputVoidPtr(ptr)
20429 cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
20430 with nogil:
20431 err = cyruntime.cudaHostRegister(cyptr_ptr, size, flags)
20432 return (_dict_cudaError_t[err],)
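# Editorial usage sketch (not part of the generated bindings): page-locking
# an existing host buffer. Passing a buffer-protocol object such as a
# bytearray is an assumption for illustration; the range must be
# unregistered before the buffer is released.
#
#     buf = bytearray(1 << 20)
#     err, = cudaHostRegister(buf, len(buf), cudaHostRegisterMapped)
#     assert err == cudaError_t.cudaSuccess
#     err, = cudaHostUnregister(buf)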
20434@cython.embedsignature(True)
20435def cudaHostUnregister(ptr):
20436 """ Unregisters a memory range that was registered with cudaHostRegister.
20438 Unmaps the memory range whose base address is specified by `ptr`, and
20439 makes it pageable again.
20441 The base address must be the same one specified to
20442 :py:obj:`~.cudaHostRegister()`.
20444 Parameters
20445 ----------
20446 ptr : Any
20447 Host pointer to memory to unregister
20449 Returns
20450 -------
20451 cudaError_t
20452 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorHostMemoryNotRegistered`
20454 See Also
20455 --------
20456     :py:obj:`~.cudaHostRegister`, :py:obj:`~.cuMemHostUnregister`
20457 """
20458 cyptr = _HelperInputVoidPtr(ptr)
20459 cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
20460 with nogil:
20461 err = cyruntime.cudaHostUnregister(cyptr_ptr)
20462 return (_dict_cudaError_t[err],)
20464@cython.embedsignature(True)
20465def cudaHostGetDevicePointer(pHost, unsigned int flags):
20466 """ Passes back device pointer of mapped host memory allocated by cudaHostAlloc or registered by cudaHostRegister.
20468 Passes back the device pointer corresponding to the mapped, pinned host
20469 buffer allocated by :py:obj:`~.cudaHostAlloc()` or registered by
20470 :py:obj:`~.cudaHostRegister()`.
20472 :py:obj:`~.cudaHostGetDevicePointer()` will fail if the
20473 :py:obj:`~.cudaDeviceMapHost` flag was not specified before deferred
20474 context creation occurred, or if called on a device that does not
20475 support mapped, pinned memory.
20477 For devices that have a non-zero value for the device attribute
20478 :py:obj:`~.cudaDevAttrCanUseHostPointerForRegisteredMem`, the memory
20479 can also be accessed from the device using the host pointer `pHost`.
20480 The device pointer returned by :py:obj:`~.cudaHostGetDevicePointer()`
20481 may or may not match the original host pointer `pHost` and depends on
20482 the devices visible to the application. If all devices visible to the
20483 application have a non-zero value for the device attribute, the device
20484 pointer returned by :py:obj:`~.cudaHostGetDevicePointer()` will match
20485 the original pointer `pHost`. If any device visible to the application
20486 has a zero value for the device attribute, the device pointer returned
20487 by :py:obj:`~.cudaHostGetDevicePointer()` will not match the original
20488 host pointer `pHost`, but it will be suitable for use on all devices
20489 provided Unified Virtual Addressing is enabled. In such systems, it is
20490 valid to access the memory using either pointer on devices that have a
20491 non-zero value for the device attribute. Note however that such devices
20492     should access the memory using only one of the two pointers and not both.
20494     `flags` is provided for future releases. For now, it must be set to 0.
20496 Parameters
20497 ----------
20498 pHost : Any
20499 Requested host pointer mapping
20500 flags : unsigned int
20501 Flags for extensions (must be 0 for now)
20503 Returns
20504 -------
20505 cudaError_t
20506 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20507 pDevice : Any
20508 Returned device pointer for mapped memory
20510 See Also
20511 --------
20512 :py:obj:`~.cudaSetDeviceFlags`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemHostGetDevicePointer`
20513 """
20514 cdef void_ptr pDevice = 0
20515 cypHost = _HelperInputVoidPtr(pHost)
20516 cdef void* cypHost_ptr = <void*><void_ptr>cypHost.cptr
20517 with nogil:
20518 err = cyruntime.cudaHostGetDevicePointer(<void**>&pDevice, cypHost_ptr, flags)
20519 if err != cyruntime.cudaSuccess:
20520 return (_dict_cudaError_t[err], None)
20521 return (_dict_cudaError_t[err], pDevice)
20523@cython.embedsignature(True)
20524def cudaHostGetFlags(pHost):
20525 """ Passes back flags used to allocate pinned host memory allocated by cudaHostAlloc.
20527 :py:obj:`~.cudaHostGetFlags()` will fail if the input pointer does not
20528 reside in an address range allocated by :py:obj:`~.cudaHostAlloc()`.
20530 Parameters
20531 ----------
20532 pHost : Any
20533 Host pointer
20535 Returns
20536 -------
20537 cudaError_t
20538 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
20539 pFlags : unsigned int
20540 Returned flags word
20542 See Also
20543 --------
20544 :py:obj:`~.cudaHostAlloc`, :py:obj:`~.cuMemHostGetFlags`
20545 """
20546 cdef unsigned int pFlags = 0
20547 cypHost = _HelperInputVoidPtr(pHost)
20548 cdef void* cypHost_ptr = <void*><void_ptr>cypHost.cptr
20549 with nogil:
20550 err = cyruntime.cudaHostGetFlags(&pFlags, cypHost_ptr)
20551 if err != cyruntime.cudaSuccess:
20552 return (_dict_cudaError_t[err], None)
20553 return (_dict_cudaError_t[err], pFlags)
20555@cython.embedsignature(True)
20556def cudaMalloc3D(extent not None : cudaExtent):
20557 """ Allocates logical 1D, 2D, or 3D memory objects on the device.
20559 Allocates at least `width` * `height` * `depth` bytes of linear memory
20560 on the device and returns a :py:obj:`~.cudaPitchedPtr` in which `ptr`
20561 is a pointer to the allocated memory. The function may pad the
20562 allocation to ensure hardware alignment requirements are met. The pitch
20563 returned in the `pitch` field of `pitchedDevPtr` is the width in bytes
20564 of the allocation.
20566 The returned :py:obj:`~.cudaPitchedPtr` contains additional fields
20567 `xsize` and `ysize`, the logical width and height of the allocation,
20568 which are equivalent to the `width` and `height` `extent` parameters
20569 provided by the programmer during allocation.
20571 For allocations of 2D and 3D objects, it is highly recommended that
20572 programmers perform allocations using :py:obj:`~.cudaMalloc3D()` or
20573 :py:obj:`~.cudaMallocPitch()`. Due to alignment restrictions in the
20574 hardware, this is especially true if the application will be performing
20575 memory copies involving 2D or 3D objects (whether linear memory or CUDA
20576 arrays).
20578 Parameters
20579 ----------
20580 extent : :py:obj:`~.cudaExtent`
20581 Requested allocation size (`width` field in bytes)
20583 Returns
20584 -------
20585 cudaError_t
20586 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20587 pitchedDevPtr : :py:obj:`~.cudaPitchedPtr`
20588 Pointer to allocated pitched device memory
20590 See Also
20591 --------
20592 :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMallocArray`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaPitchedPtr`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuMemAllocPitch`
20593 """
20594 cdef cudaPitchedPtr pitchedDevPtr = cudaPitchedPtr()
20595 with nogil:
20596 err = cyruntime.cudaMalloc3D(<cyruntime.cudaPitchedPtr*>pitchedDevPtr._pvt_ptr, extent._pvt_ptr[0])
20597 if err != cyruntime.cudaSuccess:
20598 return (_dict_cudaError_t[err], None)
20599 return (_dict_cudaError_t[err], pitchedDevPtr)
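# Editorial usage sketch (not part of the generated bindings): a 3D
# allocation via make_cudaExtent (cross-referenced in the See Also above),
# inspecting the pitch and logical sizes of the returned cudaPitchedPtr.
#
#     extent = make_cudaExtent(256, 64, 16)   # width in bytes, height, depth
#     err, pitched = cudaMalloc3D(extent)
#     assert err == cudaError_t.cudaSuccess
#     print(pitched.pitch, pitched.xsize, pitched.ysize)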
20601@cython.embedsignature(True)
20602def cudaMalloc3DArray(desc : Optional[cudaChannelFormatDesc], extent not None : cudaExtent, unsigned int flags):
20603 """ Allocate an array on the device.
20605 Allocates a CUDA array according to the
20606 :py:obj:`~.cudaChannelFormatDesc` structure `desc` and returns a handle
20607 to the new CUDA array in `*array`.
20609 The :py:obj:`~.cudaChannelFormatDesc` is defined as:
20611 **View CUDA Toolkit Documentation for a C++ code example**
20613 where :py:obj:`~.cudaChannelFormatKind` is one of
20614 :py:obj:`~.cudaChannelFormatKindSigned`,
20615 :py:obj:`~.cudaChannelFormatKindUnsigned`, or
20616 :py:obj:`~.cudaChannelFormatKindFloat`.
20618 :py:obj:`~.cudaMalloc3DArray()` can allocate the following:
20620 - A 1D array is allocated if the height and depth extents are both
20621 zero.
20623 - A 2D array is allocated if only the depth extent is zero.
20625 - A 3D array is allocated if all three extents are non-zero.
20627 - A 1D layered CUDA array is allocated if only the height extent is
20628 zero and the cudaArrayLayered flag is set. Each layer is a 1D array.
20629 The number of layers is determined by the depth extent.
20631 - A 2D layered CUDA array is allocated if all three extents are non-
20632 zero and the cudaArrayLayered flag is set. Each layer is a 2D array.
20633 The number of layers is determined by the depth extent.
20635 - A cubemap CUDA array is allocated if all three extents are non-zero
20636 and the cudaArrayCubemap flag is set. Width must be equal to height,
20637 and depth must be six. A cubemap is a special type of 2D layered CUDA
20638 array, where the six layers represent the six faces of a cube. The
20639 order of the six layers in memory is the same as that listed in
20640 :py:obj:`~.cudaGraphicsCubeFace`.
20642 - A cubemap layered CUDA array is allocated if all three extents are
20643 non-zero, and both, cudaArrayCubemap and cudaArrayLayered flags are
20644 set. Width must be equal to height, and depth must be a multiple of
20645 six. A cubemap layered CUDA array is a special type of 2D layered
20646 CUDA array that consists of a collection of cubemaps. The first six
20647 layers represent the first cubemap, the next six layers form the
20648 second cubemap, and so on.
20650 The `flags` parameter enables different options to be specified that
20651 affect the allocation, as follows.
20653 - :py:obj:`~.cudaArrayDefault`: This flag's value is defined to be 0
20654 and provides default array allocation
20656 - :py:obj:`~.cudaArrayLayered`: Allocates a layered CUDA array, with
20657 the depth extent indicating the number of layers
20659 - :py:obj:`~.cudaArrayCubemap`: Allocates a cubemap CUDA array. Width
20660 must be equal to height, and depth must be six. If the
20661 cudaArrayLayered flag is also set, depth must be a multiple of six.
20663 - :py:obj:`~.cudaArraySurfaceLoadStore`: Allocates a CUDA array that
20664 could be read from or written to using a surface reference.
20666 - :py:obj:`~.cudaArrayTextureGather`: This flag indicates that texture
20667 gather operations will be performed on the CUDA array. Texture gather
20668 can only be performed on 2D CUDA arrays.
20670 - :py:obj:`~.cudaArraySparse`: Allocates a CUDA array without physical
20671 backing memory. The subregions within this sparse array can later be
20672 mapped onto a physical memory allocation by calling
20673 :py:obj:`~.cuMemMapArrayAsync`. This flag can only be used for
20674 creating 2D, 3D or 2D layered sparse CUDA arrays. The physical
20675 backing memory must be allocated via :py:obj:`~.cuMemCreate`.
20677 - :py:obj:`~.cudaArrayDeferredMapping`: Allocates a CUDA array without
20678 physical backing memory. The entire array can later be mapped onto a
20679 physical memory allocation by calling :py:obj:`~.cuMemMapArrayAsync`.
20680 The physical backing memory must be allocated via
20681 :py:obj:`~.cuMemCreate`.
20683 The width, height and depth extents must meet certain size requirements
20684 as listed in the following table. All values are specified in elements.
20686 Note that 2D CUDA arrays have different size requirements if the
20687 :py:obj:`~.cudaArrayTextureGather` flag is set. In that case, the valid
20688 range for (width, height, depth) is ((1,maxTexture2DGather[0]),
20689 (1,maxTexture2DGather[1]), 0).
20691 **View CUDA Toolkit Documentation for a table example**
20693 Parameters
20694 ----------
20695 desc : :py:obj:`~.cudaChannelFormatDesc`
20696 Requested channel format
20697 extent : :py:obj:`~.cudaExtent`
20698 Requested allocation size (`width` field in elements)
20699 flags : unsigned int
20700 Flags for extensions
20702 Returns
20703 -------
20704 cudaError_t
20705 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20706 array : :py:obj:`~.cudaArray_t`
20707 Pointer to allocated array in device memory
20709 See Also
20710 --------
20711 :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuArray3DCreate`
20712 """
20713 cdef cudaArray_t array = cudaArray_t()
20714 cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._pvt_ptr if desc is not None else NULL
20715 with nogil:
20716 err = cyruntime.cudaMalloc3DArray(<cyruntime.cudaArray_t*>array._pvt_ptr, cydesc_ptr, extent._pvt_ptr[0], flags)
20717 if err != cyruntime.cudaSuccess:
20718 return (_dict_cudaError_t[err], None)
20719 return (_dict_cudaError_t[err], array)
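# Editorial usage sketch (not part of the generated bindings): a 2D layered
# array with 8 layers; per the rules above, all three extents are non-zero
# and cudaArrayLayered is set, with the depth extent giving the layer
# count. Channel-descriptor fields follow the earlier cudaMallocArray
# sketch.
#
#     desc = cudaChannelFormatDesc()
#     desc.x, desc.y, desc.z, desc.w = 32, 0, 0, 0
#     desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
#     extent = make_cudaExtent(1024, 768, 8)  # width/height in elements
#     err, array = cudaMalloc3DArray(desc, extent, cudaArrayLayered)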
20721@cython.embedsignature(True)
20722def cudaMallocMipmappedArray(desc : Optional[cudaChannelFormatDesc], extent not None : cudaExtent, unsigned int numLevels, unsigned int flags):
20723 """ Allocate a mipmapped array on the device.
20725 Allocates a CUDA mipmapped array according to the
20726 :py:obj:`~.cudaChannelFormatDesc` structure `desc` and returns a handle
20727 to the new CUDA mipmapped array in `*mipmappedArray`. `numLevels`
20728 specifies the number of mipmap levels to be allocated. This value is
20729 clamped to the range [1, 1 + floor(log2(max(width, height, depth)))].
20731 The :py:obj:`~.cudaChannelFormatDesc` is defined as:
20733 **View CUDA Toolkit Documentation for a C++ code example**
20735 where :py:obj:`~.cudaChannelFormatKind` is one of
20736 :py:obj:`~.cudaChannelFormatKindSigned`,
20737 :py:obj:`~.cudaChannelFormatKindUnsigned`, or
20738 :py:obj:`~.cudaChannelFormatKindFloat`.
20740 :py:obj:`~.cudaMallocMipmappedArray()` can allocate the following:
20742 - A 1D mipmapped array is allocated if the height and depth extents are
20743 both zero.
20745 - A 2D mipmapped array is allocated if only the depth extent is zero.
20747 - A 3D mipmapped array is allocated if all three extents are non-zero.
20749 - A 1D layered CUDA mipmapped array is allocated if only the height
20750 extent is zero and the cudaArrayLayered flag is set. Each layer is a
20751 1D mipmapped array. The number of layers is determined by the depth
20752 extent.
20754 - A 2D layered CUDA mipmapped array is allocated if all three extents
20755 are non-zero and the cudaArrayLayered flag is set. Each layer is a 2D
20756 mipmapped array. The number of layers is determined by the depth
20757 extent.
20759 - A cubemap CUDA mipmapped array is allocated if all three extents are
20760 non-zero and the cudaArrayCubemap flag is set. Width must be equal to
20761 height, and depth must be six. The order of the six layers in memory
20762 is the same as that listed in :py:obj:`~.cudaGraphicsCubeFace`.
20764 - A cubemap layered CUDA mipmapped array is allocated if all three
20765 extents are non-zero, and both, cudaArrayCubemap and cudaArrayLayered
20766 flags are set. Width must be equal to height, and depth must be a
20767 multiple of six. A cubemap layered CUDA mipmapped array is a special
20768 type of 2D layered CUDA mipmapped array that consists of a collection
20769 of cubemap mipmapped arrays. The first six layers represent the first
20770 cubemap mipmapped array, the next six layers form the second cubemap
20771 mipmapped array, and so on.
20773 The `flags` parameter enables different options to be specified that
20774 affect the allocation, as follows.
20776 - :py:obj:`~.cudaArrayDefault`: This flag's value is defined to be 0
20777 and provides default mipmapped array allocation
20779 - :py:obj:`~.cudaArrayLayered`: Allocates a layered CUDA mipmapped
20780 array, with the depth extent indicating the number of layers
20782 - :py:obj:`~.cudaArrayCubemap`: Allocates a cubemap CUDA mipmapped
20783 array. Width must be equal to height, and depth must be six. If the
20784 cudaArrayLayered flag is also set, depth must be a multiple of six.
20786 - :py:obj:`~.cudaArraySurfaceLoadStore`: This flag indicates that
20787 individual mipmap levels of the CUDA mipmapped array will be read
20788 from or written to using a surface reference.
20790 - :py:obj:`~.cudaArrayTextureGather`: This flag indicates that texture
20791 gather operations will be performed on the CUDA array. Texture gather
20792 can only be performed on 2D CUDA mipmapped arrays, and the gather
20793 operations are performed only on the most detailed mipmap level.
20795 - :py:obj:`~.cudaArraySparse`: Allocates a CUDA mipmapped array without
20796 physical backing memory. The subregions within this sparse array can
20797 later be mapped onto a physical memory allocation by calling
20798 :py:obj:`~.cuMemMapArrayAsync`. This flag can only be used for
20799 creating 2D, 3D or 2D layered sparse CUDA mipmapped arrays. The
20800 physical backing memory must be allocated via
20801 :py:obj:`~.cuMemCreate`.
20803 - :py:obj:`~.cudaArrayDeferredMapping`: Allocates a CUDA mipmapped
20804 array without physical backing memory. The entire array can later be
20805 mapped onto a physical memory allocation by calling
20806 :py:obj:`~.cuMemMapArrayAsync`. The physical backing memory must be
20807 allocated via :py:obj:`~.cuMemCreate`.
20809 The width, height and depth extents must meet certain size requirements
20810 as listed in the following table. All values are specified in elements.
20812 **View CUDA Toolkit Documentation for a table example**
20814 Parameters
20815 ----------
20816 desc : :py:obj:`~.cudaChannelFormatDesc`
20817 Requested channel format
20818 extent : :py:obj:`~.cudaExtent`
20819 Requested allocation size (`width` field in elements)
20820 numLevels : unsigned int
20821 Number of mipmap levels to allocate
20822 flags : unsigned int
20823 Flags for extensions
20825 Returns
20826 -------
20827 cudaError_t
20828 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
20829 mipmappedArray : :py:obj:`~.cudaMipmappedArray_t`
20830 Pointer to allocated mipmapped array in device memory
20832 See Also
20833 --------
20834 :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuMipmappedArrayCreate`
20835 """
20836 cdef cudaMipmappedArray_t mipmappedArray = cudaMipmappedArray_t()
20837 cdef cyruntime.cudaChannelFormatDesc* cydesc_ptr = desc._pvt_ptr if desc is not None else NULL
20838 with nogil:
20839 err = cyruntime.cudaMallocMipmappedArray(<cyruntime.cudaMipmappedArray_t*>mipmappedArray._pvt_ptr, cydesc_ptr, extent._pvt_ptr[0], numLevels, flags)
20840 if err != cyruntime.cudaSuccess:
20841 return (_dict_cudaError_t[err], None)
20842 return (_dict_cudaError_t[err], mipmappedArray)
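# Editorial usage sketch (not part of the generated bindings): a full mip
# chain for a 1024x1024 2D array. numLevels is clamped to
# [1, 1 + floor(log2(max(width, height, depth)))], which for 1024x1024
# allows 1 + floor(log2(1024)) = 11 levels.
#
#     import math
#     w, h = 1024, 1024
#     num_levels = 1 + int(math.log2(max(w, h, 1)))
#     desc = cudaChannelFormatDesc()
#     desc.x, desc.f = 32, cudaChannelFormatKind.cudaChannelFormatKindFloat
#     err, mipArr = cudaMallocMipmappedArray(
#         desc, make_cudaExtent(w, h, 0), num_levels, cudaArrayDefault)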
20844@cython.embedsignature(True)
20845def cudaGetMipmappedArrayLevel(mipmappedArray, unsigned int level):
20846 """ Gets a mipmap level of a CUDA mipmapped array.
20848 Returns in `*levelArray` a CUDA array that represents a single mipmap
20849 level of the CUDA mipmapped array `mipmappedArray`.
20851 If `level` is greater than the maximum number of levels in this
20852 mipmapped array, :py:obj:`~.cudaErrorInvalidValue` is returned.
20854 If `mipmappedArray` is NULL, :py:obj:`~.cudaErrorInvalidResourceHandle`
20855 is returned.
20857 Parameters
20858 ----------
20859 mipmappedArray : :py:obj:`~.cudaMipmappedArray_const_t`
20860 CUDA mipmapped array
20861 level : unsigned int
20862 Mipmap level
20864 Returns
20865 -------
20866 cudaError_t
20867         :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
20868 levelArray : :py:obj:`~.cudaArray_t`
20869 Returned mipmap level CUDA array
20871 See Also
20872 --------
20873 :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc`, :py:obj:`~.cudaMallocPitch`, :py:obj:`~.cudaFree`, :py:obj:`~.cudaFreeArray`, :py:obj:`~.cudaMallocHost (C API)`, :py:obj:`~.cudaFreeHost`, :py:obj:`~.cudaHostAlloc`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.cuMipmappedArrayGetLevel`
20874 """
20875 cdef cyruntime.cudaMipmappedArray_const_t cymipmappedArray
20876 if mipmappedArray is None:
20877 pmipmappedArray = 0
20878 elif isinstance(mipmappedArray, (cudaMipmappedArray_const_t,)):
20879 pmipmappedArray = int(mipmappedArray)
20880 else:
20881 pmipmappedArray = int(cudaMipmappedArray_const_t(mipmappedArray))
20882 cymipmappedArray = <cyruntime.cudaMipmappedArray_const_t><void_ptr>pmipmappedArray
20883 cdef cudaArray_t levelArray = cudaArray_t()
20884 with nogil:
20885 err = cyruntime.cudaGetMipmappedArrayLevel(<cyruntime.cudaArray_t*>levelArray._pvt_ptr, cymipmappedArray, level)
20886 if err != cyruntime.cudaSuccess:
20887 return (_dict_cudaError_t[err], None)
20888 return (_dict_cudaError_t[err], levelArray)
20890@cython.embedsignature(True)
20891def cudaMemcpy3D(p : Optional[cudaMemcpy3DParms]):
20892 """ Copies data between 3D objects.
20894 **View CUDA Toolkit Documentation for a C++ code example**
20896     :py:obj:`~.cudaMemcpy3D()` copies data between two 3D objects. The
20897 source and destination objects may be in either host memory, device
20898 memory, or a CUDA array. The source, destination, extent, and kind of
20899 copy performed is specified by the :py:obj:`~.cudaMemcpy3DParms` struct
20900 which should be initialized to zero before use:
20902 **View CUDA Toolkit Documentation for a C++ code example**
20904 The struct passed to :py:obj:`~.cudaMemcpy3D()` must specify one of
20905 `srcArray` or `srcPtr` and one of `dstArray` or `dstPtr`. Passing more
20906 than one non-zero source or destination will cause
20907 :py:obj:`~.cudaMemcpy3D()` to return an error.
20909 The `srcPos` and `dstPos` fields are optional offsets into the source
20910 and destination objects and are defined in units of each object's
20911 elements. The element for a host or device pointer is assumed to be
20912 unsigned char.
20914 The `extent` field defines the dimensions of the transferred area in
20915 elements. If a CUDA array is participating in the copy, the extent is
20916 defined in terms of that array's elements. If no CUDA array is
20917 participating in the copy then the extents are defined in elements of
20918 unsigned char.
20920 The `kind` field defines the direction of the copy. It must be one of
20921 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
20922 :py:obj:`~.cudaMemcpyDeviceToHost`,
20923 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
20924 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
20925 type of transfer is inferred from the pointer values. However,
20926 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
20927 unified virtual addressing. If :py:obj:`~.cudaMemcpyHostToHost`,
20928 :py:obj:`~.cudaMemcpyHostToDevice`, or
20929 :py:obj:`~.cudaMemcpyDeviceToHost` is passed as kind with a cudaArray
20930 as source or destination, and that kind implies the cudaArray is on
20931 the host, :py:obj:`~.cudaMemcpy3D()` will disregard the implication
20932 and silently correct the kind, since a cudaArray can only reside on
20933 the device.
20935 If the source and destination are both arrays,
20936 :py:obj:`~.cudaMemcpy3D()` will return an error if they do not have the
20937 same element size.
20939 The source and destination object may not overlap. If overlapping
20940 source and destination objects are specified, undefined behavior will
20941 result.
20943 The source object must entirely contain the region defined by `srcPos`
20944 and `extent`. The destination object must entirely contain the region
20945 defined by `dstPos` and `extent`.
20947 :py:obj:`~.cudaMemcpy3D()` returns an error if the pitch of `srcPtr` or
20948 `dstPtr` exceeds the maximum allowed. The pitch of a
20949 :py:obj:`~.cudaPitchedPtr` allocated with :py:obj:`~.cudaMalloc3D()`
20950 will always be valid.
20952 Parameters
20953 ----------
20954 p : :py:obj:`~.cudaMemcpy3DParms`
20955 3D memory copy parameters
20957 Returns
20958 -------
20959 cudaError_t
20960 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
20962 See Also
20963 --------
20964 :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemcpy3DAsync`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.make_cudaPos`, :py:obj:`~.cuMemcpy3D`
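Examples
--------
A minimal sketch, not taken from the CUDA documentation; it copies a
tightly packed host volume into a pitched 3D device allocation. It
assumes NumPy, a CUDA device, and that struct fields are settable as
shown:

>>> import numpy as np
>>> from cuda.bindings import runtime
>>> w, h, d = 16, 8, 4
>>> host = np.ones((d, h, w), dtype=np.uint8)
>>> extent = runtime.cudaExtent()
>>> extent.width, extent.height, extent.depth = w, h, d
>>> err, devPitched = runtime.cudaMalloc3D(extent)
>>> srcPtr = runtime.cudaPitchedPtr()
>>> srcPtr.ptr = host.ctypes.data
>>> srcPtr.pitch, srcPtr.xsize, srcPtr.ysize = w, w, h
>>> parms = runtime.cudaMemcpy3DParms()
>>> parms.srcPtr, parms.dstPtr = srcPtr, devPitched
>>> parms.extent = extent
>>> parms.kind = runtime.cudaMemcpyKind.cudaMemcpyHostToDevice
>>> err, = runtime.cudaMemcpy3D(parms)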
20965 """
20966 cdef cyruntime.cudaMemcpy3DParms* cyp_ptr = p._pvt_ptr if p is not None else NULL
20967 with nogil:
20968 err = cyruntime.cudaMemcpy3D(cyp_ptr)
20969 return (_dict_cudaError_t[err],)
20971@cython.embedsignature(True)
20972def cudaMemcpy3DPeer(p : Optional[cudaMemcpy3DPeerParms]):
20973 """ Copies memory between devices.
20975 Perform a 3D memory copy according to the parameters specified in `p`.
20976 See the definition of the :py:obj:`~.cudaMemcpy3DPeerParms` structure
20977 for documentation of its parameters.
20979 Note that this function is synchronous with respect to the host only if
20980 the source or destination of the transfer is host memory. Note also
20981 that this copy is serialized with respect to all pending and future
20982 asynchronous work in the current device, the copy's source device,
20983 and the copy's destination device (use
20984 :py:obj:`~.cudaMemcpy3DPeerAsync` to avoid this synchronization).
20986 Parameters
20987 ----------
20988 p : :py:obj:`~.cudaMemcpy3DPeerParms`
20989 Parameters for the memory copy
20991 Returns
20992 -------
20993 cudaError_t
20994 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidPitchValue`
20996 See Also
20997 --------
20998 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpy3DPeer`
20999 """
21000 cdef cyruntime.cudaMemcpy3DPeerParms* cyp_ptr = p._pvt_ptr if p is not None else NULL
21001 with nogil:
21002 err = cyruntime.cudaMemcpy3DPeer(cyp_ptr)
21003 return (_dict_cudaError_t[err],)
21005@cython.embedsignature(True)
21006def cudaMemcpy3DAsync(p : Optional[cudaMemcpy3DParms], stream):
21007 """ Copies data between 3D objects.
21009 **View CUDA Toolkit Documentation for a C++ code example**
21011 :py:obj:`~.cudaMemcpy3DAsync()` copies data between two 3D objects. The
21012 source and destination objects may be in either host memory, device
21013 memory, or a CUDA array. The source, destination, extent, and kind of
21014 copy performed are specified by the :py:obj:`~.cudaMemcpy3DParms` struct
21015 which should be initialized to zero before use:
21017 **View CUDA Toolkit Documentation for a C++ code example**
21019 The struct passed to :py:obj:`~.cudaMemcpy3DAsync()` must specify one
21020 of `srcArray` or `srcPtr` and one of `dstArray` or `dstPtr`. Passing
21021 more than one non-zero source or destination will cause
21022 :py:obj:`~.cudaMemcpy3DAsync()` to return an error.
21024 The `srcPos` and `dstPos` fields are optional offsets into the source
21025 and destination objects and are defined in units of each object's
21026 elements. The element for a host or device pointer is assumed to be
21027 unsigned char. For CUDA arrays, positions must be in the range [0,
21028 2048) for any dimension.
21030 The `extent` field defines the dimensions of the transferred area in
21031 elements. If a CUDA array is participating in the copy, the extent is
21032 defined in terms of that array's elements. If no CUDA array is
21033 participating in the copy then the extents are defined in elements of
21034 unsigned char.
21036 The `kind` field defines the direction of the copy. It must be one of
21037 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
21038 :py:obj:`~.cudaMemcpyDeviceToHost`,
21039 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21040 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21041 type of transfer is inferred from the pointer values. However,
21042 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21043 unified virtual addressing. If :py:obj:`~.cudaMemcpyHostToHost`,
21044 :py:obj:`~.cudaMemcpyHostToDevice`, or
21045 :py:obj:`~.cudaMemcpyDeviceToHost` is passed as kind with a cudaArray
21046 as source or destination, and that kind implies the cudaArray is on
21047 the host, :py:obj:`~.cudaMemcpy3DAsync()` will disregard the
21048 implication and silently correct the kind, since a cudaArray can only
21049 reside on the device.
21051 If the source and destination are both arrays,
21052 :py:obj:`~.cudaMemcpy3DAsync()` will return an error if they do not
21053 have the same element size.
21055 The source and destination object may not overlap. If overlapping
21056 source and destination objects are specified, undefined behavior will
21057 result.
21059 The source object must entirely contain the region defined by
21060 `srcPos` and `extent`. The destination object must entirely contain
21061 the region defined by `dstPos` and `extent`.
21063 :py:obj:`~.cudaMemcpy3DAsync()` returns an error if the pitch of
21064 `srcPtr` or `dstPtr` exceeds the maximum allowed. The pitch of a
21065 :py:obj:`~.cudaPitchedPtr` allocated with :py:obj:`~.cudaMalloc3D()`
21066 will always be valid.
21068 :py:obj:`~.cudaMemcpy3DAsync()` is asynchronous with respect to the
21069 host, so the call may return before the copy is complete. The copy can
21070 optionally be associated with a stream by passing a non-zero `stream`
21071 argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
21072 :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
21073 may overlap with operations in other streams.
21075 The device version of this function only handles device to device
21076 copies and cannot be given local or shared pointers.
21078 Parameters
21079 ----------
21080 p : :py:obj:`~.cudaMemcpy3DParms`
21081 3D memory copy parameters
21082 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
21083 Stream identifier
21085 Returns
21086 -------
21087 cudaError_t
21088 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21090 See Also
21091 --------
21092 :py:obj:`~.cudaMalloc3D`, :py:obj:`~.cudaMalloc3DArray`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.make_cudaExtent`, :py:obj:`~.make_cudaPos`, :py:obj:`~.cuMemcpy3DAsync`
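Examples
--------
A minimal sketch; assuming `parms` has been prepared as in the
:py:obj:`~.cudaMemcpy3D` example (same imports and allocations), the
same copy can be issued on a non-default stream:

>>> err, stream = runtime.cudaStreamCreate()
>>> err, = runtime.cudaMemcpy3DAsync(parms, stream)
>>> err, = runtime.cudaStreamSynchronize(stream)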
21093 """
21094 cdef cyruntime.cudaStream_t cystream
21095 if stream is None:
21096 pstream = 0
21097 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
21098 pstream = int(stream)
21099 else:
21100 pstream = int(cudaStream_t(stream))
21101 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
21102 cdef cyruntime.cudaMemcpy3DParms* cyp_ptr = p._pvt_ptr if p is not None else NULL
21103 with nogil:
21104 err = cyruntime.cudaMemcpy3DAsync(cyp_ptr, cystream)
21105 return (_dict_cudaError_t[err],)
21107@cython.embedsignature(True)
21108def cudaMemcpy3DPeerAsync(p : Optional[cudaMemcpy3DPeerParms], stream):
21109 """ Copies memory between devices asynchronously.
21111 Perform a 3D memory copy according to the parameters specified in `p`.
21112 See the definition of the :py:obj:`~.cudaMemcpy3DPeerParms` structure
21113 for documentation of its parameters.
21115 Parameters
21116 ----------
21117 p : :py:obj:`~.cudaMemcpy3DPeerParms`
21118 Parameters for the memory copy
21119 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
21120 Stream identifier
21122 Returns
21123 -------
21124 cudaError_t
21125 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidPitchValue`
21127 See Also
21128 --------
21129 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpy3DPeerAsync`
21130 """
21131 cdef cyruntime.cudaStream_t cystream
21132 if stream is None:
21133 pstream = 0
21134 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
21135 pstream = int(stream)
21136 else:
21137 pstream = int(cudaStream_t(stream))
21138 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
21139 cdef cyruntime.cudaMemcpy3DPeerParms* cyp_ptr = p._pvt_ptr if p is not None else NULL
21140 with nogil:
21141 err = cyruntime.cudaMemcpy3DPeerAsync(cyp_ptr, cystream)
21142 return (_dict_cudaError_t[err],)
21144@cython.embedsignature(True)
21145def cudaMemGetInfo():
21146 """ Gets free and total device memory.
21148 Returns in `*total` the total amount of memory available to the
21149 current context. Returns in `*free` the amount of memory on the device
21150 that is free according to the OS. CUDA is not guaranteed to be able to
21151 allocate all of the memory that the OS reports as free. In a multi-
21152 tenant situation, the free estimate is subject to a race condition: an
21153 allocation or free performed by a different process, or by a different
21154 thread in the same process, between the time free memory is estimated
21155 and the time it is reported will cause the reported free value to
21156 deviate from the actual amount of free memory.
21158 The integrated GPU on Tegra shares memory with the CPU and other
21159 components of the SoC. The free and total values returned by the API
21160 exclude the SWAP memory space maintained by the OS on some platforms.
21161 The OS may move some of the memory pages into swap area as the GPU or
21162 CPU allocates or accesses memory. See the Tegra app note on how to
21163 calculate total and free memory on Tegra.
21165 Returns
21166 -------
21167 cudaError_t
21168 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorLaunchFailure`
21169 free : int
21170 Returned free memory in bytes
21171 total : int
21172 Returned total memory in bytes
21174 See Also
21175 --------
21176 :py:obj:`~.cuMemGetInfo`
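Examples
--------
A minimal sketch; the returned values are a point-in-time snapshot
and may already be stale when used (see the race-condition note
above):

>>> from cuda.bindings import runtime
>>> err, free, total = runtime.cudaMemGetInfo()
>>> if err == runtime.cudaError_t.cudaSuccess:
...     print(f"{free} of {total} bytes free")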
21177 """
21178 cdef size_t free = 0
21179 cdef size_t total = 0
21180 with nogil:
21181 err = cyruntime.cudaMemGetInfo(&free, &total)
21182 if err != cyruntime.cudaSuccess:
21183 return (_dict_cudaError_t[err], None, None)
21184 return (_dict_cudaError_t[err], free, total)
21186@cython.embedsignature(True)
21187def cudaArrayGetInfo(array):
21188 """ Gets info about the specified cudaArray.
21190 Returns in `*desc`, `*extent` and `*flags` respectively, the type,
21191 shape and flags of `array`.
21193 Any of `*desc`, `*extent` and `*flags` may be specified as NULL.
21195 Parameters
21196 ----------
21197 array : :py:obj:`~.cudaArray_t`
21198 The :py:obj:`~.cudaArray` to get info for
21200 Returns
21201 -------
21202 cudaError_t
21203 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
21204 desc : :py:obj:`~.cudaChannelFormatDesc`
21205 Returned array type
21206 extent : :py:obj:`~.cudaExtent`
21207 Returned array shape. 2D arrays will have depth of zero
21208 flags : unsigned int
21209 Returned array flags
21211 See Also
21212 --------
21213 :py:obj:`~.cuArrayGetDescriptor`, :py:obj:`~.cuArray3DGetDescriptor`
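Examples
--------
A minimal sketch, assuming a CUDA device; it allocates a small 2D
float array and reads back its descriptor, extent and flags:

>>> from cuda.bindings import runtime
>>> err, desc = runtime.cudaCreateChannelDesc(
...     32, 0, 0, 0, runtime.cudaChannelFormatKind.cudaChannelFormatKindFloat)
>>> err, arr = runtime.cudaMallocArray(desc, 64, 32, 0)
>>> err, desc2, extent, flags = runtime.cudaArrayGetInfo(arr)
>>> err, = runtime.cudaFreeArray(arr)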
21214 """
21215 cdef cyruntime.cudaArray_t cyarray
21216 if array is None:
21217 parray = 0
21218 elif isinstance(array, (cudaArray_t,)):
21219 parray = int(array)
21220 else:
21221 parray = int(cudaArray_t(array))
21222 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
21223 cdef cudaChannelFormatDesc desc = cudaChannelFormatDesc()
21224 cdef cudaExtent extent = cudaExtent()
21225 cdef unsigned int flags = 0
21226 with nogil:
21227 err = cyruntime.cudaArrayGetInfo(<cyruntime.cudaChannelFormatDesc*>desc._pvt_ptr, <cyruntime.cudaExtent*>extent._pvt_ptr, &flags, cyarray)
21228 if err != cyruntime.cudaSuccess:
21229 return (_dict_cudaError_t[err], None, None, None)
21230 return (_dict_cudaError_t[err], desc, extent, flags)
21232@cython.embedsignature(True)
21233def cudaArrayGetPlane(hArray, unsigned int planeIdx):
21234 """ Gets a CUDA array plane from a CUDA array.
21236 Returns in `pPlaneArray` a CUDA array that represents a single format
21237 plane of the CUDA array `hArray`.
21239 If `planeIdx` is greater than the maximum number of planes in this
21240 array, or if the array does not have a multi-planar format (e.g.,
21241 :py:obj:`~.cudaChannelFormatKindNV12`), then
21242 :py:obj:`~.cudaErrorInvalidValue` is returned.
21244 Note that if the `hArray` has format
21245 :py:obj:`~.cudaChannelFormatKindNV12`, then passing in 0 for `planeIdx`
21246 returns a CUDA array of the same size as `hArray` but with one 8-bit
21247 channel and :py:obj:`~.cudaChannelFormatKindUnsigned` as its format
21248 kind. If 1 is passed for `planeIdx`, then the returned CUDA array has
21249 half the height and width of `hArray` with two 8-bit channels and
21250 :py:obj:`~.cudaChannelFormatKindUnsigned` as its format kind.
21252 Parameters
21253 ----------
21254 hArray : :py:obj:`~.cudaArray_t`
21255 CUDA array
21256 planeIdx : unsigned int
21257 Plane index
21259 Returns
21260 -------
21261 cudaError_t
21262 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
21263 pPlaneArray : :py:obj:`~.cudaArray_t`
21264 Returned CUDA array referenced by the `planeIdx`
21266 See Also
21267 --------
21268 :py:obj:`~.cuArrayGetPlane`
21269 """
21270 cdef cyruntime.cudaArray_t cyhArray
21271 if hArray is None:
21272 phArray = 0
21273 elif isinstance(hArray, (cudaArray_t,)):
21274 phArray = int(hArray)
21275 else:
21276 phArray = int(cudaArray_t(hArray))
21277 cyhArray = <cyruntime.cudaArray_t><void_ptr>phArray
21278 cdef cudaArray_t pPlaneArray = cudaArray_t()
21279 with nogil:
21280 err = cyruntime.cudaArrayGetPlane(<cyruntime.cudaArray_t*>pPlaneArray._pvt_ptr, cyhArray, planeIdx)
21281 if err != cyruntime.cudaSuccess:
21282 return (_dict_cudaError_t[err], None)
21283 return (_dict_cudaError_t[err], pPlaneArray)
21285@cython.embedsignature(True)
21286def cudaArrayGetMemoryRequirements(array, int device):
21287 """ Returns the memory requirements of a CUDA array.
21289 Returns the memory requirements of a CUDA array in
21290 `memoryRequirements`. If the CUDA array is not allocated with the flag
21291 :py:obj:`~.cudaArrayDeferredMapping`, :py:obj:`~.cudaErrorInvalidValue`
21292 will be returned.
21294 The returned value in :py:obj:`~.cudaArrayMemoryRequirements.size`
21295 represents the total size of the CUDA array. The returned value in
21296 :py:obj:`~.cudaArrayMemoryRequirements.alignment` represents the
21297 alignment necessary for mapping the CUDA array.
21299 Parameters
21300 ----------
21301 array : :py:obj:`~.cudaArray_t`
21302 CUDA array to get the memory requirements of
21303 device : int
21304 Device to get the memory requirements for
21306 Returns
21307 -------
21308 cudaError_t
21309 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
21310 memoryRequirements : :py:obj:`~.cudaArrayMemoryRequirements`
21311 Pointer to :py:obj:`~.cudaArrayMemoryRequirements`
21313 See Also
21314 --------
21315 :py:obj:`~.cudaMipmappedArrayGetMemoryRequirements`
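Examples
--------
A minimal sketch, assuming a device that supports deferred mapping;
the array must have been created with
:py:obj:`~.cudaArrayDeferredMapping`:

>>> from cuda.bindings import runtime
>>> err, desc = runtime.cudaCreateChannelDesc(
...     32, 0, 0, 0, runtime.cudaChannelFormatKind.cudaChannelFormatKindFloat)
>>> err, arr = runtime.cudaMallocArray(
...     desc, 64, 64, runtime.cudaArrayDeferredMapping)
>>> err, req = runtime.cudaArrayGetMemoryRequirements(arr, 0)
>>> if err == runtime.cudaError_t.cudaSuccess:
...     print(req.size, req.alignment)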
21316 """
21317 cdef cyruntime.cudaArray_t cyarray
21318 if array is None:
21319 parray = 0
21320 elif isinstance(array, (cudaArray_t,)):
21321 parray = int(array)
21322 else:
21323 parray = int(cudaArray_t(array))
21324 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
21325 cdef cudaArrayMemoryRequirements memoryRequirements = cudaArrayMemoryRequirements()
21326 with nogil:
21327 err = cyruntime.cudaArrayGetMemoryRequirements(<cyruntime.cudaArrayMemoryRequirements*>memoryRequirements._pvt_ptr, cyarray, device)
21328 if err != cyruntime.cudaSuccess:
21329 return (_dict_cudaError_t[err], None)
21330 return (_dict_cudaError_t[err], memoryRequirements)
21332@cython.embedsignature(True)
21333def cudaMipmappedArrayGetMemoryRequirements(mipmap, int device):
21334 """ Returns the memory requirements of a CUDA mipmapped array.
21336 Returns the memory requirements of a CUDA mipmapped array in
21337 `memoryRequirements`. If the CUDA mipmapped array is not allocated
21338 with the flag :py:obj:`~.cudaArrayDeferredMapping`,
21339 :py:obj:`~.cudaErrorInvalidValue` will be returned.
21341 The returned value in :py:obj:`~.cudaArrayMemoryRequirements.size`
21342 represents the total size of the CUDA mipmapped array. The returned
21343 value in :py:obj:`~.cudaArrayMemoryRequirements.alignment` represents
21344 the alignment necessary for mapping the CUDA mipmapped array.
21346 Parameters
21347 ----------
21348 mipmap : :py:obj:`~.cudaMipmappedArray_t`
21349 CUDA mipmapped array to get the memory requirements of
21350 device : int
21351 Device to get the memory requirements for
21353 Returns
21354 -------
21355 cudaError_t
21356 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
21357 memoryRequirements : :py:obj:`~.cudaArrayMemoryRequirements`
21358 Pointer to :py:obj:`~.cudaArrayMemoryRequirements`
21360 See Also
21361 --------
21362 :py:obj:`~.cudaArrayGetMemoryRequirements`
21363 """
21364 cdef cyruntime.cudaMipmappedArray_t cymipmap
21365 if mipmap is None:
21366 pmipmap = 0
21367 elif isinstance(mipmap, (cudaMipmappedArray_t,)):
21368 pmipmap = int(mipmap)
21369 else:
21370 pmipmap = int(cudaMipmappedArray_t(mipmap))
21371 cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
21372 cdef cudaArrayMemoryRequirements memoryRequirements = cudaArrayMemoryRequirements()
21373 with nogil:
21374 err = cyruntime.cudaMipmappedArrayGetMemoryRequirements(<cyruntime.cudaArrayMemoryRequirements*>memoryRequirements._pvt_ptr, cymipmap, device)
21375 if err != cyruntime.cudaSuccess:
21376 return (_dict_cudaError_t[err], None)
21377 return (_dict_cudaError_t[err], memoryRequirements)
21379@cython.embedsignature(True)
21380def cudaArrayGetSparseProperties(array):
21381 """ Returns the layout properties of a sparse CUDA array.
21383 Returns the layout properties of a sparse CUDA array in
21384 `sparseProperties`. If the CUDA array is not allocated with the flag
21385 :py:obj:`~.cudaArraySparse`, :py:obj:`~.cudaErrorInvalidValue` will be
21386 returned.
21388 If the returned value in :py:obj:`~.cudaArraySparseProperties.flags`
21389 contains :py:obj:`~.cudaArraySparsePropertiesSingleMipTail`, then
21390 :py:obj:`~.cudaArraySparseProperties.miptailSize` represents the total
21391 size of the array. Otherwise, it will be zero. Also, the returned value
21392 in :py:obj:`~.cudaArraySparseProperties.miptailFirstLevel` is always
21393 zero. Note that the `array` must have been allocated using
21394 :py:obj:`~.cudaMallocArray` or :py:obj:`~.cudaMalloc3DArray`. For CUDA
21395 arrays obtained using :py:obj:`~.cudaMipmappedArrayGetLevel`,
21396 :py:obj:`~.cudaErrorInvalidValue` will be returned. Instead,
21397 :py:obj:`~.cudaMipmappedArrayGetSparseProperties` must be used to
21398 obtain the sparse properties of the entire CUDA mipmapped array to
21399 which `array` belongs.
21401 Parameters
21402 ----------
21403 array : :py:obj:`~.cudaArray_t`
21404 The CUDA array to get the sparse properties of
21406 Returns
21407 -------
21408 cudaError_t
21409 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
21410 sparseProperties : :py:obj:`~.cudaArraySparseProperties`
21411 Pointer to return the :py:obj:`~.cudaArraySparseProperties`
21413 See Also
21414 --------
21415 :py:obj:`~.cudaMipmappedArrayGetSparseProperties`, :py:obj:`~.cuMemMapArrayAsync`
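Examples
--------
A minimal sketch, assuming a device with sparse CUDA array support;
the array must have been allocated with :py:obj:`~.cudaArraySparse`:

>>> from cuda.bindings import runtime
>>> err, desc = runtime.cudaCreateChannelDesc(
...     32, 0, 0, 0, runtime.cudaChannelFormatKind.cudaChannelFormatKindFloat)
>>> err, arr = runtime.cudaMallocArray(desc, 1024, 1024, runtime.cudaArraySparse)
>>> err, props = runtime.cudaArrayGetSparseProperties(arr)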
21416 """
21417 cdef cyruntime.cudaArray_t cyarray
21418 if array is None:
21419 parray = 0
21420 elif isinstance(array, (cudaArray_t,)):
21421 parray = int(array)
21422 else:
21423 parray = int(cudaArray_t(array))
21424 cyarray = <cyruntime.cudaArray_t><void_ptr>parray
21425 cdef cudaArraySparseProperties sparseProperties = cudaArraySparseProperties()
21426 with nogil:
21427 err = cyruntime.cudaArrayGetSparseProperties(<cyruntime.cudaArraySparseProperties*>sparseProperties._pvt_ptr, cyarray)
21428 if err != cyruntime.cudaSuccess:
21429 return (_dict_cudaError_t[err], None)
21430 return (_dict_cudaError_t[err], sparseProperties)
21432@cython.embedsignature(True)
21433def cudaMipmappedArrayGetSparseProperties(mipmap):
21434 """ Returns the layout properties of a sparse CUDA mipmapped array.
21436 Returns the sparse array layout properties in `sparseProperties`. If
21437 the CUDA mipmapped array is not allocated with the flag
21438 :py:obj:`~.cudaArraySparse`, :py:obj:`~.cudaErrorInvalidValue` will be
21439 returned.
21441 For non-layered CUDA mipmapped arrays,
21442 :py:obj:`~.cudaArraySparseProperties.miptailSize` returns the size of
21443 the mip tail region. The mip tail region includes all mip levels whose
21444 width, height or depth is less than that of the tile. For layered CUDA
21445 mipmapped arrays, if :py:obj:`~.cudaArraySparseProperties.flags`
21446 contains :py:obj:`~.cudaArraySparsePropertiesSingleMipTail`, then
21447 :py:obj:`~.cudaArraySparseProperties.miptailSize` specifies the size of
21448 the mip tail of all layers combined. Otherwise,
21449 :py:obj:`~.cudaArraySparseProperties.miptailSize` specifies mip tail
21450 size per layer. The returned value of
21451 :py:obj:`~.cudaArraySparseProperties.miptailFirstLevel` is valid only
21452 if :py:obj:`~.cudaArraySparseProperties.miptailSize` is non-zero.
21454 Parameters
21455 ----------
21456 mipmap : :py:obj:`~.cudaMipmappedArray_t`
21457 The CUDA mipmapped array to get the sparse properties of
21459 Returns
21460 -------
21461 cudaError_t
21462 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
21463 sparseProperties : :py:obj:`~.cudaArraySparseProperties`
21464 Pointer to return :py:obj:`~.cudaArraySparseProperties`
21466 See Also
21467 --------
21468 :py:obj:`~.cudaArrayGetSparseProperties`, :py:obj:`~.cuMemMapArrayAsync`
21469 """
21470 cdef cyruntime.cudaMipmappedArray_t cymipmap
21471 if mipmap is None:
21472 pmipmap = 0
21473 elif isinstance(mipmap, (cudaMipmappedArray_t,)):
21474 pmipmap = int(mipmap)
21475 else:
21476 pmipmap = int(cudaMipmappedArray_t(mipmap))
21477 cymipmap = <cyruntime.cudaMipmappedArray_t><void_ptr>pmipmap
21478 cdef cudaArraySparseProperties sparseProperties = cudaArraySparseProperties()
21479 with nogil:
21480 err = cyruntime.cudaMipmappedArrayGetSparseProperties(<cyruntime.cudaArraySparseProperties*>sparseProperties._pvt_ptr, cymipmap)
21481 if err != cyruntime.cudaSuccess:
21482 return (_dict_cudaError_t[err], None)
21483 return (_dict_cudaError_t[err], sparseProperties)
21485@cython.embedsignature(True)
21486def cudaMemcpy(dst, src, size_t count, kind not None : cudaMemcpyKind):
21487 """ Copies data between host and device.
21489 Copies `count` bytes from the memory area pointed to by `src` to the
21490 memory area pointed to by `dst`, where `kind` specifies the direction
21491 of the copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
21492 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
21493 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21494 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21495 type of transfer is inferred from the pointer values. However,
21496 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21497 unified virtual addressing. Calling :py:obj:`~.cudaMemcpy()` with dst
21498 and src pointers that do not match the direction of the copy results in
21499 undefined behavior.
21501 \note_sync
21503 Parameters
21504 ----------
21505 dst : Any
21506 Destination memory address
21507 src : Any
21508 Source memory address
21509 count : size_t
21510 Size in bytes to copy
21511 kind : :py:obj:`~.cudaMemcpyKind`
21512 Type of transfer
21514 Returns
21515 -------
21516 cudaError_t
21517 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21519 See Also
21520 --------
21521 :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyDtoH`, :py:obj:`~.cuMemcpyHtoD`, :py:obj:`~.cuMemcpyDtoD`, :py:obj:`~.cuMemcpy`
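Examples
--------
A minimal sketch of a host-to-device round trip (assumes NumPy and a
CUDA device); objects exposing the Python buffer protocol, such as
NumPy arrays, can typically be passed directly as `dst`/`src`:

>>> import numpy as np
>>> from cuda.bindings import runtime
>>> host = np.arange(256, dtype=np.float32)
>>> err, dptr = runtime.cudaMalloc(host.nbytes)
>>> err, = runtime.cudaMemcpy(dptr, host, host.nbytes,
...     runtime.cudaMemcpyKind.cudaMemcpyHostToDevice)
>>> out = np.empty_like(host)
>>> err, = runtime.cudaMemcpy(out, dptr, out.nbytes,
...     runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost)
>>> err, = runtime.cudaFree(dptr)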
21522 """
21523 cydst = _HelperInputVoidPtr(dst)
21524 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
21525 cysrc = _HelperInputVoidPtr(src)
21526 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
21527 cdef cyruntime.cudaMemcpyKind cykind = kind.value
21528 with nogil:
21529 err = cyruntime.cudaMemcpy(cydst_ptr, cysrc_ptr, count, cykind)
21530 return (_dict_cudaError_t[err],)
21532@cython.embedsignature(True)
21533def cudaMemcpyPeer(dst, int dstDevice, src, int srcDevice, size_t count):
21534 """ Copies memory between two devices.
21536 Copies memory from one device to memory on another device. `dst` is the
21537 base device pointer of the destination memory and `dstDevice` is the
21538 destination device. `src` is the base device pointer of the source
21539 memory and `srcDevice` is the source device. `count` specifies the
21540 number of bytes to copy.
21542 Note that this function is asynchronous with respect to the host, but
21543 serialized with respect to all pending and future asynchronous work in
21544 the current device, `srcDevice`, and `dstDevice` (use
21545 :py:obj:`~.cudaMemcpyPeerAsync` to avoid this synchronization).
21547 Parameters
21548 ----------
21549 dst : Any
21550 Destination device pointer
21551 dstDevice : int
21552 Destination device
21553 src : Any
21554 Source device pointer
21555 srcDevice : int
21556 Source device
21557 count : size_t
21558 Size of memory copy in bytes
21560 Returns
21561 -------
21562 cudaError_t
21563 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
21565 See Also
21566 --------
21567 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpyPeerAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpyPeer`
21568 """
21569 cydst = _HelperInputVoidPtr(dst)
21570 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
21571 cysrc = _HelperInputVoidPtr(src)
21572 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
21573 with nogil:
21574 err = cyruntime.cudaMemcpyPeer(cydst_ptr, dstDevice, cysrc_ptr, srcDevice, count)
21575 return (_dict_cudaError_t[err],)
21577@cython.embedsignature(True)
21578def cudaMemcpy2D(dst, size_t dpitch, src, size_t spitch, size_t width, size_t height, kind not None : cudaMemcpyKind):
21579 """ Copies data between host and device.
21581 Copies a matrix (`height` rows of `width` bytes each) from the memory
21582 area pointed to by `src` to the memory area pointed to by `dst`, where
21583 `kind` specifies the direction of the copy, and must be one of
21584 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
21585 :py:obj:`~.cudaMemcpyDeviceToHost`,
21586 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21587 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21588 type of transfer is inferred from the pointer values. However,
21589 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21590 unified virtual addressing. `dpitch` and `spitch` are the widths in
21591 memory in bytes of the 2D arrays pointed to by `dst` and `src`,
21592 including any padding added to the end of each row. The memory areas
21593 may not overlap. `width` must not exceed either `dpitch` or `spitch`.
21594 Calling :py:obj:`~.cudaMemcpy2D()` with `dst` and `src` pointers that
21595 do not match the direction of the copy results in undefined
21596 behavior. :py:obj:`~.cudaMemcpy2D()` returns an error if `dpitch` or
21597 `spitch` exceeds the maximum allowed.
21599 Parameters
21600 ----------
21601 dst : Any
21602 Destination memory address
21603 dpitch : size_t
21604 Pitch of destination memory
21605 src : Any
21606 Source memory address
21607 spitch : size_t
21608 Pitch of source memory
21609 width : size_t
21610 Width of matrix transfer (columns in bytes)
21611 height : size_t
21612 Height of matrix transfer (rows)
21613 kind : :py:obj:`~.cudaMemcpyKind`
21614 Type of transfer
21616 Returns
21617 -------
21618 cudaError_t
21619 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21621 See Also
21622 --------
21623 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`
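Examples
--------
A minimal sketch copying a tightly packed host matrix into a pitched
device allocation (assumes NumPy and a CUDA device):

>>> import numpy as np
>>> from cuda.bindings import runtime
>>> h, wbytes = 64, 256
>>> host = np.zeros((h, wbytes), dtype=np.uint8)
>>> err, dptr, dpitch = runtime.cudaMallocPitch(wbytes, h)
>>> err, = runtime.cudaMemcpy2D(dptr, dpitch, host, wbytes, wbytes, h,
...     runtime.cudaMemcpyKind.cudaMemcpyHostToDevice)
>>> err, = runtime.cudaFree(dptr)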
21624 """
21625 cydst = _HelperInputVoidPtr(dst)
21626 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
21627 cysrc = _HelperInputVoidPtr(src)
21628 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
21629 cdef cyruntime.cudaMemcpyKind cykind = kind.value
21630 with nogil:
21631 err = cyruntime.cudaMemcpy2D(cydst_ptr, dpitch, cysrc_ptr, spitch, width, height, cykind)
21632 return (_dict_cudaError_t[err],)
21634@cython.embedsignature(True)
21635def cudaMemcpy2DToArray(dst, size_t wOffset, size_t hOffset, src, size_t spitch, size_t width, size_t height, kind not None : cudaMemcpyKind):
21636 """ Copies data between host and device.
21638 Copies a matrix (`height` rows of `width` bytes each) from the memory
21639 area pointed to by `src` to the CUDA array `dst` starting at `hOffset`
21640 rows and `wOffset` bytes from the upper left corner, where `kind`
21641 specifies the direction of the copy, and must be one of
21642 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
21643 :py:obj:`~.cudaMemcpyDeviceToHost`,
21644 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21645 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21646 type of transfer is inferred from the pointer values. However,
21647 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21648 unified virtual addressing. `spitch` is the width in memory in bytes of
21649 the 2D array pointed to by `src`, including any padding added to the
21650 end of each row. `wOffset` + `width` must not exceed the width of the
21651 CUDA array `dst`. `width` must not exceed `spitch`.
21652 :py:obj:`~.cudaMemcpy2DToArray()` returns an error if `spitch` exceeds
21653 the maximum allowed.
21655 Parameters
21656 ----------
21657 dst : :py:obj:`~.cudaArray_t`
21658 Destination memory address
21659 wOffset : size_t
21660 Destination starting X offset (columns in bytes)
21661 hOffset : size_t
21662 Destination starting Y offset (rows)
21663 src : Any
21664 Source memory address
21665 spitch : size_t
21666 Pitch of source memory
21667 width : size_t
21668 Width of matrix transfer (columns in bytes)
21669 height : size_t
21670 Height of matrix transfer (rows)
21671 kind : :py:obj:`~.cudaMemcpyKind`
21672 Type of transfer
21674 Returns
21675 -------
21676 cudaError_t
21677 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21679 See Also
21680 --------
21681 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`
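Examples
--------
A minimal sketch copying a host matrix into a CUDA array of 8-bit
elements (assumes NumPy and a CUDA device):

>>> import numpy as np
>>> from cuda.bindings import runtime
>>> h, wbytes = 64, 256
>>> host = np.zeros((h, wbytes), dtype=np.uint8)
>>> err, desc = runtime.cudaCreateChannelDesc(
...     8, 0, 0, 0, runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsigned)
>>> err, arr = runtime.cudaMallocArray(desc, wbytes, h, 0)
>>> err, = runtime.cudaMemcpy2DToArray(arr, 0, 0, host, wbytes, wbytes, h,
...     runtime.cudaMemcpyKind.cudaMemcpyHostToDevice)
>>> err, = runtime.cudaFreeArray(arr)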
21682 """
21683 cdef cyruntime.cudaArray_t cydst
21684 if dst is None:
21685 pdst = 0
21686 elif isinstance(dst, (cudaArray_t,)):
21687 pdst = int(dst)
21688 else:
21689 pdst = int(cudaArray_t(dst))
21690 cydst = <cyruntime.cudaArray_t><void_ptr>pdst
21691 cysrc = _HelperInputVoidPtr(src)
21692 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
21693 cdef cyruntime.cudaMemcpyKind cykind = kind.value
21694 with nogil:
21695 err = cyruntime.cudaMemcpy2DToArray(cydst, wOffset, hOffset, cysrc_ptr, spitch, width, height, cykind)
21696 return (_dict_cudaError_t[err],)
21698@cython.embedsignature(True)
21699def cudaMemcpy2DFromArray(dst, size_t dpitch, src, size_t wOffset, size_t hOffset, size_t width, size_t height, kind not None : cudaMemcpyKind):
21700 """ Copies data between host and device.
21702 Copies a matrix (`height` rows of `width` bytes each) from the CUDA
21703 array `src` starting at `hOffset` rows and `wOffset` bytes from the
21704 upper left corner to the memory area pointed to by `dst`, where `kind`
21705 specifies the direction of the copy, and must be one of
21706 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
21707 :py:obj:`~.cudaMemcpyDeviceToHost`,
21708 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21709 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21710 type of transfer is inferred from the pointer values. However,
21711 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21712 unified virtual addressing. `dpitch` is the width in memory in bytes of
21713 the 2D array pointed to by `dst`, including any padding added to the
21714 end of each row. `wOffset` + `width` must not exceed the width of the
21715 CUDA array `src`. `width` must not exceed `dpitch`.
21716 :py:obj:`~.cudaMemcpy2DFromArray()` returns an error if `dpitch`
21717 exceeds the maximum allowed.
21719 Parameters
21720 ----------
21721 dst : Any
21722 Destination memory address
21723 dpitch : size_t
21724 Pitch of destination memory
21725 src : :py:obj:`~.cudaArray_const_t`
21726 Source memory address
21727 wOffset : size_t
21728 Source starting X offset (columns in bytes)
21729 hOffset : size_t
21730 Source starting Y offset (rows)
21731 width : size_t
21732 Width of matrix transfer (columns in bytes)
21733 height : size_t
21734 Height of matrix transfer (rows)
21735 kind : :py:obj:`~.cudaMemcpyKind`
21736 Type of transfer
21738 Returns
21739 -------
21740 cudaError_t
21741 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21743 See Also
21744 --------
21745 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`
21746 """
21747 cdef cyruntime.cudaArray_const_t cysrc
21748 if src is None:
21749 psrc = 0
21750 elif isinstance(src, (cudaArray_const_t,)):
21751 psrc = int(src)
21752 else:
21753 psrc = int(cudaArray_const_t(src))
21754 cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
21755 cydst = _HelperInputVoidPtr(dst)
21756 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
21757 cdef cyruntime.cudaMemcpyKind cykind = kind.value
21758 with nogil:
21759 err = cyruntime.cudaMemcpy2DFromArray(cydst_ptr, dpitch, cysrc, wOffset, hOffset, width, height, cykind)
21760 return (_dict_cudaError_t[err],)
21762@cython.embedsignature(True)
21763def cudaMemcpy2DArrayToArray(dst, size_t wOffsetDst, size_t hOffsetDst, src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, kind not None : cudaMemcpyKind):
21764 """ Copies data between host and device.
21766 Copies a matrix (`height` rows of `width` bytes each) from the CUDA
21767 array `src` starting at `hOffsetSrc` rows and `wOffsetSrc` bytes from
21768 the upper left corner to the CUDA array `dst` starting at `hOffsetDst`
21769 rows and `wOffsetDst` bytes from the upper left corner, where `kind`
21770 specifies the direction of the copy, and must be one of
21771 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
21772 :py:obj:`~.cudaMemcpyDeviceToHost`,
21773 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21774 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21775 type of transfer is inferred from the pointer values. However,
21776 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21777 unified virtual addressing. `wOffsetDst` + `width` must not exceed the
21778 width of the CUDA array `dst`. `wOffsetSrc` + `width` must not exceed
21779 the width of the CUDA array `src`.
21781 Parameters
21782 ----------
21783 dst : :py:obj:`~.cudaArray_t`
21784 Destination memory address
21785 wOffsetDst : size_t
21786 Destination starting X offset (columns in bytes)
21787 hOffsetDst : size_t
21788 Destination starting Y offset (rows)
21789 src : :py:obj:`~.cudaArray_const_t`
21790 Source memory address
21791 wOffsetSrc : size_t
21792 Source starting X offset (columns in bytes)
21793 hOffsetSrc : size_t
21794 Source starting Y offset (rows)
21795 width : size_t
21796 Width of matrix transfer (columns in bytes)
21797 height : size_t
21798 Height of matrix transfer (rows)
21799 kind : :py:obj:`~.cudaMemcpyKind`
21800 Type of transfer
21802 Returns
21803 -------
21804 cudaError_t
21805 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21807 See Also
21808 --------
21809 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2D`, :py:obj:`~.cuMemcpy2DUnaligned`
21810 """
21811 cdef cyruntime.cudaArray_const_t cysrc
21812 if src is None:
21813 psrc = 0
21814 elif isinstance(src, (cudaArray_const_t,)):
21815 psrc = int(src)
21816 else:
21817 psrc = int(cudaArray_const_t(src))
21818 cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
21819 cdef cyruntime.cudaArray_t cydst
21820 if dst is None:
21821 pdst = 0
21822 elif isinstance(dst, (cudaArray_t,)):
21823 pdst = int(dst)
21824 else:
21825 pdst = int(cudaArray_t(dst))
21826 cydst = <cyruntime.cudaArray_t><void_ptr>pdst
21827 cdef cyruntime.cudaMemcpyKind cykind = kind.value
21828 with nogil:
21829 err = cyruntime.cudaMemcpy2DArrayToArray(cydst, wOffsetDst, hOffsetDst, cysrc, wOffsetSrc, hOffsetSrc, width, height, cykind)
21830 return (_dict_cudaError_t[err],)
21832@cython.embedsignature(True)
21833def cudaMemcpyAsync(dst, src, size_t count, kind not None : cudaMemcpyKind, stream):
21834 """ Copies data between host and device.
21836 Copies `count` bytes from the memory area pointed to by `src` to the
21837 memory area pointed to by `dst`, where `kind` specifies the direction
21838 of the copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
21839 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
21840 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
21841 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
21842 type of transfer is inferred from the pointer values. However,
21843 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
21844 unified virtual addressing.
21846 The memory areas may not overlap. Calling :py:obj:`~.cudaMemcpyAsync()`
21847 with `dst` and `src` pointers that do not match the direction of the
21848 copy results in undefined behavior.
21850 :py:obj:`~.cudaMemcpyAsync()` is asynchronous with respect to the host,
21851 so the call may return before the copy is complete. The copy can
21852 optionally be associated with a stream by passing a non-zero `stream`
21853 argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
21854 :py:obj:`~.cudaMemcpyDeviceToHost` and the `stream` is non-zero, the
21855 copy may overlap with operations in other streams.
21857 The device version of this function only handles device to device
21858 copies and cannot be given local or shared pointers.
21860 Parameters
21861 ----------
21862 dst : Any
21863 Destination memory address
21864 src : Any
21865 Source memory address
21866 count : size_t
21867 Size in bytes to copy
21868 kind : :py:obj:`~.cudaMemcpyKind`
21869 Type of transfer
21870 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
21871 Stream identifier
21873 Returns
21874 -------
21875 cudaError_t
21876 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
21878 See Also
21879 --------
21880 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAsync`, :py:obj:`~.cuMemcpyDtoHAsync`, :py:obj:`~.cuMemcpyHtoDAsync`, :py:obj:`~.cuMemcpyDtoDAsync`
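Examples
--------
A minimal sketch; for the copy to overlap with other work, the host
buffer should be page-locked, e.g. allocated with
:py:obj:`~.cudaMallocHost`:

>>> from cuda.bindings import runtime
>>> nbytes = 1 << 20
>>> err, hptr = runtime.cudaMallocHost(nbytes)
>>> err, dptr = runtime.cudaMalloc(nbytes)
>>> err, stream = runtime.cudaStreamCreate()
>>> err, = runtime.cudaMemcpyAsync(dptr, hptr, nbytes,
...     runtime.cudaMemcpyKind.cudaMemcpyHostToDevice, stream)
>>> err, = runtime.cudaStreamSynchronize(stream)
>>> err, = runtime.cudaFreeHost(hptr)
>>> err, = runtime.cudaFree(dptr)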
21881 """
21882 cdef cyruntime.cudaStream_t cystream
21883 if stream is None:
21884 pstream = 0
21885 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
21886 pstream = int(stream)
21887 else:
21888 pstream = int(cudaStream_t(stream))
21889 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
21890 cydst = _HelperInputVoidPtr(dst)
21891 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
21892 cysrc = _HelperInputVoidPtr(src)
21893 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
21894 cdef cyruntime.cudaMemcpyKind cykind = kind.value
21895 with nogil:
21896 err = cyruntime.cudaMemcpyAsync(cydst_ptr, cysrc_ptr, count, cykind, cystream)
21897 return (_dict_cudaError_t[err],)
21899@cython.embedsignature(True)
21900def cudaMemcpyPeerAsync(dst, int dstDevice, src, int srcDevice, size_t count, stream):
21901 """ Copies memory between two devices asynchronously.
21903 Copies memory from one device to memory on another device. `dst` is the
21904 base device pointer of the destination memory and `dstDevice` is the
21905 destination device. `src` is the base device pointer of the source
21906 memory and `srcDevice` is the source device. `count` specifies the
21907 number of bytes to copy.
21909 Note that this function is asynchronous with respect to the host and
21910 all work on other devices.
21912 Parameters
21913 ----------
21914 dst : Any
21915 Destination device pointer
21916 dstDevice : int
21917 Destination device
21918 src : Any
21919 Source device pointer
21920 srcDevice : int
21921 Source device
21922 count : size_t
21923 Size of memory copy in bytes
21924 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
21925 Stream identifier
21927 Returns
21928 -------
21929 cudaError_t
21930 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
21932 See Also
21933 --------
21934 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cuMemcpyPeerAsync`
21935 """
21936 cdef cyruntime.cudaStream_t cystream
21937 if stream is None:
21938 pstream = 0
21939 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
21940 pstream = int(stream)
21941 else:
21942 pstream = int(cudaStream_t(stream))
21943 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
21944 cydst = _HelperInputVoidPtr(dst)
21945 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
21946 cysrc = _HelperInputVoidPtr(src)
21947 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
21948 with nogil:
21949 err = cyruntime.cudaMemcpyPeerAsync(cydst_ptr, dstDevice, cysrc_ptr, srcDevice, count, cystream)
21950 return (_dict_cudaError_t[err],)
21952@cython.embedsignature(True)
21953def cudaMemcpyBatchAsync(dsts : Optional[tuple[Any] | list[Any]], srcs : Optional[tuple[Any] | list[Any]], sizes : tuple[int] | list[int], size_t count, attrs : Optional[tuple[cudaMemcpyAttributes] | list[cudaMemcpyAttributes]], attrsIdxs : tuple[int] | list[int], size_t numAttrs, stream):
21954 """ Performs a batch of memory copies asynchronously.
21956 Performs a batch of memory copies. The batch as a whole executes in
21957 stream order but copies within a batch are not guaranteed to execute in
21958 any specific order. This API only supports pointer-to-pointer copies.
21959 For copies involving CUDA arrays, please see
21960 :py:obj:`~.cudaMemcpy3DBatchAsync`.
21962 Performs memory copies from source buffers specified in `srcs` to
21963 destination buffers specified in `dsts`. The size of each copy is
21964 specified in `sizes`. All three arrays must be of the same length as
21965 specified by `count`. Since there are no ordering guarantees for copies
21966 within a batch, specifying any dependent copies within a batch will
21967 result in undefined behavior.
21969 Every copy in the batch has to be associated with a set of attributes
21970 specified in the `attrs` array. Each entry in this array can apply to
21971 more than one copy. This can be done by specifying in the `attrsIdxs`
21972 array, the index of the first copy that the corresponding entry in the
21973 `attrs` array applies to. Both `attrs` and `attrsIdxs` must be of the
21974 same length as specified by `numAttrs`. For example, if a batch has 10
21975 copies listed in dst/src/sizes, the first 6 of which have one set of
21976 attributes and the remaining 4 another, then `numAttrs` will be 2,
21977 `attrsIdxs` will be {0, 6} and `attrs` will contain the two sets of
21978 attributes. Note that the first entry in `attrsIdxs` must always be 0.
21979 Also, each entry must be greater than the previous entry and the last
21980 entry should be less than `count`. Furthermore, `numAttrs` must be
21981 less than or equal to `count`.
21983 The :py:obj:`~.cudaMemcpyAttributes.srcAccessOrder` indicates the
21984 source access ordering to be observed for copies associated with the
21985 attribute. If the source access order is set to
21986 :py:obj:`~.cudaMemcpySrcAccessOrderStream`, then the source will be
21987 accessed in stream order. If the source access order is set to
21988 :py:obj:`~.cudaMemcpySrcAccessOrderDuringApiCall` then it indicates
21989 that access to the source pointer can be out of stream order and all
21990 accesses must be complete before the API call returns. This flag is
21991 suited for ephemeral sources (e.g., stack variables) when it's known
21992 that no prior operations in the stream can be accessing the memory and
21993 also that the lifetime of the memory is limited to the scope that the
21994 source variable was declared in. Specifying this flag allows the driver
21995 to optimize the copy and removes the need for the user to synchronize
21996 the stream after the API call. If the source access order is set to
21997 :py:obj:`~.cudaMemcpySrcAccessOrderAny` then it indicates that access
21998 to the source pointer can be out of stream order and the accesses can
21999 happen even after the API call returns. This flag is suited for host
22000 pointers allocated outside CUDA (e.g., via malloc) when it's known that
22001 no prior operations in the stream can be accessing the memory.
22002 Specifying this flag allows the driver to optimize the copy on certain
22003 platforms. Each memcpy operation in the batch must have a valid
22004 :py:obj:`~.cudaMemcpyAttributes` corresponding to it including the
22005 appropriate srcAccessOrder setting, otherwise the API will return
22006 :py:obj:`~.cudaErrorInvalidValue`.
22008 The :py:obj:`~.cudaMemcpyAttributes.srcLocHint` and
22009 :py:obj:`~.cudaMemcpyAttributes.dstLocHint` allows applications to
22010 specify hint locations for operands of a copy when the operand doesn't
22011 have a fixed location. That is, these hints are only applicable for
22012 managed memory pointers on devices where
22013 :py:obj:`~.cudaDevAttrConcurrentManagedAccess` is true or system-
22014 allocated pageable memory on devices where
22015 :py:obj:`~.cudaDevAttrPageableMemoryAccess` is true. For other cases,
22016 these hints are ignored.
22018 The :py:obj:`~.cudaMemcpyAttributes.flags` field can be used to specify
22019 certain flags for copies. Setting the
22020 :py:obj:`~.cudaMemcpyFlagPreferOverlapWithCompute` flag indicates that
22021 the associated copies should preferably overlap with any compute work.
22022 Note that this flag is a hint and can be ignored depending on the
22023 platform and other parameters of the copy.
22025 Parameters
22026 ----------
22027 dsts : list[Any]
22028 Array of destination pointers.
22029 srcs : list[Any]
22030 Array of memcpy source pointers.
22031 sizes : list[int]
22032 Array of sizes for memcpy operations.
22033 count : size_t
22034 Size of `dsts`, `srcs` and `sizes` arrays
22035 attrs : list[:py:obj:`~.cudaMemcpyAttributes`]
22036 Array of memcpy attributes.
22037 attrsIdxs : list[int]
22038 Array of indices to specify which copies each entry in the `attrs`
22039 array applies to. The attributes specified in attrs[k] will be
22040 applied to copies starting from attrsIdxs[k] through attrsIdxs[k+1]
22041 - 1. Also attrs[numAttrs-1] will apply to copies starting from
22042 attrsIdxs[numAttrs-1] through count - 1.
22043 numAttrs : size_t
22044 Size of `attrs` and `attrsIdxs` arrays.
22045 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22046 The stream to enqueue the operations in. Must not be legacy NULL
22047 stream.
22049 Returns
22050 -------
22051 cudaError_t
22052 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
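Examples
--------
A minimal sketch of the attribute layout described above: a two-copy
batch in which a single attribute entry (starting at index 0) covers
both copies. It assumes device pointers `d_a` .. `d_d` of `nbytes`
each and a non-NULL `stream` already exist, and that the access-order
enum type is named `cudaMemcpySrcAccessOrder`:

>>> attr = runtime.cudaMemcpyAttributes()
>>> attr.srcAccessOrder = (
...     runtime.cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderStream)
>>> err, = runtime.cudaMemcpyBatchAsync(
...     [d_b, d_d], [d_a, d_c], [nbytes, nbytes], 2,
...     [attr], [0], 1, stream)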
22053 """
22054 cdef cyruntime.cudaStream_t cystream
22055 if stream is None:
22056 pstream = 0
22057 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22058 pstream = int(stream)
22059 else:
22060 pstream = int(cudaStream_t(stream))
22061 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22062 if not all(isinstance(_x, (int)) for _x in attrsIdxs):
22063 raise TypeError("Argument 'attrsIdxs' is not instance of type (expected tuple[int] or list[int])")
22064 attrs = [] if attrs is None else attrs
22065 if not all(isinstance(_x, (cudaMemcpyAttributes,)) for _x in attrs):
22066 raise TypeError("Argument 'attrs' is not instance of type (expected tuple[cudaMemcpyAttributes] or list[cudaMemcpyAttributes])")
22067 if not all(isinstance(_x, (int)) for _x in sizes):
22068 raise TypeError("Argument 'sizes' is not instance of type (expected tuple[int] or list[int])")
22069 srcs = [] if srcs is None else srcs
22070 dsts = [] if dsts is None else dsts
22071 pylist = [_HelperInputVoidPtr(pydsts) for pydsts in dsts]
22072 cdef _InputVoidPtrPtrHelper voidStarHelperdsts = _InputVoidPtrPtrHelper(pylist)
22073 cdef const void** cydsts_ptr = <const void**><void_ptr>voidStarHelperdsts.cptr
22074 pylist = [_HelperInputVoidPtr(pysrcs) for pysrcs in srcs]
22075 cdef _InputVoidPtrPtrHelper voidStarHelpersrcs = _InputVoidPtrPtrHelper(pylist)
22076 cdef const void** cysrcs_ptr = <const void**><void_ptr>voidStarHelpersrcs.cptr
22077 cdef vector[size_t] cysizes = sizes
22078 if count > <size_t>len(dsts): raise RuntimeError("List is too small: " + str(len(dsts)) + " < " + str(count))
22079 if count > <size_t>len(srcs): raise RuntimeError("List is too small: " + str(len(srcs)) + " < " + str(count))
22080 if count > <size_t>len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count))
22081 cdef cyruntime.cudaMemcpyAttributes* cyattrs = NULL
22082 if len(attrs) > 1:
22083 cyattrs = <cyruntime.cudaMemcpyAttributes*> calloc(len(attrs), sizeof(cyruntime.cudaMemcpyAttributes))
22084 if cyattrs is NULL:
22085 raise MemoryError('Failed to allocate length x size memory: ' + str(len(attrs)) + 'x' + str(sizeof(cyruntime.cudaMemcpyAttributes)))
22086 for idx in range(len(attrs)):
22087 string.memcpy(&cyattrs[idx], (<cudaMemcpyAttributes>attrs[idx])._pvt_ptr, sizeof(cyruntime.cudaMemcpyAttributes))
22088 elif len(attrs) == 1:
22089 cyattrs = (<cudaMemcpyAttributes>attrs[0])._pvt_ptr
22090 cdef vector[size_t] cyattrsIdxs = attrsIdxs
22091 if numAttrs > <size_t>len(attrs): raise RuntimeError("List is too small: " + str(len(attrs)) + " < " + str(numAttrs))
22092 if numAttrs > <size_t>len(attrsIdxs): raise RuntimeError("List is too small: " + str(len(attrsIdxs)) + " < " + str(numAttrs))
22093 with nogil:
22094 err = cyruntime.cudaMemcpyBatchAsync(cydsts_ptr, cysrcs_ptr, cysizes.data(), count, cyattrs, cyattrsIdxs.data(), numAttrs, cystream)
22095 if len(attrs) > 1 and cyattrs is not NULL:
22096 free(cyattrs)
22097 return (_dict_cudaError_t[err],)
22099@cython.embedsignature(True)
22100def cudaMemcpy3DBatchAsync(size_t numOps, opList : Optional[tuple[cudaMemcpy3DBatchOp] | list[cudaMemcpy3DBatchOp]], unsigned long long flags, stream):
22101 """ Performs a batch of 3D memory copies asynchronously.
22103 Performs a batch of memory copies. The batch as a whole executes in
22104 stream order but copies within a batch are not guaranteed to execute in
22105 any specific order. Note that this means specifying any dependent
22106 copies within a batch will result in undefined behavior.
22108 Performs memory copies as specified in the `opList` array. The length
22109 of this array is specified in `numOps`. Each entry in this array
22110 describes a copy operation. This includes among other things, the
22111 source and destination operands for the copy as specified in
22112 :py:obj:`~.cudaMemcpy3DBatchOp.src` and
22113 :py:obj:`~.cudaMemcpy3DBatchOp.dst` respectively. The source and
22114 destination operands of a copy can either be a pointer or a CUDA array.
22115 The width, height and depth of a copy are specified in
22116 :py:obj:`~.cudaMemcpy3DBatchOp.extent`, in elements, and must not be
22117 zero. For pointer-to-
22118 pointer copies, the element size is considered to be 1. For pointer to
22119 CUDA array or vice versa copies, the element size is determined by the
22120 CUDA array. For CUDA array to CUDA array copies, the element size of
22121 the two CUDA arrays must match.
22123 For a given operand, if :py:obj:`~.cudaMemcpy3DOperand`::type is
22124 specified as :py:obj:`~.cudaMemcpyOperandTypePointer`, then
22125 :py:obj:`~.cudaMemcpy3DOperand`::op::ptr will be used. The
22126 :py:obj:`~.cudaMemcpy3DOperand`::op::ptr::ptr field must contain the
22127 pointer where the copy should begin. The
22128 :py:obj:`~.cudaMemcpy3DOperand`::op::ptr::rowLength field specifies the
22129 length of each row in elements and must either be zero or be greater
22130 than or equal to the width of the copy specified in
22131 :py:obj:`~.cudaMemcpy3DBatchOp`::extent::width. The
22132 :py:obj:`~.cudaMemcpy3DOperand`::op::ptr::layerHeight field specifies
22133 the height of each layer and must either be zero or be greater than or
22134 equal to the height of the copy specified in
22135 :py:obj:`~.cudaMemcpy3DBatchOp`::extent::height. When either of these
22136 values is zero, that aspect of the operand is considered to be tightly
22137 packed according to the copy extent. For managed memory pointers on
22138 devices where :py:obj:`~.cudaDevAttrConcurrentManagedAccess` is true or
22139 system-allocated pageable memory on devices where
22140 :py:obj:`~.cudaDevAttrPageableMemoryAccess` is true, the
22141 :py:obj:`~.cudaMemcpy3DOperand`::op::ptr::locHint field can be used to
22142 hint the location of the operand.
22144 If an operand's type is specified as
22145 :py:obj:`~.cudaMemcpyOperandTypeArray`, then
22146 :py:obj:`~.cudaMemcpy3DOperand`::op::array will be used. The
22147 :py:obj:`~.cudaMemcpy3DOperand`::op::array::array field specifies the
22148 CUDA array and :py:obj:`~.cudaMemcpy3DOperand`::op::array::offset
22149 specifies the 3D offset into that array where the copy begins.
22151 The :py:obj:`~.cudaMemcpyAttributes.srcAccessOrder` indicates the
22152 source access ordering to be observed for copies associated with the
22153 attribute. If the source access order is set to
22154 :py:obj:`~.cudaMemcpySrcAccessOrderStream`, then the source will be
22155 accessed in stream order. If the source access order is set to
22156 :py:obj:`~.cudaMemcpySrcAccessOrderDuringApiCall` then it indicates
22157 that access to the source pointer can be out of stream order and all
22158 accesses must be complete before the API call returns. This flag is
22159 suited for ephemeral sources (e.g., stack variables) when it's known
22160 that no prior operations in the stream can be accessing the memory and
22161 also that the lifetime of the memory is limited to the scope that the
22162 source variable was declared in. Specifying this flag allows the driver
22163 to optimize the copy and removes the need for the user to synchronize
22164 the stream after the API call. If the source access order is set to
22165 :py:obj:`~.cudaMemcpySrcAccessOrderAny` then it indicates that access
22166 to the source pointer can be out of stream order and the accesses can
22167 happen even after the API call returns. This flag is suited for host
22168 pointers allocated outside CUDA (e.g., via malloc) when it's known that
22169 no prior operations in the stream can be accessing the memory.
22170 Specifying this flag allows the driver to optimize the copy on certain
22171 platforms. Each memcopy operation in `opList` must have a valid
22172 srcAccessOrder setting; otherwise this API will return
22173 :py:obj:`~.cudaErrorInvalidValue`.
22175 The :py:obj:`~.cudaMemcpyAttributes.flags` field can be used to specify
22176 certain flags for copies. Setting the
22177 :py:obj:`~.cudaMemcpyFlagPreferOverlapWithCompute` flag indicates that
22178 the associated copies should preferably overlap with any compute work.
22179 Note that this flag is a hint and can be ignored depending on the
22180 platform and other parameters of the copy.
22182 Parameters
22183 ----------
22184 numOps : size_t
22185 Total number of memcpy operations.
22186 opList : list[:py:obj:`~.cudaMemcpy3DBatchOp`]
22187 Array of size `numOps` containing the actual memcpy operations.
22188 flags : unsigned long long
22189 Flags for future use, must be zero now.
22190 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22191 The stream to enqueue the operations in. Must not be the default
22192 NULL stream.
22194 Returns
22195 -------
22196 cudaError_t
22197 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22198 """
22199 cdef cyruntime.cudaStream_t cystream
22200 if stream is None:
22201 pstream = 0
22202 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22203 pstream = int(stream)
22204 else:
22205 pstream = int(cudaStream_t(stream))
22206 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22207 opList = [] if opList is None else opList
22208 if not all(isinstance(_x, (cudaMemcpy3DBatchOp,)) for _x in opList):
22209 raise TypeError("Argument 'opList' is not an instance of the expected type (tuple[cudaMemcpy3DBatchOp, ...] or list[cudaMemcpy3DBatchOp])")
22210 if numOps > <size_t>len(opList): raise RuntimeError("List is too small: " + str(len(opList)) + " < " + str(numOps))
22211 cdef cyruntime.cudaMemcpy3DBatchOp* cyopList = NULL
22212 if len(opList) > 1:
22213 cyopList = <cyruntime.cudaMemcpy3DBatchOp*> calloc(len(opList), sizeof(cyruntime.cudaMemcpy3DBatchOp))
22214 if cyopList is NULL:
22215 raise MemoryError('Failed to allocate ' + str(len(opList)) + ' x ' + str(sizeof(cyruntime.cudaMemcpy3DBatchOp)) + ' bytes')
22216 for idx in range(len(opList)):
22217 string.memcpy(&cyopList[idx], (<cudaMemcpy3DBatchOp>opList[idx])._pvt_ptr, sizeof(cyruntime.cudaMemcpy3DBatchOp))
22218 elif len(opList) == 1:
22219 cyopList = (<cudaMemcpy3DBatchOp>opList[0])._pvt_ptr
22220 with nogil:
22221 err = cyruntime.cudaMemcpy3DBatchAsync(numOps, cyopList, flags, cystream)
22222 if len(opList) > 1 and cyopList is not NULL:
22223 free(cyopList)
22224 return (_dict_cudaError_t[err],)
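
# --- Editor's note: an illustrative sketch of a single pointer-to-pointer
# 3D batch copy; not generated code. The nested field and enum names
# (cudaMemcpy3DOperandType, op.ptr.*, srcAccessOrder) follow the docstring
# above and should be treated as assumptions to verify against the struct
# definitions.
def _example_cudaMemcpy3DBatchAsync():
    _, stream = cudaStreamCreate()
    nbytes = 256 * 64  # 256-byte rows x 64 rows x 1 layer, element size 1
    _, src = cudaMalloc(nbytes)
    _, dst = cudaMalloc(nbytes)
    op = cudaMemcpy3DBatchOp()
    op.src.type = cudaMemcpy3DOperandType.cudaMemcpyOperandTypePointer
    op.src.op.ptr.ptr = src
    op.src.op.ptr.rowLength = 0    # zero means tightly packed
    op.src.op.ptr.layerHeight = 0
    op.dst.type = cudaMemcpy3DOperandType.cudaMemcpyOperandTypePointer
    op.dst.op.ptr.ptr = dst
    op.extent.width, op.extent.height, op.extent.depth = 256, 64, 1
    op.srcAccessOrder = cudaMemcpySrcAccessOrder.cudaMemcpySrcAccessOrderStream
    err, = cudaMemcpy3DBatchAsync(1, [op], 0, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    cudaFree(src)
    cudaFree(dst)
    cudaStreamDestroy(stream)
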
22226@cython.embedsignature(True)
22227def cudaMemcpy2DAsync(dst, size_t dpitch, src, size_t spitch, size_t width, size_t height, kind not None : cudaMemcpyKind, stream):
22228 """ Copies data between host and device.
22230 Copies a matrix (`height` rows of `width` bytes each) from the memory
22231 area pointed to by `src` to the memory area pointed to by `dst`, where
22232 `kind` specifies the direction of the copy, and must be one of
22233 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
22234 :py:obj:`~.cudaMemcpyDeviceToHost`,
22235 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
22236 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
22237 type of transfer is inferred from the pointer values. However,
22238 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
22239 unified virtual addressing. `dpitch` and `spitch` are the widths in
22240 memory in bytes of the 2D arrays pointed to by `dst` and `src`,
22241 including any padding added to the end of each row. The memory areas
22242 may not overlap. `width` must not exceed either `dpitch` or `spitch`.
22244 Calling :py:obj:`~.cudaMemcpy2DAsync()` with `dst` and `src` pointers
22245 that do not match the direction of the copy results in an undefined
22246 behavior. :py:obj:`~.cudaMemcpy2DAsync()` returns an error if `dpitch`
22247 or `spitch` is greater than the maximum allowed.
22249 :py:obj:`~.cudaMemcpy2DAsync()` is asynchronous with respect to the
22250 host, so the call may return before the copy is complete. The copy can
22251 optionally be associated with a stream by passing a non-zero `stream`
22252 argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
22253 :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
22254 may overlap with operations in other streams.
22256 The device version of this function only handles device to device
22257 copies and cannot be given local or shared pointers.
22259 Parameters
22260 ----------
22261 dst : Any
22262 Destination memory address
22263 dpitch : size_t
22264 Pitch of destination memory
22265 src : Any
22266 Source memory address
22267 spitch : size_t
22268 Pitch of source memory
22269 width : size_t
22270 Width of matrix transfer (columns in bytes)
22271 height : size_t
22272 Height of matrix transfer (rows)
22273 kind : :py:obj:`~.cudaMemcpyKind`
22274 Type of transfer
22275 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22276 Stream identifier
22278 Returns
22279 -------
22280 cudaError_t
22281 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
22283 See Also
22284 --------
22285 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2DAsync`
22286 """
22287 cdef cyruntime.cudaStream_t cystream
22288 if stream is None:
22289 pstream = 0
22290 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22291 pstream = int(stream)
22292 else:
22293 pstream = int(cudaStream_t(stream))
22294 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22295 cydst = _HelperInputVoidPtr(dst)
22296 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
22297 cysrc = _HelperInputVoidPtr(src)
22298 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
22299 cdef cyruntime.cudaMemcpyKind cykind = kind.value
22300 with nogil:
22301 err = cyruntime.cudaMemcpy2DAsync(cydst_ptr, dpitch, cysrc_ptr, spitch, width, height, cykind, cystream)
22302 return (_dict_cudaError_t[err],)
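
# --- Editor's note: a hedged sketch of a host-to-device 2D copy; not
# generated code. The host buffer is a ctypes array, which satisfies the
# buffer protocol expected by `src`; host rows are tightly packed, so the
# source pitch equals the row width.
def _example_cudaMemcpy2DAsync():
    width, height = 256, 64                      # width in bytes, rows
    _, dptr, dpitch = cudaMallocPitch(width, height)
    host = (ctypes.c_char * (width * height))()
    _, stream = cudaStreamCreate()
    err, = cudaMemcpy2DAsync(dptr, dpitch, host, width, width, height,
                             cudaMemcpyKind.cudaMemcpyHostToDevice, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)                # host buffer is pageable
    cudaFree(dptr)
    cudaStreamDestroy(stream)
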
22304@cython.embedsignature(True)
22305def cudaMemcpy2DToArrayAsync(dst, size_t wOffset, size_t hOffset, src, size_t spitch, size_t width, size_t height, kind not None : cudaMemcpyKind, stream):
22306 """ Copies data between host and device.
22308 Copies a matrix (`height` rows of `width` bytes each) from the memory
22309 area pointed to by `src` to the CUDA array `dst` starting at `hOffset`
22310 rows and `wOffset` bytes from the upper left corner, where `kind`
22311 specifies the direction of the copy, and must be one of
22312 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
22313 :py:obj:`~.cudaMemcpyDeviceToHost`,
22314 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
22315 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
22316 type of transfer is inferred from the pointer values. However,
22317 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
22318 unified virtual addressing. `spitch` is the width in memory in bytes of
22319 the 2D array pointed to by `src`, including any padding added to the
22320 end of each row. `wOffset` + `width` must not exceed the width of the
22321 CUDA array `dst`. `width` must not exceed `spitch`.
22322 :py:obj:`~.cudaMemcpy2DToArrayAsync()` returns an error if `spitch`
22323 exceeds the maximum allowed.
22325 :py:obj:`~.cudaMemcpy2DToArrayAsync()` is asynchronous with respect to
22326 the host, so the call may return before the copy is complete. The copy
22327 can optionally be associated with a stream by passing a non-zero `stream`
22328 argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
22329 :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
22330 may overlap with operations in other streams.
22336 Parameters
22337 ----------
22338 dst : :py:obj:`~.cudaArray_t`
22339 Destination memory address
22340 wOffset : size_t
22341 Destination starting X offset (columns in bytes)
22342 hOffset : size_t
22343 Destination starting Y offset (rows)
22344 src : Any
22345 Source memory address
22346 spitch : size_t
22347 Pitch of source memory
22348 width : size_t
22349 Width of matrix transfer (columns in bytes)
22350 height : size_t
22351 Height of matrix transfer (rows)
22352 kind : :py:obj:`~.cudaMemcpyKind`
22353 Type of transfer
22354 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22355 Stream identifier
22357 Returns
22358 -------
22359 cudaError_t
22360 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
22362 See Also
22363 --------
22364 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2DAsync`
22365 """
22366 cdef cyruntime.cudaStream_t cystream
22367 if stream is None:
22368 pstream = 0
22369 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22370 pstream = int(stream)
22371 else:
22372 pstream = int(cudaStream_t(stream))
22373 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22374 cdef cyruntime.cudaArray_t cydst
22375 if dst is None:
22376 pdst = 0
22377 elif isinstance(dst, (cudaArray_t,)):
22378 pdst = int(dst)
22379 else:
22380 pdst = int(cudaArray_t(dst))
22381 cydst = <cyruntime.cudaArray_t><void_ptr>pdst
22382 cysrc = _HelperInputVoidPtr(src)
22383 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
22384 cdef cyruntime.cudaMemcpyKind cykind = kind.value
22385 with nogil:
22386 err = cyruntime.cudaMemcpy2DToArrayAsync(cydst, wOffset, hOffset, cysrc_ptr, spitch, width, height, cykind, cystream)
22387 return (_dict_cudaError_t[err],)
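
# --- Editor's note: illustrative sketch (not generated code) copying a
# tightly packed host buffer into an 8-bit single-channel CUDA array; the
# channel descriptor is built by hand so the example stays self-contained.
def _example_cudaMemcpy2DToArrayAsync():
    desc = cudaChannelFormatDesc()
    desc.x, desc.y, desc.z, desc.w = 8, 0, 0, 0
    desc.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
    _, arr = cudaMallocArray(desc, 256, 64, 0)
    host = (ctypes.c_char * (256 * 64))()
    _, stream = cudaStreamCreate()
    err, = cudaMemcpy2DToArrayAsync(arr, 0, 0, host, 256, 256, 64,
                                    cudaMemcpyKind.cudaMemcpyHostToDevice,
                                    stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    cudaFreeArray(arr)
    cudaStreamDestroy(stream)
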
22389@cython.embedsignature(True)
22390def cudaMemcpy2DFromArrayAsync(dst, size_t dpitch, src, size_t wOffset, size_t hOffset, size_t width, size_t height, kind not None : cudaMemcpyKind, stream):
22391 """ Copies data between host and device.
22393 Copies a matrix (`height` rows of `width` bytes each) from the CUDA
22394 array `src` starting at `hOffset` rows and `wOffset` bytes from the
22395 upper left corner to the memory area pointed to by `dst`, where `kind`
22396 specifies the direction of the copy, and must be one of
22397 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
22398 :py:obj:`~.cudaMemcpyDeviceToHost`,
22399 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
22400 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
22401 type of transfer is inferred from the pointer values. However,
22402 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
22403 unified virtual addressing. `dpitch` is the width in memory in bytes of
22404 the 2D array pointed to by `dst`, including any padding added to the
22405 end of each row. `wOffset` + `width` must not exceed the width of the
22406 CUDA array `src`. `width` must not exceed `dpitch`.
22407 :py:obj:`~.cudaMemcpy2DFromArrayAsync()` returns an error if `dpitch`
22408 exceeds the maximum allowed.
22410 :py:obj:`~.cudaMemcpy2DFromArrayAsync()` is asynchronous with respect
22411 to the host, so the call may return before the copy is complete. The
22412 copy can optionally be associated with a stream by passing a non-zero
22413 `stream` argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
22414 :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
22415 may overlap with operations in other streams.
22420 Parameters
22421 ----------
22422 dst : Any
22423 Destination memory address
22424 dpitch : size_t
22425 Pitch of destination memory
22426 src : :py:obj:`~.cudaArray_const_t`
22427 Source memory address
22428 wOffset : size_t
22429 Source starting X offset (columns in bytes)
22430 hOffset : size_t
22431 Source starting Y offset (rows)
22432 width : size_t
22433 Width of matrix transfer (columns in bytes)
22434 height : size_t
22435 Height of matrix transfer (rows)
22436 kind : :py:obj:`~.cudaMemcpyKind`
22437 Type of transfer
22438 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22439 Stream identifier
22441 Returns
22442 -------
22443 cudaError_t
22444 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidPitchValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
22446 See Also
22447 --------
22448 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpy2DAsync`
22449 """
22450 cdef cyruntime.cudaStream_t cystream
22451 if stream is None:
22452 pstream = 0
22453 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22454 pstream = int(stream)
22455 else:
22456 pstream = int(cudaStream_t(stream))
22457 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22458 cdef cyruntime.cudaArray_const_t cysrc
22459 if src is None:
22460 psrc = 0
22461 elif isinstance(src, (cudaArray_const_t,)):
22462 psrc = int(src)
22463 else:
22464 psrc = int(cudaArray_const_t(src))
22465 cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
22466 cydst = _HelperInputVoidPtr(dst)
22467 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
22468 cdef cyruntime.cudaMemcpyKind cykind = kind.value
22469 with nogil:
22470 err = cyruntime.cudaMemcpy2DFromArrayAsync(cydst_ptr, dpitch, cysrc, wOffset, hOffset, width, height, cykind, cystream)
22471 return (_dict_cudaError_t[err],)
22473@cython.embedsignature(True)
22474def cudaMemset(devPtr, int value, size_t count):
22475 """ Initializes or sets device memory to a value.
22477 Fills the first `count` bytes of the memory area pointed to by `devPtr`
22478 with the constant byte value `value`.
22480 Note that this function is asynchronous with respect to the host unless
22481 `devPtr` refers to pinned host memory.
22483 Parameters
22484 ----------
22485 devPtr : Any
22486 Pointer to device memory
22487 value : int
22488 Value to set for each byte of specified memory
22489 count : size_t
22490 Size in bytes to set
22492 Returns
22493 -------
22494 cudaError_t
22495 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22497 See Also
22498 --------
22499 :py:obj:`~.cuMemsetD8`, :py:obj:`~.cuMemsetD16`, :py:obj:`~.cuMemsetD32`
22500 """
22501 cydevPtr = _HelperInputVoidPtr(devPtr)
22502 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
22503 with nogil:
22504 err = cyruntime.cudaMemset(cydevPtr_ptr, value, count)
22505 return (_dict_cudaError_t[err],)
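
# --- Editor's note: minimal sketch (not generated code) filling a device
# allocation with a constant byte; the synchronize covers the case where the
# memset runs asynchronously with respect to the host.
def _example_cudaMemset():
    err, dptr = cudaMalloc(1 << 20)
    assert err == cudaError_t.cudaSuccess
    err, = cudaMemset(dptr, 0xFF, 1 << 20)  # set every byte to 0xFF
    assert err == cudaError_t.cudaSuccess
    cudaDeviceSynchronize()
    cudaFree(dptr)
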
22507@cython.embedsignature(True)
22508def cudaMemset2D(devPtr, size_t pitch, int value, size_t width, size_t height):
22509 """ Initializes or sets device memory to a value.
22511 Sets a matrix (`height` rows of `width` bytes each) pointed to by
22512 `devPtr` to the specified value `value`. `pitch` is the width in bytes of
22513 the 2D array pointed to by `devPtr`, including any padding added to the
22514 end of each row. This function performs fastest when the pitch is one
22515 that has been passed back by :py:obj:`~.cudaMallocPitch()`.
22517 Note that this function is asynchronous with respect to the host unless
22518 `devPtr` refers to pinned host memory.
22520 Parameters
22521 ----------
22522 devPtr : Any
22523 Pointer to 2D device memory
22524 pitch : size_t
22525 Pitch in bytes of 2D device memory (unused if `height` is 1)
22526 value : int
22527 Value to set for each byte of specified memory
22528 width : size_t
22529 Width of matrix set (columns in bytes)
22530 height : size_t
22531 Height of matrix set (rows)
22533 Returns
22534 -------
22535 cudaError_t
22536 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22538 See Also
22539 --------
22540 :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cuMemsetD2D8`, :py:obj:`~.cuMemsetD2D16`, :py:obj:`~.cuMemsetD2D32`
22541 """
22542 cydevPtr = _HelperInputVoidPtr(devPtr)
22543 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
22544 with nogil:
22545 err = cyruntime.cudaMemset2D(cydevPtr_ptr, pitch, value, width, height)
22546 return (_dict_cudaError_t[err],)
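
# --- Editor's note: a sketch (not generated code) pairing cudaMemset2D with
# a pitched allocation, which is the layout the docstring says performs best.
def _example_cudaMemset2D():
    width, height = 200, 100                     # width in bytes, rows
    _, dptr, pitch = cudaMallocPitch(width, height)
    err, = cudaMemset2D(dptr, pitch, 0, width, height)
    assert err == cudaError_t.cudaSuccess
    cudaDeviceSynchronize()
    cudaFree(dptr)
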
22548@cython.embedsignature(True)
22549def cudaMemset3D(pitchedDevPtr not None : cudaPitchedPtr, int value, extent not None : cudaExtent):
22550 """ Initializes or sets device memory to a value.
22552 Initializes each element of a 3D array to the specified value `value`.
22553 The object to initialize is defined by `pitchedDevPtr`. The `pitch`
22554 field of `pitchedDevPtr` is the width in memory in bytes of the 3D
22555 array pointed to by `pitchedDevPtr`, including any padding added to the
22556 end of each row. The `xsize` field specifies the logical width of each
22557 row in bytes, while the `ysize` field specifies the height of each 2D
22558 slice in rows. The `pitch` field of `pitchedDevPtr` is ignored when
22559 `height` and `depth` are both equal to 1.
22561 The extents of the initialized region are specified as a `width` in
22562 bytes, a `height` in rows, and a `depth` in slices.
22564 Extents with `width` greater than or equal to the `xsize` of
22565 `pitchedDevPtr` may perform significantly faster than extents narrower
22566 than the `xsize`. Secondarily, extents with `height` equal to the
22567 `ysize` of `pitchedDevPtr` will perform faster than when the `height`
22568 is shorter than the `ysize`.
22570 This function performs fastest when the `pitchedDevPtr` has been
22571 allocated by :py:obj:`~.cudaMalloc3D()`.
22573 Note that this function is asynchronous with respect to the host unless
22574 `pitchedDevPtr` refers to pinned host memory.
22576 Parameters
22577 ----------
22578 pitchedDevPtr : :py:obj:`~.cudaPitchedPtr`
22579 Pointer to pitched device memory
22580 value : int
22581 Value to set for each byte of specified memory
22582 extent : :py:obj:`~.cudaExtent`
22583 Size parameters for where to set device memory (`width` field in
22584 bytes)
22586 Returns
22587 -------
22588 cudaError_t
22589 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22591 See Also
22592 --------
22593 :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.make_cudaPitchedPtr`, :py:obj:`~.make_cudaExtent`
22594 """
22595 with nogil:
22596 err = cyruntime.cudaMemset3D(pitchedDevPtr._pvt_ptr[0], value, extent._pvt_ptr[0])
22597 return (_dict_cudaError_t[err],)
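
# --- Editor's note: a sketch (not generated code) zeroing a 3D allocation;
# the extent fields are set directly to keep the example self-contained.
def _example_cudaMemset3D():
    extent = cudaExtent()
    extent.width, extent.height, extent.depth = 256, 32, 8  # width in bytes
    _, pitched = cudaMalloc3D(extent)
    err, = cudaMemset3D(pitched, 0, extent)
    assert err == cudaError_t.cudaSuccess
    cudaDeviceSynchronize()
    cudaFree(pitched.ptr)
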
22599@cython.embedsignature(True)
22600def cudaMemsetAsync(devPtr, int value, size_t count, stream):
22601 """ Initializes or sets device memory to a value.
22603 Fills the first `count` bytes of the memory area pointed to by `devPtr`
22604 with the constant byte value `value`.
22606 :py:obj:`~.cudaMemsetAsync()` is asynchronous with respect to the host,
22607 so the call may return before the memset is complete. The operation can
22608 optionally be associated with a stream by passing a non-zero `stream`
22609 argument. If `stream` is non-zero, the operation may overlap with
22610 operations in other streams.
22612 The device version of this function only handles device to device
22613 copies and cannot be given local or shared pointers.
22615 Parameters
22616 ----------
22617 devPtr : Any
22618 Pointer to device memory
22619 value : int
22620 Value to set for each byte of specified memory
22621 count : size_t
22622 Size in bytes to set
22623 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22624 Stream identifier
22626 Returns
22627 -------
22628 cudaError_t
22629 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22631 See Also
22632 --------
22633 :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cuMemsetD8Async`, :py:obj:`~.cuMemsetD16Async`, :py:obj:`~.cuMemsetD32Async`
22634 """
22635 cdef cyruntime.cudaStream_t cystream
22636 if stream is None:
22637 pstream = 0
22638 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22639 pstream = int(stream)
22640 else:
22641 pstream = int(cudaStream_t(stream))
22642 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22643 cydevPtr = _HelperInputVoidPtr(devPtr)
22644 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
22645 with nogil:
22646 err = cyruntime.cudaMemsetAsync(cydevPtr_ptr, value, count, cystream)
22647 return (_dict_cudaError_t[err],)
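
# --- Editor's note: a sketch (not generated code) of the stream-ordered
# variant; work launched afterwards in the same stream sees the cleared bytes.
def _example_cudaMemsetAsync():
    _, stream = cudaStreamCreate()
    _, dptr = cudaMalloc(4096)
    err, = cudaMemsetAsync(dptr, 0, 4096, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    cudaFree(dptr)
    cudaStreamDestroy(stream)
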
22649@cython.embedsignature(True)
22650def cudaMemset2DAsync(devPtr, size_t pitch, int value, size_t width, size_t height, stream):
22651 """ Initializes or sets device memory to a value.
22653 Sets a matrix (`height` rows of `width` bytes each) pointed to by
22654 `devPtr` to the specified value `value`. `pitch` is the width in bytes of
22655 the 2D array pointed to by `devPtr`, including any padding added to the
22656 end of each row. This function performs fastest when the pitch is one
22657 that has been passed back by :py:obj:`~.cudaMallocPitch()`.
22659 :py:obj:`~.cudaMemset2DAsync()` is asynchronous with respect to the
22660 host, so the call may return before the memset is complete. The
22661 operation can optionally be associated with a stream by passing a non-
22662 zero `stream` argument. If `stream` is non-zero, the operation may
22663 overlap with operations in other streams.
22665 The device version of this function only handles device to device
22666 copies and cannot be given local or shared pointers.
22668 Parameters
22669 ----------
22670 devPtr : Any
22671 Pointer to 2D device memory
22672 pitch : size_t
22673 Pitch in bytes of 2D device memory (unused if `height` is 1)
22674 value : int
22675 Value to set for each byte of specified memory
22676 width : size_t
22677 Width of matrix set (columns in bytes)
22678 height : size_t
22679 Height of matrix set (rows)
22680 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22681 Stream identifier
22683 Returns
22684 -------
22685 cudaError_t
22686 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22688 See Also
22689 --------
22690 :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset3DAsync`, :py:obj:`~.cuMemsetD2D8Async`, :py:obj:`~.cuMemsetD2D16Async`, :py:obj:`~.cuMemsetD2D32Async`
22691 """
22692 cdef cyruntime.cudaStream_t cystream
22693 if stream is None:
22694 pstream = 0
22695 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22696 pstream = int(stream)
22697 else:
22698 pstream = int(cudaStream_t(stream))
22699 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22700 cydevPtr = _HelperInputVoidPtr(devPtr)
22701 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
22702 with nogil:
22703 err = cyruntime.cudaMemset2DAsync(cydevPtr_ptr, pitch, value, width, height, cystream)
22704 return (_dict_cudaError_t[err],)
22706@cython.embedsignature(True)
22707def cudaMemset3DAsync(pitchedDevPtr not None : cudaPitchedPtr, int value, extent not None : cudaExtent, stream):
22708 """ Initializes or sets device memory to a value.
22710 Initializes each element of a 3D array to the specified value `value`.
22711 The object to initialize is defined by `pitchedDevPtr`. The `pitch`
22712 field of `pitchedDevPtr` is the width in memory in bytes of the 3D
22713 array pointed to by `pitchedDevPtr`, including any padding added to the
22714 end of each row. The `xsize` field specifies the logical width of each
22715 row in bytes, while the `ysize` field specifies the height of each 2D
22716 slice in rows. The `pitch` field of `pitchedDevPtr` is ignored when
22717 `height` and `depth` are both equal to 1.
22719 The extents of the initialized region are specified as a `width` in
22720 bytes, a `height` in rows, and a `depth` in slices.
22722 Extents with `width` greater than or equal to the `xsize` of
22723 `pitchedDevPtr` may perform significantly faster than extents narrower
22724 than the `xsize`. Secondarily, extents with `height` equal to the
22725 `ysize` of `pitchedDevPtr` will perform faster than when the `height`
22726 is shorter than the `ysize`.
22728 This function performs fastest when the `pitchedDevPtr` has been
22729 allocated by :py:obj:`~.cudaMalloc3D()`.
22731 :py:obj:`~.cudaMemset3DAsync()` is asynchronous with respect to the
22732 host, so the call may return before the memset is complete. The
22733 operation can optionally be associated with a stream by passing a non-
22734 zero `stream` argument. If `stream` is non-zero, the operation may
22735 overlap with operations in other streams.
22737 The device version of this function only handles device to device
22738 copies and cannot be given local or shared pointers.
22740 Parameters
22741 ----------
22742 pitchedDevPtr : :py:obj:`~.cudaPitchedPtr`
22743 Pointer to pitched device memory
22744 value : int
22745 Value to set for each byte of specified memory
22746 extent : :py:obj:`~.cudaExtent`
22747 Size parameters for where to set device memory (`width` field in
22748 bytes)
22749 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22750 Stream identifier
22752 Returns
22753 -------
22754 cudaError_t
22755 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
22757 See Also
22758 --------
22759 :py:obj:`~.cudaMemset`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaMemset3D`, :py:obj:`~.cudaMemsetAsync`, :py:obj:`~.cudaMemset2DAsync`, :py:obj:`~.cudaMalloc3D`, :py:obj:`~.make_cudaPitchedPtr`, :py:obj:`~.make_cudaExtent`
22760 """
22761 cdef cyruntime.cudaStream_t cystream
22762 if stream is None:
22763 pstream = 0
22764 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22765 pstream = int(stream)
22766 else:
22767 pstream = int(cudaStream_t(stream))
22768 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22769 with nogil:
22770 err = cyruntime.cudaMemset3DAsync(pitchedDevPtr._pvt_ptr[0], value, extent._pvt_ptr[0], cystream)
22771 return (_dict_cudaError_t[err],)
22773@cython.embedsignature(True)
22774def cudaMemPrefetchAsync(devPtr, size_t count, location not None : cudaMemLocation, unsigned int flags, stream):
22775 """ Prefetches memory to the specified destination location.
22777 Prefetches memory to the specified destination location. `devPtr` is
22778 the base device pointer of the memory to be prefetched and `location`
22779 specifies the destination location. `count` specifies the number of
22780 bytes to copy. `stream` is the stream in which the operation is
22781 enqueued. The memory range must refer to managed memory allocated via
22782 :py:obj:`~.cudaMallocManaged` or declared via managed variables, or it
22783 may also refer to memory allocated from a managed memory pool, or it
22784 may also refer to system-allocated memory on systems with non-zero
22785 cudaDevAttrPageableMemoryAccess.
22787 Specifying :py:obj:`~.cudaMemLocationTypeDevice` for
22788 :py:obj:`~.cudaMemLocation.type` will prefetch memory to the GPU
22789 specified by device ordinal :py:obj:`~.cudaMemLocation.id`, which must
22790 have a non-zero value for the device attribute
22791 :py:obj:`~.concurrentManagedAccess`. Additionally, `stream` must be
22792 associated with a device that has a non-zero value for the device
22793 attribute :py:obj:`~.concurrentManagedAccess`. Specifying
22794 :py:obj:`~.cudaMemLocationTypeHost` as :py:obj:`~.cudaMemLocation.type`
22795 will prefetch data to host memory. Applications can request prefetching
22796 memory to a specific host NUMA node by specifying
22797 :py:obj:`~.cudaMemLocationTypeHostNuma` for
22798 :py:obj:`~.cudaMemLocation.type` and a valid host NUMA node id in
22799 :py:obj:`~.cudaMemLocation.id`. Users can also request prefetching
22800 memory to the host NUMA node closest to the current thread's CPU by
22801 specifying :py:obj:`~.cudaMemLocationTypeHostNumaCurrent` for
22802 :py:obj:`~.cudaMemLocation.type`. Note when
22803 :py:obj:`~.cudaMemLocation.type` is either
22804 :py:obj:`~.cudaMemLocationTypeHost` or
22805 :py:obj:`~.cudaMemLocationTypeHostNumaCurrent`,
22806 :py:obj:`~.cudaMemLocation.id` will be ignored.
22808 The start address and end address of the memory range will be rounded
22809 down and rounded up respectively to be aligned to CPU page size before
22810 the prefetch operation is enqueued in the stream.
22812 If no physical memory has been allocated for this region, then this
22813 memory region will be populated and mapped on the destination device.
22814 If there's insufficient memory to prefetch the desired region, the
22815 Unified Memory driver may evict pages from other
22816 :py:obj:`~.cudaMallocManaged` allocations to host memory in order to
22817 make room. Device memory allocated using :py:obj:`~.cudaMalloc` or
22818 :py:obj:`~.cudaMallocArray` will not be evicted.
22820 By default, any mappings to the previous location of the migrated pages
22821 are removed and mappings for the new location are only setup on the
22822 destination location. The exact behavior however also depends on the
22823 settings applied to this memory range via :py:obj:`~.cudaMemAdvise` as
22824 described below:
22826 If :py:obj:`~.cudaMemAdviseSetReadMostly` was set on any subset of this
22827 memory range, then that subset will create a read-only copy of the
22828 pages at the destination location. If, however, the destination location is a
22829 host NUMA node, then any pages of that subset that are already in
22830 another host NUMA node will be transferred to the destination.
22832 If :py:obj:`~.cudaMemAdviseSetPreferredLocation` was called on any
22833 subset of this memory range, then the pages will be migrated to
22834 `location` even if `location` is not the preferred location of any
22835 pages in the memory range.
22837 If :py:obj:`~.cudaMemAdviseSetAccessedBy` was called on any subset of
22838 this memory range, then mappings to those pages from all the
22839 appropriate processors are updated to refer to the new location if
22840 establishing such a mapping is possible. Otherwise, those mappings are
22841 cleared.
22843 Note that this API is not required for functionality and only serves to
22844 improve performance by allowing the application to migrate data to a
22845 suitable location before it is accessed. Memory accesses to this range
22846 are always coherent and are allowed even when the data is actively
22847 being migrated.
22849 Note that this function is asynchronous with respect to the host and
22850 all work on other devices.
22852 Parameters
22853 ----------
22854 devPtr : Any
22855 Pointer to be prefetched
22856 count : size_t
22857 Size in bytes
22858 location : :py:obj:`~.cudaMemLocation`
22859 location to prefetch to
22860 flags : unsigned int
22861 Flags for future use, must be zero now.
22862 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22863 Stream to enqueue prefetch operation
22865 Returns
22866 -------
22867 cudaError_t
22868 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
22870 See Also
22871 --------
22872 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cudaMemAdvise`, :py:obj:`~.cuMemPrefetchAsync`
22873 """
22874 cdef cyruntime.cudaStream_t cystream
22875 if stream is None:
22876 pstream = 0
22877 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22878 pstream = int(stream)
22879 else:
22880 pstream = int(cudaStream_t(stream))
22881 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22882 cydevPtr = _HelperInputVoidPtr(devPtr)
22883 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
22884 with nogil:
22885 err = cyruntime.cudaMemPrefetchAsync(cydevPtr_ptr, count, location._pvt_ptr[0], flags, cystream)
22886 return (_dict_cudaError_t[err],)
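
# --- Editor's note: a hedged sketch (not generated code) prefetching a
# managed allocation to device 0; it assumes the device reports a non-zero
# cudaDevAttrConcurrentManagedAccess, as the docstring requires.
def _example_cudaMemPrefetchAsync():
    _, stream = cudaStreamCreate()
    nbytes = 1 << 20
    _, mptr = cudaMallocManaged(nbytes, cudaMemAttachGlobal)
    loc = cudaMemLocation()
    loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    loc.id = 0
    err, = cudaMemPrefetchAsync(mptr, nbytes, loc, 0, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    cudaFree(mptr)
    cudaStreamDestroy(stream)
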
22888@cython.embedsignature(True)
22889def cudaMemPrefetchBatchAsync(dptrs : Optional[tuple[Any] | list[Any]], sizes : tuple[int] | list[int], size_t count, prefetchLocs : Optional[tuple[cudaMemLocation] | list[cudaMemLocation]], prefetchLocIdxs : tuple[int] | list[int], size_t numPrefetchLocs, unsigned long long flags, stream):
22890 """ Performs a batch of memory prefetches asynchronously.
22892 Performs a batch of memory prefetches. The batch as a whole executes in
22893 stream order but operations within a batch are not guaranteed to
22894 execute in any specific order. All devices in the system must have a
22895 non-zero value for the device attribute
22896 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`; otherwise the API will
22897 return an error.
22899 The semantics of the individual prefetch operations are as described in
22900 :py:obj:`~.cudaMemPrefetchAsync`.
22902 Performs memory prefetch on address ranges specified in `dptrs` and
22903 `sizes`. Both arrays must be of the same length as specified by
22904 `count`. Each memory range specified must refer to managed memory
22905 allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
22906 variables or it may also refer to system-allocated memory when all
22907 devices have a non-zero value for
22908 :py:obj:`~.cudaDevAttrPageableMemoryAccess`. The prefetch location for
22909 every operation in the batch is specified in the `prefetchLocs` array.
22910 Each entry in this array can apply to more than one operation. This can
22911 be done by specifying in the `prefetchLocIdxs` array, the index of the
22912 first prefetch operation that the corresponding entry in the
22913 `prefetchLocs` array applies to. Both `prefetchLocs` and
22914 `prefetchLocIdxs` must be of the same length as specified by
22915 `numPrefetchLocs`. For example, if a batch has 10 prefetches listed in
22916 dptrs/sizes, the first 4 of which are to be prefetched to one location
22917 and the remaining 6 are to be prefetched to another, then
22918 `numPrefetchLocs` will be 2, `prefetchLocIdxs` will be {0, 4} and
22919 `prefetchLocs` will contain the two locations. Note the first entry in
22920 `prefetchLocIdxs` must always be 0. Also, each entry must be greater
22921 than the previous entry and the last entry should be less than `count`.
22922 Furthermore, `numPrefetchLocs` must be less than or equal to `count`.
22924 Parameters
22925 ----------
22926 dptrs : list[Any]
22927 Array of pointers to be prefetched
22928 sizes : list[int]
22929 Array of sizes for memory prefetch operations.
22930 count : size_t
22931 Size of `dptrs` and `sizes` arrays.
22932 prefetchLocs : list[:py:obj:`~.cudaMemLocation`]
22933 Array of locations to prefetch to.
22934 prefetchLocIdxs : list[int]
22935 Array of indices to specify which operands each entry in the
22936 `prefetchLocs` array applies to. The locations specified in
22937 prefetchLocs[k] will be applied to prefetches starting from
22938 prefetchLocIdxs[k] through prefetchLocIdxs[k+1] - 1. Also
22939 prefetchLocs[numPrefetchLocs - 1] will apply to prefetches starting
22940 from prefetchLocIdxs[numPrefetchLocs - 1] through count - 1.
22941 numPrefetchLocs : size_t
22942 Size of `prefetchLocs` and `prefetchLocIdxs` arrays.
22943 flags : unsigned long long
22944 Flags reserved for future use. Must be zero.
22945 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
22946 The stream to enqueue the operations in. Must not be the legacy
22947 NULL stream.
22949 Returns
22950 -------
22951 cudaError_t
22953 """
22954 cdef cyruntime.cudaStream_t cystream
22955 if stream is None:
22956 pstream = 0
22957 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
22958 pstream = int(stream)
22959 else:
22960 pstream = int(cudaStream_t(stream))
22961 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
22962 if not all(isinstance(_x, int) for _x in prefetchLocIdxs):
22963 raise TypeError("Argument 'prefetchLocIdxs' is not an instance of the expected type (tuple[int, ...] or list[int])")
22964 prefetchLocs = [] if prefetchLocs is None else prefetchLocs
22965 if not all(isinstance(_x, (cudaMemLocation,)) for _x in prefetchLocs):
22966 raise TypeError("Argument 'prefetchLocs' is not an instance of the expected type (tuple[cudaMemLocation, ...] or list[cudaMemLocation])")
22967 if not all(isinstance(_x, int) for _x in sizes):
22968 raise TypeError("Argument 'sizes' is not an instance of the expected type (tuple[int, ...] or list[int])")
22969 dptrs = [] if dptrs is None else dptrs
22970 pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs]
22971 cdef _InputVoidPtrPtrHelper voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist)
22972 cdef void** cydptrs_ptr = <void**><void_ptr>voidStarHelperdptrs.cptr
22973 cdef vector[size_t] cysizes = sizes
22974 if count > <size_t>len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count))
22975 if count > <size_t>len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count))
22976 cdef cyruntime.cudaMemLocation* cyprefetchLocs = NULL
22977 if len(prefetchLocs) > 1:
22978 cyprefetchLocs = <cyruntime.cudaMemLocation*> calloc(len(prefetchLocs), sizeof(cyruntime.cudaMemLocation))
22979 if cyprefetchLocs is NULL:
22980 raise MemoryError('Failed to allocate ' + str(len(prefetchLocs)) + ' x ' + str(sizeof(cyruntime.cudaMemLocation)) + ' bytes')
22981 for idx in range(len(prefetchLocs)):
22982 string.memcpy(&cyprefetchLocs[idx], (<cudaMemLocation>prefetchLocs[idx])._pvt_ptr, sizeof(cyruntime.cudaMemLocation))
22983 elif len(prefetchLocs) == 1:
22984 cyprefetchLocs = (<cudaMemLocation>prefetchLocs[0])._pvt_ptr
22985 cdef vector[size_t] cyprefetchLocIdxs = prefetchLocIdxs
22986 if numPrefetchLocs > <size_t>len(prefetchLocs): raise RuntimeError("List is too small: " + str(len(prefetchLocs)) + " < " + str(numPrefetchLocs))
22987 if numPrefetchLocs > <size_t>len(prefetchLocIdxs): raise RuntimeError("List is too small: " + str(len(prefetchLocIdxs)) + " < " + str(numPrefetchLocs))
22988 with nogil:
22989 err = cyruntime.cudaMemPrefetchBatchAsync(cydptrs_ptr, cysizes.data(), count, cyprefetchLocs, cyprefetchLocIdxs.data(), numPrefetchLocs, flags, cystream)
22990 if len(prefetchLocs) > 1 and cyprefetchLocs is not NULL:
22991 free(cyprefetchLocs)
22992 return (_dict_cudaError_t[err],)
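
# --- Editor's note: a sketch (not generated code) mirroring the indexing
# example in the docstring: two locations, where prefetchLocIdxs == [0, 2]
# sends ranges 0-1 to device 0 and range 2 to the host. Error checking on
# the managed allocations is elided.
def _example_cudaMemPrefetchBatchAsync():
    _, stream = cudaStreamCreate()          # must not be the legacy stream
    ptrs = [cudaMallocManaged(4096, cudaMemAttachGlobal)[1] for _ in range(3)]
    sizes = [4096, 4096, 4096]
    to_dev = cudaMemLocation()
    to_dev.type = cudaMemLocationType.cudaMemLocationTypeDevice
    to_dev.id = 0
    to_host = cudaMemLocation()
    to_host.type = cudaMemLocationType.cudaMemLocationTypeHost
    err, = cudaMemPrefetchBatchAsync(ptrs, sizes, 3, [to_dev, to_host],
                                     [0, 2], 2, 0, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    for ptr in ptrs:
        cudaFree(ptr)
    cudaStreamDestroy(stream)
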
22994@cython.embedsignature(True)
22995def cudaMemDiscardBatchAsync(dptrs : Optional[tuple[Any] | list[Any]], sizes : tuple[int] | list[int], size_t count, unsigned long long flags, stream):
22996 """ Performs a batch of memory discards asynchronously.
22998 Performs a batch of memory discards. The batch as a whole executes in
22999 stream order but operations within a batch are not guaranteed to
23000 execute in any specific order. All devices in the system must have a
23001 non-zero value for the device attribute
23002 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`; otherwise the API will
23003 return an error.
23005 Discarding a memory range informs the driver that the contents of that
23006 range are no longer useful. Discarding memory ranges allows the driver
23007 to optimize certain data migrations and can also help reduce memory
23008 pressure. This operation can be undone on any part of the range by
23009 either writing to it or prefetching it via
23010 :py:obj:`~.cudaMemPrefetchAsync` or
23011 :py:obj:`~.cudaMemPrefetchBatchAsync`. Reading from a discarded range,
23012 without a subsequent write or prefetch to that part of the range, will
23013 return an indeterminate value. Note that any reads, writes or
23014 prefetches to any part of the memory range that occur simultaneously
23015 with the discard operation result in undefined behavior.
23017 Performs memory discard on address ranges specified in `dptrs` and
23018 `sizes`. Both arrays must be of the same length as specified by
23019 `count`. Each memory range specified must refer to managed memory
23020 allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
23021 variables or it may also refer to system-allocated memory when all
23022 devices have a non-zero value for
23023 :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
23025 Parameters
23026 ----------
23027 dptrs : list[Any]
23028 Array of pointers to be discarded
23029 sizes : list[int]
23030 Array of sizes for memory discard operations.
23031 count : size_t
23032 Size of `dptrs` and `sizes` arrays.
23033 flags : unsigned long long
23034 Flags reserved for future use. Must be zero.
23035 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
23036 The stream to enqueue the operations in. Must not be legacy NULL
23037 stream.
23039 Returns
23040 -------
23041 cudaError_t
23043 """
23044 cdef cyruntime.cudaStream_t cystream
23045 if stream is None:
23046 pstream = 0
23047 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
23048 pstream = int(stream)
23049 else:
23050 pstream = int(cudaStream_t(stream))
23051 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
23052 if not all(isinstance(_x, int) for _x in sizes):
23053 raise TypeError("Argument 'sizes' is not an instance of the expected type (tuple[int, ...] or list[int])")
23054 dptrs = [] if dptrs is None else dptrs
23055 pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs]
23056 cdef _InputVoidPtrPtrHelper voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist)
23057 cdef void** cydptrs_ptr = <void**><void_ptr>voidStarHelperdptrs.cptr
23058 cdef vector[size_t] cysizes = sizes
23059 if count > <size_t>len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count))
23060 if count > <size_t>len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count))
23061 with nogil:
23062 err = cyruntime.cudaMemDiscardBatchAsync(cydptrs_ptr, cysizes.data(), count, flags, cystream)
23063 return (_dict_cudaError_t[err],)
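
# --- Editor's note: a sketch (not generated code); after the discard, the
# contents of the ranges are indeterminate until rewritten or prefetched.
def _example_cudaMemDiscardBatchAsync():
    _, stream = cudaStreamCreate()
    ptrs = [cudaMallocManaged(4096, cudaMemAttachGlobal)[1] for _ in range(2)]
    err, = cudaMemDiscardBatchAsync(ptrs, [4096, 4096], 2, 0, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    for ptr in ptrs:
        cudaFree(ptr)
    cudaStreamDestroy(stream)
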
23065@cython.embedsignature(True)
23066def cudaMemDiscardAndPrefetchBatchAsync(dptrs : Optional[tuple[Any] | list[Any]], sizes : tuple[int] | list[int], size_t count, prefetchLocs : Optional[tuple[cudaMemLocation] | list[cudaMemLocation]], prefetchLocIdxs : tuple[int] | list[int], size_t numPrefetchLocs, unsigned long long flags, stream):
23067 """ Performs a batch of memory discards and prefetches asynchronously.
23069 Performs a batch of memory discards followed by prefetches. The batch
23070 as a whole executes in stream order but operations within a batch are
23071 not guaranteed to execute in any specific order. All devices in the
23072 system must have a non-zero value for the device attribute
23073 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`; otherwise the API will
23074 return an error.
23076 Calling :py:obj:`~.cudaMemDiscardAndPrefetchBatchAsync` is semantically
23077 equivalent to calling :py:obj:`~.cudaMemDiscardBatchAsync` followed by
23078 :py:obj:`~.cudaMemPrefetchBatchAsync`, but is more optimal. For more
23079 details on what discarding and prefetching imply, please refer to
23080 :py:obj:`~.cudaMemDiscardBatchAsync` and
23081 :py:obj:`~.cudaMemPrefetchBatchAsync` respectively. Note that any
23082 reads, writes or prefetches to any part of the memory range that occur
23083 simultaneously with this combined discard+prefetch operation result in
23084 undefined behavior.
23086 Performs memory discard and prefetch on address ranges specified in
23087 `dptrs` and `sizes`. Both arrays must be of the same length as
23088 specified by `count`. Each memory range specified must refer to managed
23089 memory allocated via :py:obj:`~.cudaMallocManaged` or declared via
23090 managed variables or it may also refer to system-allocated memory when
23091 all devices have a non-zero value for
23092 :py:obj:`~.cudaDevAttrPageableMemoryAccess`. Every operation in the
23093 batch has to be associated with a valid location to prefetch the
23094 address range to and specified in the `prefetchLocs` array. Each entry
23095 in this array can apply to more than one operation. This can be done by
23096 specifying in the `prefetchLocIdxs` array the index of the first
23097 operation that the corresponding entry in the `prefetchLocs` array
23098 applies to. Both `prefetchLocs` and `prefetchLocIdxs` must be of the
23099 same length as specified by `numPrefetchLocs`. For example, if a batch
23100 has 10 operations listed in dptrs/sizes, the first 6 of which are to be
23101 prefetched to one location and the remaining 4 are to be prefetched to
23102 another, then `numPrefetchLocs` will be 2, `prefetchLocIdxs` will be
23103 {0, 6} and `prefetchLocs` will contain the two sets of locations. Note
23104 the first entry in `prefetchLocIdxs` must always be 0. Also, each entry
23105 must be greater than the previous entry and the last entry should be
23106 less than `count`. Furthermore, `numPrefetchLocs` must be less than
23107 or equal to `count`.
23109 Parameters
23110 ----------
23111 dptrs : list[Any]
23112 Array of pointers to be discarded
23113 sizes : list[int]
23114 Array of sizes for memory discard operations.
23115 count : size_t
23116 Size of `dptrs` and `sizes` arrays.
23117 prefetchLocs : list[:py:obj:`~.cudaMemLocation`]
23118 Array of locations to prefetch to.
23119 prefetchLocIdxs : list[int]
23120 Array of indices to specify which operands each entry in the
23121 `prefetchLocs` array applies to. The locations specified in
23122 prefetchLocs[k] will be applied to operations starting from
23123 prefetchLocIdxs[k] through prefetchLocIdxs[k+1] - 1. Also
23124 prefetchLocs[numPrefetchLocs - 1] will apply to operations starting
23125 from prefetchLocIdxs[numPrefetchLocs - 1] through count - 1.
23126 numPrefetchLocs : size_t
23127 Size of `prefetchLocs` and `prefetchLocIdxs` arrays.
23128 flags : unsigned long long
23129 Flags reserved for future use. Must be zero.
23130 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
23131 The stream to enqueue the operations in. Must not be the legacy
23132 NULL stream.
23134 Returns
23135 -------
23136 cudaError_t
23138 """
23139 cdef cyruntime.cudaStream_t cystream
23140 if stream is None:
23141 pstream = 0
23142 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
23143 pstream = int(stream)
23144 else:
23145 pstream = int(cudaStream_t(stream))
23146 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
23147 if not all(isinstance(_x, int) for _x in prefetchLocIdxs):
23148 raise TypeError("Argument 'prefetchLocIdxs' is not an instance of the expected type (tuple[int, ...] or list[int])")
23149 prefetchLocs = [] if prefetchLocs is None else prefetchLocs
23150 if not all(isinstance(_x, (cudaMemLocation,)) for _x in prefetchLocs):
23151 raise TypeError("Argument 'prefetchLocs' is not an instance of the expected type (tuple[cudaMemLocation, ...] or list[cudaMemLocation])")
23152 if not all(isinstance(_x, int) for _x in sizes):
23153 raise TypeError("Argument 'sizes' is not an instance of the expected type (tuple[int, ...] or list[int])")
23154 dptrs = [] if dptrs is None else dptrs
23155 pylist = [_HelperInputVoidPtr(pydptrs) for pydptrs in dptrs]
23156 cdef _InputVoidPtrPtrHelper voidStarHelperdptrs = _InputVoidPtrPtrHelper(pylist)
23157 cdef void** cydptrs_ptr = <void**><void_ptr>voidStarHelperdptrs.cptr
23158 cdef vector[size_t] cysizes = sizes
23159 if count > <size_t>len(dptrs): raise RuntimeError("List is too small: " + str(len(dptrs)) + " < " + str(count))
23160 if count > <size_t>len(sizes): raise RuntimeError("List is too small: " + str(len(sizes)) + " < " + str(count))
23161 cdef cyruntime.cudaMemLocation* cyprefetchLocs = NULL
23162 if len(prefetchLocs) > 1:
23163 cyprefetchLocs = <cyruntime.cudaMemLocation*> calloc(len(prefetchLocs), sizeof(cyruntime.cudaMemLocation))
23164 if cyprefetchLocs is NULL:
23165 raise MemoryError('Failed to allocate ' + str(len(prefetchLocs)) + ' x ' + str(sizeof(cyruntime.cudaMemLocation)) + ' bytes')
23166 for idx in range(len(prefetchLocs)):
23167 string.memcpy(&cyprefetchLocs[idx], (<cudaMemLocation>prefetchLocs[idx])._pvt_ptr, sizeof(cyruntime.cudaMemLocation))
23168 elif len(prefetchLocs) == 1:
23169 cyprefetchLocs = (<cudaMemLocation>prefetchLocs[0])._pvt_ptr
23170 cdef vector[size_t] cyprefetchLocIdxs = prefetchLocIdxs
23171 if numPrefetchLocs > <size_t>len(prefetchLocs): raise RuntimeError("List is too small: " + str(len(prefetchLocs)) + " < " + str(numPrefetchLocs))
23172 if numPrefetchLocs > <size_t>len(prefetchLocIdxs): raise RuntimeError("List is too small: " + str(len(prefetchLocIdxs)) + " < " + str(numPrefetchLocs))
23173 with nogil:
23174 err = cyruntime.cudaMemDiscardAndPrefetchBatchAsync(cydptrs_ptr, cysizes.data(), count, cyprefetchLocs, cyprefetchLocIdxs.data(), numPrefetchLocs, flags, cystream)
23175 if len(prefetchLocs) > 1 and cyprefetchLocs is not NULL:
23176 free(cyprefetchLocs)
23177 return (_dict_cudaError_t[err],)
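
# --- Editor's note: a sketch (not generated code) of the fused form; one
# location entry starting at index 0 covers both ranges, so the call is the
# optimized equivalent of a discard batch followed by a prefetch batch.
def _example_cudaMemDiscardAndPrefetchBatchAsync():
    _, stream = cudaStreamCreate()
    ptrs = [cudaMallocManaged(4096, cudaMemAttachGlobal)[1] for _ in range(2)]
    loc = cudaMemLocation()
    loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    loc.id = 0
    err, = cudaMemDiscardAndPrefetchBatchAsync(ptrs, [4096, 4096], 2,
                                               [loc], [0], 1, 0, stream)
    assert err == cudaError_t.cudaSuccess
    cudaStreamSynchronize(stream)
    for ptr in ptrs:
        cudaFree(ptr)
    cudaStreamDestroy(stream)
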
23179@cython.embedsignature(True)
23180def cudaMemAdvise(devPtr, size_t count, advice not None : cudaMemoryAdvise, location not None : cudaMemLocation):
23181 """ Advise about the usage of a given memory range.
23183 Advise the Unified Memory subsystem about the usage pattern for the
23184 memory range starting at `devPtr` with a size of `count` bytes. The
23185 start address and end address of the memory range will be rounded down
23186 and rounded up respectively to be aligned to CPU page size before the
23187 advice is applied. The memory range must refer to managed memory
23188 allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
23189 variables. The memory range could also refer to system-allocated
23190 pageable memory provided it represents a valid, host-accessible region
23191 of memory and all additional constraints imposed by `advice` as
23192 outlined below are also satisfied. Specifying an invalid system-
23193 allocated pageable memory range results in an error being returned.
23195 The `advice` parameter can take the following values:
23197 - :py:obj:`~.cudaMemAdviseSetReadMostly`: This implies that the data is
23198 mostly going to be read from and only occasionally written to. Any
23199 read accesses from any processor to this region will create a read-
23200 only copy of at least the accessed pages in that processor's memory.
23201 Additionally, if :py:obj:`~.cudaMemPrefetchAsync` or
23202 :py:obj:`~.cudaMemPrefetchBatchAsync` is called on this region, it will
23203 create a read-only copy of the data on the destination processor. If
23204 the target location for :py:obj:`~.cudaMemPrefetchAsync` is a host
23205 NUMA node and a read-only copy already exists on another host NUMA
23206 node, that copy will be migrated to the targeted host NUMA node. If
23207 any processor writes to this region, all copies of the corresponding
23208 page will be invalidated except for the one where the write occurred.
23209 If the writing processor is the CPU and the preferred location of the
23210 page is a host NUMA node, then the page will also be migrated to that
23211 host NUMA node. The `location` argument is ignored for this advice.
23212 Note that for a page to be read-duplicated, the accessing processor
23213 must either be the CPU or a GPU that has a non-zero value for the
23214 device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`.
23215 Also, if a context is created on a device that does not have the
23216 device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess` set,
23217 then read-duplication will not occur until all such contexts are
23218 destroyed. If the memory region refers to valid system-allocated
23219 pageable memory, then the accessing device must have a non-zero value
23220 for the device attribute :py:obj:`~.cudaDevAttrPageableMemoryAccess`
23221 for a read-only copy to be created on that device. Note however that
23222 if the accessing device also has a non-zero value for the device
23223 attribute
23224 :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, then
23225 setting this advice will not create a read-only copy when that device
23226 accesses this memory region.
23228 - :py:obj:`~.cudaMemAdviseUnsetReadMostly`: Undoes the effect of
23229 :py:obj:`~.cudaMemAdviseSetReadMostly` and also prevents the Unified
23230 Memory driver from attempting heuristic read-duplication on the
23231 memory range. Any read-duplicated copies of the data will be
23232 collapsed into a single copy. The location for the collapsed copy
23233 will be the preferred location if the page has a preferred location
23234 and one of the read-duplicated copies was resident at that location.
23235 Otherwise, the location chosen is arbitrary. Note: The `location`
23236 argument is ignored for this advice.
23238 - :py:obj:`~.cudaMemAdviseSetPreferredLocation`: This advice sets the
23239 preferred location for the data to be the memory belonging to
23240 `location`. When :py:obj:`~.cudaMemLocation.type` is
23241 :py:obj:`~.cudaMemLocationTypeHost`, :py:obj:`~.cudaMemLocation.id`
23242 is ignored and the preferred location is set to be host memory. To
23243 set the preferred location to a specific host NUMA node, applications
23244 must set :py:obj:`~.cudaMemLocation.type` to
23245 :py:obj:`~.cudaMemLocationTypeHostNuma` and
23246 :py:obj:`~.cudaMemLocation.id` must specify the NUMA ID of the host
23247 NUMA node. If :py:obj:`~.cudaMemLocation.type` is set to
23248 :py:obj:`~.cudaMemLocationTypeHostNumaCurrent`,
23249 :py:obj:`~.cudaMemLocation.id` will be ignored and the host NUMA node
23250 closest to the calling thread's CPU will be used as the preferred
23251 location. If :py:obj:`~.cudaMemLocation.type` is a
23252 :py:obj:`~.cudaMemLocationTypeDevice`, then
23253 :py:obj:`~.cudaMemLocation.id` must be a valid device ordinal and the
23254 device must have a non-zero value for the device attribute
23255 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`. Setting the preferred
23256 location does not cause data to migrate to that location immediately.
23257 Instead, it guides the migration policy when a fault occurs on that
23258 memory region. If the data is already in its preferred location and
23259 the faulting processor can establish a mapping without requiring the
23260 data to be migrated, then data migration will be avoided. On the
23261 other hand, if the data is not in its preferred location or if a
23262 direct mapping cannot be established, then it will be migrated to the
23263 processor accessing it. It is important to note that setting the
23264 preferred location does not prevent data prefetching done using
23265 :py:obj:`~.cudaMemPrefetchAsync`. Having a preferred location can
23266 override the page thrash detection and resolution logic in the
23267 Unified Memory driver. Normally, if a page is detected to be
23268 constantly thrashing between for example host and device memory, the
23269 page may eventually be pinned to host memory by the Unified Memory
23270 driver. But if the preferred location is set as device memory, then
23271 the page will continue to thrash indefinitely. If
23272 :py:obj:`~.cudaMemAdviseSetReadMostly` is also set on this memory
23273 region or any subset of it, then the policies associated with that
23274 advice will override the policies of this advice, unless read
23275 accesses from `location` will not result in a read-only copy being
23276 created on that processor, as outlined in the description for the advice
23277 :py:obj:`~.cudaMemAdviseSetReadMostly`. If the memory region refers
23278 to valid system-allocated pageable memory, and
23279 :py:obj:`~.cudaMemLocation.type` is
23280 :py:obj:`~.cudaMemLocationTypeDevice` then
23281 :py:obj:`~.cudaMemLocation.id` must be a valid device that has a non-
23282 zero value for the device attribute
23283 :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
23285 - :py:obj:`~.cudaMemAdviseUnsetPreferredLocation`: Undoes the effect of
23286 :py:obj:`~.cudaMemAdviseSetPreferredLocation` and changes the
23287 preferred location to none. The `location` argument is ignored for
23288 this advice.
23290 - :py:obj:`~.cudaMemAdviseSetAccessedBy`: This advice implies that the
23291 data will be accessed by processor `location`. The
23292 :py:obj:`~.cudaMemLocation.type` must be either
23293 :py:obj:`~.cudaMemLocationTypeDevice` with
23294 :py:obj:`~.cudaMemLocation.id` representing a valid device ordinal or
23295 :py:obj:`~.cudaMemLocationTypeHost` and
23296 :py:obj:`~.cudaMemLocation.id` will be ignored. All other location
23297 types are invalid. If :py:obj:`~.cudaMemLocation.id` is a GPU, then
23298 the device attribute :py:obj:`~.cudaDevAttrConcurrentManagedAccess`
23299 must be non-zero. This advice does not cause data migration and has
23300 no impact on the location of the data per se. Instead, it causes the
23301 data to always be mapped in the specified processor's page tables, as
23302 long as the location of the data permits a mapping to be established.
23303 If the data gets migrated for any reason, the mappings are updated
23304 accordingly. This advice is recommended in scenarios where data
23305 locality is not important, but avoiding faults is. Consider for
23306 example a system containing multiple GPUs with peer-to-peer access
23307 enabled, where the data located on one GPU is occasionally accessed
23308 by peer GPUs. In such scenarios, migrating data over to the other
23309 GPUs is not as important because the accesses are infrequent and the
23310 overhead of migration may be too high. But preventing faults can
23311 still help improve performance, and so having a mapping set up in
23312 advance is useful. Note that on CPU access of this data, the data may
23313 be migrated to host memory because the CPU typically cannot access
23314 device memory directly. Any GPU that had the
23315 :py:obj:`~.cudaMemAdviseSetAccessedBy` flag set for this data will
23316 now have its mapping updated to point to the page in host memory. If
23317 :py:obj:`~.cudaMemAdviseSetReadMostly` is also set on this memory
23318 region or any subset of it, then the policies associated with that
23319 advice will override the policies of this advice. Additionally, if
23320 the preferred location of this memory region or any subset of it is
23321 also `location`, then the policies associated with
23322 :py:obj:`~.cudaMemAdviseSetPreferredLocation` will override the
23323 policies of this advice. If the memory region refers to valid system-
23324 allocated pageable memory, and :py:obj:`~.cudaMemLocation.type` is
23325 :py:obj:`~.cudaMemLocationTypeDevice` then device in
23326 :py:obj:`~.cudaMemLocation.id` must have a non-zero value for the
23327 device attribute :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
23328 Additionally, if :py:obj:`~.cudaMemLocation.id` has a non-zero value
23329 for the device attribute
23330 :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, then
23331 this call has no effect.
23333 - :py:obj:`~.cudaMemAdviseUnsetAccessedBy`: Undoes the effect of
23334 :py:obj:`~.cudaMemAdviseSetAccessedBy`. Any mappings to the data from
23335 `location` may be removed at any time causing accesses to result in
23336 non-fatal page faults. If the memory region refers to valid system-
23337 allocated pageable memory, and :py:obj:`~.cudaMemLocation.type` is
23338 :py:obj:`~.cudaMemLocationTypeDevice` then device in
23339 :py:obj:`~.cudaMemLocation.id` must have a non-zero value for the
23340 device attribute :py:obj:`~.cudaDevAttrPageableMemoryAccess`.
23341 Additionally, if :py:obj:`~.cudaMemLocation.id` has a non-zero value
23342 for the device attribute
23343 :py:obj:`~.cudaDevAttrPageableMemoryAccessUsesHostPageTables`, then
23344 this call has no effect.
23346 Parameters
23347 ----------
23348 devPtr : Any
23349 Pointer to memory to set the advice for
23350 count : size_t
23351 Size in bytes of the memory range
23352 advice : :py:obj:`~.cudaMemoryAdvise`
23353 Advice to be applied for the specified memory range
23354 location : :py:obj:`~.cudaMemLocation`
23355 Location to apply the advice for
23357 Returns
23358 -------
23359 cudaError_t
23360 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
23362 See Also
23363 --------
23364 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpyPeer`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy3DPeerAsync`, :py:obj:`~.cudaMemPrefetchAsync`, :py:obj:`~.cuMemAdvise`
23365 """
23366 cydevPtr = _HelperInputVoidPtr(devPtr)
23367 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
23368 cdef cyruntime.cudaMemoryAdvise cyadvice = advice.value
23369 with nogil:
23370 err = cyruntime.cudaMemAdvise(cydevPtr_ptr, count, cyadvice, location._pvt_ptr[0])
23371 return (_dict_cudaError_t[err],)
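# --- Editor's usage sketch (not part of the generated bindings) ---
# A minimal, hedged example of marking a managed range read-mostly via the
# wrapper above. It assumes a CUDA-capable device 0 with
# concurrentManagedAccess; asserts stand in for real error handling.
def _example_cudaMemAdvise():
    nbytes = 1 << 20
    err, dptr = cudaMallocManaged(nbytes, cudaMemAttachGlobal)
    assert err == cudaError_t.cudaSuccess
    loc = cudaMemLocation()
    loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    loc.id = 0  # the location is ignored for the read-mostly advice
    err, = cudaMemAdvise(dptr, nbytes,
                         cudaMemoryAdvise.cudaMemAdviseSetReadMostly, loc)
    assert err == cudaError_t.cudaSuccess
    err, = cudaFree(dptr)
    assert err == cudaError_t.cudaSuccess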
23373@cython.embedsignature(True)
23374def cudaMemRangeGetAttribute(size_t dataSize, attribute not None : cudaMemRangeAttribute, devPtr, size_t count):
23375 """ Query an attribute of a given memory range.
23377 Query an attribute about the memory range starting at `devPtr` with a
23378 size of `count` bytes. The memory range must refer to managed memory
23379 allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
23380 variables.
23382 The `attribute` parameter can take the following values:
23384 - :py:obj:`~.cudaMemRangeAttributeReadMostly`: If this attribute is
23385 specified, `data` will be interpreted as a 32-bit integer, and
23386 `dataSize` must be 4. The result returned will be 1 if all pages in
23387 the given memory range have read-duplication enabled, or 0 otherwise.
23389 - :py:obj:`~.cudaMemRangeAttributePreferredLocation`: If this attribute
23390 is specified, `data` will be interpreted as a 32-bit integer, and
23391 `dataSize` must be 4. The result returned will be a GPU device id if
23392 all pages in the memory range have that GPU as their preferred
23393 location, or it will be cudaCpuDeviceId if all pages in the memory
23394 range have the CPU as their preferred location, or it will be
23395 cudaInvalidDeviceId if either all the pages don't have the same
23396 preferred location or some of the pages don't have a preferred
23397 location at all. Note that the actual location of the pages in the
23398 memory range at the time of the query may be different from the
23399 preferred location.
23401 - :py:obj:`~.cudaMemRangeAttributeAccessedBy`: If this attribute is
23402 specified, `data` will be interpreted as an array of 32-bit integers,
23403 and `dataSize` must be a non-zero multiple of 4. The result returned
23404 will be a list of device ids that had
23405 :py:obj:`~.cudaMemAdviseSetAccessedBy` set for that entire memory
23406 range. If any device does not have that advice set for the entire
23407 memory range, that device will not be included. If `data` is larger
23408 than the number of devices that have that advice set for that memory
23409 range, cudaInvalidDeviceId will be returned in all the extra space
23410 provided. For example, if `dataSize` is 12 (i.e. `data` has 3 elements)
23411 and only device 0 has the advice set, then the result returned will
23412 be { 0, cudaInvalidDeviceId, cudaInvalidDeviceId }. If `data` is
23413 smaller than the number of devices that have that advice set, then
23414 only as many devices will be returned as can fit in the array. There
23415 is no guarantee on which specific devices will be returned, however.
23417 - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocation`: If this
23418 attribute is specified, `data` will be interpreted as a 32-bit
23419 integer, and `dataSize` must be 4. The result returned will be the
23420 last location to which all pages in the memory range were prefetched
23421 explicitly via :py:obj:`~.cudaMemPrefetchAsync`. This will either be
23422 a GPU id or cudaCpuDeviceId depending on whether the last location
23423 for prefetch was a GPU or the CPU respectively. If any page in the
23424 memory range was never explicitly prefetched or if all pages were not
23425 prefetched to the same location, cudaInvalidDeviceId will be
23426 returned. Note that this simply returns the last location that the
23427 application requested to prefetch the memory range to. It gives no
23428 indication as to whether the prefetch operation to that location has
23429 completed or even begun.
23431 - :py:obj:`~.cudaMemRangeAttributePreferredLocationType`: If this
23432 attribute is specified, `data` will be interpreted as a
23433 :py:obj:`~.cudaMemLocationType`, and `dataSize` must be
23434 sizeof(cudaMemLocationType). The :py:obj:`~.cudaMemLocationType`
23435 returned will be :py:obj:`~.cudaMemLocationTypeDevice` if all pages
23436 in the memory range have the same GPU as their preferred location, or
23437 :py:obj:`~.cudaMemLocationType` will be
23438 :py:obj:`~.cudaMemLocationTypeHost` if all pages in the memory range
23439 have the CPU as their preferred location, or it will be
23440 :py:obj:`~.cudaMemLocationTypeHostNuma` if all the pages in the
23441 memory range have the same host NUMA node ID as their preferred
23442 location or it will be :py:obj:`~.cudaMemLocationTypeInvalid` if
23443 either all the pages don't have the same preferred location or some
23444 of the pages don't have a preferred location at all. Note that the
23445 actual location type of the pages in the memory range at the time of
23446 the query may be different from the preferred location type.
23448 - :py:obj:`~.cudaMemRangeAttributePreferredLocationId`: If this
23449 attribute is specified, `data` will be interpreted as a 32-bit
23450 integer, and `dataSize` must be 4. If the
23451 :py:obj:`~.cudaMemRangeAttributePreferredLocationType` query for
23452 the same address range returns
23453 :py:obj:`~.cudaMemLocationTypeDevice`, it will be a valid device
23454 ordinal or if it returns :py:obj:`~.cudaMemLocationTypeHostNuma`,
23455 it will be a valid host NUMA node ID or if it returns any other
23456 location type, the id should be ignored.
23458 - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocationType`: If this
23459 attribute is specified, `data` will be interpreted as a
23460 :py:obj:`~.cudaMemLocationType`, and `dataSize` must be
23461 sizeof(cudaMemLocationType). The result returned will be the last
23462 location type to which all pages in the memory range were prefetched
23463 explicitly via :py:obj:`~.cudaMemPrefetchAsync`. The
23464 :py:obj:`~.cudaMemLocationType` returned will be
23465 :py:obj:`~.cudaMemLocationTypeDevice` if the last prefetch location
23466 was the GPU or :py:obj:`~.cudaMemLocationTypeHost` if it was the CPU
23467 or :py:obj:`~.cudaMemLocationTypeHostNuma` if the last prefetch
23468 location was a specific host NUMA node. If any page in the memory
23469 range was never explicitly prefetched or if all pages were not
23470 prefetched to the same location, :py:obj:`~.cudaMemLocationType` will
23471 be :py:obj:`~.cudaMemLocationTypeInvalid`. Note that this simply
23472 returns the last location type that the application requested to
23473 prefetch the memory range to. It gives no indication as to whether
23474 the prefetch operation to that location has completed or even begun.
23476 - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocationId`: If this
23477 attribute is specified, `data` will be interpreted as a 32-bit
23478 integer, and `dataSize` must be 4. If the
23479 :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocationType` query for
23480 the same address range returns
23481 :py:obj:`~.cudaMemLocationTypeDevice`, it will be a valid device
23482 ordinal or if it returns :py:obj:`~.cudaMemLocationTypeHostNuma`,
23483 it will be a valid host NUMA node ID or if it returns any other
23484 location type, the id should be ignored.
23486 Parameters
23487 ----------
23488 dataSize : size_t
23489 The size of `data` in bytes
23490 attribute : :py:obj:`~.cudaMemRangeAttribute`
23491 The attribute to query
23492 devPtr : Any
23493 Start of the range to query
23494 count : size_t
23495 Size of the range to query
23497 Returns
23498 -------
23499 cudaError_t
23500 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
23501 data : Any
23502 A pointer to a memory location where the result of the attribute
23503 query will be written.
23505 See Also
23506 --------
23507 :py:obj:`~.cudaMemRangeGetAttributes`, :py:obj:`~.cudaMemPrefetchAsync`, :py:obj:`~.cudaMemAdvise`, :py:obj:`~.cuMemRangeGetAttribute`
23508 """
23509 cdef _HelperCUmem_range_attribute cydata = _HelperCUmem_range_attribute(attribute, dataSize)
23510 cdef void* cydata_ptr = <void*><void_ptr>cydata.cptr
23511 cdef cyruntime.cudaMemRangeAttribute cyattribute = attribute.value
23512 cydevPtr = _HelperInputVoidPtr(devPtr)
23513 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
23514 with nogil:
23515 err = cyruntime.cudaMemRangeGetAttribute(cydata_ptr, dataSize, cyattribute, cydevPtr_ptr, count)
23516 if err != cyruntime.cudaSuccess:
23517 return (_dict_cudaError_t[err], None)
23518 return (_dict_cudaError_t[err], cydata.pyObj())
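# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of querying the read-mostly state of a managed range with
# the wrapper above; the result is a 32-bit int, so dataSize must be 4.
# Assumes a CUDA-capable device and uses asserts for brevity.
def _example_cudaMemRangeGetAttribute():
    nbytes = 1 << 20
    err, dptr = cudaMallocManaged(nbytes, cudaMemAttachGlobal)
    assert err == cudaError_t.cudaSuccess
    err, read_mostly = cudaMemRangeGetAttribute(
        4, cudaMemRangeAttribute.cudaMemRangeAttributeReadMostly, dptr, nbytes)
    assert err == cudaError_t.cudaSuccess
    print("read-duplication enabled:", bool(read_mostly))
    err, = cudaFree(dptr)
    assert err == cudaError_t.cudaSuccess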
23520@cython.embedsignature(True)
23521def cudaMemRangeGetAttributes(dataSizes : tuple[int] | list[int], attributes : Optional[tuple[cudaMemRangeAttribute] | list[cudaMemRangeAttribute]], size_t numAttributes, devPtr, size_t count):
23522 """ Query attributes of a given memory range.
23524 Query attributes of the memory range starting at `devPtr` with a size
23525 of `count` bytes. The memory range must refer to managed memory
23526 allocated via :py:obj:`~.cudaMallocManaged` or declared via managed
23527 variables. The `attributes` array will be interpreted to have
23528 `numAttributes` entries. The `dataSizes` array will also be interpreted
23529 to have `numAttributes` entries. The results of the query will be
23530 stored in `data`.
23532 The list of supported attributes are given below. Please refer to
23533 :py:obj:`~.cudaMemRangeGetAttribute` for attribute descriptions and
23534 restrictions.
23536 - :py:obj:`~.cudaMemRangeAttributeReadMostly`
23538 - :py:obj:`~.cudaMemRangeAttributePreferredLocation`
23540 - :py:obj:`~.cudaMemRangeAttributeAccessedBy`
23542 - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocation`
23544 - :py:obj:`~.cudaMemRangeAttributePreferredLocationType`
23546 - :py:obj:`~.cudaMemRangeAttributePreferredLocationId`
23548 - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocationType`
23550 - :py:obj:`~.cudaMemRangeAttributeLastPrefetchLocationId`
23552 Parameters
23553 ----------
23554 dataSizes : list[int]
23555 Array containing the sizes of each result
23556 attributes : list[:py:obj:`~.cudaMemRangeAttribute`]
23557 An array of attributes to query (numAttributes and the number of
23558 attributes in this array should match)
23559 numAttributes : size_t
23560 Number of attributes to query
23561 devPtr : Any
23562 Start of the range to query
23563 count : size_t
23564 Size of the range to query
23566 Returns
23567 -------
23568 cudaError_t
23569 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
23570 data : list[Any]
23571 A two-dimensional array containing pointers to memory locations
23572 where the result of each attribute query will be written to.
23574 See Also
23575 --------
23576 :py:obj:`~.cudaMemRangeGetAttribute`, :py:obj:`~.cudaMemAdvise`, :py:obj:`~.cudaMemPrefetchAsync`, :py:obj:`~.cuMemRangeGetAttributes`
23577 """
23578 attributes = [] if attributes is None else attributes
23579 if not all(isinstance(_x, (cudaMemRangeAttribute)) for _x in attributes):
23580 raise TypeError("Argument 'attributes' is not instance of type (expected tuple[cyruntime.cudaMemRangeAttribute] or list[cyruntime.cudaMemRangeAttribute])")
23581 if not all(isinstance(_x, (int)) for _x in dataSizes):
23582 raise TypeError("Argument 'dataSizes' is not instance of type (expected tuple[int] or list[int])")
23583 pylist = [_HelperCUmem_range_attribute(pyattributes, pydataSizes) for (pyattributes, pydataSizes) in zip(attributes, dataSizes)]
23584 cdef _InputVoidPtrPtrHelper voidStarHelperdata = _InputVoidPtrPtrHelper(pylist)
23585 cdef void** cyvoidStarHelper_ptr = <void**><void_ptr>voidStarHelperdata.cptr
23586 cdef vector[size_t] cydataSizes = dataSizes
23587 cdef vector[cyruntime.cudaMemRangeAttribute] cyattributes = [pyattributes.value for pyattributes in (attributes)]
23588 if numAttributes > <size_t>len(dataSizes): raise RuntimeError("List is too small: " + str(len(dataSizes)) + " < " + str(numAttributes))
23589 if numAttributes > <size_t>len(attributes): raise RuntimeError("List is too small: " + str(len(attributes)) + " < " + str(numAttributes))
23590 cydevPtr = _HelperInputVoidPtr(devPtr)
23591 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
23592 with nogil:
23593 err = cyruntime.cudaMemRangeGetAttributes(cyvoidStarHelper_ptr, cydataSizes.data(), cyattributes.data(), numAttributes, cydevPtr_ptr, count)
23594 if err != cyruntime.cudaSuccess:
23595 return (_dict_cudaError_t[err], None)
23596 return (_dict_cudaError_t[err], [obj.pyObj() for obj in pylist])
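# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of batching two attribute queries with the wrapper above.
# Both results are 32-bit ints, so each entry of dataSizes is 4. Assumes a
# CUDA-capable device; asserts stand in for real error handling.
def _example_cudaMemRangeGetAttributes():
    nbytes = 1 << 20
    err, dptr = cudaMallocManaged(nbytes, cudaMemAttachGlobal)
    assert err == cudaError_t.cudaSuccess
    attrs = [cudaMemRangeAttribute.cudaMemRangeAttributeReadMostly,
             cudaMemRangeAttribute.cudaMemRangeAttributePreferredLocation]
    err, values = cudaMemRangeGetAttributes([4, 4], attrs, len(attrs),
                                            dptr, nbytes)
    assert err == cudaError_t.cudaSuccess
    print(values)  # one result per queried attribute
    err, = cudaFree(dptr)
    assert err == cudaError_t.cudaSuccess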
23598@cython.embedsignature(True)
23599def cudaMemcpyToArray(dst, size_t wOffset, size_t hOffset, src, size_t count, kind not None : cudaMemcpyKind):
23600 """ Copies data between host and device.
23602 [Deprecated]
23604 Copies `count` bytes from the memory area pointed to by `src` to the
23605 CUDA array `dst` starting at `hOffset` rows and `wOffset` bytes from
23606 the upper left corner, where `kind` specifies the direction of the
23607 copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
23608 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
23609 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
23610 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
23611 type of transfer is inferred from the pointer values. However,
23612 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
23613 unified virtual addressing.
23615 Parameters
23616 ----------
23617 dst : :py:obj:`~.cudaArray_t`
23618 Destination memory address
23619 wOffset : size_t
23620 Destination starting X offset (columns in bytes)
23621 hOffset : size_t
23622 Destination starting Y offset (rows)
23623 src : Any
23624 Source memory address
23625 count : size_t
23626 Size in bytes to copy
23627 kind : :py:obj:`~.cudaMemcpyKind`
23628 Type of transfer
23630 Returns
23631 -------
23632 cudaError_t
23633 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
23635 See Also
23636 --------
23637 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyHtoA`, :py:obj:`~.cuMemcpyDtoA`
23638 """
23639 cdef cyruntime.cudaArray_t cydst
23640 if dst is None:
23641 pdst = 0
23642 elif isinstance(dst, (cudaArray_t,)):
23643 pdst = int(dst)
23644 else:
23645 pdst = int(cudaArray_t(dst))
23646 cydst = <cyruntime.cudaArray_t><void_ptr>pdst
23647 cysrc = _HelperInputVoidPtr(src)
23648 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
23649 cdef cyruntime.cudaMemcpyKind cykind = kind.value
23650 with nogil:
23651 err = cyruntime.cudaMemcpyToArray(cydst, wOffset, hOffset, cysrc_ptr, count, cykind)
23652 return (_dict_cudaError_t[err],)
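# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of the deprecated upload path: copying host bytes into a
# 1D CUDA array of 256 floats. New code should prefer cudaMemcpy2DToArray;
# this only illustrates the wrapper above. Assumes a CUDA-capable device.
def _example_cudaMemcpyToArray():
    desc = cudaChannelFormatDesc()
    desc.x, desc.y, desc.z, desc.w = 32, 0, 0, 0
    desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
    err, arr = cudaMallocArray(desc, 256, 0, 0)
    assert err == cudaError_t.cudaSuccess
    src = bytes(256 * 4)  # any buffer-protocol object works as the host source
    err, = cudaMemcpyToArray(arr, 0, 0, src, len(src),
                             cudaMemcpyKind.cudaMemcpyHostToDevice)
    assert err == cudaError_t.cudaSuccess
    err, = cudaFreeArray(arr)
    assert err == cudaError_t.cudaSuccess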
23654@cython.embedsignature(True)
23655def cudaMemcpyFromArray(dst, src, size_t wOffset, size_t hOffset, size_t count, kind not None : cudaMemcpyKind):
23656 """ Copies data between host and device.
23658 [Deprecated]
23660 Copies `count` bytes from the CUDA array `src` starting at `hOffset`
23661 rows and `wOffset` bytes from the upper left corner to the memory area
23662 pointed to by `dst`, where `kind` specifies the direction of the copy,
23663 and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
23664 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
23665 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
23666 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
23667 type of transfer is inferred from the pointer values. However,
23668 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
23669 unified virtual addressing.
23671 Parameters
23672 ----------
23673 dst : Any
23674 Destination memory address
23675 src : :py:obj:`~.cudaArray_const_t`
23676 Source memory address
23677 wOffset : size_t
23678 Source starting X offset (columns in bytes)
23679 hOffset : size_t
23680 Source starting Y offset (rows)
23681 count : size_t
23682 Size in bytes to copy
23683 kind : :py:obj:`~.cudaMemcpyKind`
23684 Type of transfer
23686 Returns
23687 -------
23688 cudaError_t
23689 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
23691 See Also
23692 --------
23693 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAtoH`, :py:obj:`~.cuMemcpyAtoD`
23694 """
23695 cdef cyruntime.cudaArray_const_t cysrc
23696 if src is None:
23697 psrc = 0
23698 elif isinstance(src, (cudaArray_const_t,)):
23699 psrc = int(src)
23700 else:
23701 psrc = int(cudaArray_const_t(src))
23702 cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
23703 cydst = _HelperInputVoidPtr(dst)
23704 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
23705 cdef cyruntime.cudaMemcpyKind cykind = kind.value
23706 with nogil:
23707 err = cyruntime.cudaMemcpyFromArray(cydst_ptr, cysrc, wOffset, hOffset, count, cykind)
23708 return (_dict_cudaError_t[err],)
23710@cython.embedsignature(True)
23711def cudaMemcpyArrayToArray(dst, size_t wOffsetDst, size_t hOffsetDst, src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, kind not None : cudaMemcpyKind):
23712 """ Copies data between host and device.
23714 [Deprecated]
23716 Copies `count` bytes from the CUDA array `src` starting at `hOffsetSrc`
23717 rows and `wOffsetSrc` bytes from the upper left corner to the CUDA
23718 array `dst` starting at `hOffsetDst` rows and `wOffsetDst` bytes from
23719 the upper left corner, where `kind` specifies the direction of the
23720 copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
23721 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
23722 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
23723 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
23724 type of transfer is inferred from the pointer values. However,
23725 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
23726 unified virtual addressing.
23728 Parameters
23729 ----------
23730 dst : :py:obj:`~.cudaArray_t`
23731 Destination memory address
23732 wOffsetDst : size_t
23733 Destination starting X offset (columns in bytes)
23734 hOffsetDst : size_t
23735 Destination starting Y offset (rows)
23736 src : :py:obj:`~.cudaArray_const_t`
23737 Source memory address
23738 wOffsetSrc : size_t
23739 Source starting X offset (columns in bytes)
23740 hOffsetSrc : size_t
23741 Source starting Y offset (rows)
23742 count : size_t
23743 Size in bytes to copy
23744 kind : :py:obj:`~.cudaMemcpyKind`
23745 Type of transfer
23747 Returns
23748 -------
23749 cudaError_t
23750 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
23752 See Also
23753 --------
23754 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAtoA`
23755 """
23756 cdef cyruntime.cudaArray_const_t cysrc
23757 if src is None:
23758 psrc = 0
23759 elif isinstance(src, (cudaArray_const_t,)):
23760 psrc = int(src)
23761 else:
23762 psrc = int(cudaArray_const_t(src))
23763 cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
23764 cdef cyruntime.cudaArray_t cydst
23765 if dst is None:
23766 pdst = 0
23767 elif isinstance(dst, (cudaArray_t,)):
23768 pdst = int(dst)
23769 else:
23770 pdst = int(cudaArray_t(dst))
23771 cydst = <cyruntime.cudaArray_t><void_ptr>pdst
23772 cdef cyruntime.cudaMemcpyKind cykind = kind.value
23773 with nogil:
23774 err = cyruntime.cudaMemcpyArrayToArray(cydst, wOffsetDst, hOffsetDst, cysrc, wOffsetSrc, hOffsetSrc, count, cykind)
23775 return (_dict_cudaError_t[err],)
23777@cython.embedsignature(True)
23778def cudaMemcpyToArrayAsync(dst, size_t wOffset, size_t hOffset, src, size_t count, kind not None : cudaMemcpyKind, stream):
23779 """ Copies data between host and device.
23781 [Deprecated]
23783 Copies `count` bytes from the memory area pointed to by `src` to the
23784 CUDA array `dst` starting at `hOffset` rows and `wOffset` bytes from
23785 the upper left corner, where `kind` specifies the direction of the
23786 copy, and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
23787 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
23788 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
23789 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
23790 type of transfer is inferred from the pointer values. However,
23791 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
23792 unified virtual addressing.
23794 :py:obj:`~.cudaMemcpyToArrayAsync()` is asynchronous with respect to
23795 the host, so the call may return before the copy is complete. The copy
23796 can optionally be associated with a stream by passing a non-zero `stream`
23797 argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
23798 :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
23799 may overlap with operations in other streams.
23801 Parameters
23802 ----------
23803 dst : :py:obj:`~.cudaArray_t`
23804 Destination memory address
23805 wOffset : size_t
23806 Destination starting X offset (columns in bytes)
23807 hOffset : size_t
23808 Destination starting Y offset (rows)
23809 src : Any
23810 Source memory address
23811 count : size_t
23812 Size in bytes to copy
23813 kind : :py:obj:`~.cudaMemcpyKind`
23814 Type of transfer
23815 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
23816 Stream identifier
23818 Returns
23819 -------
23820 cudaError_t
23821 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
23823 See Also
23824 --------
23825 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpyFromArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyHtoAAsync`, :py:obj:`~.cuMemcpy2DAsync`
23826 """
23827 cdef cyruntime.cudaStream_t cystream
23828 if stream is None:
23829 pstream = 0
23830 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
23831 pstream = int(stream)
23832 else:
23833 pstream = int(cudaStream_t(stream))
23834 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
23835 cdef cyruntime.cudaArray_t cydst
23836 if dst is None:
23837 pdst = 0
23838 elif isinstance(dst, (cudaArray_t,)):
23839 pdst = int(dst)
23840 else:
23841 pdst = int(cudaArray_t(dst))
23842 cydst = <cyruntime.cudaArray_t><void_ptr>pdst
23843 cysrc = _HelperInputVoidPtr(src)
23844 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
23845 cdef cyruntime.cudaMemcpyKind cykind = kind.value
23846 with nogil:
23847 err = cyruntime.cudaMemcpyToArrayAsync(cydst, wOffset, hOffset, cysrc_ptr, count, cykind, cystream)
23848 return (_dict_cudaError_t[err],)
23850@cython.embedsignature(True)
23851def cudaMemcpyFromArrayAsync(dst, src, size_t wOffset, size_t hOffset, size_t count, kind not None : cudaMemcpyKind, stream):
23852 """ Copies data between host and device.
23854 [Deprecated]
23856 Copies `count` bytes from the CUDA array `src` starting at `hOffset`
23857 rows and `wOffset` bytes from the upper left corner to the memory area
23858 pointed to by `dst`, where `kind` specifies the direction of the copy,
23859 and must be one of :py:obj:`~.cudaMemcpyHostToHost`,
23860 :py:obj:`~.cudaMemcpyHostToDevice`, :py:obj:`~.cudaMemcpyDeviceToHost`,
23861 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
23862 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
23863 type of transfer is inferred from the pointer values. However,
23864 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
23865 unified virtual addressing.
23867 :py:obj:`~.cudaMemcpyFromArrayAsync()` is asynchronous with respect to
23868 the host, so the call may return before the copy is complete. The copy
23869 can optionally be associated with a stream by passing a non-zero `stream`
23870 argument. If `kind` is :py:obj:`~.cudaMemcpyHostToDevice` or
23871 :py:obj:`~.cudaMemcpyDeviceToHost` and `stream` is non-zero, the copy
23872 may overlap with operations in other streams.
23874 Parameters
23875 ----------
23876 dst : Any
23877 Destination memory address
23878 src : :py:obj:`~.cudaArray_const_t`
23879 Source memory address
23880 wOffset : size_t
23881 Source starting X offset (columns in bytes)
23882 hOffset : size_t
23883 Source starting Y offset (rows)
23884 count : size_t
23885 Size in bytes to copy
23886 kind : :py:obj:`~.cudaMemcpyKind`
23887 Type of transfer
23888 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
23889 Stream identifier
23891 Returns
23892 -------
23893 cudaError_t
23894 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidMemcpyDirection`
23896 See Also
23897 --------
23898 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaMemcpy2D`, :py:obj:`~.cudaMemcpyToArray`, :py:obj:`~.cudaMemcpy2DToArray`, :py:obj:`~.cudaMemcpyFromArray`, :py:obj:`~.cudaMemcpy2DFromArray`, :py:obj:`~.cudaMemcpyArrayToArray`, :py:obj:`~.cudaMemcpy2DArrayToArray`, :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`, :py:obj:`~.cudaMemcpyAsync`, :py:obj:`~.cudaMemcpy2DAsync`, :py:obj:`~.cudaMemcpyToArrayAsync`, :py:obj:`~.cudaMemcpy2DToArrayAsync`, :py:obj:`~.cudaMemcpy2DFromArrayAsync`, :py:obj:`~.cudaMemcpyToSymbolAsync`, :py:obj:`~.cudaMemcpyFromSymbolAsync`, :py:obj:`~.cuMemcpyAtoHAsync`, :py:obj:`~.cuMemcpy2DAsync`
23899 """
23900 cdef cyruntime.cudaStream_t cystream
23901 if stream is None:
23902 pstream = 0
23903 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
23904 pstream = int(stream)
23905 else:
23906 pstream = int(cudaStream_t(stream))
23907 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
23908 cdef cyruntime.cudaArray_const_t cysrc
23909 if src is None:
23910 psrc = 0
23911 elif isinstance(src, (cudaArray_const_t,)):
23912 psrc = int(src)
23913 else:
23914 psrc = int(cudaArray_const_t(src))
23915 cysrc = <cyruntime.cudaArray_const_t><void_ptr>psrc
23916 cydst = _HelperInputVoidPtr(dst)
23917 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
23918 cdef cyruntime.cudaMemcpyKind cykind = kind.value
23919 with nogil:
23920 err = cyruntime.cudaMemcpyFromArrayAsync(cydst_ptr, cysrc, wOffset, hOffset, count, cykind, cystream)
23921 return (_dict_cudaError_t[err],)
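# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of a stream-ordered round trip through a CUDA array using
# the two deprecated async wrappers above. The host buffers here are plain
# (pageable) Python buffers, so the copies will not actually overlap; they
# must stay alive until the stream is synchronized. Assumes a CUDA device.
def _example_cudaMemcpyArrayAsync():
    err, stream = cudaStreamCreate()
    assert err == cudaError_t.cudaSuccess
    desc = cudaChannelFormatDesc()
    desc.x, desc.y, desc.z, desc.w = 8, 0, 0, 0
    desc.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
    err, arr = cudaMallocArray(desc, 1024, 0, 0)
    assert err == cudaError_t.cudaSuccess
    src, dst = bytes(1024), bytearray(1024)
    err, = cudaMemcpyToArrayAsync(arr, 0, 0, src, len(src),
                                  cudaMemcpyKind.cudaMemcpyHostToDevice, stream)
    assert err == cudaError_t.cudaSuccess
    err, = cudaMemcpyFromArrayAsync(dst, arr, 0, 0, len(dst),
                                    cudaMemcpyKind.cudaMemcpyDeviceToHost, stream)
    assert err == cudaError_t.cudaSuccess
    err, = cudaStreamSynchronize(stream)
    assert err == cudaError_t.cudaSuccess
    err, = cudaFreeArray(arr)
    err, = cudaStreamDestroy(stream)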
23923@cython.embedsignature(True)
23924def cudaMallocAsync(size_t size, hStream):
23925 """ Allocates memory with stream ordered semantics.
23927 Inserts an allocation operation into `hStream`. A pointer to the
23928 allocated memory is returned immediately in `devPtr`. The allocation must
23929 not be accessed until the allocation operation completes. The
23930 allocation comes from the memory pool associated with the stream's
23931 device.
23933 Parameters
23934 ----------
23935 size : size_t
23936 Number of bytes to allocate
23937 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
23938 The stream establishing the stream ordering contract and the memory
23939 pool to allocate from
23941 Returns
23942 -------
23943 cudaError_t
23944 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorOutOfMemory`
23945 devPtr : Any
23946 Returned device pointer
23948 See Also
23949 --------
23950 :py:obj:`~.cuMemAllocAsync`, cudaMallocAsync (C++ API), :py:obj:`~.cudaMallocFromPoolAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceSetMemPool`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolSetAccess`, :py:obj:`~.cudaMemPoolSetAttribute`, :py:obj:`~.cudaMemPoolGetAttribute`
23952 Notes
23953 -----
23954 The default memory pool of a device contains device memory from that device.
23956 Basic stream ordering allows future work submitted into the same stream to use the allocation. Stream query, stream synchronize, and CUDA events can be used to guarantee that the allocation operation completes before work submitted in a separate stream runs.
23958 During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph instead of the memory pool. The memory pool's properties are used to set the node's creation parameters.
23959 """
23960 cdef cyruntime.cudaStream_t cyhStream
23961 if hStream is None:
23962 phStream = 0
23963 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
23964 phStream = int(hStream)
23965 else:
23966 phStream = int(cudaStream_t(hStream))
23967 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
23968 cdef void_ptr devPtr = 0
23969 with nogil:
23970 err = cyruntime.cudaMallocAsync(<void**>&devPtr, size, cyhStream)
23971 if err != cyruntime.cudaSuccess:
23972 return (_dict_cudaError_t[err], None)
23973 return (_dict_cudaError_t[err], devPtr)
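# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of stream-ordered allocation: the pointer returned by
# cudaMallocAsync may only be used by work ordered after the allocation in
# the stream, and cudaFreeAsync (defined below) releases it in the same
# order. Assumes a CUDA-capable device; asserts replace real error handling.
def _example_cudaMallocAsync():
    err, stream = cudaStreamCreate()
    assert err == cudaError_t.cudaSuccess
    err, dptr = cudaMallocAsync(1 << 20, stream)
    assert err == cudaError_t.cudaSuccess
    # ... enqueue kernels or copies that use dptr in `stream` here ...
    err, = cudaFreeAsync(dptr, stream)
    assert err == cudaError_t.cudaSuccess
    err, = cudaStreamSynchronize(stream)  # allocation and free have completed
    assert err == cudaError_t.cudaSuccess
    err, = cudaStreamDestroy(stream)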
23975@cython.embedsignature(True)
23976def cudaFreeAsync(devPtr, hStream):
23977 """ Frees memory with stream ordered semantics.
23979 Inserts a free operation into `hStream`. The allocation must not be
23980 accessed after stream execution reaches the free. After this API
23981 returns, accessing the memory from any subsequent work launched on the
23982 GPU or querying its pointer attributes results in undefined behavior.
23984 Parameters
23985 ----------
23986 devPtr : Any
23987 Memory to free
23988 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
23989 The stream establishing the stream ordering promise
23991 Returns
23992 -------
23993 cudaError_t
23994 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
23996 See Also
23997 --------
23998 :py:obj:`~.cuMemFreeAsync`, :py:obj:`~.cudaMallocAsync`
24000 Notes
24001 -----
24002 During stream capture, this function results in the creation of a free node and must therefore be passed the address of a graph allocation.
24003 """
24004 cdef cyruntime.cudaStream_t cyhStream
24005 if hStream is None:
24006 phStream = 0
24007 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
24008 phStream = int(hStream)
24009 else:
24010 phStream = int(cudaStream_t(hStream))
24011 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
24012 cydevPtr = _HelperInputVoidPtr(devPtr)
24013 cdef void* cydevPtr_ptr = <void*><void_ptr>cydevPtr.cptr
24014 with nogil:
24015 err = cyruntime.cudaFreeAsync(cydevPtr_ptr, cyhStream)
24016 return (_dict_cudaError_t[err],)
24018@cython.embedsignature(True)
24019def cudaMemPoolTrimTo(memPool, size_t minBytesToKeep):
24020 """ Tries to release memory back to the OS.
24022 Releases memory back to the OS until the pool contains fewer than
24023 minBytesToKeep reserved bytes, or there is no more memory that the
24024 allocator can safely release. The allocator cannot release OS
24025 allocations that back outstanding asynchronous allocations. The OS
24026 allocations may happen at different granularity from the user
24027 allocations.
24029 Parameters
24030 ----------
24031 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24032 The memory pool to trim
24033 minBytesToKeep : size_t
24034 If the pool has less than minBytesToKeep reserved, the TrimTo
24035 operation is a no-op. Otherwise the pool will be guaranteed to have
24036 at least minBytesToKeep bytes reserved after the operation.
24038 Returns
24039 -------
24040 cudaError_t
24041 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24043 See Also
24044 --------
24045 :py:obj:`~.cuMemPoolTrimTo`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate`
24047 Notes
24048 -----
24049 Allocations that have not been freed count as outstanding.
24051 Allocations that have been asynchronously freed but whose completion has not been observed on the host (e.g. by a synchronize) can count as outstanding.
24052 """
24053 cdef cyruntime.cudaMemPool_t cymemPool
24054 if memPool is None:
24055 pmemPool = 0
24056 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24057 pmemPool = int(memPool)
24058 else:
24059 pmemPool = int(cudaMemPool_t(memPool))
24060 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24061 with nogil:
24062 err = cyruntime.cudaMemPoolTrimTo(cymemPool, minBytesToKeep)
24063 return (_dict_cudaError_t[err],)
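# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of trimming the default pool of device 0 down to zero
# reserved bytes; memory backing outstanding allocations is never released.
def _example_cudaMemPoolTrimTo():
    err, pool = cudaDeviceGetDefaultMemPool(0)
    assert err == cudaError_t.cudaSuccess
    err, = cudaMemPoolTrimTo(pool, 0)
    assert err == cudaError_t.cudaSuccess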
24065@cython.embedsignature(True)
24066def cudaMemPoolSetAttribute(memPool, attr not None : cudaMemPoolAttr, value):
24067 """ Sets attributes of a memory pool.
24069 Supported attributes are:
24071 - :py:obj:`~.cudaMemPoolAttrReleaseThreshold`: (value type =
24072 cuuint64_t) Amount of reserved memory in bytes to hold onto before
24073 trying to release memory back to the OS. When more than the release
24074 threshold bytes of memory are held by the memory pool, the allocator
24075 will try to release memory back to the OS on the next call to stream,
24076 event or context synchronize. (default 0)
24078 - :py:obj:`~.cudaMemPoolReuseFollowEventDependencies`: (value type =
24079 int) Allow :py:obj:`~.cudaMallocAsync` to use memory asynchronously
24080 freed in another stream as long as a stream ordering dependency of
24081 the allocating stream on the free action exists. Cuda events and null
24082 stream interactions can create the required stream ordered
24083 dependencies. (default enabled)
24085 - :py:obj:`~.cudaMemPoolReuseAllowOpportunistic`: (value type = int)
24086 Allow reuse of already completed frees when there is no dependency
24087 between the free and allocation. (default enabled)
24089 - :py:obj:`~.cudaMemPoolReuseAllowInternalDependencies`: (value type =
24090 int) Allow :py:obj:`~.cudaMallocAsync` to insert new stream
24091 dependencies in order to establish the stream ordering required to
24092 reuse a piece of memory released by :py:obj:`~.cudaFreeAsync`
24093 (default enabled).
24095 - :py:obj:`~.cudaMemPoolAttrReservedMemHigh`: (value type = cuuint64_t)
24096 Reset the high watermark that tracks the amount of backing memory
24097 that was allocated for the memory pool. It is illegal to set this
24098 attribute to a non-zero value.
24100 - :py:obj:`~.cudaMemPoolAttrUsedMemHigh`: (value type = cuuint64_t)
24101 Reset the high watermark that tracks the amount of used memory that
24102 was allocated for the memory pool. It is illegal to set this
24103 attribute to a non-zero value.
24105 Parameters
24106 ----------
24107 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24108 The memory pool to modify
24109 attr : :py:obj:`~.cudaMemPoolAttr`
24110 The attribute to modify
24111 value : Any
24112 Pointer to the value to assign
24114 Returns
24115 -------
24116 cudaError_t
24117 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24119 See Also
24120 --------
24121 :py:obj:`~.cuMemPoolSetAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate`
24122 """
24123 cdef cyruntime.cudaMemPool_t cymemPool
24124 if memPool is None:
24125 pmemPool = 0
24126 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24127 pmemPool = int(memPool)
24128 else:
24129 pmemPool = int(cudaMemPool_t(memPool))
24130 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24131 cdef cyruntime.cudaMemPoolAttr cyattr = attr.value
24132 cdef _HelperCUmemPool_attribute cyvalue = _HelperCUmemPool_attribute(attr, value, is_getter=False)
24133 cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
24134 with nogil:
24135 err = cyruntime.cudaMemPoolSetAttribute(cymemPool, cyattr, cyvalue_ptr)
24136 return (_dict_cudaError_t[err],)
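# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch of raising the release threshold of device 0's default
# pool so up to 64 MiB stays cached across synchronizations. Passing the
# value as driver.cuuint64_t mirrors the cuda-python samples; it is an
# assumption that this wrapper accepts that type here.
def _example_cudaMemPoolSetAttribute():
    err, pool = cudaDeviceGetDefaultMemPool(0)
    assert err == cudaError_t.cudaSuccess
    err, = cudaMemPoolSetAttribute(
        pool, cudaMemPoolAttr.cudaMemPoolAttrReleaseThreshold,
        driver.cuuint64_t(64 << 20))
    assert err == cudaError_t.cudaSuccess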
24138@cython.embedsignature(True)
24139def cudaMemPoolGetAttribute(memPool, attr not None : cudaMemPoolAttr):
24140 """ Gets attributes of a memory pool.
24142 Supported attributes are:
24144 - :py:obj:`~.cudaMemPoolAttrReleaseThreshold`: (value type =
24145 cuuint64_t) Amount of reserved memory in bytes to hold onto before
24146 trying to release memory back to the OS. When more than the release
24147 threshold bytes of memory are held by the memory pool, the allocator
24148 will try to release memory back to the OS on the next call to stream,
24149 event or context synchronize. (default 0)
24151 - :py:obj:`~.cudaMemPoolReuseFollowEventDependencies`: (value type =
24152 int) Allow :py:obj:`~.cudaMallocAsync` to use memory asynchronously
24153 freed in another stream as long as a stream ordering dependency of
24154 the allocating stream on the free action exists. Cuda events and null
24155 stream interactions can create the required stream ordered
24156 dependencies. (default enabled)
24158 - :py:obj:`~.cudaMemPoolReuseAllowOpportunistic`: (value type = int)
24159 Allow reuse of already completed frees when there is no dependency
24160 between the free and allocation. (default enabled)
24162 - :py:obj:`~.cudaMemPoolReuseAllowInternalDependencies`: (value type =
24163 int) Allow :py:obj:`~.cudaMallocAsync` to insert new stream
24164 dependencies in order to establish the stream ordering required to
24165 reuse a piece of memory released by :py:obj:`~.cudaFreeAsync`
24166 (default enabled).
24168 - :py:obj:`~.cudaMemPoolAttrReservedMemCurrent`: (value type =
24169 cuuint64_t) Amount of backing memory currently allocated for the
24170 mempool.
24172 - :py:obj:`~.cudaMemPoolAttrReservedMemHigh`: (value type = cuuint64_t)
24173 High watermark of backing memory allocated for the mempool since the
24174 last time it was reset.
24176 - :py:obj:`~.cudaMemPoolAttrUsedMemCurrent`: (value type = cuuint64_t)
24177 Amount of memory from the pool that is currently in use by the
24178 application.
24180 - :py:obj:`~.cudaMemPoolAttrUsedMemHigh`: (value type = cuuint64_t)
24181 High watermark of the amount of memory from the pool that was in use
24182 by the application since the last time it was reset.
24184 Parameters
24185 ----------
24186 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24187 The memory pool to get attributes of
24188 attr : :py:obj:`~.cudaMemPoolAttr`
24189 The attribute to get
24191 Returns
24192 -------
24193 cudaError_t
24194 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24195 value : Any
24196 Retrieved value
24198 See Also
24199 --------
24200 :py:obj:`~.cuMemPoolGetAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate`
24201 """
24202 cdef cyruntime.cudaMemPool_t cymemPool
24203 if memPool is None:
24204 pmemPool = 0
24205 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24206 pmemPool = int(memPool)
24207 else:
24208 pmemPool = int(cudaMemPool_t(memPool))
24209 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24210 cdef cyruntime.cudaMemPoolAttr cyattr = attr.value
24211 cdef _HelperCUmemPool_attribute cyvalue = _HelperCUmemPool_attribute(attr, 0, is_getter=True)
24212 cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
24213 with nogil:
24214 err = cyruntime.cudaMemPoolGetAttribute(cymemPool, cyattr, cyvalue_ptr)
24215 if err != cyruntime.cudaSuccess:
24216 return (_dict_cudaError_t[err], None)
24217 return (_dict_cudaError_t[err], cyvalue.pyObj())
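# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch reading how much backing memory device 0's default pool
# currently reserves; the attribute comes back as an integer-like value.
def _example_cudaMemPoolGetAttribute():
    err, pool = cudaDeviceGetDefaultMemPool(0)
    assert err == cudaError_t.cudaSuccess
    err, reserved = cudaMemPoolGetAttribute(
        pool, cudaMemPoolAttr.cudaMemPoolAttrReservedMemCurrent)
    assert err == cudaError_t.cudaSuccess
    print("reserved bytes:", int(reserved))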
24219@cython.embedsignature(True)
24220def cudaMemPoolSetAccess(memPool, descList : Optional[tuple[cudaMemAccessDesc] | list[cudaMemAccessDesc]], size_t count):
24221 """ Controls visibility of pools between devices.
24223 Parameters
24224 ----------
24225 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24226 The pool being modified
24227 descList : list[:py:obj:`~.cudaMemAccessDesc`]
24228 Array of access descriptors. Each descriptor instructs the access
24229 to enable for a single gpu
24230 count : size_t
24231 Number of descriptors in the map array.
24233 Returns
24234 -------
24235 cudaError_t
24236 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24238 See Also
24239 --------
24240 :py:obj:`~.cuMemPoolSetAccess`, :py:obj:`~.cudaMemPoolGetAccess`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`
24241 """
24242 descList = [] if descList is None else descList
24243 if not all(isinstance(_x, (cudaMemAccessDesc,)) for _x in descList):
24244 raise TypeError("Argument 'descList' is not instance of type (expected tuple[cyruntime.cudaMemAccessDesc,] or list[cyruntime.cudaMemAccessDesc,])")
24245 cdef cyruntime.cudaMemPool_t cymemPool
24246 if memPool is None:
24247 pmemPool = 0
24248 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24249 pmemPool = int(memPool)
24250 else:
24251 pmemPool = int(cudaMemPool_t(memPool))
24252 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24253 cdef cyruntime.cudaMemAccessDesc* cydescList = NULL
24254 if len(descList) > 1:
24255 cydescList = <cyruntime.cudaMemAccessDesc*> calloc(len(descList), sizeof(cyruntime.cudaMemAccessDesc))
24256 if cydescList is NULL:
24257 raise MemoryError('Failed to allocate length x size memory: ' + str(len(descList)) + 'x' + str(sizeof(cyruntime.cudaMemAccessDesc)))
24258 for idx in range(len(descList)):
24259 string.memcpy(&cydescList[idx], (<cudaMemAccessDesc>descList[idx])._pvt_ptr, sizeof(cyruntime.cudaMemAccessDesc))
24260 elif len(descList) == 1:
24261 cydescList = (<cudaMemAccessDesc>descList[0])._pvt_ptr
24262 if count > <size_t>len(descList): raise RuntimeError("List is too small: " + str(len(descList)) + " < " + str(count))
24263 with nogil:
24264 err = cyruntime.cudaMemPoolSetAccess(cymemPool, cydescList, count)
24265 if len(descList) > 1 and cydescList is not NULL:
24266 free(cydescList)
24267 return (_dict_cudaError_t[err],)
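# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch granting a hypothetical peer device 1 read-write access
# to device 0's default pool; it assumes a system with two peer-capable
# devices, and uses asserts for brevity.
def _example_cudaMemPoolSetAccess():
    err, pool = cudaDeviceGetDefaultMemPool(0)
    assert err == cudaError_t.cudaSuccess
    desc = cudaMemAccessDesc()
    desc.location.type = cudaMemLocationType.cudaMemLocationTypeDevice
    desc.location.id = 1  # hypothetical peer device ordinal
    desc.flags = cudaMemAccessFlags.cudaMemAccessFlagsProtReadWrite
    err, = cudaMemPoolSetAccess(pool, [desc], 1)
    assert err == cudaError_t.cudaSuccess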
24269@cython.embedsignature(True)
24270def cudaMemPoolGetAccess(memPool, location : Optional[cudaMemLocation]):
24271 """ Returns the accessibility of a pool from a device.
24273 Returns the accessibility of the pool's memory from the specified
24274 location.
24276 Parameters
24277 ----------
24278 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24279 the pool being queried
24280 location : :py:obj:`~.cudaMemLocation`
24281 the location accessing the pool
24283 Returns
24284 -------
24285 cudaError_t
24287 flags : :py:obj:`~.cudaMemAccessFlags`
24288 the accessibility of the pool from the specified location
24290 See Also
24291 --------
24292 :py:obj:`~.cuMemPoolGetAccess`, :py:obj:`~.cudaMemPoolSetAccess`
24293 """
24294 cdef cyruntime.cudaMemPool_t cymemPool
24295 if memPool is None:
24296 pmemPool = 0
24297 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24298 pmemPool = int(memPool)
24299 else:
24300 pmemPool = int(cudaMemPool_t(memPool))
24301 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24302 cdef cyruntime.cudaMemAccessFlags flags
24303 cdef cyruntime.cudaMemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL
24304 with nogil:
24305 err = cyruntime.cudaMemPoolGetAccess(&flags, cymemPool, cylocation_ptr)
24306 if err != cyruntime.cudaSuccess:
24307 return (_dict_cudaError_t[err], None)
24308 return (_dict_cudaError_t[err], cudaMemAccessFlags(flags))
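# --- Editor's usage sketch (not part of the generated bindings) ---
# A hedged sketch checking how device 0 may access its own default pool;
# the wrapper above returns the access flags on success.
def _example_cudaMemPoolGetAccess():
    err, pool = cudaDeviceGetDefaultMemPool(0)
    assert err == cudaError_t.cudaSuccess
    loc = cudaMemLocation()
    loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    loc.id = 0
    err, flags = cudaMemPoolGetAccess(pool, loc)
    assert err == cudaError_t.cudaSuccess
    print("access flags:", flags)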
24310@cython.embedsignature(True)
24311def cudaMemPoolCreate(poolProps : Optional[cudaMemPoolProps]):
24312 """ Creates a memory pool.
24314 Creates a CUDA memory pool and returns the handle in `pool`. The
24315 `poolProps` determines the properties of the pool such as the backing
24316 device and IPC capabilities.
24318 To create a memory pool for host memory not targeting a specific NUMA
24319 node, applications must set
24320 :py:obj:`~.cudaMemPoolProps`::cudaMemLocation::type to
24321 :py:obj:`~.cudaMemLocationTypeHost`.
24322 :py:obj:`~.cudaMemPoolProps`::cudaMemLocation::id is ignored for such
24323 pools. Pools created with the type :py:obj:`~.cudaMemLocationTypeHost`
24324 are not IPC capable and :py:obj:`~.cudaMemPoolProps.handleTypes` must
24325 be 0; any other values will result in
24326 :py:obj:`~.cudaErrorInvalidValue`. To create a memory pool targeting a
24327 specific host NUMA node, applications must set
24328 :py:obj:`~.cudaMemPoolProps`::cudaMemLocation::type to
24329 :py:obj:`~.cudaMemLocationTypeHostNuma` and
24330 :py:obj:`~.cudaMemPoolProps`::cudaMemLocation::id must specify the NUMA
24331 ID of the host memory node. Specifying
24332 :py:obj:`~.cudaMemLocationTypeHostNumaCurrent` as the
24333 :py:obj:`~.cudaMemPoolProps`::cudaMemLocation::type will result in
24334 :py:obj:`~.cudaErrorInvalidValue`. By default, the pool's memory will
24335 be accessible from the device it is allocated on. In the case of pools
24336 created with :py:obj:`~.cudaMemLocationTypeHostNuma` or
24337 :py:obj:`~.cudaMemLocationTypeHost`, their default accessibility will
24338 be from the host CPU. Applications can control the maximum size of the
24339 pool by specifying a non-zero value for
24340 :py:obj:`~.cudaMemPoolProps.maxSize`. If set to 0, the maximum size of
24341 the pool will default to a system dependent value.
24343 Applications that intend to use :py:obj:`~.CU_MEM_HANDLE_TYPE_FABRIC`
24344 based memory sharing must ensure: (1) `nvidia-caps-imex-channels`
24345 character device is created by the driver and is listed under
24346 /proc/devices (2) have at least one IMEX channel file accessible by the
24347 user launching the application.
24349 When exporter and importer CUDA processes have been granted access to
24350 the same IMEX channel, they can securely share memory.
24352 The IMEX channel security model works on a per-user basis, which means
24353 all processes under a user can share memory if the user has access to a
24354 valid IMEX channel. When multi-user isolation is desired, a separate
24355 IMEX channel is required for each user.
24357 These channel files exist in /dev/nvidia-caps-imex-channels/channel*
24358 and can be created using standard OS native calls like mknod on Linux.
24359 For example, to create channel0 with the major number from
24360 /proc/devices, users can execute the following command: `mknod
24361 /dev/nvidia-caps-imex-channels/channel0 c <major number> 0`
24363 To create a managed memory pool, applications must set
24364 :py:obj:`~.cudaMemPoolProps`::cudaMemAllocationType to
24365 :py:obj:`~.cudaMemAllocationTypeManaged`.
24366 :py:obj:`~.cudaMemPoolProps`::cudaMemAllocationHandleType must also be
24367 set to :py:obj:`~.cudaMemHandleTypeNone` since IPC is not supported.
24368 For managed memory pools, :py:obj:`~.cudaMemPoolProps`::cudaMemLocation
24369 will be treated as the preferred location for all allocations created
24370 from the pool. An application can also set
24371 :py:obj:`~.cudaMemLocationTypeNone` to indicate no preferred location.
24372 :py:obj:`~.cudaMemPoolProps.maxSize` must be set to zero for managed
24373 memory pools. :py:obj:`~.cudaMemPoolProps.usage` should be zero as
24374 decompress for managed memory is not supported. For managed memory
24375 pools, all devices on the system must have non-zero
24376 :py:obj:`~.concurrentManagedAccess`. If not, this call returns
24377 :py:obj:`~.cudaErrorNotSupported`.
24379 Parameters
24380 ----------
24381 poolProps : :py:obj:`~.cudaMemPoolProps`
24382 None
24384 Returns
24385 -------
24386 cudaError_t
24387 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
24388 memPool : :py:obj:`~.cudaMemPool_t`
24389 None
24391 See Also
24392 --------
24393 :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cudaDeviceSetMemPool`, :py:obj:`~.cudaMallocFromPoolAsync`, :py:obj:`~.cudaMemPoolExportToShareableHandle`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`
24395 Notes
24396 -----
24397 Specifying :py:obj:`~.cudaMemHandleTypeNone` creates a memory pool that will not support IPC.
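
    Examples
    --------
    A minimal sketch of creating a device-local, non-IPC pool (assumes an
    initialized CUDA runtime and a device 0; error checking is elided):

    >>> props = cudaMemPoolProps()
    >>> props.allocType = cudaMemAllocationType.cudaMemAllocationTypePinned
    >>> props.handleTypes = cudaMemAllocationHandleType.cudaMemHandleTypeNone
    >>> props.location.type = cudaMemLocationType.cudaMemLocationTypeDevice
    >>> props.location.id = 0
    >>> err, pool = cudaMemPoolCreate(props)  # doctest: +SKIP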
24398 """
24399 cdef cudaMemPool_t memPool = cudaMemPool_t()
24400 cdef cyruntime.cudaMemPoolProps* cypoolProps_ptr = poolProps._pvt_ptr if poolProps is not None else NULL
24401 with nogil:
24402 err = cyruntime.cudaMemPoolCreate(<cyruntime.cudaMemPool_t*>memPool._pvt_ptr, cypoolProps_ptr)
24403 if err != cyruntime.cudaSuccess:
24404 return (_dict_cudaError_t[err], None)
24405 return (_dict_cudaError_t[err], memPool)
24407@cython.embedsignature(True)
24408def cudaMemPoolDestroy(memPool):
24409 """ Destroys the specified memory pool.
24411 If any pointers obtained from this pool haven't been freed or the pool
24412 has free operations that haven't completed when
24413 :py:obj:`~.cudaMemPoolDestroy` is invoked, the function will return
24414 immediately and the resources associated with the pool will be released
24415 automatically once there are no more outstanding allocations.
24417 Destroying the current mempool of a device sets the default mempool of
24418 that device as the current mempool for that device.
24420 Parameters
24421 ----------
24422 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24423 None
24425 Returns
24426 -------
24427 cudaError_t
24428 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24430 See Also
24431 --------
24432 cuMemPoolDestroy, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceSetMemPool`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaDeviceGetMemPool`, :py:obj:`~.cudaMemPoolCreate`
24434 Notes
24435 -----
24436 A device's default memory pool cannot be destroyed.
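
    Examples
    --------
    A minimal sketch (assumes `pool` was returned by
    :py:obj:`~.cudaMemPoolCreate`):

    >>> (err,) = cudaMemPoolDestroy(pool)  # doctest: +SKIP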
24437 """
24438 cdef cyruntime.cudaMemPool_t cymemPool
24439 if memPool is None:
24440 pmemPool = 0
24441 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24442 pmemPool = int(memPool)
24443 else:
24444 pmemPool = int(cudaMemPool_t(memPool))
24445 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24446 with nogil:
24447 err = cyruntime.cudaMemPoolDestroy(cymemPool)
24448 return (_dict_cudaError_t[err],)
24450@cython.embedsignature(True)
24451def cudaMemGetDefaultMemPool(location : Optional[cudaMemLocation], typename not None : cudaMemAllocationType):
24452 """ Returns the default memory pool for a given location and allocation type.
24454 The memory location can be one of
24455 :py:obj:`~.cudaMemLocationTypeDevice`,
24456 :py:obj:`~.cudaMemLocationTypeHost` or
24457 :py:obj:`~.cudaMemLocationTypeHostNuma`. The allocation type can be one
24458 of :py:obj:`~.cudaMemAllocationTypePinned` or
24459 :py:obj:`~.cudaMemAllocationTypeManaged`. When the allocation type is
24460 :py:obj:`~.cudaMemAllocationTypeManaged`, the location type can also be
24461 :py:obj:`~.cudaMemLocationTypeNone` to indicate no preferred location
24462 for the managed memory pool. In all other cases, the call returns
24463 :py:obj:`~.cudaErrorInvalidValue`.
24465 Parameters
24466 ----------
24467 location : :py:obj:`~.cudaMemLocation`
24468 None
24469 typename : :py:obj:`~.cudaMemAllocationType`
24470 None
24472 Returns
24473 -------
24474 cudaError_t
24475 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
24476 memPool : :py:obj:`~.cudaMemPool_t`
24477 None
24479 See Also
24480 --------
24481 :py:obj:`~.cuMemAllocAsync`, :py:obj:`~.cuMemPoolTrimTo`, :py:obj:`~.cuMemPoolGetAttribute`, :py:obj:`~.cuMemPoolSetAttribute`, cuMemPoolSetAccess, :py:obj:`~.cuMemGetMemPool`, :py:obj:`~.cuMemPoolCreate`
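
    Examples
    --------
    A minimal sketch of querying the default pinned-memory pool of device
    0 (assumes an initialized CUDA runtime; error checking is elided):

    >>> loc = cudaMemLocation()
    >>> loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    >>> loc.id = 0
    >>> err, pool = cudaMemGetDefaultMemPool(
    ...     loc, cudaMemAllocationType.cudaMemAllocationTypePinned)  # doctest: +SKIP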
24482 """
24483 cdef cudaMemPool_t memPool = cudaMemPool_t()
24484 cdef cyruntime.cudaMemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL
24485 cdef cyruntime.cudaMemAllocationType cytypename = typename.value
24486 with nogil:
24487 err = cyruntime.cudaMemGetDefaultMemPool(<cyruntime.cudaMemPool_t*>memPool._pvt_ptr, cylocation_ptr, cytypename)
24488 if err != cyruntime.cudaSuccess:
24489 return (_dict_cudaError_t[err], None)
24490 return (_dict_cudaError_t[err], memPool)
24492@cython.embedsignature(True)
24493def cudaMemGetMemPool(location : Optional[cudaMemLocation], typename not None : cudaMemAllocationType):
24494 """ Gets the current memory pool for a given memory location and allocation type.
24496 The memory location can be one of
24497 :py:obj:`~.cudaMemLocationTypeDevice`,
24498 :py:obj:`~.cudaMemLocationTypeHost` or
24499 :py:obj:`~.cudaMemLocationTypeHostNuma`. The allocation type can be one
24500 of :py:obj:`~.cudaMemAllocationTypePinned` or
24501 :py:obj:`~.cudaMemAllocationTypeManaged`. When the allocation type is
24502 :py:obj:`~.cudaMemAllocationTypeManaged`, the location type can also be
24503 :py:obj:`~.cudaMemLocationTypeNone` to indicate no preferred location
24504 for the managed memory pool. In all other cases, the call returns
24505 :py:obj:`~.cudaErrorInvalidValue`.
24507 Returns the last pool provided to :py:obj:`~.cudaMemSetMemPool` or
24508 :py:obj:`~.cudaDeviceSetMemPool` for this location and allocation type,
24509 or the location's default memory pool if :py:obj:`~.cudaMemSetMemPool`
24510 or :py:obj:`~.cudaDeviceSetMemPool` has never been called for that
24511 allocation type and location. By default, the current mempool of a
24512 location is its default mempool, which can be obtained via
24513 :py:obj:`~.cudaMemGetDefaultMemPool`. Otherwise, the returned pool must
24514 have been set with :py:obj:`~.cudaDeviceSetMemPool`.
24516 Parameters
24517 ----------
24518 location : :py:obj:`~.cudaMemLocation`
24519 None
24520 typename : :py:obj:`~.cudaMemAllocationType`
24521 None
24523 Returns
24524 -------
24525 cudaError_t
24526 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24527 memPool : :py:obj:`~.cudaMemPool_t`
24528 None
24530 See Also
24531 --------
24532 :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cuDeviceSetMemPool`, :py:obj:`~.cuMemSetMemPool`
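
    Examples
    --------
    A minimal sketch of reading back the current pinned-memory pool of
    device 0 (assumes an initialized CUDA runtime):

    >>> loc = cudaMemLocation()
    >>> loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    >>> loc.id = 0
    >>> err, pool = cudaMemGetMemPool(
    ...     loc, cudaMemAllocationType.cudaMemAllocationTypePinned)  # doctest: +SKIP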
24533 """
24534 cdef cudaMemPool_t memPool = cudaMemPool_t()
24535 cdef cyruntime.cudaMemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL
24536 cdef cyruntime.cudaMemAllocationType cytypename = typename.value
24537 with nogil:
24538 err = cyruntime.cudaMemGetMemPool(<cyruntime.cudaMemPool_t*>memPool._pvt_ptr, cylocation_ptr, cytypename)
24539 if err != cyruntime.cudaSuccess:
24540 return (_dict_cudaError_t[err], None)
24541 return (_dict_cudaError_t[err], memPool)
24543@cython.embedsignature(True)
24544def cudaMemSetMemPool(location : Optional[cudaMemLocation], typename not None : cudaMemAllocationType, memPool):
24545 """ Sets the current memory pool for a memory location and allocation type.
24547 The memory location can be one of
24548 :py:obj:`~.cudaMemLocationTypeDevice`,
24549 :py:obj:`~.cudaMemLocationTypeHost` or
24550 :py:obj:`~.cudaMemLocationTypeHostNuma`. The allocation type can be one
24551 of :py:obj:`~.cudaMemAllocationTypePinned` or
24552 :py:obj:`~.cudaMemAllocationTypeManaged`. When the allocation type is
24553 :py:obj:`~.cudaMemAllocationTypeManaged`, the location type can also be
24554 :py:obj:`~.cudaMemLocationTypeNone` to indicate no preferred location
24555 for the managed memory pool. In all other cases, the call returns
24556 :py:obj:`~.cudaErrorInvalidValue`.
24558 When a memory pool is set as the current memory pool, the location
24559 parameter should be the same as the location of the pool. If the
24560 location type or index does not match, the call returns
24561 :py:obj:`~.cudaErrorInvalidValue`. The type of the memory pool should
24562 also match the allocType parameter; otherwise the call returns
24563 :py:obj:`~.cudaErrorInvalidValue`. By default, a memory location's
24564 current memory pool is its default memory pool. If the location type is
24565 :py:obj:`~.cudaMemLocationTypeDevice` and the allocation type is
24566 :py:obj:`~.cudaMemAllocationTypePinned`, then this API is the
24567 equivalent of calling :py:obj:`~.cudaDeviceSetMemPool` with the
24568 location id as the device. For further details on the implications,
24569 please refer to the documentation for :py:obj:`~.cudaDeviceSetMemPool`.
24571 Parameters
24572 ----------
24573 location : :py:obj:`~.cudaMemLocation`
24574 None
24575 typename : :py:obj:`~.cudaMemAllocationType`
24576 None
24577 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24578 None
24580 Returns
24581 -------
24582 cudaError_t
24583 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
24585 See Also
24586 --------
24587 :py:obj:`~.cuDeviceGetDefaultMemPool`, :py:obj:`~.cuDeviceGetMemPool`, :py:obj:`~.cuMemGetMemPool`, :py:obj:`~.cuMemPoolCreate`, :py:obj:`~.cuMemPoolDestroy`, :py:obj:`~.cuMemAllocFromPoolAsync`
24589 Notes
24590 -----
24591 Use :py:obj:`~.cudaMallocFromPoolAsync` to specify asynchronous allocations from a device different than the one the stream runs on.
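
    Examples
    --------
    A minimal sketch that makes `pool` (a pool created for device 0, e.g.
    with :py:obj:`~.cudaMemPoolCreate`) the current pinned-memory pool of
    that device; the location must match the pool's own location:

    >>> loc = cudaMemLocation()
    >>> loc.type = cudaMemLocationType.cudaMemLocationTypeDevice
    >>> loc.id = 0
    >>> (err,) = cudaMemSetMemPool(
    ...     loc, cudaMemAllocationType.cudaMemAllocationTypePinned, pool)  # doctest: +SKIP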
24592 """
24593 cdef cyruntime.cudaMemPool_t cymemPool
24594 if memPool is None:
24595 pmemPool = 0
24596 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24597 pmemPool = int(memPool)
24598 else:
24599 pmemPool = int(cudaMemPool_t(memPool))
24600 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24601 cdef cyruntime.cudaMemLocation* cylocation_ptr = location._pvt_ptr if location is not None else NULL
24602 cdef cyruntime.cudaMemAllocationType cytypename = typename.value
24603 with nogil:
24604 err = cyruntime.cudaMemSetMemPool(cylocation_ptr, cytypename, cymemPool)
24605 return (_dict_cudaError_t[err],)
24607@cython.embedsignature(True)
24608def cudaMallocFromPoolAsync(size_t size, memPool, stream):
24609 """ Allocates memory from a specified pool with stream ordered semantics.
24611 Inserts an allocation operation into `stream`. A pointer to the
24612 allocated memory is returned immediately in *dptr. The allocation must
24613 not be accessed until the allocation operation completes. The
24614 allocation comes from the specified memory pool.
24616 Parameters
24617 ----------
24618 size : size_t
24619 Number of bytes to allocate
24620 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24621 The pool to allocate from
24622 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
24623 The stream establishing the stream ordering semantic
24625 Returns
24626 -------
24627 cudaError_t
24628 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorOutOfMemory`
24629 ptr : Any
24630 Returned device pointer
24632 See Also
24633 --------
24634 :py:obj:`~.cuMemAllocFromPoolAsync`, cudaMallocAsync (C++ API), :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaDeviceGetDefaultMemPool`, :py:obj:`~.cudaMemPoolCreate`, :py:obj:`~.cudaMemPoolSetAccess`, :py:obj:`~.cudaMemPoolSetAttribute`
24636 Notes
24637 -----
24638 During stream capture, this function results in the creation of an allocation node. In this case, the allocation is owned by the graph instead of the memory pool. The memory pool's properties are used to set the node's creation parameters.
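
    Examples
    --------
    A minimal sketch of a stream-ordered allocation from an existing pool
    (`pool` as created with :py:obj:`~.cudaMemPoolCreate`; error checking
    is elided):

    >>> err, stream = cudaStreamCreate()  # doctest: +SKIP
    >>> err, dptr = cudaMallocFromPoolAsync(1 << 20, pool, stream)  # doctest: +SKIP
    >>> (err,) = cudaFreeAsync(dptr, stream)  # doctest: +SKIP
    >>> (err,) = cudaStreamSynchronize(stream)  # doctest: +SKIP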
24639 """
24640 cdef cyruntime.cudaStream_t cystream
24641 if stream is None:
24642 pstream = 0
24643 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
24644 pstream = int(stream)
24645 else:
24646 pstream = int(cudaStream_t(stream))
24647 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
24648 cdef cyruntime.cudaMemPool_t cymemPool
24649 if memPool is None:
24650 pmemPool = 0
24651 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24652 pmemPool = int(memPool)
24653 else:
24654 pmemPool = int(cudaMemPool_t(memPool))
24655 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24656 cdef void_ptr ptr = 0
24657 with nogil:
24658 err = cyruntime.cudaMallocFromPoolAsync(<void**>&ptr, size, cymemPool, cystream)
24659 if err != cyruntime.cudaSuccess:
24660 return (_dict_cudaError_t[err], None)
24661 return (_dict_cudaError_t[err], ptr)
24663@cython.embedsignature(True)
24664def cudaMemPoolExportToShareableHandle(memPool, handleType not None : cudaMemAllocationHandleType, unsigned int flags):
24665 """ Exports a memory pool to the requested handle type.
24667 Given an IPC capable mempool, create an OS handle to share the pool
24668 with another process. A recipient process can convert the shareable
24669 handle into a mempool with
24670 :py:obj:`~.cudaMemPoolImportFromShareableHandle`. Individual pointers
24671 can then be shared with the :py:obj:`~.cudaMemPoolExportPointer` and
24672 :py:obj:`~.cudaMemPoolImportPointer` APIs. The implementation of what
24673 the shareable handle is and how it can be transferred is defined by the
24674 requested handle type.
24676 Parameters
24677 ----------
24678 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24679 pool to export
24680 handleType : :py:obj:`~.cudaMemAllocationHandleType`
24681 the type of handle to create
24682 flags : unsigned int
24683 must be 0
24685 Returns
24686 -------
24687 cudaError_t
24688 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
24689 handle_out : Any
24690 pointer to the location in which to store the requested handle
24692 See Also
24693 --------
24694 :py:obj:`~.cuMemPoolExportToShareableHandle`, :py:obj:`~.cudaMemPoolImportFromShareableHandle`, :py:obj:`~.cudaMemPoolExportPointer`, :py:obj:`~.cudaMemPoolImportPointer`
24696 Notes
24697 -----
24698 To create an IPC-capable mempool, create a mempool with a :py:obj:`~.cudaMemAllocationHandleType` other than :py:obj:`~.cudaMemHandleTypeNone`.
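
    Examples
    --------
    A minimal sketch for a pool whose :py:obj:`~.cudaMemPoolProps.handleTypes`
    included :py:obj:`~.cudaMemHandleTypePosixFileDescriptor`; the returned
    file descriptor must then be sent to the importing process by an OS
    mechanism such as a UNIX domain socket (not shown):

    >>> err, fd = cudaMemPoolExportToShareableHandle(
    ...     pool, cudaMemAllocationHandleType.cudaMemHandleTypePosixFileDescriptor, 0)  # doctest: +SKIP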
24699 """
24700 cdef cyruntime.cudaMemPool_t cymemPool
24701 if memPool is None:
24702 pmemPool = 0
24703 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24704 pmemPool = int(memPool)
24705 else:
24706 pmemPool = int(cudaMemPool_t(memPool))
24707 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24708 cdef _HelperCUmemAllocationHandleType cyshareableHandle = _HelperCUmemAllocationHandleType(handleType)
24709 cdef void* cyshareableHandle_ptr = <void*><void_ptr>cyshareableHandle.cptr
24710 cdef cyruntime.cudaMemAllocationHandleType cyhandleType = handleType.value
24711 with nogil:
24712 err = cyruntime.cudaMemPoolExportToShareableHandle(cyshareableHandle_ptr, cymemPool, cyhandleType, flags)
24713 if err != cyruntime.cudaSuccess:
24714 return (_dict_cudaError_t[err], None)
24715 return (_dict_cudaError_t[err], cyshareableHandle.pyObj())
24717@cython.embedsignature(True)
24718def cudaMemPoolImportFromShareableHandle(shareableHandle, handleType not None : cudaMemAllocationHandleType, unsigned int flags):
24719 """ imports a memory pool from a shared handle.
24721 Specific allocations can be imported from the imported pool with
24722 :py:obj:`~.cudaMemPoolImportPointer`.
24724 Parameters
24725 ----------
24726 shareableHandle : Any
24727 OS handle of the pool to open
24728 handleType : :py:obj:`~.cudaMemAllocationHandleType`
24729 The type of handle being imported
24730 flags : unsigned int
24731 must be 0
24733 Returns
24734 -------
24735 cudaError_t
24736 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
24737 pool_out : :py:obj:`~.cudaMemPool_t`
24738 Returned memory pool
24740 See Also
24741 --------
24742 :py:obj:`~.cuMemPoolImportFromShareableHandle`, :py:obj:`~.cudaMemPoolExportToShareableHandle`, :py:obj:`~.cudaMemPoolExportPointer`, :py:obj:`~.cudaMemPoolImportPointer`
24744 Notes
24745 -----
24746 Imported memory pools do not support creating new allocations. As such imported memory pools may not be used in :py:obj:`~.cudaDeviceSetMemPool` or :py:obj:`~.cudaMallocFromPoolAsync` calls.
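
    Examples
    --------
    A minimal sketch for the importing process, where `fd` is a POSIX file
    descriptor received from the exporting process (e.g. over a UNIX
    domain socket):

    >>> err, pool = cudaMemPoolImportFromShareableHandle(
    ...     fd, cudaMemAllocationHandleType.cudaMemHandleTypePosixFileDescriptor, 0)  # doctest: +SKIP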
24747 """
24748 cdef cudaMemPool_t memPool = cudaMemPool_t()
24749 cyshareableHandle = _HelperInputVoidPtr(shareableHandle)
24750 cdef void* cyshareableHandle_ptr = <void*><void_ptr>cyshareableHandle.cptr
24751 cdef cyruntime.cudaMemAllocationHandleType cyhandleType = handleType.value
24752 with nogil:
24753 err = cyruntime.cudaMemPoolImportFromShareableHandle(<cyruntime.cudaMemPool_t*>memPool._pvt_ptr, cyshareableHandle_ptr, cyhandleType, flags)
24754 if err != cyruntime.cudaSuccess:
24755 return (_dict_cudaError_t[err], None)
24756 return (_dict_cudaError_t[err], memPool)
24758@cython.embedsignature(True)
24759def cudaMemPoolExportPointer(ptr):
24760 """ Export data to share a memory pool allocation between processes.
24762 Constructs `shareData_out` for sharing a specific allocation from an
24763 already shared memory pool. The recipient process can import the
24764 allocation with the :py:obj:`~.cudaMemPoolImportPointer` API. The data
24765 is not a handle and may be shared through any IPC mechanism.
24767 Parameters
24768 ----------
24769 ptr : Any
24770 pointer to memory being exported
24772 Returns
24773 -------
24774 cudaError_t
24775 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
24776 shareData_out : :py:obj:`~.cudaMemPoolPtrExportData`
24777 Returned export data
24779 See Also
24780 --------
24781 :py:obj:`~.cuMemPoolExportPointer`, :py:obj:`~.cudaMemPoolExportToShareableHandle`, :py:obj:`~.cudaMemPoolImportFromShareableHandle`, :py:obj:`~.cudaMemPoolImportPointer`
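
    Examples
    --------
    A minimal sketch in the exporting process, where `dptr` was allocated
    with :py:obj:`~.cudaMallocFromPoolAsync` from an IPC-capable pool; the
    returned export data may be sent over any IPC mechanism:

    >>> err, shareData = cudaMemPoolExportPointer(dptr)  # doctest: +SKIP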
24782 """
24783 cdef cudaMemPoolPtrExportData exportData = cudaMemPoolPtrExportData()
24784 cyptr = _HelperInputVoidPtr(ptr)
24785 cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
24786 with nogil:
24787 err = cyruntime.cudaMemPoolExportPointer(<cyruntime.cudaMemPoolPtrExportData*>exportData._pvt_ptr, cyptr_ptr)
24788 if err != cyruntime.cudaSuccess:
24789 return (_dict_cudaError_t[err], None)
24790 return (_dict_cudaError_t[err], exportData)
24792@cython.embedsignature(True)
24793def cudaMemPoolImportPointer(memPool, exportData : Optional[cudaMemPoolPtrExportData]):
24794 """ Import a memory pool allocation from another process.
24796 Returns in `ptr_out` a pointer to the imported memory. The imported
24797 memory must not be accessed before the allocation operation completes
24798 in the exporting process. The imported memory must be freed from all
24799 importing processes before being freed in the exporting process. The
24800 pointer may be freed with cudaFree or cudaFreeAsync. If
24801 :py:obj:`~.cudaFreeAsync` is used, the free must be completed on the
24802 importing process before the free operation on the exporting process.
24804 Parameters
24805 ----------
24806 memPool : :py:obj:`~.CUmemoryPool` or :py:obj:`~.cudaMemPool_t`
24807 pool from which to import
24808 exportData : :py:obj:`~.cudaMemPoolPtrExportData`
24809 data specifying the memory to import
24811 Returns
24812 -------
24813 cudaError_t
24814 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorOutOfMemory`
24815 ptr_out : Any
24816 pointer to imported memory
24818 See Also
24819 --------
24820 :py:obj:`~.cuMemPoolImportPointer`, :py:obj:`~.cudaMemPoolExportToShareableHandle`, :py:obj:`~.cudaMemPoolImportFromShareableHandle`, :py:obj:`~.cudaMemPoolExportPointer`
24822 Notes
24823 -----
24824 The :py:obj:`~.cudaFreeAsync` API may be used in the exporting process before the :py:obj:`~.cudaFreeAsync` operation completes in its stream as long as the :py:obj:`~.cudaFreeAsync` in the exporting process specifies a stream with a stream dependency on the importing process's :py:obj:`~.cudaFreeAsync`.
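
    Examples
    --------
    A minimal sketch in the importing process, where `pool` was obtained
    with :py:obj:`~.cudaMemPoolImportFromShareableHandle` and `shareData`
    is the :py:obj:`~.cudaMemPoolPtrExportData` produced by the exporter:

    >>> err, dptr = cudaMemPoolImportPointer(pool, shareData)  # doctest: +SKIP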
24825 """
24826 cdef cyruntime.cudaMemPool_t cymemPool
24827 if memPool is None:
24828 pmemPool = 0
24829 elif isinstance(memPool, (cudaMemPool_t,driver.CUmemoryPool)):
24830 pmemPool = int(memPool)
24831 else:
24832 pmemPool = int(cudaMemPool_t(memPool))
24833 cymemPool = <cyruntime.cudaMemPool_t><void_ptr>pmemPool
24834 cdef void_ptr ptr = 0
24835 cdef cyruntime.cudaMemPoolPtrExportData* cyexportData_ptr = exportData._pvt_ptr if exportData is not None else NULL
24836 with nogil:
24837 err = cyruntime.cudaMemPoolImportPointer(<void**>&ptr, cymemPool, cyexportData_ptr)
24838 if err != cyruntime.cudaSuccess:
24839 return (_dict_cudaError_t[err], None)
24840 return (_dict_cudaError_t[err], ptr)
24842@cython.embedsignature(True)
24843def cudaPointerGetAttributes(ptr):
24844 """ Returns attributes about a specified pointer.
24846 Returns in `*attributes` the attributes of the pointer `ptr`. If the
24847 pointer was not allocated in, mapped by, or registered with a context
24848 supporting unified addressing, :py:obj:`~.cudaErrorInvalidValue` is
24849 returned.
24851 The :py:obj:`~.cudaPointerAttributes` structure is defined as:
24853 **View CUDA Toolkit Documentation for a C++ code example**
24855 In this structure, the individual fields mean
24857 - :py:obj:`~.cudaPointerAttributes.type` identifies the type of memory. It
24858 can be :py:obj:`~.cudaMemoryTypeUnregistered` for unregistered host
24859 memory, :py:obj:`~.cudaMemoryTypeHost` for registered host memory,
24860 :py:obj:`~.cudaMemoryTypeDevice` for device memory or
24861 :py:obj:`~.cudaMemoryTypeManaged` for managed memory.
24863 - :py:obj:`~.device` is the device against which `ptr` was allocated.
24864 If `ptr` has memory type :py:obj:`~.cudaMemoryTypeDevice` then this
24865 identifies the device on which the memory referred to by `ptr`
24866 physically resides. If `ptr` has memory type
24867 :py:obj:`~.cudaMemoryTypeHost` then this identifies the device which
24868 was current when the allocation was made (and if that device is
24869 deinitialized then this allocation will vanish with that device's
24870 state).
24872 - :py:obj:`~.devicePointer` is the device pointer alias through which
24873 the memory referred to by `ptr` may be accessed on the current
24874 device. If the memory referred to by `ptr` cannot be accessed
24875 directly by the current device then this is NULL.
24877 - :py:obj:`~.hostPointer` is the host pointer alias through which the
24878 memory referred to by `ptr` may be accessed on the host. If the
24879 memory referred to by `ptr` cannot be accessed directly by the host
24880 then this is NULL.
24882 Parameters
24883 ----------
24884 ptr : Any
24885 Pointer to get attributes for
24887 Returns
24888 -------
24889 cudaError_t
24890 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`
24891 attributes : :py:obj:`~.cudaPointerAttributes`
24892 Attributes for the specified pointer
24894 See Also
24895 --------
24896 :py:obj:`~.cudaGetDeviceCount`, :py:obj:`~.cudaGetDevice`, :py:obj:`~.cudaSetDevice`, :py:obj:`~.cudaChooseDevice`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cuPointerGetAttributes`
24898 Notes
24899 -----
24900 In CUDA 11.0 and later, passing a host pointer will return :py:obj:`~.cudaMemoryTypeUnregistered` in :py:obj:`~.cudaPointerAttributes.type`, and the call will return :py:obj:`~.cudaSuccess`.
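
    Examples
    --------
    A minimal sketch (assumes an initialized CUDA runtime; error checking
    is elided):

    >>> err, dptr = cudaMalloc(256)  # doctest: +SKIP
    >>> err, attrs = cudaPointerGetAttributes(dptr)  # doctest: +SKIP
    >>> attrs.type == cudaMemoryType.cudaMemoryTypeDevice  # doctest: +SKIP
    True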
24901 """
24902 cdef cudaPointerAttributes attributes = cudaPointerAttributes()
24903 cyptr = _HelperInputVoidPtr(ptr)
24904 cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
24905 with nogil:
24906 err = cyruntime.cudaPointerGetAttributes(<cyruntime.cudaPointerAttributes*>attributes._pvt_ptr, cyptr_ptr)
24907 if err != cyruntime.cudaSuccess:
24908 return (_dict_cudaError_t[err], None)
24909 return (_dict_cudaError_t[err], attributes)
24911@cython.embedsignature(True)
24912def cudaDeviceCanAccessPeer(int device, int peerDevice):
24913 """ Queries if a device may directly access a peer device's memory.
24915 Returns in `*canAccessPeer` a value of 1 if device `device` is capable
24916 of directly accessing memory from `peerDevice` and 0 otherwise. If
24917 direct access of `peerDevice` from `device` is possible, then access
24918 may be enabled by calling :py:obj:`~.cudaDeviceEnablePeerAccess()`.
24920 Parameters
24921 ----------
24922 device : int
24923 Device from which allocations on `peerDevice` are to be directly
24924 accessed.
24925 peerDevice : int
24926 Device on which the allocations to be directly accessed by `device`
24927 reside.
24929 Returns
24930 -------
24931 cudaError_t
24932 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
24933 canAccessPeer : int
24934 Returned access capability
24936 See Also
24937 --------
24938 :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cudaDeviceDisablePeerAccess`, :py:obj:`~.cuDeviceCanAccessPeer`
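
    Examples
    --------
    A minimal sketch that checks whether device 0 can directly access
    memory on device 1 (assumes at least two devices):

    >>> err, canAccess = cudaDeviceCanAccessPeer(0, 1)  # doctest: +SKIP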
24939 """
24940 cdef int canAccessPeer = 0
24941 with nogil:
24942 err = cyruntime.cudaDeviceCanAccessPeer(&canAccessPeer, device, peerDevice)
24943 if err != cyruntime.cudaSuccess:
24944 return (_dict_cudaError_t[err], None)
24945 return (_dict_cudaError_t[err], canAccessPeer)
24947@cython.embedsignature(True)
24948def cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags):
24949 """ Enables direct access to memory allocations on a peer device.
24951 On success, all allocations from `peerDevice` will immediately be
24952 accessible by the current device. They will remain accessible until
24953 access is explicitly disabled using
24954 :py:obj:`~.cudaDeviceDisablePeerAccess()` or either device is reset
24955 using :py:obj:`~.cudaDeviceReset()`.
24957 Note that access granted by this call is unidirectional and that in
24958 order to access memory on the current device from `peerDevice`, a
24959 separate symmetric call to :py:obj:`~.cudaDeviceEnablePeerAccess()` is
24960 required.
24962 Note that there are both device-wide and system-wide limitations per
24963 system configuration, as noted in the CUDA Programming Guide under the
24964 section "Peer-to-Peer Memory Access".
24966 Returns :py:obj:`~.cudaErrorInvalidDevice` if
24967 :py:obj:`~.cudaDeviceCanAccessPeer()` indicates that the current device
24968 cannot directly access memory from `peerDevice`.
24970 Returns :py:obj:`~.cudaErrorPeerAccessAlreadyEnabled` if direct access
24971 of `peerDevice` from the current device has already been enabled.
24973 Returns :py:obj:`~.cudaErrorInvalidValue` if `flags` is not 0.
24975 Parameters
24976 ----------
24977 peerDevice : int
24978 Peer device to enable direct access to from the current device
24979 flags : unsigned int
24980 Reserved for future use and must be set to 0
24982 Returns
24983 -------
24984 cudaError_t
24985 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorPeerAccessAlreadyEnabled`, :py:obj:`~.cudaErrorInvalidValue`
24987 See Also
24988 --------
24989 :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cudaDeviceDisablePeerAccess`, :py:obj:`~.cuCtxEnablePeerAccess`
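
    Examples
    --------
    A minimal sketch that enables access to device 1's memory from device
    0, after verifying that it is possible:

    >>> (err,) = cudaSetDevice(0)  # doctest: +SKIP
    >>> err, canAccess = cudaDeviceCanAccessPeer(0, 1)  # doctest: +SKIP
    >>> if canAccess:
    ...     (err,) = cudaDeviceEnablePeerAccess(1, 0)  # doctest: +SKIP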
24990 """
24991 with nogil:
24992 err = cyruntime.cudaDeviceEnablePeerAccess(peerDevice, flags)
24993 return (_dict_cudaError_t[err],)
24995@cython.embedsignature(True)
24996def cudaDeviceDisablePeerAccess(int peerDevice):
24997 """ Disables direct access to memory allocations on a peer device.
24999 Returns :py:obj:`~.cudaErrorPeerAccessNotEnabled` if direct access to
25000 memory on `peerDevice` has not yet been enabled from the current
25001 device.
25003 Parameters
25004 ----------
25005 peerDevice : int
25006 Peer device to disable direct access to
25008 Returns
25009 -------
25010 cudaError_t
25011 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorPeerAccessNotEnabled`, :py:obj:`~.cudaErrorInvalidDevice`
25013 See Also
25014 --------
25015 :py:obj:`~.cudaDeviceCanAccessPeer`, :py:obj:`~.cudaDeviceEnablePeerAccess`, :py:obj:`~.cuCtxDisablePeerAccess`
25016 """
25017 with nogil:
25018 err = cyruntime.cudaDeviceDisablePeerAccess(peerDevice)
25019 return (_dict_cudaError_t[err],)
25021@cython.embedsignature(True)
25022def cudaGraphicsUnregisterResource(resource):
25023 """ Unregisters a graphics resource for access by CUDA.
25025 Unregisters the graphics resource `resource` so it is not accessible by
25026 CUDA unless registered again.
25028 If `resource` is invalid then
25029 :py:obj:`~.cudaErrorInvalidResourceHandle` is returned.
25031 Parameters
25032 ----------
25033 resource : :py:obj:`~.cudaGraphicsResource_t`
25034 Resource to unregister
25036 Returns
25037 -------
25038 cudaError_t
25039 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
25041 See Also
25042 --------
25043 :py:obj:`~.cudaGraphicsD3D9RegisterResource`, :py:obj:`~.cudaGraphicsD3D10RegisterResource`, :py:obj:`~.cudaGraphicsD3D11RegisterResource`, :py:obj:`~.cudaGraphicsGLRegisterBuffer`, :py:obj:`~.cudaGraphicsGLRegisterImage`, :py:obj:`~.cuGraphicsUnregisterResource`
25044 """
25045 cdef cyruntime.cudaGraphicsResource_t cyresource
25046 if resource is None:
25047 presource = 0
25048 elif isinstance(resource, (cudaGraphicsResource_t,)):
25049 presource = int(resource)
25050 else:
25051 presource = int(cudaGraphicsResource_t(resource))
25052 cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
25053 with nogil:
25054 err = cyruntime.cudaGraphicsUnregisterResource(cyresource)
25055 return (_dict_cudaError_t[err],)
25057@cython.embedsignature(True)
25058def cudaGraphicsResourceSetMapFlags(resource, unsigned int flags):
25059 """ Set usage flags for mapping a graphics resource.
25061 Set `flags` for mapping the graphics resource `resource`.
25063 Changes to `flags` will take effect the next time `resource` is mapped.
25064 The `flags` argument may be any of the following:
25066 - :py:obj:`~.cudaGraphicsMapFlagsNone`: Specifies no hints about how
25067 `resource` will be used. It is therefore assumed that CUDA may read
25068 from or write to `resource`.
25070 - :py:obj:`~.cudaGraphicsMapFlagsReadOnly`: Specifies that CUDA will
25071 not write to `resource`.
25073 - :py:obj:`~.cudaGraphicsMapFlagsWriteDiscard`: Specifies that CUDA will not
25074 read from `resource` and will write over the entire contents of
25075 `resource`, so none of the data previously stored in `resource` will
25076 be preserved.
25078 If `resource` is presently mapped for access by CUDA then
25079 :py:obj:`~.cudaErrorUnknown` is returned. If `flags` is not one of the
25080 above values then :py:obj:`~.cudaErrorInvalidValue` is returned.
25082 Parameters
25083 ----------
25084 resource : :py:obj:`~.cudaGraphicsResource_t`
25085 Registered resource to set flags for
25086 flags : unsigned int
25087 Parameters for resource mapping
25089 Returns
25090 -------
25091 cudaError_t
25092 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
25094 See Also
25095 --------
25096 :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cuGraphicsResourceSetMapFlags`
25097 """
25098 cdef cyruntime.cudaGraphicsResource_t cyresource
25099 if resource is None:
25100 presource = 0
25101 elif isinstance(resource, (cudaGraphicsResource_t,)):
25102 presource = int(resource)
25103 else:
25104 presource = int(cudaGraphicsResource_t(resource))
25105 cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
25106 with nogil:
25107 err = cyruntime.cudaGraphicsResourceSetMapFlags(cyresource, flags)
25108 return (_dict_cudaError_t[err],)
25110@cython.embedsignature(True)
25111def cudaGraphicsMapResources(int count, resources, stream):
25112 """ Map graphics resources for access by CUDA.
25114 Maps the `count` graphics resources in `resources` for access by CUDA.
25116 The resources in `resources` may be accessed by CUDA until they are
25117 unmapped. The graphics API from which `resources` were registered
25118 should not access any resources while they are mapped by CUDA. If an
25119 application does so, the results are undefined.
25121 This function provides the synchronization guarantee that any graphics
25122 calls issued before :py:obj:`~.cudaGraphicsMapResources()` will
25123 complete before any subsequent CUDA work issued in `stream` begins.
25125 If `resources` contains any duplicate entries then
25126 :py:obj:`~.cudaErrorInvalidResourceHandle` is returned. If any of
25127 `resources` are presently mapped for access by CUDA then
25128 :py:obj:`~.cudaErrorUnknown` is returned.
25130 Parameters
25131 ----------
25132 count : int
25133 Number of resources to map
25134 resources : :py:obj:`~.cudaGraphicsResource_t`
25135 Resources to map for CUDA
25136 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
25137 Stream for synchronization
25139 Returns
25140 -------
25141 cudaError_t
25142 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
25144 See Also
25145 --------
25146 :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cudaGraphicsUnmapResources`, :py:obj:`~.cuGraphicsMapResources`
25147 """
25148 cdef cyruntime.cudaStream_t cystream
25149 if stream is None:
25150 pstream = 0
25151 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
25152 pstream = int(stream)
25153 else:
25154 pstream = int(cudaStream_t(stream))
25155 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
25156 cdef cyruntime.cudaGraphicsResource_t *cyresources
25157 if resources is None:
25158 cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>NULL
25159 elif isinstance(resources, (cudaGraphicsResource_t,)):
25160 presources = resources.getPtr()
25161 cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>presources
25162 elif isinstance(resources, (int)):
25163 cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>resources
25164 else:
25165 raise TypeError("Argument 'resources' is not an instance of the expected types (int, cudaGraphicsResource_t); found " + str(type(resources)))
25166 with nogil:
25167 err = cyruntime.cudaGraphicsMapResources(count, cyresources, cystream)
25168 return (_dict_cudaError_t[err],)
25170@cython.embedsignature(True)
25171def cudaGraphicsUnmapResources(int count, resources, stream):
25172 """ Unmap graphics resources.
25174 Unmaps the `count` graphics resources in `resources`.
25176 Once unmapped, the resources in `resources` may not be accessed by CUDA
25177 until they are mapped again.
25179 This function provides the synchronization guarantee that any CUDA work
25180 issued in `stream` before :py:obj:`~.cudaGraphicsUnmapResources()` will
25181 complete before any subsequently issued graphics work begins.
25183 If `resources` contains any duplicate entries then
25184 :py:obj:`~.cudaErrorInvalidResourceHandle` is returned. If any of
25185 `resources` are not presently mapped for access by CUDA then
25186 :py:obj:`~.cudaErrorUnknown` is returned.
25188 Parameters
25189 ----------
25190 count : int
25191 Number of resources to unmap
25192 resources : :py:obj:`~.cudaGraphicsResource_t`
25193 Resources to unmap
25194 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
25195 Stream for synchronization
25197 Returns
25198 -------
25199 cudaError_t
25200 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
25202 See Also
25203 --------
25204 :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cuGraphicsUnmapResources`
25205 """
25206 cdef cyruntime.cudaStream_t cystream
25207 if stream is None:
25208 pstream = 0
25209 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
25210 pstream = int(stream)
25211 else:
25212 pstream = int(cudaStream_t(stream))
25213 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
25214 cdef cyruntime.cudaGraphicsResource_t *cyresources
25215 if resources is None:
25216 cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>NULL
25217 elif isinstance(resources, (cudaGraphicsResource_t,)):
25218 presources = resources.getPtr()
25219 cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>presources
25220 elif isinstance(resources, (int)):
25221 cyresources = <cyruntime.cudaGraphicsResource_t*><void_ptr>resources
25222 else:
25223 raise TypeError("Argument 'resources' is not an instance of the expected types (int, cudaGraphicsResource_t); found " + str(type(resources)))
25224 with nogil:
25225 err = cyruntime.cudaGraphicsUnmapResources(count, cyresources, cystream)
25226 return (_dict_cudaError_t[err],)
25228@cython.embedsignature(True)
25229def cudaGraphicsResourceGetMappedPointer(resource):
25230 """ Get an device pointer through which to access a mapped graphics resource.
25232 Returns in `*devPtr` a pointer through which the mapped graphics
25233 resource `resource` may be accessed. Returns in `*size` the size of the
25234 memory in bytes which may be accessed from that pointer. The value set
25235 in `devPtr` may change every time that `resource` is mapped.
25237 If `resource` is not a buffer then it cannot be accessed via a pointer
25238 and :py:obj:`~.cudaErrorUnknown` is returned. If `resource` is not
25239 mapped then :py:obj:`~.cudaErrorUnknown` is returned.
25241 Parameters
25242 ----------
25243 resource : :py:obj:`~.cudaGraphicsResource_t`
25244 None
25246 Returns
25247 -------
25248 cudaError_t
25249 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
25250 devPtr : Any
25251 None
25252 size : int
25253 None
25254 """
25255 cdef cyruntime.cudaGraphicsResource_t cyresource
25256 if resource is None:
25257 presource = 0
25258 elif isinstance(resource, (cudaGraphicsResource_t,)):
25259 presource = int(resource)
25260 else:
25261 presource = int(cudaGraphicsResource_t(resource))
25262 cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
25263 cdef void_ptr devPtr = 0
25264 cdef size_t size = 0
25265 with nogil:
25266 err = cyruntime.cudaGraphicsResourceGetMappedPointer(<void**>&devPtr, &size, cyresource)
25267 if err != cyruntime.cudaSuccess:
25268 return (_dict_cudaError_t[err], None, None)
25269 return (_dict_cudaError_t[err], devPtr, size)
25271@cython.embedsignature(True)
25272def cudaGraphicsSubResourceGetMappedArray(resource, unsigned int arrayIndex, unsigned int mipLevel):
25273 """ Get an array through which to access a subresource of a mapped graphics resource.
25275 Returns in `*array` an array through which the subresource of the
25276 mapped graphics resource `resource` that corresponds to array index
25277 `arrayIndex` and mipmap level `mipLevel` may be accessed. The value set
25278 in `array` may change every time that `resource` is mapped.
25280 If `resource` is not a texture then it cannot be accessed via an array
25281 and :py:obj:`~.cudaErrorUnknown` is returned. If `arrayIndex` is not a
25282 valid array index for `resource` then :py:obj:`~.cudaErrorInvalidValue`
25283 is returned. If `mipLevel` is not a valid mipmap level for `resource`
25284 then :py:obj:`~.cudaErrorInvalidValue` is returned. If `resource` is
25285 not mapped then :py:obj:`~.cudaErrorUnknown` is returned.
25287 Parameters
25288 ----------
25289 resource : :py:obj:`~.cudaGraphicsResource_t`
25290 Mapped resource to access
25291 arrayIndex : unsigned int
25292 Array index for array textures or cubemap face index as defined by
25293 :py:obj:`~.cudaGraphicsCubeFace` for cubemap textures for the
25294 subresource to access
25295 mipLevel : unsigned int
25296 Mipmap level for the subresource to access
25298 Returns
25299 -------
25300 cudaError_t
25301 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
25302 array : :py:obj:`~.cudaArray_t`
25303 Returned array through which a subresource of `resource` may be
25304 accessed
25306 See Also
25307 --------
25308 :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsSubResourceGetMappedArray`
25309 """
25310 cdef cyruntime.cudaGraphicsResource_t cyresource
25311 if resource is None:
25312 presource = 0
25313 elif isinstance(resource, (cudaGraphicsResource_t,)):
25314 presource = int(resource)
25315 else:
25316 presource = int(cudaGraphicsResource_t(resource))
25317 cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
25318 cdef cudaArray_t array = cudaArray_t()
25319 with nogil:
25320 err = cyruntime.cudaGraphicsSubResourceGetMappedArray(<cyruntime.cudaArray_t*>array._pvt_ptr, cyresource, arrayIndex, mipLevel)
25321 if err != cyruntime.cudaSuccess:
25322 return (_dict_cudaError_t[err], None)
25323 return (_dict_cudaError_t[err], array)
25325@cython.embedsignature(True)
25326def cudaGraphicsResourceGetMappedMipmappedArray(resource):
25327 """ Get a mipmapped array through which to access a mapped graphics resource.
25329 Returns in `*mipmappedArray` a mipmapped array through which the mapped
25330 graphics resource `resource` may be accessed. The value set in
25331 `mipmappedArray` may change every time that `resource` is mapped.
25333 If `resource` is not a texture then it cannot be accessed via an array
25334 and :py:obj:`~.cudaErrorUnknown` is returned. If `resource` is not
25335 mapped then :py:obj:`~.cudaErrorUnknown` is returned.
25337 Parameters
25338 ----------
25339 resource : :py:obj:`~.cudaGraphicsResource_t`
25340 Mapped resource to access
25342 Returns
25343 -------
25344 cudaError_t
25345 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
25346 mipmappedArray : :py:obj:`~.cudaMipmappedArray_t`
25347 Returned mipmapped array through which `resource` may be accessed
25349 See Also
25350 --------
25351 :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsResourceGetMappedMipmappedArray`
25352 """
25353 cdef cyruntime.cudaGraphicsResource_t cyresource
25354 if resource is None:
25355 presource = 0
25356 elif isinstance(resource, (cudaGraphicsResource_t,)):
25357 presource = int(resource)
25358 else:
25359 presource = int(cudaGraphicsResource_t(resource))
25360 cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
25361 cdef cudaMipmappedArray_t mipmappedArray = cudaMipmappedArray_t()
25362 with nogil:
25363 err = cyruntime.cudaGraphicsResourceGetMappedMipmappedArray(<cyruntime.cudaMipmappedArray_t*>mipmappedArray._pvt_ptr, cyresource)
25364 if err != cyruntime.cudaSuccess:
25365 return (_dict_cudaError_t[err], None)
25366 return (_dict_cudaError_t[err], mipmappedArray)
25368@cython.embedsignature(True)
25369def cudaGetChannelDesc(array):
25370 """ Get the channel descriptor of an array.
25372 Returns in `*desc` the channel descriptor of the CUDA array `array`.
25374 Parameters
25375 ----------
25376 array : :py:obj:`~.cudaArray_const_t`
25377 Memory array on device
25379 Returns
25380 -------
25381 cudaError_t
25382 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25383 desc : :py:obj:`~.cudaChannelFormatDesc`
25384 Channel format
25386 See Also
25387 --------
25388 :py:obj:`~.cudaCreateChannelDesc (C API)`, :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cudaCreateSurfaceObject`
25389 """
25390 cdef cyruntime.cudaArray_const_t cyarray
25391 if array is None:
25392 parray = 0
25393 elif isinstance(array, (cudaArray_const_t,)):
25394 parray = int(array)
25395 else:
25396 parray = int(cudaArray_const_t(array))
25397 cyarray = <cyruntime.cudaArray_const_t><void_ptr>parray
25398 cdef cudaChannelFormatDesc desc = cudaChannelFormatDesc()
25399 with nogil:
25400 err = cyruntime.cudaGetChannelDesc(<cyruntime.cudaChannelFormatDesc*>desc._pvt_ptr, cyarray)
25401 if err != cyruntime.cudaSuccess:
25402 return (_dict_cudaError_t[err], None)
25403 return (_dict_cudaError_t[err], desc)
25405@cython.embedsignature(True)
25406def cudaCreateChannelDesc(int x, int y, int z, int w, f not None : cudaChannelFormatKind):
25407 """ Returns a channel descriptor using the specified format.
25409 Returns a channel descriptor with format `f` and number of bits of each
25410 component `x`, `y`, `z`, and `w`. The :py:obj:`~.cudaChannelFormatDesc`
25411 is defined as:
25413 **View CUDA Toolkit Documentation for a C++ code example**
25415 where :py:obj:`~.cudaChannelFormatKind` is one of
25416 :py:obj:`~.cudaChannelFormatKindSigned`,
25417 :py:obj:`~.cudaChannelFormatKindUnsigned`, or
25418 :py:obj:`~.cudaChannelFormatKindFloat`.
25420 Parameters
25421 ----------
25422 x : int
25423 X component
25424 y : int
25425 Y component
25426 z : int
25427 Z component
25428 w : int
25429 W component
25430 f : :py:obj:`~.cudaChannelFormatKind`
25431 Channel format
25433 Returns
25434 -------
25435 cudaError_t
25436 :py:obj:`~.cudaSuccess` (this function always succeeds)
25437 :py:obj:`~.cudaChannelFormatDesc`
25438 Channel descriptor with format `f`
25440 See Also
25441 --------
25442 cudaCreateChannelDesc (C++ API), :py:obj:`~.cudaGetChannelDesc`, :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cudaCreateSurfaceObject`
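
    Examples
    --------
    A minimal sketch of a descriptor for a one-channel 32-bit float format:

    >>> err, desc = cudaCreateChannelDesc(
    ...     32, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat)  # doctest: +SKIP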
25443 """
25444 cdef cyruntime.cudaChannelFormatKind cyf = f.value
25445 with nogil:
25446 err = cyruntime.cudaCreateChannelDesc(x, y, z, w, cyf)
25447 cdef cudaChannelFormatDesc wrapper = cudaChannelFormatDesc()
25448 wrapper._pvt_ptr[0] = err
25449 return (cudaError_t.cudaSuccess, wrapper)
25451@cython.embedsignature(True)
25452def cudaCreateTextureObject(pResDesc : Optional[cudaResourceDesc], pTexDesc : Optional[cudaTextureDesc], pResViewDesc : Optional[cudaResourceViewDesc]):
25453 """ Creates a texture object.
25455 Creates a texture object and returns it in `pTexObject`. `pResDesc`
25456 describes the data to texture from. `pTexDesc` describes how the data
25457 should be sampled. `pResViewDesc` is an optional argument that
25458 specifies an alternate format for the data described by `pResDesc`, and
25459 also describes the subresource region to restrict access to when
25460 texturing. `pResViewDesc` can only be specified if the type of resource
25461 is a CUDA array or a CUDA mipmapped array not in a block compressed
25462 format.
25464 Texture objects are only supported on devices of compute capability 3.0
25465 or higher. Additionally, a texture object is an opaque value, and, as
25466 such, should only be accessed through CUDA API calls.
25468 The :py:obj:`~.cudaResourceDesc` structure is defined as:
25470 **View CUDA Toolkit Documentation for a C++ code example**
25472 where:
25474 - :py:obj:`~.cudaResourceDesc.resType` specifies the type of resource
25475 to texture from. :py:obj:`~.cudaResourceType` is defined as:
25477 - **View CUDA Toolkit Documentation for a C++ code example**
25479 If :py:obj:`~.cudaResourceDesc.resType` is set to
25480 :py:obj:`~.cudaResourceTypeArray`,
25481 :py:obj:`~.cudaResourceDesc`::res::array::array must be set to a valid
25482 CUDA array handle.
25484 If :py:obj:`~.cudaResourceDesc.resType` is set to
25485 :py:obj:`~.cudaResourceTypeMipmappedArray`,
25486 :py:obj:`~.cudaResourceDesc`::res::mipmap::mipmap must be set to a
25487 valid CUDA mipmapped array handle and
25488 :py:obj:`~.cudaTextureDesc.normalizedCoords` must be set to true.
25490 If :py:obj:`~.cudaResourceDesc.resType` is set to
25491 :py:obj:`~.cudaResourceTypeLinear`,
25492 :py:obj:`~.cudaResourceDesc`::res::linear::devPtr must be set to a
25493 valid device pointer, that is aligned to
25494 :py:obj:`~.cudaDeviceProp.textureAlignment`.
25495 :py:obj:`~.cudaResourceDesc`::res::linear::desc describes the format
25496 and the number of components per array element.
25497 :py:obj:`~.cudaResourceDesc`::res::linear::sizeInBytes specifies the
25498 size of the array in bytes. The total number of elements in the linear
25499 address range cannot exceed
25500 :py:obj:`~.cudaDeviceGetTexture1DLinearMaxWidth()`. The number of
25501 elements is computed as (sizeInBytes / sizeof(desc)).
25503 If :py:obj:`~.cudaResourceDesc.resType` is set to
25504 :py:obj:`~.cudaResourceTypePitch2D`,
25505 :py:obj:`~.cudaResourceDesc`::res::pitch2D::devPtr must be set to a
25506 valid device pointer, that is aligned to
25507 :py:obj:`~.cudaDeviceProp.textureAlignment`.
25508 :py:obj:`~.cudaResourceDesc`::res::pitch2D::desc describes the format
25509 and the number of components per array element.
25510 :py:obj:`~.cudaResourceDesc`::res::pitch2D::width and
25511 :py:obj:`~.cudaResourceDesc`::res::pitch2D::height specify the width
25512 and height of the array in elements, and cannot exceed
25513 :py:obj:`~.cudaDeviceProp.maxTexture2DLinear`[0] and
25514 :py:obj:`~.cudaDeviceProp.maxTexture2DLinear`[1] respectively.
25515 :py:obj:`~.cudaResourceDesc`::res::pitch2D::pitchInBytes specifies the
25516 pitch between two rows in bytes and has to be aligned to
25517 :py:obj:`~.cudaDeviceProp.texturePitchAlignment`. Pitch cannot exceed
25518 :py:obj:`~.cudaDeviceProp.maxTexture2DLinear`[2].
25520 The :py:obj:`~.cudaTextureDesc` struct is defined as
25522 **View CUDA Toolkit Documentation for a C++ code example**
25524 where
25526 - :py:obj:`~.cudaTextureDesc.addressMode` specifies the addressing mode
25527 for each dimension of the texture data.
25528 :py:obj:`~.cudaTextureAddressMode` is defined as:
25530 - **View CUDA Toolkit Documentation for a C++ code example**
25532 - This is ignored if :py:obj:`~.cudaResourceDesc.resType` is
25533 :py:obj:`~.cudaResourceTypeLinear`. Also, if
25534 :py:obj:`~.cudaTextureDesc.normalizedCoords` is set to zero,
25535 :py:obj:`~.cudaAddressModeWrap` and :py:obj:`~.cudaAddressModeMirror`
25536 won't be supported and will be switched to
25537 :py:obj:`~.cudaAddressModeClamp`.
25539 - :py:obj:`~.cudaTextureDesc.filterMode` specifies the filtering mode
25540 to be used when fetching from the texture.
25541 :py:obj:`~.cudaTextureFilterMode` is defined as:
25543 - **View CUDA Toolkit Documentation for a C++ code example**
25545 - This is ignored if :py:obj:`~.cudaResourceDesc.resType` is
25546 :py:obj:`~.cudaResourceTypeLinear`.
25548 - :py:obj:`~.cudaTextureDesc.readMode` specifies whether integer data
25549 should be converted to floating point or not.
25550 :py:obj:`~.cudaTextureReadMode` is defined as:
25552 - **View CUDA Toolkit Documentation for a C++ code example**
25554 - Note that this applies only to 8-bit and 16-bit integer formats.
25555 A 32-bit integer format would not be promoted, regardless of whether
25556 or not :py:obj:`~.cudaTextureDesc.readMode` is set to
25557 :py:obj:`~.cudaReadModeNormalizedFloat`.
25559 - :py:obj:`~.cudaTextureDesc.sRGB` specifies whether sRGB to linear
25560 conversion should be performed during texture fetch.
25562 - :py:obj:`~.cudaTextureDesc.borderColor` specifies the float values of
25563 the border color, where :py:obj:`~.cudaTextureDesc.borderColor`[0]
25564 contains the value of 'R', :py:obj:`~.cudaTextureDesc.borderColor`[1]
25565 contains the value of 'G', :py:obj:`~.cudaTextureDesc.borderColor`[2]
25566 contains the value of 'B', and :py:obj:`~.cudaTextureDesc.borderColor`[3]
25567 contains the value of 'A'. Note that applications using integer border
25568 color values will need to reinterpret_cast these values to float. The
25569 values are used only when the addressing mode specified by
25570 :py:obj:`~.cudaTextureDesc.addressMode` is cudaAddressModeBorder.
25572 - :py:obj:`~.cudaTextureDesc.normalizedCoords` specifies whether the
25573 texture coordinates will be normalized or not.
25575 - :py:obj:`~.cudaTextureDesc.maxAnisotropy` specifies the maximum
25576 anisotropy ratio to be used when doing anisotropic filtering. This
25577 value will be clamped to the range [1,16].
25579 - :py:obj:`~.cudaTextureDesc.mipmapFilterMode` specifies the filter
25580 mode when the calculated mipmap level lies between two defined mipmap
25581 levels.
25583 - :py:obj:`~.cudaTextureDesc.mipmapLevelBias` specifies the offset to
25584 be applied to the calculated mipmap level.
25586 - :py:obj:`~.cudaTextureDesc.minMipmapLevelClamp` specifies the lower
25587 end of the mipmap level range to clamp access to.
25589 - :py:obj:`~.cudaTextureDesc.maxMipmapLevelClamp` specifies the upper
25590 end of the mipmap level range to clamp access to.
25592 - :py:obj:`~.cudaTextureDesc.disableTrilinearOptimization` specifies
25593 whether the trilinear filtering optimizations will be disabled.
25595 - :py:obj:`~.cudaTextureDesc.seamlessCubemap` specifies whether
25596 seamless cube map filtering is enabled. This flag can only be
25597 specified if the underlying resource is a CUDA array or a CUDA
25598 mipmapped array that was created with the flag
25599 :py:obj:`~.cudaArrayCubemap`. When seamless cube map filtering is
25600 enabled, texture address modes specified by
25601 :py:obj:`~.cudaTextureDesc.addressMode` are ignored. Instead, if the
25602 :py:obj:`~.cudaTextureDesc.filterMode` is set to
25603 :py:obj:`~.cudaFilterModePoint` the address mode
25604 :py:obj:`~.cudaAddressModeClamp` will be applied for all dimensions.
25605 If the :py:obj:`~.cudaTextureDesc.filterMode` is set to
25606 :py:obj:`~.cudaFilterModeLinear` seamless cube map filtering will be
25607 performed when sampling along the cube face borders.
25609 The :py:obj:`~.cudaResourceViewDesc` struct is defined as
25611 **View CUDA Toolkit Documentation for a C++ code example**
25613 where:
25615 - :py:obj:`~.cudaResourceViewDesc.format` specifies how the data
25616 contained in the CUDA array or CUDA mipmapped array should be
25617 interpreted. Note that this can incur a change in size of the texture
25618 data. If the resource view format is a block compressed format, then
25619 the underlying CUDA array or CUDA mipmapped array has to have a
25620 32-bit unsigned integer format with 2 or 4 channels, depending on the
25621 block compressed format. For example, BC1 and BC4 require the underlying
25622 CUDA array to have a 32-bit unsigned int with 2 channels. The other
25623 BC formats require the underlying resource to have the same 32-bit
25624 unsigned int format but with 4 channels.
25626 - :py:obj:`~.cudaResourceViewDesc.width` specifies the new width of the
25627 texture data. If the resource view format is a block compressed
25628 format, this value has to be 4 times the original width of the
25629 resource. For non block compressed formats, this value has to be
25630 equal to that of the original resource.
25632 - :py:obj:`~.cudaResourceViewDesc.height` specifies the new height of
25633 the texture data. If the resource view format is a block compressed
25634 format, this value has to be 4 times the original height of the
25635 resource. For non block compressed formats, this value has to be
25636 equal to that of the original resource.
25638 - :py:obj:`~.cudaResourceViewDesc.depth` specifies the new depth of the
25639 texture data. This value has to be equal to that of the original
25640 resource.
25642 - :py:obj:`~.cudaResourceViewDesc.firstMipmapLevel` specifies the most
25643 detailed mipmap level. This will be the new mipmap level zero. For
25644 non-mipmapped resources, this value has to be
25645 zero. :py:obj:`~.cudaTextureDesc.minMipmapLevelClamp` and
25646 :py:obj:`~.cudaTextureDesc.maxMipmapLevelClamp` will be relative to
25647 this value. For example, if the firstMipmapLevel is set to 2, and a
25648 minMipmapLevelClamp of 1.2 is specified, then the actual minimum
25649 mipmap level clamp will be 3.2.
25651 - :py:obj:`~.cudaResourceViewDesc.lastMipmapLevel` specifies the least
25652 detailed mipmap level. For non-mipmapped resources, this value has to
25653 be zero.
25655 - :py:obj:`~.cudaResourceViewDesc.firstLayer` specifies the first layer
25656 index for layered textures. This will be the new layer zero. For non-
25657 layered resources, this value has to be zero.
25659 - :py:obj:`~.cudaResourceViewDesc.lastLayer` specifies the last layer
25660 index for layered textures. For non-layered resources, this value has
25661 to be zero.
25663 Parameters
25664 ----------
25665 pResDesc : :py:obj:`~.cudaResourceDesc`
25666 Resource descriptor
25667 pTexDesc : :py:obj:`~.cudaTextureDesc`
25668 Texture descriptor
25669 pResViewDesc : :py:obj:`~.cudaResourceViewDesc`
25670 Resource view descriptor
25672 Returns
25673 -------
25674 cudaError_t
25675 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25676 pTexObject : :py:obj:`~.cudaTextureObject_t`
25677 Texture object to create
25679 See Also
25680 --------
25681 :py:obj:`~.cudaDestroyTextureObject`, :py:obj:`~.cuTexObjectCreate`
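
    Examples
    --------
    A minimal sketch that textures from 1024 floats of linear device
    memory (assumes an initialized CUDA runtime; error checking is
    elided; the descriptor field names follow this module's wrappers):

    >>> err, dptr = cudaMalloc(1024 * 4)  # doctest: +SKIP
    >>> resDesc = cudaResourceDesc()
    >>> resDesc.resType = cudaResourceType.cudaResourceTypeLinear
    >>> resDesc.res.linear.devPtr = dptr  # doctest: +SKIP
    >>> resDesc.res.linear.desc = cudaCreateChannelDesc(
    ...     32, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat)[1]  # doctest: +SKIP
    >>> resDesc.res.linear.sizeInBytes = 1024 * 4
    >>> texDesc = cudaTextureDesc()
    >>> texDesc.readMode = cudaTextureReadMode.cudaReadModeElementType
    >>> err, texObj = cudaCreateTextureObject(resDesc, texDesc, None)  # doctest: +SKIP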
25682 """
25683 cdef cudaTextureObject_t pTexObject = cudaTextureObject_t()
25684 cdef cyruntime.cudaResourceDesc* cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc is not None else NULL
25685 cdef cyruntime.cudaTextureDesc* cypTexDesc_ptr = pTexDesc._pvt_ptr if pTexDesc is not None else NULL
25686 cdef cyruntime.cudaResourceViewDesc* cypResViewDesc_ptr = pResViewDesc._pvt_ptr if pResViewDesc is not None else NULL
25687 with nogil:
25688 err = cyruntime.cudaCreateTextureObject(<cyruntime.cudaTextureObject_t*>pTexObject._pvt_ptr, cypResDesc_ptr, cypTexDesc_ptr, cypResViewDesc_ptr)
25689 if err != cyruntime.cudaSuccess:
25690 return (_dict_cudaError_t[err], None)
25691 return (_dict_cudaError_t[err], pTexObject)
25693@cython.embedsignature(True)
25694def cudaDestroyTextureObject(texObject):
25695 """ Destroys a texture object.
25697 Destroys the texture object specified by `texObject`.
25699 Parameters
25700 ----------
25701 texObject : :py:obj:`~.cudaTextureObject_t`
25702 Texture object to destroy
25704 Returns
25705 -------
25706 cudaError_t
25707 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25709 See Also
25710 --------
25711 :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectDestroy`
25712 """
25713 cdef cyruntime.cudaTextureObject_t cytexObject
25714 if texObject is None:
25715 ptexObject = 0
25716 elif isinstance(texObject, (cudaTextureObject_t,)):
25717 ptexObject = int(texObject)
25718 else:
25719 ptexObject = int(cudaTextureObject_t(texObject))
25720 cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
25721 with nogil:
25722 err = cyruntime.cudaDestroyTextureObject(cytexObject)
25723 return (_dict_cudaError_t[err],)
25725@cython.embedsignature(True)
25726def cudaGetTextureObjectResourceDesc(texObject):
25727 """ Returns a texture object's resource descriptor.
25729 Returns the resource descriptor for the texture object specified by
25730 `texObject`.
25732 Parameters
25733 ----------
25734 texObject : :py:obj:`~.cudaTextureObject_t`
25735 Texture object
25737 Returns
25738 -------
25739 cudaError_t
25740 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25741 pResDesc : :py:obj:`~.cudaResourceDesc`
25742 Resource descriptor
25744 See Also
25745 --------
25746 :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectGetResourceDesc`
25747 """
25748 cdef cyruntime.cudaTextureObject_t cytexObject
25749 if texObject is None:
25750 ptexObject = 0
25751 elif isinstance(texObject, (cudaTextureObject_t,)):
25752 ptexObject = int(texObject)
25753 else:
25754 ptexObject = int(cudaTextureObject_t(texObject))
25755 cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
25756 cdef cudaResourceDesc pResDesc = cudaResourceDesc()
25757 with nogil:
25758 err = cyruntime.cudaGetTextureObjectResourceDesc(<cyruntime.cudaResourceDesc*>pResDesc._pvt_ptr, cytexObject)
25759 if err != cyruntime.cudaSuccess:
25760 return (_dict_cudaError_t[err], None)
25761 return (_dict_cudaError_t[err], pResDesc)
25763@cython.embedsignature(True)
25764def cudaGetTextureObjectTextureDesc(texObject):
25765 """ Returns a texture object's texture descriptor.
25767 Returns the texture descriptor for the texture object specified by
25768 `texObject`.
25770 Parameters
25771 ----------
25772 texObject : :py:obj:`~.cudaTextureObject_t`
25773 Texture object
25775 Returns
25776 -------
25777 cudaError_t
25778 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25779 pTexDesc : :py:obj:`~.cudaTextureDesc`
25780 Texture descriptor
25782 See Also
25783 --------
25784 :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectGetTextureDesc`
25785 """
25786 cdef cyruntime.cudaTextureObject_t cytexObject
25787 if texObject is None:
25788 ptexObject = 0
25789 elif isinstance(texObject, (cudaTextureObject_t,)):
25790 ptexObject = int(texObject)
25791 else:
25792 ptexObject = int(cudaTextureObject_t(texObject))
25793 cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
25794 cdef cudaTextureDesc pTexDesc = cudaTextureDesc()
25795 with nogil:
25796 err = cyruntime.cudaGetTextureObjectTextureDesc(<cyruntime.cudaTextureDesc*>pTexDesc._pvt_ptr, cytexObject)
25797 if err != cyruntime.cudaSuccess:
25798 return (_dict_cudaError_t[err], None)
25799 return (_dict_cudaError_t[err], pTexDesc)
25801@cython.embedsignature(True)
25802def cudaGetTextureObjectResourceViewDesc(texObject):
25803 """ Returns a texture object's resource view descriptor.
25805 Returns the resource view descriptor for the texture object specified
25806 by `texObject`. If no resource view was specified,
25807 :py:obj:`~.cudaErrorInvalidValue` is returned.
25809 Parameters
25810 ----------
25811 texObject : :py:obj:`~.cudaTextureObject_t`
25812 Texture object
25814 Returns
25815 -------
25816 cudaError_t
25817 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25818 pResViewDesc : :py:obj:`~.cudaResourceViewDesc`
25819 Resource view descriptor
25821 See Also
25822 --------
25823 :py:obj:`~.cudaCreateTextureObject`, :py:obj:`~.cuTexObjectGetResourceViewDesc`
25824 """
25825 cdef cyruntime.cudaTextureObject_t cytexObject
25826 if texObject is None:
25827 ptexObject = 0
25828 elif isinstance(texObject, (cudaTextureObject_t,)):
25829 ptexObject = int(texObject)
25830 else:
25831 ptexObject = int(cudaTextureObject_t(texObject))
25832 cytexObject = <cyruntime.cudaTextureObject_t><void_ptr>ptexObject
25833 cdef cudaResourceViewDesc pResViewDesc = cudaResourceViewDesc()
25834 with nogil:
25835 err = cyruntime.cudaGetTextureObjectResourceViewDesc(<cyruntime.cudaResourceViewDesc*>pResViewDesc._pvt_ptr, cytexObject)
25836 if err != cyruntime.cudaSuccess:
25837 return (_dict_cudaError_t[err], None)
25838 return (_dict_cudaError_t[err], pResViewDesc)
25840@cython.embedsignature(True)
25841def cudaCreateSurfaceObject(pResDesc : Optional[cudaResourceDesc]):
25842 """ Creates a surface object.
25844 Creates a surface object and returns it in `pSurfObject`. `pResDesc`
25845 describes the data to perform surface load/stores on.
25846 :py:obj:`~.cudaResourceDesc.resType` must be
25847 :py:obj:`~.cudaResourceTypeArray` and
25848    :py:obj:`~.cudaResourceDesc.res.array.array` must be set to a valid
25849 CUDA array handle.
25851 Surface objects are only supported on devices of compute capability 3.0
25852 or higher. Additionally, a surface object is an opaque value, and, as
25853 such, should only be accessed through CUDA API calls.
25855 Parameters
25856 ----------
25857 pResDesc : :py:obj:`~.cudaResourceDesc`
25858 Resource descriptor
25860 Returns
25861 -------
25862 cudaError_t
25863 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidChannelDescriptor`, :py:obj:`~.cudaErrorInvalidResourceHandle`
25864 pSurfObject : :py:obj:`~.cudaSurfaceObject_t`
25865 Surface object to create
25867 See Also
25868 --------
25869 :py:obj:`~.cudaDestroySurfaceObject`, :py:obj:`~.cuSurfObjectCreate`
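    Examples
    --------
    A minimal illustrative sketch (an editorial addition); note that the
    backing array must be created with the cudaArraySurfaceLoadStore flag:

    >>> from cuda.bindings import runtime
    >>> _, fmt = runtime.cudaCreateChannelDesc(
    ...     8, 0, 0, 0, runtime.cudaChannelFormatKind.cudaChannelFormatKindUnsigned)
    >>> _, arr = runtime.cudaMallocArray(fmt, 64, 64, runtime.cudaArraySurfaceLoadStore)
    >>> resDesc = runtime.cudaResourceDesc()
    >>> resDesc.resType = runtime.cudaResourceType.cudaResourceTypeArray
    >>> resDesc.res.array.array = arr
    >>> err, surfObj = runtime.cudaCreateSurfaceObject(resDesc)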
25870 """
25871 cdef cudaSurfaceObject_t pSurfObject = cudaSurfaceObject_t()
25872 cdef cyruntime.cudaResourceDesc* cypResDesc_ptr = pResDesc._pvt_ptr if pResDesc is not None else NULL
25873 with nogil:
25874 err = cyruntime.cudaCreateSurfaceObject(<cyruntime.cudaSurfaceObject_t*>pSurfObject._pvt_ptr, cypResDesc_ptr)
25875 if err != cyruntime.cudaSuccess:
25876 return (_dict_cudaError_t[err], None)
25877 return (_dict_cudaError_t[err], pSurfObject)
25879@cython.embedsignature(True)
25880def cudaDestroySurfaceObject(surfObject):
25881 """ Destroys a surface object.
25883 Destroys the surface object specified by `surfObject`.
25885 Parameters
25886 ----------
25887 surfObject : :py:obj:`~.cudaSurfaceObject_t`
25888 Surface object to destroy
25890 Returns
25891 -------
25892 cudaError_t
25893 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25895 See Also
25896 --------
25897 :py:obj:`~.cudaCreateSurfaceObject`, :py:obj:`~.cuSurfObjectDestroy`
25898 """
25899 cdef cyruntime.cudaSurfaceObject_t cysurfObject
25900 if surfObject is None:
25901 psurfObject = 0
25902 elif isinstance(surfObject, (cudaSurfaceObject_t,)):
25903 psurfObject = int(surfObject)
25904 else:
25905 psurfObject = int(cudaSurfaceObject_t(surfObject))
25906 cysurfObject = <cyruntime.cudaSurfaceObject_t><void_ptr>psurfObject
25907 with nogil:
25908 err = cyruntime.cudaDestroySurfaceObject(cysurfObject)
25909 return (_dict_cudaError_t[err],)
25911@cython.embedsignature(True)
25912def cudaGetSurfaceObjectResourceDesc(surfObject):
25913 """ Returns a surface object's resource descriptor Returns the resource descriptor for the surface object specified by `surfObject`.
25915 Parameters
25916 ----------
25917 surfObject : :py:obj:`~.cudaSurfaceObject_t`
25918 Surface object
25920 Returns
25921 -------
25922 cudaError_t
25923 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25924 pResDesc : :py:obj:`~.cudaResourceDesc`
25925 Resource descriptor
25927 See Also
25928 --------
25929 :py:obj:`~.cudaCreateSurfaceObject`, :py:obj:`~.cuSurfObjectGetResourceDesc`
25930 """
25931 cdef cyruntime.cudaSurfaceObject_t cysurfObject
25932 if surfObject is None:
25933 psurfObject = 0
25934 elif isinstance(surfObject, (cudaSurfaceObject_t,)):
25935 psurfObject = int(surfObject)
25936 else:
25937 psurfObject = int(cudaSurfaceObject_t(surfObject))
25938 cysurfObject = <cyruntime.cudaSurfaceObject_t><void_ptr>psurfObject
25939 cdef cudaResourceDesc pResDesc = cudaResourceDesc()
25940 with nogil:
25941 err = cyruntime.cudaGetSurfaceObjectResourceDesc(<cyruntime.cudaResourceDesc*>pResDesc._pvt_ptr, cysurfObject)
25942 if err != cyruntime.cudaSuccess:
25943 return (_dict_cudaError_t[err], None)
25944 return (_dict_cudaError_t[err], pResDesc)
25946@cython.embedsignature(True)
25947def cudaDriverGetVersion():
25948 """ Returns the latest version of CUDA supported by the driver.
25950 Returns in `*driverVersion` the latest version of CUDA supported by the
25951 driver. The version is returned as (1000 * major + 10 * minor). For
25952 example, CUDA 9.2 would be represented by 9020. If no driver is
25953 installed, then 0 is returned as the driver version.
25955 This function automatically returns :py:obj:`~.cudaErrorInvalidValue`
25956 if `driverVersion` is NULL.
25958 Returns
25959 -------
25960 cudaError_t
25961 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25962 driverVersion : int
25963 Returns the CUDA driver version.
25965 See Also
25966 --------
25967 :py:obj:`~.cudaRuntimeGetVersion`, :py:obj:`~.cuDriverGetVersion`
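    Examples
    --------
    Decoding the packed version number (an editorial addition):

    >>> from cuda.bindings import runtime
    >>> err, ver = runtime.cudaDriverGetVersion()
    >>> major, minor = ver // 1000, (ver % 1000) // 10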
25968 """
25969 cdef int driverVersion = 0
25970 with nogil:
25971 err = cyruntime.cudaDriverGetVersion(&driverVersion)
25972 if err != cyruntime.cudaSuccess:
25973 return (_dict_cudaError_t[err], None)
25974 return (_dict_cudaError_t[err], driverVersion)
25976@cython.embedsignature(True)
25977def cudaRuntimeGetVersion():
25978 """ Returns the CUDA Runtime version.
25980 Returns in `*runtimeVersion` the version number of the current CUDA
25981 Runtime instance. The version is returned as (1000 * major + 10 *
25982 minor). For example, CUDA 9.2 would be represented by 9020.
25984 As of CUDA 12.0, this function no longer initializes CUDA. The purpose
25985 of this API is solely to return a compile-time constant stating the
25986 CUDA Toolkit version in the above format.
25988 This function automatically returns :py:obj:`~.cudaErrorInvalidValue`
25989 if the `runtimeVersion` argument is NULL.
25991 Returns
25992 -------
25993 cudaError_t
25994 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
25995 runtimeVersion : int
25996 Returns the CUDA Runtime version.
25998 See Also
25999 --------
26000 :py:obj:`~.cudaDriverGetVersion`, :py:obj:`~.cuDriverGetVersion`
26001 """
26002 cdef int runtimeVersion = 0
26003 with nogil:
26004 err = cyruntime.cudaRuntimeGetVersion(&runtimeVersion)
26005 if err != cyruntime.cudaSuccess:
26006 return (_dict_cudaError_t[err], None)
26007 return (_dict_cudaError_t[err], runtimeVersion)
26009@cython.embedsignature(True)
26010def cudaLogsRegisterCallback(callbackFunc, userData):
26011 """ Register a callback function to receive error log messages.
26013 Parameters
26014 ----------
26015 callbackFunc : :py:obj:`~.cudaLogsCallback_t`
26016 The function to register as a callback
26017 userData : Any
26018 A generic pointer to user data. This is passed into the callback
26019 function.
26021 Returns
26022 -------
26023 cudaError_t
26024        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26025 callback_out : :py:obj:`~.cudaLogsCallbackHandle`
26026 Optional location to store the callback handle after it is
26027 registered
26028 """
26029 cdef cyruntime.cudaLogsCallback_t cycallbackFunc
26030 if callbackFunc is None:
26031 pcallbackFunc = 0
26032 elif isinstance(callbackFunc, (cudaLogsCallback_t,)):
26033 pcallbackFunc = int(callbackFunc)
26034 else:
26035 pcallbackFunc = int(cudaLogsCallback_t(callbackFunc))
26036 cycallbackFunc = <cyruntime.cudaLogsCallback_t><void_ptr>pcallbackFunc
26037 cyuserData = _HelperInputVoidPtr(userData)
26038 cdef void* cyuserData_ptr = <void*><void_ptr>cyuserData.cptr
26039 cdef cudaLogsCallbackHandle callback_out = cudaLogsCallbackHandle()
26040 with nogil:
26041 err = cyruntime.cudaLogsRegisterCallback(cycallbackFunc, cyuserData_ptr, <cyruntime.cudaLogsCallbackHandle*>callback_out._pvt_ptr)
26042 if err != cyruntime.cudaSuccess:
26043 return (_dict_cudaError_t[err], None)
26044 return (_dict_cudaError_t[err], callback_out)
26046@cython.embedsignature(True)
26047def cudaLogsUnregisterCallback(callback):
26048 """ Unregister a log message callback.
26050 Parameters
26051 ----------
26052 callback : :py:obj:`~.cudaLogsCallbackHandle`
26053 The callback instance to unregister from receiving log messages
26055 Returns
26056 -------
26057 cudaError_t
26058        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26059 """
26060 cdef cyruntime.cudaLogsCallbackHandle cycallback
26061 if callback is None:
26062 pcallback = 0
26063 elif isinstance(callback, (cudaLogsCallbackHandle,)):
26064 pcallback = int(callback)
26065 else:
26066 pcallback = int(cudaLogsCallbackHandle(callback))
26067 cycallback = <cyruntime.cudaLogsCallbackHandle><void_ptr>pcallback
26068 with nogil:
26069 err = cyruntime.cudaLogsUnregisterCallback(cycallback)
26070 return (_dict_cudaError_t[err],)
26072@cython.embedsignature(True)
26073def cudaLogsCurrent(unsigned int flags):
26074 """ Sets log iterator to point to the end of log buffer, where the next message would be written.
26076 Parameters
26077 ----------
26078 flags : unsigned int
26079 Reserved for future use, must be 0
26081 Returns
26082 -------
26083 cudaError_t
26084        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26085 iterator_out : :py:obj:`~.cudaLogIterator`
26086 Location to store an iterator to the current tail of the logs
26087 """
26088 cdef cudaLogIterator iterator_out = cudaLogIterator()
26089 with nogil:
26090 err = cyruntime.cudaLogsCurrent(<cyruntime.cudaLogIterator*>iterator_out._pvt_ptr, flags)
26091 if err != cyruntime.cudaSuccess:
26092 return (_dict_cudaError_t[err], None)
26093 return (_dict_cudaError_t[err], iterator_out)
26095@cython.embedsignature(True)
26096def cudaLogsDumpToFile(iterator : Optional[cudaLogIterator], char* pathToFile, unsigned int flags):
26097 """ Dump accumulated driver logs into a file.
26099 Logs generated by the driver are stored in an internal buffer and can
26100 be copied out using this API. This API dumps all driver logs starting
26101 from `iterator` into `pathToFile` provided.
26103 Parameters
26104 ----------
26105 iterator : :py:obj:`~.cudaLogIterator`
26106 Optional auto-advancing iterator specifying the starting log to
26107 read. NULL value dumps all logs.
26108 pathToFile : bytes
26109 Path to output file for dumping logs
26110 flags : unsigned int
26111 Reserved for future use, must be 0
26113 Returns
26114 -------
26115 cudaError_t
26116        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26117 iterator : :py:obj:`~.cudaLogIterator`
26118 Optional auto-advancing iterator specifying the starting log to
26119 read. NULL value dumps all logs.
26121 Notes
26122 -----
26123 `iterator` is auto-advancing. Dumping logs will update the value of `iterator` to receive the next generated log.
26125    The driver reserves limited memory for storing logs. The oldest logs may be overwritten and become unrecoverable. An indication will appear in the destination output if the logs have been truncated. Dump the logs after each failed API call to mitigate this risk.
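    Examples
    --------
    A sketch of the intended usage (an editorial addition; the output path is
    illustrative):

    >>> from cuda.bindings import runtime
    >>> err, it = runtime.cudaLogsCurrent(0)
    >>> # ... subsequent CUDA calls; after a failure, dump what was logged:
    >>> err, it = runtime.cudaLogsDumpToFile(it, b"/tmp/cuda_logs.txt", 0)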
26126 """
26127 cdef cyruntime.cudaLogIterator* cyiterator = NULL
26128 if iterator is not None:
26129 cyiterator = iterator._pvt_ptr
26130 with nogil:
26131 err = cyruntime.cudaLogsDumpToFile(cyiterator, pathToFile, flags)
26132 if err != cyruntime.cudaSuccess:
26133 return (_dict_cudaError_t[err], None)
26134 return (_dict_cudaError_t[err], iterator)
26136@cython.embedsignature(True)
26137def cudaLogsDumpToMemory(iterator : Optional[cudaLogIterator], char* buffer, size_t size, unsigned int flags):
26138 """ Dump accumulated driver logs into a buffer.
26140 Logs generated by the driver are stored in an internal buffer and can
26141 be copied out using this API. This API dumps driver logs from
26142 `iterator` into `buffer` up to the size specified in `*size`. The
26143    driver will always null-terminate the buffer but there will not be a
26144    null character between log entries, only a newline character. The driver will
26145 then return the actual number of bytes written in `*size`, excluding
26146 the null terminator. If there are no messages to dump, `*size` will be
26147    set to 0 and the function will return :py:obj:`~.cudaSuccess`. If the
26148    provided `buffer` is not large enough to hold any messages, `*size`
26149    will be set to 0 and the function will return
26150    :py:obj:`~.cudaErrorInvalidValue`.
26152 Parameters
26153 ----------
26154 iterator : :py:obj:`~.cudaLogIterator`
26155 Optional auto-advancing iterator specifying the starting log to
26156 read. NULL value dumps all logs.
26157 buffer : bytes
26158 Pointer to dump logs
26159 size : int
26160 See description
26161 flags : unsigned int
26162 Reserved for future use, must be 0
26164 Returns
26165 -------
26166 cudaError_t
26167        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26168 iterator : :py:obj:`~.cudaLogIterator`
26169 Optional auto-advancing iterator specifying the starting log to
26170 read. NULL value dumps all logs.
26171 size : int
26172 See description
26174 Notes
26175 -----
26176 `iterator` is auto-advancing. Dumping logs will update the value of `iterator` to receive the next generated log.
26178    The driver reserves limited memory for storing logs. The maximum size of the buffer is 25600 bytes. The oldest logs may be overwritten and become unrecoverable. An indication will appear in the destination output if the logs have been truncated. Dump the logs after each failed API call to mitigate this risk.
26180 If the provided value in `*size` is not large enough to hold all buffered messages, a message will be added at the head of the buffer indicating this. The driver then computes the number of messages it is able to store in `buffer` and writes it out. The final message in `buffer` will always be the most recent log message as of when the API is called.
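    Examples
    --------
    A sketch of the intended usage (an editorial addition); a writable
    bytearray serves as the destination buffer:

    >>> from cuda.bindings import runtime
    >>> buf = bytearray(4096)
    >>> err, it, n = runtime.cudaLogsDumpToMemory(None, buf, len(buf), 0)
    >>> text = bytes(buf[:n]).decode(errors="replace")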
26181 """
26182 cdef cyruntime.cudaLogIterator* cyiterator = NULL
26183 if iterator is not None:
26184 cyiterator = iterator._pvt_ptr
26185 with nogil:
26186 err = cyruntime.cudaLogsDumpToMemory(cyiterator, buffer, &size, flags)
26187 if err != cyruntime.cudaSuccess:
26188 return (_dict_cudaError_t[err], None, None)
26189 return (_dict_cudaError_t[err], iterator, size)
26191@cython.embedsignature(True)
26192def cudaGraphCreate(unsigned int flags):
26193 """ Creates a graph.
26195 Creates an empty graph, which is returned via `pGraph`.
26197 Parameters
26198 ----------
26199 flags : unsigned int
26200 Graph creation flags, must be 0
26202 Returns
26203 -------
26204 cudaError_t
26205 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
26206 pGraph : :py:obj:`~.cudaGraph_t`
26207 Returns newly created graph
26209 See Also
26210 --------
26211 :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphDestroy`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphClone`
26212 """
26213 cdef cudaGraph_t pGraph = cudaGraph_t()
26214 with nogil:
26215 err = cyruntime.cudaGraphCreate(<cyruntime.cudaGraph_t*>pGraph._pvt_ptr, flags)
26216 if err != cyruntime.cudaSuccess:
26217 return (_dict_cudaError_t[err], None)
26218 return (_dict_cudaError_t[err], pGraph)
26220@cython.embedsignature(True)
26221def cudaGraphAddKernelNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, pNodeParams : Optional[cudaKernelNodeParams]):
26222 """ Creates a kernel execution node and adds it to a graph.
26224 Creates a new kernel execution node and adds it to `graph` with
26225 `numDependencies` dependencies specified via `pDependencies` and
26226 arguments specified in `pNodeParams`. It is possible for
26227 `numDependencies` to be 0, in which case the node will be placed at the
26228 root of the graph. `pDependencies` may not have any duplicate entries.
26229 A handle to the new node will be returned in `pGraphNode`.
26231 The :py:obj:`~.cudaKernelNodeParams` structure is defined as:
26233 **View CUDA Toolkit Documentation for a C++ code example**
26235 When the graph is launched, the node will invoke kernel `func` on a
26236 (`gridDim.x` x `gridDim.y` x `gridDim.z`) grid of blocks. Each block
26237 contains (`blockDim.x` x `blockDim.y` x `blockDim.z`) threads.
26239 `sharedMem` sets the amount of dynamic shared memory that will be
26240 available to each thread block.
26242 Kernel parameters to `func` can be specified in one of two ways:
26244 1) Kernel parameters can be specified via `kernelParams`. If the kernel
26245 has N parameters, then `kernelParams` needs to be an array of N
26246 pointers. Each pointer, from `kernelParams`[0] to `kernelParams`[N-1],
26247 points to the region of memory from which the actual parameter will be
26248 copied. The number of kernel parameters and their offsets and sizes do
26249 not need to be specified as that information is retrieved directly from
26250 the kernel's image.
26252 2) Kernel parameters can also be packaged by the application into a
26253 single buffer that is passed in via `extra`. This places the burden on
26254 the application of knowing each kernel parameter's size and
26255 alignment/padding within the buffer. The `extra` parameter exists to
26256    allow this function to take additional, less commonly used arguments.
26257 `extra` specifies a list of names of extra settings and their
26258 corresponding values. Each extra setting name is immediately followed
26259 by the corresponding value. The list must be terminated with either
26260 NULL or CU_LAUNCH_PARAM_END.
26262 - :py:obj:`~.CU_LAUNCH_PARAM_END`, which indicates the end of the
26263 `extra` array;
26265 - :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER`, which specifies that the
26266 next value in `extra` will be a pointer to a buffer containing all
26267 the kernel parameters for launching kernel `func`;
26269 - :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_SIZE`, which specifies that the
26270 next value in `extra` will be a pointer to a size_t containing the
26271 size of the buffer specified with
26272 :py:obj:`~.CU_LAUNCH_PARAM_BUFFER_POINTER`;
26274 The error :py:obj:`~.cudaErrorInvalidValue` will be returned if kernel
26275 parameters are specified with both `kernelParams` and `extra` (i.e.
26276 both `kernelParams` and `extra` are non-NULL).
26278 The `kernelParams` or `extra` array, as well as the argument values it
26279 points to, are copied during this call.
26281 Parameters
26282 ----------
26283 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
26284 Graph to which to add the node
26285 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
26286 Dependencies of the node
26287 numDependencies : size_t
26288 Number of dependencies
26289 pNodeParams : :py:obj:`~.cudaKernelNodeParams`
26290 Parameters for the GPU execution node
26292 Returns
26293 -------
26294 cudaError_t
26295 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`
26296 pGraphNode : :py:obj:`~.cudaGraphNode_t`
26297 Returns newly created node
26299 See Also
26300 --------
26301 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphKernelNodeGetParams`, :py:obj:`~.cudaGraphKernelNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
26303 Notes
26304 -----
26305 Kernels launched using graphs must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior. This restriction does not apply to texture and surface objects.
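    Examples
    --------
    A structural sketch (an editorial addition; `kernel` stands in for a
    valid device function handle, and argument packing via `kernelParams` is
    elided):

    >>> from cuda.bindings import runtime
    >>> err, graph = runtime.cudaGraphCreate(0)
    >>> p = runtime.cudaKernelNodeParams()
    >>> p.func = kernel  # hypothetical device function handle
    >>> p.gridDim.x, p.gridDim.y, p.gridDim.z = 1, 1, 1
    >>> p.blockDim.x, p.blockDim.y, p.blockDim.z = 32, 1, 1
    >>> p.sharedMemBytes = 0
    >>> err, node = runtime.cudaGraphAddKernelNode(graph, None, 0, p)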
26306 """
26307 pDependencies = [] if pDependencies is None else pDependencies
26308 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
26309        raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
26310 cdef cyruntime.cudaGraph_t cygraph
26311 if graph is None:
26312 pgraph = 0
26313 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
26314 pgraph = int(graph)
26315 else:
26316 pgraph = int(cudaGraph_t(graph))
26317 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
26318 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
26319 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
26320 if len(pDependencies) > 1:
26321 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
26322 if cypDependencies is NULL:
26323 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
26324 else:
26325 for idx in range(len(pDependencies)):
26326 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
26327 elif len(pDependencies) == 1:
26328 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
26329 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
26330 cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
26331 with nogil:
26332 err = cyruntime.cudaGraphAddKernelNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypNodeParams_ptr)
26333 if len(pDependencies) > 1 and cypDependencies is not NULL:
26334 free(cypDependencies)
26335 if err != cyruntime.cudaSuccess:
26336 return (_dict_cudaError_t[err], None)
26337 return (_dict_cudaError_t[err], pGraphNode)
26339@cython.embedsignature(True)
26340def cudaGraphKernelNodeGetParams(node):
26341 """ Returns a kernel node's parameters.
26343 Returns the parameters of kernel node `node` in `pNodeParams`. The
26344 `kernelParams` or `extra` array returned in `pNodeParams`, as well as
26345 the argument values it points to, are owned by the node. This memory
26346 remains valid until the node is destroyed or its parameters are
26347 modified, and should not be modified directly. Use
26348 :py:obj:`~.cudaGraphKernelNodeSetParams` to update the parameters of
26349 this node.
26351 The params will contain either `kernelParams` or `extra`, according to
26352 which of these was most recently set on the node.
26354 Parameters
26355 ----------
26356 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26357 Node to get the parameters for
26359 Returns
26360 -------
26361 cudaError_t
26362 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`
26363 pNodeParams : :py:obj:`~.cudaKernelNodeParams`
26364 Pointer to return the parameters
26366 See Also
26367 --------
26368 :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphKernelNodeSetParams`
26369 """
26370 cdef cyruntime.cudaGraphNode_t cynode
26371 if node is None:
26372 pnode = 0
26373 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26374 pnode = int(node)
26375 else:
26376 pnode = int(cudaGraphNode_t(node))
26377 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26378 cdef cudaKernelNodeParams pNodeParams = cudaKernelNodeParams()
26379 with nogil:
26380 err = cyruntime.cudaGraphKernelNodeGetParams(cynode, <cyruntime.cudaKernelNodeParams*>pNodeParams._pvt_ptr)
26381 if err != cyruntime.cudaSuccess:
26382 return (_dict_cudaError_t[err], None)
26383 return (_dict_cudaError_t[err], pNodeParams)
26385@cython.embedsignature(True)
26386def cudaGraphKernelNodeSetParams(node, pNodeParams : Optional[cudaKernelNodeParams]):
26387 """ Sets a kernel node's parameters.
26389 Sets the parameters of kernel node `node` to `pNodeParams`.
26391 Parameters
26392 ----------
26393 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26394 Node to set the parameters for
26395 pNodeParams : :py:obj:`~.cudaKernelNodeParams`
26396 Parameters to copy
26398 Returns
26399 -------
26400 cudaError_t
26401 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorMemoryAllocation`
26403 See Also
26404 --------
26405 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphKernelNodeGetParams`
26406 """
26407 cdef cyruntime.cudaGraphNode_t cynode
26408 if node is None:
26409 pnode = 0
26410 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26411 pnode = int(node)
26412 else:
26413 pnode = int(cudaGraphNode_t(node))
26414 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26415 cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
26416 with nogil:
26417 err = cyruntime.cudaGraphKernelNodeSetParams(cynode, cypNodeParams_ptr)
26418 return (_dict_cudaError_t[err],)
26420@cython.embedsignature(True)
26421def cudaGraphKernelNodeCopyAttributes(hDst, hSrc):
26422 """ Copies attributes from source node to destination node.
26424 Copies attributes from source node `hSrc` to destination node `hDst`.
26425    Both nodes must have the same context.
26427 Parameters
26428 ----------
26429 hDst : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26430 Destination node
26431 hSrc : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26432        Source node. For the list of attributes, see
26433 :py:obj:`~.cudaKernelNodeAttrID`
26435 Returns
26436 -------
26437 cudaError_t
26438 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidContext`
26440 See Also
26441 --------
26442 :py:obj:`~.cudaAccessPolicyWindow`
26443 """
26444 cdef cyruntime.cudaGraphNode_t cyhSrc
26445 if hSrc is None:
26446 phSrc = 0
26447 elif isinstance(hSrc, (cudaGraphNode_t,driver.CUgraphNode)):
26448 phSrc = int(hSrc)
26449 else:
26450 phSrc = int(cudaGraphNode_t(hSrc))
26451 cyhSrc = <cyruntime.cudaGraphNode_t><void_ptr>phSrc
26452 cdef cyruntime.cudaGraphNode_t cyhDst
26453 if hDst is None:
26454 phDst = 0
26455 elif isinstance(hDst, (cudaGraphNode_t,driver.CUgraphNode)):
26456 phDst = int(hDst)
26457 else:
26458 phDst = int(cudaGraphNode_t(hDst))
26459 cyhDst = <cyruntime.cudaGraphNode_t><void_ptr>phDst
26460 with nogil:
26461 err = cyruntime.cudaGraphKernelNodeCopyAttributes(cyhDst, cyhSrc)
26462 return (_dict_cudaError_t[err],)
26464@cython.embedsignature(True)
26465def cudaGraphKernelNodeGetAttribute(hNode, attr not None : cudaKernelNodeAttrID):
26466 """ Queries node attribute.
26468 Queries attribute `attr` from node `hNode` and stores it in
26469 corresponding member of `value_out`.
26471 Parameters
26472 ----------
26473 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26475 attr : :py:obj:`~.cudaKernelNodeAttrID`
26478 Returns
26479 -------
26480 cudaError_t
26481 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
26482 value_out : :py:obj:`~.cudaKernelNodeAttrValue`
26485 See Also
26486 --------
26487 :py:obj:`~.cudaAccessPolicyWindow`
26488 """
26489 cdef cyruntime.cudaGraphNode_t cyhNode
26490 if hNode is None:
26491 phNode = 0
26492 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
26493 phNode = int(hNode)
26494 else:
26495 phNode = int(cudaGraphNode_t(hNode))
26496 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
26497 cdef cyruntime.cudaKernelNodeAttrID cyattr = attr.value
26498 cdef cudaKernelNodeAttrValue value_out = cudaKernelNodeAttrValue()
26499 with nogil:
26500 err = cyruntime.cudaGraphKernelNodeGetAttribute(cyhNode, cyattr, <cyruntime.cudaKernelNodeAttrValue*>value_out._pvt_ptr)
26501 if err != cyruntime.cudaSuccess:
26502 return (_dict_cudaError_t[err], None)
26503 return (_dict_cudaError_t[err], value_out)
26505@cython.embedsignature(True)
26506def cudaGraphKernelNodeSetAttribute(hNode, attr not None : cudaKernelNodeAttrID, value : Optional[cudaKernelNodeAttrValue]):
26507 """ Sets node attribute.
26509 Sets attribute `attr` on node `hNode` from corresponding attribute of
26510 `value`.
26512 Parameters
26513 ----------
26514 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26516 attr : :py:obj:`~.cudaKernelNodeAttrID`
26518 value : :py:obj:`~.cudaKernelNodeAttrValue`
26521 Returns
26522 -------
26523 cudaError_t
26524 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
26526 See Also
26527 --------
26528 :py:obj:`~.cudaAccessPolicyWindow`
26529 """
26530 cdef cyruntime.cudaGraphNode_t cyhNode
26531 if hNode is None:
26532 phNode = 0
26533 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
26534 phNode = int(hNode)
26535 else:
26536 phNode = int(cudaGraphNode_t(hNode))
26537 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
26538 cdef cyruntime.cudaKernelNodeAttrID cyattr = attr.value
26539 cdef cyruntime.cudaKernelNodeAttrValue* cyvalue_ptr = value._pvt_ptr if value is not None else NULL
26540 with nogil:
26541 err = cyruntime.cudaGraphKernelNodeSetAttribute(cyhNode, cyattr, cyvalue_ptr)
26542 return (_dict_cudaError_t[err],)
26544@cython.embedsignature(True)
26545def cudaGraphAddMemcpyNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, pCopyParams : Optional[cudaMemcpy3DParms]):
26546 """ Creates a memcpy node and adds it to a graph.
26548 Creates a new memcpy node and adds it to `graph` with `numDependencies`
26549 dependencies specified via `pDependencies`. It is possible for
26550 `numDependencies` to be 0, in which case the node will be placed at the
26551 root of the graph. `pDependencies` may not have any duplicate entries.
26552 A handle to the new node will be returned in `pGraphNode`.
26554 When the graph is launched, the node will perform the memcpy described
26555 by `pCopyParams`. See :py:obj:`~.cudaMemcpy3D()` for a description of
26556 the structure and its restrictions.
26558 Memcpy nodes have some additional restrictions with regards to managed
26559 memory, if the system contains at least one device which has a zero
26560 value for the device attribute
26561 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`.
26563 Parameters
26564 ----------
26565 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
26566 Graph to which to add the node
26567 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
26568 Dependencies of the node
26569 numDependencies : size_t
26570 Number of dependencies
26571 pCopyParams : :py:obj:`~.cudaMemcpy3DParms`
26572 Parameters for the memory copy
26574 Returns
26575 -------
26576 cudaError_t
26577 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26578 pGraphNode : :py:obj:`~.cudaGraphNode_t`
26579 Returns newly created node
26581 See Also
26582 --------
26583 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaGraphAddMemcpyNodeToSymbol`, :py:obj:`~.cudaGraphAddMemcpyNodeFromSymbol`, :py:obj:`~.cudaGraphAddMemcpyNode1D`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemsetNode`
26584 """
26585 pDependencies = [] if pDependencies is None else pDependencies
26586 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
26587        raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
26588 cdef cyruntime.cudaGraph_t cygraph
26589 if graph is None:
26590 pgraph = 0
26591 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
26592 pgraph = int(graph)
26593 else:
26594 pgraph = int(cudaGraph_t(graph))
26595 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
26596 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
26597 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
26598 if len(pDependencies) > 1:
26599 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
26600 if cypDependencies is NULL:
26601 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
26602 else:
26603 for idx in range(len(pDependencies)):
26604 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
26605 elif len(pDependencies) == 1:
26606 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
26607 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
26608 cdef cyruntime.cudaMemcpy3DParms* cypCopyParams_ptr = pCopyParams._pvt_ptr if pCopyParams is not None else NULL
26609 with nogil:
26610 err = cyruntime.cudaGraphAddMemcpyNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypCopyParams_ptr)
26611 if len(pDependencies) > 1 and cypDependencies is not NULL:
26612 free(cypDependencies)
26613 if err != cyruntime.cudaSuccess:
26614 return (_dict_cudaError_t[err], None)
26615 return (_dict_cudaError_t[err], pGraphNode)
26617@cython.embedsignature(True)
26618def cudaGraphAddMemcpyNode1D(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, dst, src, size_t count, kind not None : cudaMemcpyKind):
26619 """ Creates a 1D memcpy node and adds it to a graph.
26621 Creates a new 1D memcpy node and adds it to `graph` with
26622 `numDependencies` dependencies specified via `pDependencies`. It is
26623 possible for `numDependencies` to be 0, in which case the node will be
26624 placed at the root of the graph. `pDependencies` may not have any
26625 duplicate entries. A handle to the new node will be returned in
26626 `pGraphNode`.
26628 When the graph is launched, the node will copy `count` bytes from the
26629 memory area pointed to by `src` to the memory area pointed to by `dst`,
26630 where `kind` specifies the direction of the copy, and must be one of
26631 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
26632 :py:obj:`~.cudaMemcpyDeviceToHost`,
26633 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
26634 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
26635 type of transfer is inferred from the pointer values. However,
26636 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
26637 unified virtual addressing. Launching a memcpy node with dst and src
26638    pointers that do not match the direction of the copy results in
26639 undefined behavior.
26641 Memcpy nodes have some additional restrictions with regards to managed
26642 memory, if the system contains at least one device which has a zero
26643 value for the device attribute
26644 :py:obj:`~.cudaDevAttrConcurrentManagedAccess`.
26646 Parameters
26647 ----------
26648 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
26649 Graph to which to add the node
26650 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
26651 Dependencies of the node
26652 numDependencies : size_t
26653 Number of dependencies
26654 dst : Any
26655 Destination memory address
26656 src : Any
26657 Source memory address
26658 count : size_t
26659 Size in bytes to copy
26660 kind : :py:obj:`~.cudaMemcpyKind`
26661 Type of transfer
26663 Returns
26664 -------
26665 cudaError_t
26666 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26667 pGraphNode : :py:obj:`~.cudaGraphNode_t`
26668 Returns newly created node
26670 See Also
26671 --------
26672 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemsetNode`
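    Examples
    --------
    A minimal host-to-host sketch (an editorial addition; raw addresses are
    passed as integers):

    >>> import numpy as np
    >>> from cuda.bindings import runtime
    >>> src = np.ones(256, dtype=np.uint8)
    >>> dst = np.zeros_like(src)
    >>> err, graph = runtime.cudaGraphCreate(0)
    >>> err, node = runtime.cudaGraphAddMemcpyNode1D(
    ...     graph, None, 0, dst.ctypes.data, src.ctypes.data, src.nbytes,
    ...     runtime.cudaMemcpyKind.cudaMemcpyHostToHost)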
26673 """
26674 pDependencies = [] if pDependencies is None else pDependencies
26675 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
26676        raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
26677 cdef cyruntime.cudaGraph_t cygraph
26678 if graph is None:
26679 pgraph = 0
26680 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
26681 pgraph = int(graph)
26682 else:
26683 pgraph = int(cudaGraph_t(graph))
26684 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
26685 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
26686 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
26687 if len(pDependencies) > 1:
26688 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
26689 if cypDependencies is NULL:
26690 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
26691 else:
26692 for idx in range(len(pDependencies)):
26693 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
26694 elif len(pDependencies) == 1:
26695 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
26696 cydst = _HelperInputVoidPtr(dst)
26697 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
26698 cysrc = _HelperInputVoidPtr(src)
26699 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
26700 cdef cyruntime.cudaMemcpyKind cykind = kind.value
26701 with nogil:
26702 err = cyruntime.cudaGraphAddMemcpyNode1D(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cydst_ptr, cysrc_ptr, count, cykind)
26703 if len(pDependencies) > 1 and cypDependencies is not NULL:
26704 free(cypDependencies)
26705 if err != cyruntime.cudaSuccess:
26706 return (_dict_cudaError_t[err], None)
26707 return (_dict_cudaError_t[err], pGraphNode)
26709@cython.embedsignature(True)
26710def cudaGraphMemcpyNodeGetParams(node):
26711 """ Returns a memcpy node's parameters.
26713 Returns the parameters of memcpy node `node` in `pNodeParams`.
26715 Parameters
26716 ----------
26717 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26718 Node to get the parameters for
26720 Returns
26721 -------
26722 cudaError_t
26723 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26724 pNodeParams : :py:obj:`~.cudaMemcpy3DParms`
26725 Pointer to return the parameters
26727 See Also
26728 --------
26729 :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`
26730 """
26731 cdef cyruntime.cudaGraphNode_t cynode
26732 if node is None:
26733 pnode = 0
26734 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26735 pnode = int(node)
26736 else:
26737 pnode = int(cudaGraphNode_t(node))
26738 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26739 cdef cudaMemcpy3DParms pNodeParams = cudaMemcpy3DParms()
26740 with nogil:
26741 err = cyruntime.cudaGraphMemcpyNodeGetParams(cynode, <cyruntime.cudaMemcpy3DParms*>pNodeParams._pvt_ptr)
26742 if err != cyruntime.cudaSuccess:
26743 return (_dict_cudaError_t[err], None)
26744 return (_dict_cudaError_t[err], pNodeParams)
26746@cython.embedsignature(True)
26747def cudaGraphMemcpyNodeSetParams(node, pNodeParams : Optional[cudaMemcpy3DParms]):
26748 """ Sets a memcpy node's parameters.
26750 Sets the parameters of memcpy node `node` to `pNodeParams`.
26752 Parameters
26753 ----------
26754 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26755 Node to set the parameters for
26756 pNodeParams : :py:obj:`~.cudaMemcpy3DParms`
26757 Parameters to copy
26759 Returns
26760 -------
26761 cudaError_t
26762        :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26764 See Also
26765 --------
26766 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaMemcpy3D`, :py:obj:`~.cudaGraphMemcpyNodeSetParamsToSymbol`, :py:obj:`~.cudaGraphMemcpyNodeSetParamsFromSymbol`, :py:obj:`~.cudaGraphMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`
26767 """
26768 cdef cyruntime.cudaGraphNode_t cynode
26769 if node is None:
26770 pnode = 0
26771 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26772 pnode = int(node)
26773 else:
26774 pnode = int(cudaGraphNode_t(node))
26775 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26776 cdef cyruntime.cudaMemcpy3DParms* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
26777 with nogil:
26778 err = cyruntime.cudaGraphMemcpyNodeSetParams(cynode, cypNodeParams_ptr)
26779 return (_dict_cudaError_t[err],)
26781@cython.embedsignature(True)
26782def cudaGraphMemcpyNodeSetParams1D(node, dst, src, size_t count, kind not None : cudaMemcpyKind):
26783 """ Sets a memcpy node's parameters to perform a 1-dimensional copy.
26785 Sets the parameters of memcpy node `node` to the copy described by the
26786 provided parameters.
26788 When the graph is launched, the node will copy `count` bytes from the
26789 memory area pointed to by `src` to the memory area pointed to by `dst`,
26790 where `kind` specifies the direction of the copy, and must be one of
26791 :py:obj:`~.cudaMemcpyHostToHost`, :py:obj:`~.cudaMemcpyHostToDevice`,
26792 :py:obj:`~.cudaMemcpyDeviceToHost`,
26793 :py:obj:`~.cudaMemcpyDeviceToDevice`, or :py:obj:`~.cudaMemcpyDefault`.
26794 Passing :py:obj:`~.cudaMemcpyDefault` is recommended, in which case the
26795 type of transfer is inferred from the pointer values. However,
26796 :py:obj:`~.cudaMemcpyDefault` is only allowed on systems that support
26797 unified virtual addressing. Launching a memcpy node with dst and src
26798    pointers that do not match the direction of the copy results in
26799 undefined behavior.
26801 Parameters
26802 ----------
26803 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26804 Node to set the parameters for
26805 dst : Any
26806 Destination memory address
26807 src : Any
26808 Source memory address
26809 count : size_t
26810 Size in bytes to copy
26811 kind : :py:obj:`~.cudaMemcpyKind`
26812 Type of transfer
26814 Returns
26815 -------
26816 cudaError_t
26817 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26819 See Also
26820 --------
26821 :py:obj:`~.cudaMemcpy`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`
26822 """
26823 cdef cyruntime.cudaGraphNode_t cynode
26824 if node is None:
26825 pnode = 0
26826 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26827 pnode = int(node)
26828 else:
26829 pnode = int(cudaGraphNode_t(node))
26830 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26831 cydst = _HelperInputVoidPtr(dst)
26832 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
26833 cysrc = _HelperInputVoidPtr(src)
26834 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
26835 cdef cyruntime.cudaMemcpyKind cykind = kind.value
26836 with nogil:
26837 err = cyruntime.cudaGraphMemcpyNodeSetParams1D(cynode, cydst_ptr, cysrc_ptr, count, cykind)
26838 return (_dict_cudaError_t[err],)
26840@cython.embedsignature(True)
26841def cudaGraphAddMemsetNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, pMemsetParams : Optional[cudaMemsetParams]):
26842 """ Creates a memset node and adds it to a graph.
26844 Creates a new memset node and adds it to `graph` with `numDependencies`
26845 dependencies specified via `pDependencies`. It is possible for
26846 `numDependencies` to be 0, in which case the node will be placed at the
26847 root of the graph. `pDependencies` may not have any duplicate entries.
26848 A handle to the new node will be returned in `pGraphNode`.
26850 The element size must be 1, 2, or 4 bytes. When the graph is launched,
26851 the node will perform the memset described by `pMemsetParams`.
26853 Parameters
26854 ----------
26855 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
26856 Graph to which to add the node
26857 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
26858 Dependencies of the node
26859 numDependencies : size_t
26860 Number of dependencies
26861 pMemsetParams : :py:obj:`~.cudaMemsetParams`
26862 Parameters for the memory set
26864 Returns
26865 -------
26866 cudaError_t
26867 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
26868 pGraphNode : :py:obj:`~.cudaGraphNode_t`
26869 Returns newly created node
26871 See Also
26872 --------
26873 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaGraphMemsetNodeGetParams`, :py:obj:`~.cudaGraphMemsetNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`
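    Examples
    --------
    A minimal sketch (an editorial addition) that zero-fills a 1 KiB device
    allocation through a graph node:

    >>> from cuda.bindings import runtime
    >>> err, graph = runtime.cudaGraphCreate(0)
    >>> err, dptr = runtime.cudaMalloc(1024)
    >>> p = runtime.cudaMemsetParams()
    >>> p.dst = dptr
    >>> p.value = 0
    >>> p.elementSize = 1    # must be 1, 2, or 4
    >>> p.width = 1024       # elements per row
    >>> p.height = 1         # single row, so pitch is ignored
    >>> err, node = runtime.cudaGraphAddMemsetNode(graph, None, 0, p)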
26874 """
26875 pDependencies = [] if pDependencies is None else pDependencies
26876 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
26877        raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
26878 cdef cyruntime.cudaGraph_t cygraph
26879 if graph is None:
26880 pgraph = 0
26881 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
26882 pgraph = int(graph)
26883 else:
26884 pgraph = int(cudaGraph_t(graph))
26885 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
26886 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
26887 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
26888 if len(pDependencies) > 1:
26889 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
26890 if cypDependencies is NULL:
26891 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
26892 else:
26893 for idx in range(len(pDependencies)):
26894 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
26895 elif len(pDependencies) == 1:
26896 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
26897 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
26898 cdef cyruntime.cudaMemsetParams* cypMemsetParams_ptr = pMemsetParams._pvt_ptr if pMemsetParams is not None else NULL
26899 with nogil:
26900 err = cyruntime.cudaGraphAddMemsetNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypMemsetParams_ptr)
26901 if len(pDependencies) > 1 and cypDependencies is not NULL:
26902 free(cypDependencies)
26903 if err != cyruntime.cudaSuccess:
26904 return (_dict_cudaError_t[err], None)
26905 return (_dict_cudaError_t[err], pGraphNode)
26907@cython.embedsignature(True)
26908def cudaGraphMemsetNodeGetParams(node):
26909 """ Returns a memset node's parameters.
26911 Returns the parameters of memset node `node` in `pNodeParams`.
26913 Parameters
26914 ----------
26915 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26916 Node to get the parameters for
26918 Returns
26919 -------
26920 cudaError_t
26921 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26922 pNodeParams : :py:obj:`~.cudaMemsetParams`
26923 Pointer to return the parameters
26925 See Also
26926 --------
26927 :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphMemsetNodeSetParams`
26928 """
26929 cdef cyruntime.cudaGraphNode_t cynode
26930 if node is None:
26931 pnode = 0
26932 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26933 pnode = int(node)
26934 else:
26935 pnode = int(cudaGraphNode_t(node))
26936 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26937 cdef cudaMemsetParams pNodeParams = cudaMemsetParams()
26938 with nogil:
26939 err = cyruntime.cudaGraphMemsetNodeGetParams(cynode, <cyruntime.cudaMemsetParams*>pNodeParams._pvt_ptr)
26940 if err != cyruntime.cudaSuccess:
26941 return (_dict_cudaError_t[err], None)
26942 return (_dict_cudaError_t[err], pNodeParams)
26944@cython.embedsignature(True)
26945def cudaGraphMemsetNodeSetParams(node, pNodeParams : Optional[cudaMemsetParams]):
26946 """ Sets a memset node's parameters.
26948 Sets the parameters of memset node `node` to `pNodeParams`.
26950 Parameters
26951 ----------
26952 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
26953 Node to set the parameters for
26954 pNodeParams : :py:obj:`~.cudaMemsetParams`
26955 Parameters to copy
26957 Returns
26958 -------
26959 cudaError_t
26960 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
26962 See Also
26963 --------
26964 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaMemset2D`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphMemsetNodeGetParams`
26965 """
26966 cdef cyruntime.cudaGraphNode_t cynode
26967 if node is None:
26968 pnode = 0
26969 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
26970 pnode = int(node)
26971 else:
26972 pnode = int(cudaGraphNode_t(node))
26973 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
26974 cdef cyruntime.cudaMemsetParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
26975 with nogil:
26976 err = cyruntime.cudaGraphMemsetNodeSetParams(cynode, cypNodeParams_ptr)
26977 return (_dict_cudaError_t[err],)
26979@cython.embedsignature(True)
26980def cudaGraphAddHostNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, pNodeParams : Optional[cudaHostNodeParams]):
26981 """ Creates a host execution node and adds it to a graph.
26983 Creates a new CPU execution node and adds it to `graph` with
26984 `numDependencies` dependencies specified via `pDependencies` and
26985 arguments specified in `pNodeParams`. It is possible for
26986 `numDependencies` to be 0, in which case the node will be placed at the
26987 root of the graph. `pDependencies` may not have any duplicate entries.
26988 A handle to the new node will be returned in `pGraphNode`.
26990 When the graph is launched, the node will invoke the specified CPU
26991 function. Host nodes are not supported under MPS with pre-Volta GPUs.
26993 Parameters
26994 ----------
26995 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
26996 Graph to which to add the node
26997 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
26998 Dependencies of the node
26999 numDependencies : size_t
27000 Number of dependencies
27001 pNodeParams : :py:obj:`~.cudaHostNodeParams`
27002 Parameters for the host node
27004 Returns
27005 -------
27006 cudaError_t
27007 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`
27008 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27009 Returns newly created node
27011 See Also
27012 --------
27013 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cudaGraphHostNodeGetParams`, :py:obj:`~.cudaGraphHostNodeSetParams`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
27014 """
27015 pDependencies = [] if pDependencies is None else pDependencies
27016 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27017        raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27018 cdef cyruntime.cudaGraph_t cygraph
27019 if graph is None:
27020 pgraph = 0
27021 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27022 pgraph = int(graph)
27023 else:
27024 pgraph = int(cudaGraph_t(graph))
27025 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27026 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27027 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27028 if len(pDependencies) > 1:
27029 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27030 if cypDependencies is NULL:
27031 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27032 else:
27033 for idx in range(len(pDependencies)):
27034 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27035 elif len(pDependencies) == 1:
27036 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27037 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27038 cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
27039 with nogil:
27040 err = cyruntime.cudaGraphAddHostNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cypNodeParams_ptr)
27041 if len(pDependencies) > 1 and cypDependencies is not NULL:
27042 free(cypDependencies)
27043 if err != cyruntime.cudaSuccess:
27044 return (_dict_cudaError_t[err], None)
27045 return (_dict_cudaError_t[err], pGraphNode)
27047@cython.embedsignature(True)
27048def cudaGraphHostNodeGetParams(node):
27049 """ Returns a host node's parameters.
27051 Returns the parameters of host node `node` in `pNodeParams`.
27053 Parameters
27054 ----------
27055 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27056 Node to get the parameters for
27058 Returns
27059 -------
27060 cudaError_t
27061 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27062 pNodeParams : :py:obj:`~.cudaHostNodeParams`
27063 Pointer to return the parameters
27065 See Also
27066 --------
27067 :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphHostNodeSetParams`
27068 """
27069 cdef cyruntime.cudaGraphNode_t cynode
27070 if node is None:
27071 pnode = 0
27072 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27073 pnode = int(node)
27074 else:
27075 pnode = int(cudaGraphNode_t(node))
27076 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27077 cdef cudaHostNodeParams pNodeParams = cudaHostNodeParams()
27078 with nogil:
27079 err = cyruntime.cudaGraphHostNodeGetParams(cynode, <cyruntime.cudaHostNodeParams*>pNodeParams._pvt_ptr)
27080 if err != cyruntime.cudaSuccess:
27081 return (_dict_cudaError_t[err], None)
27082 return (_dict_cudaError_t[err], pNodeParams)
27084@cython.embedsignature(True)
27085def cudaGraphHostNodeSetParams(node, pNodeParams : Optional[cudaHostNodeParams]):
27086 """ Sets a host node's parameters.
27088 Sets the parameters of host node `node` to `pNodeParams`.
27090 Parameters
27091 ----------
27092 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27093 Node to set the parameters for
27094 pNodeParams : :py:obj:`~.cudaHostNodeParams`
27095 Parameters to copy
27097 Returns
27098 -------
27099 cudaError_t
27100 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27102 See Also
27103 --------
27104 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaLaunchHostFunc`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphHostNodeGetParams`
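Examples
--------
A small round-trip sketch (assumes `node` is an existing host node;
error checks elided)::

    from cuda.bindings import runtime as cudart

    err, params = cudart.cudaGraphHostNodeGetParams(node)
    params.userData = 0            # retarget the callback's user data
    err, = cudart.cudaGraphHostNodeSetParams(node, params)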
27105 """
27106 cdef cyruntime.cudaGraphNode_t cynode
27107 if node is None:
27108 pnode = 0
27109 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27110 pnode = int(node)
27111 else:
27112 pnode = int(cudaGraphNode_t(node))
27113 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27114 cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
27115 with nogil:
27116 err = cyruntime.cudaGraphHostNodeSetParams(cynode, cypNodeParams_ptr)
27117 return (_dict_cudaError_t[err],)
27119@cython.embedsignature(True)
27120def cudaGraphAddChildGraphNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, childGraph):
27121 """ Creates a child graph node and adds it to a graph.
27123 Creates a new node which executes an embedded graph, and adds it to
27124 `graph` with `numDependencies` dependencies specified via
27125 `pDependencies`. It is possible for `numDependencies` to be 0, in which
27126 case the node will be placed at the root of the graph. `pDependencies`
27127 may not have any duplicate entries. A handle to the new node will be
27128 returned in `pGraphNode`.
27130 If `childGraph` contains allocation nodes, free nodes, or conditional
27131 nodes, this call will return an error.
27133 The node executes an embedded child graph. The child graph is cloned in
27134 this call.
27136 Parameters
27137 ----------
27138 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27139 Graph to which to add the node
27140 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27141 Dependencies of the node
27142 numDependencies : size_t
27143 Number of dependencies
27144 childGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27145 The graph to clone into this node
27147 Returns
27148 -------
27149 cudaError_t
27150 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27151 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27152 Returns newly created node
27154 See Also
27155 --------
27156 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphChildGraphNodeGetGraph`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphClone`
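Examples
--------
A minimal sketch: build a small graph, then embed it in a parent graph
(error checks elided for brevity)::

    from cuda.bindings import runtime as cudart

    err, child = cudart.cudaGraphCreate(0)
    err, _ = cudart.cudaGraphAddEmptyNode(child, None, 0)
    err, parent = cudart.cudaGraphCreate(0)
    # The child graph is cloned here; later changes to `child` do not
    # affect the embedded copy.
    err, node = cudart.cudaGraphAddChildGraphNode(parent, None, 0, child)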
27157 """
27158 cdef cyruntime.cudaGraph_t cychildGraph
27159 if childGraph is None:
27160 pchildGraph = 0
27161 elif isinstance(childGraph, (cudaGraph_t,driver.CUgraph)):
27162 pchildGraph = int(childGraph)
27163 else:
27164 pchildGraph = int(cudaGraph_t(childGraph))
27165 cychildGraph = <cyruntime.cudaGraph_t><void_ptr>pchildGraph
27166 pDependencies = [] if pDependencies is None else pDependencies
27167 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27168 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27169 cdef cyruntime.cudaGraph_t cygraph
27170 if graph is None:
27171 pgraph = 0
27172 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27173 pgraph = int(graph)
27174 else:
27175 pgraph = int(cudaGraph_t(graph))
27176 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27177 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27178 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27179 if len(pDependencies) > 1:
27180 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27181 if cypDependencies is NULL:
27182 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27183 else:
27184 for idx in range(len(pDependencies)):
27185 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27186 elif len(pDependencies) == 1:
27187 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27188 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27189 with nogil:
27190 err = cyruntime.cudaGraphAddChildGraphNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cychildGraph)
27191 if len(pDependencies) > 1 and cypDependencies is not NULL:
27192 free(cypDependencies)
27193 if err != cyruntime.cudaSuccess:
27194 return (_dict_cudaError_t[err], None)
27195 return (_dict_cudaError_t[err], pGraphNode)
27197@cython.embedsignature(True)
27198def cudaGraphChildGraphNodeGetGraph(node):
27199 """ Gets a handle to the embedded graph of a child graph node.
27201 Gets a handle to the embedded graph in a child graph node. This call
27202 does not clone the graph. Changes to the graph will be reflected in the
27203 node, and the node retains ownership of the graph.
27205 Allocation and free nodes cannot be added to the returned graph.
27206 Attempting to do so will return an error.
27208 Parameters
27209 ----------
27210 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27211 Node to get the embedded graph for
27213 Returns
27214 -------
27215 cudaError_t
27216 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27217 pGraph : :py:obj:`~.cudaGraph_t`
27218 Location to store a handle to the graph
27220 See Also
27221 --------
27222 :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphNodeFindInClone`
27223 """
27224 cdef cyruntime.cudaGraphNode_t cynode
27225 if node is None:
27226 pnode = 0
27227 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27228 pnode = int(node)
27229 else:
27230 pnode = int(cudaGraphNode_t(node))
27231 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27232 cdef cudaGraph_t pGraph = cudaGraph_t()
27233 with nogil:
27234 err = cyruntime.cudaGraphChildGraphNodeGetGraph(cynode, <cyruntime.cudaGraph_t*>pGraph._pvt_ptr)
27235 if err != cyruntime.cudaSuccess:
27236 return (_dict_cudaError_t[err], None)
27237 return (_dict_cudaError_t[err], pGraph)
27239@cython.embedsignature(True)
27240def cudaGraphAddEmptyNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies):
27241 """ Creates an empty node and adds it to a graph.
27243 Creates a new node which performs no operation, and adds it to `graph`
27244 with `numDependencies` dependencies specified via `pDependencies`. It
27245 is possible for `numDependencies` to be 0, in which case the node will
27246 be placed at the root of the graph. `pDependencies` may not have any
27247 duplicate entries. A handle to the new node will be returned in
27248 `pGraphNode`.
27250 An empty node performs no operation during execution, but can be used
27251 for transitive ordering. For example, a phased execution graph with 2
27252 groups of n nodes with a barrier between them can be represented using
27253 an empty node and 2*n dependency edges, rather than the n^2 dependency
27254 edges needed with no empty node (see the sketch below).
27256 Parameters
27257 ----------
27258 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27259 Graph to which to add the node
27260 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27261 Dependencies of the node
27262 numDependencies : size_t
27263 Number of dependencies
27265 Returns
27266 -------
27267 cudaError_t
27268 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27269 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27270 Returns newly created node
27272 See Also
27273 --------
27274 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
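Examples
--------
A sketch of the barrier pattern described above: two phases of `n`
nodes joined through one empty node, giving 2*n edges instead of n^2.
Empty nodes stand in for real work nodes here::

    from cuda.bindings import runtime as cudart

    n = 4
    err, graph = cudart.cudaGraphCreate(0)
    phase1 = [cudart.cudaGraphAddEmptyNode(graph, None, 0)[1] for _ in range(n)]
    # The barrier depends on every node of phase 1 ...
    err, barrier = cudart.cudaGraphAddEmptyNode(graph, phase1, n)
    # ... and every node of phase 2 depends only on the barrier.
    phase2 = [cudart.cudaGraphAddEmptyNode(graph, [barrier], 1)[1] for _ in range(n)]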
27275 """
27276 pDependencies = [] if pDependencies is None else pDependencies
27277 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27278 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27279 cdef cyruntime.cudaGraph_t cygraph
27280 if graph is None:
27281 pgraph = 0
27282 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27283 pgraph = int(graph)
27284 else:
27285 pgraph = int(cudaGraph_t(graph))
27286 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27287 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27288 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27289 if len(pDependencies) > 1:
27290 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27291 if cypDependencies is NULL:
27292 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27293 else:
27294 for idx in range(len(pDependencies)):
27295 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27296 elif len(pDependencies) == 1:
27297 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27298 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27299 with nogil:
27300 err = cyruntime.cudaGraphAddEmptyNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies)
27301 if len(pDependencies) > 1 and cypDependencies is not NULL:
27302 free(cypDependencies)
27303 if err != cyruntime.cudaSuccess:
27304 return (_dict_cudaError_t[err], None)
27305 return (_dict_cudaError_t[err], pGraphNode)
27307@cython.embedsignature(True)
27308def cudaGraphAddEventRecordNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, event):
27309 """ Creates an event record node and adds it to a graph.
27311 Creates a new event record node and adds it to `graph` with
27312 `numDependencies` dependencies specified via `pDependencies` and event
27313 specified in `event`. It is possible for `numDependencies` to be 0, in
27314 which case the node will be placed at the root of the graph.
27315 `pDependencies` may not have any duplicate entries. A handle to the new
27316 node will be returned in `pGraphNode`.
27318 Each launch of the graph will record `event` to capture execution of
27319 the node's dependencies.
27321 These nodes may not be used in loops or conditionals.
27323 Parameters
27324 ----------
27325 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27326 Graph to which to add the node
27327 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27328 Dependencies of the node
27329 numDependencies : size_t
27330 Number of dependencies
27331 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
27332 Event for the node
27334 Returns
27335 -------
27336 cudaError_t
27337 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27338 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27339 Returns newly created node
27341 See Also
27342 --------
27343 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
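Examples
--------
A minimal sketch (error checks elided)::

    from cuda.bindings import runtime as cudart

    err, event = cudart.cudaEventCreate()
    err, graph = cudart.cudaGraphCreate(0)
    err, node = cudart.cudaGraphAddEventRecordNode(graph, None, 0, event)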
27344 """
27345 cdef cyruntime.cudaEvent_t cyevent
27346 if event is None:
27347 pevent = 0
27348 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
27349 pevent = int(event)
27350 else:
27351 pevent = int(cudaEvent_t(event))
27352 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
27353 pDependencies = [] if pDependencies is None else pDependencies
27354 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27355 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27356 cdef cyruntime.cudaGraph_t cygraph
27357 if graph is None:
27358 pgraph = 0
27359 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27360 pgraph = int(graph)
27361 else:
27362 pgraph = int(cudaGraph_t(graph))
27363 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27364 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27365 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27366 if len(pDependencies) > 1:
27367 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27368 if cypDependencies is NULL:
27369 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27370 else:
27371 for idx in range(len(pDependencies)):
27372 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27373 elif len(pDependencies) == 1:
27374 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27375 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27376 with nogil:
27377 err = cyruntime.cudaGraphAddEventRecordNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cyevent)
27378 if len(pDependencies) > 1 and cypDependencies is not NULL:
27379 free(cypDependencies)
27380 if err != cyruntime.cudaSuccess:
27381 return (_dict_cudaError_t[err], None)
27382 return (_dict_cudaError_t[err], pGraphNode)
27384@cython.embedsignature(True)
27385def cudaGraphEventRecordNodeGetEvent(node):
27386 """ Returns the event associated with an event record node.
27388 Returns the event of event record node `node` in `event_out`.
27390 Parameters
27391 ----------
27392 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27393 Node to get the event for
27395 Returns
27396 -------
27397 cudaError_t
27398 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27399 event_out : :py:obj:`~.cudaEvent_t`
27400 Pointer to return the event
27402 See Also
27403 --------
27404 :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphEventWaitNodeGetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`
27405 """
27406 cdef cyruntime.cudaGraphNode_t cynode
27407 if node is None:
27408 pnode = 0
27409 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27410 pnode = int(node)
27411 else:
27412 pnode = int(cudaGraphNode_t(node))
27413 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27414 cdef cudaEvent_t event_out = cudaEvent_t()
27415 with nogil:
27416 err = cyruntime.cudaGraphEventRecordNodeGetEvent(cynode, <cyruntime.cudaEvent_t*>event_out._pvt_ptr)
27417 if err != cyruntime.cudaSuccess:
27418 return (_dict_cudaError_t[err], None)
27419 return (_dict_cudaError_t[err], event_out)
27421@cython.embedsignature(True)
27422def cudaGraphEventRecordNodeSetEvent(node, event):
27423 """ Sets an event record node's event.
27425 Sets the event of event record node `node` to `event`.
27427 Parameters
27428 ----------
27429 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27430 Node to set the event for
27431 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
27432 Event to use
27434 Returns
27435 -------
27436 cudaError_t
27437 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27439 See Also
27440 --------
27441 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphEventRecordNodeGetEvent`, :py:obj:`~.cudaGraphEventWaitNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`
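Examples
--------
A sketch retargeting an existing record node (`node` assumed to exist)
to a freshly created event::

    from cuda.bindings import runtime as cudart

    err, newEvent = cudart.cudaEventCreate()
    err, = cudart.cudaGraphEventRecordNodeSetEvent(node, newEvent)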
27442 """
27443 cdef cyruntime.cudaEvent_t cyevent
27444 if event is None:
27445 pevent = 0
27446 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
27447 pevent = int(event)
27448 else:
27449 pevent = int(cudaEvent_t(event))
27450 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
27451 cdef cyruntime.cudaGraphNode_t cynode
27452 if node is None:
27453 pnode = 0
27454 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27455 pnode = int(node)
27456 else:
27457 pnode = int(cudaGraphNode_t(node))
27458 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27459 with nogil:
27460 err = cyruntime.cudaGraphEventRecordNodeSetEvent(cynode, cyevent)
27461 return (_dict_cudaError_t[err],)
27463@cython.embedsignature(True)
27464def cudaGraphAddEventWaitNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, event):
27465 """ Creates an event wait node and adds it to a graph.
27467 Creates a new event wait node and adds it to `graph` with
27468 `numDependencies` dependencies specified via `pDependencies` and event
27469 specified in `event`. It is possible for `numDependencies` to be 0, in
27470 which case the node will be placed at the root of the graph.
27471 `pDependencies` may not have any duplicate entries. A handle to the new
27472 node will be returned in `pGraphNode`.
27474 The graph node will wait for all work captured in `event`. See
27475 :py:obj:`~.cuEventRecord()` for details on what is captured by an
27476 event. The synchronization will be performed efficiently on the device
27477 when applicable. `event` may be from a different context or device than
27478 the launch stream.
27480 These nodes may not be used in loops or conditionals.
27482 Parameters
27483 ----------
27484 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27485 Graph to which to add the node
27486 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27487 Dependencies of the node
27488 numDependencies : size_t
27489 Number of dependencies
27490 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
27491 Event for the node
27493 Returns
27494 -------
27495 cudaError_t
27496 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27497 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27498 Returns newly created node
27500 See Also
27501 --------
27502 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
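Examples
--------
A minimal sketch (error checks elided)::

    from cuda.bindings import runtime as cudart

    err, event = cudart.cudaEventCreate()
    err, graph = cudart.cudaGraphCreate(0)
    # Wait on work captured in `event` (typically recorded by another
    # graph launch or stream) before downstream nodes may run.
    err, node = cudart.cudaGraphAddEventWaitNode(graph, None, 0, event)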
27503 """
27504 cdef cyruntime.cudaEvent_t cyevent
27505 if event is None:
27506 pevent = 0
27507 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
27508 pevent = int(event)
27509 else:
27510 pevent = int(cudaEvent_t(event))
27511 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
27512 pDependencies = [] if pDependencies is None else pDependencies
27513 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27514 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27515 cdef cyruntime.cudaGraph_t cygraph
27516 if graph is None:
27517 pgraph = 0
27518 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27519 pgraph = int(graph)
27520 else:
27521 pgraph = int(cudaGraph_t(graph))
27522 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27523 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27524 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27525 if len(pDependencies) > 1:
27526 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27527 if cypDependencies is NULL:
27528 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27529 else:
27530 for idx in range(len(pDependencies)):
27531 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27532 elif len(pDependencies) == 1:
27533 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27534 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27535 with nogil:
27536 err = cyruntime.cudaGraphAddEventWaitNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cyevent)
27537 if len(pDependencies) > 1 and cypDependencies is not NULL:
27538 free(cypDependencies)
27539 if err != cyruntime.cudaSuccess:
27540 return (_dict_cudaError_t[err], None)
27541 return (_dict_cudaError_t[err], pGraphNode)
27543@cython.embedsignature(True)
27544def cudaGraphEventWaitNodeGetEvent(node):
27545 """ Returns the event associated with an event wait node.
27547 Returns the event of event wait node `node` in `event_out`.
27549 Parameters
27550 ----------
27551 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27552 Node to get the event for
27554 Returns
27555 -------
27556 cudaError_t
27557 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27558 event_out : :py:obj:`~.cudaEvent_t`
27559 Pointer to return the event
27561 See Also
27562 --------
27563 :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphEventRecordNodeGetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`
27564 """
27565 cdef cyruntime.cudaGraphNode_t cynode
27566 if node is None:
27567 pnode = 0
27568 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27569 pnode = int(node)
27570 else:
27571 pnode = int(cudaGraphNode_t(node))
27572 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27573 cdef cudaEvent_t event_out = cudaEvent_t()
27574 with nogil:
27575 err = cyruntime.cudaGraphEventWaitNodeGetEvent(cynode, <cyruntime.cudaEvent_t*>event_out._pvt_ptr)
27576 if err != cyruntime.cudaSuccess:
27577 return (_dict_cudaError_t[err], None)
27578 return (_dict_cudaError_t[err], event_out)
27580@cython.embedsignature(True)
27581def cudaGraphEventWaitNodeSetEvent(node, event):
27582 """ Sets an event wait node's event.
27584 Sets the event of event wait node `node` to `event`.
27586 Parameters
27587 ----------
27588 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27589 Node to set the event for
27590 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
27591 Event to use
27593 Returns
27594 -------
27595 cudaError_t
27596 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27598 See Also
27599 --------
27600 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphEventWaitNodeGetEvent`, :py:obj:`~.cudaGraphEventRecordNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`
27601 """
27602 cdef cyruntime.cudaEvent_t cyevent
27603 if event is None:
27604 pevent = 0
27605 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
27606 pevent = int(event)
27607 else:
27608 pevent = int(cudaEvent_t(event))
27609 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
27610 cdef cyruntime.cudaGraphNode_t cynode
27611 if node is None:
27612 pnode = 0
27613 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
27614 pnode = int(node)
27615 else:
27616 pnode = int(cudaGraphNode_t(node))
27617 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
27618 with nogil:
27619 err = cyruntime.cudaGraphEventWaitNodeSetEvent(cynode, cyevent)
27620 return (_dict_cudaError_t[err],)
27622@cython.embedsignature(True)
27623def cudaGraphAddExternalSemaphoresSignalNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, nodeParams : Optional[cudaExternalSemaphoreSignalNodeParams]):
27624 """ Creates an external semaphore signal node and adds it to a graph.
27626 Creates a new external semaphore signal node and adds it to `graph`
27627 with `numDependencies` dependencies specified via `pDependencies` and
27628 arguments specified in `nodeParams`. It is possible for
27629 `numDependencies` to be 0, in which case the node will be placed at the
27630 root of the graph. `pDependencies` may not have any duplicate entries. A
27631 handle to the new node will be returned in `pGraphNode`.
27633 Performs a signal operation on a set of externally allocated semaphore
27634 objects when the node is launched. The operation(s) will occur after
27635 all of the node's dependencies have completed.
27637 Parameters
27638 ----------
27639 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27640 Graph to which to add the node
27641 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27642 Dependencies of the node
27643 numDependencies : size_t
27644 Number of dependencies
27645 nodeParams : :py:obj:`~.cudaExternalSemaphoreSignalNodeParams`
27646 Parameters for the node
27648 Returns
27649 -------
27650 cudaError_t
27651 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27652 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27653 Returns newly created node
27655 See Also
27656 --------
27657 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeGetParams`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
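Examples
--------
A hedged sketch. `graph` and `extSem` are assumed to exist (`extSem`
obtained earlier via :py:obj:`~.cudaImportExternalSemaphore`), and the
list-valued assignments to the array fields rely on this binding's
struct helpers; treat this as a sketch, not a verified recipe::

    from cuda.bindings import runtime as cudart

    sigParams = cudart.cudaExternalSemaphoreSignalParams()
    sigParams.params.fence.value = 1
    nodeParams = cudart.cudaExternalSemaphoreSignalNodeParams()
    nodeParams.extSemArray = [extSem]
    nodeParams.paramsArray = [sigParams]
    nodeParams.numExtSems = 1
    err, node = cudart.cudaGraphAddExternalSemaphoresSignalNode(graph, None, 0, nodeParams)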
27658 """
27659 pDependencies = [] if pDependencies is None else pDependencies
27660 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27661 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27662 cdef cyruntime.cudaGraph_t cygraph
27663 if graph is None:
27664 pgraph = 0
27665 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27666 pgraph = int(graph)
27667 else:
27668 pgraph = int(cudaGraph_t(graph))
27669 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27670 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27671 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27672 if len(pDependencies) > 1:
27673 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27674 if cypDependencies is NULL:
27675 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27676 else:
27677 for idx in range(len(pDependencies)):
27678 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27679 elif len(pDependencies) == 1:
27680 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27681 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27682 cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
27683 with nogil:
27684 err = cyruntime.cudaGraphAddExternalSemaphoresSignalNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr)
27685 if len(pDependencies) > 1 and cypDependencies is not NULL:
27686 free(cypDependencies)
27687 if err != cyruntime.cudaSuccess:
27688 return (_dict_cudaError_t[err], None)
27689 return (_dict_cudaError_t[err], pGraphNode)
27691@cython.embedsignature(True)
27692def cudaGraphExternalSemaphoresSignalNodeGetParams(hNode):
27693 """ Returns an external semaphore signal node's parameters.
27695 Returns the parameters of an external semaphore signal node `hNode` in
27696 `params_out`. The `extSemArray` and `paramsArray` returned in
27697 `params_out` are owned by the node. This memory remains valid until
27698 the node is destroyed or its parameters are modified, and should not be
27699 modified directly. Use
27700 :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams` to update
27701 the parameters of this node.
27703 Parameters
27704 ----------
27705 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27706 Node to get the parameters for
27708 Returns
27709 -------
27710 cudaError_t
27711 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27712 params_out : :py:obj:`~.cudaExternalSemaphoreSignalNodeParams`
27713 Pointer to return the parameters
27715 See Also
27716 --------
27717 :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
27718 """
27719 cdef cyruntime.cudaGraphNode_t cyhNode
27720 if hNode is None:
27721 phNode = 0
27722 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
27723 phNode = int(hNode)
27724 else:
27725 phNode = int(cudaGraphNode_t(hNode))
27726 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
27727 cdef cudaExternalSemaphoreSignalNodeParams params_out = cudaExternalSemaphoreSignalNodeParams()
27728 with nogil:
27729 err = cyruntime.cudaGraphExternalSemaphoresSignalNodeGetParams(cyhNode, <cyruntime.cudaExternalSemaphoreSignalNodeParams*>params_out._pvt_ptr)
27730 if err != cyruntime.cudaSuccess:
27731 return (_dict_cudaError_t[err], None)
27732 return (_dict_cudaError_t[err], params_out)
27734@cython.embedsignature(True)
27735def cudaGraphExternalSemaphoresSignalNodeSetParams(hNode, nodeParams : Optional[cudaExternalSemaphoreSignalNodeParams]):
27736 """ Sets an external semaphore signal node's parameters.
27738 Sets the parameters of an external semaphore signal node `hNode` to
27739 `nodeParams`.
27741 Parameters
27742 ----------
27743 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27744 Node to set the parameters for
27745 nodeParams : :py:obj:`~.cudaExternalSemaphoreSignalNodeParams`
27746 Parameters to copy
27748 Returns
27749 -------
27750 cudaError_t
27751 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27753 See Also
27754 --------
27755 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphExternalSemaphoresSignalNodeGetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
27756 """
27757 cdef cyruntime.cudaGraphNode_t cyhNode
27758 if hNode is None:
27759 phNode = 0
27760 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
27761 phNode = int(hNode)
27762 else:
27763 phNode = int(cudaGraphNode_t(hNode))
27764 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
27765 cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
27766 with nogil:
27767 err = cyruntime.cudaGraphExternalSemaphoresSignalNodeSetParams(cyhNode, cynodeParams_ptr)
27768 return (_dict_cudaError_t[err],)
27770@cython.embedsignature(True)
27771def cudaGraphAddExternalSemaphoresWaitNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, nodeParams : Optional[cudaExternalSemaphoreWaitNodeParams]):
27772 """ Creates an external semaphore wait node and adds it to a graph.
27774 Creates a new external semaphore wait node and adds it to `graph` with
27775 `numDependencies` dependencies specified via `pDependencies` and
27776 arguments specified in `nodeParams`. It is possible for
27777 `numDependencies` to be 0, in which case the node will be placed at the
27778 root of the graph. `pDependencies` may not have any duplicate entries. A
27779 handle to the new node will be returned in `pGraphNode`.
27781 Performs a wait operation on a set of externally allocated semaphore
27782 objects when the node is launched. Nodes that depend on this node will
27783 not be launched until the wait operation has completed.
27785 Parameters
27786 ----------
27787 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27788 Graph to which to add the node
27789 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27790 Dependencies of the node
27791 numDependencies : size_t
27792 Number of dependencies
27793 nodeParams : :py:obj:`~.cudaExternalSemaphoreWaitNodeParams`
27794 Parameters for the node
27796 Returns
27797 -------
27798 cudaError_t
27799 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27800 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27801 Returns newly created node
27803 See Also
27804 --------
27805 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeGetParams`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
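Examples
--------
Analogous to the signal-node sketch above, with wait parameters
(`graph` and `extSem` assumed to exist; list-valued field assignment is
again an assumption about the binding's struct helpers)::

    from cuda.bindings import runtime as cudart

    waitParams = cudart.cudaExternalSemaphoreWaitParams()
    waitParams.params.fence.value = 1
    nodeParams = cudart.cudaExternalSemaphoreWaitNodeParams()
    nodeParams.extSemArray = [extSem]
    nodeParams.paramsArray = [waitParams]
    nodeParams.numExtSems = 1
    err, node = cudart.cudaGraphAddExternalSemaphoresWaitNode(graph, None, 0, nodeParams)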
27806 """
27807 pDependencies = [] if pDependencies is None else pDependencies
27808 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27809 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27810 cdef cyruntime.cudaGraph_t cygraph
27811 if graph is None:
27812 pgraph = 0
27813 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
27814 pgraph = int(graph)
27815 else:
27816 pgraph = int(cudaGraph_t(graph))
27817 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
27818 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
27819 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
27820 if len(pDependencies) > 1:
27821 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
27822 if cypDependencies is NULL:
27823 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
27824 else:
27825 for idx in range(len(pDependencies)):
27826 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
27827 elif len(pDependencies) == 1:
27828 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
27829 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
27830 cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
27831 with nogil:
27832 err = cyruntime.cudaGraphAddExternalSemaphoresWaitNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr)
27833 if len(pDependencies) > 1 and cypDependencies is not NULL:
27834 free(cypDependencies)
27835 if err != cyruntime.cudaSuccess:
27836 return (_dict_cudaError_t[err], None)
27837 return (_dict_cudaError_t[err], pGraphNode)
27839@cython.embedsignature(True)
27840def cudaGraphExternalSemaphoresWaitNodeGetParams(hNode):
27841 """ Returns an external semaphore wait node's parameters.
27843 Returns the parameters of an external semaphore wait node `hNode` in
27844 `params_out`. The `extSemArray` and `paramsArray` returned in
27845 `params_out` are owned by the node. This memory remains valid until
27846 the node is destroyed or its parameters are modified, and should not be
27847 modified directly. Use
27848 :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeSetParams` to update
27849 the parameters of this node.
27851 Parameters
27852 ----------
27853 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27854 Node to get the parameters for
27856 Returns
27857 -------
27858 cudaError_t
27859 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27860 params_out : :py:obj:`~.cudaExternalSemaphoreWaitNodeParams`
27861 Pointer to return the parameters
27863 See Also
27864 --------
27865 :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
27866 """
27867 cdef cyruntime.cudaGraphNode_t cyhNode
27868 if hNode is None:
27869 phNode = 0
27870 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
27871 phNode = int(hNode)
27872 else:
27873 phNode = int(cudaGraphNode_t(hNode))
27874 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
27875 cdef cudaExternalSemaphoreWaitNodeParams params_out = cudaExternalSemaphoreWaitNodeParams()
27876 with nogil:
27877 err = cyruntime.cudaGraphExternalSemaphoresWaitNodeGetParams(cyhNode, <cyruntime.cudaExternalSemaphoreWaitNodeParams*>params_out._pvt_ptr)
27878 if err != cyruntime.cudaSuccess:
27879 return (_dict_cudaError_t[err], None)
27880 return (_dict_cudaError_t[err], params_out)
27882@cython.embedsignature(True)
27883def cudaGraphExternalSemaphoresWaitNodeSetParams(hNode, nodeParams : Optional[cudaExternalSemaphoreWaitNodeParams]):
27884 """ Sets an external semaphore wait node's parameters.
27886 Sets the parameters of an external semaphore wait node `hNode` to
27887 `nodeParams`.
27889 Parameters
27890 ----------
27891 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
27892 Node to set the parameters for
27893 nodeParams : :py:obj:`~.cudaExternalSemaphoreWaitNodeParams`
27894 Parameters to copy
27896 Returns
27897 -------
27898 cudaError_t
27899 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
27901 See Also
27902 --------
27903 :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphExternalSemaphoresWaitNodeGetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`
27904 """
27905 cdef cyruntime.cudaGraphNode_t cyhNode
27906 if hNode is None:
27907 phNode = 0
27908 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
27909 phNode = int(hNode)
27910 else:
27911 phNode = int(cudaGraphNode_t(hNode))
27912 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
27913 cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
27914 with nogil:
27915 err = cyruntime.cudaGraphExternalSemaphoresWaitNodeSetParams(cyhNode, cynodeParams_ptr)
27916 return (_dict_cudaError_t[err],)
27918@cython.embedsignature(True)
27919def cudaGraphAddMemAllocNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, nodeParams : Optional[cudaMemAllocNodeParams]):
27920 """ Creates an allocation node and adds it to a graph.
27922 Creates a new allocation node and adds it to `graph` with
27923 `numDependencies` dependencies specified via `pDependencies` and
27924 arguments specified in `nodeParams`. It is possible for
27925 `numDependencies` to be 0, in which case the node will be placed at the
27926 root of the graph. `pDependencies` may not have any duplicate entries.
27927 A handle to the new node will be returned in `pGraphNode`.
27929 When :py:obj:`~.cudaGraphAddMemAllocNode` creates an allocation node,
27930 it returns the address of the allocation in `nodeParams.dptr`. The
27931 allocation's address remains fixed across instantiations and launches.
27933 If the allocation is freed in the same graph, by creating a free node
27934 using :py:obj:`~.cudaGraphAddMemFreeNode`, the allocation can be
27935 accessed by nodes ordered after the allocation node but before the free
27936 node. These allocations cannot be freed outside the owning graph, and
27937 they can only be freed once in the owning graph.
27939 If the allocation is not freed in the same graph, then it can be
27940 accessed not only by nodes in the graph which are ordered after the
27941 allocation node, but also by stream operations ordered after the
27942 graph's execution but before the allocation is freed.
27944 Allocations which are not freed in the same graph can be freed by:
27946 - passing the allocation to :py:obj:`~.cudaMemFreeAsync` or
27947 :py:obj:`~.cudaMemFree`;
27949 - launching a graph with a free node for that allocation; or
27951 - specifying :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`
27952 during instantiation, which makes each launch behave as though it
27953 called :py:obj:`~.cudaMemFreeAsync` for every unfreed allocation.
27955 It is not possible to free an allocation in both the owning graph and
27956 another graph. If the allocation is freed in the same graph, a free
27957 node cannot be added to another graph. If the allocation is freed in
27958 another graph, a free node can no longer be added to the owning graph.
27960 The following restrictions apply to graphs which contain allocation
27961 and/or memory free nodes:
27963 - Nodes and edges of the graph cannot be deleted.
27965 - The graph can only be used in a child node if the ownership is moved
27966 to the parent.
27968 - Only one instantiation of the graph may exist at any point in time.
27970 - The graph cannot be cloned.
27972 Parameters
27973 ----------
27974 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
27975 Graph to which to add the node
27976 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
27977 Dependencies of the node
27978 numDependencies : size_t
27979 Number of dependencies
27980 nodeParams : :py:obj:`~.cudaMemAllocNodeParams`
27981 Parameters for the node
27983 Returns
27984 -------
27985 cudaError_t
27986 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
27987 pGraphNode : :py:obj:`~.cudaGraphNode_t`
27988 Returns newly created node
27990 See Also
27991 --------
27992 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaGraphMemAllocNodeGetParams`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
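Examples
--------
A minimal sketch allocating 1 MiB on device 0 (error checks elided;
field spellings follow this binding's structs)::

    from cuda.bindings import runtime as cudart

    err, graph = cudart.cudaGraphCreate(0)
    params = cudart.cudaMemAllocNodeParams()
    params.poolProps.allocType = cudart.cudaMemAllocationType.cudaMemAllocationTypePinned
    params.poolProps.location.type = cudart.cudaMemLocationType.cudaMemLocationTypeDevice
    params.poolProps.location.id = 0
    params.bytesize = 1 << 20
    err, allocNode = cudart.cudaGraphAddMemAllocNode(graph, None, 0, params)
    dptr = params.dptr    # address is fixed across instantiations and launches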
27993 """
27994 pDependencies = [] if pDependencies is None else pDependencies
27995 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
27996 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
27997 cdef cyruntime.cudaGraph_t cygraph
27998 if graph is None:
27999 pgraph = 0
28000 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
28001 pgraph = int(graph)
28002 else:
28003 pgraph = int(cudaGraph_t(graph))
28004 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
28005 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
28006 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
28007 if len(pDependencies) > 1:
28008 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
28009 if cypDependencies is NULL:
28010 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28011 else:
28012 for idx in range(len(pDependencies)):
28013 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
28014 elif len(pDependencies) == 1:
28015 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
28016 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
28017 cdef cyruntime.cudaMemAllocNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
28018 with nogil:
28019 err = cyruntime.cudaGraphAddMemAllocNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cynodeParams_ptr)
28020 if len(pDependencies) > 1 and cypDependencies is not NULL:
28021 free(cypDependencies)
28022 if err != cyruntime.cudaSuccess:
28023 return (_dict_cudaError_t[err], None)
28024 return (_dict_cudaError_t[err], pGraphNode)
28026@cython.embedsignature(True)
28027def cudaGraphMemAllocNodeGetParams(node):
28028 """ Returns a memory alloc node's parameters.
28030 Returns the parameters of a memory alloc node `node` in `params_out`.
28031 The `poolProps` and `accessDescs` returned in `params_out` are owned
28032 by the node. This memory remains valid until the node is destroyed. The
28033 returned parameters must not be modified.
28035 Parameters
28036 ----------
28037 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28038 Node to get the parameters for
28040 Returns
28041 -------
28042 cudaError_t
28043 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28044 params_out : :py:obj:`~.cudaMemAllocNodeParams`
28045 Pointer to return the parameters
28047 See Also
28048 --------
28049 :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphMemFreeNodeGetParams`
28050 """
28051 cdef cyruntime.cudaGraphNode_t cynode
28052 if node is None:
28053 pnode = 0
28054 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
28055 pnode = int(node)
28056 else:
28057 pnode = int(cudaGraphNode_t(node))
28058 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
28059 cdef cudaMemAllocNodeParams params_out = cudaMemAllocNodeParams()
28060 with nogil:
28061 err = cyruntime.cudaGraphMemAllocNodeGetParams(cynode, <cyruntime.cudaMemAllocNodeParams*>params_out._pvt_ptr)
28062 if err != cyruntime.cudaSuccess:
28063 return (_dict_cudaError_t[err], None)
28064 return (_dict_cudaError_t[err], params_out)
28066@cython.embedsignature(True)
28067def cudaGraphAddMemFreeNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], size_t numDependencies, dptr):
28068 """ Creates a memory free node and adds it to a graph.
28070 Creates a new memory free node and adds it to `graph` with
28071 `numDependencies` dependencies specified via `pDependencies` and
28072 address specified in `dptr`. It is possible for `numDependencies` to be
28073 0, in which case the node will be placed at the root of the graph.
28074 `pDependencies` may not have any duplicate entries. A handle to the new
28075 node will be returned in `pGraphNode`.
28077 :py:obj:`~.cudaGraphAddMemFreeNode` will return
28078 :py:obj:`~.cudaErrorInvalidValue` if the user attempts to free:
28080 - an allocation twice in the same graph.
28082 - an address that was not returned by an allocation node.
28084 - an invalid address.
28086 The following restrictions apply to graphs which contain allocation
28087 and/or memory free nodes:
28089 - Nodes and edges of the graph cannot be deleted.
28091 - The graph can only be used in a child node if the ownership is moved
28092 to the parent.
28094 - Only one instantiation of the graph may exist at any point in time.
28096 - The graph cannot be cloned.
28098 Parameters
28099 ----------
28100 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28101 Graph to which to add the node
28102 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
28103 Dependencies of the node
28104 numDependencies : size_t
28105 Number of dependencies
28106 dptr : Any
28107 Address of memory to free
28109 Returns
28110 -------
28111 cudaError_t
28112 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOutOfMemory`
28113 pGraphNode : :py:obj:`~.cudaGraphNode_t`
28114 Returns newly created node
28116 See Also
28117 --------
28118 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphMemFreeNodeGetParams`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphDestroyNode`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
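Examples
--------
Continuing the allocation-node sketch above: free the same address
later in the owning graph (`graph`, `allocNode`, and `dptr` assumed
from that sketch)::

    from cuda.bindings import runtime as cudart

    err, freeNode = cudart.cudaGraphAddMemFreeNode(graph, [allocNode], 1, dptr)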
28119 """
28120 pDependencies = [] if pDependencies is None else pDependencies
28121 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
28122 raise TypeError("Argument 'pDependencies' is not an instance of the expected type (tuple[cudaGraphNode_t | driver.CUgraphNode] or list[cudaGraphNode_t | driver.CUgraphNode])")
28123 cdef cyruntime.cudaGraph_t cygraph
28124 if graph is None:
28125 pgraph = 0
28126 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
28127 pgraph = int(graph)
28128 else:
28129 pgraph = int(cudaGraph_t(graph))
28130 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
28131 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
28132 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
28133 if len(pDependencies) > 1:
28134 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
28135 if cypDependencies is NULL:
28136 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28137 else:
28138 for idx in range(len(pDependencies)):
28139 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
28140 elif len(pDependencies) == 1:
28141 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
28142 if numDependencies > <size_t>len(pDependencies): raise RuntimeError("List is too small: " + str(len(pDependencies)) + " < " + str(numDependencies))
28143 cydptr = _HelperInputVoidPtr(dptr)
28144 cdef void* cydptr_ptr = <void*><void_ptr>cydptr.cptr
28145 with nogil:
28146 err = cyruntime.cudaGraphAddMemFreeNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, numDependencies, cydptr_ptr)
28147 if len(pDependencies) > 1 and cypDependencies is not NULL:
28148 free(cypDependencies)
28149 if err != cyruntime.cudaSuccess:
28150 return (_dict_cudaError_t[err], None)
28151 return (_dict_cudaError_t[err], pGraphNode)
28153@cython.embedsignature(True)
28154def cudaGraphMemFreeNodeGetParams(node):
28155 """ Returns a memory free node's parameters.
28157 Returns the address of a memory free node `node` in `dptr_out`.
28159 Parameters
28160 ----------
28161 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28162 Node to get the parameters for
28164 Returns
28165 -------
28166 cudaError_t
28167 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28168 dptr_out : Any
28169 Pointer to return the device address
28171 See Also
28172 --------
28173 :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaGraphMemAllocNodeGetParams`
28174 """
28175 cdef cyruntime.cudaGraphNode_t cynode
28176 if node is None:
28177 pnode = 0
28178 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
28179 pnode = int(node)
28180 else:
28181 pnode = int(cudaGraphNode_t(node))
28182 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
28183 cdef void_ptr dptr_out = 0
28184 cdef void* cydptr_out_ptr = <void*>&dptr_out
28185 with nogil:
28186 err = cyruntime.cudaGraphMemFreeNodeGetParams(cynode, cydptr_out_ptr)
28187 if err != cyruntime.cudaSuccess:
28188 return (_dict_cudaError_t[err], None)
28189 return (_dict_cudaError_t[err], dptr_out)
28191@cython.embedsignature(True)
28192def cudaDeviceGraphMemTrim(int device):
28193 Frees back to the OS any unused memory that was cached on the specified device for use with graphs.
28195 Blocks which are not in use by a graph that is either currently
28196 executing or scheduled to execute are freed back to the operating
28197 system.
28199 Parameters
28200 ----------
28201 device : int
28202 The device for which cached memory should be freed.
28204 Returns
28205 -------
28206 cudaError_t
28207 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28209 See Also
28210 --------
28211 :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`
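Examples
--------
A minimal sketch (added for illustration, not from the CUDA docs;
assumes device 0 is visible and may hold cached graph memory):

>>> from cuda.bindings import runtime
>>> err, = runtime.cudaDeviceGraphMemTrim(0)
>>> assert err == runtime.cudaError_t.cudaSuccess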
28212 """
28213 with nogil:
28214 err = cyruntime.cudaDeviceGraphMemTrim(device)
28215 return (_dict_cudaError_t[err],)
28217@cython.embedsignature(True)
28218def cudaDeviceGetGraphMemAttribute(int device, attr not None : cudaGraphMemAttributeType):
28219 """ Query asynchronous allocation attributes related to graphs.
28221 Valid attributes are:
28223 - :py:obj:`~.cudaGraphMemAttrUsedMemCurrent`: Amount of memory, in
28224 bytes, currently associated with graphs
28226 - :py:obj:`~.cudaGraphMemAttrUsedMemHigh`: High watermark of memory, in
28227 bytes, associated with graphs since the last time it was reset. High
28228 watermark can only be reset to zero.
28230 - :py:obj:`~.cudaGraphMemAttrReservedMemCurrent`: Amount of memory, in
28231 bytes, currently allocated for use by the CUDA graphs asynchronous
28232 allocator.
28234 - :py:obj:`~.cudaGraphMemAttrReservedMemHigh`: High watermark of
28235 memory, in bytes, currently allocated for use by the CUDA graphs
28236 asynchronous allocator.
28238 Parameters
28239 ----------
28240 device : int
28241 Specifies the scope of the query
28242 attr : :py:obj:`~.cudaGraphMemAttributeType`
28243 attribute to get
28245 Returns
28246 -------
28247 cudaError_t
28248 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
28249 value : Any
28250 retrieved value
28252 See Also
28253 --------
28254 :py:obj:`~.cudaDeviceSetGraphMemAttribute`, :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`
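Examples
--------
A minimal sketch querying the current graph memory usage on device 0
(illustrative only; attribute values are reported in bytes):

>>> from cuda.bindings import runtime
>>> attr = runtime.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemCurrent
>>> err, used = runtime.cudaDeviceGetGraphMemAttribute(0, attr)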
28255 """
28256 cdef cyruntime.cudaGraphMemAttributeType cyattr = attr.value
28257 cdef _HelperCUgraphMem_attribute cyvalue = _HelperCUgraphMem_attribute(attr, 0, is_getter=True)
28258 cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
28259 with nogil:
28260 err = cyruntime.cudaDeviceGetGraphMemAttribute(device, cyattr, cyvalue_ptr)
28261 if err != cyruntime.cudaSuccess:
28262 return (_dict_cudaError_t[err], None)
28263 return (_dict_cudaError_t[err], cyvalue.pyObj())
28265@cython.embedsignature(True)
28266def cudaDeviceSetGraphMemAttribute(int device, attr not None : cudaGraphMemAttributeType, value):
28267 """ Set asynchronous allocation attributes related to graphs.
28269 Valid attributes are:
28271 - :py:obj:`~.cudaGraphMemAttrUsedMemHigh`: High watermark of memory, in
28272 bytes, associated with graphs since the last time it was reset. High
28273 watermark can only be reset to zero.
28275 - :py:obj:`~.cudaGraphMemAttrReservedMemHigh`: High watermark of
28276 memory, in bytes, currently allocated for use by the CUDA graphs
28277 asynchronous allocator.
28279 Parameters
28280 ----------
28281 device : int
28282 Specifies the scope of the query
28283 attr : :py:obj:`~.cudaGraphMemAttributeType`
28284 attribute to set
28285 value : Any
28286 pointer to value to set
28288 Returns
28289 -------
28290 cudaError_t
28291 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`
28293 See Also
28294 --------
28295 :py:obj:`~.cudaDeviceGetGraphMemAttribute`, :py:obj:`~.cudaGraphAddMemAllocNode`, :py:obj:`~.cudaGraphAddMemFreeNode`, :py:obj:`~.cudaDeviceGraphMemTrim`, :py:obj:`~.cudaMallocAsync`, :py:obj:`~.cudaFreeAsync`
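Examples
--------
A minimal sketch resetting the used-memory high watermark on device 0
(illustrative only; high watermarks can only be reset to zero):

>>> from cuda.bindings import runtime
>>> attr = runtime.cudaGraphMemAttributeType.cudaGraphMemAttrUsedMemHigh
>>> err, = runtime.cudaDeviceSetGraphMemAttribute(0, attr, 0)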
28296 """
28297 cdef cyruntime.cudaGraphMemAttributeType cyattr = attr.value
28298 cdef _HelperCUgraphMem_attribute cyvalue = _HelperCUgraphMem_attribute(attr, value, is_getter=False)
28299 cdef void* cyvalue_ptr = <void*><void_ptr>cyvalue.cptr
28300 with nogil:
28301 err = cyruntime.cudaDeviceSetGraphMemAttribute(device, cyattr, cyvalue_ptr)
28302 return (_dict_cudaError_t[err],)
28304@cython.embedsignature(True)
28305def cudaGraphClone(originalGraph):
28306 """ Clones a graph.
28308 This function creates a copy of `originalGraph` and returns it in
28309 `pGraphClone`. All parameters are copied into the cloned graph. The
28310 original graph may be modified after this call without affecting the
28311 clone.
28313 Child graph nodes in the original graph are recursively copied into the
28314 clone.
28316 Parameters
28317 ----------
28318 originalGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28319 Graph to clone
28321 Returns
28322 -------
28323 cudaError_t
28324 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`
28325 pGraphClone : :py:obj:`~.cudaGraph_t`
28326 Returns newly created cloned graph
28328 See Also
28329 --------
28330 :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphNodeFindInClone`
28332 Notes
28333 -----
28334 Cloning is not supported for graphs which contain memory allocation nodes, memory free nodes, or conditional nodes.
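Examples
--------
A minimal sketch cloning an empty graph (illustrative only):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, clone = runtime.cudaGraphClone(graph)
>>> err, = runtime.cudaGraphDestroy(clone)
>>> err, = runtime.cudaGraphDestroy(graph)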
28335 """
28336 cdef cyruntime.cudaGraph_t cyoriginalGraph
28337 if originalGraph is None:
28338 poriginalGraph = 0
28339 elif isinstance(originalGraph, (cudaGraph_t,driver.CUgraph)):
28340 poriginalGraph = int(originalGraph)
28341 else:
28342 poriginalGraph = int(cudaGraph_t(originalGraph))
28343 cyoriginalGraph = <cyruntime.cudaGraph_t><void_ptr>poriginalGraph
28344 cdef cudaGraph_t pGraphClone = cudaGraph_t()
28345 with nogil:
28346 err = cyruntime.cudaGraphClone(<cyruntime.cudaGraph_t*>pGraphClone._pvt_ptr, cyoriginalGraph)
28347 if err != cyruntime.cudaSuccess:
28348 return (_dict_cudaError_t[err], None)
28349 return (_dict_cudaError_t[err], pGraphClone)
28351@cython.embedsignature(True)
28352def cudaGraphNodeFindInClone(originalNode, clonedGraph):
28353 """ Finds a cloned version of a node.
28355 This function returns the node in `clonedGraph` corresponding to
28356 `originalNode` in the original graph.
28358 `clonedGraph` must have been cloned from `originalGraph` via
28359 :py:obj:`~.cudaGraphClone`. `originalNode` must have been in
28360 `originalGraph` at the time of the call to :py:obj:`~.cudaGraphClone`,
28361 and the corresponding cloned node in `clonedGraph` must not have been
28362 removed. The cloned node is then returned via `pClonedNode`.
28364 Parameters
28365 ----------
28366 originalNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28367 Handle to the original node
28368 clonedGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28369 Cloned graph to query
28371 Returns
28372 -------
28373 cudaError_t
28374 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28375 pNode : :py:obj:`~.cudaGraphNode_t`
28376 Returns handle to the cloned node
28378 See Also
28379 --------
28380 :py:obj:`~.cudaGraphClone`
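Examples
--------
A minimal sketch mapping a node from an original graph into its clone
(illustrative; the empty node stands in for any real workload node):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, node = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, clone = runtime.cudaGraphClone(graph)
>>> err, clonedNode = runtime.cudaGraphNodeFindInClone(node, clone)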
28381 """
28382 cdef cyruntime.cudaGraph_t cyclonedGraph
28383 if clonedGraph is None:
28384 pclonedGraph = 0
28385 elif isinstance(clonedGraph, (cudaGraph_t,driver.CUgraph)):
28386 pclonedGraph = int(clonedGraph)
28387 else:
28388 pclonedGraph = int(cudaGraph_t(clonedGraph))
28389 cyclonedGraph = <cyruntime.cudaGraph_t><void_ptr>pclonedGraph
28390 cdef cyruntime.cudaGraphNode_t cyoriginalNode
28391 if originalNode is None:
28392 poriginalNode = 0
28393 elif isinstance(originalNode, (cudaGraphNode_t,driver.CUgraphNode)):
28394 poriginalNode = int(originalNode)
28395 else:
28396 poriginalNode = int(cudaGraphNode_t(originalNode))
28397 cyoriginalNode = <cyruntime.cudaGraphNode_t><void_ptr>poriginalNode
28398 cdef cudaGraphNode_t pNode = cudaGraphNode_t()
28399 with nogil:
28400 err = cyruntime.cudaGraphNodeFindInClone(<cyruntime.cudaGraphNode_t*>pNode._pvt_ptr, cyoriginalNode, cyclonedGraph)
28401 if err != cyruntime.cudaSuccess:
28402 return (_dict_cudaError_t[err], None)
28403 return (_dict_cudaError_t[err], pNode)
28405@cython.embedsignature(True)
28406def cudaGraphNodeGetType(node):
28407 """ Returns a node's type.
28409 Returns the node type of `node` in `pType`.
28411 Parameters
28412 ----------
28413 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28414 Node to query
28416 Returns
28417 -------
28418 cudaError_t
28419 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28420 pType : :py:obj:`~.cudaGraphNodeType`
28421 Pointer to return the node type
28423 See Also
28424 --------
28425 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphChildGraphNodeGetGraph`, :py:obj:`~.cudaGraphKernelNodeGetParams`, :py:obj:`~.cudaGraphKernelNodeSetParams`, :py:obj:`~.cudaGraphHostNodeGetParams`, :py:obj:`~.cudaGraphHostNodeSetParams`, :py:obj:`~.cudaGraphMemcpyNodeGetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphMemsetNodeGetParams`, :py:obj:`~.cudaGraphMemsetNodeSetParams`
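Examples
--------
A minimal sketch (illustrative; an empty node reports
:py:obj:`~.cudaGraphNodeType.cudaGraphNodeTypeEmpty`):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, node = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, nodeType = runtime.cudaGraphNodeGetType(node)
>>> assert nodeType == runtime.cudaGraphNodeType.cudaGraphNodeTypeEmpty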
28426 """
28427 cdef cyruntime.cudaGraphNode_t cynode
28428 if node is None:
28429 pnode = 0
28430 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
28431 pnode = int(node)
28432 else:
28433 pnode = int(cudaGraphNode_t(node))
28434 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
28435 cdef cyruntime.cudaGraphNodeType pType
28436 with nogil:
28437 err = cyruntime.cudaGraphNodeGetType(cynode, &pType)
28438 if err != cyruntime.cudaSuccess:
28439 return (_dict_cudaError_t[err], None)
28440 return (_dict_cudaError_t[err], cudaGraphNodeType(pType))
28442@cython.embedsignature(True)
28443def cudaGraphNodeGetContainingGraph(hNode):
28444 """ Returns the graph that contains a given graph node.
28446 Returns the graph that contains `hNode` in `*phGraph`. If `hNode` is
28447 located in a child graph, that child graph is returned.
28449 Parameters
28450 ----------
28451 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28452 Node to query
28454 Returns
28455 -------
28456 cudaError_t
28457 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28458 phGraph : :py:obj:`~.cudaGraph_t`
28459 Pointer to return the containing graph
28461 See Also
28462 --------
28463 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint`, :py:obj:`~.cudaGraphNodeGetLocalId`, :py:obj:`~.cudaGraphNodeGetToolsId`, :py:obj:`~.cudaGraphGetId`, :py:obj:`~.cudaGraphExecGetId`
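Examples
--------
A minimal sketch (illustrative; for a node added directly to `graph`,
the containing graph compares equal to `graph` itself):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, node = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, owner = runtime.cudaGraphNodeGetContainingGraph(node)
>>> assert int(owner) == int(graph)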
28464 """
28465 cdef cyruntime.cudaGraphNode_t cyhNode
28466 if hNode is None:
28467 phNode = 0
28468 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
28469 phNode = int(hNode)
28470 else:
28471 phNode = int(cudaGraphNode_t(hNode))
28472 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
28473 cdef cudaGraph_t phGraph = cudaGraph_t()
28474 with nogil:
28475 err = cyruntime.cudaGraphNodeGetContainingGraph(cyhNode, <cyruntime.cudaGraph_t*>phGraph._pvt_ptr)
28476 if err != cyruntime.cudaSuccess:
28477 return (_dict_cudaError_t[err], None)
28478 return (_dict_cudaError_t[err], phGraph)
28480@cython.embedsignature(True)
28481def cudaGraphNodeGetLocalId(hNode):
28482 """ Returns the node id of a given graph node.
28484 Returns the node id of `hNode` in `*nodeId`. The nodeId matches that
28485 referenced by :py:obj:`~.cudaGraphDebugDotPrint`. The local nodeId and
28486 graphId together can uniquely identify the node.
28488 Parameters
28489 ----------
28490 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28491 Node to query
28493 Returns
28494 -------
28495 cudaError_t
28496 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28497 nodeId : unsigned int
28498 Pointer to return the nodeId
28500 See Also
28501 --------
28502 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint`, :py:obj:`~.cudaGraphNodeGetContainingGraph`, :py:obj:`~.cudaGraphNodeGetToolsId`, :py:obj:`~.cudaGraphGetId`, :py:obj:`~.cudaGraphExecGetId`
28503 """
28504 cdef cyruntime.cudaGraphNode_t cyhNode
28505 if hNode is None:
28506 phNode = 0
28507 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
28508 phNode = int(hNode)
28509 else:
28510 phNode = int(cudaGraphNode_t(hNode))
28511 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
28512 cdef unsigned int nodeId = 0
28513 with nogil:
28514 err = cyruntime.cudaGraphNodeGetLocalId(cyhNode, &nodeId)
28515 if err != cyruntime.cudaSuccess:
28516 return (_dict_cudaError_t[err], None)
28517 return (_dict_cudaError_t[err], nodeId)
28519@cython.embedsignature(True)
28520def cudaGraphNodeGetToolsId(hNode):
28521 """ Returns an id used by tools to identify a given node.
28523 Parameters
28524 ----------
28525 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28526 Node to query
28528 Returns
28529 -------
28530 cudaError_t
28531 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28532 toolsNodeId : unsigned long long
28533 Pointer to return the id used by tools
28535 See Also
28536 --------
28537 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint`, :py:obj:`~.cudaGraphNodeGetContainingGraph`, :py:obj:`~.cudaGraphNodeGetLocalId`, :py:obj:`~.cudaGraphGetId`, :py:obj:`~.cudaGraphExecGetId`
28538 """
28539 cdef cyruntime.cudaGraphNode_t cyhNode
28540 if hNode is None:
28541 phNode = 0
28542 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
28543 phNode = int(hNode)
28544 else:
28545 phNode = int(cudaGraphNode_t(hNode))
28546 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
28547 cdef unsigned long long toolsNodeId = 0
28548 with nogil:
28549 err = cyruntime.cudaGraphNodeGetToolsId(cyhNode, &toolsNodeId)
28550 if err != cyruntime.cudaSuccess:
28551 return (_dict_cudaError_t[err], None)
28552 return (_dict_cudaError_t[err], toolsNodeId)
28554@cython.embedsignature(True)
28555def cudaGraphGetId(hGraph):
28556 """ Returns the id of a given graph.
28558 Returns the id of `hGraph` in `*graphId`. The value in `*graphId`
28559 matches that referenced by :py:obj:`~.cudaGraphDebugDotPrint`.
28561 Parameters
28562 ----------
28563 hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28564 Graph to query
28566 Returns
28567 -------
28568 cudaError_t
28569 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28570 graphId : unsigned int
28571 Pointer to return the graphId
28573 See Also
28574 --------
28575 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint`, :py:obj:`~.cudaGraphNodeGetContainingGraph`, :py:obj:`~.cudaGraphNodeGetLocalId`, :py:obj:`~.cudaGraphNodeGetToolsId`, :py:obj:`~.cudaGraphExecGetId`
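Examples
--------
A minimal sketch (illustrative; the id is the one shown in
:py:obj:`~.cudaGraphDebugDotPrint` output):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, graphId = runtime.cudaGraphGetId(graph)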
28576 """
28577 cdef cyruntime.cudaGraph_t cyhGraph
28578 if hGraph is None:
28579 phGraph = 0
28580 elif isinstance(hGraph, (cudaGraph_t,driver.CUgraph)):
28581 phGraph = int(hGraph)
28582 else:
28583 phGraph = int(cudaGraph_t(hGraph))
28584 cyhGraph = <cyruntime.cudaGraph_t><void_ptr>phGraph
28585 cdef unsigned int graphID = 0
28586 with nogil:
28587 err = cyruntime.cudaGraphGetId(cyhGraph, &graphID)
28588 if err != cyruntime.cudaSuccess:
28589 return (_dict_cudaError_t[err], None)
28590 return (_dict_cudaError_t[err], graphID)
28592@cython.embedsignature(True)
28593def cudaGraphExecGetId(hGraphExec):
28594 """ Returns the id of a given graph exec.
28596 Returns the id of `hGraphExec` in `*graphId`. The value in `*graphId`
28597 matches that referenced by :py:obj:`~.cudaGraphDebugDotPrint`.
28599 Parameters
28600 ----------
28601 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
28602 Graph to query
28604 Returns
28605 -------
28606 cudaError_t
28607 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28608 graphId : unsigned int
28609 Pointer to return the graphId
28611 See Also
28612 --------
28613 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphDebugDotPrint`, :py:obj:`~.cudaGraphNodeGetContainingGraph`, :py:obj:`~.cudaGraphNodeGetLocalId`, :py:obj:`~.cudaGraphNodeGetToolsId`, :py:obj:`~.cudaGraphGetId`
28614 """
28615 cdef cyruntime.cudaGraphExec_t cyhGraphExec
28616 if hGraphExec is None:
28617 phGraphExec = 0
28618 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
28619 phGraphExec = int(hGraphExec)
28620 else:
28621 phGraphExec = int(cudaGraphExec_t(hGraphExec))
28622 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
28623 cdef unsigned int graphID = 0
28624 with nogil:
28625 err = cyruntime.cudaGraphExecGetId(cyhGraphExec, &graphID)
28626 if err != cyruntime.cudaSuccess:
28627 return (_dict_cudaError_t[err], None)
28628 return (_dict_cudaError_t[err], graphID)
28630@cython.embedsignature(True)
28631def cudaGraphGetNodes(graph, size_t numNodes = 0):
28632 """ Returns a graph's nodes.
28634 Returns a list of `graph's` nodes. `nodes` may be NULL, in which case
28635 this function will return the number of nodes in `numNodes`. Otherwise,
28636 `numNodes` entries will be filled in. If `numNodes` is higher than the
28637 actual number of nodes, the remaining entries in `nodes` will be set to
28638 NULL, and the number of nodes actually obtained will be returned in
28639 `numNodes`.
28641 Parameters
28642 ----------
28643 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28644 Graph to query
28645 numNodes : int
28646 See description
28648 Returns
28649 -------
28650 cudaError_t
28651 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28652 nodes : list[:py:obj:`~.cudaGraphNode_t`]
28653 Pointer to return the nodes
28654 numNodes : int
28655 See description
28657 See Also
28658 --------
28659 :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetType`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
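Examples
--------
A minimal sketch of the usual two-call pattern: query the count first,
then fetch the handles (illustrative only):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, node = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, _, count = runtime.cudaGraphGetNodes(graph)
>>> err, nodes, count = runtime.cudaGraphGetNodes(graph, count)
>>> assert count == 1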
28660 """
28661 cdef size_t _graph_length = numNodes
28662 cdef cyruntime.cudaGraph_t cygraph
28663 if graph is None:
28664 pgraph = 0
28665 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
28666 pgraph = int(graph)
28667 else:
28668 pgraph = int(cudaGraph_t(graph))
28669 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
28670 cdef cyruntime.cudaGraphNode_t* cynodes = NULL
28671 pynodes = []
28672 if _graph_length != 0:
28673 cynodes = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
28674 if cynodes is NULL:
28675 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28676 with nogil:
28677 err = cyruntime.cudaGraphGetNodes(cygraph, cynodes, &numNodes)
28678 if cudaError_t(err) == cudaError_t(0):
28679 pynodes = [cudaGraphNode_t(init_value=<void_ptr>cynodes[idx]) for idx in range(_graph_length)]
28680 if cynodes is not NULL:
28681 free(cynodes)
28682 if err != cyruntime.cudaSuccess:
28683 return (_dict_cudaError_t[err], None, None)
28684 return (_dict_cudaError_t[err], pynodes, numNodes)
28686@cython.embedsignature(True)
28687def cudaGraphGetRootNodes(graph, size_t pNumRootNodes = 0):
28688 """ Returns a graph's root nodes.
28690 Returns a list of `graph's` root nodes. `pRootNodes` may be NULL, in
28691 which case this function will return the number of root nodes in
28692 `pNumRootNodes`. Otherwise, `pNumRootNodes` entries will be filled in.
28693 If `pNumRootNodes` is higher than the actual number of root nodes, the
28694 remaining entries in `pRootNodes` will be set to NULL, and the number
28695 of nodes actually obtained will be returned in `pNumRootNodes`.
28697 Parameters
28698 ----------
28699 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28700 Graph to query
28701 pNumRootNodes : int
28702 See description
28704 Returns
28705 -------
28706 cudaError_t
28707 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
28708 pRootNodes : list[:py:obj:`~.cudaGraphNode_t`]
28709 Pointer to return the root nodes
28710 pNumRootNodes : int
28711 See description
28713 See Also
28714 --------
28715 :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetType`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
28716 """
28717 cdef size_t _graph_length = pNumRootNodes
28718 cdef cyruntime.cudaGraph_t cygraph
28719 if graph is None:
28720 pgraph = 0
28721 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
28722 pgraph = int(graph)
28723 else:
28724 pgraph = int(cudaGraph_t(graph))
28725 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
28726 cdef cyruntime.cudaGraphNode_t* cypRootNodes = NULL
28727 pypRootNodes = []
28728 if _graph_length != 0:
28729 cypRootNodes = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
28730 if cypRootNodes is NULL:
28731 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28732 with nogil:
28733 err = cyruntime.cudaGraphGetRootNodes(cygraph, cypRootNodes, &pNumRootNodes)
28734 if cudaError_t(err) == cudaError_t(0):
28735 pypRootNodes = [cudaGraphNode_t(init_value=<void_ptr>cypRootNodes[idx]) for idx in range(_graph_length)]
28736 if cypRootNodes is not NULL:
28737 free(cypRootNodes)
28738 if err != cyruntime.cudaSuccess:
28739 return (_dict_cudaError_t[err], None, None)
28740 return (_dict_cudaError_t[err], pypRootNodes, pNumRootNodes)
28742@cython.embedsignature(True)
28743def cudaGraphGetEdges(graph, size_t numEdges = 0):
28744 """ Returns a graph's dependency edges.
28746 Returns a list of `graph's` dependency edges. Edges are returned via
28747 corresponding indices in `from`, `to` and `edgeData`; that is, the node
28748 in `to`[i] has a dependency on the node in `from`[i] with data
28749 `edgeData`[i]. `from` and `to` may both be NULL, in which case this
28750 function only returns the number of edges in `numEdges`. Otherwise,
28751 `numEdges` entries will be filled in. If `numEdges` is higher than the
28752 actual number of edges, the remaining entries in `from` and `to` will
28753 be set to NULL, and the number of edges actually returned will be
28754 written to `numEdges`. `edgeData` may alone be NULL, in which case the
28755 edges must all have default (zeroed) edge data. Attempting a lossy
28756 query via NULL `edgeData` will result in
28757 :py:obj:`~.cudaErrorLossyQuery`. If `edgeData` is non-NULL then `from`
28758 and `to` must be as well.
28760 Parameters
28761 ----------
28762 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28763 Graph to get the edges from
28764 numEdges : int
28765 See description
28767 Returns
28768 -------
28769 cudaError_t
28770 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorLossyQuery`, :py:obj:`~.cudaErrorInvalidValue`
28771 from : list[:py:obj:`~.cudaGraphNode_t`]
28772 Location to return edge endpoints
28773 to : list[:py:obj:`~.cudaGraphNode_t`]
28774 Location to return edge endpoints
28775 edgeData : list[:py:obj:`~.cudaGraphEdgeData`]
28776 Optional location to return edge data
28777 numEdges : int
28778 See description
28780 See Also
28781 --------
28782 :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
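Examples
--------
A minimal sketch using the same count-then-fetch pattern as
:py:obj:`~.cudaGraphGetNodes` (illustrative; the two empty nodes and
the dependency between them are set up only for demonstration):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, a = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, b = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, = runtime.cudaGraphAddDependencies(graph, [a], [b], None, 1)
>>> err, _, _, _, numEdges = runtime.cudaGraphGetEdges(graph)
>>> err, from_, to, edgeData, numEdges = runtime.cudaGraphGetEdges(graph, numEdges)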
28783 """
28784 cdef size_t _graph_length = numEdges
28785 cdef cyruntime.cudaGraph_t cygraph
28786 if graph is None:
28787 pgraph = 0
28788 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
28789 pgraph = int(graph)
28790 else:
28791 pgraph = int(cudaGraph_t(graph))
28792 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
28793 cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL
28794 pyfrom_ = []
28795 if _graph_length != 0:
28796 cyfrom_ = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
28797 if cyfrom_ is NULL:
28798 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28799 cdef cyruntime.cudaGraphNode_t* cyto = NULL
28800 pyto = []
28801 if _graph_length != 0:
28802 cyto = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
28803 if cyto is NULL:
28804 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28805 cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
28806 pyedgeData = []
28807 if _graph_length != 0:
28808 cyedgeData = <cyruntime.cudaGraphEdgeData*>calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData))
28809 if cyedgeData is NULL:
28810 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
28811 with nogil:
28812 err = cyruntime.cudaGraphGetEdges(cygraph, cyfrom_, cyto, cyedgeData, &numEdges)
28813 if cudaError_t(err) == cudaError_t(0):
28814 pyfrom_ = [cudaGraphNode_t(init_value=<void_ptr>cyfrom_[idx]) for idx in range(_graph_length)]
28815 if cyfrom_ is not NULL:
28816 free(cyfrom_)
28817 if cudaError_t(err) == cudaError_t(0):
28818 pyto = [cudaGraphNode_t(init_value=<void_ptr>cyto[idx]) for idx in range(_graph_length)]
28819 if cyto is not NULL:
28820 free(cyto)
28821 if cudaError_t(err) == cudaError_t(0):
28822 pyedgeData = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
28823 if cyedgeData is not NULL:
28824 free(cyedgeData)
28825 if err != cyruntime.cudaSuccess:
28826 return (_dict_cudaError_t[err], None, None, None, None)
28827 return (_dict_cudaError_t[err], pyfrom_, pyto, pyedgeData, numEdges)
28829@cython.embedsignature(True)
28830def cudaGraphNodeGetDependencies(node, size_t pNumDependencies = 0):
28831 """ Returns a node's dependencies.
28833 Returns a list of `node's` dependencies. `pDependencies` may be NULL,
28834 in which case this function will return the number of dependencies in
28835 `pNumDependencies`. Otherwise, `pNumDependencies` entries will be
28836 filled in. If `pNumDependencies` is higher than the actual number of
28837 dependencies, the remaining entries in `pDependencies` will be set to
28838 NULL, and the number of nodes actually obtained will be returned in
28839 `pNumDependencies`.
28841 Note that if an edge has non-zero (non-default) edge data and
28842 `edgeData` is NULL, this API will return
28843 :py:obj:`~.cudaErrorLossyQuery`. If `edgeData` is non-NULL, then
28844 `pDependencies` must be as well.
28846 Parameters
28847 ----------
28848 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28849 Node to query
28850 pNumDependencies : int
28851 See description
28853 Returns
28854 -------
28855 cudaError_t
28856 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorLossyQuery`, :py:obj:`~.cudaErrorInvalidValue`
28857 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
28858 Pointer to return the dependencies
28859 edgeData : list[:py:obj:`~.cudaGraphEdgeData`]
28860 Optional array to return edge data for each dependency
28861 pNumDependencies : int
28862 See description
28864 See Also
28865 --------
28866 :py:obj:`~.cudaGraphNodeGetDependentNodes`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies`
28867 """
28868 cdef size_t _graph_length = pNumDependencies
28869 cdef cyruntime.cudaGraphNode_t cynode
28870 if node is None:
28871 pnode = 0
28872 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
28873 pnode = int(node)
28874 else:
28875 pnode = int(cudaGraphNode_t(node))
28876 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
28877 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
28878 pypDependencies = []
28879 if _graph_length != 0:
28880 cypDependencies = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
28881 if cypDependencies is NULL:
28882 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28883 cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
28884 pyedgeData = []
28885 if _graph_length != 0:
28886 cyedgeData = <cyruntime.cudaGraphEdgeData*>calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData))
28887 if cyedgeData is NULL:
28888 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
28889 with nogil:
28890 err = cyruntime.cudaGraphNodeGetDependencies(cynode, cypDependencies, cyedgeData, &pNumDependencies)
28891 if cudaError_t(err) == cudaError_t(0):
28892 pypDependencies = [cudaGraphNode_t(init_value=<void_ptr>cypDependencies[idx]) for idx in range(_graph_length)]
28893 if cypDependencies is not NULL:
28894 free(cypDependencies)
28895 if cudaError_t(err) == cudaError_t(0):
28896 pyedgeData = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
28897 if cyedgeData is not NULL:
28898 free(cyedgeData)
28899 if err != cyruntime.cudaSuccess:
28900 return (_dict_cudaError_t[err], None, None, None)
28901 return (_dict_cudaError_t[err], pypDependencies, pyedgeData, pNumDependencies)
28903@cython.embedsignature(True)
28904def cudaGraphNodeGetDependentNodes(node, size_t pNumDependentNodes = 0):
28905 """ Returns a node's dependent nodes.
28907 Returns a list of `node's` dependent nodes. `pDependentNodes` may be
28908 NULL, in which case this function will return the number of dependent
28909 nodes in `pNumDependentNodes`. Otherwise, `pNumDependentNodes` entries
28910 will be filled in. If `pNumDependentNodes` is higher than the actual
28911 number of dependent nodes, the remaining entries in `pDependentNodes`
28912 will be set to NULL, and the number of nodes actually obtained will be
28913 returned in `pNumDependentNodes`.
28915 Note that if an edge has non-zero (non-default) edge data and
28916 `edgeData` is NULL, this API will return
28917 :py:obj:`~.cudaErrorLossyQuery`. If `edgeData` is non-NULL, then
28918 `pDependentNodes` must be as well.
28920 Parameters
28921 ----------
28922 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
28923 Node to query
28924 pNumDependentNodes : int
28925 See description
28927 Returns
28928 -------
28929 cudaError_t
28930 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorLossyQuery`, :py:obj:`~.cudaErrorInvalidValue`
28931 pDependentNodes : list[:py:obj:`~.cudaGraphNode_t`]
28932 Pointer to return the dependent nodes
28933 edgeData : list[:py:obj:`~.cudaGraphEdgeData`]
28934 Optional pointer to return edge data for dependent nodes
28935 pNumDependentNodes : int
28936 See description
28938 See Also
28939 --------
28940 :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphGetNodes`, :py:obj:`~.cudaGraphGetRootNodes`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphRemoveDependencies`
28941 """
28942 cdef size_t _graph_length = pNumDependentNodes
28943 cdef cyruntime.cudaGraphNode_t cynode
28944 if node is None:
28945 pnode = 0
28946 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
28947 pnode = int(node)
28948 else:
28949 pnode = int(cudaGraphNode_t(node))
28950 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
28951 cdef cyruntime.cudaGraphNode_t* cypDependentNodes = NULL
28952 pypDependentNodes = []
28953 if _graph_length != 0:
28954 cypDependentNodes = <cyruntime.cudaGraphNode_t*>calloc(_graph_length, sizeof(cyruntime.cudaGraphNode_t))
28955 if cypDependentNodes is NULL:
28956 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
28957 cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
28958 pyedgeData = []
28959 if _graph_length != 0:
28960 cyedgeData = <cyruntime.cudaGraphEdgeData*>calloc(_graph_length, sizeof(cyruntime.cudaGraphEdgeData))
28961 if cyedgeData is NULL:
28962 raise MemoryError('Failed to allocate length x size memory: ' + str(_graph_length) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
28963 with nogil:
28964 err = cyruntime.cudaGraphNodeGetDependentNodes(cynode, cypDependentNodes, cyedgeData, &pNumDependentNodes)
28965 if cudaError_t(err) == cudaError_t(0):
28966 pypDependentNodes = [cudaGraphNode_t(init_value=<void_ptr>cypDependentNodes[idx]) for idx in range(_graph_length)]
28967 if cypDependentNodes is not NULL:
28968 free(cypDependentNodes)
28969 if cudaError_t(err) == cudaError_t(0):
28970 pyedgeData = [cudaGraphEdgeData(_ptr=<void_ptr>&cyedgeData[idx]) for idx in range(_graph_length)]
28971 if cyedgeData is not NULL:
28972 free(cyedgeData)
28973 if err != cyruntime.cudaSuccess:
28974 return (_dict_cudaError_t[err], None, None, None)
28975 return (_dict_cudaError_t[err], pypDependentNodes, pyedgeData, pNumDependentNodes)
28977@cython.embedsignature(True)
28978def cudaGraphAddDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], to : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], edgeData : Optional[tuple[cudaGraphEdgeData] | list[cudaGraphEdgeData]], size_t numDependencies):
28979 """ Adds dependency edges to a graph.
28981 The number of dependencies to be added is defined by `numDependencies`.
28982 Elements in `pFrom` and `pTo` at corresponding indices define a
28983 dependency. Each node in `pFrom` and `pTo` must belong to `graph`.
28985 If `numDependencies` is 0, elements in `pFrom` and `pTo` will be
28986 ignored. Specifying an existing dependency will return an error.
28988 Parameters
28989 ----------
28990 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
28991 Graph to which dependencies are added
28992 from : list[:py:obj:`~.cudaGraphNode_t`]
28993 Array of nodes that provide the dependencies
28994 to : list[:py:obj:`~.cudaGraphNode_t`]
28995 Array of dependent nodes
28996 edgeData : list[:py:obj:`~.cudaGraphEdgeData`]
28997 Optional array of edge data. If NULL, default (zeroed) edge data is
28998 assumed.
28999 numDependencies : size_t
29000 Number of dependencies to be added
29002 Returns
29003 -------
29004 cudaError_t
29005 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29007 See Also
29008 --------
29009 :py:obj:`~.cudaGraphRemoveDependencies`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
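Examples
--------
A minimal sketch making node `b` depend on node `a` (illustrative;
passing None for `edgeData` uses default, zeroed edge data):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, a = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, b = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, = runtime.cudaGraphAddDependencies(graph, [a], [b], None, 1)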
29010 """
29011 edgeData = [] if edgeData is None else edgeData
29012 if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in edgeData):
29013 raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]")
29014 to = [] if to is None else to
29015 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to):
29016 raise TypeError("Argument 'to' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
29017 from_ = [] if from_ is None else from_
29018 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_):
29019 raise TypeError("Argument 'from_' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
29020 cdef cyruntime.cudaGraph_t cygraph
29021 if graph is None:
29022 pgraph = 0
29023 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
29024 pgraph = int(graph)
29025 else:
29026 pgraph = int(cudaGraph_t(graph))
29027 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
29028 cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL
29029 if len(from_) > 1:
29030 cyfrom_ = <cyruntime.cudaGraphNode_t*> calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t))
29031 if cyfrom_ is NULL:
29032 raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
29033 else:
29034 for idx in range(len(from_)):
29035 cyfrom_[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>from_[idx])._pvt_ptr[0]
29036 elif len(from_) == 1:
29037 cyfrom_ = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>from_[0])._pvt_ptr
29038 cdef cyruntime.cudaGraphNode_t* cyto = NULL
29039 if len(to) > 1:
29040 cyto = <cyruntime.cudaGraphNode_t*> calloc(len(to), sizeof(cyruntime.cudaGraphNode_t))
29041 if cyto is NULL:
29042 raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
29043 else:
29044 for idx in range(len(to)):
29045 cyto[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>to[idx])._pvt_ptr[0]
29046 elif len(to) == 1:
29047 cyto = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>to[0])._pvt_ptr
29048 cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
29049 if len(edgeData) > 1:
29050 cyedgeData = <cyruntime.cudaGraphEdgeData*> calloc(len(edgeData), sizeof(cyruntime.cudaGraphEdgeData))
29051 if cyedgeData is NULL:
29052 raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
29053 for idx in range(len(edgeData)):
29054 string.memcpy(&cyedgeData[idx], (<cudaGraphEdgeData>edgeData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData))
29055 elif len(edgeData) == 1:
29056 cyedgeData = (<cudaGraphEdgeData>edgeData[0])._pvt_ptr
29057 with nogil:
29058 err = cyruntime.cudaGraphAddDependencies(cygraph, cyfrom_, cyto, cyedgeData, numDependencies)
29059 if len(from_) > 1 and cyfrom_ is not NULL:
29060 free(cyfrom_)
29061 if len(to) > 1 and cyto is not NULL:
29062 free(cyto)
29063 if len(edgeData) > 1 and cyedgeData is not NULL:
29064 free(cyedgeData)
29065 return (_dict_cudaError_t[err],)
29067@cython.embedsignature(True)
29068def cudaGraphRemoveDependencies(graph, from_ : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], to : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], edgeData : Optional[tuple[cudaGraphEdgeData] | list[cudaGraphEdgeData]], size_t numDependencies):
29069 """ Removes dependency edges from a graph.
29071 The number of `pDependencies` to be removed is defined by
29072 `numDependencies`. Elements in `pFrom` and `pTo` at corresponding
29073 indices define a dependency. Each node in `pFrom` and `pTo` must belong
29074 to `graph`.
29076 If `numDependencies` is 0, elements in `pFrom` and `pTo` will be
29077 ignored. Specifying an edge that does not exist in the graph, with data
29078 matching `edgeData`, results in an error. `edgeData` is nullable, which
29079 is equivalent to passing default (zeroed) data for each edge.
29081 Parameters
29082 ----------
29083 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
29084 Graph from which to remove dependencies
29085 from : list[:py:obj:`~.cudaGraphNode_t`]
29086 Array of nodes that provide the dependencies
29087 to : list[:py:obj:`~.cudaGraphNode_t`]
29088 Array of dependent nodes
29089 edgeData : list[:py:obj:`~.cudaGraphEdgeData`]
29090 Optional array of edge data. If NULL, edge data is assumed to be
29091 default (zeroed).
29092 numDependencies : size_t
29093 Number of dependencies to be removed
29095 Returns
29096 -------
29097 cudaError_t
29098 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29100 See Also
29101 --------
29102 :py:obj:`~.cudaGraphAddDependencies`, :py:obj:`~.cudaGraphGetEdges`, :py:obj:`~.cudaGraphNodeGetDependencies`, :py:obj:`~.cudaGraphNodeGetDependentNodes`
29103 """
29104 edgeData = [] if edgeData is None else edgeData
29105 if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in edgeData):
29106 raise TypeError("Argument 'edgeData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]")
29107 to = [] if to is None else to
29108 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in to):
29109 raise TypeError("Argument 'to' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
29110 from_ = [] if from_ is None else from_
29111 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in from_):
29112 raise TypeError("Argument 'from_' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
29113 cdef cyruntime.cudaGraph_t cygraph
29114 if graph is None:
29115 pgraph = 0
29116 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
29117 pgraph = int(graph)
29118 else:
29119 pgraph = int(cudaGraph_t(graph))
29120 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
29121 cdef cyruntime.cudaGraphNode_t* cyfrom_ = NULL
29122 if len(from_) > 1:
29123 cyfrom_ = <cyruntime.cudaGraphNode_t*> calloc(len(from_), sizeof(cyruntime.cudaGraphNode_t))
29124 if cyfrom_ is NULL:
29125 raise MemoryError('Failed to allocate length x size memory: ' + str(len(from_)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
29126 else:
29127 for idx in range(len(from_)):
29128 cyfrom_[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>from_[idx])._pvt_ptr[0]
29129 elif len(from_) == 1:
29130 cyfrom_ = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>from_[0])._pvt_ptr
29131 cdef cyruntime.cudaGraphNode_t* cyto = NULL
29132 if len(to) > 1:
29133 cyto = <cyruntime.cudaGraphNode_t*> calloc(len(to), sizeof(cyruntime.cudaGraphNode_t))
29134 if cyto is NULL:
29135 raise MemoryError('Failed to allocate length x size memory: ' + str(len(to)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
29136 else:
29137 for idx in range(len(to)):
29138 cyto[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>to[idx])._pvt_ptr[0]
29139 elif len(to) == 1:
29140 cyto = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>to[0])._pvt_ptr
29141 cdef cyruntime.cudaGraphEdgeData* cyedgeData = NULL
29142 if len(edgeData) > 1:
29143 cyedgeData = <cyruntime.cudaGraphEdgeData*> calloc(len(edgeData), sizeof(cyruntime.cudaGraphEdgeData))
29144 if cyedgeData is NULL:
29145 raise MemoryError('Failed to allocate length x size memory: ' + str(len(edgeData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
29146 for idx in range(len(edgeData)):
29147 string.memcpy(&cyedgeData[idx], (<cudaGraphEdgeData>edgeData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData))
29148 elif len(edgeData) == 1:
29149 cyedgeData = (<cudaGraphEdgeData>edgeData[0])._pvt_ptr
29150 with nogil:
29151 err = cyruntime.cudaGraphRemoveDependencies(cygraph, cyfrom_, cyto, cyedgeData, numDependencies)
29152 if len(from_) > 1 and cyfrom_ is not NULL:
29153 free(cyfrom_)
29154 if len(to) > 1 and cyto is not NULL:
29155 free(cyto)
29156 if len(edgeData) > 1 and cyedgeData is not NULL:
29157 free(cyedgeData)
29158 return (_dict_cudaError_t[err],)
29160@cython.embedsignature(True)
29161def cudaGraphDestroyNode(node):
29162 """ Remove a node from the graph.
29164 Removes `node` from its graph. This operation also severs any
29165 dependencies of other nodes on `node` and vice versa.
29167 Dependencies cannot be removed from graphs which contain allocation or
29168 free nodes. Any attempt to do so will return an error.
29170 Parameters
29171 ----------
29172 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29173 Node to remove
29175 Returns
29176 -------
29177 cudaError_t
29178 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29180 See Also
29181 --------
29182 :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphAddEmptyNode`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemsetNode`
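Examples
--------
A minimal sketch removing a freshly added node (illustrative only):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, node = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, = runtime.cudaGraphDestroyNode(node)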
29183 """
29184 cdef cyruntime.cudaGraphNode_t cynode
29185 if node is None:
29186 pnode = 0
29187 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29188 pnode = int(node)
29189 else:
29190 pnode = int(cudaGraphNode_t(node))
29191 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29192 with nogil:
29193 err = cyruntime.cudaGraphDestroyNode(cynode)
29194 return (_dict_cudaError_t[err],)
29196@cython.embedsignature(True)
29197def cudaGraphInstantiate(graph, unsigned long long flags):
29198 """ Creates an executable graph from a graph.
29200 Instantiates `graph` as an executable graph. The graph is validated for
29201 any structural constraints or intra-node constraints which were not
29202 previously validated. If instantiation is successful, a handle to the
29203 instantiated graph is returned in `pGraphExec`.
29205 The `flags` parameter controls the behavior of instantiation and
29206 subsequent graph launches. Valid flags are:
29208 - :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`, which
29209 configures a graph containing memory allocation nodes to
29210 automatically free any unfreed memory allocations before the graph is
29211 relaunched.
29213 - :py:obj:`~.cudaGraphInstantiateFlagDeviceLaunch`, which configures
29214 the graph for launch from the device. If this flag is passed, the
29215 executable graph handle returned can be used to launch the graph from
29216 both the host and device. This flag cannot be used in conjunction
29217 with :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`.
29219 - :py:obj:`~.cudaGraphInstantiateFlagUseNodePriority`, which causes the
29220 graph to use the priorities from the per-node attributes rather than
29221 the priority of the launch stream during execution. Note that
29222 priorities are only available on kernel nodes, and are copied from
29223 stream priority during stream capture.
29225 If `graph` contains any allocation or free nodes, there can be at most
29226 one executable graph in existence for that graph at a time. An attempt
29227 to instantiate a second executable graph before destroying the first
29228 with :py:obj:`~.cudaGraphExecDestroy` will result in an error. The same
29229 also applies if `graph` contains any device-updatable kernel nodes.
29231 Graphs instantiated for launch on the device have additional
29232 restrictions which do not apply to host graphs:
29234 - The graph's nodes must reside on a single device.
29236 - The graph can only contain kernel nodes, memcpy nodes, memset nodes,
29237 and child graph nodes.
29239 - The graph cannot be empty and must contain at least one kernel,
29240 memcpy, or memset node. Operation-specific restrictions are outlined
29241 below.
29243 - Kernel nodes:
29245 - Use of CUDA Dynamic Parallelism is not permitted.
29247 - Cooperative launches are permitted as long as MPS is not in use.
29249 - Memcpy nodes:
29251 - Only copies involving device memory and/or pinned device-mapped
29252 host memory are permitted.
29254 - Copies involving CUDA arrays are not permitted.
29256 - Both operands must be accessible from the current device, and the
29257 current device must match the device of other nodes in the graph.
29259 If `graph` is not instantiated for launch on the device but contains
29260 kernels which call device-side :py:obj:`~.cudaGraphLaunch()` from
29261 multiple devices, this will result in an error.
29263 Parameters
29264 ----------
29265 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
29266 Graph to instantiate
29267 flags : unsigned long long
29268 Flags to control instantiation. See
29269 :py:obj:`~.cudaGraphInstantiateFlags`.
29271 Returns
29272 -------
29273 cudaError_t
29274 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29275 pGraphExec : :py:obj:`~.cudaGraphExec_t`
29276 Returns instantiated graph
29278 See Also
29279 --------
29280 :py:obj:`~.cudaGraphInstantiateWithFlags`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphLaunch`, :py:obj:`~.cudaGraphExecDestroy`
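Examples
--------
A minimal sketch of the instantiate/launch/destroy life cycle
(illustrative; stream 0 is the default stream, and the empty node
stands in for a captured or hand-built workload):

>>> from cuda.bindings import runtime
>>> err, graph = runtime.cudaGraphCreate(0)
>>> err, node = runtime.cudaGraphAddEmptyNode(graph, None, 0)
>>> err, graphExec = runtime.cudaGraphInstantiate(graph, 0)
>>> err, = runtime.cudaGraphLaunch(graphExec, 0)
>>> err, = runtime.cudaGraphExecDestroy(graphExec)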
29281 """
29282 cdef cyruntime.cudaGraph_t cygraph
29283 if graph is None:
29284 pgraph = 0
29285 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
29286 pgraph = int(graph)
29287 else:
29288 pgraph = int(cudaGraph_t(graph))
29289 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
29290 cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t()
29291 with nogil:
29292 err = cyruntime.cudaGraphInstantiate(<cyruntime.cudaGraphExec_t*>pGraphExec._pvt_ptr, cygraph, flags)
29293 if err != cyruntime.cudaSuccess:
29294 return (_dict_cudaError_t[err], None)
29295 return (_dict_cudaError_t[err], pGraphExec)
29297@cython.embedsignature(True)
29298def cudaGraphInstantiateWithFlags(graph, unsigned long long flags):
29299 """ Creates an executable graph from a graph.
29301 Instantiates `graph` as an executable graph. The graph is validated for
29302 any structural constraints or intra-node constraints which were not
29303 previously validated. If instantiation is successful, a handle to the
29304 instantiated graph is returned in `pGraphExec`.
29306 The `flags` parameter controls the behavior of instantiation and
29307 subsequent graph launches. Valid flags are:
29309 - :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`, which
29310 configures a graph containing memory allocation nodes to
29311 automatically free any unfreed memory allocations before the graph is
29312 relaunched.
29314 - :py:obj:`~.cudaGraphInstantiateFlagDeviceLaunch`, which configures
29315 the graph for launch from the device. If this flag is passed, the
29316 executable graph handle returned can be used to launch the graph from
29317 both the host and device. This flag can only be used on platforms
29318 which support unified addressing. This flag cannot be used in
29319 conjunction with
29320 :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`.
29322 - :py:obj:`~.cudaGraphInstantiateFlagUseNodePriority`, which causes the
29323 graph to use the priorities from the per-node attributes rather than
29324 the priority of the launch stream during execution. Note that
29325 priorities are only available on kernel nodes, and are copied from
29326 stream priority during stream capture.
29328 If `graph` contains any allocation or free nodes, there can be at most
29329 one executable graph in existence for that graph at a time. An attempt
29330 to instantiate a second executable graph before destroying the first
29331 with :py:obj:`~.cudaGraphExecDestroy` will result in an error. The same
29332 also applies if `graph` contains any device-updatable kernel nodes.
29334 If `graph` contains kernels which call device-side
29335 :py:obj:`~.cudaGraphLaunch()` from multiple devices, this will result
29336 in an error.
29338 Graphs instantiated for launch on the device have additional
29339 restrictions which do not apply to host graphs:
29341 - The graph's nodes must reside on a single device.
29343 - The graph can only contain kernel nodes, memcpy nodes, memset nodes,
29344 and child graph nodes.
29346 - The graph cannot be empty and must contain at least one kernel,
29347 memcpy, or memset node. Operation-specific restrictions are outlined
29348 below.
29350 - Kernel nodes:
29352 - Use of CUDA Dynamic Parallelism is not permitted.
29354 - Cooperative launches are permitted as long as MPS is not in use.
29356 - Memcpy nodes:
29358 - Only copies involving device memory and/or pinned device-mapped
29359 host memory are permitted.
29361 - Copies involving CUDA arrays are not permitted.
29363 - Both operands must be accessible from the current device, and the
29364 current device must match the device of other nodes in the graph.
29366 Parameters
29367 ----------
29368 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
29369 Graph to instantiate
29370 flags : unsigned long long
29371 Flags to control instantiation. See
29372 :py:obj:`~.cudaGraphInstantiateFlags`.
29374 Returns
29375 -------
29376 cudaError_t
29377 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29378 pGraphExec : :py:obj:`~.cudaGraphExec_t`
29379 Returns instantiated graph
29381 See Also
29382 --------
29383 :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphLaunch`, :py:obj:`~.cudaGraphExecDestroy`
29384 """
29385 cdef cyruntime.cudaGraph_t cygraph
29386 if graph is None:
29387 pgraph = 0
29388 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
29389 pgraph = int(graph)
29390 else:
29391 pgraph = int(cudaGraph_t(graph))
29392 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
29393 cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t()
29394 with nogil:
29395 err = cyruntime.cudaGraphInstantiateWithFlags(<cyruntime.cudaGraphExec_t*>pGraphExec._pvt_ptr, cygraph, flags)
29396 if err != cyruntime.cudaSuccess:
29397 return (_dict_cudaError_t[err], None)
29398 return (_dict_cudaError_t[err], pGraphExec)
29400@cython.embedsignature(True)
29401def cudaGraphInstantiateWithParams(graph, instantiateParams : Optional[cudaGraphInstantiateParams]):
29402 """ Creates an executable graph from a graph.
29404 Instantiates `graph` as an executable graph according to the
29405 `instantiateParams` structure. The graph is validated for any
29406 structural constraints or intra-node constraints which were not
29407 previously validated. If instantiation is successful, a handle to the
29408 instantiated graph is returned in `pGraphExec`.
29410 `instantiateParams` controls the behavior of instantiation and
29411 subsequent graph launches, as well as returning more detailed
29412 information in the event of an error.
29413 :py:obj:`~.cudaGraphInstantiateParams` is defined as:
29415 **View CUDA Toolkit Documentation for a C++ code example**
29417 The `flags` field controls the behavior of instantiation and subsequent
29418 graph launches. Valid flags are:
29420 - :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`, which
29421 configures a graph containing memory allocation nodes to
29422 automatically free any unfreed memory allocations before the graph is
29423 relaunched.
29425 - :py:obj:`~.cudaGraphInstantiateFlagUpload`, which will perform an
29426 upload of the graph into `uploadStream` once the graph has been
29427 instantiated.
29429 - :py:obj:`~.cudaGraphInstantiateFlagDeviceLaunch`, which configures
29430 the graph for launch from the device. If this flag is passed, the
29431 executable graph handle returned can be used to launch the graph from
29432 both the host and device. This flag can only be used on platforms
29433 which support unified addressing. This flag cannot be used in
29434 conjunction with
29435 :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`.
29437 - :py:obj:`~.cudaGraphInstantiateFlagUseNodePriority`, which causes the
29438 graph to use the priorities from the per-node attributes rather than
29439 the priority of the launch stream during execution. Note that
29440 priorities are only available on kernel nodes, and are copied from
29441 stream priority during stream capture.
29443 If `graph` contains any allocation or free nodes, there can be at most
29444 one executable graph in existence for that graph at a time. An attempt
29445 to instantiate a second executable graph before destroying the first
29446 with :py:obj:`~.cudaGraphExecDestroy` will result in an error. The same
29447 also applies if `graph` contains any device-updatable kernel nodes.
29449 If `graph` contains kernels which call device-side
29450 :py:obj:`~.cudaGraphLaunch()` from multiple devices, this will result
29451 in an error.
29453 Graphs instantiated for launch on the device have additional
29454 restrictions which do not apply to host graphs:
29456 - The graph's nodes must reside on a single device.
29458 - The graph can only contain kernel nodes, memcpy nodes, memset nodes,
29459 and child graph nodes.
29461 - The graph cannot be empty and must contain at least one kernel,
29462 memcpy, or memset node. Operation-specific restrictions are outlined
29463 below.
29465 - Kernel nodes:
29467 - Use of CUDA Dynamic Parallelism is not permitted.
29469 - Cooperative launches are permitted as long as MPS is not in use.
29471 - Memcpy nodes:
29473 - Only copies involving device memory and/or pinned device-mapped
29474 host memory are permitted.
29476 - Copies involving CUDA arrays are not permitted.
29478 - Both operands must be accessible from the current device, and the
29479 current device must match the device of other nodes in the graph.
29481 In the event of an error, the `result_out` and `errNode_out` fields
29482 will contain more information about the nature of the error. Possible
29483 error reporting includes:
29485 - :py:obj:`~.cudaGraphInstantiateError`, if passed an invalid value or
29486 if an unexpected error occurred which is described by the return
29487 value of the function. `errNode_out` will be set to NULL.
29489 - :py:obj:`~.cudaGraphInstantiateInvalidStructure`, if the graph
29490 structure is invalid. `errNode_out` will be set to one of the
29491 offending nodes.
29493 - :py:obj:`~.cudaGraphInstantiateNodeOperationNotSupported`, if the
29494 graph is instantiated for device launch but contains a node of an
29495 unsupported node type, or a node which performs unsupported
29496 operations, such as use of CUDA dynamic parallelism within a kernel
29497 node. `errNode_out` will be set to this node.
29499 - :py:obj:`~.cudaGraphInstantiateMultipleDevicesNotSupported`, if the
29500 graph is instantiated for device launch but a node’s device differs
29501 from that of another node. This error can also be returned if a graph
29502 is not instantiated for device launch and it contains kernels which
29503 call device-side :py:obj:`~.cudaGraphLaunch()` from multiple devices.
29504 `errNode_out` will be set to this node.
29506 If instantiation is successful, `result_out` will be set to
29507 :py:obj:`~.cudaGraphInstantiateSuccess`, and `errNode_out` will be set
29508 to NULL.
29510 Parameters
29511 ----------
29512 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
29513 Graph to instantiate
29514 instantiateParams : :py:obj:`~.cudaGraphInstantiateParams`
29515 Instantiation parameters
29517 Returns
29518 -------
29519 cudaError_t
29520 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29521 pGraphExec : :py:obj:`~.cudaGraphExec_t`
29522 Returns instantiated graph
29524 See Also
29525 --------
29526 :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphInstantiateWithFlags`, :py:obj:`~.cudaGraphExecDestroy`
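
Examples
--------
An illustrative sketch (not part of the original documentation); `graph`
is assumed to be an existing :py:obj:`~.cudaGraph_t`, e.g. obtained from
stream capture:

>>> from cuda.bindings import runtime
>>> params = runtime.cudaGraphInstantiateParams()
>>> params.flags = runtime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagAutoFreeOnLaunch
>>> err, graphExec = runtime.cudaGraphInstantiateWithParams(graph, params)
>>> # On failure, params.result_out and params.errNode_out describe the error
>>> assert params.result_out == runtime.cudaGraphInstantiateResult.cudaGraphInstantiateSuccess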
29527 """
29528 cdef cyruntime.cudaGraph_t cygraph
29529 if graph is None:
29530 pgraph = 0
29531 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
29532 pgraph = int(graph)
29533 else:
29534 pgraph = int(cudaGraph_t(graph))
29535 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
29536 cdef cudaGraphExec_t pGraphExec = cudaGraphExec_t()
29537 cdef cyruntime.cudaGraphInstantiateParams* cyinstantiateParams_ptr = instantiateParams._pvt_ptr if instantiateParams is not None else NULL
29538 with nogil:
29539 err = cyruntime.cudaGraphInstantiateWithParams(<cyruntime.cudaGraphExec_t*>pGraphExec._pvt_ptr, cygraph, cyinstantiateParams_ptr)
29540 if err != cyruntime.cudaSuccess:
29541 return (_dict_cudaError_t[err], None)
29542 return (_dict_cudaError_t[err], pGraphExec)
29544@cython.embedsignature(True)
29545def cudaGraphExecGetFlags(graphExec):
29546 """ Query the instantiation flags of an executable graph.
29548 Returns the flags that were passed to instantiation for the given
29549 executable graph. :py:obj:`~.cudaGraphInstantiateFlagUpload` will not
29550 be returned by this API as it does not affect the resulting executable
29551 graph.
29553 Parameters
29554 ----------
29555 graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29556 The executable graph to query
29558 Returns
29559 -------
29560 cudaError_t
29561 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29562 flags : unsigned long long
29563 Returns the instantiation flags
29565 See Also
29566 --------
29567 :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphInstantiateWithFlags`, :py:obj:`~.cudaGraphInstantiateWithParams`
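
Examples
--------
An illustrative sketch; `graphExec` is assumed to be a previously
instantiated executable graph:

>>> from cuda.bindings import runtime
>>> err, flags = runtime.cudaGraphExecGetFlags(graphExec)
>>> # Test for a specific instantiation flag
>>> auto_free = bool(flags & runtime.cudaGraphInstantiateFlags.cudaGraphInstantiateFlagAutoFreeOnLaunch)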
29568 """
29569 cdef cyruntime.cudaGraphExec_t cygraphExec
29570 if graphExec is None:
29571 pgraphExec = 0
29572 elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29573 pgraphExec = int(graphExec)
29574 else:
29575 pgraphExec = int(cudaGraphExec_t(graphExec))
29576 cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
29577 cdef unsigned long long flags = 0
29578 with nogil:
29579 err = cyruntime.cudaGraphExecGetFlags(cygraphExec, &flags)
29580 if err != cyruntime.cudaSuccess:
29581 return (_dict_cudaError_t[err], None)
29582 return (_dict_cudaError_t[err], flags)
29584@cython.embedsignature(True)
29585def cudaGraphExecKernelNodeSetParams(hGraphExec, node, pNodeParams : Optional[cudaKernelNodeParams]):
29586 """ Sets the parameters for a kernel node in the given graphExec.
29588 Sets the parameters of a kernel node in an executable graph
29589 `hGraphExec`. The node is identified by the corresponding node `node`
29590 in the non-executable graph, from which the executable graph was
29591 instantiated.
29593 `node` must not have been removed from the original graph. All
29594 `nodeParams` fields may change, but the following restrictions apply to
29595 `func` updates:
29597 - The owning device of the function cannot change.
29599 - A node whose function originally did not use CUDA dynamic parallelism
29600 cannot be updated to a function which uses CDP.
29602 - A node whose function originally did not make device-side update
29603 calls cannot be updated to a function which makes device-side update
29604 calls.
29606 - If `hGraphExec` was not instantiated for device launch, a node whose
29607 function originally did not use device-side
29608 :py:obj:`~.cudaGraphLaunch()` cannot be updated to a function which
29609 uses device-side :py:obj:`~.cudaGraphLaunch()` unless the node
29610 resides on the same device as nodes which contained such calls at
29611 instantiate-time. If no such calls were present at instantiation,
29612 these updates cannot be performed at all.
29614 The modifications only affect future launches of `hGraphExec`. Already
29615 enqueued or running launches of `hGraphExec` are not affected by this
29616 call. `node` is also not modified by this call.
29618 If `node` is a device-updatable kernel node, the next upload/launch of
29619 `hGraphExec` will overwrite any previous device-side updates.
29620 Additionally, applying host updates to a device-updatable kernel node
29621 while it is being updated from the device will result in undefined
29622 behavior.
29624 Parameters
29625 ----------
29626 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29627 The executable graph in which to set the specified node
29628 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29629 Kernel node from the graph from which graphExec was instantiated
29630 pNodeParams : :py:obj:`~.cudaKernelNodeParams`
29631 Updated parameters to set
29633 Returns
29634 -------
29635 cudaError_t
29636 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
29638 See Also
29639 --------
29640 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddKernelNode`, :py:obj:`~.cudaGraphKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
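
Examples
--------
An illustrative sketch of the read-modify-write pattern; `graphExec` and
`node` are assumed to exist from a prior instantiation:

>>> from cuda.bindings import runtime
>>> err, params = runtime.cudaGraphKernelNodeGetParams(node)  # params of the source-graph node
>>> params.gridDim.x = 2 * params.gridDim.x                   # e.g. widen the grid
>>> err, = runtime.cudaGraphExecKernelNodeSetParams(graphExec, node, params)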
29641 """
29642 cdef cyruntime.cudaGraphNode_t cynode
29643 if node is None:
29644 pnode = 0
29645 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29646 pnode = int(node)
29647 else:
29648 pnode = int(cudaGraphNode_t(node))
29649 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29650 cdef cyruntime.cudaGraphExec_t cyhGraphExec
29651 if hGraphExec is None:
29652 phGraphExec = 0
29653 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29654 phGraphExec = int(hGraphExec)
29655 else:
29656 phGraphExec = int(cudaGraphExec_t(hGraphExec))
29657 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
29658 cdef cyruntime.cudaKernelNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
29659 with nogil:
29660 err = cyruntime.cudaGraphExecKernelNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr)
29661 return (_dict_cudaError_t[err],)
29663@cython.embedsignature(True)
29664def cudaGraphExecMemcpyNodeSetParams(hGraphExec, node, pNodeParams : Optional[cudaMemcpy3DParms]):
29665 """ Sets the parameters for a memcpy node in the given graphExec.
29667 Updates the work represented by `node` in `hGraphExec` as though `node`
29668 had contained `pNodeParams` at instantiation. `node` must remain in the
29669 graph which was used to instantiate `hGraphExec`. Changed edges to and
29670 from `node` are ignored.
29672 The source and destination memory in `pNodeParams` must be allocated
29673 from the same contexts as the original source and destination memory.
29674 Both the instantiation-time memory operands and the memory operands in
29675 `pNodeParams` must be 1-dimensional. Zero-length operations are not
29676 supported.
29678 The modifications only affect future launches of `hGraphExec`. Already
29679 enqueued or running launches of `hGraphExec` are not affected by this
29680 call. `node` is also not modified by this call.
29682 Returns :py:obj:`~.cudaErrorInvalidValue` if the memory operands'
29683 mappings changed or either the original or new memory operands are
29684 multidimensional.
29686 Parameters
29687 ----------
29688 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29689 The executable graph in which to set the specified node
29690 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29691 Memcpy node from the graph which was used to instantiate graphExec
29692 pNodeParams : :py:obj:`~.cudaMemcpy3DParms`
29693 Updated parameters to set
29695 Returns
29696 -------
29697 cudaError_t
29698 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
29700 See Also
29701 --------
29702 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParamsToSymbol`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParamsFromSymbol`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
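
Examples
--------
An illustrative sketch; `graphExec` and `node` are assumed to exist, and
any replacement operands must come from the same contexts as the
originals:

>>> from cuda.bindings import runtime
>>> err, p = runtime.cudaGraphMemcpyNodeGetParams(node)  # current copy description
>>> # ...retarget p.srcPtr / p.dstPtr at 1D buffers from the same contexts...
>>> err, = runtime.cudaGraphExecMemcpyNodeSetParams(graphExec, node, p)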
29703 """
29704 cdef cyruntime.cudaGraphNode_t cynode
29705 if node is None:
29706 pnode = 0
29707 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29708 pnode = int(node)
29709 else:
29710 pnode = int(cudaGraphNode_t(node))
29711 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29712 cdef cyruntime.cudaGraphExec_t cyhGraphExec
29713 if hGraphExec is None:
29714 phGraphExec = 0
29715 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29716 phGraphExec = int(hGraphExec)
29717 else:
29718 phGraphExec = int(cudaGraphExec_t(hGraphExec))
29719 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
29720 cdef cyruntime.cudaMemcpy3DParms* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
29721 with nogil:
29722 err = cyruntime.cudaGraphExecMemcpyNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr)
29723 return (_dict_cudaError_t[err],)
29725@cython.embedsignature(True)
29726def cudaGraphExecMemcpyNodeSetParams1D(hGraphExec, node, dst, src, size_t count, kind not None : cudaMemcpyKind):
29727 """ Sets the parameters for a memcpy node in the given graphExec to perform a 1-dimensional copy.
29729 Updates the work represented by `node` in `hGraphExec` as though `node`
29730 had contained the given params at instantiation. `node` must remain in
29731 the graph which was used to instantiate `hGraphExec`. Changed edges to
29732 and from `node` are ignored.
29734 `src` and `dst` must be allocated from the same contexts as the
29735 original source and destination memory. The instantiation-time memory
29736 operands must be 1-dimensional. Zero-length operations are not
29737 supported.
29739 The modifications only affect future launches of `hGraphExec`. Already
29740 enqueued or running launches of `hGraphExec` are not affected by this
29741 call. `node` is also not modified by this call.
29743 Returns :py:obj:`~.cudaErrorInvalidValue` if the memory operands'
29744 mappings changed or the original memory operands are multidimensional.
29746 Parameters
29747 ----------
29748 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29749 The executable graph in which to set the specified node
29750 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29751 Memcpy node from the graph which was used to instantiate graphExec
29752 dst : Any
29753 Destination memory address
29754 src : Any
29755 Source memory address
29756 count : size_t
29757 Size in bytes to copy
29758 kind : :py:obj:`~.cudaMemcpyKind`
29759 Type of transfer
29761 Returns
29762 -------
29763 cudaError_t
29764 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
29766 See Also
29767 --------
29768 :py:obj:`~.cudaGraphAddMemcpyNode`, :py:obj:`~.cudaGraphAddMemcpyNode1D`, :py:obj:`~.cudaGraphMemcpyNodeSetParams`, :py:obj:`~.cudaGraphMemcpyNodeSetParams1D`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
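
Examples
--------
An illustrative sketch; `dst`, `src` and `nbytes` are assumed to
describe device buffers (e.g. from :py:obj:`~.cudaMalloc`) allocated
from the same contexts as the node's original operands:

>>> from cuda.bindings import runtime
>>> err, = runtime.cudaGraphExecMemcpyNodeSetParams1D(
...     graphExec, node, dst, src, nbytes,
...     runtime.cudaMemcpyKind.cudaMemcpyDeviceToDevice)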
29769 """
29770 cdef cyruntime.cudaGraphNode_t cynode
29771 if node is None:
29772 pnode = 0
29773 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29774 pnode = int(node)
29775 else:
29776 pnode = int(cudaGraphNode_t(node))
29777 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29778 cdef cyruntime.cudaGraphExec_t cyhGraphExec
29779 if hGraphExec is None:
29780 phGraphExec = 0
29781 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29782 phGraphExec = int(hGraphExec)
29783 else:
29784 phGraphExec = int(cudaGraphExec_t(hGraphExec))
29785 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
29786 cydst = _HelperInputVoidPtr(dst)
29787 cdef void* cydst_ptr = <void*><void_ptr>cydst.cptr
29788 cysrc = _HelperInputVoidPtr(src)
29789 cdef void* cysrc_ptr = <void*><void_ptr>cysrc.cptr
29790 cdef cyruntime.cudaMemcpyKind cykind = kind.value
29791 with nogil:
29792 err = cyruntime.cudaGraphExecMemcpyNodeSetParams1D(cyhGraphExec, cynode, cydst_ptr, cysrc_ptr, count, cykind)
29793 return (_dict_cudaError_t[err],)
29795@cython.embedsignature(True)
29796def cudaGraphExecMemsetNodeSetParams(hGraphExec, node, pNodeParams : Optional[cudaMemsetParams]):
29797 """ Sets the parameters for a memset node in the given graphExec.
29799 Updates the work represented by `node` in `hGraphExec` as though `node`
29800 had contained `pNodeParams` at instantiation. `node` must remain in the
29801 graph which was used to instantiate `hGraphExec`. Changed edges to and
29802 from `node` are ignored.
29804 Zero-sized operations are not supported.
29806 The new destination pointer in `pNodeParams` must be to the same kind
29807 of allocation as the original destination pointer and have the same
29808 context association and device mapping as the original destination
29809 pointer.
29811 Both the value and pointer address may be updated. Changing other
29812 aspects of the memset (width, height, element size or pitch) may cause
29813 the update to be rejected. Specifically, for 2d memsets, all dimension
29814 changes are rejected. For 1d memsets, changes in height are explicitly
29815 rejected and other changes are opportunistically allowed if the
29816 resulting work maps onto the work resources already allocated for the
29817 node.
29819 The modifications only affect future launches of `hGraphExec`. Already
29820 enqueued or running launches of `hGraphExec` are not affected by this
29821 call. `node` is also not modified by this call.
29823 Parameters
29824 ----------
29825 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29826 The executable graph in which to set the specified node
29827 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29828 Memset node from the graph which was used to instantiate graphExec
29829 pNodeParams : :py:obj:`~.cudaMemsetParams`
29830 Updated parameters to set
29832 Returns
29833 -------
29834 cudaError_t
29835 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
29837 See Also
29838 --------
29839 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddMemsetNode`, :py:obj:`~.cudaGraphMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
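
Examples
--------
An illustrative sketch updating only the fill value, which is always
permitted; `graphExec` and `node` are assumed to exist:

>>> from cuda.bindings import runtime
>>> err, p = runtime.cudaGraphMemsetNodeGetParams(node)  # current memset description
>>> p.value = 0xFF                                       # new fill value
>>> err, = runtime.cudaGraphExecMemsetNodeSetParams(graphExec, node, p)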
29840 """
29841 cdef cyruntime.cudaGraphNode_t cynode
29842 if node is None:
29843 pnode = 0
29844 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29845 pnode = int(node)
29846 else:
29847 pnode = int(cudaGraphNode_t(node))
29848 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29849 cdef cyruntime.cudaGraphExec_t cyhGraphExec
29850 if hGraphExec is None:
29851 phGraphExec = 0
29852 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29853 phGraphExec = int(hGraphExec)
29854 else:
29855 phGraphExec = int(cudaGraphExec_t(hGraphExec))
29856 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
29857 cdef cyruntime.cudaMemsetParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
29858 with nogil:
29859 err = cyruntime.cudaGraphExecMemsetNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr)
29860 return (_dict_cudaError_t[err],)
29862@cython.embedsignature(True)
29863def cudaGraphExecHostNodeSetParams(hGraphExec, node, pNodeParams : Optional[cudaHostNodeParams]):
29864 """ Sets the parameters for a host node in the given graphExec.
29866 Updates the work represented by `node` in `hGraphExec` as though `node`
29867 had contained `pNodeParams` at instantiation. `node` must remain in the
29868 graph which was used to instantiate `hGraphExec`. Changed edges to and
29869 from `node` are ignored.
29871 The modifications only affect future launches of `hGraphExec`. Already
29872 enqueued or running launches of `hGraphExec` are not affected by this
29873 call. `node` is also not modified by this call.
29875 Parameters
29876 ----------
29877 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29878 The executable graph in which to set the specified node
29879 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29880 Host node from the graph which was used to instantiate graphExec
29881 pNodeParams : :py:obj:`~.cudaHostNodeParams`
29882 Updated parameters to set
29884 Returns
29885 -------
29886 cudaError_t
29887 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
29889 See Also
29890 --------
29891 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddHostNode`, :py:obj:`~.cudaGraphHostNodeSetParams`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
29892 """
29893 cdef cyruntime.cudaGraphNode_t cynode
29894 if node is None:
29895 pnode = 0
29896 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29897 pnode = int(node)
29898 else:
29899 pnode = int(cudaGraphNode_t(node))
29900 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29901 cdef cyruntime.cudaGraphExec_t cyhGraphExec
29902 if hGraphExec is None:
29903 phGraphExec = 0
29904 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29905 phGraphExec = int(hGraphExec)
29906 else:
29907 phGraphExec = int(cudaGraphExec_t(hGraphExec))
29908 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
29909 cdef cyruntime.cudaHostNodeParams* cypNodeParams_ptr = pNodeParams._pvt_ptr if pNodeParams is not None else NULL
29910 with nogil:
29911 err = cyruntime.cudaGraphExecHostNodeSetParams(cyhGraphExec, cynode, cypNodeParams_ptr)
29912 return (_dict_cudaError_t[err],)
29914@cython.embedsignature(True)
29915def cudaGraphExecChildGraphNodeSetParams(hGraphExec, node, childGraph):
29916 """ Updates node parameters in the child graph node in the given graphExec.
29918 Updates the work represented by `node` in `hGraphExec` as though the
29919 nodes contained in `node's` graph had the parameters contained in
29920 `childGraph's` nodes at instantiation. `node` must remain in the graph
29921 which was used to instantiate `hGraphExec`. Changed edges to and from
29922 `node` are ignored.
29924 The modifications only affect future launches of `hGraphExec`. Already
29925 enqueued or running launches of `hGraphExec` are not affected by this
29926 call. `node` is also not modified by this call.
29928 The topology of `childGraph`, as well as the node insertion order, must
29929 match that of the graph contained in `node`. See
29930 :py:obj:`~.cudaGraphExecUpdate()` for a list of restrictions on what
29931 can be updated in an instantiated graph. The update is recursive, so
29932 child graph nodes contained within the top level child graph will also
29933 be updated.
29935 Parameters
29936 ----------
29937 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29938 The executable graph in which to set the specified node
29939 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29940 Child graph node from the graph which was used to instantiate
graphExec
29941 childGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
29942 The graph supplying the updated parameters
29944 Returns
29945 -------
29946 cudaError_t
29947 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
29949 See Also
29950 --------
29951 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddChildGraphNode`, :py:obj:`~.cudaGraphChildGraphNodeGetGraph`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
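
Examples
--------
An illustrative sketch; `updatedChildGraph` is assumed to be
topologically identical (including node insertion order) to the graph
embedded in `node`:

>>> from cuda.bindings import runtime
>>> err, = runtime.cudaGraphExecChildGraphNodeSetParams(graphExec, node, updatedChildGraph)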
29952 """
29953 cdef cyruntime.cudaGraph_t cychildGraph
29954 if childGraph is None:
29955 pchildGraph = 0
29956 elif isinstance(childGraph, (cudaGraph_t,driver.CUgraph)):
29957 pchildGraph = int(childGraph)
29958 else:
29959 pchildGraph = int(cudaGraph_t(childGraph))
29960 cychildGraph = <cyruntime.cudaGraph_t><void_ptr>pchildGraph
29961 cdef cyruntime.cudaGraphNode_t cynode
29962 if node is None:
29963 pnode = 0
29964 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
29965 pnode = int(node)
29966 else:
29967 pnode = int(cudaGraphNode_t(node))
29968 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
29969 cdef cyruntime.cudaGraphExec_t cyhGraphExec
29970 if hGraphExec is None:
29971 phGraphExec = 0
29972 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
29973 phGraphExec = int(hGraphExec)
29974 else:
29975 phGraphExec = int(cudaGraphExec_t(hGraphExec))
29976 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
29977 with nogil:
29978 err = cyruntime.cudaGraphExecChildGraphNodeSetParams(cyhGraphExec, cynode, cychildGraph)
29979 return (_dict_cudaError_t[err],)
29981@cython.embedsignature(True)
29982def cudaGraphExecEventRecordNodeSetEvent(hGraphExec, hNode, event):
29983 """ Sets the event for an event record node in the given graphExec.
29985 Sets the event of an event record node in an executable graph
29986 `hGraphExec`. The node is identified by the corresponding node `hNode`
29987 in the non-executable graph, from which the executable graph was
29988 instantiated.
29990 The modifications only affect future launches of `hGraphExec`. Already
29991 enqueued or running launches of `hGraphExec` are not affected by this
29992 call. `hNode` is also not modified by this call.
29994 Parameters
29995 ----------
29996 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
29997 The executable graph in which to set the specified node
29998 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
29999 Event record node from the graph from which graphExec was
30000 instantiated
30001 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
30002 Updated event to use
30004 Returns
30005 -------
30006 cudaError_t
30007 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30009 See Also
30010 --------
30011 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddEventRecordNode`, :py:obj:`~.cudaGraphEventRecordNodeGetEvent`, :py:obj:`~.cudaGraphEventWaitNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
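
Examples
--------
An illustrative sketch swapping in a freshly created event; `graphExec`
and `hNode` are assumed to exist:

>>> from cuda.bindings import runtime
>>> err, newEvent = runtime.cudaEventCreate()
>>> err, = runtime.cudaGraphExecEventRecordNodeSetEvent(graphExec, hNode, newEvent)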
30012 """
30013 cdef cyruntime.cudaEvent_t cyevent
30014 if event is None:
30015 pevent = 0
30016 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
30017 pevent = int(event)
30018 else:
30019 pevent = int(cudaEvent_t(event))
30020 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
30021 cdef cyruntime.cudaGraphNode_t cyhNode
30022 if hNode is None:
30023 phNode = 0
30024 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
30025 phNode = int(hNode)
30026 else:
30027 phNode = int(cudaGraphNode_t(hNode))
30028 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
30029 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30030 if hGraphExec is None:
30031 phGraphExec = 0
30032 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30033 phGraphExec = int(hGraphExec)
30034 else:
30035 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30036 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30037 with nogil:
30038 err = cyruntime.cudaGraphExecEventRecordNodeSetEvent(cyhGraphExec, cyhNode, cyevent)
30039 return (_dict_cudaError_t[err],)
30041@cython.embedsignature(True)
30042def cudaGraphExecEventWaitNodeSetEvent(hGraphExec, hNode, event):
30043 """ Sets the event for an event wait node in the given graphExec.
30045 Sets the event of an event wait node in an executable graph
30046 `hGraphExec`. The node is identified by the corresponding node `hNode`
30047 in the non-executable graph, from which the executable graph was
30048 instantiated.
30050 The modifications only affect future launches of `hGraphExec`. Already
30051 enqueued or running launches of `hGraphExec` are not affected by this
30052 call. `hNode` is also not modified by this call.
30054 Parameters
30055 ----------
30056 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30057 The executable graph in which to set the specified node
30058 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
30059 Event wait node from the graph from which graphExec was
30060 instantiated
30061 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
30062 Updated event to use
30064 Returns
30065 -------
30066 cudaError_t
30067 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30069 See Also
30070 --------
30071 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddEventWaitNode`, :py:obj:`~.cudaGraphEventWaitNodeGetEvent`, :py:obj:`~.cudaGraphEventRecordNodeSetEvent`, :py:obj:`~.cudaEventRecordWithFlags`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
30072 """
30073 cdef cyruntime.cudaEvent_t cyevent
30074 if event is None:
30075 pevent = 0
30076 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
30077 pevent = int(event)
30078 else:
30079 pevent = int(cudaEvent_t(event))
30080 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
30081 cdef cyruntime.cudaGraphNode_t cyhNode
30082 if hNode is None:
30083 phNode = 0
30084 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
30085 phNode = int(hNode)
30086 else:
30087 phNode = int(cudaGraphNode_t(hNode))
30088 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
30089 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30090 if hGraphExec is None:
30091 phGraphExec = 0
30092 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30093 phGraphExec = int(hGraphExec)
30094 else:
30095 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30096 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30097 with nogil:
30098 err = cyruntime.cudaGraphExecEventWaitNodeSetEvent(cyhGraphExec, cyhNode, cyevent)
30099 return (_dict_cudaError_t[err],)
30101@cython.embedsignature(True)
30102def cudaGraphExecExternalSemaphoresSignalNodeSetParams(hGraphExec, hNode, nodeParams : Optional[cudaExternalSemaphoreSignalNodeParams]):
30103 """ Sets the parameters for an external semaphore signal node in the given graphExec.
30105 Sets the parameters of an external semaphore signal node in an
30106 executable graph `hGraphExec`. The node is identified by the
30107 corresponding node `hNode` in the non-executable graph, from which the
30108 executable graph was instantiated.
30110 `hNode` must not have been removed from the original graph.
30112 The modifications only affect future launches of `hGraphExec`. Already
30113 enqueued or running launches of `hGraphExec` are not affected by this
30114 call. `hNode` is also not modified by this call.
30116 Changing `nodeParams->numExtSems` is not supported.
30118 Parameters
30119 ----------
30120 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30121 The executable graph in which to set the specified node
30122 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
30123 Semaphore signal node from the graph from which graphExec was
30124 instantiated
30125 nodeParams : :py:obj:`~.cudaExternalSemaphoreSignalNodeParams`
30126 Updated parameters to set
30128 Returns
30129 -------
30130 cudaError_t
30131 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30133 See Also
30134 --------
30135 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresSignalNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresWaitNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
30136 """
30137 cdef cyruntime.cudaGraphNode_t cyhNode
30138 if hNode is None:
30139 phNode = 0
30140 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
30141 phNode = int(hNode)
30142 else:
30143 phNode = int(cudaGraphNode_t(hNode))
30144 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
30145 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30146 if hGraphExec is None:
30147 phGraphExec = 0
30148 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30149 phGraphExec = int(hGraphExec)
30150 else:
30151 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30152 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30153 cdef cyruntime.cudaExternalSemaphoreSignalNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
30154 with nogil:
30155 err = cyruntime.cudaGraphExecExternalSemaphoresSignalNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr)
30156 return (_dict_cudaError_t[err],)
30158@cython.embedsignature(True)
30159def cudaGraphExecExternalSemaphoresWaitNodeSetParams(hGraphExec, hNode, nodeParams : Optional[cudaExternalSemaphoreWaitNodeParams]):
30160 """ Sets the parameters for an external semaphore wait node in the given graphExec.
30162 Sets the parameters of an external semaphore wait node in an executable
30163 graph `hGraphExec`. The node is identified by the corresponding node
30164 `hNode` in the non-executable graph, from which the executable graph
30165 was instantiated.
30167 `hNode` must not have been removed from the original graph.
30169 The modifications only affect future launches of `hGraphExec`. Already
30170 enqueued or running launches of `hGraphExec` are not affected by this
30171 call. `hNode` is also not modified by this call.
30173 Changing `nodeParams->numExtSems` is not supported.
30175 Parameters
30176 ----------
30177 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30178 The executable graph in which to set the specified node
30179 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
30180 Semaphore wait node from the graph from which graphExec was
30181 instantiated
30182 nodeParams : :py:obj:`~.cudaExternalSemaphoreWaitNodeParams`
30183 Updated parameters to set
30185 Returns
30186 -------
30187 cudaError_t
30188 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30190 See Also
30191 --------
30192 :py:obj:`~.cudaGraphExecNodeSetParams`, :py:obj:`~.cudaGraphAddExternalSemaphoresWaitNode`, :py:obj:`~.cudaImportExternalSemaphore`, :py:obj:`~.cudaSignalExternalSemaphoresAsync`, :py:obj:`~.cudaWaitExternalSemaphoresAsync`, :py:obj:`~.cudaGraphExecKernelNodeSetParams`, :py:obj:`~.cudaGraphExecMemcpyNodeSetParams`, :py:obj:`~.cudaGraphExecMemsetNodeSetParams`, :py:obj:`~.cudaGraphExecHostNodeSetParams`, :py:obj:`~.cudaGraphExecChildGraphNodeSetParams`, :py:obj:`~.cudaGraphExecEventRecordNodeSetEvent`, :py:obj:`~.cudaGraphExecEventWaitNodeSetEvent`, :py:obj:`~.cudaGraphExecExternalSemaphoresSignalNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
30193 """
30194 cdef cyruntime.cudaGraphNode_t cyhNode
30195 if hNode is None:
30196 phNode = 0
30197 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
30198 phNode = int(hNode)
30199 else:
30200 phNode = int(cudaGraphNode_t(hNode))
30201 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
30202 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30203 if hGraphExec is None:
30204 phGraphExec = 0
30205 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30206 phGraphExec = int(hGraphExec)
30207 else:
30208 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30209 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30210 cdef cyruntime.cudaExternalSemaphoreWaitNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
30211 with nogil:
30212 err = cyruntime.cudaGraphExecExternalSemaphoresWaitNodeSetParams(cyhGraphExec, cyhNode, cynodeParams_ptr)
30213 return (_dict_cudaError_t[err],)
30215@cython.embedsignature(True)
30216def cudaGraphNodeSetEnabled(hGraphExec, hNode, unsigned int isEnabled):
30217 """ Enables or disables the specified node in the given graphExec.
30219 Sets `hNode` to be either enabled or disabled. Disabled nodes are
30220 functionally equivalent to empty nodes until they are reenabled.
30221 Existing node parameters are not affected by disabling/enabling the
30222 node.
30224 The node is identified by the corresponding node `hNode` in the non-
30225 executable graph, from which the executable graph was instantiated.
30227 `hNode` must not have been removed from the original graph.
30229 The modifications only affect future launches of `hGraphExec`. Already
30230 enqueued or running launches of `hGraphExec` are not affected by this
30231 call. `hNode` is also not modified by this call.
30233 Parameters
30234 ----------
30235 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30236 The executable graph in which to set the specified node
30237 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
30238 Node from the graph from which graphExec was instantiated
30239 isEnabled : unsigned int
30240 Node is enabled if != 0, otherwise the node is disabled
30242 Returns
30243 -------
30244 cudaError_t
30245 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30247 See Also
30248 --------
30249 :py:obj:`~.cudaGraphNodeGetEnabled`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphLaunch`
30251 Notes
30252 -----
30253 Currently only kernel, memset and memcpy nodes are supported.
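
Examples
--------
An illustrative sketch; toggling a node off makes it behave like an
empty node on subsequent launches:

>>> from cuda.bindings import runtime
>>> err, = runtime.cudaGraphNodeSetEnabled(graphExec, hNode, 0)  # disable the node
>>> err, isEnabled = runtime.cudaGraphNodeGetEnabled(graphExec, hNode)
>>> assert isEnabled == 0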
30254 """
30255 cdef cyruntime.cudaGraphNode_t cyhNode
30256 if hNode is None:
30257 phNode = 0
30258 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
30259 phNode = int(hNode)
30260 else:
30261 phNode = int(cudaGraphNode_t(hNode))
30262 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
30263 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30264 if hGraphExec is None:
30265 phGraphExec = 0
30266 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30267 phGraphExec = int(hGraphExec)
30268 else:
30269 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30270 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30271 with nogil:
30272 err = cyruntime.cudaGraphNodeSetEnabled(cyhGraphExec, cyhNode, isEnabled)
30273 return (_dict_cudaError_t[err],)
30275@cython.embedsignature(True)
30276def cudaGraphNodeGetEnabled(hGraphExec, hNode):
30277 """ Query whether a node in the given graphExec is enabled.
30279 Sets isEnabled to 1 if `hNode` is enabled, or 0 if `hNode` is disabled.
30281 The node is identified by the corresponding node `hNode` in the non-
30282 executable graph, from which the executable graph was instantiated.
30284 `hNode` must not have been removed from the original graph.
30286 Parameters
30287 ----------
30288 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30289 The executable graph in which to set the specified node
30290 hNode : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
30291 Node from the graph from which graphExec was instantiated
30293 Returns
30294 -------
30295 cudaError_t
30296 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30297 isEnabled : unsigned int
30298 Location to return the enabled status of the node
30300 See Also
30301 --------
30302 :py:obj:`~.cudaGraphNodeSetEnabled`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphLaunch`
30304 Notes
30305 -----
30306 Currently only kernel, memset and memcpy nodes are supported.
30307 """
30308 cdef cyruntime.cudaGraphNode_t cyhNode
30309 if hNode is None:
30310 phNode = 0
30311 elif isinstance(hNode, (cudaGraphNode_t,driver.CUgraphNode)):
30312 phNode = int(hNode)
30313 else:
30314 phNode = int(cudaGraphNode_t(hNode))
30315 cyhNode = <cyruntime.cudaGraphNode_t><void_ptr>phNode
30316 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30317 if hGraphExec is None:
30318 phGraphExec = 0
30319 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30320 phGraphExec = int(hGraphExec)
30321 else:
30322 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30323 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30324 cdef unsigned int isEnabled = 0
30325 with nogil:
30326 err = cyruntime.cudaGraphNodeGetEnabled(cyhGraphExec, cyhNode, &isEnabled)
30327 if err != cyruntime.cudaSuccess:
30328 return (_dict_cudaError_t[err], None)
30329 return (_dict_cudaError_t[err], isEnabled)
30331@cython.embedsignature(True)
30332def cudaGraphExecUpdate(hGraphExec, hGraph):
30333 """ Check whether an executable graph can be updated with a graph and perform the update if possible.
30335 Updates the node parameters in the instantiated graph specified by
30336 `hGraphExec` with the node parameters in a topologically identical
30337 graph specified by `hGraph`.
30339 Limitations:
30341 - Kernel nodes:
30343 - The owning context of the function cannot change.
30345 - A node whose function originally did not use CUDA dynamic
30346 parallelism cannot be updated to a function which uses CDP.
30348 - A node whose function originally did not make device-side update
30349 calls cannot be updated to a function which makes device-side
30350 update calls.
30352 - A cooperative node cannot be updated to a non-cooperative node, and
30353 vice-versa.
30355 - If the graph was instantiated with
30356 cudaGraphInstantiateFlagUseNodePriority, the priority attribute
30357 cannot change. Equality is checked on the originally requested
30358 priority values, before they are clamped to the device's supported
30359 range.
30361 - If `hGraphExec` was not instantiated for device launch, a node
30362 whose function originally did not use device-side
30363 :py:obj:`~.cudaGraphLaunch()` cannot be updated to a function which
30364 uses device-side :py:obj:`~.cudaGraphLaunch()` unless the node
30365 resides on the same device as nodes which contained such calls at
30366 instantiate-time. If no such calls were present at instantiation,
30367 these updates cannot be performed at all.
30369 - Neither `hGraph` nor `hGraphExec` may contain device-updatable
30370 kernel nodes.
30372 - Memset and memcpy nodes:
30374 - The CUDA device(s) to which the operand(s) was allocated/mapped
30375 cannot change.
30377 - The source/destination memory must be allocated from the same
30378 contexts as the original source/destination memory.
30380 - For 2d memsets, only address and assigned value may be updated.
30382 - For 1d memsets, updating dimensions is also allowed, but may fail
30383 if the resulting operation doesn't map onto the work resources
30384 already allocated for the node.
30386 - Additional memcpy node restrictions:
30388 - Changing either the source or destination memory type (i.e.
30389 CU_MEMORYTYPE_DEVICE, CU_MEMORYTYPE_ARRAY, etc.) is not supported.
30391 - Conditional nodes:
30393 - Changing node parameters is not supported.
30395 - Changing parameters of nodes within the conditional body graph is
30396 subject to the rules above.
30398 - Conditional handle flags and default values are updated as part of
30399 the graph update.
30401 Note: The API may add further restrictions in future releases. The
30402 return code should always be checked.
30404 cudaGraphExecUpdate sets the result member of `resultInfo` to
30405 cudaGraphExecUpdateErrorTopologyChanged under the following conditions:
30407 - The count of nodes directly in `hGraphExec` and `hGraph` differ, in
30408 which case resultInfo->errorNode is set to NULL.
30410 - `hGraph` has more exit nodes than `hGraphExec`, in which case
30411 resultInfo->errorNode is set to one of the exit nodes in hGraph.
30413 - A node in `hGraph` has a different number of dependencies than the
30414 node from `hGraphExec` it is paired with, in which case
30415 resultInfo->errorNode is set to the node from `hGraph`.
30417 - A node in `hGraph` has a dependency that does not match with the
30418 corresponding dependency of the paired node from `hGraphExec`.
30419 resultInfo->errorNode will be set to the node from `hGraph`.
30420 resultInfo->errorFromNode will be set to the mismatched dependency.
30421 The dependencies are paired based on edge order and a dependency does
30422 not match when the nodes are already paired based on other edges
30423 examined in the graph.
30425 cudaGraphExecUpdate sets the result member of `resultInfo` to:
30427 - cudaGraphExecUpdateError if passed an invalid value.
30429 - cudaGraphExecUpdateErrorTopologyChanged if the graph topology changed.
30431 - cudaGraphExecUpdateErrorNodeTypeChanged if the type of a node
30432 changed, in which case resultInfo->errorNode is set to the node from
30433 `hGraph`.
30435 - cudaGraphExecUpdateErrorFunctionChanged if the function of a kernel
30436 node changed (CUDA driver < 11.2).
30438 - cudaGraphExecUpdateErrorUnsupportedFunctionChange if the func field
30439 of a kernel changed in an unsupported way (see the note above), in
30440 which case resultInfo->errorNode is set to the node from `hGraph`.
30442 - cudaGraphExecUpdateErrorParametersChanged if any parameters to a node
30443 changed in a way that is not supported, in which case
30444 resultInfo->errorNode is set to the node from `hGraph`.
30446 - cudaGraphExecUpdateErrorAttributesChanged if any attributes of a node
30447 changed in a way that is not supported, in which case
30448 resultInfo->errorNode is set to the node from `hGraph`.
30450 - cudaGraphExecUpdateErrorNotSupported if something about a node is
30451 unsupported, like the node's type or configuration, in which case
30452 resultInfo->errorNode is set to the node from `hGraph`.
30454 If the update fails for a reason not listed above, the result member of
30455 `resultInfo` will be set to cudaGraphExecUpdateError. If the update
30456 succeeds, the result member will be set to cudaGraphExecUpdateSuccess.
30458 cudaGraphExecUpdate returns cudaSuccess when the update was performed
30459 successfully. It returns cudaErrorGraphExecUpdateFailure if the graph
30460 update was not performed because it included changes which violated
30461 constraints specific to instantiated graph update.
30463 Parameters
30464 ----------
30465 hGraphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30466 The instantiated graph to be updated
30467 hGraph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
30468 The graph containing the updated parameters
30470 Returns
30471 -------
30472 cudaError_t
30473 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorGraphExecUpdateFailure`,
30474 resultInfo : :py:obj:`~.cudaGraphExecUpdateResultInfo`
30475 the error info structure
30477 See Also
30478 --------
30479 :py:obj:`~.cudaGraphInstantiate`
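
Examples
--------
An illustrative sketch of the common update-else-reinstantiate pattern;
`graphExec` and a topologically identical `updatedGraph` are assumed to
exist:

>>> from cuda.bindings import runtime
>>> err, resultInfo = runtime.cudaGraphExecUpdate(graphExec, updatedGraph)
>>> if err == runtime.cudaError_t.cudaErrorGraphExecUpdateFailure:
...     # In-place update was rejected; fall back to a fresh instantiation
...     err, graphExec = runtime.cudaGraphInstantiate(updatedGraph, 0)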
30480 """
30481 cdef cyruntime.cudaGraph_t cyhGraph
30482 if hGraph is None:
30483 phGraph = 0
30484 elif isinstance(hGraph, (cudaGraph_t,driver.CUgraph)):
30485 phGraph = int(hGraph)
30486 else:
30487 phGraph = int(cudaGraph_t(hGraph))
30488 cyhGraph = <cyruntime.cudaGraph_t><void_ptr>phGraph
30489 cdef cyruntime.cudaGraphExec_t cyhGraphExec
30490 if hGraphExec is None:
30491 phGraphExec = 0
30492 elif isinstance(hGraphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30493 phGraphExec = int(hGraphExec)
30494 else:
30495 phGraphExec = int(cudaGraphExec_t(hGraphExec))
30496 cyhGraphExec = <cyruntime.cudaGraphExec_t><void_ptr>phGraphExec
30497 cdef cudaGraphExecUpdateResultInfo resultInfo = cudaGraphExecUpdateResultInfo()
30498 with nogil:
30499 err = cyruntime.cudaGraphExecUpdate(cyhGraphExec, cyhGraph, <cyruntime.cudaGraphExecUpdateResultInfo*>resultInfo._pvt_ptr)
30500 if err != cyruntime.cudaSuccess:
30501 return (_dict_cudaError_t[err], None)
30502 return (_dict_cudaError_t[err], resultInfo)
30504@cython.embedsignature(True)
30505def cudaGraphUpload(graphExec, stream):
30506 """ Uploads an executable graph in a stream.
30508 Uploads `graphExec` to the device in `stream` without executing it.
30509 Uploads of the same `graphExec` will be serialized. Each upload is
30510 ordered behind both any previous work in `stream` and any previous
30511 launches of `graphExec`. Uses memory cached by `stream` to back the
30512 allocations owned by `graphExec`.
30514 Parameters
30515 ----------
30516 graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30517 Executable graph to upload
30518 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
30519 Stream in which to upload the graph
30521 Returns
30522 -------
30523 cudaError_t
30524 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`,
30526 See Also
30527 --------
30528 :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphLaunch`, :py:obj:`~.cudaGraphExecDestroy`
30529 """
30530 cdef cyruntime.cudaStream_t cystream
30531 if stream is None:
30532 pstream = 0
30533 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
30534 pstream = int(stream)
30535 else:
30536 pstream = int(cudaStream_t(stream))
30537 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
30538 cdef cyruntime.cudaGraphExec_t cygraphExec
30539 if graphExec is None:
30540 pgraphExec = 0
30541 elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30542 pgraphExec = int(graphExec)
30543 else:
30544 pgraphExec = int(cudaGraphExec_t(graphExec))
30545 cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
30546 with nogil:
30547 err = cyruntime.cudaGraphUpload(cygraphExec, cystream)
30548 return (_dict_cudaError_t[err],)
30550@cython.embedsignature(True)
30551def cudaGraphLaunch(graphExec, stream):
30552 """ Launches an executable graph in a stream.
30554 Executes `graphExec` in `stream`. Only one instance of `graphExec` may
30555 be executing at a time. Each launch is ordered behind both any previous
30556 work in `stream` and any previous launches of `graphExec`. To execute a
30557 graph concurrently, it must be instantiated multiple times into
30558 multiple executable graphs.
30560 If any allocations created by `graphExec` remain unfreed (from a
30561 previous launch) and `graphExec` was not instantiated with
30562 :py:obj:`~.cudaGraphInstantiateFlagAutoFreeOnLaunch`, the launch will
30563 fail with :py:obj:`~.cudaErrorInvalidValue`.
30565 Parameters
30566 ----------
30567 graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30568 Executable graph to launch
30569 stream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
30570 Stream in which to launch the graph
30572 Returns
30573 -------
30574 cudaError_t
30575 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30577 See Also
30578 --------
30579 :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphExecDestroy`
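
Examples
--------
An illustrative end-to-end sketch; `graphExec` is assumed to come from
:py:obj:`~.cudaGraphInstantiate`:

>>> from cuda.bindings import runtime
>>> err, stream = runtime.cudaStreamCreate()
>>> err, = runtime.cudaGraphUpload(graphExec, stream)  # optional: prepay setup cost
>>> err, = runtime.cudaGraphLaunch(graphExec, stream)
>>> err, = runtime.cudaStreamSynchronize(stream)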
30580 """
30581 cdef cyruntime.cudaStream_t cystream
30582 if stream is None:
30583 pstream = 0
30584 elif isinstance(stream, (cudaStream_t,driver.CUstream)):
30585 pstream = int(stream)
30586 else:
30587 pstream = int(cudaStream_t(stream))
30588 cystream = <cyruntime.cudaStream_t><void_ptr>pstream
30589 cdef cyruntime.cudaGraphExec_t cygraphExec
30590 if graphExec is None:
30591 pgraphExec = 0
30592 elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30593 pgraphExec = int(graphExec)
30594 else:
30595 pgraphExec = int(cudaGraphExec_t(graphExec))
30596 cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
30597 with nogil:
30598 err = cyruntime.cudaGraphLaunch(cygraphExec, cystream)
30599 return (_dict_cudaError_t[err],)
30601@cython.embedsignature(True)
30602def cudaGraphExecDestroy(graphExec):
30603 """ Destroys an executable graph.
30605 Destroys the executable graph specified by `graphExec`.
30607 Parameters
30608 ----------
30609 graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
30610 Executable graph to destroy
30612 Returns
30613 -------
30614 cudaError_t
30615 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30617 See Also
30618 --------
30619 :py:obj:`~.cudaGraphInstantiate`, :py:obj:`~.cudaGraphUpload`, :py:obj:`~.cudaGraphLaunch`
30620 """
30621 cdef cyruntime.cudaGraphExec_t cygraphExec
30622 if graphExec is None:
30623 pgraphExec = 0
30624 elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
30625 pgraphExec = int(graphExec)
30626 else:
30627 pgraphExec = int(cudaGraphExec_t(graphExec))
30628 cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
30629 with nogil:
30630 err = cyruntime.cudaGraphExecDestroy(cygraphExec)
30631 return (_dict_cudaError_t[err],)
30633@cython.embedsignature(True)
30634def cudaGraphDestroy(graph):
30635 """ Destroys a graph.
30637 Destroys the graph specified by `graph`, as well as all of its nodes.
30639 Parameters
30640 ----------
30641 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
30642 Graph to destroy
30644 Returns
30645 -------
30646 cudaError_t
30647 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30649 See Also
30650 --------
30651 :py:obj:`~.cudaGraphCreate`
30652 """
30653 cdef cyruntime.cudaGraph_t cygraph
30654 if graph is None:
30655 pgraph = 0
30656 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
30657 pgraph = int(graph)
30658 else:
30659 pgraph = int(cudaGraph_t(graph))
30660 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
30661 with nogil:
30662 err = cyruntime.cudaGraphDestroy(cygraph)
30663 return (_dict_cudaError_t[err],)
30665@cython.embedsignature(True)
30666def cudaGraphDebugDotPrint(graph, char* path, unsigned int flags):
30667 """ Write a DOT file describing graph structure.
30669 Using the provided `graph`, write to `path` a DOT formatted description
30670 of the graph. By default this includes the graph topology, node types,
30671 node id, kernel names and memcpy direction. `flags` can be specified to
30672 write more detailed information about each node type such as parameter
30673 values, kernel attributes, node and function handles.
30675 Parameters
30676 ----------
30677 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
30678 The graph to create a DOT file from
30679 path : bytes
30680 The path to write the DOT file to
30681 flags : unsigned int
30682 Flags from cudaGraphDebugDotFlags for specifying which additional
30683 node information to write
30685 Returns
30686 -------
30687 cudaError_t
30688 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorOperatingSystem`
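
Examples
--------
An illustrative sketch; note that `path` must be passed as bytes:

>>> from cuda.bindings import runtime
>>> flags = runtime.cudaGraphDebugDotFlags.cudaGraphDebugDotFlagsVerbose
>>> err, = runtime.cudaGraphDebugDotPrint(graph, b"graph.dot", flags)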
30689 """
30690 cdef cyruntime.cudaGraph_t cygraph
30691 if graph is None:
30692 pgraph = 0
30693 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
30694 pgraph = int(graph)
30695 else:
30696 pgraph = int(cudaGraph_t(graph))
30697 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
30698 with nogil:
30699 err = cyruntime.cudaGraphDebugDotPrint(cygraph, path, flags)
30700 return (_dict_cudaError_t[err],)
30702@cython.embedsignature(True)
30703def cudaUserObjectCreate(ptr, destroy, unsigned int initialRefcount, unsigned int flags):
30704 """ Create a user object.
30706 Create a user object with the specified destructor callback and initial
30707 reference count. The initial references are owned by the caller.
30709 Destructor callbacks cannot make CUDA API calls and should avoid
30710 blocking behavior, as they are executed by a shared internal thread.
30711 Another thread may be signaled to perform such actions, if it does not
30712 block forward progress of tasks scheduled through CUDA.
30714 See CUDA User Objects in the CUDA C++ Programming Guide for more
30715 information on user objects.
30717 Parameters
30718 ----------
30719 ptr : Any
30720 The pointer to pass to the destroy function
30721 destroy : :py:obj:`~.cudaHostFn_t`
30722 Callback to free the user object when it is no longer in use
30723 initialRefcount : unsigned int
30724 The initial refcount to create the object with, typically 1. The
30725 initial references are owned by the calling thread.
30726 flags : unsigned int
30727 Currently it is required to pass
30728 :py:obj:`~.cudaUserObjectNoDestructorSync`, which is the only
30729 defined flag. This indicates that the destroy callback cannot be
30730 waited on by any CUDA API. Users requiring synchronization of the
30731 callback should signal its completion manually.
30733 Returns
30734 -------
30735 cudaError_t
30736 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30737 object_out : :py:obj:`~.cudaUserObject_t`
30738 Location to return the user object handle
30740 See Also
30741 --------
30742 :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate`
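
Examples
--------
An illustrative sketch using a ctypes callback as the destructor; the
callback object must be kept alive by the caller for as long as the
user object may be destroyed:

>>> import ctypes
>>> from cuda.bindings import runtime
>>> @ctypes.CFUNCTYPE(None, ctypes.c_void_p)
... def _destroy(ptr):
...     pass  # release the resource referenced by ptr here
>>> err, obj = runtime.cudaUserObjectCreate(
...     0, ctypes.cast(_destroy, ctypes.c_void_p).value, 1,
...     runtime.cudaUserObjectFlags.cudaUserObjectNoDestructorSync)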
30743 """
30744 cdef cyruntime.cudaHostFn_t cydestroy
30745 if destroy is None:
30746 pdestroy = 0
30747 elif isinstance(destroy, (cudaHostFn_t,)):
30748 pdestroy = int(destroy)
30749 else:
30750 pdestroy = int(cudaHostFn_t(destroy))
30751 cydestroy = <cyruntime.cudaHostFn_t><void_ptr>pdestroy
30752 cdef cudaUserObject_t object_out = cudaUserObject_t()
30753 cyptr = _HelperInputVoidPtr(ptr)
30754 cdef void* cyptr_ptr = <void*><void_ptr>cyptr.cptr
30755 with nogil:
30756 err = cyruntime.cudaUserObjectCreate(<cyruntime.cudaUserObject_t*>object_out._pvt_ptr, cyptr_ptr, cydestroy, initialRefcount, flags)
30757 if err != cyruntime.cudaSuccess:
30758 return (_dict_cudaError_t[err], None)
30759 return (_dict_cudaError_t[err], object_out)
30761@cython.embedsignature(True)
30762def cudaUserObjectRetain(object, unsigned int count):
30763 """ Retain a reference to a user object.
30765 Retains new references to a user object. The new references are owned
30766 by the caller.
30768 See CUDA User Objects in the CUDA C++ Programming Guide for more
30769 information on user objects.
30771 Parameters
30772 ----------
30773 object : :py:obj:`~.cudaUserObject_t`
30774 The object to retain
30775 count : unsigned int
30776 The number of references to retain, typically 1. Must be nonzero
30777 and not larger than INT_MAX.
30779 Returns
30780 -------
30781 cudaError_t
30782 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30784 See Also
30785 --------
30786 :py:obj:`~.cudaUserObjectCreate`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate`
30787 """
30788 cdef cyruntime.cudaUserObject_t cyobject
30789 if object is None:
30790 pobject = 0
30791 elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)):
30792 pobject = int(object)
30793 else:
30794 pobject = int(cudaUserObject_t(object))
30795 cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
30796 with nogil:
30797 err = cyruntime.cudaUserObjectRetain(cyobject, count)
30798 return (_dict_cudaError_t[err],)
30800@cython.embedsignature(True)
30801def cudaUserObjectRelease(object, unsigned int count):
30802 """ Release a reference to a user object.
30804 Releases user object references owned by the caller. The object's
30805 destructor is invoked if the reference count reaches zero.
30807 It is undefined behavior to release references not owned by the caller,
30808 or to use a user object handle after all references are released.
30810 See CUDA User Objects in the CUDA C++ Programming Guide for more
30811 information on user objects.
30813 Parameters
30814 ----------
30815 object : :py:obj:`~.cudaUserObject_t`
30816 The object to release
30817 count : unsigned int
30818 The number of references to release, typically 1. Must be nonzero
30819 and not larger than INT_MAX.
30821 Returns
30822 -------
30823 cudaError_t
30824 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30826 See Also
30827 --------
30828 :py:obj:`~.cudaUserObjectCreate`, :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate`
30829 """
30830 cdef cyruntime.cudaUserObject_t cyobject
30831 if object is None:
30832 pobject = 0
30833 elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)):
30834 pobject = int(object)
30835 else:
30836 pobject = int(cudaUserObject_t(object))
30837 cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
30838 with nogil:
30839 err = cyruntime.cudaUserObjectRelease(cyobject, count)
30840 return (_dict_cudaError_t[err],)
30842@cython.embedsignature(True)
30843def cudaGraphRetainUserObject(graph, object, unsigned int count, unsigned int flags):
30844 """ Retain a reference to a user object from a graph.
30846 Creates or moves user object references that will be owned by a CUDA
30847 graph.
30849 See CUDA User Objects in the CUDA C++ Programming Guide for more
30850 information on user objects.
30852 Parameters
30853 ----------
30854 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
30855 The graph to associate the reference with
30856 object : :py:obj:`~.cudaUserObject_t`
30857 The user object to retain a reference for
30858 count : unsigned int
30859 The number of references to add to the graph, typically 1. Must be
30860 nonzero and not larger than INT_MAX.
30861 flags : unsigned int
30862 The optional flag :py:obj:`~.cudaGraphUserObjectMove` transfers
30863 references from the calling thread, rather than creating new
30864 references. Pass 0 to create new references.
30866 Returns
30867 -------
30868 cudaError_t
30869 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30871 See Also
30872 --------
30873 :py:obj:`~.cudaUserObjectCreate`, :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphReleaseUserObject`, :py:obj:`~.cudaGraphCreate`
30874 """
30875 cdef cyruntime.cudaUserObject_t cyobject
30876 if object is None:
30877 pobject = 0
30878 elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)):
30879 pobject = int(object)
30880 else:
30881 pobject = int(cudaUserObject_t(object))
30882 cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
30883 cdef cyruntime.cudaGraph_t cygraph
30884 if graph is None:
30885 pgraph = 0
30886 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
30887 pgraph = int(graph)
30888 else:
30889 pgraph = int(cudaGraph_t(graph))
30890 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
30891 with nogil:
30892 err = cyruntime.cudaGraphRetainUserObject(cygraph, cyobject, count, flags)
30893 return (_dict_cudaError_t[err],)
30895@cython.embedsignature(True)
30896def cudaGraphReleaseUserObject(graph, object, unsigned int count):
30897 """ Release a user object reference from a graph.
30899 Releases user object references owned by a graph.
30901 See CUDA User Objects in the CUDA C++ Programming Guide for more
30902 information on user objects.
30904 Parameters
30905 ----------
30906 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
30907 The graph that will release the reference
30908 object : :py:obj:`~.cudaUserObject_t`
30909 The user object to release a reference for
30910 count : unsigned int
30911 The number of references to release, typically 1. Must be nonzero
30912 and not larger than INT_MAX.
30914 Returns
30915 -------
30916 cudaError_t
30917 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
30919 See Also
30920 --------
30921 :py:obj:`~.cudaUserObjectCreate`, :py:obj:`~.cudaUserObjectRetain`, :py:obj:`~.cudaUserObjectRelease`, :py:obj:`~.cudaGraphRetainUserObject`, :py:obj:`~.cudaGraphCreate`
30922 """
30923 cdef cyruntime.cudaUserObject_t cyobject
30924 if object is None:
30925 pobject = 0
30926 elif isinstance(object, (cudaUserObject_t,driver.CUuserObject)):
30927 pobject = int(object)
30928 else:
30929 pobject = int(cudaUserObject_t(object))
30930 cyobject = <cyruntime.cudaUserObject_t><void_ptr>pobject
30931 cdef cyruntime.cudaGraph_t cygraph
30932 if graph is None:
30933 pgraph = 0
30934 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
30935 pgraph = int(graph)
30936 else:
30937 pgraph = int(cudaGraph_t(graph))
30938 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
30939 with nogil:
30940 err = cyruntime.cudaGraphReleaseUserObject(cygraph, cyobject, count)
30941 return (_dict_cudaError_t[err],)
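# Editorial sketch: hand the caller's reference over to a graph with
# cudaGraphUserObjectMove, so the graph owns the object's lifetime; destroying
# the graph drops the reference and lets the destructor fire at refcount zero.
def _example_graph_owns_user_object():
    obj = _example_user_object_create()
    err, graph = cudaGraphCreate(0)
    err, = cudaGraphRetainUserObject(
        graph, obj, 1, cudaGraphUserObjectFlags.cudaGraphUserObjectMove)
    assert err == cudaError_t.cudaSuccess
    err, = cudaGraphDestroy(graph)  # the destructor may now run asynchronously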
30943@cython.embedsignature(True)
30944def cudaGraphAddNode(graph, pDependencies : Optional[tuple[cudaGraphNode_t] | list[cudaGraphNode_t]], dependencyData : Optional[tuple[cudaGraphEdgeData] | list[cudaGraphEdgeData]], size_t numDependencies, nodeParams : Optional[cudaGraphNodeParams]):
30945 """ Adds a node of arbitrary type to a graph.
30947 Creates a new node in `graph` described by `nodeParams` with
30948 `numDependencies` dependencies specified via `pDependencies`.
30949 `numDependencies` may be 0. `pDependencies` may be NULL if
30950 `numDependencies` is 0. `pDependencies` may not have any duplicate
30951 entries.
30953 `nodeParams` is a tagged union. The node type should be specified in
30954 the `typename` field, and type-specific parameters in the corresponding
30955 union member. All unused bytes - that is, `reserved0` and all bytes
30956 past the utilized union member - must be set to zero. It is recommended
30957 to use brace initialization or memset to ensure all bytes are
30958 initialized.
30960 Note that for some node types, `nodeParams` may contain "out
30961 parameters" which are modified during the call, such as
30962 `nodeParams->alloc.dptr`.
30964 A handle to the new node will be returned in `pGraphNode`.
30966 Parameters
30967 ----------
30968 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
30969 Graph to which to add the node
30970 pDependencies : list[:py:obj:`~.cudaGraphNode_t`]
30971 Dependencies of the node
30972 dependencyData : list[:py:obj:`~.cudaGraphEdgeData`]
30973 Optional edge data for the dependencies. If NULL, the data is
30974 assumed to be default (zeroed) for all dependencies.
30975 numDependencies : size_t
30976 Number of dependencies
30977 nodeParams : :py:obj:`~.cudaGraphNodeParams`
30978 Specification of the node
30980 Returns
30981 -------
30982 cudaError_t
30983 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorNotSupported`
30984 pGraphNode : :py:obj:`~.cudaGraphNode_t`
30985 Returns newly created node
30987 See Also
30988 --------
30989 :py:obj:`~.cudaGraphCreate`, :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphExecNodeSetParams`
30990 """
30991 dependencyData = [] if dependencyData is None else dependencyData
30992 if not all(isinstance(_x, (cudaGraphEdgeData,)) for _x in dependencyData):
30993 raise TypeError("Argument 'dependencyData' is not instance of type (expected tuple[cyruntime.cudaGraphEdgeData,] or list[cyruntime.cudaGraphEdgeData,]")
30994 pDependencies = [] if pDependencies is None else pDependencies
30995 if not all(isinstance(_x, (cudaGraphNode_t,driver.CUgraphNode)) for _x in pDependencies):
30996 raise TypeError("Argument 'pDependencies' is not instance of type (expected tuple[cyruntime.cudaGraphNode_t,driver.CUgraphNode] or list[cyruntime.cudaGraphNode_t,driver.CUgraphNode]")
30997 cdef cyruntime.cudaGraph_t cygraph
30998 if graph is None:
30999 pgraph = 0
31000 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
31001 pgraph = int(graph)
31002 else:
31003 pgraph = int(cudaGraph_t(graph))
31004 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
31005 cdef cudaGraphNode_t pGraphNode = cudaGraphNode_t()
31006 cdef cyruntime.cudaGraphNode_t* cypDependencies = NULL
31007 if len(pDependencies) > 1:
31008 cypDependencies = <cyruntime.cudaGraphNode_t*> calloc(len(pDependencies), sizeof(cyruntime.cudaGraphNode_t))
31009 if cypDependencies is NULL:
31010 raise MemoryError('Failed to allocate length x size memory: ' + str(len(pDependencies)) + 'x' + str(sizeof(cyruntime.cudaGraphNode_t)))
31011 else:
31012 for idx in range(len(pDependencies)):
31013 cypDependencies[idx] = <cyruntime.cudaGraphNode_t>(<cudaGraphNode_t>pDependencies[idx])._pvt_ptr[0]
31014 elif len(pDependencies) == 1:
31015 cypDependencies = <cyruntime.cudaGraphNode_t*>(<cudaGraphNode_t>pDependencies[0])._pvt_ptr
31016 cdef cyruntime.cudaGraphEdgeData* cydependencyData = NULL
31017 if len(dependencyData) > 1:
31018 cydependencyData = <cyruntime.cudaGraphEdgeData*> calloc(len(dependencyData), sizeof(cyruntime.cudaGraphEdgeData))
31019 if cydependencyData is NULL:
31020 raise MemoryError('Failed to allocate length x size memory: ' + str(len(dependencyData)) + 'x' + str(sizeof(cyruntime.cudaGraphEdgeData)))
31021 for idx in range(len(dependencyData)):
31022 string.memcpy(&cydependencyData[idx], (<cudaGraphEdgeData>dependencyData[idx])._pvt_ptr, sizeof(cyruntime.cudaGraphEdgeData))
31023 elif len(dependencyData) == 1:
31024 cydependencyData = (<cudaGraphEdgeData>dependencyData[0])._pvt_ptr
31025 cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
31026 with nogil:
31027 err = cyruntime.cudaGraphAddNode(<cyruntime.cudaGraphNode_t*>pGraphNode._pvt_ptr, cygraph, cypDependencies, cydependencyData, numDependencies, cynodeParams_ptr)
31028 if len(pDependencies) > 1 and cypDependencies is not NULL:
31029 free(cypDependencies)
31030 if len(dependencyData) > 1 and cydependencyData is not NULL:
31031 free(cydependencyData)
31032 if err != cyruntime.cudaSuccess:
31033 return (_dict_cudaError_t[err], None)
31034 return (_dict_cudaError_t[err], pGraphNode)
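# Editorial sketch: add a dependency-free empty node. Following the docstring
# above, the C struct field `type` is assumed to be exposed as `typename`
# (this module's renaming of the Python builtin).
def _example_graph_add_empty_node():
    err, graph = cudaGraphCreate(0)
    params = cudaGraphNodeParams()
    params.typename = cudaGraphNodeType.cudaGraphNodeTypeEmpty
    err, node = cudaGraphAddNode(graph, None, None, 0, params)
    assert err == cudaError_t.cudaSuccess
    cudaGraphDestroy(graph)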
31036@cython.embedsignature(True)
31037def cudaGraphNodeSetParams(node, nodeParams : Optional[cudaGraphNodeParams]):
31038 """ Update's a graph node's parameters.
31040 Sets the parameters of graph node `node` to `nodeParams`. The node type
31041 specified by `nodeParams->type` must match the type of `node`.
31042 `nodeParams` must be fully initialized and all unused bytes (reserved,
31043 padding) zeroed.
31045 Modifying parameters is not supported for node types
31046 cudaGraphNodeTypeMemAlloc and cudaGraphNodeTypeMemFree.
31048 Parameters
31049 ----------
31050 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
31051 Node to set the parameters for
31052 nodeParams : :py:obj:`~.cudaGraphNodeParams`
31053 Parameters to copy
31055 Returns
31056 -------
31057 cudaError_t
31058 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorNotSupported`
31060 See Also
31061 --------
31062 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphExecNodeSetParams`
31063 """
31064 cdef cyruntime.cudaGraphNode_t cynode
31065 if node is None:
31066 pnode = 0
31067 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
31068 pnode = int(node)
31069 else:
31070 pnode = int(cudaGraphNode_t(node))
31071 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
31072 cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
31073 with nogil:
31074 err = cyruntime.cudaGraphNodeSetParams(cynode, cynodeParams_ptr)
31075 return (_dict_cudaError_t[err],)
31077@cython.embedsignature(True)
31078def cudaGraphExecNodeSetParams(graphExec, node, nodeParams : Optional[cudaGraphNodeParams]):
31079 """ Update's a graph node's parameters in an instantiated graph.
31081 Sets the parameters of a node in an executable graph `graphExec`. The
31082 node is identified by the corresponding node `node` in the non-
31083 executable graph from which the executable graph was instantiated.
31084 `node` must not have been removed from the original graph.
31086 The modifications only affect future launches of `graphExec`. Already
31087 enqueued or running launches of `graphExec` are not affected by this
31088 call. `node` is also not modified by this call.
31090 Allowed changes to parameters on executable graphs are as follows:
31092 **View CUDA Toolkit Documentation for a table example**
31094 Parameters
31095 ----------
31096 graphExec : :py:obj:`~.CUgraphExec` or :py:obj:`~.cudaGraphExec_t`
31097 The executable graph in which to update the specified node
31098 node : :py:obj:`~.CUgraphNode` or :py:obj:`~.cudaGraphNode_t`
31099 Corresponding node from the graph from which graphExec was
31100 instantiated
31101 nodeParams : :py:obj:`~.cudaGraphNodeParams`
31102 Updated parameters to set
31104 Returns
31105 -------
31106 cudaError_t
31107 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorNotSupported`
31109 See Also
31110 --------
31111 :py:obj:`~.cudaGraphAddNode`, :py:obj:`~.cudaGraphNodeSetParams`, :py:obj:`~.cudaGraphExecUpdate`, :py:obj:`~.cudaGraphInstantiate`
31112 """
31113 cdef cyruntime.cudaGraphNode_t cynode
31114 if node is None:
31115 pnode = 0
31116 elif isinstance(node, (cudaGraphNode_t,driver.CUgraphNode)):
31117 pnode = int(node)
31118 else:
31119 pnode = int(cudaGraphNode_t(node))
31120 cynode = <cyruntime.cudaGraphNode_t><void_ptr>pnode
31121 cdef cyruntime.cudaGraphExec_t cygraphExec
31122 if graphExec is None:
31123 pgraphExec = 0
31124 elif isinstance(graphExec, (cudaGraphExec_t,driver.CUgraphExec)):
31125 pgraphExec = int(graphExec)
31126 else:
31127 pgraphExec = int(cudaGraphExec_t(graphExec))
31128 cygraphExec = <cyruntime.cudaGraphExec_t><void_ptr>pgraphExec
31129 cdef cyruntime.cudaGraphNodeParams* cynodeParams_ptr = nodeParams._pvt_ptr if nodeParams is not None else NULL
31130 with nogil:
31131 err = cyruntime.cudaGraphExecNodeSetParams(cygraphExec, cynode, cynodeParams_ptr)
31132 return (_dict_cudaError_t[err],)
31134@cython.embedsignature(True)
31135def cudaGraphConditionalHandleCreate(graph, unsigned int defaultLaunchValue, unsigned int flags):
31136 """ Create a conditional handle.
31138 Creates a conditional handle associated with `graph`.
31140 The conditional handle must be associated with a conditional node in
31141 this graph or one of its children.
31143 Handles not associated with a conditional node may cause graph
31144 instantiation to fail.
31146 Parameters
31147 ----------
31148 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
31149 Graph which will contain the conditional node using this handle.
31150 defaultLaunchValue : unsigned int
31151 Optional initial value for the conditional variable. Applied at the
31152 beginning of each graph execution if cudaGraphCondAssignDefault is
31153 set in `flags`.
31154 flags : unsigned int
31155 Currently must be cudaGraphCondAssignDefault or 0.
31157 Returns
31158 -------
31159 cudaError_t
31160 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
31161 pHandle_out : :py:obj:`~.cudaGraphConditionalHandle`
31162 Pointer used to return the handle to the caller.
31164 See Also
31165 --------
31166 :py:obj:`~.cuGraphAddNode`
31167 """
31168 cdef cyruntime.cudaGraph_t cygraph
31169 if graph is None:
31170 pgraph = 0
31171 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
31172 pgraph = int(graph)
31173 else:
31174 pgraph = int(cudaGraph_t(graph))
31175 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
31176 cdef cudaGraphConditionalHandle pHandle_out = cudaGraphConditionalHandle()
31177 with nogil:
31178 err = cyruntime.cudaGraphConditionalHandleCreate(<cyruntime.cudaGraphConditionalHandle*>pHandle_out._pvt_ptr, cygraph, defaultLaunchValue, flags)
31179 if err != cyruntime.cudaSuccess:
31180 return (_dict_cudaError_t[err], None)
31181 return (_dict_cudaError_t[err], pHandle_out)
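# Editorial sketch: create a handle whose value is reset to 1 at the start of
# every launch of the owning graph. The flag is assumed to live on the
# cudaGraphConditionalHandleFlags IntEnum.
def _example_conditional_handle():
    err, graph = cudaGraphCreate(0)
    err, handle = cudaGraphConditionalHandleCreate(
        graph, 1, cudaGraphConditionalHandleFlags.cudaGraphCondAssignDefault)
    assert err == cudaError_t.cudaSuccess
    return graph, handle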
31183@cython.embedsignature(True)
31184def cudaGraphConditionalHandleCreate_v2(graph, ctx, unsigned int defaultLaunchValue, unsigned int flags):
31185 """ Create a conditional handle.
31187 Creates a conditional handle associated with `graph`.
31189 The conditional handle must be associated with a conditional node in
31190 this graph or one of its children.
31192 Handles not associated with a conditional node may cause graph
31193 instantiation to fail.
31195 Parameters
31196 ----------
31197 graph : :py:obj:`~.CUgraph` or :py:obj:`~.cudaGraph_t`
31198 Graph which will contain the conditional node using this handle.
31199 ctx : :py:obj:`~.cudaExecutionContext_t`
31200 Execution context for the handle and associated conditional node.
31201 If NULL, the current context will be used.
31202 defaultLaunchValue : unsigned int
31203 Optional initial value for the conditional variable. Applied at the
31204 beginning of each graph execution if cudaGraphCondAssignDefault is
31205 set in `flags`.
31206 flags : unsigned int
31207 Currently must be cudaGraphCondAssignDefault or 0.
31209 Returns
31210 -------
31211 cudaError_t
31212 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
31213 pHandle_out : :py:obj:`~.cudaGraphConditionalHandle`
31214 Pointer used to return the handle to the caller.
31216 See Also
31217 --------
31218 :py:obj:`~.cuGraphAddNode`
31219 """
31220 cdef cyruntime.cudaExecutionContext_t cyctx
31221 if ctx is None:
31222 pctx = 0
31223 elif isinstance(ctx, (cudaExecutionContext_t,)):
31224 pctx = int(ctx)
31225 else:
31226 pctx = int(cudaExecutionContext_t(ctx))
31227 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
31228 cdef cyruntime.cudaGraph_t cygraph
31229 if graph is None:
31230 pgraph = 0
31231 elif isinstance(graph, (cudaGraph_t,driver.CUgraph)):
31232 pgraph = int(graph)
31233 else:
31234 pgraph = int(cudaGraph_t(graph))
31235 cygraph = <cyruntime.cudaGraph_t><void_ptr>pgraph
31236 cdef cudaGraphConditionalHandle pHandle_out = cudaGraphConditionalHandle()
31237 with nogil:
31238 err = cyruntime.cudaGraphConditionalHandleCreate_v2(<cyruntime.cudaGraphConditionalHandle*>pHandle_out._pvt_ptr, cygraph, cyctx, defaultLaunchValue, flags)
31239 if err != cyruntime.cudaSuccess:
31240 return (_dict_cudaError_t[err], None)
31241 return (_dict_cudaError_t[err], pHandle_out)
31243@cython.embedsignature(True)
31244def cudaGetDriverEntryPoint(char* symbol, unsigned long long flags):
31245 """ Returns the requested driver API function pointer.
31247 [Deprecated]
31249 Returns in `**funcPtr` the address of the CUDA driver function for the
31250 requested flags.
31252 For a requested driver symbol, if the CUDA version in which the driver
31253 symbol was introduced is less than or equal to the CUDA runtime
31254 version, the API will return the function pointer to the corresponding
31255 versioned driver function.
31257 The pointer returned by the API should be cast to a function pointer
31258 matching the requested driver function's definition in the API header
31259 file. The function pointer typedef can be picked up from the
31260 corresponding typedefs header file. For example, cudaTypedefs.h
31261 consists of function pointer typedefs for driver APIs defined in
31262 cuda.h.
31264 The API will return :py:obj:`~.cudaSuccess` and set the returned
31265 `funcPtr` if the requested driver function is valid and supported on
31266 the platform.
31268 The API will return :py:obj:`~.cudaSuccess` and set the returned
31269 `funcPtr` to NULL if the requested driver function is not supported on
31270 the platform, no ABI compatible driver function exists for the CUDA
31271 runtime version or if the driver symbol is invalid.
31273 It will also set the optional `driverStatus` to one of the values in
31274 :py:obj:`~.cudaDriverEntryPointQueryResult` with the following
31275 meanings:
31277 - :py:obj:`~.cudaDriverEntryPointSuccess` - The requested symbol was
31278 successfully found based on input arguments and `pfn` is valid
31280 - :py:obj:`~.cudaDriverEntryPointSymbolNotFound` - The requested symbol
31281 was not found
31283 - :py:obj:`~.cudaDriverEntryPointVersionNotSufficent` - The requested
31284 symbol was found but is not supported by the current runtime version
31285 (CUDART_VERSION)
31287 The requested flags can be:
31289 - :py:obj:`~.cudaEnableDefault`: This is the default mode. This is
31290 equivalent to :py:obj:`~.cudaEnablePerThreadDefaultStream` if the
31291 code is compiled with --default-stream per-thread compilation flag or
31292 the macro CUDA_API_PER_THREAD_DEFAULT_STREAM is defined;
31293 :py:obj:`~.cudaEnableLegacyStream` otherwise.
31295 - :py:obj:`~.cudaEnableLegacyStream`: This will enable the search for
31296 all driver symbols that match the requested driver symbol name except
31297 the corresponding per-thread versions.
31299 - :py:obj:`~.cudaEnablePerThreadDefaultStream`: This will enable the
31300 search for all driver symbols that match the requested driver symbol
31301 name including the per-thread versions. If a per-thread version is
31302 not found, the API will return the legacy version of the driver
31303 function.
31305 Parameters
31306 ----------
31307 symbol : bytes
31308 The base name of the driver API function to look for. As an
31309 example, for the driver API :py:obj:`~.cuMemAlloc_v2`, `symbol`
31310 would be cuMemAlloc. Note that the API will use the CUDA runtime
31311 version to return the address to the most recent ABI compatible
31312 driver symbol, :py:obj:`~.cuMemAlloc` or :py:obj:`~.cuMemAlloc_v2`.
31313 flags : unsigned long long
31314 Flags to specify search options.
31316 Returns
31317 -------
31318 cudaError_t
31319 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
31320 funcPtr : Any
31321 Location to return the function pointer to the requested driver
31322 function
31323 driverStatus : :py:obj:`~.cudaDriverEntryPointQueryResult`
31324 Optional location to store the status of finding the symbol from
31325 the driver. See :py:obj:`~.cudaDriverEntryPointQueryResult` for
31326 possible values.
31328 See Also
31329 --------
31330 :py:obj:`~.cuGetProcAddress`
31332 Notes
31333 -----
31334 This API is deprecated and :py:obj:`~.cudaGetDriverEntryPointByVersion` (with a hardcoded :py:obj:`~.cudaVersion`) should be used instead.
31335 """
31336 cdef void_ptr funcPtr = 0
31337 cdef cyruntime.cudaDriverEntryPointQueryResult driverStatus
31338 with nogil:
31339 err = cyruntime.cudaGetDriverEntryPoint(symbol, <void**>&funcPtr, flags, &driverStatus)
31340 if err != cyruntime.cudaSuccess:
31341 return (_dict_cudaError_t[err], None, None)
31342 return (_dict_cudaError_t[err], funcPtr, cudaDriverEntryPointQueryResult(driverStatus))
31344@cython.embedsignature(True)
31345def cudaGetDriverEntryPointByVersion(char* symbol, unsigned int cudaVersion, unsigned long long flags):
31346 """ Returns the requested driver API function pointer by CUDA version.
31348 Returns in `**funcPtr` the address of the CUDA driver function for the
31349 requested flags and CUDA driver version.
31351 The CUDA version is specified as (1000 * major + 10 * minor), so CUDA
31352 11.2 should be specified as 11020. For a requested driver symbol, if
31353 the specified CUDA version is greater than or equal to the CUDA version
31354 in which the driver symbol was introduced, this API will return the
31355 function pointer to the corresponding versioned function. If the
31356 specified CUDA version is greater than the driver version, the API will
31357 return :py:obj:`~.cudaErrorInvalidValue`.
31359 The pointer returned by the API should be cast to a function pointer
31360 matching the requested driver function's definition in the API header
31361 file. The function pointer typedef can be picked up from the
31362 corresponding typedefs header file. For example, cudaTypedefs.h
31363 consists of function pointer typedefs for driver APIs defined in
31364 cuda.h.
31366 If the requested CUDA version is newer than the installed CUDA
31367 Toolkit, the corresponding header file may not provide an appropriate
31368 function pointer typedef, and a custom typedef may be needed to match
31369 the signature of the returned driver function. This can be done by
31370 taking the typedefs from a later toolkit or writing suitably matching
31371 custom function typedefs.
31373 The API will return :py:obj:`~.cudaSuccess` and set the returned
31374 `funcPtr` if the requested driver function is valid and supported on
31375 the platform.
31377 The API will return :py:obj:`~.cudaSuccess` and set the returned
31378 `funcPtr` to NULL if the requested driver function is not supported on
31379 the platform, no ABI compatible driver function exists for the
31380 requested version or if the driver symbol is invalid.
31382 It will also set the optional `driverStatus` to one of the values in
31383 :py:obj:`~.cudaDriverEntryPointQueryResult` with the following
31384 meanings:
31386 - :py:obj:`~.cudaDriverEntryPointSuccess` - The requested symbol was
31387 successfully found based on input arguments and `pfn` is valid
31389 - :py:obj:`~.cudaDriverEntryPointSymbolNotFound` - The requested symbol
31390 was not found
31392 - :py:obj:`~.cudaDriverEntryPointVersionNotSufficent` - The requested
31393 symbol was found but is not supported by the specified version
31394 `cudaVersion`
31396 The requested flags can be:
31398 - :py:obj:`~.cudaEnableDefault`: This is the default mode. This is
31399 equivalent to :py:obj:`~.cudaEnablePerThreadDefaultStream` if the
31400 code is compiled with --default-stream per-thread compilation flag or
31401 the macro CUDA_API_PER_THREAD_DEFAULT_STREAM is defined;
31402 :py:obj:`~.cudaEnableLegacyStream` otherwise.
31404 - :py:obj:`~.cudaEnableLegacyStream`: This will enable the search for
31405 all driver symbols that match the requested driver symbol name except
31406 the corresponding per-thread versions.
31408 - :py:obj:`~.cudaEnablePerThreadDefaultStream`: This will enable the
31409 search for all driver symbols that match the requested driver symbol
31410 name including the per-thread versions. If a per-thread version is
31411 not found, the API will return the legacy version of the driver
31412 function.
31414 Parameters
31415 ----------
31416 symbol : bytes
31417 The base name of the driver API function to look for. As an
31418 example, for the driver API :py:obj:`~.cuMemAlloc_v2`, `symbol`
31419 would be cuMemAlloc.
31420 cudaVersion : unsigned int
31421 The CUDA version to look for the requested driver symbol
31422 flags : unsigned long long
31423 Flags to specify search options.
31425 Returns
31426 -------
31427 cudaError_t
31428 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`
31429 funcPtr : Any
31430 Location to return the function pointer to the requested driver
31431 function
31432 driverStatus : :py:obj:`~.cudaDriverEntryPointQueryResult`
31433 Optional location to store the status of finding the symbol from
31434 the driver. See :py:obj:`~.cudaDriverEntryPointQueryResult` for
31435 possible values.
31437 See Also
31438 --------
31439 :py:obj:`~.cuGetProcAddress`
31440 """
31441 cdef void_ptr funcPtr = 0
31442 cdef cyruntime.cudaDriverEntryPointQueryResult driverStatus
31443 with nogil:
31444 err = cyruntime.cudaGetDriverEntryPointByVersion(symbol, <void**>&funcPtr, cudaVersion, flags, &driverStatus)
31445 if err != cyruntime.cudaSuccess:
31446 return (_dict_cudaError_t[err], None, None)
31447 return (_dict_cudaError_t[err], funcPtr, cudaDriverEntryPointQueryResult(driverStatus))
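# Editorial sketch: fetch cuDriverGetVersion at the CUDA 12.0 ABI and call it
# through ctypes. `funcPtr` comes back as a plain integer address;
# cudaEnableDefault is assumed to be the module-level flag constant.
def _example_driver_entry_point():
    err, fptr, status = cudaGetDriverEntryPointByVersion(
        b"cuDriverGetVersion", 12000, cudaEnableDefault)
    if err == cudaError_t.cudaSuccess and fptr:
        # CUresult cuDriverGetVersion(int *driverVersion)
        proto = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.POINTER(ctypes.c_int))
        version = ctypes.c_int(0)
        proto(fptr)(ctypes.byref(version))
        return version.value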
31449@cython.embedsignature(True)
31450def cudaLibraryLoadData(code, jitOptions : Optional[tuple[cudaJitOption] | list[cudaJitOption]], jitOptionsValues : Optional[tuple[Any] | list[Any]], unsigned int numJitOptions, libraryOptions : Optional[tuple[cudaLibraryOption] | list[cudaLibraryOption]], libraryOptionValues : Optional[tuple[Any] | list[Any]], unsigned int numLibraryOptions):
31451 """ Load a library with specified code and options.
31453 Takes a pointer `code` and loads the corresponding library `library`
31454 based on the application-defined library loading mode:
31456 - If module loading is set to EAGER, via the environment variables
31457 described in "Module loading", `library` is loaded eagerly into all
31458 contexts at the time of the call and future contexts at the time of
31459 creation until the library is unloaded with
31460 :py:obj:`~.cudaLibraryUnload()`.
31462 - If the environment variables are set to LAZY, `library` is not
31463 immediately loaded onto all existent contexts and will only be loaded
31464 when a function is needed for that context, such as a kernel launch.
31466 These environment variables are described in the CUDA programming guide
31467 under the "CUDA environment variables" section.
31469 The `code` may be a `cubin` or `fatbin` as output by nvcc, a NULL-
31470 terminated `PTX` (output by nvcc or hand-written), or `Tile`
31471 IR data. A fatbin should also contain relocatable code when doing
31472 separate compilation. Please also see the documentation for nvrtc
31473 (https://docs.nvidia.com/cuda/nvrtc/index.html), nvjitlink
31474 (https://docs.nvidia.com/cuda/nvjitlink/index.html), and nvfatbin
31475 (https://docs.nvidia.com/cuda/nvfatbin/index.html) for more information
31476 on generating loadable code at runtime.
31478 Options are passed as an array via `jitOptions` and any corresponding
31479 parameters are passed in `jitOptionsValues`. The number of total JIT
31480 options is supplied via `numJitOptions`. Any outputs will be returned
31481 via `jitOptionsValues`.
31483 Library load options are passed as an array via `libraryOptions` and
31484 any corresponding parameters are passed in `libraryOptionValues`. The
31485 number of total library load options is supplied via
31486 `numLibraryOptions`.
31488 Parameters
31489 ----------
31490 code : Any
31491 Code to load
31492 jitOptions : list[:py:obj:`~.cudaJitOption`]
31493 Options for JIT
31494 jitOptionsValues : list[Any]
31495 Option values for JIT
31496 numJitOptions : unsigned int
31497 Number of options
31498 libraryOptions : list[:py:obj:`~.cudaLibraryOption`]
31499 Options for loading
31500 libraryOptionValues : list[Any]
31501 Option values for loading
31502 numLibraryOptions : unsigned int
31503 Number of options for loading
31505 Returns
31506 -------
31507 cudaError_t
31508 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInvalidPtx`, :py:obj:`~.cudaErrorUnsupportedPtxVersion`, :py:obj:`~.cudaErrorNoKernelImageForDevice`, :py:obj:`~.cudaErrorSharedObjectSymbolNotFound`, :py:obj:`~.cudaErrorSharedObjectInitFailed`, :py:obj:`~.cudaErrorJitCompilerNotFound`
31509 library : :py:obj:`~.cudaLibrary_t`
31510 Returned library
31512 See Also
31513 --------
31514 :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cuLibraryLoadData`
31515 """
31516 libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues
31517 libraryOptions = [] if libraryOptions is None else libraryOptions
31518 if not all(isinstance(_x, (cudaLibraryOption,)) for _x in libraryOptions):
31519 raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cyruntime.cudaLibraryOption] or list[cyruntime.cudaLibraryOption])")
31520 jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues
31521 jitOptions = [] if jitOptions is None else jitOptions
31522 if not all(isinstance(_x, (cudaJitOption,)) for _x in jitOptions):
31523 raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cyruntime.cudaJitOption] or list[cyruntime.cudaJitOption])")
31524 cdef cudaLibrary_t library = cudaLibrary_t()
31525 cycode = _HelperInputVoidPtr(code)
31526 cdef void* cycode_ptr = <void*><void_ptr>cycode.cptr
31527 cdef vector[cyruntime.cudaJitOption] cyjitOptions = [pyjitOptions.value for pyjitOptions in (jitOptions)]
31528 pylist = [_HelperCudaJitOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)]
31529 cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist)
31530 cdef void** cyjitOptionsValues_ptr = <void**><void_ptr>voidStarHelperjitOptionsValues.cptr
31531 if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions))
31532 if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions))
31533 cdef vector[cyruntime.cudaLibraryOption] cylibraryOptions = [pylibraryOptions.value for pylibraryOptions in (libraryOptions)]
31534 pylist = [_HelperCudaLibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)]
31535 cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist)
31536 cdef void** cylibraryOptionValues_ptr = <void**><void_ptr>voidStarHelperlibraryOptionValues.cptr
31537 if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions))
31538 if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions))
31539 with nogil:
31540 err = cyruntime.cudaLibraryLoadData(<cyruntime.cudaLibrary_t*>library._pvt_ptr, cycode_ptr, cyjitOptions.data(), cyjitOptionsValues_ptr, numJitOptions, cylibraryOptions.data(), cylibraryOptionValues_ptr, numLibraryOptions)
31541 if err != cyruntime.cudaSuccess:
31542 return (_dict_cudaError_t[err], None)
31543 return (_dict_cudaError_t[err], library)
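# Editorial sketch: load a minimal NULL-terminated PTX image with no JIT or
# library options. The PTX below is a hand-written no-op kernel; any valid
# image would do.
_EXAMPLE_PTX = b"""
.version 7.0
.target sm_50
.address_size 64
.visible .entry noop() { ret; }
""" + b"\x00"

def _example_library_load_data():
    err, library = cudaLibraryLoadData(
        _EXAMPLE_PTX, None, None, 0, None, None, 0)
    assert err == cudaError_t.cudaSuccess
    return library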
31545@cython.embedsignature(True)
31546def cudaLibraryLoadFromFile(char* fileName, jitOptions : Optional[tuple[cudaJitOption] | list[cudaJitOption]], jitOptionsValues : Optional[tuple[Any] | list[Any]], unsigned int numJitOptions, libraryOptions : Optional[tuple[cudaLibraryOption] | list[cudaLibraryOption]], libraryOptionValues : Optional[tuple[Any] | list[Any]], unsigned int numLibraryOptions):
31547 """ Load a library with specified file and options.
31549 Takes a filename `fileName` and loads the corresponding library
31550 `library` based on the application-defined library loading mode:
31552 - If module loading is set to EAGER, via the environment variables
31553 described in "Module loading", `library` is loaded eagerly into all
31554 contexts at the time of the call and future contexts at the time of
31555 creation until the library is unloaded with
31556 :py:obj:`~.cudaLibraryUnload()`.
31558 - If the environment variables are set to LAZY, `library` is not
31559 immediately loaded onto all existent contexts and will only be loaded
31560 when a function is needed for that context, such as a kernel launch.
31562 These environment variables are described in the CUDA programming guide
31563 under the "CUDA environment variables" section.
31565 The file should be a `cubin`, `PTX`, or `fatbin` file, as output by
31566 nvcc or hand-written, or a `Tile` IR file. A fatbin should also
31568 contain relocatable code when doing separate compilation. Please also
31569 see the documentation for nvrtc
31570 (https://docs.nvidia.com/cuda/nvrtc/index.html), nvjitlink
31571 (https://docs.nvidia.com/cuda/nvjitlink/index.html), and nvfatbin
31572 (https://docs.nvidia.com/cuda/nvfatbin/index.html) for more information
31573 on generating loadable code at runtime.
31575 Options are passed as an array via `jitOptions` and any corresponding
31576 parameters are passed in `jitOptionsValues`. The number of total
31577 options is supplied via `numJitOptions`. Any outputs will be returned
31578 via `jitOptionsValues`.
31580 Library load options are passed as an array via `libraryOptions` and
31581 any corresponding parameters are passed in `libraryOptionValues`. The
31582 number of total library load options is supplied via
31583 `numLibraryOptions`.
31585 Parameters
31586 ----------
31587 fileName : bytes
31588 File to load from
31589 jitOptions : list[:py:obj:`~.cudaJitOption`]
31590 Options for JIT
31591 jitOptionsValues : list[Any]
31592 Option values for JIT
31593 numJitOptions : unsigned int
31594 Number of options
31595 libraryOptions : list[:py:obj:`~.cudaLibraryOption`]
31596 Options for loading
31597 libraryOptionValues : list[Any]
31598 Option values for loading
31599 numLibraryOptions : unsigned int
31600 Number of options for loading
31602 Returns
31603 -------
31604 cudaError_t
31605 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorMemoryAllocation`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInvalidPtx`, :py:obj:`~.cudaErrorUnsupportedPtxVersion`, :py:obj:`~.cudaErrorNoKernelImageForDevice`, :py:obj:`~.cudaErrorSharedObjectSymbolNotFound`, :py:obj:`~.cudaErrorSharedObjectInitFailed`, :py:obj:`~.cudaErrorJitCompilerNotFound`
31606 library : :py:obj:`~.cudaLibrary_t`
31607 Returned library
31609 See Also
31610 --------
31611 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cuLibraryLoadFromFile`
31612 """
31613 libraryOptionValues = [] if libraryOptionValues is None else libraryOptionValues
31614 libraryOptions = [] if libraryOptions is None else libraryOptions
31615 if not all(isinstance(_x, (cudaLibraryOption,)) for _x in libraryOptions):
31616 raise TypeError("Argument 'libraryOptions' is not instance of type (expected tuple[cyruntime.cudaLibraryOption] or list[cyruntime.cudaLibraryOption])")
31617 jitOptionsValues = [] if jitOptionsValues is None else jitOptionsValues
31618 jitOptions = [] if jitOptions is None else jitOptions
31619 if not all(isinstance(_x, (cudaJitOption,)) for _x in jitOptions):
31620 raise TypeError("Argument 'jitOptions' is not instance of type (expected tuple[cyruntime.cudaJitOption] or list[cyruntime.cudaJitOption])")
31621 cdef cudaLibrary_t library = cudaLibrary_t()
31622 cdef vector[cyruntime.cudaJitOption] cyjitOptions = [pyjitOptions.value for pyjitOptions in (jitOptions)]
31623 pylist = [_HelperCudaJitOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(jitOptions, jitOptionsValues)]
31624 cdef _InputVoidPtrPtrHelper voidStarHelperjitOptionsValues = _InputVoidPtrPtrHelper(pylist)
31625 cdef void** cyjitOptionsValues_ptr = <void**><void_ptr>voidStarHelperjitOptionsValues.cptr
31626 if numJitOptions > len(jitOptions): raise RuntimeError("List is too small: " + str(len(jitOptions)) + " < " + str(numJitOptions))
31627 if numJitOptions > len(jitOptionsValues): raise RuntimeError("List is too small: " + str(len(jitOptionsValues)) + " < " + str(numJitOptions))
31628 cdef vector[cyruntime.cudaLibraryOption] cylibraryOptions = [pylibraryOptions.value for pylibraryOptions in (libraryOptions)]
31629 pylist = [_HelperCudaLibraryOption(pyoptions, pyoptionValues) for pyoptions, pyoptionValues in zip(libraryOptions, libraryOptionValues)]
31630 cdef _InputVoidPtrPtrHelper voidStarHelperlibraryOptionValues = _InputVoidPtrPtrHelper(pylist)
31631 cdef void** cylibraryOptionValues_ptr = <void**><void_ptr>voidStarHelperlibraryOptionValues.cptr
31632 if numLibraryOptions > len(libraryOptions): raise RuntimeError("List is too small: " + str(len(libraryOptions)) + " < " + str(numLibraryOptions))
31633 if numLibraryOptions > len(libraryOptionValues): raise RuntimeError("List is too small: " + str(len(libraryOptionValues)) + " < " + str(numLibraryOptions))
31634 with nogil:
31635 err = cyruntime.cudaLibraryLoadFromFile(<cyruntime.cudaLibrary_t*>library._pvt_ptr, fileName, cyjitOptions.data(), cyjitOptionsValues_ptr, numJitOptions, cylibraryOptions.data(), cylibraryOptionValues_ptr, numLibraryOptions)
31636 if err != cyruntime.cudaSuccess:
31637 return (_dict_cudaError_t[err], None)
31638 return (_dict_cudaError_t[err], library)
31640@cython.embedsignature(True)
31641def cudaLibraryUnload(library):
31642 """ Unloads a library.
31644 Unloads the library specified with `library`
31646 Parameters
31647 ----------
31648 library : :py:obj:`~.cudaLibrary_t`
31649 Library to unload
31651 Returns
31652 -------
31653 cudaError_t
31654 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`
31656 See Also
31657 --------
31658 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cuLibraryUnload`
31659 """
31660 cdef cyruntime.cudaLibrary_t cylibrary
31661 if library is None:
31662 plibrary = 0
31663 elif isinstance(library, (cudaLibrary_t,)):
31664 plibrary = int(library)
31665 else:
31666 plibrary = int(cudaLibrary_t(library))
31667 cylibrary = <cyruntime.cudaLibrary_t><void_ptr>plibrary
31668 with nogil:
31669 err = cyruntime.cudaLibraryUnload(cylibrary)
31670 return (_dict_cudaError_t[err],)
31672@cython.embedsignature(True)
31673def cudaLibraryGetKernel(library, char* name):
31674 """ Returns a kernel handle.
31676 Returns in `pKernel` the handle of the kernel with name `name` located
31677 in library `library`. If the kernel is not found, the call returns
31678 :py:obj:`~.cudaErrorSymbolNotFound`.
31680 Parameters
31681 ----------
31682 library : :py:obj:`~.cudaLibrary_t`
31683 Library to retrieve kernel from
31684 name : bytes
31685 Name of kernel to retrieve
31687 Returns
31688 -------
31689 cudaError_t
31690 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorSymbolNotFound`
31691 pKernel : :py:obj:`~.cudaKernel_t`
31692 Returned kernel handle
31694 See Also
31695 --------
31696 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cuLibraryGetKernel`
31697 """
31698 cdef cyruntime.cudaLibrary_t cylibrary
31699 if library is None:
31700 plibrary = 0
31701 elif isinstance(library, (cudaLibrary_t,)):
31702 plibrary = int(library)
31703 else:
31704 plibrary = int(cudaLibrary_t(library))
31705 cylibrary = <cyruntime.cudaLibrary_t><void_ptr>plibrary
31706 cdef cudaKernel_t pKernel = cudaKernel_t()
31707 with nogil:
31708 err = cyruntime.cudaLibraryGetKernel(<cyruntime.cudaKernel_t*>pKernel._pvt_ptr, cylibrary, name)
31709 if err != cyruntime.cudaSuccess:
31710 return (_dict_cudaError_t[err], None)
31711 return (_dict_cudaError_t[err], pKernel)
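# Editorial sketch: fetch the `noop` kernel from the PTX image loaded in the
# sketch above; the name must be bytes since the wrapper takes a char*.
def _example_library_get_kernel():
    library = _example_library_load_data()
    err, kernel = cudaLibraryGetKernel(library, b"noop")
    assert err == cudaError_t.cudaSuccess
    return library, kernel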
31713@cython.embedsignature(True)
31714def cudaLibraryGetGlobal(library, char* name):
31715 """ Returns a global device pointer.
31717 Returns in `*dptr` and `*bytes` the base pointer and size of the global
31718 with name `name` for the requested library `library` and the current
31719 device. If no global for the requested name `name` exists, the call
31720 returns :py:obj:`~.cudaErrorSymbolNotFound`. One of the parameters
31721 `dptr` or `numbytes` (not both) can be NULL in which case it is
31722 ignored. The returned `dptr` cannot be passed to the Symbol APIs such
31723 as :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`,
31724 :py:obj:`~.cudaGetSymbolAddress`, or :py:obj:`~.cudaGetSymbolSize`.
31726 Parameters
31727 ----------
31728 library : :py:obj:`~.cudaLibrary_t`
31729 Library to retrieve global from
31730 name : bytes
31731 Name of global to retrieve
31733 Returns
31734 -------
31735 cudaError_t
31736 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorSymbolNotFound` :py:obj:`~.cudaErrorDeviceUninitialized`, :py:obj:`~.cudaErrorContextIsDestroyed`
31737 dptr : Any
31738 Returned global device pointer for the requested library
31739 numbytes : int
31740 Returned global size in bytes
31742 See Also
31743 --------
31744 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cudaLibraryGetManaged`, :py:obj:`~.cuLibraryGetGlobal`
31745 """
31746 cdef cyruntime.cudaLibrary_t cylibrary
31747 if library is None:
31748 plibrary = 0
31749 elif isinstance(library, (cudaLibrary_t,)):
31750 plibrary = int(library)
31751 else:
31752 plibrary = int(cudaLibrary_t(library))
31753 cylibrary = <cyruntime.cudaLibrary_t><void_ptr>plibrary
31754 cdef void_ptr dptr = 0
31755 cdef size_t numbytes = 0
31756 with nogil:
31757 err = cyruntime.cudaLibraryGetGlobal(<void**>&dptr, &numbytes, cylibrary, name)
31758 if err != cyruntime.cudaSuccess:
31759 return (_dict_cudaError_t[err], None, None)
31760 return (_dict_cudaError_t[err], dptr, numbytes)
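# Editorial sketch: look up a device global by name. `counter` is a
# hypothetical symbol the loaded image is assumed to define; a real image
# without it returns cudaErrorSymbolNotFound.
def _example_library_get_global(library):
    err, dptr, numbytes = cudaLibraryGetGlobal(library, b"counter")
    if err == cudaError_t.cudaSuccess:
        return dptr, numbytes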
31762@cython.embedsignature(True)
31763def cudaLibraryGetManaged(library, char* name):
31764 """ Returns a pointer to managed memory.
31766 Returns in `*dptr` and `*bytes` the base pointer and size of the
31767 managed memory with name `name` for the requested library `library`. If
31768 no managed memory with the requested name `name` exists, the call
31769 returns :py:obj:`~.cudaErrorSymbolNotFound`. One of the parameters
31770 `dptr` or `numbytes` (not both) can be NULL in which case it is
31771 ignored. Note that managed memory for library `library` is shared
31772 across devices and is registered when the library is loaded. The
31773 returned `dptr` cannot be passed to the Symbol APIs such as
31774 :py:obj:`~.cudaMemcpyToSymbol`, :py:obj:`~.cudaMemcpyFromSymbol`,
31775 :py:obj:`~.cudaGetSymbolAddress`, or :py:obj:`~.cudaGetSymbolSize`.
31777 Parameters
31778 ----------
31779 library : :py:obj:`~.cudaLibrary_t`
31780 Library to retrieve managed memory from
31781 name : bytes
31782 Name of managed memory to retrieve
31784 Returns
31785 -------
31786 cudaError_t
31787 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorSymbolNotFound`
31788 dptr : Any
31789 Returned pointer to the managed memory
31790 numbytes : int
31791 Returned memory size in bytes
31793 See Also
31794 --------
31795 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cudaLibraryGetGlobal`, :py:obj:`~.cuLibraryGetManaged`
31796 """
31797 cdef cyruntime.cudaLibrary_t cylibrary
31798 if library is None:
31799 plibrary = 0
31800 elif isinstance(library, (cudaLibrary_t,)):
31801 plibrary = int(library)
31802 else:
31803 plibrary = int(cudaLibrary_t(library))
31804 cylibrary = <cyruntime.cudaLibrary_t><void_ptr>plibrary
31805 cdef void_ptr dptr = 0
31806 cdef size_t numbytes = 0
31807 with nogil:
31808 err = cyruntime.cudaLibraryGetManaged(<void**>&dptr, &numbytes, cylibrary, name)
31809 if err != cyruntime.cudaSuccess:
31810 return (_dict_cudaError_t[err], None, None)
31811 return (_dict_cudaError_t[err], dptr, numbytes)
31813@cython.embedsignature(True)
31814def cudaLibraryGetUnifiedFunction(library, char* symbol):
31815 """ Returns a pointer to a unified function.
31817 Returns in `*fptr` the function pointer to a unified function denoted
31818 by `symbol`. If no unified function with name `symbol` exists, the call
31819 returns :py:obj:`~.cudaErrorSymbolNotFound`. If there is no device with
31820 attribute :py:obj:`~.cudaDeviceProp.unifiedFunctionPointers` present in
31821 the system, the call may return :py:obj:`~.cudaErrorSymbolNotFound`.
31823 Parameters
31824 ----------
31825 library : :py:obj:`~.cudaLibrary_t`
31826 Library to retrieve function pointer memory from
31827 symbol : bytes
31828 Name of function pointer to retrieve
31830 Returns
31831 -------
31832 cudaError_t
31833 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorSymbolNotFound`
31834 fptr : Any
31835 Returned pointer to a unified function
31837 See Also
31838 --------
31839 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cuLibraryGetUnifiedFunction`
31840 """
31841 cdef cyruntime.cudaLibrary_t cylibrary
31842 if library is None:
31843 plibrary = 0
31844 elif isinstance(library, (cudaLibrary_t,)):
31845 plibrary = int(library)
31846 else:
31847 plibrary = int(cudaLibrary_t(library))
31848 cylibrary = <cyruntime.cudaLibrary_t><void_ptr>plibrary
31849 cdef void_ptr fptr = 0
31850 with nogil:
31851 err = cyruntime.cudaLibraryGetUnifiedFunction(<void**>&fptr, cylibrary, symbol)
31852 if err != cyruntime.cudaSuccess:
31853 return (_dict_cudaError_t[err], None)
31854 return (_dict_cudaError_t[err], fptr)
31856@cython.embedsignature(True)
31857def cudaLibraryGetKernelCount(lib):
31858 """ Returns the number of kernels within a library.
31860 Returns in `count` the number of kernels in `lib`.
31862 Parameters
31863 ----------
31864 lib : :py:obj:`~.cudaLibrary_t`
31865 Library to query
31867 Returns
31868 -------
31869 cudaError_t
31870 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
31871 count : unsigned int
31872 Number of kernels found within the library
31874 See Also
31875 --------
31876 :py:obj:`~.cudaLibraryEnumerateKernels`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cuLibraryGetKernelCount`
31877 """
31878 cdef cyruntime.cudaLibrary_t cylib
31879 if lib is None:
31880 plib = 0
31881 elif isinstance(lib, (cudaLibrary_t,)):
31882 plib = int(lib)
31883 else:
31884 plib = int(cudaLibrary_t(lib))
31885 cylib = <cyruntime.cudaLibrary_t><void_ptr>plib
31886 cdef unsigned int count = 0
31887 with nogil:
31888 err = cyruntime.cudaLibraryGetKernelCount(&count, cylib)
31889 if err != cyruntime.cudaSuccess:
31890 return (_dict_cudaError_t[err], None)
31891 return (_dict_cudaError_t[err], count)
31893@cython.embedsignature(True)
31894def cudaLibraryEnumerateKernels(unsigned int numKernels, lib):
31895 """ Retrieve the kernel handles within a library.
31897 Returns in `kernels` a maximum number of `numKernels` kernel handles
31898 within `lib`. The returned kernel handles become invalid when the
31899 library is unloaded.
31901 Parameters
31902 ----------
31903 numKernels : unsigned int
31904 Maximum number of kernel handles that may be returned to the buffer
31905 lib : :py:obj:`~.cudaLibrary_t`
31906 Library to query from
31908 Returns
31909 -------
31910 cudaError_t
31911 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`
31912 kernels : list[:py:obj:`~.cudaKernel_t`]
31913 Buffer where the kernel handles are returned to
31915 See Also
31916 --------
31917 :py:obj:`~.cudaLibraryGetKernelCount`, :py:obj:`~.cuLibraryEnumerateKernels`
31918 """
31919 cdef cyruntime.cudaLibrary_t cylib
31920 if lib is None:
31921 plib = 0
31922 elif isinstance(lib, (cudaLibrary_t,)):
31923 plib = int(lib)
31924 else:
31925 plib = int(cudaLibrary_t(lib))
31926 cylib = <cyruntime.cudaLibrary_t><void_ptr>plib
31927 cdef cyruntime.cudaKernel_t* cykernels = NULL
31928 pykernels = []
31929 if numKernels != 0:
31930 cykernels = <cyruntime.cudaKernel_t*>calloc(numKernels, sizeof(cyruntime.cudaKernel_t))
31931 if cykernels is NULL:
31932 raise MemoryError('Failed to allocate length x size memory: ' + str(numKernels) + 'x' + str(sizeof(cyruntime.cudaKernel_t)))
31933 with nogil:
31934 err = cyruntime.cudaLibraryEnumerateKernels(cykernels, numKernels, cylib)
31935 if err == cyruntime.cudaSuccess:
31936 pykernels = [cudaKernel_t(init_value=<void_ptr>cykernels[idx]) for idx in range(numKernels)]
31937 if cykernels is not NULL:
31938 free(cykernels)
31939 if err != cyruntime.cudaSuccess:
31940 return (_dict_cudaError_t[err], None)
31941 return (_dict_cudaError_t[err], pykernels)
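# Editorial sketch: size the enumeration from the kernel count, pairing the
# two wrappers above.
def _example_enumerate_kernels(library):
    err, count = cudaLibraryGetKernelCount(library)
    assert err == cudaError_t.cudaSuccess
    err, kernels = cudaLibraryEnumerateKernels(count, library)
    return kernels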
31943@cython.embedsignature(True)
31944def cudaKernelSetAttributeForDevice(kernel, attr not None : cudaFuncAttribute, int value, int device):
31945 """ Sets information about a kernel.
31947 This call sets the value of a specified attribute `attr` on the kernel
31948 `kernel` for the requested device `device` to an integer value
31949 specified by `value`. This function returns :py:obj:`~.cudaSuccess` if
31950 the new value of the attribute could be successfully set. If the set
31951 fails, this call will return an error. Not all attributes can have
31952 values set. Attempting to set a value on a read-only attribute will
31953 result in an error (:py:obj:`~.cudaErrorInvalidValue`)
31955 Note that attributes set using :py:obj:`~.cudaFuncSetAttribute()` will
31956 override the attribute set by this API irrespective of whether the call
31957 to :py:obj:`~.cudaFuncSetAttribute()` is made before or after this API
31958 call. Because of this and the stricter locking requirements mentioned
31959 below, it is suggested that this call be used during the initialization
31960 path and not on each thread accessing `kernel`, such as on kernel
31961 launches or on the critical path.
31963 Valid values for `attr` are:
31965 - :py:obj:`~.cudaFuncAttributeMaxDynamicSharedMemorySize` - The
31966 requested maximum size in bytes of dynamically-allocated shared
31967 memory. The sum of this value and the function attribute
31968 :py:obj:`~.sharedSizeBytes` cannot exceed the device attribute
31969 :py:obj:`~.cudaDevAttrMaxSharedMemoryPerBlockOptin`. The maximal size
31970 of requestable dynamic shared memory may differ by GPU architecture.
31972 - :py:obj:`~.cudaFuncAttributePreferredSharedMemoryCarveout` - On
31973 devices where the L1 cache and shared memory use the same hardware
31974 resources, this sets the shared memory carveout preference, in
31975 percent of the total shared memory. See
31976 :py:obj:`~.cudaDevAttrMaxSharedMemoryPerMultiprocessor`. This is only
31977 a hint, and the driver can choose a different ratio if required to
31978 execute the function.
31980 - :py:obj:`~.cudaFuncAttributeRequiredClusterWidth`: The required
31981 cluster width in blocks. The width, height, and depth values must
31982 either all be 0 or all be positive. The validity of the cluster
31983 dimensions is checked at launch time. If the value is set during
31984 compile time, it cannot be set at runtime. Setting it at runtime will
31985 return cudaErrorNotPermitted.
31987 - :py:obj:`~.cudaFuncAttributeRequiredClusterHeight`: The required
31988 cluster height in blocks. The width, height, and depth values must
31989 either all be 0 or all be positive. The validity of the cluster
31990 dimensions is checked at launch time. If the value is set during
31991 compile time, it cannot be set at runtime. Setting it at runtime will
31992 return cudaErrorNotPermitted.
31994 - :py:obj:`~.cudaFuncAttributeRequiredClusterDepth`: The required
31995 cluster depth in blocks. The width, height, and depth values must
31996 either all be 0 or all be positive. The validity of the cluster
31997 dimensions is checked at launch time. If the value is set at
31998 compile time, it cannot be set at runtime. Setting it at runtime will
31999 return cudaErrorNotPermitted.
32001 - :py:obj:`~.cudaFuncAttributeNonPortableClusterSizeAllowed`: Indicates
32002 whether the function can be launched with non-portable cluster size.
32003 1 is allowed, 0 is disallowed.
32005 - :py:obj:`~.cudaFuncAttributeClusterSchedulingPolicyPreference`: The
32006 block scheduling policy of a function. The value type is
32007 cudaClusterSchedulingPolicy.
32009 Parameters
32010 ----------
32011 kernel : :py:obj:`~.cudaKernel_t`
32012 Kernel to set attribute of
32013 attr : :py:obj:`~.cudaFuncAttribute`
32014 Attribute requested
32015 value : int
32016 Value to set
32017 device : int
32018 Device to set attribute of
32020 Returns
32021 -------
32022 cudaError_t
32023 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDeviceFunction`, :py:obj:`~.cudaErrorInvalidValue`
32025 See Also
32026 --------
32027 :py:obj:`~.cudaLibraryLoadData`, :py:obj:`~.cudaLibraryLoadFromFile`, :py:obj:`~.cudaLibraryUnload`, :py:obj:`~.cudaLibraryGetKernel`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaFuncSetAttribute`, :py:obj:`~.cuKernelSetAttribute`
32029 Notes
32030 -----
32031 The API has stricter locking requirements in comparison to its legacy counterpart :py:obj:`~.cudaFuncSetAttribute()` due to device-wide semantics. If multiple threads are trying to set the same attribute on the same device simultaneously, the attribute setting will depend on the interleavings chosen by the OS scheduler and memory consistency.
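Examples
--------
A minimal sketch (assumes `kern` is a :py:obj:`~.cudaKernel_t` obtained
beforehand, e.g. via :py:obj:`~.cudaLibraryGetKernel`, and that device 0
exists; enum member names follow the C API):
>>> from cuda.bindings import runtime
>>> attr = runtime.cudaFuncAttribute.cudaFuncAttributeMaxDynamicSharedMemorySize
>>> err, = runtime.cudaKernelSetAttributeForDevice(kern, attr, 48 * 1024, 0)
>>> assert err == runtime.cudaError_t.cudaSuccess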
32032 """
32033 cdef cyruntime.cudaKernel_t cykernel
32034 if kernel is None:
32035 pkernel = 0
32036 elif isinstance(kernel, (cudaKernel_t,)):
32037 pkernel = int(kernel)
32038 else:
32039 pkernel = int(cudaKernel_t(kernel))
32040 cykernel = <cyruntime.cudaKernel_t><void_ptr>pkernel
32041 cdef cyruntime.cudaFuncAttribute cyattr = attr.value
32042 with nogil:
32043 err = cyruntime.cudaKernelSetAttributeForDevice(cykernel, cyattr, value, device)
32044 return (_dict_cudaError_t[err],)
32046@cython.embedsignature(True)
32047def cudaDeviceGetDevResource(int device, typename not None : cudaDevResourceType):
32048 """ Get device resources.
32050 Get the `typename` resources available to the `device`. This may often
32051 be the starting point for further partitioning or configuring of
32052 resources.
32054 Note: The API is not supported on 32-bit platforms.
32056 Parameters
32057 ----------
32058 device : int
32059 Device to get resource for
32060 typename : :py:obj:`~.cudaDevResourceType`
32061 Type of resource to retrieve
32063 Returns
32064 -------
32065 cudaError_t
32066 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidResourceType`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32067 resource : :py:obj:`~.cudaDevResource`
32068 Output pointer to a cudaDevResource structure
32070 See Also
32071 --------
32072 :py:obj:`~.cuDeviceGetDevResource`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaDevResourceGenerateDesc`
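Examples
--------
A minimal sketch (assumes device 0; the enum member name follows the C
API):
>>> from cuda.bindings import runtime
>>> err, sm = runtime.cudaDeviceGetDevResource(
...     0, runtime.cudaDevResourceType.cudaDevResourceTypeSm)
>>> assert err == runtime.cudaError_t.cudaSuccess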
32073 """
32074 cdef cudaDevResource resource = cudaDevResource()
32075 cdef cyruntime.cudaDevResourceType cytypename = typename.value
32076 with nogil:
32077 err = cyruntime.cudaDeviceGetDevResource(device, <cyruntime.cudaDevResource*>resource._pvt_ptr, cytypename)
32078 if err != cyruntime.cudaSuccess:
32079 return (_dict_cudaError_t[err], None)
32080 return (_dict_cudaError_t[err], resource)
32082@cython.embedsignature(True)
32083def cudaDevSmResourceSplitByCount(unsigned int nbGroups, input_ : Optional[cudaDevResource], unsigned int flags, unsigned int minCount):
32084 """ Splits `cudaDevResourceTypeSm` resources.
32086 Splits `cudaDevResourceTypeSm` resources into `nbGroups`, adhering to
32087 the minimum SM count specified in `minCount` and the usage flags in
32088 `flags`. If `result` is NULL, the API simulates a split and reports
32089 the number of groups that would be created in `nbGroups`. Otherwise,
32090 `nbGroups` must point to the number of elements in `result` and, on
32091 return, the API will overwrite `nbGroups` with the number actually
32092 created. The groups are written to the array in `result`. `nbGroups`
32093 can be less than the total if a smaller number of groups is
32094 needed.
32096 This API is used to spatially partition the input resource. The input
32097 resource needs to come from one of
32098 :py:obj:`~.cudaDeviceGetDevResource`, or
32099 :py:obj:`~.cudaExecutionCtxGetDevResource`. A limitation of the API is
32100 that the output results cannot be split again without first creating a
32101 descriptor and a green context with that descriptor.
32103 When creating the groups, the API will take into account the
32104 performance and functional characteristics of the input resource, and
32105 guarantee a split that will create a disjoint set of symmetrical
32106 partitions. This may yield fewer groups than simply dividing
32107 the total SM count by `minCount`, due to cluster requirements or
32108 alignment and granularity requirements for the minCount. These
32109 requirements can be queried with :py:obj:`~.cudaDeviceGetDevResource`,
32110 or :py:obj:`~.cudaExecutionCtxGetDevResource` for
32111 :py:obj:`~.cudaDevResourceTypeSm`, using the `minSmPartitionSize` and
32112 `smCoscheduledAlignment` fields to determine minimum partition size and
32113 alignment granularity, respectively.
32115 The `remainder` set does not have the same functional or performance
32116 guarantees as the groups in `result`. Its use should be carefully
32117 planned and future partitions of the `remainder` set are discouraged.
32119 The following flags are supported:
32121 - `cudaDevSmResourceSplitIgnoreSmCoscheduling` : Lower the minimum SM
32122 count and alignment, and treat each SM independently of its hierarchy.
32123 This allows more fine-grained partitions, but at the cost of advanced
32124 features (such as large clusters on compute capability 9.0+).
32126 - `cudaDevSmResourceSplitMaxPotentialClusterSize` : Compute Capability
32127 9.0+ only. Attempt to create groups that may allow for maximally
32128 sized thread clusters. This can be queried post green context
32129 creation using :py:obj:`~.cudaOccupancyMaxPotentialClusterSize`.
32131 A successful API call must either have:
32133 - A valid array of `result` pointers of size passed in `nbGroups`, with
32134 `input` of type `cudaDevResourceTypeSm`. Value of `minCount` must be
32135 between 0 and the SM count specified in `input`. `remaining` may be
32136 NULL.
32138 - NULL passed in for `result`, with a valid integer pointer in
32139 `nbGroups` and `input` of type `cudaDevResourceTypeSm`. Value of
32140 `minCount` must be between 0 and the SM count specified in `input`.
32141 `remaining` may be NULL. This queries the number of groups that would
32142 be created by the API.
32144 Note: The API is not supported on 32-bit platforms.
32146 Parameters
32147 ----------
32148 nbGroups : unsigned int
32149 This is a pointer, specifying the number of groups that would be or
32150 should be created as described below.
32151 input : :py:obj:`~.cudaDevResource`
32152 Input SM resource to be split. Must be a valid `cudaDevResourceTypeSm` resource.
32153 flags : unsigned int
32154 Flags specifying how these partitions are used or which constraints
32155 to abide by when splitting the input. Zero is valid for default
32156 behavior.
32157 minCount : unsigned int
32158 Minimum number of SMs required
32160 Returns
32161 -------
32162 cudaError_t
32163 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorInvalidResourceType`, :py:obj:`~.cudaErrorInvalidResourceConfiguration`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32164 result : list[:py:obj:`~.cudaDevResource`]
32165 Output array of `cudaDevResource` resources. Can be NULL to query
32166 the number of groups.
32167 nbGroups : unsigned int
32168 This is a pointer, specifying the number of groups that would be or
32169 should be created as described below.
32170 remaining : :py:obj:`~.cudaDevResource`
32171 If the input resource cannot be cleanly split among `nbGroups`, the
32172 remaining is placed in here. Can be omitted (NULL) if the user
32173 does not need the remaining set.
32175 See Also
32176 --------
32177 :py:obj:`~.cuDevSmResourceSplitByCount`, :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaDevResourceGenerateDesc`
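Examples
--------
A minimal query-then-split sketch (assumes device 0):
>>> from cuda.bindings import runtime
>>> _, sm = runtime.cudaDeviceGetDevResource(
...     0, runtime.cudaDevResourceType.cudaDevResourceTypeSm)
>>> # Request up to two groups of at least four SMs each, default flags.
>>> err, groups, nbGroups, remaining = runtime.cudaDevSmResourceSplitByCount(
...     2, sm, 0, 4)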
32178 """
32179 cdef cyruntime.cudaDevResource* cyresult = NULL
32180 pyresult = [cudaDevResource() for idx in range(nbGroups)]
32181 if nbGroups != 0:
32182 cyresult = <cyruntime.cudaDevResource*>calloc(nbGroups, sizeof(cyruntime.cudaDevResource))
32183 if cyresult is NULL:
32184 raise MemoryError('Failed to allocate length x size memory: ' + str(nbGroups) + 'x' + str(sizeof(cyruntime.cudaDevResource)))
32185 cdef unsigned int cynbGroups = nbGroups
32186 cdef cyruntime.cudaDevResource* cyinput__ptr = input_._pvt_ptr if input_ is not None else NULL
32187 cdef cudaDevResource remaining = cudaDevResource()
32188 with nogil:
32189 err = cyruntime.cudaDevSmResourceSplitByCount(cyresult, &cynbGroups, cyinput__ptr, <cyruntime.cudaDevResource*>remaining._pvt_ptr, flags, minCount)
32190 if cudaError_t(err) == cudaError_t(0):
32191 for idx in range(nbGroups):
32192 string.memcpy((<cudaDevResource>pyresult[idx])._pvt_ptr, &cyresult[idx], sizeof(cyruntime.cudaDevResource))
32193 if cyresult is not NULL:
32194 free(cyresult)
32195 if err != cyruntime.cudaSuccess:
32196 return (_dict_cudaError_t[err], None, None, None)
32197 return (_dict_cudaError_t[err], pyresult, cynbGroups, remaining)
32199@cython.embedsignature(True)
32200def cudaDevSmResourceSplit(unsigned int nbGroups, input_ : Optional[cudaDevResource], unsigned int flags, groupParams : Optional[cudaDevSmResourceGroupParams]):
32201 """ Splits a `cudaDevResourceTypeSm` resource into structured groups.
32203 This API will split a resource of :py:obj:`~.cudaDevResourceTypeSm`
32204 into `nbGroups` structured device resource groups (the `result` array),
32205 as well as an optional `remainder`, according to a set of requirements
32206 specified in the `groupParams` array. “Structured” here means that
32207 each `result` group contains SMs that are co-scheduled together.
32208 This co-scheduling can be specified via the `coscheduledSmCount` field
32209 of the `groupParams` structure, while the `smCount` will specify how
32210 many SMs are required in total for that result. The remainder is always
32211 “unstructured”; it does not have any set guarantees with respect to co-
32212 scheduling and those properties will need to either be queried via the
32213 occupancy set of APIs or further split into structured groups by this
32214 API.
32216 The API has a discovery mode for use cases where it is difficult to
32217 know ahead of time what the SM count should be. Discovery happens when
32218 the `smCount` field of a given `groupParams` array entry is set to 0 -
32219 the smCount will be filled in by the API with the derived SM count
32220 according to the provided `groupParams` fields and constraints.
32221 Discovery can be used with both a valid result array and with a NULL
32222 `result` pointer value. The latter is useful in situations where the
32223 smCount will end up being zero, which is an invalid value to create a
32224 result entry with, but allowed for discovery purposes when the `result`
32225 is NULL.
32227 The `groupParams` array is evaluated from index 0 to `nbGroups` - 1.
32228 For each index in the `groupParams` array, the API will evaluate which
32229 SMs may be a good fit based on constraints and assign those SMs to
32230 `result`. This evaluation order is important to consider when using
32231 discovery mode, as it helps discover the remaining SMs.
32233 For a valid call:
32235 - `result` should point to a `cudaDevResource` array of size
32236 `nbGroups`, or alternatively, may be NULL, if the developer wishes
32237 for only the groupParams entries to be updated
32239 - `input` should be a valid :py:obj:`~.cudaDevResourceTypeSm` resource
32240 that originates from querying the execution context, or device.
32242 - The `remainder` group may be NULL.
32244 - There are no API `flags` at this time, so the value passed in should
32245 be 0.
32247 - A :py:obj:`~.cudaDevSmResourceGroupParams` array of size `nbGroups`.
32248 Each entry must be zero-initialized.
32250 - `smCount:` must be either 0 or in the range of [2,inputSmCount]
32251 where inputSmCount is the amount of SMs the `input` resource has.
32252 `smCount` must be a multiple of 2, as well as a multiple of
32253 `coscheduledSmCount`. When assigning SMs to a group (and if results
32254 are expected by having the `result` parameter set), `smCount`
32255 cannot end up with 0 or a value less than `coscheduledSmCount`
32256 otherwise :py:obj:`~.cudaErrorInvalidResourceConfiguration` will be
32257 returned.
32259 - `coscheduledSmCount:` allows grouping SMs together in order to be
32260 able to launch clusters on Compute Architecture 9.0+. The default
32261 value may be queried from the device’s
32262 :py:obj:`~.cudaDevResourceTypeSm` resource (8 on Compute
32263 Architecture 9.0+ and 2 otherwise). The maximum is 32 on Compute
32264 Architecture 9.0+ and 2 otherwise.
32266 - `preferredCoscheduledSmCount:` Attempts to merge
32267 `coscheduledSmCount` groups into larger groups, in order to make
32268 use of `preferredClusterDimensions` on Compute Architecture 10.0+.
32269 The default value is set to `coscheduledSmCount`.
32271 - `flags:`
32273 - `cudaDevSmResourceGroupBackfill:` lets `smCount` be a non-multiple of
32274 `coscheduledSmCount`, filling the difference between SM count and
32275 already assigned co-scheduled groupings with other SMs. This lets any
32276 resulting group behave similarly to the `remainder` group, for example.
32278 Example params and their effect:
32280 A groupParams array element is defined in the following order:
32282 **View CUDA Toolkit Documentation for a C++ code example**
32284 **View CUDA Toolkit Documentation for a C++ code example**
32286 **View CUDA Toolkit Documentation for a C++ code example**
32288 **View CUDA Toolkit Documentation for a C++ code example**
32290 The difference between a catch-all param group as the last entry and
32291 the remainder is in two aspects:
32293 - The remainder may be NULL / _TYPE_INVALID (if there are no SMs
32294 remaining), while a result group must always be valid.
32296 - The remainder does not have a structure, while the result group will
32297 always need to adhere to a structure of coscheduledSmCount (even if
32298 it's just 2), and therefore must always have enough coscheduled SMs to
32299 cover that requirement (even with the
32300 `cudaDevSmResourceGroupBackfill` flag enabled).
32302 Splitting an input into N groups can be accomplished by repeatedly
32303 splitting off 1 group and re-splitting the remainder (a bisect
32304 operation). However, it's recommended to accomplish this with a single
32305 call wherever possible.
32307 Parameters
32308 ----------
32309 nbGroups : unsigned int
32310 Specifies the number of groups in `result` and `groupParams`
32311 input : :py:obj:`~.cudaDevResource`
32312 Input SM resource to be split. Must be a valid
32313 `cudaDevResourceTypeSm` resource.
32314 flags : unsigned int
32315 Flags specifying how the API should behave. The value should be 0
32316 for now.
32317 groupParams : :py:obj:`~.cudaDevSmResourceGroupParams`
32318 Description of how the SMs should be split and assigned to the
32319 corresponding result entry.
32321 Returns
32322 -------
32323 cudaError_t
32324 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorInvalidResourceType`, :py:obj:`~.cudaErrorInvalidResourceConfiguration`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32325 result : list[:py:obj:`~.cudaDevResource`]
32326 Output array of `cudaDevResource` resources. Can be NULL, alongside
32327 an smCount of 0, for discovery purpose.
32328 remainder : :py:obj:`~.cudaDevResource`
32329 If splitting the input resource leaves any SMs, the remainder is
32330 placed in here.
32332 See Also
32333 --------
32334 :py:obj:`~.cuDevSmResourceSplit`, :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaDevResourceGenerateDesc`
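Examples
--------
A minimal sketch splitting off one structured group (assumes device 0
and that the :py:obj:`~.cudaDevSmResourceGroupParams` wrapper exposes
the C struct fields as attributes):
>>> from cuda.bindings import runtime
>>> _, sm = runtime.cudaDeviceGetDevResource(
...     0, runtime.cudaDevResourceType.cudaDevResourceTypeSm)
>>> params = runtime.cudaDevSmResourceGroupParams()
>>> params.smCount = 8  # 0 would instead request discovery mode
>>> err, groups, remainder = runtime.cudaDevSmResourceSplit(1, sm, 0, params)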
32335 """
32336 cdef cyruntime.cudaDevResource* cyresult = NULL
32337 pyresult = [cudaDevResource() for idx in range(nbGroups)]
32338 if nbGroups != 0:
32339 cyresult = <cyruntime.cudaDevResource*>calloc(nbGroups, sizeof(cyruntime.cudaDevResource))
32340 if cyresult is NULL:
32341 raise MemoryError('Failed to allocate length x size memory: ' + str(nbGroups) + 'x' + str(sizeof(cyruntime.cudaDevResource)))
32342 cdef cyruntime.cudaDevResource* cyinput__ptr = input_._pvt_ptr if input_ is not None else NULL
32343 cdef cudaDevResource remainder = cudaDevResource()
32344 cdef cyruntime.cudaDevSmResourceGroupParams* cygroupParams_ptr = groupParams._pvt_ptr if groupParams is not None else NULL
32345 with nogil:
32346 err = cyruntime.cudaDevSmResourceSplit(cyresult, nbGroups, cyinput__ptr, <cyruntime.cudaDevResource*>remainder._pvt_ptr, flags, cygroupParams_ptr)
32347 if cudaError_t(err) == cudaError_t(0):
32348 for idx in range(nbGroups):
32349 string.memcpy((<cudaDevResource>pyresult[idx])._pvt_ptr, &cyresult[idx], sizeof(cyruntime.cudaDevResource))
32350 if cyresult is not NULL:
32351 free(cyresult)
32352 if err != cyruntime.cudaSuccess:
32353 return (_dict_cudaError_t[err], None, None)
32354 return (_dict_cudaError_t[err], pyresult, remainder)
32356@cython.embedsignature(True)
32357def cudaDevResourceGenerateDesc(resources : Optional[tuple[cudaDevResource] | list[cudaDevResource]], unsigned int nbResources):
32358 """ Generate a resource descriptor.
32360 Generates a single resource descriptor with the set of resources
32361 specified in `resources`. The generated resource descriptor is
32362 necessary for the creation of green contexts via the
32363 :py:obj:`~.cudaGreenCtxCreate` API. Resources of the same type can be
32364 passed in, provided they meet the requirements as noted below.
32366 A successful API call must have:
32368 - A valid output pointer for the `phDesc` descriptor as well as a valid
32369 array of `resources` pointers, with the array size passed in
32370 `nbResources`. If multiple resources are provided in `resources`, the
32371 device they came from must be the same, otherwise
32372 :py:obj:`~.cudaErrorInvalidResourceConfiguration` is returned. If
32373 multiple resources are provided in `resources` and they are of type
32374 :py:obj:`~.cudaDevResourceTypeSm`, they must be outputs (whether
32375 `result` or `remaining`) from the same split API instance and have
32376 the same smCoscheduledAlignment values, otherwise
32377 :py:obj:`~.cudaErrorInvalidResourceConfiguration` is returned.
32379 Note: The API is not supported on 32-bit platforms.
32381 Parameters
32382 ----------
32383 resources : list[:py:obj:`~.cudaDevResource`]
32384 Array of resources to be included in the descriptor
32385 nbResources : unsigned int
32386 Number of resources passed in `resources`
32388 Returns
32389 -------
32390 cudaError_t
32391 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorInvalidResourceType`, :py:obj:`~.cudaErrorInvalidResourceConfiguration`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorOutOfMemory`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32392 phDesc : :py:obj:`~.cudaDevResourceDesc_t`
32393 Output descriptor
32395 See Also
32396 --------
32397 :py:obj:`~.cuDevResourceGenerateDesc`, :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaGreenCtxCreate`
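Examples
--------
A minimal sketch turning one split result into a descriptor (assumes
device 0):
>>> from cuda.bindings import runtime
>>> _, sm = runtime.cudaDeviceGetDevResource(
...     0, runtime.cudaDevResourceType.cudaDevResourceTypeSm)
>>> _, groups, nb, _ = runtime.cudaDevSmResourceSplitByCount(1, sm, 0, 0)
>>> err, desc = runtime.cudaDevResourceGenerateDesc(groups[:1], 1)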
32398 """
32399 resources = [] if resources is None else resources
32400 if not all(isinstance(_x, (cudaDevResource,)) for _x in resources):
32401 raise TypeError("Argument 'resources' is not instance of type (expected tuple[cyruntime.cudaDevResource,] or list[cyruntime.cudaDevResource,])")
32402 cdef cudaDevResourceDesc_t phDesc = cudaDevResourceDesc_t()
32403 cdef cyruntime.cudaDevResource* cyresources = NULL
32404 if len(resources) > 1:
32405 cyresources = <cyruntime.cudaDevResource*> calloc(len(resources), sizeof(cyruntime.cudaDevResource))
32406 if cyresources is NULL:
32407 raise MemoryError('Failed to allocate length x size memory: ' + str(len(resources)) + 'x' + str(sizeof(cyruntime.cudaDevResource)))
32408 for idx in range(len(resources)):
32409 string.memcpy(&cyresources[idx], (<cudaDevResource>resources[idx])._pvt_ptr, sizeof(cyruntime.cudaDevResource))
32410 elif len(resources) == 1:
32411 cyresources = (<cudaDevResource>resources[0])._pvt_ptr
32412 with nogil:
32413 err = cyruntime.cudaDevResourceGenerateDesc(<cyruntime.cudaDevResourceDesc_t*>phDesc._pvt_ptr, cyresources, nbResources)
32414 if len(resources) > 1 and cyresources is not NULL:
32415 free(cyresources)
32416 if err != cyruntime.cudaSuccess:
32417 return (_dict_cudaError_t[err], None)
32418 return (_dict_cudaError_t[err], phDesc)
32420@cython.embedsignature(True)
32421def cudaGreenCtxCreate(desc, int device, unsigned int flags):
32422 """ Creates a green context with a specified set of resources.
32424 This API creates a green context with the resources specified in the
32425 descriptor `desc` and returns it in the handle represented by `phCtx`.
32427 This API retains the device’s primary context for the lifetime of the
32428 green context. The primary context will be released when the green
32429 context is destroyed. To avoid the overhead of repeated initialization
32430 and teardown, it is recommended to explicitly initialize the device's
32431 primary context ahead of time using :py:obj:`~.cudaInitDevice`. This
32432 ensures that the primary context remains initialized throughout the
32433 program’s lifetime, minimizing overhead during green context creation
32434 and destruction.
32436 The API does not create a default stream for the green context.
32437 Developers are expected to create streams explicitly using
32438 :py:obj:`~.cudaExecutionCtxStreamCreate` to submit work to the green
32439 context.
32441 Note: The API is not supported on 32-bit platforms.
32443 Parameters
32444 ----------
32445 desc : :py:obj:`~.cudaDevResourceDesc_t`
32446 Descriptor generated via :py:obj:`~.cudaDevResourceGenerateDesc`
32447 which contains the set of resources to be used
32448 device : int
32449 Device on which to create the green context.
32450 flags : unsigned int
32451 Green context creation flags. Must be 0, currently reserved for
32452 future use.
32454 Returns
32455 -------
32456 cudaError_t
32457 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorOutOfMemory`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32458 phCtx : :py:obj:`~.cudaExecutionContext_t`
32459 Pointer for the output handle to the green context
32461 See Also
32462 --------
32463 :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaDevResourceGenerateDesc`, :py:obj:`~.cudaExecutionCtxGetDevResource`, :py:obj:`~.cudaExecutionCtxDestroy`, :py:obj:`~.cudaInitDevice`, :py:obj:`~.cudaExecutionCtxStreamCreate`
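Examples
--------
A minimal sketch (assumes `desc` was produced by
:py:obj:`~.cudaDevResourceGenerateDesc` for device 0):
>>> from cuda.bindings import runtime
>>> err, ctx = runtime.cudaGreenCtxCreate(desc, 0, 0)
>>> if err == runtime.cudaError_t.cudaSuccess:
...     _, stream = runtime.cudaExecutionCtxStreamCreate(ctx, 0, 0)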
32464 """
32465 cdef cyruntime.cudaDevResourceDesc_t cydesc
32466 if desc is None:
32467 pdesc = 0
32468 elif isinstance(desc, (cudaDevResourceDesc_t,)):
32469 pdesc = int(desc)
32470 else:
32471 pdesc = int(cudaDevResourceDesc_t(desc))
32472 cydesc = <cyruntime.cudaDevResourceDesc_t><void_ptr>pdesc
32473 cdef cudaExecutionContext_t phCtx = cudaExecutionContext_t()
32474 with nogil:
32475 err = cyruntime.cudaGreenCtxCreate(<cyruntime.cudaExecutionContext_t*>phCtx._pvt_ptr, cydesc, device, flags)
32476 if err != cyruntime.cudaSuccess:
32477 return (_dict_cudaError_t[err], None)
32478 return (_dict_cudaError_t[err], phCtx)
32480@cython.embedsignature(True)
32481def cudaExecutionCtxDestroy(ctx):
32482 """ Destroy a execution context.
32484 Destroys the specified execution context `ctx`. It is the
32485 responsibility of the caller to ensure that no API calls are issued using
32486 `ctx` while :py:obj:`~.cudaExecutionCtxDestroy()` is executing or
32487 afterwards.
32489 If `ctx` is a green context, any resources provisioned for it (that
32490 were initially available via the resource descriptor) are released as
32491 well.
32493 The API does not destroy streams created via
32494 :py:obj:`~.cudaExecutionCtxStreamCreate`. Users are expected to destroy
32495 these streams explicitly using :py:obj:`~.cudaStreamDestroy` to avoid
32496 resource leaks. Once the execution context is destroyed, any subsequent
32497 API calls involving these streams will return
32498 :py:obj:`~.cudaErrorStreamDetached` with the exception of the following
32499 APIs:
32501 - :py:obj:`~.cudaStreamDestroy`. Note this is only supported on CUDA
32502 drivers 13.1 and above.
32504 Additionally, the API will invalidate all active captures on these
32505 streams.
32507 Passing in a `ctx` that was not explicitly created via CUDA Runtime
32508 APIs is not allowed and will result in undefined behavior.
32510 Parameters
32511 ----------
32512 ctx : :py:obj:`~.cudaExecutionContext_t`
32513 Execution context to destroy (required parameter, see note below)
32515 Returns
32516 -------
32517 cudaError_t
32518 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32520 See Also
32521 --------
32522 :py:obj:`~.cudaGreenCtxCreate`
32523 """
32524 cdef cyruntime.cudaExecutionContext_t cyctx
32525 if ctx is None:
32526 pctx = 0
32527 elif isinstance(ctx, (cudaExecutionContext_t,)):
32528 pctx = int(ctx)
32529 else:
32530 pctx = int(cudaExecutionContext_t(ctx))
32531 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32532 with nogil:
32533 err = cyruntime.cudaExecutionCtxDestroy(cyctx)
32534 return (_dict_cudaError_t[err],)
32536@cython.embedsignature(True)
32537def cudaExecutionCtxGetDevResource(ctx, typename not None : cudaDevResourceType):
32538 """ Get context resources.
32540 Get the `typename` resources available to context represented by `ctx`.
32542 Note: The API is not supported on 32-bit platforms.
32544 Parameters
32545 ----------
32546 ctx : :py:obj:`~.cudaExecutionContext_t`
32547 Execution context to get resource for (required parameter, see note
32548 below)
32549 typename : :py:obj:`~.cudaDevResourceType`
32550 Type of resource to retrieve
32552 Returns
32553 -------
32554 cudaError_t
32555 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotSupported`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32556 resource : :py:obj:`~.cudaDevResource`
32557 Output pointer to a cudaDevResource structure
32559 See Also
32560 --------
32561 :py:obj:`~.cudaDeviceGetDevResource`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaDevResourceGenerateDesc`, :py:obj:`~.cudaGreenCtxCreate`
32562 """
32563 cdef cyruntime.cudaExecutionContext_t cyctx
32564 if ctx is None:
32565 pctx = 0
32566 elif isinstance(ctx, (cudaExecutionContext_t,)):
32567 pctx = int(ctx)
32568 else:
32569 pctx = int(cudaExecutionContext_t(ctx))
32570 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32571 cdef cudaDevResource resource = cudaDevResource()
32572 cdef cyruntime.cudaDevResourceType cytypename = typename.value
32573 with nogil:
32574 err = cyruntime.cudaExecutionCtxGetDevResource(cyctx, <cyruntime.cudaDevResource*>resource._pvt_ptr, cytypename)
32575 if err != cyruntime.cudaSuccess:
32576 return (_dict_cudaError_t[err], None)
32577 return (_dict_cudaError_t[err], resource)
32579@cython.embedsignature(True)
32580def cudaExecutionCtxGetDevice(ctx):
32581 """ Returns the device handle for the execution context.
32583 Returns in `device` the handle of the specified execution context's
32584 device. The execution context should not be NULL.
32586 Parameters
32587 ----------
32588 ctx : :py:obj:`~.cudaExecutionContext_t`
32589 Execution context for which to obtain the device (required
32590 parameter, see note below)
32592 Returns
32593 -------
32594 cudaError_t
32595 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`
32596 device : int
32597 Returned device handle for the specified execution context
32599 See Also
32600 --------
32601 :py:obj:`~.cudaGreenCtxCreate`, :py:obj:`~.cudaExecutionCtxDestroy`, :py:obj:`~.cuCtxGetDevice`
32602 """
32603 cdef cyruntime.cudaExecutionContext_t cyctx
32604 if ctx is None:
32605 pctx = 0
32606 elif isinstance(ctx, (cudaExecutionContext_t,)):
32607 pctx = int(ctx)
32608 else:
32609 pctx = int(cudaExecutionContext_t(ctx))
32610 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32611 cdef int device = 0
32612 with nogil:
32613 err = cyruntime.cudaExecutionCtxGetDevice(&device, cyctx)
32614 if err != cyruntime.cudaSuccess:
32615 return (_dict_cudaError_t[err], None)
32616 return (_dict_cudaError_t[err], device)
32618@cython.embedsignature(True)
32619def cudaExecutionCtxGetId(ctx):
32620 """ Returns the unique Id associated with the execution context supplied.
32622 Returns in `ctxId` the unique Id which is associated with a given
32623 context. The Id is unique for the life of the program for this instance
32624 of CUDA. The execution context should not be NULL.
32626 Parameters
32627 ----------
32628 ctx : :py:obj:`~.cudaExecutionContext_t`
32629 Context for which to obtain the Id (required parameter, see note
32630 below)
32632 Returns
32633 -------
32634 cudaError_t
32635 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`
32636 ctxId : unsigned long long
32637 Pointer to store the Id of the context
32639 See Also
32640 --------
32641 :py:obj:`~.cudaGreenCtxCreate`, :py:obj:`~.cudaExecutionCtxDestroy`, :py:obj:`~.cudaExecutionCtxGetDevice`, :py:obj:`~.cuCtxGetId`
32642 """
32643 cdef cyruntime.cudaExecutionContext_t cyctx
32644 if ctx is None:
32645 pctx = 0
32646 elif isinstance(ctx, (cudaExecutionContext_t,)):
32647 pctx = int(ctx)
32648 else:
32649 pctx = int(cudaExecutionContext_t(ctx))
32650 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32651 cdef unsigned long long ctxId = 0
32652 with nogil:
32653 err = cyruntime.cudaExecutionCtxGetId(cyctx, &ctxId)
32654 if err != cyruntime.cudaSuccess:
32655 return (_dict_cudaError_t[err], None)
32656 return (_dict_cudaError_t[err], ctxId)
32658@cython.embedsignature(True)
32659def cudaExecutionCtxStreamCreate(ctx, unsigned int flags, int priority):
32660 """ Creates a stream and initializes it for the given execution context.
32662 The API creates a CUDA stream with the specified `flags` and
32663 `priority`, initializing it with resources as defined at the time of
32664 creating the specified `ctx`. Additionally, the API also enables work
32665 submitted to the stream to be tracked under `ctx`.
32667 The supported values for `flags` are:
32669 - :py:obj:`~.cudaStreamDefault`: Default stream creation flag. This
32670 would be :py:obj:`~.cudaStreamNonBlocking` for streams created on a
32671 green context.
32673 - :py:obj:`~.cudaStreamNonBlocking`: Specifies that work running in the
32674 created stream may run concurrently with work in stream 0 (the NULL
32675 stream), and that the created stream should perform no implicit
32676 synchronization with stream 0
32678 Specifying `priority` affects the scheduling priority of work in the
32679 stream. Priorities provide a hint to preferentially run work with
32680 higher priority when possible, but do not preempt already-running work
32681 or provide any other functional guarantee on execution order.
32682 `priority` follows a convention where lower numbers represent higher
32683 priorities. '0' represents default priority. The range of meaningful
32684 numerical priorities can be queried using
32685 :py:obj:`~.cudaDeviceGetStreamPriorityRange`. If the specified priority
32686 is outside the numerical range returned by
32687 :py:obj:`~.cudaDeviceGetStreamPriorityRange`, it will automatically be
32688 clamped to the lowest or the highest number in the range.
32690 Parameters
32691 ----------
32692 ctx : :py:obj:`~.cudaExecutionContext_t`
32693 Execution context to initialize the stream with (required
32694 parameter, see note below)
32695 flags : unsigned int
32696 Flags for stream creation
32697 priority : int
32698 Stream priority
32700 Returns
32701 -------
32702 cudaError_t
32703 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorOutOfMemory`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`
32704 phStream : :py:obj:`~.cudaStream_t`
32705 Returned stream handle
32707 See Also
32708 --------
32709 :py:obj:`~.cudaStreamDestroy`, :py:obj:`~.cudaGreenCtxCreate`, :py:obj:`~.cudaDeviceGetStreamPriorityRange`, :py:obj:`~.cudaStreamGetFlags`, :py:obj:`~.cudaStreamGetPriority`, :py:obj:`~.cudaStreamGetDevice`, :py:obj:`~.cudaStreamGetDevResource`, :py:obj:`~.cudaLaunchKernel`, :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cudaStreamQuery`, :py:obj:`~.cudaStreamSynchronize`, :py:obj:`~.cudaStreamAddCallback`
32711 Notes
32712 -----
32713 In the current implementation, only compute kernels launched in priority streams are affected by the stream's priority. Stream priorities have no effect on host-to-device and device-to-host memory operations.
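Examples
--------
A minimal sketch (assumes `ctx` is a green context from
:py:obj:`~.cudaGreenCtxCreate`):
>>> from cuda.bindings import runtime
>>> err, stream = runtime.cudaExecutionCtxStreamCreate(
...     ctx, runtime.cudaStreamNonBlocking, 0)
>>> # ... submit work to `stream` ...
>>> _, = runtime.cudaStreamDestroy(stream)  # destroy before the context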
32714 """
32715 cdef cyruntime.cudaExecutionContext_t cyctx
32716 if ctx is None:
32717 pctx = 0
32718 elif isinstance(ctx, (cudaExecutionContext_t,)):
32719 pctx = int(ctx)
32720 else:
32721 pctx = int(cudaExecutionContext_t(ctx))
32722 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32723 cdef cudaStream_t phStream = cudaStream_t()
32724 with nogil:
32725 err = cyruntime.cudaExecutionCtxStreamCreate(<cyruntime.cudaStream_t*>phStream._pvt_ptr, cyctx, flags, priority)
32726 if err != cyruntime.cudaSuccess:
32727 return (_dict_cudaError_t[err], None)
32728 return (_dict_cudaError_t[err], phStream)
32730@cython.embedsignature(True)
32731def cudaExecutionCtxSynchronize(ctx):
32732 """ Block for the specified execution context's tasks to complete.
32734 Blocks until the specified execution context has completed all
32735 preceding requested tasks. If the specified execution context is the
32736 device (primary) context obtained via
32737 :py:obj:`~.cudaDeviceGetExecutionCtx`, green contexts that have been
32738 created on the device will also be synchronized.
32740 The API returns an error if one of the preceding tasks failed.
32742 Parameters
32743 ----------
32744 ctx : :py:obj:`~.cudaExecutionContext_t`
32745 Execution context to synchronize (required parameter, see note
32746 below)
32748 Returns
32749 -------
32750 cudaError_t
32751 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorDeviceUninitialized`, :py:obj:`~.cudaErrorInvalidValue`
32753 See Also
32754 --------
32755 :py:obj:`~.cudaGreenCtxCreate`, :py:obj:`~.cudaExecutionCtxDestroy`, :py:obj:`~.cudaDeviceSynchronize`, :py:obj:`~.cuCtxSynchronize_v2`
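Examples
--------
A minimal sketch using the device (primary) execution context (assumes
device 0):
>>> from cuda.bindings import runtime
>>> _, primary = runtime.cudaDeviceGetExecutionCtx(0)
>>> err, = runtime.cudaExecutionCtxSynchronize(primary)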
32756 """
32757 cdef cyruntime.cudaExecutionContext_t cyctx
32758 if ctx is None:
32759 pctx = 0
32760 elif isinstance(ctx, (cudaExecutionContext_t,)):
32761 pctx = int(ctx)
32762 else:
32763 pctx = int(cudaExecutionContext_t(ctx))
32764 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32765 with nogil:
32766 err = cyruntime.cudaExecutionCtxSynchronize(cyctx)
32767 return (_dict_cudaError_t[err],)
32769@cython.embedsignature(True)
32770def cudaStreamGetDevResource(hStream, typename not None : cudaDevResourceType):
32771 """ Get stream resources.
32773 Get the `typename` resources available to the `hStream` and store them
32774 in `resource`.
32776 Note: The API will return :py:obj:`~.cudaErrorInvalidResourceType` if
32777 `typename` is `cudaDevResourceTypeWorkqueueConfig` or
32778 `cudaDevResourceTypeWorkqueue`.
32780 Parameters
32781 ----------
32782 hStream : :py:obj:`~.CUstream` or :py:obj:`~.cudaStream_t`
32783 Stream to get resource for
32784 typename : :py:obj:`~.cudaDevResourceType`
32785 Type of resource to retrieve
32787 Returns
32788 -------
32789 cudaError_t
32790 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorDeviceUninitialized`, :py:obj:`~.cudaErrorInvalidResourceType`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidHandle`, :py:obj:`~.cudaErrorNotPermitted`, :py:obj:`~.cudaErrorCallRequiresNewerDriver`,
32791 resource : :py:obj:`~.cudaDevResource`
32792 Output pointer to a cudaDevResource structure
32794 See Also
32795 --------
32796 :py:obj:`~.cudaGreenCtxCreate`, :py:obj:`~.cudaExecutionCtxStreamCreate`, :py:obj:`~.cudaStreamCreate`, :py:obj:`~.cudaDevSmResourceSplit`, :py:obj:`~.cudaDevResourceGenerateDesc`, :py:obj:`~.cuStreamGetDevResource`
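Examples
--------
A minimal sketch (assumes `stream` was created via
:py:obj:`~.cudaExecutionCtxStreamCreate`):
>>> from cuda.bindings import runtime
>>> err, res = runtime.cudaStreamGetDevResource(
...     stream, runtime.cudaDevResourceType.cudaDevResourceTypeSm)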
32797 """
32798 cdef cyruntime.cudaStream_t cyhStream
32799 if hStream is None:
32800 phStream = 0
32801 elif isinstance(hStream, (cudaStream_t,driver.CUstream)):
32802 phStream = int(hStream)
32803 else:
32804 phStream = int(cudaStream_t(hStream))
32805 cyhStream = <cyruntime.cudaStream_t><void_ptr>phStream
32806 cdef cudaDevResource resource = cudaDevResource()
32807 cdef cyruntime.cudaDevResourceType cytypename = typename.value
32808 with nogil:
32809 err = cyruntime.cudaStreamGetDevResource(cyhStream, <cyruntime.cudaDevResource*>resource._pvt_ptr, cytypename)
32810 if err != cyruntime.cudaSuccess:
32811 return (_dict_cudaError_t[err], None)
32812 return (_dict_cudaError_t[err], resource)
32814@cython.embedsignature(True)
32815def cudaExecutionCtxRecordEvent(ctx, event):
32816 """ Records an event for the specified execution context.
32818 Captures in `event` all the activities of the execution context `ctx`
32819 at the time of this call. `event` and `ctx` must be from the same CUDA
32820 device, otherwise :py:obj:`~.cudaErrorInvalidHandle` will be returned.
32821 Calls such as :py:obj:`~.cudaEventQuery()` or
32822 :py:obj:`~.cudaExecutionCtxWaitEvent()` will then examine or wait for
32823 completion of the work that was captured. Uses of `ctx` after this call
32824 do not modify `event`. If the execution context passed to `ctx` is the
32825 device (primary) context obtained via
32826 :py:obj:`~.cudaDeviceGetExecutionCtx()`, `event` will capture all the
32827 activities of the green contexts created on the device as well.
32829 Parameters
32830 ----------
32831 ctx : :py:obj:`~.cudaExecutionContext_t`
32832 Execution context to record event for (required parameter, see note
32833 below)
32834 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
32835 Event to record
32837 Returns
32838 -------
32839 cudaError_t
32840 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidHandle`, :py:obj:`~.cudaErrorStreamCaptureUnsupported`
32842 See Also
32843 --------
32844 :py:obj:`~.cudaEventRecord`, :py:obj:`~.cudaExecutionCtxWaitEvent`, :py:obj:`~.cuCtxRecordEvent`, :py:obj:`~.cuGreenCtxRecordEvent`
32846 Notes
32847 -----
32848 The API will return :py:obj:`~.cudaErrorStreamCaptureUnsupported` if the specified execution context `ctx` has a stream in the capture mode. In such a case, the call will invalidate all the conflicting captures.
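Examples
--------
A minimal sketch ordering `ctxB` behind work in `ctxA` (assumes both are
execution contexts on the same device as `evt`):
>>> from cuda.bindings import runtime
>>> _, evt = runtime.cudaEventCreate()
>>> _, = runtime.cudaExecutionCtxRecordEvent(ctxA, evt)
>>> _, = runtime.cudaExecutionCtxWaitEvent(ctxB, evt)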
32849 """
32850 cdef cyruntime.cudaEvent_t cyevent
32851 if event is None:
32852 pevent = 0
32853 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
32854 pevent = int(event)
32855 else:
32856 pevent = int(cudaEvent_t(event))
32857 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
32858 cdef cyruntime.cudaExecutionContext_t cyctx
32859 if ctx is None:
32860 pctx = 0
32861 elif isinstance(ctx, (cudaExecutionContext_t,)):
32862 pctx = int(ctx)
32863 else:
32864 pctx = int(cudaExecutionContext_t(ctx))
32865 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32866 with nogil:
32867 err = cyruntime.cudaExecutionCtxRecordEvent(cyctx, cyevent)
32868 return (_dict_cudaError_t[err],)
32870@cython.embedsignature(True)
32871def cudaExecutionCtxWaitEvent(ctx, event):
32872 """ Make an execution context wait on an event.
32874 Makes all future work submitted to execution context `ctx` wait for all
32875 work captured in `event`. The synchronization will be performed on the
32876 device and will not block the calling CPU thread. See
32877 :py:obj:`~.cudaExecutionCtxRecordEvent()` for details on what is
32878 captured by an event. If the execution context passed to `ctx` is the
32879 device (primary) context obtained via
32880 :py:obj:`~.cudaDeviceGetExecutionCtx()`, all green contexts created on
32881 the device will wait for `event` as well.
32883 Parameters
32884 ----------
32885 ctx : :py:obj:`~.cudaExecutionContext_t`
32886 Execution context to wait for (required parameter, see note below)
32887 event : :py:obj:`~.CUevent` or :py:obj:`~.cudaEvent_t`
32888 Event to wait on
32890 Returns
32891 -------
32892 cudaError_t
32893 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorCudartUnloading`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidHandle`, :py:obj:`~.cudaErrorStreamCaptureUnsupported`
32895 See Also
32896 --------
32897 :py:obj:`~.cudaExecutionCtxRecordEvent`, :py:obj:`~.cudaStreamWaitEvent`, :py:obj:`~.cuCtxWaitEvent`, :py:obj:`~.cuGreenCtxWaitEvent`
32899 Notes
32900 -----
32901 `event` may be from a different execution context or device than `ctx`.
32903 The API will return :py:obj:`~.cudaErrorStreamCaptureUnsupported` and invalidate the capture if the specified event `event` is part of an ongoing capture sequence or if the specified execution context `ctx` has a stream in the capture mode.
32904 """
32905 cdef cyruntime.cudaEvent_t cyevent
32906 if event is None:
32907 pevent = 0
32908 elif isinstance(event, (cudaEvent_t,driver.CUevent)):
32909 pevent = int(event)
32910 else:
32911 pevent = int(cudaEvent_t(event))
32912 cyevent = <cyruntime.cudaEvent_t><void_ptr>pevent
32913 cdef cyruntime.cudaExecutionContext_t cyctx
32914 if ctx is None:
32915 pctx = 0
32916 elif isinstance(ctx, (cudaExecutionContext_t,)):
32917 pctx = int(ctx)
32918 else:
32919 pctx = int(cudaExecutionContext_t(ctx))
32920 cyctx = <cyruntime.cudaExecutionContext_t><void_ptr>pctx
32921 with nogil:
32922 err = cyruntime.cudaExecutionCtxWaitEvent(cyctx, cyevent)
32923 return (_dict_cudaError_t[err],)
32925@cython.embedsignature(True)
32926def cudaDeviceGetExecutionCtx(int device):
32927 """ Returns the execution context for a device.
32929 Returns in `ctx` the execution context for the specified device. This
32930 is the device's primary context. The returned context can then be
32931 passed to APIs that take in a cudaExecutionContext_t, enabling explicit
32932 context-based programming without relying on thread-local state.
32934 Passing the returned execution context to
32935 :py:obj:`~.cudaExecutionCtxDestroy()` is not allowed and will result in
32936 undefined behavior.
32938 Parameters
32939 ----------
32940 device : int
32941 Device to get the execution context for
32943 Returns
32944 -------
32945 cudaError_t
32946 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidDevice`
32947 ctx : :py:obj:`~.cudaExecutionContext_t`
32948 Returns the device execution context
32950 See Also
32951 --------
32952 :py:obj:`~.cudaExecutionCtxGetDevice`, :py:obj:`~.cudaExecutionCtxGetId`
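Examples
--------
A minimal sketch (assumes device 0):
>>> from cuda.bindings import runtime
>>> err, primary = runtime.cudaDeviceGetExecutionCtx(0)
>>> _, dev = runtime.cudaExecutionCtxGetDevice(primary)
>>> dev
0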
32953 """
32954 cdef cudaExecutionContext_t ctx = cudaExecutionContext_t()
32955 with nogil:
32956 err = cyruntime.cudaDeviceGetExecutionCtx(<cyruntime.cudaExecutionContext_t*>ctx._pvt_ptr, device)
32957 if err != cyruntime.cudaSuccess:
32958 return (_dict_cudaError_t[err], None)
32959 return (_dict_cudaError_t[err], ctx)
32961@cython.embedsignature(True)
32962def cudaGetExportTable(pExportTableId : Optional[cudaUUID_t]):
32963 """"""
32964 cdef void_ptr ppExportTable = 0
32965 cdef cyruntime.cudaUUID_t* cypExportTableId_ptr = pExportTableId._pvt_ptr if pExportTableId is not None else NULL
32966 with nogil:
32967 err = cyruntime.cudaGetExportTable(<const void**>&ppExportTable, cypExportTableId_ptr)
32968 if err != cyruntime.cudaSuccess:
32969 return (_dict_cudaError_t[err], None)
32970 return (_dict_cudaError_t[err], ppExportTable)
32972@cython.embedsignature(True)
32973def cudaGetKernel(entryFuncAddr):
32974 """ Get pointer to device kernel that matches entry function `entryFuncAddr`.
32976 Returns in `kernelPtr` the device kernel corresponding to the entry
32977 function `entryFuncAddr`.
32979 Note that it is possible for multiple symbols belonging to
32980 different translation units with the same `entryFuncAddr` to be registered
32981 with this CUDA Runtime, so the order in which the translation units are
32982 loaded and registered with the CUDA Runtime can lead to differing
32983 return pointers in `kernelPtr`. A suggested method of ensuring
32984 uniqueness is to limit the visibility of global device functions by using
32985 the static or hidden visibility attribute in the respective translation
32986 units.
32988 Parameters
32989 ----------
32990 entryFuncAddr : Any
32991 Address of device entry function to search kernel for
32993 Returns
32994 -------
32995 cudaError_t
32996 :py:obj:`~.cudaSuccess`
32997 kernelPtr : :py:obj:`~.cudaKernel_t`
32998 Returns the device kernel
33000 See Also
33001 --------
33002 cudaGetKernel (C++ API)
33003 """
33004 cdef cudaKernel_t kernelPtr = cudaKernel_t()
33005 cyentryFuncAddr = _HelperInputVoidPtr(entryFuncAddr)
33006 cdef void* cyentryFuncAddr_ptr = <void*><void_ptr>cyentryFuncAddr.cptr
33007 with nogil:
33008 err = cyruntime.cudaGetKernel(<cyruntime.cudaKernel_t*>kernelPtr._pvt_ptr, cyentryFuncAddr_ptr)
33009 if err != cyruntime.cudaSuccess:
33010 return (_dict_cudaError_t[err], None)
33011 return (_dict_cudaError_t[err], kernelPtr)
33013@cython.embedsignature(True)
33014def make_cudaPitchedPtr(d, size_t p, size_t xsz, size_t ysz):
33015 """ Returns a :py:obj:`~.cudaPitchedPtr` based on input parameters.
33017 Returns a :py:obj:`~.cudaPitchedPtr` based on the specified input
33018 parameters `d`, `p`, `xsz`, and `ysz`.
33020 Parameters
33021 ----------
33022 d : Any
33023 Pointer to allocated memory
33024 p : size_t
33025 Pitch of allocated memory in bytes
33026 xsz : size_t
33027 Logical width of allocation in elements
33028 ysz : size_t
33029 Logical height of allocation in elements
33031 Returns
33032 -------
33035 :py:obj:`~.cudaPitchedPtr`
33036 :py:obj:`~.cudaPitchedPtr` specified by `d`, `p`, `xsz`, and `ysz`
33038 See Also
33039 --------
33040 make_cudaExtent, make_cudaPos
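Examples
--------
A minimal sketch (assumes the pointer and pitch come from
:py:obj:`~.cudaMallocPitch`):
>>> from cuda.bindings import runtime
>>> err, devPtr, pitch = runtime.cudaMallocPitch(256, 64)
>>> p = runtime.make_cudaPitchedPtr(devPtr, pitch, 256, 64)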
33041 """
33042 cyd = _HelperInputVoidPtr(d)
33043 cdef void* cyd_ptr = <void*><void_ptr>cyd.cptr
33044 with nogil:
33045 err = cyruntime.make_cudaPitchedPtr(cyd_ptr, p, xsz, ysz)
33046 cdef cudaPitchedPtr wrapper = cudaPitchedPtr()
33047 wrapper._pvt_ptr[0] = err
33048 return wrapper
33050@cython.embedsignature(True)
33051def make_cudaPos(size_t x, size_t y, size_t z):
33052 """ Returns a :py:obj:`~.cudaPos` based on input parameters.
33054 Returns a :py:obj:`~.cudaPos` based on the specified input parameters
33055 `x`, `y`, and `z`.
33057 Parameters
33058 ----------
33059 x : size_t
33060 X position
33061 y : size_t
33062 Y position
33063 z : size_t
33064 Z position
33066 Returns
33067 -------
33070 :py:obj:`~.cudaPos`
33071 :py:obj:`~.cudaPos` specified by `x`, `y`, and `z`
33073 See Also
33074 --------
33075 make_cudaExtent, make_cudaPitchedPtr
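Examples
--------
A minimal sketch (a zero position is the usual origin for a 3D copy
described by :py:obj:`~.cudaMemcpy3DParms`):
>>> from cuda.bindings import runtime
>>> pos = runtime.make_cudaPos(0, 0, 0)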
33076 """
33077 with nogil:
33078 err = cyruntime.make_cudaPos(x, y, z)
33079 cdef cudaPos wrapper = cudaPos()
33080 wrapper._pvt_ptr[0] = err
33081 return wrapper
33083@cython.embedsignature(True)
33084def make_cudaExtent(size_t w, size_t h, size_t d):
33085 """ Returns a :py:obj:`~.cudaExtent` based on input parameters.
33087 Returns a :py:obj:`~.cudaExtent` based on the specified input
33088 parameters `w`, `h`, and `d`.
33090 Parameters
33091 ----------
33092 w : size_t
33093 Width in elements when referring to array memory, in bytes when
33094 referring to linear memory
33095 h : size_t
33096 Height in elements
33097 d : size_t
33098 Depth in elements
33100 Returns
33101 -------
33104 :py:obj:`~.cudaExtent`
33105 :py:obj:`~.cudaExtent` specified by `w`, `h`, and `d`
33107 See Also
33108 --------
33109 make_cudaPitchedPtr, make_cudaPos
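Examples
--------
A minimal sketch pairing this helper with :py:obj:`~.cudaMalloc3D`:
>>> from cuda.bindings import runtime
>>> extent = runtime.make_cudaExtent(64, 64, 8)
>>> err, pitchedDevPtr = runtime.cudaMalloc3D(extent)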
33110 """
33111 with nogil:
33112 err = cyruntime.make_cudaExtent(w, h, d)
33113 cdef cudaExtent wrapper = cudaExtent()
33114 wrapper._pvt_ptr[0] = err
33115 return wrapper
33117@cython.embedsignature(True)
33118def cudaGraphicsEGLRegisterImage(image, unsigned int flags):
33119 """ Registers an EGL image.
33121 Registers the EGLImageKHR specified by `image` for access by CUDA. A
33122 handle to the registered object is returned as `pCudaResource`.
33123 Additional Mapping/Unmapping is not required for the registered
33124 resource and :py:obj:`~.cudaGraphicsResourceGetMappedEglFrame` can be
33125 directly called on the `pCudaResource`.
33127 The application will be responsible for synchronizing access to shared
33128 objects. The application must ensure that any pending operations which
33129 access the objects have completed before passing control to CUDA. This
33130 may be accomplished by issuing and waiting for a glFinish command on
33131 all GL contexts (for OpenGL, and likewise for other APIs). The
33132 application will also be responsible for ensuring that any pending
33133 operation on the registered CUDA resource has completed prior to
33134 executing subsequent commands in other APIs accessing the same memory
33135 objects. This can be accomplished by calling cuCtxSynchronize or,
33136 preferably, cuEventSynchronize.
33138 The surface's intended usage is specified using `flags`, as follows:
33140 - :py:obj:`~.cudaGraphicsRegisterFlagsNone`: Specifies no hints about
33141 how this resource will be used. It is therefore assumed that this
33142 resource will be read from and written to by CUDA. This is the
33143 default value.
33145 - :py:obj:`~.cudaGraphicsRegisterFlagsReadOnly`: Specifies that CUDA
33146 will not write to this resource.
33148 - :py:obj:`~.cudaGraphicsRegisterFlagsWriteDiscard`: Specifies that
33149 CUDA will not read from this resource and will write over the entire
33150 contents of the resource, so none of the data previously stored in
33151 the resource will be preserved.
33153 The EGLImageKHR is an object which can be used to create an EGLImage
33154 target resource. It is defined as a void pointer:
33155 ``typedef void* EGLImageKHR``
33157 Parameters
33158 ----------
33159 image : :py:obj:`~.EGLImageKHR`
33160 An EGLImageKHR image which can be used to create target resource.
33161 flags : unsigned int
33162 Map flags
33164 Returns
33165 -------
33166 cudaError_t
33167 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33168 pCudaResource : :py:obj:`~.cudaGraphicsResource`
33169 Pointer to the returned object handle
33171 See Also
33172 --------
33173 :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsResourceGetMappedEglFrame`, :py:obj:`~.cuGraphicsEGLRegisterImage`
33174 """
33175 cdef cyruntime.EGLImageKHR cyimage
33176 if image is None:
33177 pimage = 0
33178 elif isinstance(image, (EGLImageKHR,)):
33179 pimage = int(image)
33180 else:
33181 pimage = int(EGLImageKHR(image))
33182 cyimage = <cyruntime.EGLImageKHR><void_ptr>pimage
33183 cdef cudaGraphicsResource_t pCudaResource = cudaGraphicsResource_t()
33184 with nogil:
33185 err = cyruntime.cudaGraphicsEGLRegisterImage(pCudaResource._pvt_ptr, cyimage, flags)
33186 if err != cyruntime.cudaSuccess:
33187 return (_dict_cudaError_t[err], None)
33188 return (_dict_cudaError_t[err], pCudaResource)
33190@cython.embedsignature(True)
33191def cudaEGLStreamConsumerConnect(eglStream):
33192 """ Connect CUDA to EGLStream as a consumer.
33194 Connect CUDA as a consumer to EGLStreamKHR specified by `eglStream`.
33196 The EGLStreamKHR is an EGL object that transfers a sequence of image
33197 frames from one API to another.
33199 Parameters
33200 ----------
33201 eglStream : :py:obj:`~.EGLStreamKHR`
33202 EGLStreamKHR handle
33204 Returns
33205 -------
33206 cudaError_t
33207 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33208 conn : :py:obj:`~.cudaEglStreamConnection`
33209 Pointer to the returned connection handle
33211 See Also
33212 --------
33213 :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerConnect`
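Examples
--------
A minimal sketch (assumes `egl_stream` is an EGLStreamKHR handle
obtained from an EGL binding):
>>> from cuda.bindings import runtime
>>> err, conn = runtime.cudaEGLStreamConsumerConnect(egl_stream)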
33214 """
33215 cdef cyruntime.EGLStreamKHR cyeglStream
33216 if eglStream is None:
33217 peglStream = 0
33218 elif isinstance(eglStream, (EGLStreamKHR,)):
33219 peglStream = int(eglStream)
33220 else:
33221 peglStream = int(EGLStreamKHR(eglStream))
33222 cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>peglStream
33223 cdef cudaEglStreamConnection conn = cudaEglStreamConnection()
33224 with nogil:
33225 err = cyruntime.cudaEGLStreamConsumerConnect(<cyruntime.cudaEglStreamConnection*>conn._pvt_ptr, cyeglStream)
33226 if err != cyruntime.cudaSuccess:
33227 return (_dict_cudaError_t[err], None)
33228 return (_dict_cudaError_t[err], conn)
33230@cython.embedsignature(True)
33231def cudaEGLStreamConsumerConnectWithFlags(eglStream, unsigned int flags):
33232 """ Connect CUDA to EGLStream as a consumer with given flags.
33234 Connect CUDA as a consumer to the EGLStreamKHR specified by `eglStream` with
33235 specified `flags` defined by :py:obj:`~.cudaEglResourceLocationFlags`.
33237 The flags specify whether the consumer wants to access frames from
33238 system memory or video memory. Default is
33239 :py:obj:`~.cudaEglResourceLocationVidmem`.
33241 Parameters
33242 ----------
33243 eglStream : :py:obj:`~.EGLStreamKHR`
33244 EGLStreamKHR handle
33245 flags : unsigned int
33246 Flags denoting the intended location: system or video memory.
33248 Returns
33249 -------
33250 cudaError_t
33251 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33252 conn : :py:obj:`~.cudaEglStreamConnection`
33253 Pointer to the returned connection handle
33255 See Also
33256 --------
33257 :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerConnectWithFlags`
33258 """
33259 cdef cyruntime.EGLStreamKHR cyeglStream
33260 if eglStream is None:
33261 peglStream = 0
33262 elif isinstance(eglStream, (EGLStreamKHR,)):
33263 peglStream = int(eglStream)
33264 else:
33265 peglStream = int(EGLStreamKHR(eglStream))
33266 cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>peglStream
33267 cdef cudaEglStreamConnection conn = cudaEglStreamConnection()
33268 with nogil:
33269 err = cyruntime.cudaEGLStreamConsumerConnectWithFlags(<cyruntime.cudaEglStreamConnection*>conn._pvt_ptr, cyeglStream, flags)
33270 if err != cyruntime.cudaSuccess:
33271 return (_dict_cudaError_t[err], None)
33272 return (_dict_cudaError_t[err], conn)
33274@cython.embedsignature(True)
33275def cudaEGLStreamConsumerDisconnect(conn):
33276 """ Disconnect CUDA as a consumer to EGLStream .
33278 Disconnect CUDA as a consumer to EGLStreamKHR.
33280 Parameters
33281 ----------
33282 conn : :py:obj:`~.cudaEglStreamConnection`
33283 Connection to disconnect.
33285 Returns
33286 -------
33287 cudaError_t
33288 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33290 See Also
33291 --------
33292 :py:obj:`~.cudaEGLStreamConsumerConnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerDisconnect`
33293 """
33294 cdef cyruntime.cudaEglStreamConnection *cyconn
33295 if conn is None:
33296 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
33297 elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
33298 pconn = conn.getPtr()
33299 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
33300 elif isinstance(conn, (int)):
33301 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
33302 else:
33303 raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
33304 with nogil:
33305 err = cyruntime.cudaEGLStreamConsumerDisconnect(cyconn)
33306 return (_dict_cudaError_t[err],)
33308@cython.embedsignature(True)
33309def cudaEGLStreamConsumerAcquireFrame(conn, pCudaResource, pStream, unsigned int timeout):
33310 """ Acquire an image frame from the EGLStream with CUDA as a consumer.
33312 Acquire an image frame from EGLStreamKHR.
33313 :py:obj:`~.cudaGraphicsResourceGetMappedEglFrame` can be called on
33314 `pCudaResource` to get :py:obj:`~.cudaEglFrame`.
33316 Parameters
33317 ----------
33318 conn : :py:obj:`~.cudaEglStreamConnection`
33319 Connection on which to acquire
33320 pCudaResource : :py:obj:`~.cudaGraphicsResource_t`
33321 CUDA resource on which the EGLStream frame will be mapped for use.
33322 pStream : :py:obj:`~.cudaStream_t`
33323 CUDA stream for synchronization and any data migrations implied by
33324 :py:obj:`~.cudaEglResourceLocationFlags`.
33325 timeout : unsigned int
33326 Desired timeout in usec.
33328 Returns
33329 -------
33330 cudaError_t
33331 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`, :py:obj:`~.cudaErrorLaunchTimeout`
33333 See Also
33334 --------
33335 :py:obj:`~.cudaEGLStreamConsumerConnect`, :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerReleaseFrame`, :py:obj:`~.cuEGLStreamConsumerAcquireFrame`
33336 """
33337 cdef cyruntime.cudaStream_t *cypStream
33338 if pStream is None:
33339 cypStream = <cyruntime.cudaStream_t*><void_ptr>NULL
33340 elif isinstance(pStream, (cudaStream_t,driver.CUstream)):
33341 ppStream = pStream.getPtr()
33342 cypStream = <cyruntime.cudaStream_t*><void_ptr>ppStream
33343 elif isinstance(pStream, (int)):
33344 cypStream = <cyruntime.cudaStream_t*><void_ptr>pStream
33345 else:
33346 raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, runtime.cudaStream_t'>, found " + str(type(pStream)))
33347 cdef cyruntime.cudaGraphicsResource_t *cypCudaResource
33348 if pCudaResource is None:
33349 cypCudaResource = <cyruntime.cudaGraphicsResource_t*><void_ptr>NULL
33350 elif isinstance(pCudaResource, (cudaGraphicsResource_t,)):
33351 ppCudaResource = pCudaResource.getPtr()
33352 cypCudaResource = <cyruntime.cudaGraphicsResource_t*><void_ptr>ppCudaResource
33353 elif isinstance(pCudaResource, (int)):
33354 cypCudaResource = <cyruntime.cudaGraphicsResource_t*><void_ptr>pCudaResource
33355 else:
33356 raise TypeError("Argument 'pCudaResource' is not instance of type (expected <class 'int, runtime.cudaGraphicsResource_t'>, found " + str(type(pCudaResource)))
33357 cdef cyruntime.cudaEglStreamConnection *cyconn
33358 if conn is None:
33359 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
33360 elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
33361 pconn = conn.getPtr()
33362 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
33363 elif isinstance(conn, (int)):
33364 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
33365 else:
33366 raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
33367 with nogil:
33368 err = cyruntime.cudaEGLStreamConsumerAcquireFrame(cyconn, cypCudaResource, cypStream, timeout)
33369 return (_dict_cudaError_t[err],)
33371@cython.embedsignature(True)
33372def cudaEGLStreamConsumerReleaseFrame(conn, pCudaResource, pStream):
33373 """ Releases the last frame acquired from the EGLStream.
33375 Release the acquired image frame specified by `pCudaResource` to
33376 EGLStreamKHR.
33378 Parameters
33379 ----------
33380 conn : :py:obj:`~.cudaEglStreamConnection`
33381 Connection on which to release
33382 pCudaResource : :py:obj:`~.cudaGraphicsResource_t`
33383 CUDA resource whose corresponding frame is to be released
33384 pStream : :py:obj:`~.cudaStream_t`
33385 CUDA stream on which release will be done.
33387 Returns
33388 -------
33389 cudaError_t
33390 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33392 See Also
33393 --------
33394 :py:obj:`~.cudaEGLStreamConsumerConnect`, :py:obj:`~.cudaEGLStreamConsumerDisconnect`, :py:obj:`~.cudaEGLStreamConsumerAcquireFrame`, :py:obj:`~.cuEGLStreamConsumerReleaseFrame`
33395 """
33396 cdef cyruntime.cudaStream_t *cypStream
33397 if pStream is None:
33398 cypStream = <cyruntime.cudaStream_t*><void_ptr>NULL
33399 elif isinstance(pStream, (cudaStream_t,driver.CUstream)):
33400 ppStream = pStream.getPtr()
33401 cypStream = <cyruntime.cudaStream_t*><void_ptr>ppStream
33402 elif isinstance(pStream, (int)):
33403 cypStream = <cyruntime.cudaStream_t*><void_ptr>pStream
33404 else:
33405 raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, runtime.cudaStream_t'>, found " + str(type(pStream)))
33406 cdef cyruntime.cudaGraphicsResource_t cypCudaResource
33407 if pCudaResource is None:
33408 ppCudaResource = 0
33409 elif isinstance(pCudaResource, (cudaGraphicsResource_t,)):
33410 ppCudaResource = int(pCudaResource)
33411 else:
33412 ppCudaResource = int(cudaGraphicsResource_t(pCudaResource))
33413 cypCudaResource = <cyruntime.cudaGraphicsResource_t><void_ptr>ppCudaResource
33414 cdef cyruntime.cudaEglStreamConnection *cyconn
33415 if conn is None:
33416 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
33417 elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
33418 pconn = conn.getPtr()
33419 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
33420 elif isinstance(conn, (int)):
33421 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
33422 else:
33423 raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
33424 with nogil:
33425 err = cyruntime.cudaEGLStreamConsumerReleaseFrame(cyconn, cypCudaResource, cypStream)
33426 return (_dict_cudaError_t[err],)
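# Usage sketch (illustrative): a minimal consumer frame loop pairing
# cudaEGLStreamConsumerAcquireFrame with cudaEGLStreamConsumerReleaseFrame.
# `conn` is an established consumer connection, `stream` a cudaStream_t, and
# the 16000 usec timeout is an arbitrary example value:
#
#     resource = cudaGraphicsResource_t()
#     err, = cudaEGLStreamConsumerAcquireFrame(conn, resource, stream, 16000)
#     if err == cudaError_t.cudaSuccess:
#         err, frame = cudaGraphicsResourceGetMappedEglFrame(resource, 0, 0)
#         ...  # read the cudaEglFrame contents here
#         err, = cudaEGLStreamConsumerReleaseFrame(conn, resource, stream)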
33428@cython.embedsignature(True)
33429def cudaEGLStreamProducerConnect(eglStream, width, height):
33430 """ Connect CUDA to EGLStream as a producer.
33432 Connect CUDA as a producer to the EGLStreamKHR specified by `eglStream`.
33434 The EGLStreamKHR is an EGL object that transfers a sequence of image
33435 frames from one API to another.
33437 Parameters
33438 ----------
33439 eglStream : :py:obj:`~.EGLStreamKHR`
33440 EGLStreamKHR handle
33441 width : :py:obj:`~.EGLint`
33442 width of the image to be submitted to the stream
33443 height : :py:obj:`~.EGLint`
33444 height of the image to be submitted to the stream
33446 Returns
33447 -------
33448 cudaError_t
33449 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33450 conn : :py:obj:`~.cudaEglStreamConnection`
33451 Pointer to the returned connection handle
33453 See Also
33454 --------
33455 :py:obj:`~.cudaEGLStreamProducerDisconnect`, :py:obj:`~.cudaEGLStreamProducerPresentFrame`, :py:obj:`~.cudaEGLStreamProducerReturnFrame`, :py:obj:`~.cuEGLStreamProducerConnect`
33456 """
33457 cdef cyruntime.EGLint cyheight
33458 if height is None:
33459 pheight = 0
33460 elif isinstance(height, (EGLint,)):
33461 pheight = int(height)
33462 else:
33463 pheight = int(EGLint(height))
33464 cyheight = <cyruntime.EGLint><void_ptr>pheight
33465 cdef cyruntime.EGLint cywidth
33466 if width is None:
33467 pwidth = 0
33468 elif isinstance(width, (EGLint,)):
33469 pwidth = int(width)
33470 else:
33471 pwidth = int(EGLint(width))
33472 cywidth = <cyruntime.EGLint><void_ptr>pwidth
33473 cdef cyruntime.EGLStreamKHR cyeglStream
33474 if eglStream is None:
33475 peglStream = 0
33476 elif isinstance(eglStream, (EGLStreamKHR,)):
33477 peglStream = int(eglStream)
33478 else:
33479 peglStream = int(EGLStreamKHR(eglStream))
33480 cyeglStream = <cyruntime.EGLStreamKHR><void_ptr>peglStream
33481 cdef cudaEglStreamConnection conn = cudaEglStreamConnection()
33482 with nogil:
33483 err = cyruntime.cudaEGLStreamProducerConnect(<cyruntime.cudaEglStreamConnection*>conn._pvt_ptr, cyeglStream, cywidth, cyheight)
33484 if err != cyruntime.cudaSuccess:
33485 return (_dict_cudaError_t[err], None)
33486 return (_dict_cudaError_t[err], conn)
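# Usage sketch (illustrative). Assuming `egl_stream` is a valid EGLStreamKHR
# handle, a producer connection for 1920x1080 frames could be opened with:
#
#     err, conn = cudaEGLStreamProducerConnect(egl_stream, 1920, 1080)
#     assert err == cudaError_t.cudaSuccess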
33488@cython.embedsignature(True)
33489def cudaEGLStreamProducerDisconnect(conn):
33490 """ Disconnect CUDA as a producer to EGLStream .
33492 Disconnect CUDA as a producer to EGLStreamKHR.
33494 Parameters
33495 ----------
33496 conn : :py:obj:`~.cudaEglStreamConnection`
33497 Connection to disconnect.
33499 Returns
33500 -------
33501 cudaError_t
33502 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33504 See Also
33505 --------
33506 :py:obj:`~.cudaEGLStreamProducerConnect`, :py:obj:`~.cudaEGLStreamProducerPresentFrame`, :py:obj:`~.cudaEGLStreamProducerReturnFrame`, :py:obj:`~.cuEGLStreamProducerDisconnect`
33507 """
33508 cdef cyruntime.cudaEglStreamConnection *cyconn
33509 if conn is None:
33510 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
33511 elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
33512 pconn = conn.getPtr()
33513 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
33514 elif isinstance(conn, (int)):
33515 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
33516 else:
33517 raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
33518 with nogil:
33519 err = cyruntime.cudaEGLStreamProducerDisconnect(cyconn)
33520 return (_dict_cudaError_t[err],)
33522@cython.embedsignature(True)
33523def cudaEGLStreamProducerPresentFrame(conn, eglframe not None : cudaEglFrame, pStream):
33524 """ Present a CUDA eglFrame to the EGLStream with CUDA as a producer.
33526 The :py:obj:`~.cudaEglFrame` is defined as:
33528 **View CUDA Toolkit Documentation for a C++ code example**
33530 For :py:obj:`~.cudaEglFrame` of type :py:obj:`~.cudaEglFrameTypePitch`,
33531 the application may present sub-region of a memory allocation. In that
33532 case, :py:obj:`~.cudaPitchedPtr.ptr` will specify the start address of
33533 the sub-region in the allocation and :py:obj:`~.cudaEglPlaneDesc` will
33534 specify the dimensions of the sub-region.
33536 Parameters
33537 ----------
33538 conn : :py:obj:`~.cudaEglStreamConnection`
33539 Connection on which to present the CUDA array
33540 eglframe : :py:obj:`~.cudaEglFrame`
33541 CUDA EGLStream Producer Frame handle to be sent to the consumer over
33542 EglStream.
33543 pStream : :py:obj:`~.cudaStream_t`
33544 CUDA stream on which to present the frame.
33546 Returns
33547 -------
33548 cudaError_t
33549 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33551 See Also
33552 --------
33553 :py:obj:`~.cudaEGLStreamProducerConnect`, :py:obj:`~.cudaEGLStreamProducerDisconnect`, :py:obj:`~.cudaEGLStreamProducerReturnFrame`, :py:obj:`~.cuEGLStreamProducerPresentFrame`
33554 """
33555 cdef cyruntime.cudaStream_t *cypStream
33556 if pStream is None:
33557 cypStream = <cyruntime.cudaStream_t*><void_ptr>NULL
33558 elif isinstance(pStream, (cudaStream_t,driver.CUstream)):
33559 ppStream = pStream.getPtr()
33560 cypStream = <cyruntime.cudaStream_t*><void_ptr>ppStream
33561 elif isinstance(pStream, (int)):
33562 cypStream = <cyruntime.cudaStream_t*><void_ptr>pStream
33563 else:
33564 raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, runtime.cudaStream_t'>, found " + str(type(pStream)))
33565 cdef cyruntime.cudaEglStreamConnection *cyconn
33566 if conn is None:
33567 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
33568 elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
33569 pconn = conn.getPtr()
33570 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
33571 elif isinstance(conn, (int)):
33572 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
33573 else:
33574 raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
33575 with nogil:
33576 err = cyruntime.cudaEGLStreamProducerPresentFrame(cyconn, eglframe._pvt_ptr[0], cypStream)
33577 return (_dict_cudaError_t[err],)
33579@cython.embedsignature(True)
33580def cudaEGLStreamProducerReturnFrame(conn, eglframe : Optional[cudaEglFrame], pStream):
33581 """ Return the CUDA eglFrame to the EGLStream last released by the consumer.
33583 This API can potentially return cudaErrorLaunchTimeout if the consumer
33584 has not yet returned a frame to the EGL stream. If the timeout is
33585 returned, the application can retry.
33587 Parameters
33588 ----------
33589 conn : :py:obj:`~.cudaEglStreamConnection`
33590 Connection on which to present the CUDA array
33591 eglframe : :py:obj:`~.cudaEglFrame`
33592 CUDA EGLStream Producer Frame handle returned from the consumer over
33593 EglStream.
33594 pStream : :py:obj:`~.cudaStream_t`
33595 CUDA stream on which to return the frame.
33597 Returns
33598 -------
33599 cudaError_t
33600 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorLaunchTimeout`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33602 See Also
33603 --------
33604 :py:obj:`~.cudaEGLStreamProducerConnect`, :py:obj:`~.cudaEGLStreamProducerDisconnect`, :py:obj:`~.cudaEGLStreamProducerPresentFrame`, :py:obj:`~.cuEGLStreamProducerReturnFrame`
33605 """
33606 cdef cyruntime.cudaStream_t *cypStream
33607 if pStream is None:
33608 cypStream = <cyruntime.cudaStream_t*><void_ptr>NULL
33609 elif isinstance(pStream, (cudaStream_t,driver.CUstream)):
33610 ppStream = pStream.getPtr()
33611 cypStream = <cyruntime.cudaStream_t*><void_ptr>ppStream
33612 elif isinstance(pStream, (int)):
33613 cypStream = <cyruntime.cudaStream_t*><void_ptr>pStream
33614 else:
33615 raise TypeError("Argument 'pStream' is not instance of type (expected <class 'int, runtime.cudaStream_t'>, found " + str(type(pStream)))
33616 cdef cyruntime.cudaEglStreamConnection *cyconn
33617 if conn is None:
33618 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>NULL
33619 elif isinstance(conn, (cudaEglStreamConnection,driver.CUeglStreamConnection)):
33620 pconn = conn.getPtr()
33621 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>pconn
33622 elif isinstance(conn, (int)):
33623 cyconn = <cyruntime.cudaEglStreamConnection*><void_ptr>conn
33624 else:
33625 raise TypeError("Argument 'conn' is not instance of type (expected <class 'int, runtime.cudaEglStreamConnection'>, found " + str(type(conn)))
33626 cdef cyruntime.cudaEglFrame* cyeglframe_ptr = eglframe._pvt_ptr if eglframe is not None else NULL
33627 with nogil:
33628 err = cyruntime.cudaEGLStreamProducerReturnFrame(cyconn, cyeglframe_ptr, cypStream)
33629 return (_dict_cudaError_t[err],)
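# Usage sketch (illustrative): a minimal producer loop pairing
# cudaEGLStreamProducerPresentFrame with cudaEGLStreamProducerReturnFrame.
# `frame` is assumed to be a fully populated cudaEglFrame; the retry on
# cudaErrorLaunchTimeout follows the note in the docstring above:
#
#     err, = cudaEGLStreamProducerPresentFrame(conn, frame, stream)
#     returned = cudaEglFrame()
#     while True:
#         err, = cudaEGLStreamProducerReturnFrame(conn, returned, stream)
#         if err != cudaError_t.cudaErrorLaunchTimeout:
#             break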
33631@cython.embedsignature(True)
33632def cudaGraphicsResourceGetMappedEglFrame(resource, unsigned int index, unsigned int mipLevel):
33633 """ Get an eglFrame through which to access a registered EGL graphics resource.
33635 Returns in `*eglFrame` an eglFrame pointer through which the registered
33636 graphics resource `resource` may be accessed. This API can only be
33637 called for EGL graphics resources.
33639 The :py:obj:`~.cudaEglFrame` is defined as
33641 **View CUDA Toolkit Documentation for a C++ code example**
33643 Parameters
33644 ----------
33645 resource : :py:obj:`~.cudaGraphicsResource_t`
33646 Registered resource to access.
33647 index : unsigned int
33648 Index for cubemap surfaces.
33649 mipLevel : unsigned int
33650 Mipmap level for the subresource to access.
33652 Returns
33653 -------
33654 cudaError_t
33655 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorUnknown`
33656 eglFrame : :py:obj:`~.cudaEglFrame`
33657 Returned eglFrame.
33659 See Also
33660 --------
33661 :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsResourceGetMappedEglFrame`
33663 Notes
33664 -----
33665 Note that in case of multiplanar `*eglFrame`, pitch of only first plane (unsigned int :py:obj:`~.cudaEglPlaneDesc.pitch`) is to be considered by the application.
33666 """
33667 cdef cyruntime.cudaGraphicsResource_t cyresource
33668 if resource is None:
33669 presource = 0
33670 elif isinstance(resource, (cudaGraphicsResource_t,)):
33671 presource = int(resource)
33672 else:
33673 presource = int(cudaGraphicsResource_t(resource))
33674 cyresource = <cyruntime.cudaGraphicsResource_t><void_ptr>presource
33675 cdef cudaEglFrame eglFrame = cudaEglFrame()
33676 with nogil:
33677 err = cyruntime.cudaGraphicsResourceGetMappedEglFrame(<cyruntime.cudaEglFrame*>eglFrame._pvt_ptr, cyresource, index, mipLevel)
33678 if err != cyruntime.cudaSuccess:
33679 return (_dict_cudaError_t[err], None)
33680 return (_dict_cudaError_t[err], eglFrame)
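# Usage sketch (illustrative). After a frame has been acquired on `resource`,
# the mapped eglFrame describes its planes; per the note above, only the
# first plane's pitch is meaningful for multiplanar frames (this assumes the
# wrapper mirrors the C struct's `planeDesc` member):
#
#     err, frame = cudaGraphicsResourceGetMappedEglFrame(resource, 0, 0)
#     if err == cudaError_t.cudaSuccess:
#         pitch = frame.planeDesc[0].pitch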
33682@cython.embedsignature(True)
33683def cudaEventCreateFromEGLSync(eglSync, unsigned int flags):
33684 """ Creates an event from EGLSync object.
33686 Creates an event *phEvent from an EGLSyncKHR eglSync with the flags
33687 specified via `flags`. Valid flags include:
33689 - :py:obj:`~.cudaEventDefault`: Default event creation flag.
33691 - :py:obj:`~.cudaEventBlockingSync`: Specifies that the created event
33692 should use blocking synchronization. A CPU thread that uses
33693 :py:obj:`~.cudaEventSynchronize()` to wait on an event created with
33694 this flag will block until the event has actually been completed.
33696 :py:obj:`~.cudaEventRecord` and TimingData are not supported for events
33697 created from EGLSync.
33699 The EGLSyncKHR is an opaque handle to an EGL sync object (`typedef void*
33700 EGLSyncKHR`).
33702 Parameters
33703 ----------
33704 eglSync : :py:obj:`~.EGLSyncKHR`
33705 Opaque handle to EGLSync object
33706 flags : unsigned int
33707 Event creation flags
33709 Returns
33710 -------
33711 cudaError_t
33712 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInitializationError`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorLaunchFailure`, :py:obj:`~.cudaErrorMemoryAllocation`
33713 phEvent : :py:obj:`~.cudaEvent_t`
33714 Returns newly created event
33716 See Also
33717 --------
33718 :py:obj:`~.cudaEventQuery`, :py:obj:`~.cudaEventSynchronize`, :py:obj:`~.cudaEventDestroy`
33719 """
33720 cdef cyruntime.EGLSyncKHR cyeglSync
33721 if eglSync is None:
33722 peglSync = 0
33723 elif isinstance(eglSync, (EGLSyncKHR,)):
33724 peglSync = int(eglSync)
33725 else:
33726 peglSync = int(EGLSyncKHR(eglSync))
33727 cyeglSync = <cyruntime.EGLSyncKHR><void_ptr>peglSync
33728 cdef cudaEvent_t phEvent = cudaEvent_t()
33729 with nogil:
33730 err = cyruntime.cudaEventCreateFromEGLSync(<cyruntime.cudaEvent_t*>phEvent._pvt_ptr, cyeglSync, flags)
33731 if err != cyruntime.cudaSuccess:
33732 return (_dict_cudaError_t[err], None)
33733 return (_dict_cudaError_t[err], phEvent)
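# Usage sketch (illustrative). Assuming `egl_sync` is a valid EGLSyncKHR
# handle, an event can be created and waited on; cudaEventRecord and timing
# are unsupported for such events, per the docstring above:
#
#     err, event = cudaEventCreateFromEGLSync(egl_sync, cudaEventDefault)
#     if err == cudaError_t.cudaSuccess:
#         err, = cudaEventSynchronize(event)
#         err, = cudaEventDestroy(event)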
33735@cython.embedsignature(True)
33736def cudaProfilerStart():
33737 """ Enable profiling.
33739 Enables profile collection by the active profiling tool for the current
33740 context. If profiling is already enabled, then
33741 :py:obj:`~.cudaProfilerStart()` has no effect.
33743 cudaProfilerStart and cudaProfilerStop APIs are used to
33744 programmatically control the profiling granularity by allowing
33745 profiling to be done only on selective pieces of code.
33747 Returns
33748 -------
33749 cudaError_t
33750 :py:obj:`~.cudaSuccess`
33752 See Also
33753 --------
33754 :py:obj:`~.cudaProfilerStop`, :py:obj:`~.cuProfilerStart`
33755 """
33756 with nogil:
33757 err = cyruntime.cudaProfilerStart()
33758 return (_dict_cudaError_t[err],)
33760@cython.embedsignature(True)
33761def cudaProfilerStop():
33762 """ Disable profiling.
33764 Disables profile collection by the active profiling tool for the
33765 current context. If profiling is already disabled, then
33766 :py:obj:`~.cudaProfilerStop()` has no effect.
33768 cudaProfilerStart and cudaProfilerStop APIs are used to
33769 programmatically control the profiling granularity by allowing
33770 profiling to be done only on selective pieces of code.
33772 Returns
33773 -------
33774 cudaError_t
33775 :py:obj:`~.cudaSuccess`
33777 See Also
33778 --------
33779 :py:obj:`~.cudaProfilerStart`, :py:obj:`~.cuProfilerStop`
33780 """
33781 with nogil:
33782 err = cyruntime.cudaProfilerStop()
33783 return (_dict_cudaError_t[err],)
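# Usage sketch (illustrative): bracketing a region of interest so that an
# attached profiler records only the work launched in between:
#
#     err, = cudaProfilerStart()
#     ...  # launch the kernels to be profiled
#     err, = cudaProfilerStop()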
33785@cython.embedsignature(True)
33786def cudaGLGetDevices(unsigned int cudaDeviceCount, deviceList not None : cudaGLDeviceList):
33787 """ Gets the CUDA devices associated with the current OpenGL context.
33789 Returns in `*pCudaDeviceCount` the number of CUDA-compatible devices
33790 corresponding to the current OpenGL context. Also returns in
33791 `*pCudaDevices` at most `cudaDeviceCount` of the CUDA-compatible
33792 devices corresponding to the current OpenGL context. If any of the GPUs
33793 being used by the current OpenGL context are not CUDA capable then the
33794 call will return cudaErrorNoDevice.
33796 Parameters
33797 ----------
33798 cudaDeviceCount : unsigned int
33799 The size of the output device array `pCudaDevices`
33800 deviceList : cudaGLDeviceList
33801 The set of devices to return. This set may be cudaGLDeviceListAll
33802 for all devices, cudaGLDeviceListCurrentFrame for the devices used
33803 to render the current frame (in SLI), or cudaGLDeviceListNextFrame
33804 for the devices used to render the next frame (in SLI).
33806 Returns
33807 -------
33808 cudaError_t
33809 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorNoDevice`, :py:obj:`~.cudaErrorInvalidGraphicsContext`, :py:obj:`~.cudaErrorUnknown`
33813 pCudaDeviceCount : unsigned int
33814 Returned number of CUDA devices corresponding to the current OpenGL
33815 context
33816 pCudaDevices : list[int]
33817 Returned CUDA devices corresponding to the current OpenGL context
33819 See Also
33820 --------
33821 :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGLGetDevices`
33827 Notes
33828 -----
33829 This function is not supported on Mac OS X.
33831 """
33832 cdef unsigned int pCudaDeviceCount = 0
33833 cdef int* cypCudaDevices = NULL
33834 pypCudaDevices = []
33835 if cudaDeviceCount != 0:
33836 cypCudaDevices = <int*>calloc(cudaDeviceCount, sizeof(int))
33837 if cypCudaDevices is NULL:
33838 raise MemoryError('Failed to allocate length x size memory: ' + str(cudaDeviceCount) + 'x' + str(sizeof(int)))
33839 cdef cyruntime.cudaGLDeviceList cydeviceList = deviceList.value
33840 with nogil:
33841 err = cyruntime.cudaGLGetDevices(&pCudaDeviceCount, cypCudaDevices, cudaDeviceCount, cydeviceList)
33842 if cudaError_t(err) == cudaError_t(0):
33843 pypCudaDevices = [<void_ptr>cypCudaDevices[idx] for idx in range(cudaDeviceCount)]
33844 if cypCudaDevices is not NULL:
33845 free(cypCudaDevices)
33846 if err != cyruntime.cudaSuccess:
33847 return (_dict_cudaError_t[err], None, None)
33848 return (_dict_cudaError_t[err], pCudaDeviceCount, pypCudaDevices)
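# Usage sketch (illustrative). With an OpenGL context current on this thread,
# query up to 8 CUDA devices backing it:
#
#     err, count, devs = cudaGLGetDevices(8, cudaGLDeviceList.cudaGLDeviceListAll)
#     if err == cudaError_t.cudaSuccess:
#         print(count, devs)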
33850@cython.embedsignature(True)
33851def cudaGraphicsGLRegisterImage(image, target, unsigned int flags):
33852 """ Register an OpenGL texture or renderbuffer object.
33854 Registers the texture or renderbuffer object specified by `image` for
33855 access by CUDA. A handle to the registered object is returned as
33856 `resource`.
33858 `target` must match the type of the object, and must be one of
33859 :py:obj:`~.GL_TEXTURE_2D`, :py:obj:`~.GL_TEXTURE_RECTANGLE`,
33860 :py:obj:`~.GL_TEXTURE_CUBE_MAP`, :py:obj:`~.GL_TEXTURE_3D`,
33861 :py:obj:`~.GL_TEXTURE_2D_ARRAY`, or :py:obj:`~.GL_RENDERBUFFER`.
33863 The register flags `flags` specify the intended usage, as follows:
33865 - :py:obj:`~.cudaGraphicsRegisterFlagsNone`: Specifies no hints about
33866 how this resource will be used. It is therefore assumed that this
33867 resource will be read from and written to by CUDA. This is the
33868 default value.
33870 - :py:obj:`~.cudaGraphicsRegisterFlagsReadOnly`: Specifies that CUDA
33871 will not write to this resource.
33873 - :py:obj:`~.cudaGraphicsRegisterFlagsWriteDiscard`: Specifies that
33874 CUDA will not read from this resource and will write over the entire
33875 contents of the resource, so none of the data previously stored in
33876 the resource will be preserved.
33878 - :py:obj:`~.cudaGraphicsRegisterFlagsSurfaceLoadStore`: Specifies that
33879 CUDA will bind this resource to a surface reference.
33881 - :py:obj:`~.cudaGraphicsRegisterFlagsTextureGather`: Specifies that
33882 CUDA will perform texture gather operations on this resource.
33884 The following image formats are supported. For brevity's sake, the list
33885 is abbreviated. For example, {GL_R, GL_RG} X {8, 16} expands to the
33886 following 4 formats {GL_R8, GL_R16, GL_RG8, GL_RG16}:
33888 - GL_RED, GL_RG, GL_RGBA, GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA,
33889 GL_INTENSITY
33891 - {GL_R, GL_RG, GL_RGBA} X {8, 16, 16F, 32F, 8UI, 16UI, 32UI, 8I, 16I,
33892 32I}
33894 - {GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY} X {8, 16,
33895 16F_ARB, 32F_ARB, 8UI_EXT, 16UI_EXT, 32UI_EXT, 8I_EXT, 16I_EXT,
33896 32I_EXT}
33898 The following image classes are currently disallowed:
33900 - Textures with borders
33902 - Multisampled renderbuffers
33904 Parameters
33905 ----------
33906 image : :py:obj:`~.GLuint`
33907 name of texture or renderbuffer object to be registered
33908 target : :py:obj:`~.GLenum`
33909 Identifies the type of object specified by `image`
33910 flags : unsigned int
33911 Register flags
33913 Returns
33914 -------
33915 cudaError_t
33916 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorOperatingSystem`, :py:obj:`~.cudaErrorUnknown`
33917 resource : :py:obj:`~.cudaGraphicsResource`
33918 Pointer to the returned object handle
33920 See Also
33921 --------
33922 :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsGLRegisterImage`
33923 """
33924 cdef cyruntime.GLenum cytarget
33925 if target is None:
33926 ptarget = 0
33927 elif isinstance(target, (GLenum,)):
33928 ptarget = int(target)
33929 else:
33930 ptarget = int(GLenum(target))
33931 cytarget = <cyruntime.GLenum><void_ptr>ptarget
33932 cdef cyruntime.GLuint cyimage
33933 if image is None:
33934 pimage = 0
33935 elif isinstance(image, (GLuint,)):
33936 pimage = int(image)
33937 else:
33938 pimage = int(GLuint(image))
33939 cyimage = <cyruntime.GLuint><void_ptr>pimage
33940 cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t()
33941 with nogil:
33942 err = cyruntime.cudaGraphicsGLRegisterImage(resource._pvt_ptr, cyimage, cytarget, flags)
33943 if err != cyruntime.cudaSuccess:
33944 return (_dict_cudaError_t[err], None)
33945 return (_dict_cudaError_t[err], resource)
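# Usage sketch (illustrative). Registering a GL texture for read-only CUDA
# access; 0x0DE1 is the standard GL_TEXTURE_2D enum value, and `tex_id` is
# assumed to be a texture name created in the current GL context:
#
#     flags = cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsReadOnly
#     err, resource = cudaGraphicsGLRegisterImage(tex_id, 0x0DE1, flags)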
33947@cython.embedsignature(True)
33948def cudaGraphicsGLRegisterBuffer(buffer, unsigned int flags):
33949 """ Registers an OpenGL buffer object.
33951 Registers the buffer object specified by `buffer` for access by CUDA. A
33952 handle to the registered object is returned as `resource`. The register
33953 flags `flags` specify the intended usage, as follows:
33955 - :py:obj:`~.cudaGraphicsRegisterFlagsNone`: Specifies no hints about
33956 how this resource will be used. It is therefore assumed that this
33957 resource will be read from and written to by CUDA. This is the
33958 default value.
33960 - :py:obj:`~.cudaGraphicsRegisterFlagsReadOnly`: Specifies that CUDA
33961 will not write to this resource.
33963 - :py:obj:`~.cudaGraphicsRegisterFlagsWriteDiscard`: Specifies that
33964 CUDA will not read from this resource and will write over the entire
33965 contents of the resource, so none of the data previously stored in
33966 the resource will be preserved.
33968 Parameters
33969 ----------
33970 buffer : :py:obj:`~.GLuint`
33971 name of buffer object to be registered
33972 flags : unsigned int
33973 Register flags
33975 Returns
33976 -------
33977 cudaError_t
33978 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorOperatingSystem`, :py:obj:`~.cudaErrorUnknown`
33979 resource : :py:obj:`~.cudaGraphicsResource`
33980 Pointer to the returned object handle
33982 See Also
33983 --------
33984 :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsMapResources`, :py:obj:`~.cudaGraphicsResourceGetMappedPointer`, :py:obj:`~.cuGraphicsGLRegisterBuffer`
33985 """
33986 cdef cyruntime.GLuint cybuffer
33987 if buffer is None:
33988 pbuffer = 0
33989 elif isinstance(buffer, (GLuint,)):
33990 pbuffer = int(buffer)
33991 else:
33992 pbuffer = int(GLuint(buffer))
33993 cybuffer = <cyruntime.GLuint><void_ptr>pbuffer
33994 cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t()
33995 with nogil:
33996 err = cyruntime.cudaGraphicsGLRegisterBuffer(resource._pvt_ptr, cybuffer, flags)
33997 if err != cyruntime.cudaSuccess:
33998 return (_dict_cudaError_t[err], None)
33999 return (_dict_cudaError_t[err], resource)
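# Usage sketch (illustrative). Registering a GL buffer object and obtaining a
# device pointer to it; `vbo` is assumed to be a GL buffer name, and the
# map/unmap and mapped-pointer helpers are the ones defined earlier in this
# module:
#
#     flags = cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsNone
#     err, resource = cudaGraphicsGLRegisterBuffer(vbo, flags)
#     err, = cudaGraphicsMapResources(1, resource, None)
#     err, dev_ptr, size = cudaGraphicsResourceGetMappedPointer(resource)
#     ...  # use dev_ptr from CUDA
#     err, = cudaGraphicsUnmapResources(1, resource, None)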
34001@cython.embedsignature(True)
34002def cudaVDPAUGetDevice(vdpDevice, vdpGetProcAddress):
34003 """ Gets the CUDA device associated with a VdpDevice.
34005 Returns the CUDA device associated with a VdpDevice, if applicable.
34007 Parameters
34008 ----------
34009 vdpDevice : :py:obj:`~.VdpDevice`
34010 A VdpDevice handle
34011 vdpGetProcAddress : :py:obj:`~.VdpGetProcAddress`
34012 VDPAU's VdpGetProcAddress function pointer
34014 Returns
34015 -------
34016 cudaError_t
34017 :py:obj:`~.cudaSuccess`
34018 device : int
34019 Returns the device associated with vdpDevice, or -1 if the device
34020 associated with vdpDevice is not a compute device.
34022 See Also
34023 --------
34024 :py:obj:`~.cudaVDPAUSetVDPAUDevice`, :py:obj:`~.cuVDPAUGetDevice`
34025 """
34026 cdef cyruntime.VdpGetProcAddress *cyvdpGetProcAddress
34027 if vdpGetProcAddress is None:
34028 cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>NULL
34029 elif isinstance(vdpGetProcAddress, (VdpGetProcAddress,)):
34030 pvdpGetProcAddress = vdpGetProcAddress.getPtr()
34031 cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>pvdpGetProcAddress
34032 elif isinstance(vdpGetProcAddress, (int)):
34033 cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>vdpGetProcAddress
34034 else:
34035 raise TypeError("Argument 'vdpGetProcAddress' is not instance of type (expected <class 'int, runtime.VdpGetProcAddress'>, found " + str(type(vdpGetProcAddress)))
34036 cdef cyruntime.VdpDevice cyvdpDevice
34037 if vdpDevice is None:
34038 pvdpDevice = 0
34039 elif isinstance(vdpDevice, (VdpDevice,)):
34040 pvdpDevice = int(vdpDevice)
34041 else:
34042 pvdpDevice = int(VdpDevice(vdpDevice))
34043 cyvdpDevice = <cyruntime.VdpDevice><void_ptr>pvdpDevice
34044 cdef int device = 0
34045 with nogil:
34046 err = cyruntime.cudaVDPAUGetDevice(&device, cyvdpDevice, cyvdpGetProcAddress)
34047 if err != cyruntime.cudaSuccess:
34048 return (_dict_cudaError_t[err], None)
34049 return (_dict_cudaError_t[err], device)
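# Usage sketch (illustrative). `vdp_device` and `vdp_get_proc_address` are
# assumed to come from the host application's VDPAU initialization:
#
#     err, dev = cudaVDPAUGetDevice(vdp_device, vdp_get_proc_address)
#     if err == cudaError_t.cudaSuccess and dev >= 0:
#         err, = cudaSetDevice(dev)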
34051@cython.embedsignature(True)
34052def cudaVDPAUSetVDPAUDevice(int device, vdpDevice, vdpGetProcAddress):
34053 """ Sets a CUDA device to use VDPAU interoperability.
34055 Records `vdpDevice` as the VdpDevice for VDPAU interoperability with
34056 the CUDA device `device` and sets `device` as the current device for
34057 the calling host thread.
34059 This function will immediately initialize the primary context on
34060 `device` if needed.
34062 If `device` has already been initialized then this call will fail with
34063 the error :py:obj:`~.cudaErrorSetOnActiveProcess`. In this case it is
34064 necessary to reset `device` using :py:obj:`~.cudaDeviceReset()` before
34065 VDPAU interoperability on `device` may be enabled.
34067 Parameters
34068 ----------
34069 device : int
34070 Device to use for VDPAU interoperability
34071 vdpDevice : :py:obj:`~.VdpDevice`
34072 The VdpDevice to interoperate with
34073 vdpGetProcAddress : :py:obj:`~.VdpGetProcAddress`
34074 VDPAU's VdpGetProcAddress function pointer
34076 Returns
34077 -------
34078 cudaError_t
34079 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorSetOnActiveProcess`
34081 See Also
34082 --------
34083 :py:obj:`~.cudaGraphicsVDPAURegisterVideoSurface`, :py:obj:`~.cudaGraphicsVDPAURegisterOutputSurface`, :py:obj:`~.cudaDeviceReset`
34084 """
34085 cdef cyruntime.VdpGetProcAddress *cyvdpGetProcAddress
34086 if vdpGetProcAddress is None:
34087 cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>NULL
34088 elif isinstance(vdpGetProcAddress, (VdpGetProcAddress,)):
34089 pvdpGetProcAddress = vdpGetProcAddress.getPtr()
34090 cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>pvdpGetProcAddress
34091 elif isinstance(vdpGetProcAddress, (int)):
34092 cyvdpGetProcAddress = <cyruntime.VdpGetProcAddress*><void_ptr>vdpGetProcAddress
34093 else:
34094 raise TypeError("Argument 'vdpGetProcAddress' is not instance of type (expected <class 'int, runtime.VdpGetProcAddress'>, found " + str(type(vdpGetProcAddress)))
34095 cdef cyruntime.VdpDevice cyvdpDevice
34096 if vdpDevice is None:
34097 pvdpDevice = 0
34098 elif isinstance(vdpDevice, (VdpDevice,)):
34099 pvdpDevice = int(vdpDevice)
34100 else:
34101 pvdpDevice = int(VdpDevice(vdpDevice))
34102 cyvdpDevice = <cyruntime.VdpDevice><void_ptr>pvdpDevice
34103 with nogil:
34104 err = cyruntime.cudaVDPAUSetVDPAUDevice(device, cyvdpDevice, cyvdpGetProcAddress)
34105 return (_dict_cudaError_t[err],)
34107@cython.embedsignature(True)
34108def cudaGraphicsVDPAURegisterVideoSurface(vdpSurface, unsigned int flags):
34109 """ Register a VdpVideoSurface object.
34111 Registers the VdpVideoSurface specified by `vdpSurface` for access by
34112 CUDA. A handle to the registered object is returned as `resource`. The
34113 surface's intended usage is specified using `flags`, as follows:
34115 - :py:obj:`~.cudaGraphicsMapFlagsNone`: Specifies no hints about how
34116 this resource will be used. It is therefore assumed that this
34117 resource will be read from and written to by CUDA. This is the
34118 default value.
34120 - :py:obj:`~.cudaGraphicsMapFlagsReadOnly`: Specifies that CUDA will
34121 not write to this resource.
34123 - :py:obj:`~.cudaGraphicsMapFlagsWriteDiscard`: Specifies that CUDA
34124 will not read from this resource and will write over the entire
34125 contents of the resource, so none of the data previously stored in
34126 the resource will be preserved.
34128 Parameters
34129 ----------
34130 vdpSurface : :py:obj:`~.VdpVideoSurface`
34131 VDPAU object to be registered
34132 flags : unsigned int
34133 Map flags
34135 Returns
34136 -------
34137 cudaError_t
34138 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
34139 resource : :py:obj:`~.cudaGraphicsResource`
34140 Pointer to the returned object handle
34142 See Also
34143 --------
34144 :py:obj:`~.cudaVDPAUSetVDPAUDevice`, :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsVDPAURegisterVideoSurface`
34145 """
34146 cdef cyruntime.VdpVideoSurface cyvdpSurface
34147 if vdpSurface is None:
34148 pvdpSurface = 0
34149 elif isinstance(vdpSurface, (VdpVideoSurface,)):
34150 pvdpSurface = int(vdpSurface)
34151 else:
34152 pvdpSurface = int(VdpVideoSurface(vdpSurface))
34153 cyvdpSurface = <cyruntime.VdpVideoSurface><void_ptr>pvdpSurface
34154 cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t()
34155 with nogil:
34156 err = cyruntime.cudaGraphicsVDPAURegisterVideoSurface(resource._pvt_ptr, cyvdpSurface, flags)
34157 if err != cyruntime.cudaSuccess:
34158 return (_dict_cudaError_t[err], None)
34159 return (_dict_cudaError_t[err], resource)
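# Usage sketch (illustrative). Registering a decoded VDPAU video surface for
# read-only CUDA access; `vdp_surface` is assumed to be a VdpVideoSurface
# created against the device configured via cudaVDPAUSetVDPAUDevice:
#
#     flags = cudaGraphicsMapFlags.cudaGraphicsMapFlagsReadOnly
#     err, resource = cudaGraphicsVDPAURegisterVideoSurface(vdp_surface, flags)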
34161@cython.embedsignature(True)
34162def cudaGraphicsVDPAURegisterOutputSurface(vdpSurface, unsigned int flags):
34163 """ Register a VdpOutputSurface object.
34165 Registers the VdpOutputSurface specified by `vdpSurface` for access by
34166 CUDA. A handle to the registered object is returned as `resource`. The
34167 surface's intended usage is specified using `flags`, as follows:
34169 - :py:obj:`~.cudaGraphicsMapFlagsNone`: Specifies no hints about how
34170 this resource will be used. It is therefore assumed that this
34171 resource will be read from and written to by CUDA. This is the
34172 default value.
34174 - :py:obj:`~.cudaGraphicsMapFlagsReadOnly`: Specifies that CUDA will
34175 not write to this resource.
34177 - :py:obj:`~.cudaGraphicsMapFlagsWriteDiscard`: Specifies that CUDA
34178 will not read from this resource and will write over the entire
34179 contents of the resource, so none of the data previously stored in
34180 the resource will be preserved.
34182 Parameters
34183 ----------
34184 vdpSurface : :py:obj:`~.VdpOutputSurface`
34185 VDPAU object to be registered
34186 flags : unsigned int
34187 Map flags
34189 Returns
34190 -------
34191 cudaError_t
34192 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidDevice`, :py:obj:`~.cudaErrorInvalidValue`, :py:obj:`~.cudaErrorInvalidResourceHandle`, :py:obj:`~.cudaErrorUnknown`
34193 resource : :py:obj:`~.cudaGraphicsResource`
34194 Pointer to the returned object handle
34196 See Also
34197 --------
34198 :py:obj:`~.cudaVDPAUSetVDPAUDevice`, :py:obj:`~.cudaGraphicsUnregisterResource`, :py:obj:`~.cudaGraphicsSubResourceGetMappedArray`, :py:obj:`~.cuGraphicsVDPAURegisterOutputSurface`
34199 """
34200 cdef cyruntime.VdpOutputSurface cyvdpSurface
34201 if vdpSurface is None:
34202 pvdpSurface = 0
34203 elif isinstance(vdpSurface, (VdpOutputSurface,)):
34204 pvdpSurface = int(vdpSurface)
34205 else:
34206 pvdpSurface = int(VdpOutputSurface(vdpSurface))
34207 cyvdpSurface = <cyruntime.VdpOutputSurface><void_ptr>pvdpSurface
34208 cdef cudaGraphicsResource_t resource = cudaGraphicsResource_t()
34209 with nogil:
34210 err = cyruntime.cudaGraphicsVDPAURegisterOutputSurface(resource._pvt_ptr, cyvdpSurface, flags)
34211 if err != cyruntime.cudaSuccess:
34212 return (_dict_cudaError_t[err], None)
34213 return (_dict_cudaError_t[err], resource)
34216@cython.embedsignature(True)
34217def getLocalRuntimeVersion():
34218 """ Returns the CUDA Runtime version of local shared library.
34220 Returns in `*runtimeVersion` the version number of the current CUDA
34221 Runtime instance. The version is returned as (1000 * major + 10 *
34222 minor). For example, CUDA 9.2 would be represented by 9020.
34224 As of CUDA 12.0, this function no longer initializes CUDA. The purpose
34225 of this API is solely to return a compile-time constant stating the
34226 CUDA Toolkit version in the above format.
34228 This function automatically returns :py:obj:`~.cudaErrorInvalidValue`
34229 if the `runtimeVersion` argument is NULL.
34231 Returns
34232 -------
34233 cudaError_t
34234 :py:obj:`~.cudaSuccess`, :py:obj:`~.cudaErrorInvalidValue`
34235 runtimeVersion : int
34236 Returns the CUDA Runtime version.
34238 See Also
34239 --------
34240 :py:obj:`~.cudaDriverGetVersion`, :py:obj:`~.cuDriverGetVersion`
34241 """
34242 cdef int runtimeVersion = 0
34243 err = cyruntime.getLocalRuntimeVersion(&runtimeVersion)
34244 return (cudaError_t(err), runtimeVersion)
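# Usage sketch (illustrative): decoding the packed version number returned by
# getLocalRuntimeVersion():
#
#     err, version = getLocalRuntimeVersion()
#     major, minor = version // 1000, (version % 1000) // 10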
34247cdef class cudaBindingsRuntimeGlobal:
34248 cdef map[void_ptr, void*] _allocated
34250 def __dealloc__(self):
34251 for item in self._allocated:
34252 free(item.second)
34253 self._allocated.clear()
34255cdef cudaBindingsRuntimeGlobal m_global = cudaBindingsRuntimeGlobal()
34258@cython.embedsignature(True)
34259def sizeof(objType):
34260 """ Returns the size of provided CUDA Python structure in bytes
34262 Parameters
34263 ----------
34264 objType : Any
34265 CUDA Python object
34267 Returns
34268 -------
34269 size : int
34270 The size of `objType` in bytes
34271 """
34272 if objType == dim3:
34273 return sizeof(cyruntime.dim3)
34274 if objType == cudaDevResourceDesc_t:
34275 return sizeof(cyruntime.cudaDevResourceDesc_t)
34276 if objType == cudaExecutionContext_t:
34277 return sizeof(cyruntime.cudaExecutionContext_t)
34278 if objType == cudaChannelFormatDesc:
34279 return sizeof(cyruntime.cudaChannelFormatDesc)
34280 if objType == cudaArray_t:
34281 return sizeof(cyruntime.cudaArray_t)
34282 if objType == cudaArray_const_t:
34283 return sizeof(cyruntime.cudaArray_const_t)
34284 if objType == cudaMipmappedArray_t:
34285 return sizeof(cyruntime.cudaMipmappedArray_t)
34286 if objType == cudaMipmappedArray_const_t:
34287 return sizeof(cyruntime.cudaMipmappedArray_const_t)
34288 if objType == cudaArraySparseProperties:
34289 return sizeof(cyruntime.cudaArraySparseProperties)
34290 if objType == cudaArrayMemoryRequirements:
34291 return sizeof(cyruntime.cudaArrayMemoryRequirements)
34292 if objType == cudaPitchedPtr:
34293 return sizeof(cyruntime.cudaPitchedPtr)
34294 if objType == cudaExtent:
34295 return sizeof(cyruntime.cudaExtent)
34296 if objType == cudaPos:
34297 return sizeof(cyruntime.cudaPos)
34298 if objType == cudaMemcpy3DParms:
34299 return sizeof(cyruntime.cudaMemcpy3DParms)
34300 if objType == cudaMemcpyNodeParams:
34301 return sizeof(cyruntime.cudaMemcpyNodeParams)
34302 if objType == cudaMemcpy3DPeerParms:
34303 return sizeof(cyruntime.cudaMemcpy3DPeerParms)
34304 if objType == cudaMemsetParams:
34305 return sizeof(cyruntime.cudaMemsetParams)
34306 if objType == cudaMemsetParamsV2:
34307 return sizeof(cyruntime.cudaMemsetParamsV2)
34308 if objType == cudaAccessPolicyWindow:
34309 return sizeof(cyruntime.cudaAccessPolicyWindow)
34310 if objType == cudaHostFn_t:
34311 return sizeof(cyruntime.cudaHostFn_t)
34312 if objType == cudaHostNodeParams:
34313 return sizeof(cyruntime.cudaHostNodeParams)
34314 if objType == cudaHostNodeParamsV2:
34315 return sizeof(cyruntime.cudaHostNodeParamsV2)
34316 if objType == cudaResourceDesc:
34317 return sizeof(cyruntime.cudaResourceDesc)
34318 if objType == cudaResourceViewDesc:
34319 return sizeof(cyruntime.cudaResourceViewDesc)
34320 if objType == cudaPointerAttributes:
34321 return sizeof(cyruntime.cudaPointerAttributes)
34322 if objType == cudaFuncAttributes:
34323 return sizeof(cyruntime.cudaFuncAttributes)
34324 if objType == cudaMemLocation:
34325 return sizeof(cyruntime.cudaMemLocation)
34326 if objType == cudaMemAccessDesc:
34327 return sizeof(cyruntime.cudaMemAccessDesc)
34328 if objType == cudaMemPoolProps:
34329 return sizeof(cyruntime.cudaMemPoolProps)
34330 if objType == cudaMemPoolPtrExportData:
34331 return sizeof(cyruntime.cudaMemPoolPtrExportData)
34332 if objType == cudaMemAllocNodeParams:
34333 return sizeof(cyruntime.cudaMemAllocNodeParams)
34334 if objType == cudaMemAllocNodeParamsV2:
34335 return sizeof(cyruntime.cudaMemAllocNodeParamsV2)
34336 if objType == cudaMemFreeNodeParams:
34337 return sizeof(cyruntime.cudaMemFreeNodeParams)
34338 if objType == cudaMemcpyAttributes:
34339 return sizeof(cyruntime.cudaMemcpyAttributes)
34340 if objType == cudaOffset3D:
34341 return sizeof(cyruntime.cudaOffset3D)
34342 if objType == cudaMemcpy3DOperand:
34343 return sizeof(cyruntime.cudaMemcpy3DOperand)
34344 if objType == cudaMemcpy3DBatchOp:
34345 return sizeof(cyruntime.cudaMemcpy3DBatchOp)
34346 if objType == CUuuid_st:
34347 return sizeof(cyruntime.CUuuid_st)
34348 if objType == CUuuid:
34349 return sizeof(cyruntime.CUuuid)
34350 if objType == cudaUUID_t:
34351 return sizeof(cyruntime.cudaUUID_t)
34352 if objType == cudaDeviceProp:
34353 return sizeof(cyruntime.cudaDeviceProp)
34354 if objType == cudaIpcEventHandle_st:
34355 return sizeof(cyruntime.cudaIpcEventHandle_st)
34356 if objType == cudaIpcEventHandle_t:
34357 return sizeof(cyruntime.cudaIpcEventHandle_t)
34358 if objType == cudaIpcMemHandle_st:
34359 return sizeof(cyruntime.cudaIpcMemHandle_st)
34360 if objType == cudaIpcMemHandle_t:
34361 return sizeof(cyruntime.cudaIpcMemHandle_t)
34362 if objType == cudaMemFabricHandle_st:
34363 return sizeof(cyruntime.cudaMemFabricHandle_st)
34364 if objType == cudaMemFabricHandle_t:
34365 return sizeof(cyruntime.cudaMemFabricHandle_t)
34366 if objType == cudaExternalMemoryHandleDesc:
34367 return sizeof(cyruntime.cudaExternalMemoryHandleDesc)
34368 if objType == cudaExternalMemoryBufferDesc:
34369 return sizeof(cyruntime.cudaExternalMemoryBufferDesc)
34370 if objType == cudaExternalMemoryMipmappedArrayDesc:
34371 return sizeof(cyruntime.cudaExternalMemoryMipmappedArrayDesc)
34372 if objType == cudaExternalSemaphoreHandleDesc:
34373 return sizeof(cyruntime.cudaExternalSemaphoreHandleDesc)
34374 if objType == cudaExternalSemaphoreSignalParams:
34375 return sizeof(cyruntime.cudaExternalSemaphoreSignalParams)
34376 if objType == cudaExternalSemaphoreWaitParams:
34377 return sizeof(cyruntime.cudaExternalSemaphoreWaitParams)
34378 if objType == cudaDevSmResource:
34379 return sizeof(cyruntime.cudaDevSmResource)
34380 if objType == cudaDevWorkqueueConfigResource:
34381 return sizeof(cyruntime.cudaDevWorkqueueConfigResource)
34382 if objType == cudaDevWorkqueueResource:
34383 return sizeof(cyruntime.cudaDevWorkqueueResource)
34384 if objType == cudaDevSmResourceGroupParams_st:
34385 return sizeof(cyruntime.cudaDevSmResourceGroupParams_st)
34386 if objType == cudaDevSmResourceGroupParams:
34387 return sizeof(cyruntime.cudaDevSmResourceGroupParams)
34388 if objType == cudaDevResource_st:
34389 return sizeof(cyruntime.cudaDevResource_st)
34390 if objType == cudaDevResource:
34391 return sizeof(cyruntime.cudaDevResource)
34392 if objType == cudaStream_t:
34393 return sizeof(cyruntime.cudaStream_t)
34394 if objType == cudaEvent_t:
34395 return sizeof(cyruntime.cudaEvent_t)
34396 if objType == cudaGraphicsResource_t:
34397 return sizeof(cyruntime.cudaGraphicsResource_t)
34398 if objType == cudaExternalMemory_t:
34399 return sizeof(cyruntime.cudaExternalMemory_t)
34400 if objType == cudaExternalSemaphore_t:
34401 return sizeof(cyruntime.cudaExternalSemaphore_t)
34402 if objType == cudaGraph_t:
34403 return sizeof(cyruntime.cudaGraph_t)
34404 if objType == cudaGraphNode_t:
34405 return sizeof(cyruntime.cudaGraphNode_t)
34406 if objType == cudaUserObject_t:
34407 return sizeof(cyruntime.cudaUserObject_t)
34408 if objType == cudaGraphConditionalHandle:
34409 return sizeof(cyruntime.cudaGraphConditionalHandle)
34410 if objType == cudaFunction_t:
34411 return sizeof(cyruntime.cudaFunction_t)
34412 if objType == cudaKernel_t:
34413 return sizeof(cyruntime.cudaKernel_t)
34414 if objType == cudalibraryHostUniversalFunctionAndDataTable:
34415 return sizeof(cyruntime.cudalibraryHostUniversalFunctionAndDataTable)
34416 if objType == cudaLibrary_t:
34417 return sizeof(cyruntime.cudaLibrary_t)
34418 if objType == cudaMemPool_t:
34419 return sizeof(cyruntime.cudaMemPool_t)
34420 if objType == cudaKernelNodeParams:
34421 return sizeof(cyruntime.cudaKernelNodeParams)
34422 if objType == cudaKernelNodeParamsV2:
34423 return sizeof(cyruntime.cudaKernelNodeParamsV2)
34424 if objType == cudaExternalSemaphoreSignalNodeParams:
34425 return sizeof(cyruntime.cudaExternalSemaphoreSignalNodeParams)
34426 if objType == cudaExternalSemaphoreSignalNodeParamsV2:
34427 return sizeof(cyruntime.cudaExternalSemaphoreSignalNodeParamsV2)
34428 if objType == cudaExternalSemaphoreWaitNodeParams:
34429 return sizeof(cyruntime.cudaExternalSemaphoreWaitNodeParams)
34430 if objType == cudaExternalSemaphoreWaitNodeParamsV2:
34431 return sizeof(cyruntime.cudaExternalSemaphoreWaitNodeParamsV2)
34432 if objType == cudaConditionalNodeParams:
34433 return sizeof(cyruntime.cudaConditionalNodeParams)
34434 if objType == cudaChildGraphNodeParams:
34435 return sizeof(cyruntime.cudaChildGraphNodeParams)
34436 if objType == cudaEventRecordNodeParams:
34437 return sizeof(cyruntime.cudaEventRecordNodeParams)
34438 if objType == cudaEventWaitNodeParams:
34439 return sizeof(cyruntime.cudaEventWaitNodeParams)
34440 if objType == cudaGraphNodeParams:
34441 return sizeof(cyruntime.cudaGraphNodeParams)
34442 if objType == cudaGraphEdgeData_st:
34443 return sizeof(cyruntime.cudaGraphEdgeData_st)
34444 if objType == cudaGraphEdgeData:
34445 return sizeof(cyruntime.cudaGraphEdgeData)
34446 if objType == cudaGraphExec_t:
34447 return sizeof(cyruntime.cudaGraphExec_t)
34448 if objType == cudaGraphInstantiateParams_st:
34449 return sizeof(cyruntime.cudaGraphInstantiateParams_st)
34450 if objType == cudaGraphInstantiateParams:
34451 return sizeof(cyruntime.cudaGraphInstantiateParams)
34452 if objType == cudaGraphExecUpdateResultInfo_st:
34453 return sizeof(cyruntime.cudaGraphExecUpdateResultInfo_st)
34454 if objType == cudaGraphExecUpdateResultInfo:
34455 return sizeof(cyruntime.cudaGraphExecUpdateResultInfo)
34456 if objType == cudaGraphDeviceNode_t:
34457 return sizeof(cyruntime.cudaGraphDeviceNode_t)
34458 if objType == cudaGraphKernelNodeUpdate:
34459 return sizeof(cyruntime.cudaGraphKernelNodeUpdate)
34460 if objType == cudaLaunchMemSyncDomainMap_st:
34461 return sizeof(cyruntime.cudaLaunchMemSyncDomainMap_st)
34462 if objType == cudaLaunchMemSyncDomainMap:
34463 return sizeof(cyruntime.cudaLaunchMemSyncDomainMap)
34464 if objType == cudaLaunchAttributeValue:
34465 return sizeof(cyruntime.cudaLaunchAttributeValue)
34466 if objType == cudaLaunchAttribute_st:
34467 return sizeof(cyruntime.cudaLaunchAttribute_st)
34468 if objType == cudaLaunchAttribute:
34469 return sizeof(cyruntime.cudaLaunchAttribute)
34470 if objType == cudaAsyncCallbackHandle_t:
34471 return sizeof(cyruntime.cudaAsyncCallbackHandle_t)
34472 if objType == cudaAsyncNotificationInfo:
34473 return sizeof(cyruntime.cudaAsyncNotificationInfo)
34474 if objType == cudaAsyncNotificationInfo_t:
34475 return sizeof(cyruntime.cudaAsyncNotificationInfo_t)
34476 if objType == cudaAsyncCallback:
34477 return sizeof(cyruntime.cudaAsyncCallback)
34478 if objType == cudaLogsCallbackHandle:
34479 return sizeof(cyruntime.cudaLogsCallbackHandle)
34480 if objType == cudaLogIterator:
34481 return sizeof(cyruntime.cudaLogIterator)
34482 if objType == cudaSurfaceObject_t:
34483 return sizeof(cyruntime.cudaSurfaceObject_t)
34484 if objType == cudaTextureDesc:
34485 return sizeof(cyruntime.cudaTextureDesc)
34486 if objType == cudaTextureObject_t:
34487 return sizeof(cyruntime.cudaTextureObject_t)
34488 if objType == cudaStreamCallback_t:
34489 return sizeof(cyruntime.cudaStreamCallback_t)
34490 if objType == cudaLogsCallback_t:
34491 return sizeof(cyruntime.cudaLogsCallback_t)
34492 if objType == GLenum:
34493 return sizeof(cyruntime.GLenum)
34494 if objType == GLuint:
34495 return sizeof(cyruntime.GLuint)
34496 if objType == EGLImageKHR:
34497 return sizeof(cyruntime.EGLImageKHR)
34498 if objType == EGLStreamKHR:
34499 return sizeof(cyruntime.EGLStreamKHR)
34500 if objType == EGLint:
34501 return sizeof(cyruntime.EGLint)
34502 if objType == EGLSyncKHR:
34503 return sizeof(cyruntime.EGLSyncKHR)
34504 if objType == VdpDevice:
34505 return sizeof(cyruntime.VdpDevice)
34506 if objType == VdpGetProcAddress:
34507 return sizeof(cyruntime.VdpGetProcAddress)
34508 if objType == VdpVideoSurface:
34509 return sizeof(cyruntime.VdpVideoSurface)
34510 if objType == VdpOutputSurface:
34511 return sizeof(cyruntime.VdpOutputSurface)
34512 if objType == cudaStreamAttrValue:
34513 return sizeof(cyruntime.cudaStreamAttrValue)
34514 if objType == cudaKernelNodeAttrValue:
34515 return sizeof(cyruntime.cudaKernelNodeAttrValue)
34516 if objType == cudaEglPlaneDesc_st:
34517 return sizeof(cyruntime.cudaEglPlaneDesc_st)
34518 if objType == cudaEglPlaneDesc:
34519 return sizeof(cyruntime.cudaEglPlaneDesc)
34520 if objType == cudaEglFrame_st:
34521 return sizeof(cyruntime.cudaEglFrame_st)
34522 if objType == cudaEglFrame:
34523 return sizeof(cyruntime.cudaEglFrame)
34524 if objType == cudaEglStreamConnection:
34525 return sizeof(cyruntime.cudaEglStreamConnection)
34526 raise TypeError("Unknown type: " + str(objType))
34528cdef int _add_native_handle_getters() except?-1:
34529 from cuda.bindings.utils import _add_cuda_native_handle_getter
34530 def cudaDevResourceDesc_t_getter(cudaDevResourceDesc_t x): return <uintptr_t><void*><cyruntime.cudaDevResourceDesc_t>(x._pvt_ptr[0])
34531 _add_cuda_native_handle_getter(cudaDevResourceDesc_t, cudaDevResourceDesc_t_getter)
34532 def cudaExecutionContext_t_getter(cudaExecutionContext_t x): return <uintptr_t><void*><cyruntime.cudaExecutionContext_t>(x._pvt_ptr[0])
34533 _add_cuda_native_handle_getter(cudaExecutionContext_t, cudaExecutionContext_t_getter)
34534 def cudaArray_t_getter(cudaArray_t x): return <uintptr_t><void*><cyruntime.cudaArray_t>(x._pvt_ptr[0])
34535 _add_cuda_native_handle_getter(cudaArray_t, cudaArray_t_getter)
34536 def cudaArray_const_t_getter(cudaArray_const_t x): return <uintptr_t><void*><cyruntime.cudaArray_const_t>(x._pvt_ptr[0])
34537 _add_cuda_native_handle_getter(cudaArray_const_t, cudaArray_const_t_getter)
34538 def cudaMipmappedArray_t_getter(cudaMipmappedArray_t x): return <uintptr_t><void*><cyruntime.cudaMipmappedArray_t>(x._pvt_ptr[0])
34539 _add_cuda_native_handle_getter(cudaMipmappedArray_t, cudaMipmappedArray_t_getter)
34540 def cudaMipmappedArray_const_t_getter(cudaMipmappedArray_const_t x): return <uintptr_t><void*><cyruntime.cudaMipmappedArray_const_t>(x._pvt_ptr[0])
34541 _add_cuda_native_handle_getter(cudaMipmappedArray_const_t, cudaMipmappedArray_const_t_getter)
34542 def cudaStream_t_getter(cudaStream_t x): return <uintptr_t><void*><cyruntime.cudaStream_t>(x._pvt_ptr[0])
34543 _add_cuda_native_handle_getter(cudaStream_t, cudaStream_t_getter)
34544 def cudaEvent_t_getter(cudaEvent_t x): return <uintptr_t><void*><cyruntime.cudaEvent_t>(x._pvt_ptr[0])
34545 _add_cuda_native_handle_getter(cudaEvent_t, cudaEvent_t_getter)
34546 def cudaGraphicsResource_t_getter(cudaGraphicsResource_t x): return <uintptr_t><void*><cyruntime.cudaGraphicsResource_t>(x._pvt_ptr[0])
34547 _add_cuda_native_handle_getter(cudaGraphicsResource_t, cudaGraphicsResource_t_getter)
34548 def cudaExternalMemory_t_getter(cudaExternalMemory_t x): return <uintptr_t><void*><cyruntime.cudaExternalMemory_t>(x._pvt_ptr[0])
34549 _add_cuda_native_handle_getter(cudaExternalMemory_t, cudaExternalMemory_t_getter)
34550 def cudaExternalSemaphore_t_getter(cudaExternalSemaphore_t x): return <uintptr_t><void*><cyruntime.cudaExternalSemaphore_t>(x._pvt_ptr[0])
34551 _add_cuda_native_handle_getter(cudaExternalSemaphore_t, cudaExternalSemaphore_t_getter)
34552 def cudaGraph_t_getter(cudaGraph_t x): return <uintptr_t><void*><cyruntime.cudaGraph_t>(x._pvt_ptr[0])
34553 _add_cuda_native_handle_getter(cudaGraph_t, cudaGraph_t_getter)
34554 def cudaGraphNode_t_getter(cudaGraphNode_t x): return <uintptr_t><void*><cyruntime.cudaGraphNode_t>(x._pvt_ptr[0])
34555 _add_cuda_native_handle_getter(cudaGraphNode_t, cudaGraphNode_t_getter)
34556 def cudaUserObject_t_getter(cudaUserObject_t x): return <uintptr_t><void*><cyruntime.cudaUserObject_t>(x._pvt_ptr[0])
34557 _add_cuda_native_handle_getter(cudaUserObject_t, cudaUserObject_t_getter)
34558 def cudaFunction_t_getter(cudaFunction_t x): return <uintptr_t><void*><cyruntime.cudaFunction_t>(x._pvt_ptr[0])
34559 _add_cuda_native_handle_getter(cudaFunction_t, cudaFunction_t_getter)
34560 def cudaKernel_t_getter(cudaKernel_t x): return <uintptr_t><void*><cyruntime.cudaKernel_t>(x._pvt_ptr[0])
34561 _add_cuda_native_handle_getter(cudaKernel_t, cudaKernel_t_getter)
34562 def cudaLibrary_t_getter(cudaLibrary_t x): return <uintptr_t><void*><cyruntime.cudaLibrary_t>(x._pvt_ptr[0])
34563 _add_cuda_native_handle_getter(cudaLibrary_t, cudaLibrary_t_getter)
34564 def cudaMemPool_t_getter(cudaMemPool_t x): return <uintptr_t><void*><cyruntime.cudaMemPool_t>(x._pvt_ptr[0])
34565 _add_cuda_native_handle_getter(cudaMemPool_t, cudaMemPool_t_getter)
34566 def cudaGraphExec_t_getter(cudaGraphExec_t x): return <uintptr_t><void*><cyruntime.cudaGraphExec_t>(x._pvt_ptr[0])
34567 _add_cuda_native_handle_getter(cudaGraphExec_t, cudaGraphExec_t_getter)
34568 def cudaGraphDeviceNode_t_getter(cudaGraphDeviceNode_t x): return <uintptr_t><void*><cyruntime.cudaGraphDeviceNode_t>(x._pvt_ptr[0])
34569 _add_cuda_native_handle_getter(cudaGraphDeviceNode_t, cudaGraphDeviceNode_t_getter)
34570 def cudaAsyncCallbackHandle_t_getter(cudaAsyncCallbackHandle_t x): return <uintptr_t><void*><cyruntime.cudaAsyncCallbackHandle_t>(x._pvt_ptr[0])
34571 _add_cuda_native_handle_getter(cudaAsyncCallbackHandle_t, cudaAsyncCallbackHandle_t_getter)
34572 def cudaLogsCallbackHandle_getter(cudaLogsCallbackHandle x): return <uintptr_t><void*><cyruntime.cudaLogsCallbackHandle>(x._pvt_ptr[0])
34573 _add_cuda_native_handle_getter(cudaLogsCallbackHandle, cudaLogsCallbackHandle_getter)
34574 def EGLImageKHR_getter(EGLImageKHR x): return <uintptr_t><void*><cyruntime.EGLImageKHR>(x._pvt_ptr[0])
34575 _add_cuda_native_handle_getter(EGLImageKHR, EGLImageKHR_getter)
34576 def EGLStreamKHR_getter(EGLStreamKHR x): return <uintptr_t><void*><cyruntime.EGLStreamKHR>(x._pvt_ptr[0])
34577 _add_cuda_native_handle_getter(EGLStreamKHR, EGLStreamKHR_getter)
34578 def EGLSyncKHR_getter(EGLSyncKHR x): return <uintptr_t><void*><cyruntime.EGLSyncKHR>(x._pvt_ptr[0])
34579 _add_cuda_native_handle_getter(EGLSyncKHR, EGLSyncKHR_getter)
34580 def cudaEglStreamConnection_getter(cudaEglStreamConnection x): return <uintptr_t><void*><cyruntime.cudaEglStreamConnection>(x._pvt_ptr[0])
34581 _add_cuda_native_handle_getter(cudaEglStreamConnection, cudaEglStreamConnection_getter)
34582 return 0
34583_add_native_handle_getters()
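# Usage sketch (illustrative). The getters installed above let the public
# helper in cuda.bindings.utils recover the raw address from a wrapper
# object, assuming get_cuda_native_handle is available in this release:
#
#     from cuda.bindings.utils import get_cuda_native_handle
#     err, stream = cudaStreamCreate()
#     handle = get_cuda_native_handle(stream)  # plain int address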